diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 64a05c87e0f80..3c328939955d9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1169,9 +1169,16 @@ def put( This parameter is currently not accepted. complevel : int, 0-9, default None Specifies a compression level for data. - A value of 0 or None disables compression. - min_itemsize : int, dict, or None - Dict of columns that specify minimum str sizes. + A value of 0 or None disables compression. + min_itemsize : int, dict of str: int, or None, default None + Minimum size in bytes for string columns when format = 'table'. + int - Apply the same minimum size to all string columns, + dict - Map column names to their minimum sizes, or + None - Use the default sizing. + Important: This specifies the byte length after encoding, not the + character count. For multi-byte characters, calculate the required + size using the encoded byte length. + See examples below for use. nan_rep : str Str to use as str nan representation. data_columns : list of columns or True, default None @@ -1203,6 +1210,10 @@ def put( >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP >>> store.put("data", df) # doctest: +SKIP + + Sizes for ``min_itemsize`` are byte lengths, not character counts: + ASCII ``'hello'`` is 5 bytes, UTF-8 ``'香'`` is 3 bytes (though only 1 character). + To find the byte length, use ``len(string.encode('utf-8'))``. """ if format is None: format = get_option("io.hdf.default_format") or "fixed" @@ -1329,9 +1340,16 @@ def append( Specifies a compression level for data. A value of 0 or None disables compression. columns : default None - This parameter is currently not accepted, try data_columns. - min_itemsize : int, dict, or None - Dict of columns that specify minimum str sizes. + This parameter is currently not accepted, try data_columns. + min_itemsize : int, dict of str: int, or None, default None + Minimum size in bytes for string columns when format = 'table'. 
+ int - Apply the same minimum size to all string columns, + dict - Map column names to their minimum sizes, or + None - Use the default sizing. + Important: This specifies the byte length after encoding, not the + character count. For multi-byte characters, calculate the required + size using the encoded byte length. + See examples below for use. nan_rep : str Str to use as str nan representation. chunksize : int or None @@ -1364,6 +1382,10 @@ def append( Does *not* check if data being appended overlaps with existing data in the table, so be careful + When appending to an existing table, the min_itemsize parameter has no effect + as column sizes are already fixed. Set min_itemsize when initially creating + the table with put() or the first append() call. + Examples -------- >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) @@ -1377,6 +1399,10 @@ def append( 1 3 4 0 5 6 1 7 8 + + Sizes for ``min_itemsize`` are byte lengths, not character counts: + ASCII ``'hello'`` is 5 bytes, UTF-8 ``'香'`` is 3 bytes (though only 1 character). + To find the byte length, use ``len(string.encode('utf-8'))``. """ if columns is not None: raise TypeError(