From eaacfeb6c45453979df757960d77c66bf0df023d Mon Sep 17 00:00:00 2001 From: Musgrove Date: Thu, 24 Jul 2025 12:52:21 -0700 Subject: [PATCH] modified pytables.py put() and append() docstrings to ensure more accurate documentation for the min_itemsize variable --- pandas/io/pytables.py | 523 +++++++++++++++++++++++------------------- 1 file changed, 287 insertions(+), 236 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 64a05c87e0f80..0cf8d78f28fb9 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2,7 +2,6 @@ High level interface to PyTables for reading and writing pandas data structures to disk """ - from __future__ import annotations from contextlib import suppress @@ -18,9 +17,9 @@ from typing import ( TYPE_CHECKING, Any, + Callable, Final, Literal, - TypeAlias, cast, overload, ) @@ -31,6 +30,7 @@ from pandas._config import ( config, get_option, + using_copy_on_write, using_string_dtype, ) @@ -87,7 +87,6 @@ DatetimeArray, PeriodArray, ) -from pandas.core.arrays.datetimes import tz_to_dtype from pandas.core.arrays.string_ import BaseStringArray import pandas.core.common as com from pandas.core.computation.pytables import ( @@ -99,6 +98,10 @@ extract_array, ) from pandas.core.indexes.api import ensure_index +from pandas.core.internals import ( + ArrayManager, + BlockManager, +) from pandas.io.common import stringify_path from pandas.io.formats.printing import ( @@ -108,7 +111,6 @@ if TYPE_CHECKING: from collections.abc import ( - Callable, Hashable, Iterator, Sequence, @@ -141,6 +143,13 @@ _default_encoding = "UTF-8" +def _ensure_decoded(s): + """if we have bytes, decode them to unicode""" + if isinstance(s, np.bytes_): + s = s.decode("UTF-8") + return s + + def _ensure_encoding(encoding: str | None) -> str: # set the encoding if we need if encoding is None: @@ -161,7 +170,7 @@ def _ensure_str(name): return name -Term: TypeAlias = PyTablesExpr +Term = PyTablesExpr def _ensure_term(where, scope_level: int): @@ -300,14 +309,14 @@ def to_hdf( dropna=dropna, ) - if isinstance(path_or_buf, HDFStore): - f(path_or_buf) - else: - path_or_buf = stringify_path(path_or_buf) + path_or_buf = stringify_path(path_or_buf) + if isinstance(path_or_buf, str): with HDFStore( path_or_buf, mode=mode, complevel=complevel, complib=complib ) as store: f(store) + else: + f(path_or_buf) def read_hdf( @@ -362,7 +371,7 @@ def read_hdf( A list of Term (or convertible) objects. start : int, optional Row number to start selection. - stop : int, optional + stop : int, optional Row number to stop selection. columns : list, optional A list of columns names to return. 
@@ -392,9 +401,9 @@ def read_hdf( Examples -------- - >>> df = pd.DataFrame([[1, 1.0, "a"]], columns=["x", "y", "z"]) # doctest: +SKIP - >>> df.to_hdf("./store.h5", "data") # doctest: +SKIP - >>> reread = pd.read_hdf("./store.h5") # doctest: +SKIP + >>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP + >>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP + >>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP """ if mode not in ["r", "r+", "a"]: raise ValueError( @@ -535,9 +544,9 @@ class HDFStore: Examples -------- >>> bar = pd.DataFrame(np.random.randn(10, 4)) - >>> store = pd.HDFStore("test.h5") - >>> store["foo"] = bar # write to HDF5 - >>> bar = store["foo"] # retrieve + >>> store = pd.HDFStore('test.h5') + >>> store['foo'] = bar # write to HDF5 + >>> bar = store['foo'] # retrieve >>> store.close() **Create or load HDF5 file in-memory** @@ -547,9 +556,9 @@ class HDFStore: written when closed: >>> bar = pd.DataFrame(np.random.randn(10, 4)) - >>> store = pd.HDFStore("test.h5", driver="H5FD_CORE") - >>> store["foo"] = bar - >>> store.close() # only now, data is written to disk + >>> store = pd.HDFStore('test.h5', driver='H5FD_CORE') + >>> store['foo'] = bar + >>> store.close() # only now, data is written to disk """ _handle: File | None @@ -608,7 +617,7 @@ def __getitem__(self, key: str): def __setitem__(self, key: str, value) -> None: self.put(key, value) - def __delitem__(self, key: str) -> int | None: + def __delitem__(self, key: str) -> None: return self.remove(key) def __getattr__(self, name: str): @@ -671,18 +680,12 @@ def keys(self, include: str = "pandas") -> list[str]: ------ raises ValueError if kind has an illegal value - See Also - -------- - HDFStore.info : Prints detailed information on the store. - HDFStore.get_node : Returns the node with the key. - HDFStore.get_storer : Returns the storer object for a key. - Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP - >>> store.put("data", df) # doctest: +SKIP - >>> store.get("data") # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP + >>> store.put('data', df) # doctest: +SKIP + >>> store.get('data') # doctest: +SKIP >>> print(store.keys()) # doctest: +SKIP ['/data1', '/data2'] >>> store.close() # doctest: +SKIP @@ -800,24 +803,18 @@ def get(self, key: str): Parameters ---------- key : str - Object to retrieve from file. Raises KeyError if not found. Returns ------- object Same type as object stored in file. - See Also - -------- - HDFStore.get_node : Returns the node with the key. - HDFStore.get_storer : Returns the storer object for a key. - Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP - >>> store.put("data", df) # doctest: +SKIP - >>> store.get("data") # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP + >>> store.put('data', df) # doctest: +SKIP + >>> store.get('data') # doctest: +SKIP >>> store.close() # doctest: +SKIP """ with patch_pickle(): @@ -874,25 +871,19 @@ def select( object Retrieved object from file. - See Also - -------- - HDFStore.select_as_coordinates : Returns the selection as an index. - HDFStore.select_column : Returns a single column from the table. 
- HDFStore.select_as_multiple : Retrieves pandas objects from multiple tables.
-
 Examples
 --------
- >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
- >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP
- >>> store.put("data", df) # doctest: +SKIP
- >>> store.get("data") # doctest: +SKIP
+ >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
+ >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP
+ >>> store.put('data', df) # doctest: +SKIP
+ >>> store.get('data') # doctest: +SKIP
 >>> print(store.keys()) # doctest: +SKIP
 ['/data1', '/data2']
- >>> store.select("/data1") # doctest: +SKIP
+ >>> store.select('/data1') # doctest: +SKIP
 A B
 0 1 2
 1 3 4
- >>> store.select("/data1", where="columns == A") # doctest: +SKIP
+ >>> store.select('/data1', where='columns == A') # doctest: +SKIP
 A
 0 1
 1 3
@@ -1140,18 +1131,10 @@ def put(
 """
 Store object in HDFStore.

- This method writes a pandas DataFrame or Series into an HDF5 file using
- either the fixed or table format. The `table` format allows additional
- operations like incremental appends and queries but may have performance
- trade-offs. The `fixed` format provides faster read/write operations but
- does not support appends or queries.
-
 Parameters
 ----------
 key : str
- Key of object to store in file.
 value : {Series, DataFrame}
- Value of object to store in file.
 format : 'fixed(f)|table(t)', default is 'fixed'
 Format to use when storing object in HDFStore. Value can be one of:

 'fixed'
 Fixed format. Fast writing/reading. Not-appendable, nor searchable.
 'table'
 Table format. Write as a PyTables Table structure which may perform
 worse but allow more flexible operations like searching / selecting
 subsets of the data.
 index : bool, default True
 Write DataFrame index as a column.
 append : bool, default False
 This will force Table format, append the input data to the existing.
- complib : default None
- This parameter is currently not accepted.
- complevel : int, 0-9, default None
- Specifies a compression level for data.
- A value of 0 or None disables compression.
- min_itemsize : int, dict, or None
- Dict of columns that specify minimum str sizes.
- nan_rep : str
- Str to use as str nan representation.
+ min_itemsize : int, dict of {str: int}, or None, default None
+ Minimum size in bytes for string columns. This parameter is only used
+ when format='table'. Can be:
+
+ - int: Apply the same minimum size to all string columns
+ - dict: Map column names to their minimum sizes
+ - None: Use default sizing
+
+ **Important**: The size refers to the number of bytes after encoding,
+ not the number of characters. For multi-byte characters (e.g., Chinese,
+ Arabic), you need to account for the encoding. For example, the
+ character '香' is 1 character but 3 bytes when encoded as UTF-8.
+
+ See examples below for proper usage with encoded strings.
 data_columns : list of columns or True, default None
 List of columns to create as data columns, or True to use all columns.
 See `here `__.
 encoding : str, default None
 Provide an encoding for strings.
- errors : str, default 'strict'
- The error handling scheme to use for encoding errors.
- The default is 'strict' meaning that encoding errors raise a
- UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
- 'xmlcharrefreplace' as well as any other name registered with
- codecs.register_error that can handle UnicodeEncodeErrors.
 track_times : bool, default True
 Parameter is propagated to 'create_table' method of 'PyTables'.
 If set to False it enables to have the same h5 files (same hashes)
@@ -1193,16 +1175,33 @@ def put(
 dropna : bool, default False, optional
 Remove missing values.

- See Also
- --------
- HDFStore.info : Prints detailed information on the store.
- HDFStore.get_storer : Returns the storer object for a key. - Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP - >>> store.put("data", df) # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP + >>> store.put('data', df) # doctest: +SKIP + + Basic usage with ASCII strings: + + >>> df = pd.DataFrame([['hello', 'world']], columns=['A', 'B']) + >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP + >>> store.put('data', df, format='table', min_itemsize={'A': 10, 'B': 10}) # doctest: +SKIP + + Usage with multi-byte characters: + + >>> df_unicode = pd.DataFrame([['香港', '北京']], columns=['city1', 'city2']) # doctest: +SKIP + >>> # Each Chinese character is 3 bytes in UTF-8, so '香港' needs 6 bytes + >>> store.put('cities', df_unicode, format='table', # doctest: +SKIP + ... min_itemsize={'city1': 12, 'city2': 12}, encoding='utf-8') # doctest: +SKIP + + Determining the correct size for encoded strings: + + >>> text = '香港' # doctest: +SKIP + >>> len(text) # Character length # doctest: +SKIP + 2 + >>> len(text.encode('utf-8')) # Byte length # doctest: +SKIP + 6 + >>> # Use the byte length for min_itemsize """ if format is None: format = get_option("io.hdf.default_format") or "fixed" @@ -1224,7 +1223,7 @@ def put( dropna=dropna, ) - def remove(self, key: str, where=None, start=None, stop=None) -> int | None: + def remove(self, key: str, where=None, start=None, stop=None) -> None: """ Remove pandas object partially by specifying the where condition @@ -1272,12 +1271,14 @@ def remove(self, key: str, where=None, start=None, stop=None) -> int | None: # remove the node if com.all_none(where, start, stop): s.group._f_remove(recursive=True) - return None # delete from the table - if not s.is_table: - raise ValueError("can only remove with where on objects written as tables") - return s.delete(where=where, start=start, stop=stop) + else: + if not s.is_table: + raise ValueError( + "can only remove with where on objects written as tables" + ) + return s.delete(where=where, start=start, stop=stop) def append( self, @@ -1307,9 +1308,7 @@ def append( Parameters ---------- key : str - Key of object to append. value : {Series, DataFrame} - Value of object to append. format : 'table' is the default Format to use when storing object in HDFStore. Value can be one of: @@ -1317,66 +1316,92 @@ def append( Table format. Write as a PyTables Table structure which may perform worse but allow more flexible operations like searching / selecting subsets of the data. - axes : default None - This parameter is currently not accepted. index : bool, default True Write DataFrame index as a column. - append : bool, default True + append : bool, default True Append the input data to the existing. - complib : default None - This parameter is currently not accepted. - complevel : int, 0-9, default None - Specifies a compression level for data. - A value of 0 or None disables compression. - columns : default None - This parameter is currently not accepted, try data_columns. - min_itemsize : int, dict, or None - Dict of columns that specify minimum str sizes. - nan_rep : str - Str to use as str nan representation. - chunksize : int or None - Size to chunk the writing. - expectedrows : int - Expected TOTAL row size of this table. - dropna : bool, default False, optional - Do not write an ALL nan row to the store settable - by the option 'io.hdf.dropna_table'. 
 data_columns : list of columns, or True, default None
 List of columns to create as indexed data columns for on-disk
 queries, or True to use all columns. By default only the axes
 of the object are indexed. See `here `__.
- encoding : default None
- Provide an encoding for str.
- errors : str, default 'strict'
- The error handling scheme to use for encoding errors.
- The default is 'strict' meaning that encoding errors raise a
- UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
- 'xmlcharrefreplace' as well as any other name registered with
- codecs.register_error that can handle UnicodeEncodeErrors.
-
- See Also
- --------
- HDFStore.append_to_multiple : Append to multiple tables.
+ min_itemsize : int, dict of {str: int}, or None, default None
+ Minimum size in bytes for string columns. Can be:
+
+ - int: Apply the same minimum size to all string columns
+ - dict: Map column names to their minimum sizes
+ - None: Use the existing table's column sizes
+
+ **Important**: This parameter is only effective when creating a new table.
+ If the table already exists, the column sizes are fixed and cannot be
+ changed. The size refers to the number of bytes after encoding, not
+ the number of characters.
+
+ For multi-byte characters, calculate the size using the encoded byte
+ length. For example, len('香'.encode('utf-8')) returns 3, whereas
+ len('香') returns 1. A sizing sketch is given at the end of the Notes
+ section below.
+ nan_rep : str
+ Str to use as str nan representation.
+ chunksize : int or None
+ Size to chunk the writing.
+ expectedrows : int
+ Expected TOTAL row size of this table.
+ encoding : str, default None
+ Provide an encoding for str.
+ dropna : bool, default False, optional
+ Do not write an ALL nan row to the store settable
+ by the option 'io.hdf.dropna_table'.

 Notes
 -----
 Does *not* check if data being appended overlaps with existing data in
 the table, so be careful
+ When appending to an existing table, the min_itemsize parameter has no
+ effect, as column sizes are already fixed. Set min_itemsize when initially
+ creating the table with put() or the first append() call.
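+
+ A minimal sizing sketch for these notes (illustrative only: the helper
+ name byte_len is made up here, not a pandas API); it derives
+ min_itemsize from the encoded byte lengths of the data:
+
+ >>> def byte_len(s):
+ ...     return len(str(s).encode('utf-8'))
+ >>> df = pd.DataFrame([['香港', 'hello']], columns=['a', 'b'])
+ >>> sizes = {col: int(df[col].map(byte_len).max()) for col in df.columns}
+ >>> sizes
+ {'a': 6, 'b': 5}
+ >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP
+ >>> store.put('data', df, format='table', min_itemsize=sizes) # doctest: +SKIP
+ >>> store.append('data', df) # doctest: +SKIP
+ >>> store.close() # doctest: +SKIP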
+ Examples
 --------
- >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
- >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP
- >>> store.put("data", df1, format="table") # doctest: +SKIP
- >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["A", "B"])
- >>> store.append("data", df2) # doctest: +SKIP
+ >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
+ >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP
+ >>> store.put('data', df1, format='table') # doctest: +SKIP
+ >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['A', 'B'])
+ >>> store.append('data', df2) # doctest: +SKIP
 >>> store.close() # doctest: +SKIP
 A B
 0 1 2
 1 3 4
 0 5 6
 1 7 8
+
+ Creating a table and appending data:
+
+ >>> df1 = pd.DataFrame([['short', 'text']], columns=['A', 'B'])
+ >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP
+ >>> # Set min_itemsize when creating the table
+ >>> store.put('data', df1, format='table', min_itemsize={'A': 20, 'B': 20}) # doctest: +SKIP
+ >>>
+ >>> df2 = pd.DataFrame([['longer text here', 'more text']], columns=['A', 'B'])
+ >>> store.append('data', df2) # doctest: +SKIP
+ >>> store.close() # doctest: +SKIP
+
+ Handling multi-byte characters:
+
+ >>> df_en = pd.DataFrame([['hello']], columns=['text'])
+ >>> df_zh = pd.DataFrame([['你好世界']], columns=['text']) # "Hello World" in Chinese
+ >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP
+ >>> # Calculate size needed: len('你好世界'.encode('utf-8')) = 12 bytes
+ >>> store.put('messages', df_en, format='table',
+ ... min_itemsize={'text': 15}, encoding='utf-8') # doctest: +SKIP
+ >>> store.append('messages', df_zh) # doctest: +SKIP
+ >>> store.close() # doctest: +SKIP
+
+ Common error when a string exceeds the reserved column size:
+
+ >>> df1 = pd.DataFrame([['a']], columns=['char']) # 1 byte in UTF-8
+ >>> df2 = pd.DataFrame([['香']], columns=['char']) # 3 bytes in UTF-8
+ >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP
+ >>> store.put('test', df1, format='table') # doctest: +SKIP
+ >>> # Appending df2 now raises ValueError: the column was sized to 1 byte
+ >>> # store.append('test', df2)
+ >>> # Correct usage: reserve the encoded byte length when creating the table
+ >>> store.put('test', df1, format='table', min_itemsize={'char': 3}) # doctest: +SKIP
+ >>> store.append('test', df2) # doctest: +SKIP
+ >>> store.close() # doctest: +SKIP
 """
 if columns is not None:
 raise TypeError(
@@ -1555,15 +1580,11 @@ def groups(self) -> list:
 list
 List of objects.

- See Also
- --------
- HDFStore.get_node : Returns the node with the key.
-
 Examples
 --------
- >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
- >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP
- >>> store.put("data", df) # doctest: +SKIP
+ >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
+ >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP
+ >>> store.put('data', df) # doctest: +SKIP
 >>> print(store.groups()) # doctest: +SKIP
 >>> store.close() # doctest: +SKIP
 [/data (Group) ''
@@ -1614,17 +1635,13 @@ def walk(self, where: str = "/") -> Iterator[tuple[str, list[str], list[str]]]:
 leaves : list
 Names (strings) of the pandas objects contained in `path`.

- See Also
- --------
- HDFStore.info : Prints detailed information on the store.
- Examples -------- - >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP - >>> store.put("data", df1, format="table") # doctest: +SKIP - >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["A", "B"]) - >>> store.append("data", df2) # doctest: +SKIP + >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP + >>> store.put('data', df1, format='table') # doctest: +SKIP + >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['A', 'B']) + >>> store.append('data', df2) # doctest: +SKIP >>> store.close() # doctest: +SKIP >>> for group in store.walk(): # doctest: +SKIP ... print(group) # doctest: +SKIP @@ -1743,33 +1760,24 @@ def info(self) -> str: Returns ------- str - A String containing the python pandas class name, filepath to the HDF5 - file and all the object keys along with their respective dataframe shapes. - - See Also - -------- - HDFStore.get_storer : Returns the storer object for a key. Examples -------- - >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) - >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["C", "D"]) - >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP - >>> store.put("data1", df1) # doctest: +SKIP - >>> store.put("data2", df2) # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) + >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP + >>> store.put('data', df) # doctest: +SKIP >>> print(store.info()) # doctest: +SKIP >>> store.close() # doctest: +SKIP File path: store.h5 - /data1 frame (shape->[2,2]) - /data2 frame (shape->[2,2]) + /data frame (shape->[2,2]) """ path = pprint_thing(self._path) output = f"{type(self)}\nFile path: {path}\n" if self.is_open: lkeys = sorted(self.keys()) - if lkeys: + if len(lkeys): keys = [] values = [] @@ -1826,8 +1834,8 @@ def _create_storer( if value is not None and not isinstance(value, (Series, DataFrame)): raise TypeError("value must be None, Series, or DataFrame") - pt = getattr(group._v_attrs, "pandas_type", None) - tt = getattr(group._v_attrs, "table_type", None) + pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None)) + tt = _ensure_decoded(getattr(group._v_attrs, "table_type", None)) # infer the pt from the passed value if pt is None: @@ -1894,7 +1902,7 @@ def _create_storer( "worm": WORMTable, } try: - cls = _TABLE_MAP[tt] # type: ignore[index] + cls = _TABLE_MAP[tt] except KeyError as err: raise TypeError( f"cannot properly create the storer for: [_TABLE_MAP] [group->" @@ -2241,13 +2249,13 @@ def convert( # preventing the original recarry from being free'ed values = values[self.cname].copy() - val_kind = self.kind + val_kind = _ensure_decoded(self.kind) values = _maybe_convert(values, val_kind, encoding, errors) kwargs = {} - kwargs["name"] = self.index_name + kwargs["name"] = _ensure_decoded(self.index_name) if self.freq is not None: - kwargs["freq"] = self.freq + kwargs["freq"] = _ensure_decoded(self.freq) factory: type[Index | DatetimeIndex] = Index if lib.is_np_dtype(values.dtype, "M") or isinstance( @@ -2261,7 +2269,9 @@ def convert( # "Union[Type[Index], Type[DatetimeIndex]]") factory = lambda x, **kwds: PeriodIndex.from_ordinals( # type: ignore[assignment] x, freq=kwds.get("freq", None) - )._rename(kwds["name"]) + )._rename( + kwds["name"] + ) # making an Index instance could throw a number of different errors try: @@ -2286,12 +2296,7 @@ def convert( if "freq" in kwargs: kwargs["freq"] = None new_pd_index = factory(values, **kwargs) 
- - final_pd_index: Index - if self.tz is not None and isinstance(new_pd_index, DatetimeIndex): - final_pd_index = new_pd_index.tz_localize("UTC").tz_convert(self.tz) - else: - final_pd_index = new_pd_index + final_pd_index = _set_tz(new_pd_index, self.tz) return final_pd_index, final_pd_index def take_data(self): @@ -2325,7 +2330,7 @@ def maybe_set_size(self, min_itemsize=None) -> None: min_itemsize can be an integer or a dict with this columns name with an integer size """ - if self.kind == "string": + if _ensure_decoded(self.kind) == "string": if isinstance(min_itemsize, dict): min_itemsize = min_itemsize.get(self.name) @@ -2346,7 +2351,7 @@ def validate_and_set(self, handler: AppendableTable, append: bool) -> None: def validate_col(self, itemsize=None): """validate this column: return the compared against itemsize""" # validate this column for string truncation (or reset to the max size) - if self.kind == "string": + if _ensure_decoded(self.kind) == "string": c = self.col if c is not None: if itemsize is None: @@ -2676,19 +2681,19 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): assert isinstance(converted, np.ndarray) # for mypy # use the meta if needed - meta = self.meta + meta = _ensure_decoded(self.meta) metadata = self.metadata ordered = self.ordered tz = self.tz assert dtype_name is not None # convert to the correct dtype - dtype = dtype_name + dtype = _ensure_decoded(dtype_name) # reverse converts if dtype.startswith("datetime64"): # recreate with tz if indicated - converted = _set_tz(converted, tz, dtype) + converted = _set_tz(converted, tz, coerce=True) elif dtype == "timedelta64": converted = np.asarray(converted, dtype="m8[ns]") @@ -2733,7 +2738,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): converted = converted.astype("O", copy=False) # convert nans / decode - if kind == "string": + if _ensure_decoded(kind) == "string": converted = _unconvert_string_array( converted, nan_rep=nan_rep, encoding=encoding, errors=errors ) @@ -2821,19 +2826,18 @@ def is_old_version(self) -> bool: @property def version(self) -> tuple[int, int, int]: """compute and set our version""" - version = getattr(self.group._v_attrs, "pandas_version", None) - if isinstance(version, str): - version_tup = tuple(int(x) for x in version.split(".")) - if len(version_tup) == 2: - version_tup = version_tup + (0,) - assert len(version_tup) == 3 # needed for mypy - return version_tup - else: - return (0, 0, 0) + version = _ensure_decoded(getattr(self.group._v_attrs, "pandas_version", None)) + try: + version = tuple(int(x) for x in version.split(".")) + if len(version) == 2: + version = version + (0,) + except AttributeError: + version = (0, 0, 0) + return version @property def pandas_type(self): - return getattr(self.group._v_attrs, "pandas_type", None) + return _ensure_decoded(getattr(self.group._v_attrs, "pandas_type", None)) def __repr__(self) -> str: """return a pretty representation of myself""" @@ -2928,7 +2932,7 @@ def read( columns=None, start: int | None = None, stop: int | None = None, - ) -> Series | DataFrame: + ): raise NotImplementedError( "cannot read on an abstract storer: subclasses should implement" ) @@ -2940,7 +2944,7 @@ def write(self, obj, **kwargs) -> None: def delete( self, where=None, start: int | None = None, stop: int | None = None - ) -> int | None: + ) -> None: """ support fully deleting the node in its entirety (only) - where specification must be None @@ -2970,7 +2974,9 @@ def _alias_to_class(self, alias): return 
self._reverse_index_map.get(alias, Index) def _get_index_factory(self, attrs): - index_class = self._alias_to_class(getattr(attrs, "index_class", "")) + index_class = self._alias_to_class( + _ensure_decoded(getattr(attrs, "index_class", "")) + ) factory: Callable @@ -3006,7 +3012,12 @@ def f(values, freq=None, tz=None): factory = TimedeltaIndex if "tz" in attrs: - kwargs["tz"] = attrs["tz"] + if isinstance(attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = attrs["tz"] assert index_class is DatetimeIndex # just checking return factory, kwargs @@ -3038,9 +3049,9 @@ def set_attrs(self) -> None: def get_attrs(self) -> None: """retrieve our attributes""" self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) - self.errors = getattr(self.attrs, "errors", "strict") + self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) for n in self.attributes: - setattr(self, n, getattr(self.attrs, n, None)) + setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None))) def write(self, obj, **kwargs) -> None: self.set_attrs() @@ -3060,7 +3071,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None if dtype is not None: ret = pd_array(ret, dtype=dtype) else: - dtype = getattr(attrs, "value_type", None) + dtype = _ensure_decoded(getattr(attrs, "value_type", None)) shape = getattr(attrs, "shape", None) if shape is not None: @@ -3072,7 +3083,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None if dtype and dtype.startswith("datetime64"): # reconstruct a timezone if indicated tz = getattr(attrs, "tz", None) - ret = _set_tz(ret, tz, dtype) + ret = _set_tz(ret, tz, coerce=True) elif dtype == "timedelta64": ret = np.asarray(ret, dtype="m8[ns]") @@ -3085,7 +3096,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None def read_index( self, key: str, start: int | None = None, stop: int | None = None ) -> Index: - variety = getattr(self.attrs, f"{key}_variety") + variety = _ensure_decoded(getattr(self.attrs, f"{key}_variety")) if variety == "multi": return self.read_multi_index(key, start=start, stop=stop) @@ -3175,11 +3186,12 @@ def read_index_node( # have written a sentinel. Here we replace it with the original. 
if "shape" in node._v_attrs and np.prod(node._v_attrs.shape) == 0: data = np.empty(node._v_attrs.shape, dtype=node._v_attrs.value_type) - kind = node._v_attrs.kind + kind = _ensure_decoded(node._v_attrs.kind) name = None if "name" in node._v_attrs: name = _ensure_str(node._v_attrs.name) + name = _ensure_decoded(name) attrs = node._v_attrs factory, kwargs = self._get_index_factory(attrs) @@ -3285,7 +3297,7 @@ def write_array( pass elif inferred_type == "string": pass - elif get_option("performance_warnings"): + else: ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level()) @@ -3302,9 +3314,7 @@ def write_array( # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no # attribute "asi8" self._handle.create_array( - self.group, - key, - value.asi8, # type: ignore[union-attr] + self.group, key, value.asi8 # type: ignore[union-attr] ) node = getattr(self.group, key) @@ -3335,7 +3345,7 @@ class SeriesFixed(GenericFixed): name: Hashable @property - def shape(self) -> tuple[int] | None: + def shape(self): try: return (len(self.group.values),) except (TypeError, AttributeError): @@ -3445,14 +3455,23 @@ def read( dfs.append(df) if len(dfs) > 0: - out = concat(dfs, axis=1).copy() - return out.reindex(columns=items) + out = concat(dfs, axis=1, copy=True) + if using_copy_on_write(): + # with CoW, concat ignores the copy keyword. Here, we still want + # to copy to enforce optimized column-major layout + out = out.copy() + out = out.reindex(columns=items, copy=False) + return out return DataFrame(columns=axes[0], index=axes[1]) def write(self, obj, **kwargs) -> None: super().write(obj, **kwargs) + # TODO(ArrayManager) HDFStore relies on accessing the blocks + if isinstance(obj._mgr, ArrayManager): + obj = obj._as_manager("block") + data = obj._mgr if not data.is_consolidated(): data = data.consolidate() @@ -3684,7 +3703,7 @@ def queryables(self) -> dict[str, Any]: return dict(d1 + d2 + d3) - def index_cols(self) -> list[tuple[Any, Any]]: + def index_cols(self): """return a list of my index cols""" # Note: each `i.cname` below is assured to be a str. 
return [(i.axis, i.cname) for i in self.index_axes] @@ -3742,7 +3761,7 @@ def get_attrs(self) -> None: self.info = getattr(self.attrs, "info", None) or {} self.nan_rep = getattr(self.attrs, "nan_rep", None) self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) - self.errors = getattr(self.attrs, "errors", "strict") + self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) self.levels: list[Hashable] = getattr(self.attrs, "levels", None) or [] self.index_axes = [a for a in self.indexables if a.is_an_indexable] self.values_axes = [a for a in self.indexables if not a.is_an_indexable] @@ -3814,7 +3833,7 @@ def indexables(self): dc = set(self.data_columns) base_pos = len(_indexables) - def f(i, c: str) -> DataCol: + def f(i, c): assert isinstance(c, str) klass = DataCol if c in dc: @@ -3980,7 +3999,7 @@ def get_object(cls, obj, transposed: bool): """return the data for this obj""" return obj - def validate_data_columns(self, data_columns, min_itemsize, non_index_axes) -> list: + def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): """ take the input data_columns and min_itemize and create a data columns spec @@ -4269,10 +4288,16 @@ def _get_blocks_and_items( data_columns, ): # Helper to clarify non-state-altering parts of _create_axes + + # TODO(ArrayManager) HDFStore relies on accessing the blocks + if isinstance(frame._mgr, ArrayManager): + frame = frame._as_manager("block") + def get_blk_items(mgr): return [mgr.items.take(blk.mgr_locs) for blk in mgr.blocks] mgr = frame._mgr + mgr = cast(BlockManager, mgr) blocks: list[Block] = list(mgr.blocks) blk_items: list[Index] = get_blk_items(mgr) @@ -4284,6 +4309,7 @@ def get_blk_items(mgr): axis, axis_labels = new_non_index_axes[0] new_labels = Index(axis_labels).difference(Index(data_columns)) mgr = frame.reindex(new_labels, axis=axis)._mgr + mgr = cast(BlockManager, mgr) blocks = list(mgr.blocks) blk_items = get_blk_items(mgr) @@ -4292,6 +4318,7 @@ def get_blk_items(mgr): # index, so we can infer that (as long as axis==1) we # get a single column back, so a single block. 
mgr = frame.reindex([c], axis=axis)._mgr + mgr = cast(BlockManager, mgr) blocks.extend(mgr.blocks) blk_items.extend(get_blk_items(mgr)) @@ -4471,7 +4498,7 @@ def read_column( encoding=self.encoding, errors=self.errors, ) - cvs = col_values[1] + cvs = _set_tz(col_values[1], a.tz) dtype = getattr(self.table.attrs, f"{column}_meta", None) return Series(cvs, name=column, copy=False, dtype=dtype) @@ -4591,7 +4618,7 @@ def write_data(self, chunksize: int | None, dropna: bool = False) -> None: masks.append(mask.astype("u1", copy=False)) # consolidate masks - if masks: + if len(masks): mask = masks[0] for m in masks[1:]: mask = mask & m @@ -4676,9 +4703,7 @@ def write_data_chunk( self.table.append(rows) self.table.flush() - def delete( - self, where=None, start: int | None = None, stop: int | None = None - ) -> int | None: + def delete(self, where=None, start: int | None = None, stop: int | None = None): # delete all rows (and return the nrows) if where is None or not len(where): if start is None and stop is None: @@ -4711,7 +4736,7 @@ def delete( groups = list(diff[diff > 1].index) # 1 group - if not groups: + if not len(groups): groups = [0] # final element @@ -4814,7 +4839,7 @@ def read( if values.ndim == 1 and isinstance(values, np.ndarray): values = values.reshape((1, values.shape[0])) - if isinstance(values, (np.ndarray, DatetimeArray)): + if isinstance(values, np.ndarray): try: df = DataFrame(values.T, columns=cols_, index=index_, copy=False) except UnicodeEncodeError as err: @@ -5024,7 +5049,7 @@ def read( columns=None, start: int | None = None, stop: int | None = None, - ) -> DataFrame: + ): df = super().read(where=where, columns=columns, start=start, stop=stop) df = df.set_index(self.levels) @@ -5068,25 +5093,55 @@ def _get_tz(tz: tzinfo) -> str | tzinfo: return zone +@overload +def _set_tz( + values: np.ndarray | Index, tz: str | tzinfo, coerce: bool = False +) -> DatetimeIndex: + ... + + +@overload +def _set_tz(values: np.ndarray | Index, tz: None, coerce: bool = False) -> np.ndarray: + ... + + def _set_tz( - values: npt.NDArray[np.int64], tz: str | tzinfo | None, datetime64_dtype: str -) -> DatetimeArray: + values: np.ndarray | Index, tz: str | tzinfo | None, coerce: bool = False +) -> np.ndarray | DatetimeIndex: """ - Coerce the values to a DatetimeArray with appropriate tz. + coerce the values to a DatetimeIndex if tz is set + preserve the input shape if possible Parameters ---------- - values : ndarray[int64] - tz : str, tzinfo, or None - datetime64_dtype : str, e.g. "datetime64[ns]", "datetime64[25s]" + values : ndarray or Index + tz : str or tzinfo + coerce : if we do not have a passed timezone, coerce to M8[ns] ndarray """ - assert values.dtype == "i8", values.dtype - # Argument "tz" to "tz_to_dtype" has incompatible type "str | tzinfo | None"; - # expected "tzinfo" - unit, _ = np.datetime_data(datetime64_dtype) # parsing dtype: unit, count - dtype = tz_to_dtype(tz=tz, unit=unit) # type: ignore[arg-type] - dta = DatetimeArray._from_sequence(values, dtype=dtype) - return dta + if isinstance(values, DatetimeIndex): + # If values is tzaware, the tz gets dropped in the values.ravel() + # call below (which returns an ndarray). So we are only non-lossy + # if `tz` matches `values.tz`. 
+ assert values.tz is None or values.tz == tz + if values.tz is not None: + return values + + if tz is not None: + if isinstance(values, DatetimeIndex): + name = values.name + else: + name = None + values = values.ravel() + + tz = _ensure_decoded(tz) + values = DatetimeIndex(values, name=name) + values = values.tz_localize("UTC").tz_convert(tz) + elif coerce: + values = np.asarray(values, dtype="M8[ns]") + + # error: Incompatible return value type (got "Union[ndarray, Index]", + # expected "Union[ndarray, DatetimeIndex]") + return values # type: ignore[return-value] def _convert_index(name: str, index: Index, encoding: str, errors: str) -> IndexCol: @@ -5390,6 +5445,8 @@ def _dtype_to_kind(dtype_str: str) -> str: """ Find the "kind" string describing the given dtype name. """ + dtype_str = _ensure_decoded(dtype_str) + if dtype_str.startswith(("string", "bytes")): kind = "string" elif dtype_str.startswith("float"): @@ -5501,13 +5558,7 @@ def __init__( if self.terms is not None: self.condition, self.filter = self.terms.evaluate() - @overload - def generate(self, where: dict | list | tuple | str) -> PyTablesExpr: ... - - @overload - def generate(self, where: None) -> None: ... - - def generate(self, where: dict | list | tuple | str | None) -> PyTablesExpr | None: + def generate(self, where): """where can be a : dict,list,tuple,string""" if where is None: return None
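For reference, a self-contained sketch of the sizing behavior the revised put()/append() docstrings describe. This is an illustration, not part of the patch: it assumes pandas with the PyTables package installed, and the file name store.h5 and column name text are invented for the example.

import pandas as pd

short = pd.DataFrame({"text": ["hi"]})           # 2 bytes in UTF-8
long_utf8 = pd.DataFrame({"text": ["你好世界"]})   # 4 characters, 12 bytes in UTF-8

with pd.HDFStore("store.h5", mode="w") as store:
    # Without min_itemsize, the first write fixes the column width at the
    # longest encoded string seen so far (2 bytes here), so appending a
    # 12-byte string raises ValueError.
    store.put("data", short, format="table")
    try:
        store.append("data", long_utf8)
    except ValueError as err:
        print(err)  # column width exceeded

    # Reserve the encoded byte length up front and the same append succeeds.
    needed = int(long_utf8["text"].map(lambda s: len(s.encode("utf-8"))).max())
    store.put("data", short, format="table", min_itemsize={"text": needed})
    store.append("data", long_utf8)
    print(store.select("data"))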