From c3ca0adef3697198c5866d0a10bbd1df828867c9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 17 Jul 2025 08:37:22 +0200 Subject: [PATCH 1/2] DOC: fix doctests for string dtype changes (top-level) --- pandas/core/arrays/categorical.py | 10 +++++----- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/dtypes/missing.py | 12 ++++++------ pandas/core/frame.py | 5 ++--- pandas/core/indexes/base.py | 2 +- pandas/core/reshape/concat.py | 8 ++++---- 6 files changed, 19 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3d2ad109a55ba..4595bc16ef336 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -794,28 +794,28 @@ def categories(self) -> Index: >>> ser = pd.Series(["a", "b", "c", "a"], dtype="category") >>> ser.cat.categories - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') >>> raw_cat = pd.Categorical(["a", "b", "c", "a"], categories=["b", "c", "d"]) >>> ser = pd.Series(raw_cat) >>> ser.cat.categories - Index(['b', 'c', 'd'], dtype='object') + Index(['b', 'c', 'd'], dtype='str') For :class:`pandas.Categorical`: >>> cat = pd.Categorical(["a", "b"], ordered=True) >>> cat.categories - Index(['a', 'b'], dtype='object') + Index(['a', 'b'], dtype='str') For :class:`pandas.CategoricalIndex`: >>> ci = pd.CategoricalIndex(["a", "c", "b", "a", "c", "b"]) >>> ci.categories - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') >>> ci = pd.CategoricalIndex(["a", "c"], categories=["c", "b", "a"]) >>> ci.categories - Index(['c', 'b', 'a'], dtype='object') + Index(['c', 'b', 'a'], dtype='str') """ return self.dtype.categories diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3986392774f28..912421dff1026 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -647,7 +647,7 @@ def categories(self) -> Index: -------- >>> cat_type = pd.CategoricalDtype(categories=["a", "b"], ordered=True) >>> cat_type.categories - Index(['a', 'b'], dtype='object') + Index(['a', 'b'], dtype='str') """ return self._categories diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 71fe0f6e4feb0..408c2858aa876 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -158,9 +158,9 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) >>> df - 0 1 2 - 0 ant bee cat - 1 dog None fly + 0 1 2 + 0 ant bee cat + 1 dog NaN fly >>> pd.isna(df) 0 1 2 0 False False False @@ -373,9 +373,9 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) >>> df - 0 1 2 - 0 ant bee cat - 1 dog None fly + 0 1 2 + 0 ant bee cat + 1 dog NaN fly >>> pd.notna(df) 0 1 2 0 True True True diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 632ab12edd7e4..48a5596e00061 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1015,8 +1015,7 @@ def axes(self) -> list[Index]: -------- >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) >>> df.axes - [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], - dtype='object')] + [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], dtype='str')] """ return [self.index, self.columns] @@ -14070,7 +14069,7 @@ def values(self) -> np.ndarray: ... columns=("name", "max_speed", "rank"), ... ) >>> df2.dtypes - name object + name str max_speed float64 rank object dtype: object diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fb395f4f7bb1a..a743c5d06ae98 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -368,7 +368,7 @@ class Index(IndexOpsMixin, PandasObject): Index([1, 2, 3], dtype='int64') >>> pd.Index(list("abc")) - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') >>> pd.Index([1, 2, 3], dtype="uint8") Index([1, 2, 3], dtype='uint8') diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index cd7cc33e9ae7f..ef7949b778ff7 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -258,7 +258,7 @@ def concat( 1 b 0 c 1 d - dtype: object + dtype: str Clear the existing index and reset it in the result by setting the ``ignore_index`` option to ``True``. @@ -268,7 +268,7 @@ def concat( 1 b 2 c 3 d - dtype: object + dtype: str Add a hierarchical index at the outermost level of the data with the ``keys`` option. @@ -278,7 +278,7 @@ def concat( 1 b s2 0 c 1 d - dtype: object + dtype: str Label the index keys you create with the ``names`` option. @@ -288,7 +288,7 @@ def concat( 1 b s2 0 c 1 d - dtype: object + dtype: str Combine two ``DataFrame`` objects with identical columns. From 873627181e34d55358b45abadf62de3bcf3adc90 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 17 Jul 2025 09:01:40 +0200 Subject: [PATCH 2/2] more misc changes --- pandas/core/groupby/groupby.py | 14 +++++++------- pandas/core/indexes/base.py | 2 +- pandas/core/interchange/from_dataframe.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f29423ce5e77c..74497ca723edb 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -4628,13 +4628,13 @@ def ngroup(self, ascending: bool = True): -------- >>> df = pd.DataFrame({"color": ["red", None, "red", "blue", "blue", "red"]}) >>> df - color - 0 red - 1 None - 2 red - 3 blue - 4 blue - 5 red + color + 0 red + 1 NaN + 2 red + 3 blue + 4 blue + 5 red >>> df.groupby("color").ngroup() 0 1.0 1 NaN diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a743c5d06ae98..fd3db9b9c7ec7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7599,7 +7599,7 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: Examples -------- >>> ensure_index(["a", "b"]) - Index(['a', 'b'], dtype='object') + Index(['a', 'b'], dtype='str') >>> ensure_index([("a", "a"), ("b", "c")]) Index([('a', 'a'), ('b', 'c')], dtype='object') diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index c2fbef1089d5a..daef8287e3263 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -77,7 +77,7 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame: >>> df_not_necessarily_pandas = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> interchange_object = df_not_necessarily_pandas.__dataframe__() >>> interchange_object.column_names() - Index(['A', 'B'], dtype='object') + Index(['A', 'B'], dtype='str') >>> df_pandas = pd.api.interchange.from_dataframe( ... interchange_object.select_columns_by_name(["A"]) ... )