pandas-dev · mroeschke · Sep 16, 2025 · Sep 15, 2025 · Sep 15, 2025 · Sep 15, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -603,6 +603,7 @@ Other API changes
   an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
   the dtype of the resulting Index (:issue:`60797`)
 - :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies align (:issue:`55782`)
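+- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype; e.g. appending an object-dtype :class:`Index` now returns an object-dtype :class:`Index` instead of a :class:`CategoricalIndex` (:issue:`41626`)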
 - :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
 - Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
 - Numpy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)
@@ -974,8 +975,8 @@ Indexing
 - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
 - Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
 - Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
+- Bug in adding new rows to a :class:`DataFrame` or :class:`Series` via ``.loc`` setitem-with-expansion (e.g. ``df.loc[key] = item``) failing to retain the dtype of the object's index in some cases (:issue:`41626`)
 - Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
--
 
 Missing
 ^^^^^^^
@@ -1094,7 +1095,7 @@ Reshaping
 - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
 - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
 - Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
--
+- Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -10918,6 +10918,13 @@ def _append_internal(
                 ),
             )
             row_df = other.to_frame().T
+            if isinstance(self.index.dtype, ExtensionDtype):
+                # GH#41626 retain e.g. CategoricalDtype if reached via
+                #  df.loc[key] = item
+                row_df.index = self.index.array._cast_pointwise_result(
+                    row_df.index._values
+                )
+
             # infer_objects is needed for
             #  test_append_empty_frame_to_series_with_dateutil_tz
             other = row_df.infer_objects().rename_axis(index.names)

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -18,7 +18,6 @@
 )
 
 from pandas.core.dtypes.common import is_scalar
-from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.missing import (
     is_valid_na_for_dtype,
@@ -519,17 +518,3 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
         """
         mapped = self._values.map(mapper, na_action=na_action)
         return Index(mapped, name=self.name)
-
-    def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
-        # if calling index is category, don't check dtype of others
-        try:
-            cat = Categorical._concat_same_type(
-                [self._is_dtype_compat(c) for c in to_concat]
-            )
-        except TypeError:
-            # not all to_concat elements are among our categories (or NA)
-
-            res = concat_compat([x._values for x in to_concat])
-            return Index(res, name=name)
-        else:
-            return type(self)._simple_new(cat, name=name)
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -1483,7 +1483,11 @@ def _create_join_index(
             mask = indexer == -1
             if np.any(mask):
                 fill_value = na_value_for_dtype(index.dtype, compat=False)
-                index = index.append(Index([fill_value]))
+                if not index._can_hold_na:
+                    new_index = Index([fill_value])
+                else:
+                    new_index = Index([fill_value], dtype=index.dtype)
+                index = index.append(new_index)
         if indexer is None:
             return index.copy()
         return index.take(indexer)

diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas.core.dtypes.common import is_hashable
+
 import pandas as pd
 import pandas._testing as tm
 
@@ -310,6 +312,22 @@ def test_setitem_expand_with_extension(self, data):
         result.loc[:, "B"] = data
         tm.assert_frame_equal(result, expected)
 
+    def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
+        # GH#41626 retain index.dtype in setitem-with-expansion
+        if not is_hashable(data[0]):
+            pytest.skip("Test does not apply to non-hashable data.")
+        data = data.unique()
+        expected = pd.DataFrame({"A": range(len(data))}, index=data)
+        df = expected.iloc[:-1]
+        ser = df["A"]
+        item = data[-1]
+
+        df.loc[item] = len(data) - 1
+        tm.assert_frame_equal(df, expected)
+
+        ser.loc[item] = len(data) - 1
+        tm.assert_series_equal(ser, expected["A"])
+
     def test_setitem_frame_invalid_length(self, data):
         df = pd.DataFrame({"A": [1] * len(data)})
         xpr = (

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -1067,6 +1067,15 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op):
         expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
         tm.assert_series_equal(result, expected)
 
+    def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
+        pa_dtype = data.dtype.pyarrow_dtype
+        if pa.types.is_date(pa_dtype):
+            mark = pytest.mark.xfail(
+                reason="GH#62343 incorrectly casts to timestamp[ms][pyarrow]"
+            )
+            request.applymarker(mark)
+        super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
+
 
 class TestLogicalOps:
     """Various Series and DataFrame logical ops methods."""

diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
@@ -126,6 +126,13 @@ def test_EA_types(self, engine, data, request):
     def test_astype_str(self, data):
         super().test_astype_str(data)
 
+    @pytest.mark.xfail(
+        reason="Test is invalid for IntervalDtype, needs to be adapted for "
+        "this dtype with an index with index._index_as_unique."
+    )
+    def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
+        super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
+
 
 # TODO: either belongs in tests.arrays.interval or move into base tests.
 def test_fillna_non_scalar_raises(data_missing):

diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py
@@ -360,3 +360,9 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
             )
         )
         tm.assert_series_equal(result, expected)
+
+    def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
+        if data.dtype.kind == "b":
+            mark = pytest.mark.xfail(reason="GH#62344 incorrectly casts to object")
+            request.applymarker(mark)
+        super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
@@ -421,6 +421,12 @@ def test_index_from_listlike_with_dtype(self, data):
     def test_EA_types(self, engine, data, request):
         super().test_EA_types(engine, data, request)
 
+    def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
+        if isinstance(data[-1], tuple):
+            mark = pytest.mark.xfail(reason="Unpacks tuple")
+            request.applymarker(mark)
+        super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
+
 
 class Test2DCompat(base.NDArrayBacked2DTests):
     pass
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
@@ -257,6 +257,14 @@ def test_arith_series_with_array(
             request.applymarker(mark)
         super().test_arith_series_with_array(data, all_arithmetic_operators)
 
+    def test_loc_setitem_with_expansion_preserves_ea_index_dtype(
+        self, data, request, using_infer_string
+    ):
+        if not using_infer_string and data.dtype.storage == "python":
+            mark = pytest.mark.xfail(reason="Casts to object")
+            request.applymarker(mark)
+        super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
+
 
 class Test2DCompat(base.Dim2CompatTests):
     @pytest.fixture(autouse=True)

diff --git a/pandas/tests/indexes/categorical/test_append.py b/pandas/tests/indexes/categorical/test_append.py
@@ -36,9 +36,11 @@ def test_append_mismatched_categories(self, ci):
             ci.append(ci.values.reorder_categories(list("abc")))
 
     def test_append_category_objects(self, ci):
+        # GH#41626 pre-3.0 this used to cast the object-dtype index to
+        #  ci.dtype
         # with objects
         result = ci.append(Index(["c", "a"]))
-        expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
+        expected = Index(list("aabbcaca"))
         tm.assert_index_equal(result, expected, exact=True)
 
     def test_append_non_categories(self, ci):

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -16,6 +16,7 @@
 
 from pandas._libs import index as libindex
 from pandas.errors import IndexingError
+import pandas.util._test_decorators as td
 
 import pandas as pd
 from pandas import (
@@ -1963,6 +1964,22 @@ def test_loc_drops_level(self):
 
 
 class TestLocSetitemWithExpansion:
+    @td.skip_if_no("pyarrow")
+    def test_loc_setitem_with_expansion_preserves_ea_dtype(self):
+        # GH#41626 retain index.dtype in setitem-with-expansion
+        idx = Index([Timestamp(0).date()], dtype="date32[pyarrow]")
+        df = DataFrame({"A": range(1)}, index=idx)
+        item = Timestamp("1970-01-02").date()
+
+        df.loc[item] = 1
+
+        exp_index = Index([idx[0], item], dtype=idx.dtype)
+        tm.assert_index_equal(df.index, exp_index)
+
+        ser = df["A"].iloc[:-1]
+        ser.loc[item] = 1
+        tm.assert_index_equal(ser.index, exp_index)
+
     def test_loc_setitem_with_expansion_large_dataframe(self, monkeypatch):
         # GH#10692
         size_cutoff = 50

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -1378,6 +1378,9 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
         # GH 24212
         # pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
         # -1 is interpreted as a missing value instead of the last element
+        if index.dtype == "float32" and expected_index.dtype == "float64":
+            # GH#41626
+            expected_index = expected_index.astype("float32")
         df1 = DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index)
         df2 = DataFrame({"b": [0, 1, 2, 3, 4, 5]})
         result = df1.merge(df2, left_on="key", right_index=True, how=how)