Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,7 @@ Other API changes
an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
the dtype of the resulting Index (:issue:`60797`)
- :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies aligns (:issue:`55782`)
- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
- :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
- Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
- Numpy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)
Expand Down Expand Up @@ -974,8 +975,8 @@ Indexing
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
- Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :meth:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`)
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
-

Missing
^^^^^^^
Expand Down Expand Up @@ -1094,7 +1095,7 @@ Reshaping
- Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
- Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
-
- Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)

Sparse
^^^^^^
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10918,6 +10918,13 @@ def _append_internal(
),
)
row_df = other.to_frame().T
if isinstance(self.index.dtype, ExtensionDtype):
# GH#41626 retain e.g. CategoricalDtype if reached via
# df.loc[key] = item
row_df.index = self.index.array._cast_pointwise_result(
row_df.index._values
)

# infer_objects is needed for
# test_append_empty_frame_to_series_with_dateutil_tz
other = row_df.infer_objects().rename_axis(index.names)
Expand Down
15 changes: 0 additions & 15 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
)

from pandas.core.dtypes.common import is_scalar
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
Expand Down Expand Up @@ -519,17 +518,3 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
"""
mapped = self._values.map(mapper, na_action=na_action)
return Index(mapped, name=self.name)

def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
    """
    Concatenate ``to_concat`` into a single Index named ``name``.

    Every entry is first passed through ``self._is_dtype_compat``.  When
    that succeeds for all of them, the pieces are joined as a Categorical
    and a new index of the caller's type is returned.  If a ``TypeError``
    is raised along the way, the raw ``_values`` of the inputs are
    concatenated with ``concat_compat`` and wrapped in a plain ``Index``.
    """
    # The calling index is categorical, so the dtypes of the other
    # indexes are deliberately not checked up front.
    try:
        compatible = [self._is_dtype_compat(idx) for idx in to_concat]
        combined = Categorical._concat_same_type(compatible)
    except TypeError:
        # not all to_concat elements are among our categories (or NA)
        raw_values = [idx._values for idx in to_concat]
        return Index(concat_compat(raw_values), name=name)

    return type(self)._simple_new(combined, name=name)
6 changes: 5 additions & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1483,7 +1483,11 @@ def _create_join_index(
mask = indexer == -1
if np.any(mask):
fill_value = na_value_for_dtype(index.dtype, compat=False)
index = index.append(Index([fill_value]))
if not index._can_hold_na:
new_index = Index([fill_value])
else:
new_index = Index([fill_value], dtype=index.dtype)
index = index.append(new_index)
if indexer is None:
return index.copy()
return index.take(indexer)
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_hashable

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -310,6 +312,22 @@ def test_setitem_expand_with_extension(self, data):
result.loc[:, "B"] = data
tm.assert_frame_equal(result, expected)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
    # GH#41626 retain index.dtype in setitem-with-expansion
    if not is_hashable(data[0]):
        pytest.skip("Test does not apply to non-hashable data.")

    # The unique values serve as index labels; the objects under test
    # start out without the final row, which is then added back via .loc.
    data = data.unique()
    new_label = data[-1]
    new_value = len(data) - 1
    expected = pd.DataFrame({"A": range(len(data))}, index=data)

    df = expected.iloc[:-1]
    ser = df["A"]

    # DataFrame path
    df.loc[new_label] = new_value
    tm.assert_frame_equal(df, expected)

    # Series path
    ser.loc[new_label] = new_value
    tm.assert_series_equal(ser, expected["A"])

def test_setitem_frame_invalid_length(self, data):
df = pd.DataFrame({"A": [1] * len(data)})
xpr = (
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,15 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op):
expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
tm.assert_series_equal(result, expected)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
    # Data whose pyarrow type is a date type is marked as an expected
    # failure before delegating to the inherited test.
    if pa.types.is_date(data.dtype.pyarrow_dtype):
        request.applymarker(
            pytest.mark.xfail(
                reason="GH#62343 incorrectly casts to timestamp[ms][pyarrow]"
            )
        )
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)


class TestLogicalOps:
"""Various Series and DataFrame logical ops methods."""
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/extension/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ def test_EA_types(self, engine, data, request):
def test_astype_str(self, data):
super().test_astype_str(data)

@pytest.mark.xfail(
    reason="Test is invalid for IntervalDtype, needs to be adapted for "
    "this dtype with an index with index._index_as_unique."
)
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
    # Delegates to the inherited test unchanged; the override exists only
    # so that the xfail marker above can be attached for this dtype.
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)


# TODO: either belongs in tests.arrays.interval or move into base tests.
def test_fillna_non_scalar_raises(data_missing):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/extension/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,9 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
)
)
tm.assert_series_equal(result, expected)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
    # Data whose dtype kind is "b" is marked as an expected failure
    # (GH#62344) before delegating to the inherited test.
    if data.dtype.kind == "b":
        request.applymarker(
            pytest.mark.xfail(reason="GH#62344 incorrectly casts to object")
        )
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
6 changes: 6 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@ def test_index_from_listlike_with_dtype(self, data):
def test_EA_types(self, engine, data, request):
super().test_EA_types(engine, data, request)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
    # When the last element of the data is a tuple, the test is marked
    # as an expected failure before delegating to the inherited test.
    last_item = data[-1]
    if isinstance(last_item, tuple):
        request.applymarker(pytest.mark.xfail(reason="Unpacks tuple"))
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)


class Test2DCompat(base.NDArrayBacked2DTests):
    # Adds nothing of its own: every test comes from
    # base.NDArrayBacked2DTests.
    pass
8 changes: 8 additions & 0 deletions pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,14 @@ def test_arith_series_with_array(
request.applymarker(mark)
super().test_arith_series_with_array(data, all_arithmetic_operators)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(
    self, data, request, using_infer_string
):
    # With using_infer_string falsy and "python" storage, the test is
    # marked as an expected failure before delegating to the inherited
    # test.
    if not using_infer_string:
        if data.dtype.storage == "python":
            request.applymarker(pytest.mark.xfail(reason="Casts to object"))
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)


class Test2DCompat(base.Dim2CompatTests):
@pytest.fixture(autouse=True)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexes/categorical/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ def test_append_mismatched_categories(self, ci):
ci.append(ci.values.reorder_categories(list("abc")))

def test_append_category_objects(self, ci):
    # GH#41626 pre-3.0 this used to cast the object-dtype index to
    #  ci.dtype
    # with objects
    result = ci.append(Index(["c", "a"]))
    # The result is expected to be a plain Index rather than a
    # CategoricalIndex.  The earlier CategoricalIndex expectation was
    # overwritten before use and contradicted the note above, so it has
    # been dropped.
    expected = Index(list("aabbcaca"))
    tm.assert_index_equal(result, expected, exact=True)

def test_append_non_categories(self, ci):
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from pandas._libs import index as libindex
from pandas.errors import IndexingError
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -1963,6 +1964,22 @@ def test_loc_drops_level(self):


class TestLocSetitemWithExpansion:
@td.skip_if_no("pyarrow")
def test_loc_setitem_with_expansion_preserves_ea_dtype(self):
    # GH#41626 retain index.dtype in setitem-with-expansion
    first_day = Timestamp(0).date()
    item = Timestamp("1970-01-02").date()

    idx = Index([first_day], dtype="date32[pyarrow]")
    df = DataFrame({"A": range(1)}, index=idx)

    # Enlarging the frame with a new date label must keep the index dtype.
    df.loc[item] = 1
    exp_index = Index([idx[0], item], dtype=idx.dtype)
    tm.assert_index_equal(df.index, exp_index)

    # Same check for a Series: drop the last row, then add it back.
    ser = df["A"].iloc[:-1]
    ser.loc[item] = 1
    tm.assert_index_equal(ser.index, exp_index)

def test_loc_setitem_with_expansion_large_dataframe(self, monkeypatch):
# GH#10692
size_cutoff = 50
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,9 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
# GH 24212
# pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
# -1 is interpreted as a missing value instead of the last element
if index.dtype == "float32" and expected_index.dtype == "float64":
# GH#41626
expected_index = expected_index.astype("float32")
df1 = DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index)
df2 = DataFrame({"b": [0, 1, 2, 3, 4, 5]})
result = df1.merge(df2, left_on="key", right_index=True, how=how)
Expand Down
Loading