Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,7 @@ Other API changes
an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
the dtype of the resulting Index (:issue:`60797`)
- :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic between indexes with mismatched frequencies now aligns (:issue:`55782`)
- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
- :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
- Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
- NumPy functions like ``np.isinf`` that return a bool dtype when called on an :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)
Expand Down Expand Up @@ -974,8 +975,8 @@ Indexing
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
- Bug in adding new rows to a :class:`DataFrame` or :class:`Series` with :meth:`.loc` failing to retain dtype on the object's index in some cases (:issue:`41626`)
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
-

Missing
^^^^^^^
Expand Down Expand Up @@ -1094,7 +1095,7 @@ Reshaping
- Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
- Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
- Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)

Sparse
^^^^^^
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10918,6 +10918,13 @@ def _append_internal(
),
)
row_df = other.to_frame().T
if isinstance(self.index.dtype, ExtensionDtype):
# GH#41626 retain e.g. CategoricalDtype if reached via
# df.loc[key] = item
row_df.index = self.index.array._cast_pointwise_result(
row_df.index._values
)

# infer_objects is needed for
# test_append_empty_frame_to_series_with_dateutil_tz
other = row_df.infer_objects().rename_axis(index.names)
Expand Down
15 changes: 0 additions & 15 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
)

from pandas.core.dtypes.common import is_scalar
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
Expand Down Expand Up @@ -519,17 +518,3 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
"""
mapped = self._values.map(mapper, na_action=na_action)
return Index(mapped, name=self.name)

def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
    """
    Concatenate ``to_concat`` into a single Index named ``name``.

    Every element is first passed through ``self._is_dtype_compat``; when
    that succeeds for all of them the result keeps the type of ``self``.
    If a ``TypeError`` is raised along the way, the raw values are
    concatenated with ``concat_compat`` and wrapped in a plain ``Index``.
    """
    # The dtype of the other indexes is deliberately not checked up front;
    # compatibility is decided by attempting the categorical concat.
    try:
        compat_values = [self._is_dtype_compat(other) for other in to_concat]
        merged = Categorical._concat_same_type(compat_values)
    except TypeError:
        # not all to_concat elements are among our categories (or NA)
        raw_values = [other._values for other in to_concat]
        return Index(concat_compat(raw_values), name=name)

    return type(self)._simple_new(merged, name=name)
6 changes: 5 additions & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1483,7 +1483,11 @@ def _create_join_index(
mask = indexer == -1
if np.any(mask):
fill_value = na_value_for_dtype(index.dtype, compat=False)
index = index.append(Index([fill_value]))
if not index._can_hold_na:
new_index = Index([fill_value])
else:
new_index = Index([fill_value], dtype=index.dtype)
index = index.append(new_index)
if indexer is None:
return index.copy()
return index.take(indexer)
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_hashable

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -310,6 +312,22 @@ def test_setitem_expand_with_extension(self, data):
result.loc[:, "B"] = data
tm.assert_frame_equal(result, expected)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
    """Enlarging a DataFrame/Series through ``.loc`` keeps the index dtype."""
    # GH#41626 retain index.dtype in setitem-with-expansion
    if not is_hashable(data[0]):
        pytest.skip("Test does not apply to non-hashable data.")

    # Index labels must be unique so that the last one is genuinely "new"
    # once it has been sliced off below.
    uniques = data.unique()
    n_rows = len(uniques)
    new_label = uniques[-1]
    new_value = n_rows - 1

    expected = pd.DataFrame({"A": range(n_rows)}, index=uniques)
    df = expected.iloc[:-1]
    ser = df["A"]

    df.loc[new_label] = new_value
    tm.assert_frame_equal(df, expected)

    ser.loc[new_label] = new_value
    tm.assert_series_equal(ser, expected["A"])

def test_setitem_frame_invalid_length(self, data):
df = pd.DataFrame({"A": [1] * len(data)})
xpr = (
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,15 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op):
expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
tm.assert_series_equal(result, expected)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
    """Run the base test, marking pyarrow date types as an expected failure."""
    if pa.types.is_date(data.dtype.pyarrow_dtype):
        request.applymarker(
            pytest.mark.xfail(
                reason="GH#62343 incorrectly casts to timestamp[ms][pyarrow]"
            )
        )
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)


class TestLogicalOps:
"""Various Series and DataFrame logical ops methods."""
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/extension/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ def test_EA_types(self, engine, data, request):
def test_astype_str(self, data):
super().test_astype_str(data)

@pytest.mark.xfail(
    reason=(
        "Test is invalid for IntervalDtype, needs to be adapted for "
        "this dtype with an index with index._index_as_unique."
    )
)
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
    """Delegate to the base test; unconditionally marked xfail for this dtype."""
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)


# TODO: either belongs in tests.arrays.interval or move into base tests.
def test_fillna_non_scalar_raises(data_missing):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/extension/test_masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,9 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
)
)
tm.assert_series_equal(result, expected)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
    """Run the base test, expecting failure when ``data.dtype.kind`` is "b"."""
    is_bool_kind = data.dtype.kind == "b"
    if is_bool_kind:
        request.applymarker(
            pytest.mark.xfail(reason="GH#62344 incorrectly casts to object")
        )
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
6 changes: 6 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@ def test_index_from_listlike_with_dtype(self, data):
def test_EA_types(self, engine, data, request):
super().test_EA_types(engine, data, request)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
    """Run the base test, expecting failure when the new label is a tuple."""
    last_item = data[-1]
    if isinstance(last_item, tuple):
        request.applymarker(pytest.mark.xfail(reason="Unpacks tuple"))
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)


class Test2DCompat(base.NDArrayBacked2DTests):
pass
8 changes: 8 additions & 0 deletions pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,14 @@ def test_arith_series_with_array(
request.applymarker(mark)
super().test_arith_series_with_array(data, all_arithmetic_operators)

def test_loc_setitem_with_expansion_preserves_ea_index_dtype(
    self, data, request, using_infer_string
):
    """
    Run the base test, expecting failure for python-storage strings when
    ``using_infer_string`` is false.
    """
    python_storage = data.dtype.storage == "python"
    if python_storage and not using_infer_string:
        request.applymarker(pytest.mark.xfail(reason="Casts to object"))
    super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)


class Test2DCompat(base.Dim2CompatTests):
@pytest.fixture(autouse=True)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexes/categorical/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ def test_append_mismatched_categories(self, ci):
ci.append(ci.values.reorder_categories(list("abc")))

def test_append_category_objects(self, ci):
    """Appending a non-categorical Index of strings gives a plain ``Index``."""
    # GH#41626 pre-3.0 this used to cast the object-dtype index to
    #  ci.dtype; the expected result is now a plain Index rather than a
    #  CategoricalIndex.
    result = ci.append(Index(["c", "a"]))
    expected = Index(list("aabbcaca"))
    tm.assert_index_equal(result, expected, exact=True)

def test_append_non_categories(self, ci):
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from pandas._libs import index as libindex
from pandas.errors import IndexingError
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -1963,6 +1964,22 @@ def test_loc_drops_level(self):


class TestLocSetitemWithExpansion:
@td.skip_if_no("pyarrow")
def test_loc_setitem_with_expansion_preserves_ea_dtype(self):
    """``.loc`` enlargement keeps a ``date32[pyarrow]`` index dtype."""
    # GH#41626 retain index.dtype in setitem-with-expansion
    first_day = Timestamp(0).date()
    second_day = Timestamp("1970-01-02").date()

    idx = Index([first_day], dtype="date32[pyarrow]")
    df = DataFrame({"A": range(1)}, index=idx)

    df.loc[second_day] = 1

    exp_index = Index([idx[0], second_day], dtype=idx.dtype)
    tm.assert_index_equal(df.index, exp_index)

    # Drop the freshly added row again and repeat the check for a Series.
    ser = df["A"].iloc[:-1]
    ser.loc[second_day] = 1
    tm.assert_index_equal(ser.index, exp_index)

def test_loc_setitem_with_expansion_large_dataframe(self, monkeypatch):
# GH#10692
size_cutoff = 50
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,9 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
# GH 24212
# pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
# -1 is interpreted as a missing value instead of the last element
if index.dtype == "float32" and expected_index.dtype == "float64":
# GH#41626
expected_index = expected_index.astype("float32")
df1 = DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index)
df2 = DataFrame({"b": [0, 1, 2, 3, 4, 5]})
result = df1.merge(df2, left_on="key", right_index=True, how=how)
Expand Down
Loading