Skip to content

Commit 3168604

Browse files
authored
BUG: merge with CategoricalDtype RecursionError (#62282)
1 parent 10574df commit 3168604

File tree

5 files changed

+43
-0
lines changed

5 files changed

+43
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,8 @@ Reshaping
10841084
- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
10851085
- Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
10861086
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
1087+
- Bug in :meth:`DataFrame.merge` where merging on :class:`CategoricalDtype` columns could incorrectly raise ``RecursionError`` (:issue:`56376`)
1088+
-
10871089

10881090
Sparse
10891091
^^^^^^

pandas/core/dtypes/cast.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,6 +1727,17 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
17271727

17281728
if tipo is not None:
17291729
# TODO: itemsize check?
1730+
1731+
if isinstance(tipo, CategoricalDtype):
1732+
# GH#56376
1733+
if tipo.categories.dtype.kind not in "iuf":
1734+
# Categories of any dtype other than float/integer we cannot hold
1735+
raise LossySetitemError
1736+
casted = np.asarray(element, dtype=dtype)
1737+
if np.array_equal(casted, element, equal_nan=True):
1738+
return casted
1739+
raise LossySetitemError
1740+
17301741
if tipo.kind not in "iuf":
17311742
# Anything other than float/integer we cannot hold
17321743
raise LossySetitemError

pandas/core/indexes/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5390,6 +5390,12 @@ def putmask(self, mask, value) -> Index:
53905390

53915391
# See also: Block.coerce_to_target_dtype
53925392
dtype = self._find_common_type_compat(value)
5393+
if dtype == self.dtype:
5394+
# GH#56376 casting to the same dtype and retrying would recurse until RecursionError
5395+
raise AssertionError(
5396+
"Something has gone wrong. Please report a bug at "
5397+
"github.com/pandas-dev/pandas"
5398+
) from err
53935399
return self.astype(dtype).putmask(mask, value)
53945400

53955401
values = self._values.copy()

pandas/tests/dtypes/cast/test_can_hold_element.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from pandas.core.dtypes.cast import can_hold_element
44

5+
from pandas import Categorical
6+
57

68
def test_can_hold_element_range(any_int_numpy_dtype):
79
# GH#44261
@@ -96,3 +98,11 @@ def test_can_hold_element_bool():
9698
assert not can_hold_element(arr, element)
9799
assert not can_hold_element(arr, np.array([element]))
98100
assert not can_hold_element(arr, np.array([element], dtype=object))
101+
102+
103+
def test_can_hold_element_categorical():
104+
# GH#56376
105+
arr = np.array([], dtype=np.float64)
106+
cat = Categorical([1, 2, None])
107+
108+
assert can_hold_element(arr, cat)

pandas/tests/reshape/merge/test_merge.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3070,3 +3070,17 @@ def test_merge_for_suffix_collisions(suffixes):
30703070
df2 = DataFrame({"col1": [1], "col2": [2], "col2_dup": [3]})
30713071
with pytest.raises(MergeError, match="duplicate columns"):
30723072
merge(df1, df2, on="col1", suffixes=suffixes)
3073+
3074+
3075+
def test_merge_categorical_key_recursion():
3076+
# GH#56376
3077+
lt = CategoricalDtype(categories=np.asarray([1, 2, 3], dtype="int64"))
3078+
rt = CategoricalDtype(categories=np.asarray([1, 2, 3], dtype="float64"))
3079+
left = DataFrame({"key": Series([1, 2], dtype=lt)})
3080+
right = DataFrame({"key": Series([1, 3], dtype=rt)})
3081+
3082+
result = left.merge(right, on="key", how="outer")
3083+
expected = left.astype("int64").merge(
3084+
right.astype("float64"), on="key", how="outer"
3085+
)
3086+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)