Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,7 @@ Indexing
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` returning incorrect dtype when selecting from a :class:`DataFrame` with mixed data types. (:issue:`60600`)
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
- Bug in :meth:`Index.equals` when comparing a string dtype :class:`Index` with an :class:`Index` of a different dtype, which also affected comparisons between :class:`Series` indexed by them (:issue:`61099`)
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
- Bug in :meth:`Series.__setitem__` when assigning boolean series with boolean indexer will raise ``LossySetitemError`` (:issue:`57338`)
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5481,11 +5481,7 @@ def equals(self, other: Any) -> bool:
# quickly return if the lengths are different
return False

if (
isinstance(self.dtype, StringDtype)
and self.dtype.na_value is np.nan
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition was added in #56106; I think the na_value part was added just to be conservative.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Yes, maybe the bug had only been confirmed for pyarrow_numpy at that time.

and other.dtype != self.dtype
):
if isinstance(self.dtype, StringDtype) and other.dtype != self.dtype:
# TODO(infer_string) can we avoid this special case?
# special case for object behavior
return other.equals(self.astype(object))
Expand Down
17 changes: 5 additions & 12 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import numpy as np
import pytest

from pandas.compat import HAS_PYARROW

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -2183,19 +2181,14 @@ def test_enum_column_equality():
tm.assert_series_equal(result, expected)


# NOTE(review): this span is a rendered diff, so two versions of the test are
# interleaved. The `using_infer_string` signature, the `astype("string")` cast
# and the whole `if using_infer_string:` branch appear to be the pre-change
# lines; the `string_dtype_no_object` lines appear to be their replacements.
#
# The test adds two single-row frames with the same column labels, where df1's
# columns have been cast from df2's columns to another dtype, and checks that
# the sum equals a frame of 1.0 values.
def test_mixed_col_index_dtype(using_infer_string):
def test_mixed_col_index_dtype(string_dtype_no_object):
# GH 47382
df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
df2 = DataFrame(columns=list("abc"), data=0.0, index=[0])
df1.columns = df2.columns.astype("string")
df1.columns = df2.columns.astype(string_dtype_no_object)
# 1.0 + 0.0 per cell, so the expected values are all 1.0.
result = df1 + df2
expected = DataFrame(columns=list("abc"), data=1.0, index=[0])
if using_infer_string:
# df2.columns.dtype will be "str" instead of object,
# so the aligned result will be "string", not object
if HAS_PYARROW:
dtype = "string[pyarrow]"
else:
dtype = "string"
expected.columns = expected.columns.astype(dtype)

# The expected columns are cast to the same dtype that df1's columns were given.
expected.columns = expected.columns.astype(string_dtype_no_object)

tm.assert_frame_equal(result, expected)
25 changes: 25 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
ensure_index,
ensure_index_from_sequences,
)
from pandas.testing import assert_series_equal


class TestIndex:
Expand Down Expand Up @@ -1717,3 +1718,27 @@ def test_is_monotonic_pyarrow_list_type():
idx = Index([[1], [2, 3]], dtype=pd.ArrowDtype(pa.list_(pa.int64())))
assert not idx.is_monotonic_increasing
assert not idx.is_monotonic_decreasing


def test_index_equals_different_string_dtype(string_dtype_no_object):
# GH 61099
idx_obj = Index(["a", "b", "c"])
idx_str = Index(["a", "b", "c"], dtype=string_dtype_no_object)

assert idx_obj.equals(idx_str)
assert idx_str.equals(idx_obj)


# Compares two Series whose indexes hold the same labels, one index left at its
# default dtype and the other cast to `string_dtype_no_object`, and checks the
# elementwise result in both operand orders.
# NOTE(review): the review-thread text embedded in the body below comes from the
# rendered pull-request page and is not part of the function.
def test_index_comparison_different_string_dtype(string_dtype_no_object):
# GH 61099
idx = Index(["a", "b", "c"])
s_obj = Series([1, 2, 3], index=idx)
s_str = Series([4, 5, 6], index=idx.astype(string_dtype_no_object))

# Every value in s_obj is smaller than its counterpart in s_str, so all True.
expected = Series([True, True, True], index=["a", "b", "c"])
result = s_obj < s_str
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also check s_str > s_obj.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the feedback! added s_str > s_obj.

assert_series_equal(result, expected)

# Reversed operands: the expected index is cast to the string dtype before
# comparing, i.e. to the index dtype of the left-hand operand s_str.
result = s_str > s_obj
expected.index = idx.astype(string_dtype_no_object)
assert_series_equal(result, expected)
Loading