Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,7 @@ Indexing
- Bug in :meth:`DataFrame.__getitem__` when slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`)
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
- Bug in :meth:`Index.equals` when comparing a string dtype :class:`Index` with an :class:`Index` of a different dtype, which affected comparisons between :class:`Series` objects (:issue:`61099`)
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5481,11 +5481,7 @@ def equals(self, other: Any) -> bool:
# quickly return if the lengths are different
return False

if (
isinstance(self.dtype, StringDtype)
and self.dtype.na_value is np.nan
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition was added in #56106; I think the `na_value` part was added just to be conservative.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Yes, maybe the bug was only confirmed for pyarrow_numpy at that time.

and other.dtype != self.dtype
):
if isinstance(self.dtype, StringDtype) and other.dtype != self.dtype:
# TODO(infer_string) can we avoid this special case?
# special case for object behavior
return other.equals(self.astype(object))
Expand Down
31 changes: 20 additions & 11 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np
import pytest

from pandas.compat import HAS_PYARROW
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -2183,19 +2183,28 @@ def test_enum_column_equality():
tm.assert_series_equal(result, expected)


def test_mixed_col_index_dtype(using_infer_string):
@pytest.mark.parametrize(
"dtype",
[
"string[python]",
pytest.param(
"string[pyarrow]",
marks=td.skip_if_no("pyarrow"),
),
pytest.param(
"str",
marks=td.skip_if_no("pyarrow"),
),
],
)
def test_mixed_col_index_dtype(dtype):
# GH 47382
df1 = DataFrame(columns=list("abc"), data=1.0, index=[0])
df2 = DataFrame(columns=list("abc"), data=0.0, index=[0])
df1.columns = df2.columns.astype("string")
df1.columns = df2.columns.astype(dtype)
result = df1 + df2
expected = DataFrame(columns=list("abc"), data=1.0, index=[0])
if using_infer_string:
# df2.columns.dtype will be "str" instead of object,
# so the aligned result will be "string", not object
if HAS_PYARROW:
dtype = "string[pyarrow]"
else:
dtype = "string"
expected.columns = expected.columns.astype(dtype)

expected.columns = expected.columns.astype(dtype)

tm.assert_frame_equal(result, expected)
49 changes: 49 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
ensure_index,
ensure_index_from_sequences,
)
from pandas.testing import assert_series_equal


class TestIndex:
Expand Down Expand Up @@ -1717,3 +1718,51 @@ def test_is_monotonic_pyarrow_list_type():
idx = Index([[1], [2, 3]], dtype=pd.ArrowDtype(pa.list_(pa.int64())))
assert not idx.is_monotonic_increasing
assert not idx.is_monotonic_decreasing


@pytest.mark.parametrize(
"dtype",
[
"string[python]",
pytest.param(
"string[pyarrow]",
marks=td.skip_if_no("pyarrow"),
),
pytest.param(
"str",
marks=td.skip_if_no("pyarrow"),
),
],
)
def test_index_equals_different_string_dtype(dtype):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you instead use the fixture string_dtype_no_object throughout these tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @rhshadrach, changed the code.

# GH 61099
idx_obj = Index(["a", "b", "c"])
idx_str = Index(["a", "b", "c"], dtype=dtype)

assert idx_obj.equals(idx_str)
assert idx_str.equals(idx_obj)


@pytest.mark.parametrize(
"dtype",
[
"string[python]",
pytest.param(
"string[pyarrow]",
marks=td.skip_if_no("pyarrow"),
),
pytest.param(
"str",
marks=td.skip_if_no("pyarrow"),
),
],
)
def test_index_comparison_different_string_dtype(dtype):
# GH 61099
idx = Index(["a", "b", "c"])
s_obj = Series([1, 2, 3], index=idx)
s_str = Series([4, 5, 6], index=idx.astype(dtype))

expected = Series([True, True, True], index=["a", "b", "c"])
result = s_obj < s_str
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also check s_str > s_obj.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the feedback! Added `s_str > s_obj`.

assert_series_equal(result, expected)
Loading