Skip to content

Commit 27b6167

Browse files
cast non-string to string for __from_arrow__
1 parent 3b49e2f commit 27b6167

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

pandas/core/arrays/string_.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,10 @@
7575

7676
from pandas.io.formats import printing
7777

78+
if HAS_PYARROW:
79+
import pyarrow as pa
80+
import pyarrow.compute as pc
81+
7882
if TYPE_CHECKING:
7983
from collections.abc import MutableMapping
8084

@@ -337,7 +341,15 @@ def __from_arrow__(
337341
Construct StringArray from pyarrow Array/ChunkedArray.
338342
"""
339343
if self.storage == "pyarrow":
340-
from pandas.core.arrays.string_arrow import ArrowStringArray
344+
from pandas.core.arrays.string_arrow import (
345+
ArrowStringArray,
346+
_chk_pyarrow_available,
347+
)
348+
349+
_chk_pyarrow_available()
350+
351+
if not pa.types.is_large_string(array.type):
352+
array = pc.cast(array, pa.large_string())
341353

342354
return ArrowStringArray(array, dtype=self)
343355

pandas/tests/io/test_parquet.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1145,7 +1145,7 @@ def test_roundtrip_decimal(self, tmp_path, pa):
11451145
df.to_parquet(path, schema=pa.schema([("a", pa.decimal128(5))]))
11461146
result = read_parquet(path)
11471147
if pa_version_under19p0:
1148-
expected = pd.DataFrame({"a": ["123"]}, dtype="string[python]")
1148+
expected = pd.DataFrame({"a": ["123"]}, dtype="string")
11491149
else:
11501150
expected = pd.DataFrame({"a": [Decimal("123.00")]}, dtype="object")
11511151
tm.assert_frame_equal(result, expected)

scripts/validate_unwanted_patterns.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@
5858
"_fill_limit_area_1d",
5959
"_make_block",
6060
"_DatetimeTZBlock",
61+
"_chk_pyarrow_available",
6162
}
6263

6364

0 commit comments

Comments
 (0)