Skip to content

Commit 4d343fb

Browse files
george-adams1
authored and committed
fixes ArrowDtype.itemsize for fixed-width types
1 parent 188b2da commit 4d343fb

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

pandas/core/dtypes/dtypes.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2307,7 +2307,12 @@ def kind(self) -> str:
23072307
@cache_readonly
def itemsize(self) -> int:
    """
    Return the number of bytes in this dtype.

    Fixed-width Arrow types report their size from ``bit_width``, rounded
    up to a whole byte. When ``bit_width`` is unavailable (reading it
    raises), the item size of the corresponding NumPy dtype is returned
    instead.
    """
    try:
        bit_width = self.pyarrow_dtype.bit_width
    except (AttributeError, NotImplementedError, ValueError):
        # Variable-width or unsupported type: defer to the NumPy dtype.
        return self.numpy_dtype.itemsize
    # Ceiling division: a sub-byte width (e.g. a 1-bit boolean) must report
    # 1 byte rather than being floored to 0.
    return (bit_width + 7) // 8
23112316

23122317
def construct_array_type(self) -> type_t[ArrowExtensionArray]:
23132318
"""

pandas/tests/extension/test_arrow.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3576,6 +3576,29 @@ def test_timestamp_dtype_disallows_decimal():
35763576
pd.array(vals, dtype=ArrowDtype(pa.timestamp("us")))
35773577

35783578

3579+
def test_arrow_dtype_itemsize():
    # GH#57948: date32[day] used to report 8 bytes instead of 4
    assert ArrowDtype(pa.date32()).itemsize == 4

    # Other fixed-width types, paired with their expected size in bytes
    fixed_width_cases = [
        (pa.int32(), 4),
        (pa.int64(), 8),
        (pa.float32(), 4),
        (pa.float64(), 8),
        (pa.date64(), 8),
    ]
    for arrow_type, expected_bytes in fixed_width_cases:
        assert ArrowDtype(arrow_type).itemsize == expected_bytes

    # Variable-width types only need to fall back to some integer size
    for arrow_type in (pa.string(), pa.list_(pa.int32())):
        assert isinstance(ArrowDtype(arrow_type).itemsize, int)
3600+
3601+
35793602
def test_timestamp_dtype_matches_to_datetime():
35803603
# GH#61775
35813604
dtype1 = "datetime64[ns, US/Eastern]"

0 commit comments

Comments
 (0)