-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
###########
# main.py #
###########
# import astropy.units as u
import numpy as np
from pandas._libs.lib import item_from_zerodim
from new import item_from_zerodim_new
# Define a custom ndarray subclass
class TestArray(np.ndarray):
    """Minimal ``np.ndarray`` subclass that tags its instances with a marker.

    The ``_is_test_array`` attribute makes it observable whether a value is
    still a ``TestArray`` (marker present) or has been unboxed into a plain
    scalar (marker missing) after passing through ``item_from_zerodim``.
    """

    def __new__(cls, input_array):
        """Return ``input_array`` converted with ``np.asarray`` and viewed as ``cls``."""
        return np.asarray(input_array).view(cls)

    def __array_finalize__(self, obj) -> None:
        """Set the marker attribute; ``obj`` (the source array, if any) is unused."""
        self._is_test_array = True
# Define test data
val_0_dim = 1
val_1_dim = [1, 2, 3]
# 0-dim and 1-dim numpy arrays
arr_0_dim = np.array(val_0_dim)
arr_1_dim = np.array(val_1_dim)
# 0-dim and 1-dim TestArray arrays
test_arr_0_dim = TestArray(val_0_dim)
test_arr_1_dim = TestArray(val_1_dim)
# 0-dim and 1-dim astropy Quantity arrays
# q_0_dim: u.Quantity[u.physical.length] = u.Quantity(val_0_dim, u.m)
# q_1_dim: u.Quantity[u.physical.length] = u.Quantity(val_1_dim, u.m)
# Test each value
for val in [arr_0_dim, arr_1_dim, test_arr_0_dim, test_arr_1_dim]:
# for val in [arr_0_dim, arr_1_dim, test_arr_0_dim, test_arr_1_dim, q_0_dim, q_1_dim]:
print(f"Testing value: {val=} with {val.ndim=}")
print("\nShow val is instance of ndarray, TestArray or Quantity:")
print(f"{isinstance(val, np.ndarray)=}")
print(f"{isinstance(val, TestArray)=}")
# print(f"{isinstance(val, u.Quantity)=}")
print("\nShow type of val is ndarray, TestArray or Quantity:")
print(f"{type(val) is np.ndarray=}")
print(f"{type(val) is TestArray=}")
# print(f"{type(val) is u.Quantity=}")
print("\nUsing pandas item_from_zerodim:")
val_zero_dim_pandas = item_from_zerodim(val)
print(f"{val_zero_dim_pandas=}")
print(f"{type(val_zero_dim_pandas)=}")
print(f"{getattr(val_zero_dim_pandas, "_is_test_array", None)=}")
# print(f"{getattr(val_zero_dim_pandas, "unit", None)=}")
print("\nUsing new item_from_zerodim with cnp.PyArray_CheckExact check:")
val_zero_dim_new = item_from_zerodim_new(val)
print(f"{val_zero_dim_new=}")
print(f"{type(val_zero_dim_new)=}")
print(f"{getattr(val_zero_dim_new, "_is_test_array", None)=}")
# print(f"{getattr(val_zero_dim_new, "unit", None)=}")
print("\n\n\n")
###########
# new.pyx #
###########
cimport numpy as cnp
def item_from_zerodim_new(val: object) -> object:
    """
    If the value is a zerodim ndarray (NOT subclass), return the item it contains.

    Any other value, including a zerodim instance of an ndarray subclass,
    is returned unchanged.

    Parameters
    ----------
    val : object

    Returns
    -------
    object

    Examples
    --------
    >>> item_from_zerodim_new(1)
    1
    >>> item_from_zerodim_new('foobar')
    'foobar'
    >>> item_from_zerodim_new(np.array(1))
    np.int64(1)
    >>> item_from_zerodim_new(np.array([1]))
    array([1])
    """
    # The exact-type check is what differs from a plain zero-dim test:
    # ndarray subclasses fail it and are therefore returned as-is, keeping
    # their type and any extra attributes.
    if cnp.PyArray_IsZeroDim(val) and cnp.PyArray_CheckExact(val):
        return cnp.PyArray_ToScalar(cnp.PyArray_DATA(val), val)
    return val
############
# setup.py #
############
from setuptools import setup
from Cython.Build import cythonize
import numpy as np
# Build the extension in place with: python setup.py build_ext --inplace
setup(
    ext_modules=cythonize("new.pyx"),
    # new.pyx does `cimport numpy`, so the NumPy headers must be on the
    # include path when the generated C code is compiled.
    include_dirs=[np.get_include()],
)
Issue Description
When using arithmetic operations of a pandas Series or DataFrame with scalar objects subclassed from np.ndarray (so obj.ndim == 0), the scalar objects are converted to float by the item_from_zerodim function, as it only checks whether the value is 0-dim (see L325). This becomes an issue if custom attributes of the subclass are needed in the arithmetic operation.
Minimal Reproducible Example
For a minimal reproducible example I created the custom TestArray class (if astropy is among your dependencies, you can uncomment the corresponding lines). To execute the code, first compile the new.pyx file with python setup.py build_ext --inplace. Then executing the main.py file gives the following output (shown here with the astropy lines uncommented):
Testing value: val=array(1) with val.ndim=0
Show val is instance of ndarray, TestArray or Quantity:
isinstance(val, np.ndarray)=True
isinstance(val, TestArray)=False
isinstance(val, u.Quantity)=False
Show type of val is ndarray, TestArray or Quantity:
type(val) is np.ndarray=True
type(val) is TestArray=False
type(val) is u.Quantity=False
Using pandas item_from_zerodim:
val_zero_dim_pandas=np.int64(1)
type(val_zero_dim_pandas)=<class 'numpy.int64'>
getattr(val_zero_dim_pandas, "_is_test_array", None)=None
getattr(val_zero_dim_pandas, "unit", None)=None
Using new item_from_zerodim with cnp.PyArray_CheckExact check:
val_zero_dim_new=np.int64(1)
type(val_zero_dim_new)=<class 'numpy.int64'>
getattr(val_zero_dim_new, "_is_test_array", None)=None
getattr(val_zero_dim_new, "unit", None)=None
Testing value: val=array([1, 2, 3]) with val.ndim=1
Show val is instance of ndarray, TestArray or Quantity:
isinstance(val, np.ndarray)=True
isinstance(val, TestArray)=False
isinstance(val, u.Quantity)=False
Show type of val is ndarray, TestArray or Quantity:
type(val) is np.ndarray=True
type(val) is TestArray=False
type(val) is u.Quantity=False
Using pandas item_from_zerodim:
val_zero_dim_pandas=array([1, 2, 3])
type(val_zero_dim_pandas)=<class 'numpy.ndarray'>
getattr(val_zero_dim_pandas, "_is_test_array", None)=None
getattr(val_zero_dim_pandas, "unit", None)=None
Using new item_from_zerodim with cnp.PyArray_CheckExact check:
val_zero_dim_new=array([1, 2, 3])
type(val_zero_dim_new)=<class 'numpy.ndarray'>
getattr(val_zero_dim_new, "_is_test_array", None)=None
getattr(val_zero_dim_new, "unit", None)=None
Testing value: val=TestArray(1) with val.ndim=0
Show val is instance of ndarray, TestArray or Quantity:
isinstance(val, np.ndarray)=True
isinstance(val, TestArray)=True
isinstance(val, u.Quantity)=False
Show type of val is ndarray, TestArray or Quantity:
type(val) is np.ndarray=False
type(val) is TestArray=True
type(val) is u.Quantity=False
Using pandas item_from_zerodim:
val_zero_dim_pandas=np.int64(1)
type(val_zero_dim_pandas)=<class 'numpy.int64'>
getattr(val_zero_dim_pandas, "_is_test_array", None)=None
getattr(val_zero_dim_pandas, "unit", None)=None
Using new item_from_zerodim with cnp.PyArray_CheckExact check:
val_zero_dim_new=TestArray(1)
type(val_zero_dim_new)=<class '__main__.TestArray'>
getattr(val_zero_dim_new, "_is_test_array", None)=True
getattr(val_zero_dim_new, "unit", None)=None
Testing value: val=TestArray([1, 2, 3]) with val.ndim=1
Show val is instance of ndarray, TestArray or Quantity:
isinstance(val, np.ndarray)=True
isinstance(val, TestArray)=True
isinstance(val, u.Quantity)=False
Show type of val is ndarray, TestArray or Quantity:
type(val) is np.ndarray=False
type(val) is TestArray=True
type(val) is u.Quantity=False
Using pandas item_from_zerodim:
val_zero_dim_pandas=TestArray([1, 2, 3])
type(val_zero_dim_pandas)=<class '__main__.TestArray'>
getattr(val_zero_dim_pandas, "_is_test_array", None)=True
getattr(val_zero_dim_pandas, "unit", None)=None
Using new item_from_zerodim with cnp.PyArray_CheckExact check:
val_zero_dim_new=TestArray([1, 2, 3])
type(val_zero_dim_new)=<class '__main__.TestArray'>
getattr(val_zero_dim_new, "_is_test_array", None)=True
getattr(val_zero_dim_new, "unit", None)=None
Testing value: val=<Quantity 1. m> with val.ndim=0
Show val is instance of ndarray, TestArray or Quantity:
isinstance(val, np.ndarray)=True
isinstance(val, TestArray)=False
isinstance(val, u.Quantity)=True
Show type of val is ndarray, TestArray or Quantity:
type(val) is np.ndarray=False
type(val) is TestArray=False
type(val) is u.Quantity=True
Using pandas item_from_zerodim:
val_zero_dim_pandas=np.float64(1.0)
type(val_zero_dim_pandas)=<class 'numpy.float64'>
getattr(val_zero_dim_pandas, "_is_test_array", None)=None
getattr(val_zero_dim_pandas, "unit", None)=None
Using new item_from_zerodim with cnp.PyArray_CheckExact check:
val_zero_dim_new=<Quantity 1. m>
type(val_zero_dim_new)=<class 'astropy.units.quantity.Quantity'>
getattr(val_zero_dim_new, "_is_test_array", None)=None
getattr(val_zero_dim_new, "unit", None)=Unit("m")
Testing value: val=<Quantity [1., 2., 3.] m> with val.ndim=1
Show val is instance of ndarray, TestArray or Quantity:
isinstance(val, np.ndarray)=True
isinstance(val, TestArray)=False
isinstance(val, u.Quantity)=True
Show type of val is ndarray, TestArray or Quantity:
type(val) is np.ndarray=False
type(val) is TestArray=False
type(val) is u.Quantity=True
Using pandas item_from_zerodim:
val_zero_dim_pandas=<Quantity [1., 2., 3.] m>
type(val_zero_dim_pandas)=<class 'astropy.units.quantity.Quantity'>
getattr(val_zero_dim_pandas, "_is_test_array", None)=None
getattr(val_zero_dim_pandas, "unit", None)=Unit("m")
Using new item_from_zerodim with cnp.PyArray_CheckExact check:
val_zero_dim_new=<Quantity [1., 2., 3.] m>
type(val_zero_dim_new)=<class 'astropy.units.quantity.Quantity'>
getattr(val_zero_dim_new, "_is_test_array", None)=None
getattr(val_zero_dim_new, "unit", None)=Unit("m")
It shows that the values of TestArray are converted to float for the 0-dim case but not the 1-dim case using the pandas item_from_zerodim function. However the newly created item_from_zerodim_new function, which adds cnp.PyArray_CheckExact(val) as an additional check, retains the types for both cases but still converts the 0-dim np.ndarray case to a float.
Reservation
So far I have only tested the behavior of the new function with np.ndarray and the two subclasses. I am not sure whether there are other subclasses of np.ndarray (e.g. in pandas or in other ExtensionDtype implementations) that expect to be converted to float here and that would be broken by the new function.
Background:
I am currently developing an ExtensionDtype/ExtensionArray for astropy Quantity objects (see pandas-units-extension). The Quantity objects are a subclass of np.ndarray, but add a unit attribute for a physical dimension, e.g. "m" or "cm" for a length, so that u.Quantity(1, "m") + u.Quantity(10, "cm") is equal to u.Quantity(1.1, "m") (1 meter plus 10 centimeters is 1.1 meters).
The unit attribute of the Quantity object is lost in arithmetic operations due to the aforementioned issue, leading to an astropy UnitConversionError, as astropy forbids adding two values of mismatching physical dimensions. This cannot be ignored: in the example above, the 10 cm would be converted to the float value 10.0 (which in astropy terms is therefore dimensionless), and 1 m + 10 cm = 1.1 m is not equal to 1 + 10 = 11.
Expected Behavior
I would expect that the item_from_zerodim only converts objects of type np.ndarray and dimension 0-dim to float. Subclasses of np.ndarray lose required attributes at the moment. A possible fix is already discussed above.
Installed Versions
INSTALLED VERSIONS
commit : 9c8bc3e
python : 3.13.5
python-bits : 64
OS : Linux
OS-release : 5.15.153.1-microsoft-standard-WSL2
Version : #1 SMP Fri Mar 29 23:14:13 UTC 2024
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : C.UTF-8
pandas : 2.3.3
numpy : 2.3.2
pytz : 2025.2
dateutil : 2.9.0.post0
pip : 25.2
Cython : None
sphinx : None
IPython : 9.4.0
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.13.5
blosc : None
bottleneck : 1.5.0
dataframe-api-compat : None
fastparquet : None
fsspec : 2025.7.0
html5lib : 1.1
hypothesis : None
gcsfs : None
jinja2 : 3.1.6
lxml.etree : None
matplotlib : 3.10.6
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
psycopg2 : None
pymysql : None
pyarrow : 21.0.0
pyreadstat : None
pytest : 8.4.1
python-calamine : None
pyxlsb : None
s3fs : 2025.7.0
scipy : 1.16.1
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
xlsxwriter : None
zstandard : 0.24.0
tzdata : 2025.2
qtpy : None
pyqt5 : None