diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 87c18fe346c62..812b507ea22c1 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -379,7 +379,7 @@ def _logical_method(self, other, op): # type: ignore[override] elif is_list_like(other): other = np.asarray(other, dtype="bool") if other.ndim > 1: - raise NotImplementedError("can only perform ops with 1-d structures") + return NotImplemented other, mask = coerce_to_array(other, copy=False) elif isinstance(other, np.bool_): other = other.item() diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py deleted file mode 100644 index bc45343d6e2d3..0000000000000 --- a/pandas/core/tools/numeric.py +++ /dev/null @@ -1,316 +0,0 @@ -from __future__ import annotations - -from typing import ( - TYPE_CHECKING, - Literal, -) - -import numpy as np - -from pandas._libs import ( - lib, - missing as libmissing, -) -from pandas._libs.tslibs import ( - Timedelta, - Timestamp, -) -from pandas.util._validators import check_dtype_backend - -from pandas.core.dtypes.cast import maybe_downcast_numeric -from pandas.core.dtypes.common import ( - ensure_object, - is_bool_dtype, - is_decimal, - is_integer_dtype, - is_number, - is_numeric_dtype, - is_scalar, - is_string_dtype, - needs_i8_conversion, -) -from pandas.core.dtypes.dtypes import ArrowDtype -from pandas.core.dtypes.generic import ( - ABCIndex, - ABCSeries, -) - -from pandas.core.arrays import BaseMaskedArray -from pandas.core.arrays.string_ import StringDtype - -if TYPE_CHECKING: - from pandas._typing import ( - DateTimeErrorChoices, - DtypeBackend, - npt, - ) - - -def to_numeric( - arg, - errors: DateTimeErrorChoices = "raise", - downcast: Literal["integer", "signed", "unsigned", "float"] | None = None, - dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, -): - """ - Convert argument to a numeric type. - - The default return dtype is `float64` or `int64` - depending on the data supplied. Use the `downcast` parameter - to obtain other dtypes. - - Please note that precision loss may occur if really large numbers - are passed in. Due to the internal limitations of `ndarray`, if - numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min) - or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are - passed in, it is very likely they will be converted to float so that - they can be stored in an `ndarray`. These warnings apply similarly to - `Series` since it internally leverages `ndarray`. - - Parameters - ---------- - arg : scalar, list, tuple, 1-d array, or Series - Argument to be converted. - - errors : {'raise', 'coerce'}, default 'raise' - - If 'raise', then invalid parsing will raise an exception. - - If 'coerce', then invalid parsing will be set as NaN. - - downcast : str, default None - Can be 'integer', 'signed', 'unsigned', or 'float'. - If not None, and if the data has been successfully cast to a - numerical dtype (or if the data was numeric to begin with), - downcast that resulting data to the smallest numerical dtype - possible according to the following rules: - - - 'integer' or 'signed': smallest signed int dtype (min.: np.int8) - - 'unsigned': smallest unsigned int dtype (min.: np.uint8) - - 'float': smallest float dtype (min.: np.float32) - - As this behaviour is separate from the core conversion to - numeric values, any errors raised during the downcasting - will be surfaced regardless of the value of the 'errors' input. - - In addition, downcasting will only occur if the size - of the resulting data's dtype is strictly larger than - the dtype it is to be cast to, so if none of the dtypes - checked satisfy that specification, no downcasting will be - performed on the data. - - dtype_backend : {'numpy_nullable', 'pyarrow'} - Back-end data type applied to the resultant :class:`DataFrame` - (still experimental). If not specified, the default behavior - is to not use nullable data types. If specified, the behavior - is as follows: - - * ``"numpy_nullable"``: returns nullable-dtype-backed object - * ``"pyarrow"``: returns with pyarrow-backed nullable object - - .. versionadded:: 2.0 - - Returns - ------- - ret - Numeric if parsing succeeded. - Return type depends on input. Series if Series, otherwise ndarray. - - See Also - -------- - DataFrame.astype : Cast argument to a specified dtype. - to_datetime : Convert argument to datetime. - to_timedelta : Convert argument to timedelta. - numpy.ndarray.astype : Cast a numpy array to a specified type. - DataFrame.convert_dtypes : Convert dtypes. - - Examples - -------- - Take separate series and convert to numeric, coercing when told to - - >>> s = pd.Series(["1.0", "2", -3]) - >>> pd.to_numeric(s) - 0 1.0 - 1 2.0 - 2 -3.0 - dtype: float64 - >>> pd.to_numeric(s, downcast="float") - 0 1.0 - 1 2.0 - 2 -3.0 - dtype: float32 - >>> pd.to_numeric(s, downcast="signed") - 0 1 - 1 2 - 2 -3 - dtype: int8 - >>> s = pd.Series(["apple", "1.0", "2", -3]) - >>> pd.to_numeric(s, errors="coerce") - 0 NaN - 1 1.0 - 2 2.0 - 3 -3.0 - dtype: float64 - - Downcasting of nullable integer and floating dtypes is supported: - - >>> s = pd.Series([1, 2, 3], dtype="Int64") - >>> pd.to_numeric(s, downcast="integer") - 0 1 - 1 2 - 2 3 - dtype: Int8 - >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64") - >>> pd.to_numeric(s, downcast="float") - 0 1.0 - 1 2.1 - 2 3.0 - dtype: Float32 - """ - if downcast not in (None, "integer", "signed", "unsigned", "float"): - raise ValueError("invalid downcasting method provided") - - if errors not in ("raise", "coerce"): - raise ValueError("invalid error value specified") - - check_dtype_backend(dtype_backend) - - is_series = False - is_index = False - is_scalars = False - - if isinstance(arg, ABCSeries): - is_series = True - values = arg.values - elif isinstance(arg, ABCIndex): - is_index = True - if needs_i8_conversion(arg.dtype): - values = arg.view("i8") - else: - values = arg.values - elif isinstance(arg, (list, tuple)): - values = np.array(arg, dtype="O") - elif is_scalar(arg): - if is_decimal(arg): - return float(arg) - if is_number(arg): - return arg - if isinstance(arg, (Timedelta, Timestamp)): - return arg._value - is_scalars = True - values = np.array([arg], dtype="O") - elif getattr(arg, "ndim", 1) > 1: - raise TypeError("arg must be a list, tuple, 1-d array, or Series") - else: - values = arg - - # GH33013: for IntegerArray & FloatingArray extract non-null values for casting - # save mask to reconstruct the full array after casting - mask: npt.NDArray[np.bool_] | None = None - if isinstance(values, BaseMaskedArray): - mask = values._mask - values = values._data[~mask] - - values_dtype = getattr(values, "dtype", None) - if isinstance(values_dtype, ArrowDtype): - mask = values.isna() - values = values.dropna().to_numpy() - new_mask: np.ndarray | None = None - if is_numeric_dtype(values_dtype): - pass - elif lib.is_np_dtype(values_dtype, "mM"): - values = values.view(np.int64) - else: - values = ensure_object(values) - coerce_numeric = errors != "raise" - values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload] - values, - set(), - coerce_numeric=coerce_numeric, - convert_to_masked_nullable=dtype_backend is not lib.no_default - or ( - isinstance(values_dtype, StringDtype) - and values_dtype.na_value is libmissing.NA - ), - ) - - if new_mask is not None: - # Remove unnecessary values, is expected later anyway and enables - # downcasting - values = values[~new_mask] - elif (dtype_backend is not lib.no_default and new_mask is None) or ( - isinstance(values_dtype, StringDtype) and values_dtype.na_value is libmissing.NA - ): - new_mask = np.zeros(values.shape, dtype=np.bool_) - - # attempt downcast only if the data has been successfully converted - # to a numerical dtype and if a downcast method has been specified - if downcast is not None and is_numeric_dtype(values.dtype): - typecodes: str | None = None - - if downcast in ("integer", "signed"): - typecodes = np.typecodes["Integer"] - elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0): - typecodes = np.typecodes["UnsignedInteger"] - elif downcast == "float": - typecodes = np.typecodes["Float"] - - # pandas support goes only to np.float32, - # as float dtypes smaller than that are - # extremely rare and not well supported - float_32_char = np.dtype(np.float32).char - float_32_ind = typecodes.index(float_32_char) - typecodes = typecodes[float_32_ind:] - - if typecodes is not None: - # from smallest to largest - for typecode in typecodes: - dtype = np.dtype(typecode) - if dtype.itemsize <= values.dtype.itemsize: - values = maybe_downcast_numeric(values, dtype) - - # successful conversion - if values.dtype == dtype: - break - - # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct - # masked array - if (mask is not None or new_mask is not None) and not is_string_dtype(values.dtype): - if mask is None or (new_mask is not None and new_mask.shape == mask.shape): - # GH 52588 - mask = new_mask - else: - mask = mask.copy() - assert isinstance(mask, np.ndarray) - data = np.zeros(mask.shape, dtype=values.dtype) - data[~mask] = values - - from pandas.core.arrays import ( - ArrowExtensionArray, - BooleanArray, - FloatingArray, - IntegerArray, - ) - - klass: type[IntegerArray | BooleanArray | FloatingArray] - if is_integer_dtype(data.dtype): - klass = IntegerArray - elif is_bool_dtype(data.dtype): - klass = BooleanArray - else: - klass = FloatingArray - values = klass(data, mask) - - if dtype_backend == "pyarrow" or isinstance(values_dtype, ArrowDtype): - values = ArrowExtensionArray(values.__arrow_array__()) - - if is_series: - return arg._constructor(values, index=arg.index, name=arg.name) - elif is_index: - # because we want to coerce to numeric if possible, - # do not use _shallow_copy - from pandas import Index - - return Index(values, name=arg.name) - elif is_scalars: - return values[0] - else: - return values