diff --git a/modin/numpy/__init__.py b/modin/numpy/__init__.py index c0e00afa38e..5476b7b668c 100644 --- a/modin/numpy/__init__.py +++ b/modin/numpy/__init__.py @@ -108,6 +108,10 @@ def where(condition, x=None, y=None): + x_specified = x is not None + y_specified = y is not None + if x_specified != y_specified: + raise ValueError("either both or neither of x and y should be given") if condition is True: return x if condition is False: diff --git a/modin/numpy/arr.py b/modin/numpy/arr.py index 42f79f0a365..0e63c682783 100644 --- a/modin/numpy/arr.py +++ b/modin/numpy/arr.py @@ -397,10 +397,29 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if isinstance(input, pd.Series): input = input._query_compiler.to_numpy().flatten() args += [input] + out_kwarg = kwargs.get("out", None) + if out_kwarg is not None: + # If `out` is a modin.numpy.array, `kwargs.get("out")` returns a 1-tuple + # whose only element is that array, so we need to unwrap it from the tuple. + out_kwarg = out_kwarg[0] + kwargs.pop("out", None) + where_kwarg = kwargs.get("where", None) + if where_kwarg is not None: + if isinstance(where_kwarg, type(self)): + kwargs["where"] = where_kwarg._to_numpy() output = self._to_numpy().__array_ufunc__(ufunc, method, *args, **kwargs) if is_scalar(output): return output - return array(output) + if out_kwarg is None: + return array(output) + else: + return fix_dtypes_and_determine_return( + array(output)._query_compiler, + len(output.shape), + dtype=kwargs.get("dtype", None), + out=out_kwarg, + where=True, + ) args = [] for input in inputs: input = try_convert_from_interoperable_type(input) @@ -414,16 +433,14 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # If `out` is a modin.numpy.array, `kwargs.get("out")` returns a 1-tuple # whose only element is that array, so we need to unwrap it from the tuple. out_kwarg = out_kwarg[0] - where_kwarg = kwargs.get("where", True) kwargs["out"] = None - kwargs["where"] = True result = new_ufunc(*args, **kwargs) return fix_dtypes_and_determine_return( result, out_ndim, dtype=kwargs.get("dtype", None), out=out_kwarg, - where=where_kwarg, + where=True, ) def __array_function__(self, func, types, args, kwargs): @@ -437,11 +454,17 @@ def __array_function__(self, func, types, args, kwargs): modin_func = getattr(shaping, func_name) elif hasattr(creation, func_name): modin_func = getattr(creation, func_name) + if func_name == "where": + return self.where(*args[1:]) if modin_func is None: return NotImplemented return modin_func(*args, **kwargs) def where(self, x=None, y=None): + x_specified = x is not None + y_specified = y is not None + if x_specified != y_specified: + raise ValueError("either both or neither of x and y should be given") if not is_bool_dtype(self.dtype): raise NotImplementedError( "Modin currently only supports where on condition arrays with boolean dtype." @@ -2600,3 +2623,9 @@ def _to_numpy(self): if self._ndim == 1: arr = arr.flatten() return arr + + def __array__(self, dtype=None): + arr = self._to_numpy() + if dtype is not None: + return arr.astype(dtype) + return arr diff --git a/modin/numpy/test/test_array.py b/modin/numpy/test/test_array.py index f96962741d0..1d2dca959a1 100644 --- a/modin/numpy/test/test_array.py +++ b/modin/numpy/test/test_array.py @@ -242,7 +242,9 @@ def test_array_where(): ): warnings.filterwarnings("ignore", message="Distributing") (modin_flat_arr <= 0).where() - with pytest.raises(ValueError, match="np.where requires x and y"): + with pytest.raises( + ValueError, match="either both or neither of x and y should be given" + ): (modin_flat_arr <= 0).where(x=["Should Fail."]) with pytest.warns(UserWarning, match="np.where not supported when both x and y"): warnings.filterwarnings("ignore", message="Distributing") diff --git a/modin/pandas/base.py b/modin/pandas/base.py index d6a513851f2..20a8c6d5886 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -61,7 +61,7 @@ from modin.error_message import ErrorMessage from modin import pandas as pd from modin.pandas.utils import is_scalar -from modin.config import IsExperimental +from modin.config import IsExperimental, ExperimentalNumPyAPI from modin.logging import disable_logging, ClassLogger # Similar to pandas, sentinel value to use as kwarg in place of None when None has @@ -3426,6 +3426,80 @@ def __and__(self, other): def __rand__(self, other): return self._binary_op("__rand__", other, axis=0) + def __array_function__(self, func, types, args, kwargs): + """ + Return the result of calling an array function on self. + + Parameters + ---------- + func : Callable + The function to call. + types : list[types] + Types of arguments. + args : list + Arguments to pass to function. + kwargs : dict + Key word arguments to pass to function. + + Returns + ------- + arr : np.ndarray or modin.numpy.array + The result of calling the array function on self. + """ + out = self.to_numpy().__array_function__(func, types, args, kwargs) + if out is NotImplemented: + func_name = func.__name__ + arr = self.__array__() + if ExperimentalNumPyAPI.get(): + ErrorMessage.warn( + f"Attempted to use Experimental NumPy API for function {func_name} but failed. Defaulting to NumPy." + ) + converted_args = [] + for input in args: + if hasattr(input, "_query_compiler"): + input = input.__array__() + converted_args += [input] + where_kwarg = kwargs.get("where") + if where_kwarg is not None: + if hasattr(where_kwarg, "_query_compiler"): + kwargs["where"] = where_kwarg.__array__() + return func(arr, *converted_args[1:], **kwargs) + return out + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + """ + Return the result of calling ufunc `ufunc`. + + Parameters + ---------- + ufunc : Callable + The ufunc that was called. + method : str + Which method of the ufunc was called. + *inputs : tuple + Tuple of inputs passed to the ufunc. + **kwargs : dict + Keyword arguments passed to the ufunc. + + Returns + ------- + arr : np.ndarray or modin.numpy.array + The result of calling the array function on self. + """ + if ExperimentalNumPyAPI.get(): + return self.to_numpy().__array_ufunc__(ufunc, method, *inputs, **kwargs) + else: + # If we are not using our Experimental NumPy API, we need to convert + # all of the inputs to the ufunc to compatible types with NumPy - otherwise + # NumPy will not be able to find valid implementations for the ufunc. + arr = self.to_numpy() + args = [] + for input in inputs: + if hasattr(input, "_query_compiler"): + input = input.__array__() + args += [input] + return arr.__array_ufunc__(ufunc, method, *args, **kwargs) + def __array__(self, dtype=None): """ Return the values as a NumPy array. @@ -3441,6 +3515,8 @@ def __array__(self, dtype=None): NumPy representation of Modin object. """ arr = self.to_numpy(dtype) + if ExperimentalNumPyAPI.get(): + arr = arr._to_numpy() return arr def __copy__(self, deep=True):