Skip to content

Commit 0755a61

Browse files
anmyachev and YarShev authored
FIX-#7157: Make sure quantile function works with numeric_only=True (#7160)
Co-authored-by: Iaroslav Igoshev <[email protected]>
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent fe57e19 commit 0755a61

File tree

3 files changed

+43
-14
lines changed

3 files changed

+43
-14
lines changed

modin/core/storage_formats/pandas/query_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2588,7 +2588,7 @@ def quantile_for_list_of_values(self, **kwargs):
25882588
axis = kwargs.get("axis", 0)
25892589
q = kwargs.get("q")
25902590
numeric_only = kwargs.get("numeric_only", True)
2591-
assert isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list))
2591+
assert isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list, tuple))
25922592

25932593
if numeric_only:
25942594
new_columns = self._modin_frame.numeric_columns()

modin/pandas/base.py

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2304,6 +2304,7 @@ def quantile(
23042304
def check_dtype(t):
23052305
return is_numeric_dtype(t) or lib.is_np_dtype(t, "mM")
23062306

2307+
numeric_only_df = self
23072308
if not numeric_only:
23082309
# If not numeric_only and columns, then check all columns are either
23092310
# numeric, timestamp, or timedelta
@@ -2322,31 +2323,33 @@ def check_dtype(t):
23222323
)
23232324
)
23242325
else:
2325-
# Normally pandas returns this near the end of the quantile, but we
2326-
# can't afford the overhead of running the entire operation before
2327-
# we error.
2328-
if not any(is_numeric_dtype(t) for t in self._get_dtypes()):
2329-
raise ValueError("need at least one array to concatenate")
2326+
numeric_only_df = self.drop(
2327+
columns=[
2328+
i for i in self.dtypes.index if not is_numeric_dtype(self.dtypes[i])
2329+
]
2330+
)
23302331

23312332
# check that all qs are between 0 and 1
23322333
validate_percentile(q)
2333-
axis = self._get_axis_number(axis)
2334-
if isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list)):
2335-
return self.__constructor__(
2336-
query_compiler=self._query_compiler.quantile_for_list_of_values(
2334+
axis = numeric_only_df._get_axis_number(axis)
2335+
if isinstance(q, (pandas.Series, np.ndarray, pandas.Index, list, tuple)):
2336+
return numeric_only_df.__constructor__(
2337+
query_compiler=numeric_only_df._query_compiler.quantile_for_list_of_values(
23372338
q=q,
23382339
axis=axis,
2339-
numeric_only=numeric_only,
2340+
# `numeric_only=True` has already been processed by using `self.drop` function
2341+
numeric_only=False,
23402342
interpolation=interpolation,
23412343
method=method,
23422344
)
23432345
)
23442346
else:
2345-
result = self._reduce_dimension(
2346-
self._query_compiler.quantile_for_single_value(
2347+
result = numeric_only_df._reduce_dimension(
2348+
numeric_only_df._query_compiler.quantile_for_single_value(
23472349
q=q,
23482350
axis=axis,
2349-
numeric_only=numeric_only,
2351+
# `numeric_only=True` has already been processed by using `self.drop` function
2352+
numeric_only=False,
23502353
interpolation=interpolation,
23512354
method=method,
23522355
)

modin/tests/pandas/dataframe/test_window.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -669,6 +669,32 @@ def test_quantile(request, data, q):
669669
modin_df.T.quantile(q)
670670

671671

672+
def test_quantile_7157():
673+
# for details: https://github.com/modin-project/modin/issues/7157
674+
n_rows = 100
675+
n_fcols = 10
676+
n_mcols = 5
677+
678+
df1_md, df1_pd = create_test_dfs(
679+
random_state.rand(n_rows, n_fcols),
680+
columns=[f"feat_{i}" for i in range(n_fcols)],
681+
)
682+
df2_md, df2_pd = create_test_dfs(
683+
{
684+
"test_string1": ["test_string2" for _ in range(n_rows)]
685+
for _ in range(n_mcols)
686+
}
687+
)
688+
df3_md = pd.concat([df2_md, df1_md], axis=1)
689+
df3_pd = pandas.concat([df2_pd, df1_pd], axis=1)
690+
691+
eval_general(df3_md, df3_pd, lambda df: df.quantile(0.25, numeric_only=True))
692+
eval_general(df3_md, df3_pd, lambda df: df.quantile((0.25,), numeric_only=True))
693+
eval_general(
694+
df3_md, df3_pd, lambda df: df.quantile((0.25, 0.75), numeric_only=True)
695+
)
696+
697+
672698
@pytest.mark.parametrize("axis", ["rows", "columns"])
673699
@pytest.mark.parametrize(
674700
"na_option", ["keep", "top", "bottom"], ids=["keep", "top", "bottom"]

0 commit comments

Comments
 (0)