Skip to content

Commit 35275e1

Browse files
sfc-gh-mvashishtha, anmyachev, sfc-gh-joshi
authored
REFACTOR-#7427: Require query compilers to expose engine and storage format. (#7430)
Signed-off-by: sfc-gh-mvashishtha <[email protected]>
Co-authored-by: Anatoly Myachev <[email protected]>
Co-authored-by: Jonathan Shi <[email protected]>
1 parent a76cdb6 commit 35275e1

File tree

16 files changed

+153
-5
lines changed

16 files changed

+153
-5
lines changed

modin/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,11 @@ class TestQC(BaseQueryCompiler):
165165
def __init__(self, modin_frame):
166166
self._modin_frame = modin_frame
167167

168+
storage_format = property(
169+
lambda self: "Base", doc=BaseQueryCompiler.storage_format.__doc__
170+
)
171+
engine = property(lambda self: "Python", doc=BaseQueryCompiler.engine.__doc__)
172+
168173
def finalize(self):
169174
self._modin_frame.finalize()
170175

modin/core/dataframe/pandas/dataframe/dataframe.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import datetime
2424
import re
25+
from abc import ABC, abstractmethod
2526
from functools import cached_property
2627
from typing import TYPE_CHECKING, Callable, Dict, Hashable, List, Optional, Union
2728

@@ -33,7 +34,6 @@
3334
from pandas.core.indexes.api import Index, RangeIndex
3435

3536
from modin.config import (
36-
Engine,
3737
IsRayCluster,
3838
MinColumnPartitionSize,
3939
MinRowPartitionSize,
@@ -80,7 +80,7 @@
8080

8181

8282
class PandasDataframe(
83-
ClassLogger, modin_layer="CORE-DATAFRAME", log_level=LogLevel.DEBUG
83+
ABC, ClassLogger, modin_layer="CORE-DATAFRAME", log_level=LogLevel.DEBUG
8484
):
8585
"""
8686
An abstract class that represents the parent class for any pandas storage format dataframe class.
@@ -122,6 +122,31 @@ class PandasDataframe(
122122
_dtypes: Optional[ModinDtypes] = None
123123
_pandas_backend: Optional[str] = None
124124

125+
@property
126+
def storage_format(self) -> str:
127+
"""
128+
The storage format for this frame's data.
129+
130+
Returns
131+
-------
132+
str
133+
The storage format.
134+
"""
135+
return "Pandas"
136+
137+
@property
138+
@abstractmethod
139+
def engine(self) -> str:
140+
"""
141+
The engine for this frame.
142+
143+
Returns
144+
-------
145+
str
146+
The engine.
147+
"""
148+
pass
149+
125150
@cached_property
126151
def __constructor__(self) -> type[PandasDataframe]:
127152
"""
@@ -1707,7 +1732,7 @@ def astype(self, col_dtypes, errors: str = "raise"):
17071732
new_dtypes = self_dtypes.copy()
17081733
# Update the new dtype series to the proper pandas dtype
17091734
new_dtype = pandas.api.types.pandas_dtype(dtype)
1710-
if Engine.get() == "Dask" and hasattr(dtype, "_is_materialized"):
1735+
if self.engine == "Dask" and hasattr(dtype, "_is_materialized"):
17111736
# FIXME: https://github.com/dask/distributed/issues/8585
17121737
_ = dtype._materialize_categories()
17131738

@@ -1736,7 +1761,7 @@ def astype_builder(df):
17361761
if not (col_dtypes == self_dtypes).all():
17371762
new_dtypes = self_dtypes.copy()
17381763
new_dtype = pandas.api.types.pandas_dtype(col_dtypes)
1739-
if Engine.get() == "Dask" and hasattr(new_dtype, "_is_materialized"):
1764+
if self.engine == "Dask" and hasattr(new_dtype, "_is_materialized"):
17401765
# FIXME: https://github.com/dask/distributed/issues/8585
17411766
_ = new_dtype._materialize_categories()
17421767
if isinstance(new_dtype, pandas.CategoricalDtype):

modin/core/execution/dask/implementations/pandas_on_dask/dataframe/dataframe.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""Module houses class that implements ``PandasDataframe``."""
1515

1616
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
17+
from modin.utils import _inherit_docstrings
1718

1819
from ..partitioning.partition_manager import PandasOnDaskDataframePartitionManager
1920

@@ -66,3 +67,8 @@ def __reduce__(self): # noqa: GL08
6667

6768
address = default_client().scheduler_info()["address"]
6869
return self.reconnect, (address, self.__dict__)
70+
71+
@property
72+
@_inherit_docstrings(PandasDataframe.engine)
73+
def engine(self) -> str:
74+
return "Dask"

modin/core/execution/dispatching/factories/factories.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,8 @@ def _to_pickle_glob(cls, *args, **kwargs):
629629
**kwargs : kwargs
630630
Arguments to the writer method.
631631
"""
632+
# TODO(https://github.com/modin-project/modin/issues/7429): Use
633+
# frame-level execution instead of the global, default execution.
632634
current_execution = get_current_execution()
633635
if current_execution not in supported_executions:
634636
raise NotImplementedError(

modin/core/execution/modin_aqp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ def call_progress_bar(result_parts, line_no):
9191

9292
threading.Thread(target=_show_time_updates, args=(progress_bars[pbar_id],)).start()
9393

94+
# TODO(https://github.com/modin-project/modin/issues/7429): Use
95+
# frame-level engine config.
9496
modin_engine = Engine.get()
9597
engine_wrapper = None
9698
if modin_engine == "Ray":

modin/core/execution/python/implementations/pandas_on_python/dataframe/dataframe.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"""
1919

2020
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
21+
from modin.utils import _inherit_docstrings
2122

2223
from ..partitioning.partition_manager import PandasOnPythonDataframePartitionManager
2324

@@ -50,3 +51,8 @@ class PandasOnPythonDataframe(PandasDataframe):
5051
"""
5152

5253
_partition_mgr_cls = PandasOnPythonDataframePartitionManager
54+
55+
@property
56+
@_inherit_docstrings(PandasDataframe.engine)
57+
def engine(self) -> str:
58+
return "Python"

modin/core/execution/ray/implementations/pandas_on_ray/dataframe/dataframe.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from modin.core.dataframe.base.dataframe.utils import Axis
1717
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
18+
from modin.utils import _inherit_docstrings
1819

1920
from ..partitioning.partition_manager import PandasOnRayDataframePartitionManager
2021

@@ -66,3 +67,8 @@ def _get_lengths(self, parts, axis):
6667
dims = [part.width(False) for part in parts]
6768

6869
return self._partition_mgr_cls.materialize_futures(dims)
70+
71+
@property
72+
@_inherit_docstrings(PandasDataframe.engine)
73+
def engine(self) -> str:
74+
return "Ray"

modin/core/execution/unidist/implementations/pandas_on_unidist/dataframe/dataframe.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""Module houses class that implements ``PandasDataframe`` using unidist."""
1515

1616
from modin.core.dataframe.pandas.dataframe.dataframe import PandasDataframe
17+
from modin.utils import _inherit_docstrings
1718

1819
from ..partitioning.partition_manager import PandasOnUnidistDataframePartitionManager
1920

@@ -47,3 +48,8 @@ class PandasOnUnidistDataframe(PandasDataframe):
4748
def support_materialization_in_worker_process(self) -> bool:
4849
# more details why this is not `True` in https://github.com/modin-project/modin/pull/6673
4950
return False
51+
52+
@property
53+
@_inherit_docstrings(PandasDataframe.engine)
54+
def engine(self) -> str:
55+
return "Unidist"

modin/core/execution/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ def remote_function(func, ignore_defaults=False):
4242

4343
# Check if the function already exists to avoid circular imports
4444
elif "remote_function" not in dir():
45+
# TODO(https://github.com/modin-project/modin/issues/7429): Use
46+
# frame-level engine config.
47+
4548
from modin.config import Engine
4649

4750
if Engine.get() == "Ray":

modin/core/storage_formats/base/query_compiler.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,32 @@ class BaseQueryCompiler(
150150
_modin_frame: PandasDataframe
151151
_shape_hint: Optional[str]
152152

153+
@property
154+
@abc.abstractmethod
155+
def storage_format(self) -> str:
156+
"""
157+
The storage format for this query compiler.
158+
159+
Returns
160+
-------
161+
str
162+
The storage format.
163+
"""
164+
pass
165+
166+
@property
167+
@abc.abstractmethod
168+
def engine(self) -> str:
169+
"""
170+
The engine for this query compiler.
171+
172+
Returns
173+
-------
174+
str
175+
The engine.
176+
"""
177+
pass
178+
153179
def __wrap_in_qc(self, obj):
154180
"""
155181
Wrap `obj` in query compiler.

0 commit comments

Comments
 (0)