From 70846c92ba3549d56c089e1706ed7b74a10e8be3 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 18 Jan 2025 20:45:36 +0100 Subject: [PATCH 1/7] Start working on pandas accessors tutorial --- docs/tutorials/pandas_accessor_tutorial.py | 251 ++++++++++++++++++ src/continuous_timeseries/pandas_accessors.py | 191 ++++++++++++- src/continuous_timeseries/timeseries.py | 80 ++++++ 3 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 docs/tutorials/pandas_accessor_tutorial.py diff --git a/docs/tutorials/pandas_accessor_tutorial.py b/docs/tutorials/pandas_accessor_tutorial.py new file mode 100644 index 0000000..5544a5b --- /dev/null +++ b/docs/tutorials/pandas_accessor_tutorial.py @@ -0,0 +1,251 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.6 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Pandas accessors API +# +# Here we introduce our Pandas accessor API. +# There make use of +# [pandas accessors API](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors) +# to create an easy integration between Continuous Timeseries and pandas objects. +# +# For many use cases, simply using pandas objects directly is the best option. +# However, the interoperability makes it easy to convert between, +# so you get the best of both worlds. + +# %% [markdown] +# ## Imports + +# %% +import itertools +import multiprocessing +import traceback + +import numpy as np +import openscm_units +import pandas as pd +import pint + +import continuous_timeseries as ct +import continuous_timeseries.pandas_accessors + +# %% [markdown] +# ## Registering the accessors +# +# You must register the accessors before they can be used. +# We make this step explicit so that our imports don't have side effects +# (we've had bad experiences with imports with side effects +# and don't want you to have to those bad experiences too). + +# %% +# If you try and use the accessor before registering, +# you will get an AttributeError. +try: + pd.Series.ct +except AttributeError: + traceback.print_exc(limit=0) + +# %% +continuous_timeseries.pandas_accessors.register_pandas_accessor() + +# %% +# Having registered the accessor, +# the "ct" namespace is now available. +pd.Series.ct + +# %% [markdown] +# ## Set up pint + +# %% +pint.set_application_registry(openscm_units.unit_registry) + +# %% [markdown] +# ## Handy pint aliases + +# %% +UR = pint.get_application_registry() + +# %% [markdown] +# ## Set up matplotlib to work with pint +# +# For details, see the pint docs +# ([stable docs](https://pint.readthedocs.io/en/stable/user/plotting.html), +# [last version that we checked at the time of writing](https://pint.readthedocs.io/en/0.24.4/user/plotting.html)) +# [or our docs on unit-aware plotting](../discrete_timeseries_tutorial#unit-aware-plotting). + +# %% +UR.setup_matplotlib(enable=True) + + +# %% [markdown] +# ## Helper functions + + +# %% +def create_df( + *, + n_scenarios: int, + n_variables: int, + n_runs: int, + timepoints: np.typing.NDArray[np.floating], + units: str = "Mt / yr", +) -> pd.DataFrame: + """ + Create an example `pd.DataFrame` + + This uses the idea of simple climate model runs, + where you have a number of scenarios, + each of which has a number of variables + from a number of different model runs + with output for a number of different time points. 
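+
+    Parameters
+    ----------
+    n_scenarios
+        Number of scenarios to create
+
+    n_variables
+        Number of variables to create for each scenario
+
+    n_runs
+        Number of runs to create
+        for each scenario-variable combination
+
+    timepoints
+        Time points to use for the output's columns
+
+    units
+        Units of the created data
+
+    Returns
+    -------
+    :
+        Example `pd.DataFrame`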
+ """ + idx = pd.MultiIndex.from_frame( + pd.DataFrame( + ( + (s, v, r, units) + for s, v, r in itertools.product( + [f"variable_{i}" for i in range(n_variables)], + [f"scenario_{i}" for i in range(n_scenarios)], + [i for i in range(n_runs)], + ) + ), + columns=["scenario", "variable", "region", "units"], + # This makes updates later way way faster + dtype="category", + ) + ) + + df = pd.DataFrame( + np.random.random((n_variables * n_runs * n_scenarios, timepoints.size)), + columns=timepoints, + index=idx, + ) + + return df + + +# %% [markdown] +# ## Converting to `Timeseries` +# +# Here we show how to convert to `Timeseries`. +# More specifically, how to take a `pd.DataFrame` +# and convert it to a `pd.Series` of `Timeseries`. +# The benefit here is that you can still filter/manipulate the result +# using standard pandas filtering, +# but you have `Timeseries` objects to work with from there. + +# %% [markdown] +# We start with a basic `pd.DataFrame`. + +# %% +small_df = create_df( + n_scenarios=25, + n_variables=10, + n_runs=30, + timepoints=np.arange(250) + 1850.0, +) +small_df + +# %% [markdown] +# Then we convert it time series. + +# %% +small_df.ct.to_timeseries( + time_units="yr", + interpolation=ct.InterpolationOption.PiecewiseConstantPreviousLeftClosed, +) + +# %% [markdown] +# Then we can use standard Continuous timeseries APIs, +# e.g. plotting. + +# %% + +# %% [markdown] +# If we have a bigger `pd.DataFrame`, this process can be much slower. +# If you're not sure what's happening, you can activate the progress bar if you have +# [`tdqm`](https://tqdm.github.io/) installed. + +# %% +bigger_df = create_df( + n_scenarios=100, + n_variables=2, + n_runs=300, + timepoints=np.arange(351) + 1850.0, +) +bigger_df + +# %% +bigger_df.ct.to_timeseries( + time_units="yr", + interpolation=ct.InterpolationOption.Linear, + progress=True, +) + +# %% [markdown] +# If you want to speed things up, +# you may want to process the `pd.DataFrame` in parallel. + +# %% +n_processes = multiprocessing.cpu_count() +n_processes + +# %% +bigger_df.ct.to_timeseries( + time_units="yr", + interpolation=ct.InterpolationOption.Linear, + n_processes=n_processes, +) + +# %% [markdown] +# If you want progress bars in parallel, +# we support that too. + +# %% +bigger_df.ct.to_timeseries( + time_units="yr", + interpolation=ct.InterpolationOption.Linear, + n_processes=n_processes, + progress=True, +) + +# %% [markdown] +# If you want nested progress bars in parallel, +# we support that too +# (although we're not sure if this works on windows +# because of the need for forking...). + +# %% +bigger_df.ct.to_timeseries( + time_units="yr", + interpolation=ct.InterpolationOption.Linear, + n_processes=n_processes, + progress=True, + # We have found that nested progress bars in parallel + # only really work if we use forking + # (on windows, probably best to just not use the nested bars + # because forking isn't supported). 
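+    # "fork" is only available on POSIX systems;
+    # windows only supports the "spawn" start method.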
+ progress_nested=True, + mp_context=multiprocessing.get_context("fork"), +) + +# %% [markdown] +# - filtering with pandas-indexing +# - bigger df +# - convert more rows (progress, parallel, parallel with progress) +# - other operations, also with progress, parallel, parallel with progress +# - convert to seaborn df for more fine-grained plotting control +# - also requires adding a `increase_resolution` method to `Timeseries` +# - convert with more fine-grained control over interpolation +# - unit conversion diff --git a/src/continuous_timeseries/pandas_accessors.py b/src/continuous_timeseries/pandas_accessors.py index 8da32a2..94a0fa1 100644 --- a/src/continuous_timeseries/pandas_accessors.py +++ b/src/continuous_timeseries/pandas_accessors.py @@ -4,14 +4,23 @@ from __future__ import annotations -from typing import TYPE_CHECKING +import concurrent.futures +from collections.abc import Iterator +from multiprocessing.context import BaseContext +from typing import TYPE_CHECKING, Any, TypeVar +import numpy as np +import pint + +from continuous_timeseries.discrete_to_continuous import InterpolationOption from continuous_timeseries.exceptions import MissingOptionalDependencyError from continuous_timeseries.timeseries import Timeseries if TYPE_CHECKING: import pandas as pd + P = TypeVar("P", bound=pd.DataFrame | pd.Series[Any]) + class SeriesCTAccessor: """ @@ -42,6 +51,185 @@ def metadata(self) -> pd.DataFrame: return self._series.index.to_frame(index=False) +def get_chunks(pd_obj: P, n_chunks: int) -> Iterator[P]: + # Late import to avoid hard dependency on pandas + try: + import pandas as pd + except ImportError as exc: + raise MissingOptionalDependencyError( + "interpolate", requirement="pandas" + ) from exc + + if isinstance(pd_obj, pd.DataFrame): + total = pd_obj.shape[0] + else: + # Series + total = pd_obj.size + + chunk_size = int(np.ceil(total / n_chunks)) + for i in range(n_chunks): + start = i * chunk_size + end = (i + 1) * chunk_size + if end >= total: + end = None + + if isinstance(pd_obj, pd.DataFrame): + yield pd_obj.iloc[start:end, :] + else: + yield pd_obj.iloc[start:end] + + +def get_timeseries_parallel_helper( + df: pd.DataFrame, + interpolation: InterpolationOption, + time_units: str | pint.facets.plain.PlainUnit, + units_col: str, + idx_separator: str, + ur: pint.facets.PlainRegistry | None = None, + progress: bool = False, + progress_bar_position: int = 0, +) -> pd.Series[Timeseries]: + if progress: + try: + from tqdm.auto import tqdm + except ImportError as exc: + raise MissingOptionalDependencyError( # noqa: TRY003 + "get_timeseries_parallel_helper(..., progress=True)", requirement="tdqm" + ) from exc + + tqdm_kwargs = dict(position=progress_bar_position) + tqdm.pandas(**tqdm_kwargs) + meth_to_call = "progress_apply" + # No-one knows why this is needed, but it is + # jupyter notebooks + print(end=" ") + + else: + meth_to_call = "apply" + + try: + units_idx = df.index.names.index(units_col) + except ValueError as exc: + msg = f"{units_col} not available. 
{df.index.names=}" + + raise KeyError(msg) from exc + + res = getattr(df, meth_to_call)( + # TODO: make this injectable too + Timeseries.from_pandas_series, + axis="columns", + interpolation=interpolation, + units_idx=units_idx, + time_units=time_units, + # name="injectable?", + idx_separator=idx_separator, + ur=ur, + ) + + return res + + +class DataFrameCTAccessor: + """ + [`pd.DataFrame`][pandas.DataFrame] accessors + + For details, see + [pandas' docs](https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors). + """ + + def __init__(self, pandas_obj: pd.DataFrame): + """ + Initialise + + Parameters + ---------- + pandas_obj + Pandas object to use via the accessor + """ + # TODO: consider adding validation + # validate_series(pandas_obj) + self._df = pandas_obj + + def to_timeseries( # noqa: PLR0913 + self, + interpolation: InterpolationOption, + time_units: str | pint.facets.plain.PlainUnit, + units_col: str = "units", + ur: None = None, + idx_separator: str = "__", + res_name: str = "ts", + progress: bool = False, + progress_nested: bool = False, + n_processes: int = 1, + mp_context: BaseContext | None = None, + ) -> pd.Series[Timeseries]: + if n_processes == 1: + res = get_timeseries_parallel_helper( + self._df, + interpolation=interpolation, + time_units=time_units, + units_col=units_col, + idx_separator=idx_separator, + ur=ur, + progress=progress, + ) + + return res + + # I think it should be possible to split out a + # `apply_pandas_op_parallel` or similar function. + iterator = get_chunks(self._df, n_chunks=n_processes) + if progress: + try: + from tqdm.auto import tqdm + except ImportError as exc: + raise MissingOptionalDependencyError( # noqa: TRY003 + "to_timeseries(..., progress=True)", requirement="tdqm" + ) from exc + + iterator = tqdm(iterator, desc="submitting to pool") + + with concurrent.futures.ProcessPoolExecutor( + max_workers=n_processes, mp_context=mp_context + ) as pool: + futures = [ + pool.submit( + get_timeseries_parallel_helper, + chunk, + interpolation=interpolation, + time_units=time_units, + units_col=units_col, + idx_separator=idx_separator, + ur=ur, + progress=progress_nested, + progress_bar_position=i, + ) + for i, chunk in enumerate(iterator) + ] + + iterator_results = concurrent.futures.as_completed(futures) + if progress: + iterator_results = tqdm( + iterator_results, + desc="Retrieving parallel results", + total=len(futures), + ) + + res_l = [future.result() for future in iterator_results] + + # Late import to avoid hard dependency on pandas + try: + import pandas as pd + except ImportError as exc: + raise MissingOptionalDependencyError( + "interpolate", requirement="pandas" + ) from exc + + res = pd.concat(res_l) + + return res + + def register_pandas_accessor(namespace: str = "ct") -> None: """ Register the pandas accessors @@ -66,3 +254,4 @@ def register_pandas_accessor(namespace: str = "ct") -> None: ) from exc pd.api.extensions.register_series_accessor(namespace)(SeriesCTAccessor) + pd.api.extensions.register_dataframe_accessor(namespace)(DataFrameCTAccessor) diff --git a/src/continuous_timeseries/timeseries.py b/src/continuous_timeseries/timeseries.py index 22b3cc2..cc629c7 100644 --- a/src/continuous_timeseries/timeseries.py +++ b/src/continuous_timeseries/timeseries.py @@ -43,6 +43,7 @@ if TYPE_CHECKING: import IPython.lib.pretty import matplotlib.axes + import pandas as pd class UnreachableIntegralPreservingInterpolationTarget(ValueError): @@ -214,6 +215,85 @@ def from_arrays( timeseries_continuous=continuous, ) 
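+    # A minimal sketch of how `from_pandas_series` (below) is expected
+    # to be called on a single row of a wide `pd.DataFrame`.
+    # The example data and the position of the units
+    # within the row's name tuple are assumptions for illustration:
+    #
+    #     row = pd.Series(
+    #         [1.2, 2.3, 3.4],
+    #         index=[2000.0, 2010.0, 2020.0],
+    #         name=("scenario_0", "variable_0", 0, "Mt / yr"),
+    #     )
+    #     ts = Timeseries.from_pandas_series(
+    #         row,
+    #         interpolation=InterpolationOption.Linear,
+    #         units_idx=3,  # units sit at position 3 of `row.name`
+    #         time_units="yr",
+    #     )
+    #
+    # With no `name` supplied, the result's name is generated from the
+    # non-units index values, here "scenario_0__variable_0__0".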
+ @classmethod + def from_pandas_series( # noqa: PLR0913 + cls, + series: pd.Series, + interpolation: InterpolationOption, + units_idx: int, + time_units: str | pint.facets.plain.PlainUnit, + name: str | None = None, + idx_separator: str = "__", + ur: pint.facets.PlainRegistry | None = None, + ) -> Timeseries: + """ + Initialise from a [`pd.Series`][pandas.Series] + + Parameters + ---------- + series + [`pd.Series`][pandas.Series] from which to initialise. + + interpolation + Interpolation to apply when converting + the discrete values to a continuous representation + + units_idx + The index of `series.name` (assumed to be a tuple) + which holds the units information. + + time_units + The units to attach to `series`'s columns to create a time axis. + + name + The value of the result's name attribute. + + If not supplied, we automatically generate this based on the `series` + index values. + + idx_separator + The separator to use to join the values of `idx_row[0]` + to get the result's name. + + Only used if `name is None`. + + All parts of `series.name` are included in the name + except the units information. + + ur + Unit registry to use for the conversion. + + If not supplied, we use the result of calling + [`pint.get_application_registry`][]. + + Returns + ------- + : + Initialised [`Timeseries`][(m)]. + """ + if ur is None: + ur = pint.get_application_registry() + + if isinstance(time_units, str): + time_units = ur.Unit(time_units) + + index_values = series.name + units_str = index_values[units_idx] + units = ur.Unit(units_str) + + x = series.index.values * time_units + y = series.values * units + + if name is None: + name = idx_separator.join(str(v) for v in index_values if v != units_str) + + return cls.from_arrays( + x=x, + y=y, + interpolation=interpolation, + name=name, + ) + def differentiate( self, name_res: str | None = None, From d46b1a387b3d55023f73cdae8bac931855d76f01 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 18 Jan 2025 22:33:16 +0100 Subject: [PATCH 2/7] Add more features --- docs/tutorials/pandas_accessor_tutorial.py | 105 ++++++++-- pyproject.toml | 1 + src/continuous_timeseries/pandas_accessors.py | 124 +++++++++++- src/continuous_timeseries/time_axis.py | 69 +++++++ src/continuous_timeseries/timeseries.py | 43 +++- .../timeseries_continuous.py | 185 ++++++------------ tests/unit/test_timeseries_continuous.py | 4 +- uv.lock | 105 ++++++++++ 8 files changed, 485 insertions(+), 151 deletions(-) diff --git a/docs/tutorials/pandas_accessor_tutorial.py b/docs/tutorials/pandas_accessor_tutorial.py index 5544a5b..57db081 100644 --- a/docs/tutorials/pandas_accessor_tutorial.py +++ b/docs/tutorials/pandas_accessor_tutorial.py @@ -35,7 +35,9 @@ import numpy as np import openscm_units import pandas as pd +import pandas_indexing as pix import pint +import seaborn as sns import continuous_timeseries as ct import continuous_timeseries.pandas_accessors @@ -115,19 +117,21 @@ def create_df( ( (s, v, r, units) for s, v, r in itertools.product( - [f"variable_{i}" for i in range(n_variables)], [f"scenario_{i}" for i in range(n_scenarios)], + [f"variable_{i}" for i in range(n_variables)], [i for i in range(n_runs)], ) ), - columns=["scenario", "variable", "region", "units"], - # This makes updates later way way faster + columns=["scenario", "variable", "run", "units"], + # This makes updates and general handling later way way faster. + # TODO: make this tip clearer. 
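+            # (a categorical index stores each label only once and
+            # compares integer codes, which makes alignment, filtering
+            # and groupby operations much cheaper)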
dtype="category", ) ) + n_ts = n_scenarios * n_variables * n_runs df = pd.DataFrame( - np.random.random((n_variables * n_runs * n_scenarios, timepoints.size)), + 50.0 * np.linspace(0.3, 1, n_ts)[:, np.newaxis] * np.linspace(0, 1, timepoints.size)[np.newaxis, :] + np.random.random((n_ts, timepoints.size)), columns=timepoints, index=idx, ) @@ -150,9 +154,9 @@ def create_df( # %% small_df = create_df( - n_scenarios=25, - n_variables=10, - n_runs=30, + n_scenarios=3, + n_variables=2, + n_runs=5, timepoints=np.arange(250) + 1850.0, ) small_df @@ -161,21 +165,36 @@ def create_df( # Then we convert it time series. # %% -small_df.ct.to_timeseries( +small_ts = small_df.ct.to_timeseries( time_units="yr", interpolation=ct.InterpolationOption.PiecewiseConstantPreviousLeftClosed, ) +small_ts # %% [markdown] # Then we can use standard Continuous timeseries APIs, # e.g. plotting. # %% +small_ts.ct.plot(continuous_plot_kwargs=dict(alpha=0.3)) +# # TODO: move this to plotting +# small_ts.ct.plot(continuous_plot_kwargs=dict(alpha=0.3), progress=True) # %% [markdown] -# If we have a bigger `pd.DataFrame`, this process can be much slower. -# If you're not sure what's happening, you can activate the progress bar if you have -# [`tdqm`](https://tqdm.github.io/) installed. +# When combined with [pandas-indexing](https://pandas-indexing.readthedocs.io/en/latest/index.html), +# this can be quite powerful for quick plots. + +# %% +ax = small_ts.loc[pix.isin(variable="variable_0")].ct.plot(continuous_plot_kwargs=dict(alpha=0.3)) +ax.legend(ncols=3, loc="upper center", bbox_to_anchor=(0.5, -0.15)) + +# %% +# TODO: move this to plotting section +ax = small_ts.loc[pix.isin(variable="variable_0", run=0)].ct.plot(label="scenario", continuous_plot_kwargs=dict(alpha=0.9)) +ax.legend() + +# %% [markdown] +# If we have a bigger `pd.DataFrame`, the conversion process can be much slower. # %% bigger_df = create_df( @@ -184,7 +203,12 @@ def create_df( n_runs=300, timepoints=np.arange(351) + 1850.0, ) -bigger_df +bigger_df.shape + +# %% [markdown] +# If want to see the conversion's progress, +# you can activate the progress bar if you have +# [`tdqm`](https://tqdm.github.io/) installed. # %% bigger_df.ct.to_timeseries( @@ -224,7 +248,8 @@ def create_df( # If you want nested progress bars in parallel, # we support that too # (although we're not sure if this works on windows -# because of the need for forking...). +# because of the need for forking, for details see +# [here](https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods)). # %% bigger_df.ct.to_timeseries( @@ -241,11 +266,55 @@ def create_df( ) # %% [markdown] -# - filtering with pandas-indexing -# - bigger df -# - convert more rows (progress, parallel, parallel with progress) +# On big `pd.DataFrame`'s the combination with +# [pandas indexing](https://pandas-indexing.readthedocs.io/) +# becomes particularly powerful. 
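+
+# %%
+# For example, pulling out all rows for a single variable
+# is one readable line (the pipeline below chains several such steps).
+bigger_df.loc[pix.isin(variable="variable_1")].shape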
+ +# %% +ax = ( + bigger_df + .loc[pix.isin(variable="variable_1")] + .groupby(["scenario", "variable", "units"], observed=True) + .median() + .loc[pix.ismatch(scenario="scenario_1*")] + .ct.to_timeseries( + time_units="yr", + interpolation=ct.InterpolationOption.Quadratic, + ) + .ct.plot() +) +ax.legend() + +# %% +# # Units don't round trip +# pd.testing.assert_frame_equal( +# small_df, +# small_ts.ct.to_df() +# ) +small_ts.ct.to_df() + +# %% +small_ts.ct.to_df(increase_resolution=3) + +# %% +sns_df = small_ts.loc[pix.isin(scenario=[f"scenario_{i}" for i in range(2)])].ct.to_sns_df(increase_resolution=100) +sns_df + +# %% +sns.lineplot( + data=sns_df[sns_df["time"] <= 1855], + x="time", + y="value", + hue="scenario", + style="variable", + estimator=None, + units="run", +) + +# %% [markdown] # - other operations, also with progress, parallel, parallel with progress -# - convert to seaborn df for more fine-grained plotting control -# - also requires adding a `increase_resolution` method to `Timeseries` +# - plot with basic control over labels +# - plot with grouping and plumes for ranges # - convert with more fine-grained control over interpolation +# (e.g. interpolation being passed as pd.Series) # - unit conversion diff --git a/pyproject.toml b/pyproject.toml index ae529ce..fd73485 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ docs = [ "jupyterlab==4.3.4", "jupytext==1.16.6", "mkdocs-jupyter==0.25.1", + "pandas-indexing>=0.6.1", "seaborn>=0.13.2", ] tests = [ diff --git a/src/continuous_timeseries/pandas_accessors.py b/src/continuous_timeseries/pandas_accessors.py index 94a0fa1..dbd6fdb 100644 --- a/src/continuous_timeseries/pandas_accessors.py +++ b/src/continuous_timeseries/pandas_accessors.py @@ -17,6 +17,7 @@ from continuous_timeseries.timeseries import Timeseries if TYPE_CHECKING: + import matplotlib import pandas as pd P = TypeVar("P", bound=pd.DataFrame | pd.Series[Any]) @@ -50,6 +51,110 @@ def metadata(self) -> pd.DataFrame: """ return self._series.index.to_frame(index=False) + def to_df(self, increase_resolution: int | None = None) -> pd.DataFrame: + # Late import to avoid hard dependency on pandas + try: + import pandas as pd + except ImportError as exc: + raise MissingOptionalDependencyError( + "to_pandas_series", requirement="pandas" + ) from exc + + series_l = [] + indexes_l = [] + # TODO: progress bar and parallelisation + for idx, value in self._series.items(): + # TODO: time_units and out_units passing + if increase_resolution is not None: + value_use = value.increase_resolution(increase_resolution) + else: + value_use = value + + pd_series = value_use.to_pandas_series() + series_l.append(pd_series) + indexes_l.append((*idx, pd_series.name)) + + idx = pd.MultiIndex.from_frame( + pd.DataFrame( + indexes_l, + columns=[*self._series.index.names, "units"], + dtype="category", + ) + ) + df = pd.DataFrame( + series_l, + index=idx, + ) + + return df + + # TODO: add this to DataFrame accessor to allow for time filtering in the middle + def to_sns_df(self, increase_resolution: int = 100): + # TODO: progress bar and parallelisation + # TODO: time_units and out_units passing + return ( + self.to_df(increase_resolution=increase_resolution) + # Will become `.ct.to_sns_df` + .melt( + var_name="time", + ignore_index=False, + ) + .reset_index() + ) + + def plot( + self, + label: str | tuple[str, ...] 
| None = None, + show_continuous: bool = True, + continuous_plot_kwargs: dict[str, Any] | None = None, + show_discrete: bool = False, + discrete_plot_kwargs: dict[str, Any] | None = None, + ax: matplotlib.axes.Axes | None = None, + progress: bool = False, + ) -> matplotlib.axes.Axes: + iterator = self._series.items() + if progress: + try: + from tqdm.auto import tqdm + except ImportError as exc: + raise MissingOptionalDependencyError( # noqa: TRY003 + "get_timeseries_parallel_helper(..., progress=True)", + requirement="tdqm", + ) from exc + + iterator = tqdm(iterator, desc="Timeseries to plot") + + if label is not None: + if isinstance(label, tuple): + raise NotImplementedError() + + label_idx: int | None = get_index_level_idx(self._series, index_level=label) + + else: + label_idx = None + + for idx, ts in iterator: + if label_idx is not None: + label = idx[label_idx] + if "label" in continuous_plot_kwargs: + # clash (could just warn here instead) + raise KeyError + + continuous_plot_kwargs_use = continuous_plot_kwargs | dict(label=label) + + else: + continuous_plot_kwargs_use = continuous_plot_kwargs + + ax = ts.plot( + show_continuous=show_continuous, + continuous_plot_kwargs=continuous_plot_kwargs_use, + show_discrete=show_discrete, + discrete_plot_kwargs=discrete_plot_kwargs, + ax=ax, + ) + + return ax + def get_chunks(pd_obj: P, n_chunks: int) -> Iterator[P]: # Late import to avoid hard dependency on pandas @@ -79,6 +184,16 @@ def get_chunks(pd_obj: P, n_chunks: int) -> Iterator[P]: yield pd_obj.iloc[start:end] +def get_index_level_idx(obj: pd.DataFrame | pd.Series, index_level: str) -> int: + try: + level_idx = obj.index.names.index(index_level) + except ValueError as exc: + msg = f"{index_level} not available. {obj.index.names=}" + raise KeyError(msg) from exc + + return level_idx + + def get_timeseries_parallel_helper( df: pd.DataFrame, interpolation: InterpolationOption, @@ -107,12 +222,7 @@ def get_timeseries_parallel_helper( else: meth_to_call = "apply" - try: - units_idx = df.index.names.index(units_col) - except ValueError as exc: - msg = f"{units_col} not available. {df.index.names=}" - - raise KeyError(msg) from exc + units_idx = get_index_level_idx(df, index_level=units_col) res = getattr(df, meth_to_call)( # TODO: make this injectable too @@ -125,6 +235,8 @@ def get_timeseries_parallel_helper( idx_separator=idx_separator, ur=ur, ) + # Units now handled by timeseries + res = res.reset_index(units_col, drop=True) return res diff --git a/src/continuous_timeseries/time_axis.py b/src/continuous_timeseries/time_axis.py index 69d293f..f747ca1 100644 --- a/src/continuous_timeseries/time_axis.py +++ b/src/continuous_timeseries/time_axis.py @@ -180,3 +180,72 @@ def bounds_2d(self) -> PINT_NUMPY_ARRAY: res: PINT_NUMPY_ARRAY = np.vstack([starts, ends]).T # type: ignore # mypy confused by pint return res + + +# TODO: make this support TimeAxis input too +# TODO: add check that input is sorted if input is a plain numpy array +def increase_time_axis_resolution( + time_axis: PINT_NUMPY_ARRAY, res_increase: int +) -> PINT_NUMPY_ARRAY: + """ + Get a higher resolution time axis + + Parameters + ---------- + time_axis + Time axis of which to increase the resolution + + res_increase + The increase in resolution we want. + + In each window defined by `time_axis[n]` to `time_axis[n + 1]`, + we create `res_increase - 1` evenly spaced points + between `time_axis[n]` and `time_axis[n + 1]`. + The points defined by `time_axis` are also included. 
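+        Spacing is hence uniform within each window
+        but can differ between windows
+        if the spacing of `time_axis` itself is uneven.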
+ As a result, the total number of plotted points is equal to + `time_axis.size + (res_increase - 1) * (time_axis.size - 1)`. + + Returns + ------- + : + Time axis with higher resolution + + Examples + -------- + >>> import pint + >>> UR = pint.get_application_registry() + >>> Q = UR.Quantity + >>> + >>> time_axis = Q([2000, 2010, 2020, 2025], "yr") + >>> + >>> # Passing in res_increase equal to 1 simply returns the input values + >>> increase_time_axis_resolution(time_axis, res_increase=1) + + >>> + >>> # 'Double' the resolution + >>> increase_time_axis_resolution(time_axis, res_increase=2) + + >>> + >>> # 'Triple' the resolution + >>> increase_time_axis_resolution(time_axis, res_increase=3) + + """ + time_axis_internal = time_axis[:-1] + step_fractions = np.linspace(0.0, (res_increase - 1) / res_increase, res_increase) + time_deltas = time_axis[1:] - time_axis[:-1] + + time_axis_rep = ( + np.repeat(time_axis_internal.m, step_fractions.size) * time_axis_internal.u + ) + step_fractions_rep = np.tile(step_fractions, time_axis_internal.size) + time_axis_deltas_rep = np.repeat(time_deltas.m, step_fractions.size) * time_deltas.u + + res: PINT_NUMPY_ARRAY = np.hstack( # type: ignore # mypy confused by numpy and pint + [ + time_axis_rep + time_axis_deltas_rep * step_fractions_rep, + time_axis[-1], + ] + ) + + return res diff --git a/src/continuous_timeseries/timeseries.py b/src/continuous_timeseries/timeseries.py index cc629c7..0ef85a9 100644 --- a/src/continuous_timeseries/timeseries.py +++ b/src/continuous_timeseries/timeseries.py @@ -31,7 +31,7 @@ ExtrapolationNotAllowedError, MissingOptionalDependencyError, ) -from continuous_timeseries.time_axis import TimeAxis +from continuous_timeseries.time_axis import TimeAxis, increase_time_axis_resolution from continuous_timeseries.timeseries_continuous import TimeseriesContinuous from continuous_timeseries.timeseries_discrete import TimeseriesDiscrete from continuous_timeseries.typing import PINT_NUMPY_ARRAY, PINT_SCALAR @@ -294,6 +294,34 @@ def from_pandas_series( # noqa: PLR0913 name=name, ) + def to_pandas_series( + self, + time_units: str | pint.facets.plain.PlainUnit | None = None, + out_units: str | pint.facets.plain.PlainUnit | None = None, + ) -> pd.Series[np.floating]: + # Late import to avoid hard dependency on pandas + try: + import pandas as pd + except ImportError as exc: + raise MissingOptionalDependencyError( + "to_pandas_series", requirement="pandas" + ) from exc + + if time_units is None: + time_units = self.timeseries_continuous.time_units + + discrete = self.discrete + columns = discrete.time_axis.bounds.to(time_units).m + values = discrete.values_at_bounds.values + if out_units is not None: + values = values.to(out_units) + units = out_units + + else: + units = str(values.u) + + return pd.Series(values.m, index=columns, name=units) + def differentiate( self, name_res: str | None = None, @@ -325,6 +353,19 @@ def differentiate( timeseries_continuous=derivative, ) + def increase_resolution(self, res_increase: int) -> TimeseriesContinuous: + res = type(self)( + time_axis=TimeAxis( + increase_time_axis_resolution( + self.time_axis.bounds, res_increase=res_increase + ) + ), + # TODO: copy here? 
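+            # (sharing rather than copying assumes that callers
+            # do not mutate the continuous representation in place)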
+ timeseries_continuous=self.timeseries_continuous, + ) + + return res + def integrate( self, integration_constant: PINT_SCALAR, diff --git a/src/continuous_timeseries/timeseries_continuous.py b/src/continuous_timeseries/timeseries_continuous.py index 40bc8ac..c20550c 100644 --- a/src/continuous_timeseries/timeseries_continuous.py +++ b/src/continuous_timeseries/timeseries_continuous.py @@ -34,7 +34,7 @@ MissingOptionalDependencyError, ) from continuous_timeseries.plotting_helpers import get_plot_vals -from continuous_timeseries.time_axis import TimeAxis +from continuous_timeseries.time_axis import TimeAxis, increase_time_axis_resolution from continuous_timeseries.typing import NP_FLOAT_OR_INT, PINT_NUMPY_ARRAY, PINT_SCALAR from continuous_timeseries.values_at_bounds import ValuesAtBounds @@ -489,59 +489,36 @@ def to_discrete_timeseries( return res - def interpolate( - self, time_axis: TimeAxis | PINT_NUMPY_ARRAY, allow_extrapolation: bool = False - ) -> PINT_NUMPY_ARRAY: + def differentiate(self, name_res: str | None = None) -> TimeseriesContinuous: """ - Interpolate values on a given time axis + Differentiate Parameters ---------- - time_axis - Time axis onto which to interpolate values + name_res + Name to use for the output. - allow_extrapolation - Should extrapolation be allowed while interpolating? + If not supplied, we use f"{self.name}_derivative". Returns ------- : - Interpolated values + Integral of `self`. """ - if isinstance(time_axis, TimeAxis): - time_axis = time_axis.bounds - - if not allow_extrapolation: - try: - check_no_times_outside_domain( - time_axis, - domain=self.domain, - ) - except ValueError as exc: - msg = f"Extrapolation is not allowed ({allow_extrapolation=})." - raise ExtrapolationNotAllowedError(msg) from exc - - times_m = time_axis.to(self.time_units).m - values_m = self.function( - times_m, - # We have already checked the domain above. - # Hence, we want the function to extrapolate if needed. - allow_extrapolation=True, - ) + if name_res is None: + name_res = f"{self.name}_derivative" - if np.isnan(values_m).any(): # pragma: no cover - # This is an escape hatch. - # In general, we expect `self.function` to handle NaNs - # before we get to this point. - msg = ( - "The result of calling `self.function` contains NaNs. " - f"The result is {values_m!r}." - ) - raise AssertionError(msg) + derivative_values_units = self.values_units / self.time_units - res: PINT_NUMPY_ARRAY = values_m * self.values_units + derivative = self.function.differentiate() - return res + return type(self)( + name=name_res, + time_units=self.time_units, + values_units=derivative_values_units, + function=derivative, + domain=self.domain, + ) def integrate( self, integration_constant: PINT_SCALAR, name_res: str | None = None @@ -582,37 +559,60 @@ def integrate( domain=self.domain, ) - def differentiate(self, name_res: str | None = None) -> TimeseriesContinuous: + def interpolate( + self, time_axis: TimeAxis | PINT_NUMPY_ARRAY, allow_extrapolation: bool = False + ) -> PINT_NUMPY_ARRAY: """ - Differentiate + Interpolate values on a given time axis Parameters ---------- - name_res - Name to use for the output. + time_axis + Time axis onto which to interpolate values - If not supplied, we use f"{self.name}_derivative". + allow_extrapolation + Should extrapolation be allowed while interpolating? Returns ------- : - Integral of `self`. 
+ Interpolated values """ - if name_res is None: - name_res = f"{self.name}_derivative" - - derivative_values_units = self.values_units / self.time_units + if isinstance(time_axis, TimeAxis): + time_axis = time_axis.bounds - derivative = self.function.differentiate() + if not allow_extrapolation: + try: + check_no_times_outside_domain( + time_axis, + domain=self.domain, + ) + except ValueError as exc: + msg = f"Extrapolation is not allowed ({allow_extrapolation=})." + raise ExtrapolationNotAllowedError(msg) from exc - return type(self)( - name=name_res, - time_units=self.time_units, - values_units=derivative_values_units, - function=derivative, - domain=self.domain, + times_m = time_axis.to(self.time_units).m + values_m = self.function( + times_m, + # We have already checked the domain above. + # Hence, we want the function to extrapolate if needed. + allow_extrapolation=True, ) + if np.isnan(values_m).any(): # pragma: no cover + # This is an escape hatch. + # In general, we expect `self.function` to handle NaNs + # before we get to this point. + msg = ( + "The result of calling `self.function` contains NaNs. " + f"The result is {values_m!r}." + ) + raise AssertionError(msg) + + res: PINT_NUMPY_ARRAY = values_m * self.values_units + + return res + def plot( self, time_axis: TimeAxis | PINT_NUMPY_ARRAY, @@ -687,7 +687,9 @@ def plot( # Then plot interpolated using linear joins # (as far as I can tell, this is the only general way to do this, # although it is slower than using e.g. step for piecewise constant stuff).) - plot_points = get_plot_points(time_axis, res_increase=res_increase) + plot_points = increase_time_axis_resolution( + time_axis, res_increase=res_increase + ) plot_values = self.interpolate(plot_points) x_vals = get_plot_vals( @@ -704,68 +706,3 @@ def plot( ax.plot(x_vals, y_vals, label=label, **kwargs) return ax - - -def get_plot_points(time_axis: PINT_NUMPY_ARRAY, res_increase: int) -> PINT_NUMPY_ARRAY: - """ - Get points to plot - - Parameters - ---------- - time_axis - Time axis to use for plotting - - res_increase - The increase in resolution we want to use when plotting. - - In each window defined by `time_axis[n]` to `time_axis[n + 1]`, - `res_increase - 1` evenly spaced points - between `time_axis[n]` and `time_axis[n + 1]` will be generated. - The points defined by `time_axis` are also included. - As a result, the total number of plotted points is equal to - `time_axis.size + (res_increase - 1) * (time_axis.size - 1)`. 
- - Returns - ------- - : - Points to plot - - Examples - -------- - >>> import pint - >>> UR = pint.get_application_registry() - >>> Q = UR.Quantity - >>> - >>> time_axis = Q([2000, 2010, 2020, 2025], "yr") - >>> - >>> # Passing in res_increase equal to 1 simply returns the input values - >>> get_plot_points(time_axis, res_increase=1) - - >>> - >>> # 'Double' the resolution - >>> get_plot_points(time_axis, res_increase=2) - - >>> - >>> # 'Triple' the resolution - >>> get_plot_points(time_axis, res_increase=3) - - """ - time_axis_internal = time_axis[:-1] - step_fractions = np.linspace(0.0, (res_increase - 1) / res_increase, res_increase) - time_deltas = time_axis[1:] - time_axis[:-1] - - time_axis_rep = ( - np.repeat(time_axis_internal.m, step_fractions.size) * time_axis_internal.u - ) - step_fractions_rep = np.tile(step_fractions, time_axis_internal.size) - time_axis_deltas_rep = np.repeat(time_deltas.m, step_fractions.size) * time_deltas.u - - res: PINT_NUMPY_ARRAY = np.hstack( # type: ignore # mypy confused by numpy and pint - [ - time_axis_rep + time_axis_deltas_rep * step_fractions_rep, - time_axis[-1], - ] - ) - - return res diff --git a/tests/unit/test_timeseries_continuous.py b/tests/unit/test_timeseries_continuous.py index a21ec05..8c539c4 100644 --- a/tests/unit/test_timeseries_continuous.py +++ b/tests/unit/test_timeseries_continuous.py @@ -19,7 +19,7 @@ from continuous_timeseries.timeseries_continuous import ( ContinuousFunctionScipyPPoly, TimeseriesContinuous, - get_plot_points, + get_higher_resolution_points, ) UR = pint.get_application_registry() @@ -128,7 +128,7 @@ def test_integrate_no_scipy(sys_modules_patch, expectation): ), ) def test_get_plot_points(time_axis, res_increase, exp): - res = get_plot_points(time_axis, res_increase) + res = get_higher_resolution_points(time_axis, res_increase) pint.testing.assert_allclose(res, exp) diff --git a/uv.lock b/uv.lock index 350ad8b..e6081ad 100644 --- a/uv.lock +++ b/uv.lock @@ -396,6 +396,7 @@ all-dev = [ { name = "mkdocstrings-python-xref" }, { name = "mypy" }, { name = "openscm-units" }, + { name = "pandas-indexing" }, { name = "pandas-stubs", version = "2.2.2.240807", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "pandas-stubs", version = "2.2.3.241126", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "pip" }, @@ -440,6 +441,7 @@ docs = [ { name = "mkdocstrings-python-accessors" }, { name = "mkdocstrings-python-xref" }, { name = "openscm-units" }, + { name = "pandas-indexing" }, { name = "pymdown-extensions" }, { name = "ruff" }, { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, @@ -491,6 +493,7 @@ all-dev = [ { name = "mkdocstrings-python-xref", specifier = "==1.6.2" }, { name = "mypy", specifier = "==1.14.0" }, { name = "openscm-units", specifier = ">=0.6.3" }, + { name = "pandas-indexing", specifier = ">=0.6.1" }, { name = "pandas-stubs", specifier = ">=2.2.2.240807" }, { name = "pip", specifier = "==24.3.1" }, { name = "pre-commit", specifier = "==4.0.1" }, @@ -532,6 +535,7 @@ docs = [ { name = "mkdocstrings-python-accessors", specifier = ">=0.1.1" }, { name = "mkdocstrings-python-xref", specifier = "==1.6.2" }, { name = "openscm-units", specifier = ">=0.6.3" }, + { name = "pandas-indexing", specifier = ">=0.6.1" }, { name = "pymdown-extensions", specifier = "==10.13" }, { name = "ruff", specifier = "==0.8.6" }, { name = "scipy", 
specifier = ">=1.13.1" }, @@ -752,6 +756,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 }, ] +[[package]] +name = "deprecated" +version = "1.2.15" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2e/a3/53e7d78a6850ffdd394d7048a31a6f14e44900adedf190f9a165f6b69439/deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d", size = 2977612 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/8f/c7f227eb42cfeaddce3eb0c96c60cbca37797fa7b34f8e1aeadf6c5c0983/Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320", size = 9941 }, +] + [[package]] name = "distlib" version = "0.3.9" @@ -2229,6 +2245,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2f/49/5c30646e96c684570925b772eac4eb0a8cb0ca590fa978f56c5d3ae73ea1/pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e", size = 11618011 }, ] +[[package]] +name = "pandas-indexing" +version = "0.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "deprecated" }, + { name = "pandas" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fe/24/e4dde61f2f0dbc83f5b6b6b0fbc33ae7b88e3f056854ecde6891c2c9e2be/pandas_indexing-0.6.1.tar.gz", hash = "sha256:4f1fb7563f730d328fcc46e2c2f573ecc0ef3ede1a552b6c5bf04951bfda3176", size = 75847 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/6e/aebcae22b888ee028efa65edaa79e89b0c9359ae2d75bcf6184a24234365/pandas_indexing-0.6.1-py3-none-any.whl", hash = "sha256:8256b6237ccbb42690d9de7c2298d812cf9687e5609d0bcaff862c90e84a1abd", size = 32250 }, +] + [[package]] name = "pandas-stubs" version = "2.2.2.240807" @@ -3551,6 +3581,81 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/21/02/88b65cc394961a60c43c70517066b6b679738caf78506a5da7b88ffcb643/widgetsnbextension-4.0.13-py3-none-any.whl", hash = "sha256:74b2692e8500525cc38c2b877236ba51d34541e6385eeed5aec15a70f88a6c71", size = 2335872 }, ] +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307 }, + { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486 }, + { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", 
size = 38777 }, + { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314 }, + { url = "https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947 }, + { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778 }, + { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716 }, + { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548 }, + { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334 }, + { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427 }, + { url = "https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774 }, + { url = "https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488 }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776 }, + { url = 
"https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420 }, + { url = "https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199 }, + { url = "https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307 }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025 }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879 }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419 }, + { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773 }, + { url = "https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799 }, + { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821 }, + { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919 }, + { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721 }, + { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899 }, + { url = 
"https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222 }, + { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707 }, + { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685 }, + { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567 }, + { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672 }, + { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865 }, + { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800 }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824 }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920 }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690 }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861 }, + { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174 }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721 }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763 }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585 }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676 }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871 }, + { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312 }, + { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062 }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155 }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471 }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208 }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339 }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232 }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476 }, + { url = 
"https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, + { url = "https://files.pythonhosted.org/packages/8a/f4/6ed2b8f6f1c832933283974839b88ec7c983fd12905e01e97889dadf7559/wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/a2/a9/712a53f8f4f4545768ac532619f6e56d5d0364a87b2212531685e89aeef8/wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061", size = 38489 }, + { url = "https://files.pythonhosted.org/packages/fa/9b/e172c8f28a489a2888df18f953e2f6cb8d33b1a2e78c9dfc52d8bf6a5ead/wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/cf/cb/7a07b51762dcd59bdbe07aa97f87b3169766cadf240f48d1cbe70a1be9db/wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9", size = 83050 }, + { url = "https://files.pythonhosted.org/packages/a5/51/a42757dd41032afd6d8037617aa3bc6803ba971850733b24dfb7d5c627c4/wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f", size = 74718 }, + { url = "https://files.pythonhosted.org/packages/bf/bb/d552bfe47db02fcfc950fc563073a33500f8108efa5f7b41db2f83a59028/wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b", size = 82590 }, + { url = "https://files.pythonhosted.org/packages/77/99/77b06b3c3c410dbae411105bf22496facf03a5496bfaca8fbcf9da381889/wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f", size = 81462 }, + { url = "https://files.pythonhosted.org/packages/2d/21/cf0bd85ae66f92600829ea1de8e1da778e5e9f6e574ccbe74b66db0d95db/wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8", size = 74309 }, + { url = "https://files.pythonhosted.org/packages/6d/16/112d25e9092398a0dd6fec50ab7ac1b775a0c19b428f049785096067ada9/wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9", size = 81081 }, + { url = "https://files.pythonhosted.org/packages/2b/49/364a615a0cc0872685646c495c7172e4fc7bf1959e3b12a1807a03014e05/wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb", size = 36423 }, + { 
url = "https://files.pythonhosted.org/packages/00/ad/5d2c1b34ba3202cd833d9221833e74d6500ce66730974993a8dc9a94fb8c/wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb", size = 38772 }, + { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, +] + [[package]] name = "zipp" version = "3.21.0" From b8050ea1f1d3243e862961af0487efff8466d6eb Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sat, 18 Jan 2025 22:33:53 +0100 Subject: [PATCH 3/7] Auto-format --- docs/tutorials/pandas_accessor_tutorial.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/docs/tutorials/pandas_accessor_tutorial.py b/docs/tutorials/pandas_accessor_tutorial.py index 57db081..c1519c3 100644 --- a/docs/tutorials/pandas_accessor_tutorial.py +++ b/docs/tutorials/pandas_accessor_tutorial.py @@ -131,7 +131,10 @@ def create_df( n_ts = n_scenarios * n_variables * n_runs df = pd.DataFrame( - 50.0 * np.linspace(0.3, 1, n_ts)[:, np.newaxis] * np.linspace(0, 1, timepoints.size)[np.newaxis, :] + np.random.random((n_ts, timepoints.size)), + 50.0 + * np.linspace(0.3, 1, n_ts)[:, np.newaxis] + * np.linspace(0, 1, timepoints.size)[np.newaxis, :] + + np.random.random((n_ts, timepoints.size)), columns=timepoints, index=idx, ) @@ -185,12 +188,16 @@ def create_df( # this can be quite powerful for quick plots. # %% -ax = small_ts.loc[pix.isin(variable="variable_0")].ct.plot(continuous_plot_kwargs=dict(alpha=0.3)) +ax = small_ts.loc[pix.isin(variable="variable_0")].ct.plot( + continuous_plot_kwargs=dict(alpha=0.3) +) ax.legend(ncols=3, loc="upper center", bbox_to_anchor=(0.5, -0.15)) # %% # TODO: move this to plotting section -ax = small_ts.loc[pix.isin(variable="variable_0", run=0)].ct.plot(label="scenario", continuous_plot_kwargs=dict(alpha=0.9)) +ax = small_ts.loc[pix.isin(variable="variable_0", run=0)].ct.plot( + label="scenario", continuous_plot_kwargs=dict(alpha=0.9) +) ax.legend() # %% [markdown] @@ -272,8 +279,7 @@ def create_df( # %% ax = ( - bigger_df - .loc[pix.isin(variable="variable_1")] + bigger_df.loc[pix.isin(variable="variable_1")] .groupby(["scenario", "variable", "units"], observed=True) .median() .loc[pix.ismatch(scenario="scenario_1*")] @@ -297,7 +303,9 @@ def create_df( small_ts.ct.to_df(increase_resolution=3) # %% -sns_df = small_ts.loc[pix.isin(scenario=[f"scenario_{i}" for i in range(2)])].ct.to_sns_df(increase_resolution=100) +sns_df = small_ts.loc[ + pix.isin(scenario=[f"scenario_{i}" for i in range(2)]) +].ct.to_sns_df(increase_resolution=100) sns_df # %% From b18ef75334d9b2c3b8e9076807087d6cf9e1f2a3 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Sun, 19 Jan 2025 08:34:53 +0100 Subject: [PATCH 4/7] Scratch out plume plot demo --- docs/tutorials/pandas_accessor_tutorial.py | 54 +++++++++++++++++++--- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/docs/tutorials/pandas_accessor_tutorial.py b/docs/tutorials/pandas_accessor_tutorial.py index c1519c3..a1347c1 100644 --- a/docs/tutorials/pandas_accessor_tutorial.py +++ b/docs/tutorials/pandas_accessor_tutorial.py @@ -32,6 +32,7 @@ import multiprocessing import traceback +import matplotlib.pyplot as plt import numpy as np import openscm_units import pandas as pd @@ -170,7 +171,7 @@ def create_df( # %% small_ts = small_df.ct.to_timeseries( time_units="yr", - 
interpolation=ct.InterpolationOption.PiecewiseConstantPreviousLeftClosed, + interpolation=ct.InterpolationOption.Quadratic, ) small_ts @@ -280,7 +281,7 @@ def create_df( # %% ax = ( bigger_df.loc[pix.isin(variable="variable_1")] - .groupby(["scenario", "variable", "units"], observed=True) + .groupby(bigger_df.index.names.difference(["run"]), observed=True) .median() .loc[pix.ismatch(scenario="scenario_1*")] .ct.to_timeseries( @@ -292,12 +293,12 @@ def create_df( ax.legend() # %% -# # Units don't round trip # pd.testing.assert_frame_equal( # small_df, -# small_ts.ct.to_df() +# # Units don't round trip by default +# small_ts.ct.to_df(out_units="Mt / yr") # ) -small_ts.ct.to_df() +small_ts.ct # %% small_ts.ct.to_df(increase_resolution=3) @@ -305,6 +306,7 @@ def create_df( # %% sns_df = small_ts.loc[ pix.isin(scenario=[f"scenario_{i}" for i in range(2)]) +# Rename to `to_tidy_df` ].ct.to_sns_df(increase_resolution=100) sns_df @@ -319,10 +321,50 @@ def create_df( units="run", ) +# %% +plumes_over = ["run"] +increase_resolution = 100 +quantiles_plumes = ( + (0.5, 0.8), + ((0.05, 0.95), 0.5), +) + +fig, ax = plt.subplots() +for scenario, s_ts in small_ts.loc[pix.isin(variable="variable_0")].groupby("scenario", observed=True): + for quantiles, alpha in quantiles_plumes: + s_quants = s_ts.ct.to_df(increase_resolution=increase_resolution).groupby(small_ts.index.names.difference(plumes_over), observed=True).quantile(quantiles) + if isinstance(quantiles, tuple): + ax.fill_between( + s_quants.columns.values.squeeze(), + # As long as there are only two rows, + # doesn't matter which way around you do this. + s_quants.iloc[0, :].values.squeeze(), + s_quants.iloc[1, :].values.squeeze(), + alpha=alpha, + # label=scenario, + ) + else: + ax.plot( + s_quants.columns.values.squeeze(), + s_quants.values.squeeze(), + alpha=alpha, + label=scenario, + ) + +ax.legend() + +# %% +( + small_ts + .ct.to_df(increase_resolution=5) + .groupby(small_ts.index.names.difference(["run"]), observed=True) + .quantile([0.05, 0.5, 0.95]) +) + # %% [markdown] # - other operations, also with progress, parallel, parallel with progress # - plot with basic control over labels -# - plot with grouping and plumes for ranges +# - plot with grouping and plumes for ranges (basically reproduce scmdata API) # - convert with more fine-grained control over interpolation # (e.g. interpolation being passed as pd.Series) # - unit conversion From 2141063bdcbf044a6587c7a0b3ca2aa500e9d1e0 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Wed, 22 Jan 2025 09:30:04 +0100 Subject: [PATCH 5/7] WIP: Split out apply pandas parallel function --- docs/tutorials/pandas_accessor_tutorial.py | 70 ++++++- src/continuous_timeseries/pandas_accessors.py | 186 +++++++++++++----- 2 files changed, 196 insertions(+), 60 deletions(-) diff --git a/docs/tutorials/pandas_accessor_tutorial.py b/docs/tutorials/pandas_accessor_tutorial.py index a1347c1..3745712 100644 --- a/docs/tutorials/pandas_accessor_tutorial.py +++ b/docs/tutorials/pandas_accessor_tutorial.py @@ -175,6 +175,9 @@ def create_df( ) small_ts +# %% +small_ts.ct.differentiate(progress=True) + # %% [markdown] # Then we can use standard Continuous timeseries APIs, # e.g. plotting. 
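# %% [markdown]
# An editor's sketch of such a plotting cell
# (the calls are copied from the plotting section further below,
# so treat this as illustrative rather than new API):

# %%
ax = small_ts.loc[pix.isin(variable="variable_0")].ct.plot(
    continuous_plot_kwargs=dict(alpha=0.3)
)
ax.legend(ncols=3, loc="upper center", bbox_to_anchor=(0.5, -0.15))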
@@ -201,6 +204,15 @@ def create_df( ) ax.legend() +# %% +# TODO: move this to ops section +ax = ( + small_ts.loc[pix.isin(variable="variable_0", run=0)] + .ct.differentiate() + .ct.plot(label="scenario", continuous_plot_kwargs=dict(alpha=0.9)) +) +ax.legend() + # %% [markdown] # If we have a bigger `pd.DataFrame`, the conversion process can be much slower. @@ -260,7 +272,7 @@ def create_df( # [here](https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods)). # %% -bigger_df.ct.to_timeseries( +bigger_ts = bigger_df.ct.to_timeseries( time_units="yr", interpolation=ct.InterpolationOption.Linear, n_processes=n_processes, @@ -272,6 +284,46 @@ def create_df( progress_nested=True, mp_context=multiprocessing.get_context("fork"), ) +bigger_ts + +# %% [markdown] +# The same logic can be applied to other operations. + +# %% +diff_ts = create_df( + n_scenarios=50, + n_variables=1, + n_runs=600, + timepoints=np.arange(75) + 2025.0, +).ct.to_timeseries( + time_units="yr", + interpolation=ct.InterpolationOption.Linear, + n_processes=n_processes, + progress=True, + progress_nested=True, + mp_context=multiprocessing.get_context("fork"), +) +diff_ts + +# %% +diff_ts.ct.differentiate(progress=True) + +# %% +diff_ts.ct.differentiate(n_processes=n_processes) + +# %% +diff_ts.ct.differentiate(n_processes=n_processes, progress=True) + +# %% +diff_ts.ct.differentiate( + n_processes=n_processes, + progress=True, + progress_nested=True, + mp_context=multiprocessing.get_context("fork"), +) + +# %% [markdown] +# Demonstrate how to control parallel etc. with global config. # %% [markdown] # On big `pd.DataFrame`'s the combination with @@ -306,7 +358,7 @@ def create_df( # %% sns_df = small_ts.loc[ pix.isin(scenario=[f"scenario_{i}" for i in range(2)]) -# Rename to `to_tidy_df` + # Rename to `to_tidy_df` ].ct.to_sns_df(increase_resolution=100) sns_df @@ -330,9 +382,15 @@ def create_df( ) fig, ax = plt.subplots() -for scenario, s_ts in small_ts.loc[pix.isin(variable="variable_0")].groupby("scenario", observed=True): +for scenario, s_ts in small_ts.loc[pix.isin(variable="variable_0")].groupby( + "scenario", observed=True +): for quantiles, alpha in quantiles_plumes: - s_quants = s_ts.ct.to_df(increase_resolution=increase_resolution).groupby(small_ts.index.names.difference(plumes_over), observed=True).quantile(quantiles) + s_quants = ( + s_ts.ct.to_df(increase_resolution=increase_resolution) + .groupby(small_ts.index.names.difference(plumes_over), observed=True) + .quantile(quantiles) + ) if isinstance(quantiles, tuple): ax.fill_between( s_quants.columns.values.squeeze(), @@ -355,14 +413,12 @@ def create_df( # %% ( - small_ts - .ct.to_df(increase_resolution=5) + small_ts.ct.to_df(increase_resolution=5) .groupby(small_ts.index.names.difference(["run"]), observed=True) .quantile([0.05, 0.5, 0.95]) ) # %% [markdown] -# - other operations, also with progress, parallel, parallel with progress # - plot with basic control over labels # - plot with grouping and plumes for ranges (basically reproduce scmdata API) # - convert with more fine-grained control over interpolation diff --git a/src/continuous_timeseries/pandas_accessors.py b/src/continuous_timeseries/pandas_accessors.py index dbd6fdb..df96d46 100644 --- a/src/continuous_timeseries/pandas_accessors.py +++ b/src/continuous_timeseries/pandas_accessors.py @@ -6,6 +6,7 @@ import concurrent.futures from collections.abc import Iterator +from functools import partial from multiprocessing.context import BaseContext from typing import TYPE_CHECKING, Any, 
TypeVar
@@ -23,6 +24,94 @@
 P = TypeVar("P", bound=pd.DataFrame | pd.Series[Any])
 
 
+def apply_pandas_op_parallel(
+    obj,
+    op,
+    n_processes: int,
+    progress: bool = False,
+    progress_nested: bool = False,
+    mp_context: BaseContext | None = None,
+):
+    iterator = get_chunks(obj, n_chunks=n_processes)
+    if progress:
+        try:
+            from tqdm.auto import tqdm
+        except ImportError as exc:
+            raise MissingOptionalDependencyError(  # noqa: TRY003
+                "apply_pandas_op_parallel(..., progress=True)", requirement="tqdm"
+            ) from exc
+
+        iterator = tqdm(iterator, desc="submitting to pool")
+
+    with concurrent.futures.ProcessPoolExecutor(
+        max_workers=n_processes, mp_context=mp_context
+    ) as pool:
+        futures = [
+            pool.submit(
+                op,
+                chunk,
+                progress=progress_nested,
+                progress_bar_position=i,
+            )
+            for i, chunk in enumerate(iterator)
+        ]
+
+        iterator_results = concurrent.futures.as_completed(futures)
+        if progress:
+            iterator_results = tqdm(
+                iterator_results,
+                desc="Retrieving parallel results",
+                total=len(futures),
+            )
+
+        res_l = [future.result() for future in iterator_results]
+
+    # Late import to avoid hard dependency on pandas
+    try:
+        import pandas as pd
+    except ImportError as exc:
+        raise MissingOptionalDependencyError(
+            "apply_pandas_op_parallel", requirement="pandas"
+        ) from exc
+
+    # This assumes that the index isn't mangled.
+    # Using pix.concat might be safer,
+    # or we make the concatenation injectable.
+    res = pd.concat(res_l)
+
+    return res
+
+
+def differentiate_parallel_helper(
+    series: pd.Series[Timeseries],
+    progress: bool = False,
+    progress_bar_position: int = 0,
+) -> pd.Series[Timeseries]:
+    if progress:
+        try:
+            from tqdm.auto import tqdm
+        except ImportError as exc:
+            raise MissingOptionalDependencyError(  # noqa: TRY003
+                "differentiate(..., progress=True)", requirement="tqdm"
+            ) from exc
+
+        tqdm_kwargs = dict(position=progress_bar_position)
+        tqdm.pandas(**tqdm_kwargs)
+        meth_to_call = "progress_map"
+        # No-one knows why this is needed, but it is in jupyter notebooks
+        print(end=" ")
+
+    else:
+        meth_to_call = "map"
+
+    res = getattr(series, meth_to_call)(
+        lambda x: x.differentiate(),
+        # name="injectable?",
+    )
+
+    return res
+
+
 class SeriesCTAccessor:
     """
     [`pd.Series`][pandas.Series] accessors
@@ -89,7 +178,7 @@ def to_df(self, increase_resolution: int | None = None) -> pd.DataFrame:
         return df
 
     # TODO: add this to DataFrame accessor to allow for time filtering in the middle
-    def to_sns_df(self, increase_resolution: int = 100):
+    def to_sns_df(self, increase_resolution: int = 100) -> pd.DataFrame:
         # TODO: progress bar and parallelisation
         # TODO: time_units and out_units passing
         return (
@@ -102,6 +191,33 @@ def to_sns_df(self, increase_resolution: int = 100):
             .reset_index()
         )
 
+    def differentiate(
+        self,
+        # res_name: str = "ts",
+        progress: bool = False,
+        progress_nested: bool = False,
+        n_processes: int = 1,
+        mp_context: BaseContext | None = None,
+    ) -> pd.Series[Timeseries]:  # type: ignore
+        if n_processes == 1:
+            res = differentiate_parallel_helper(
+                self._series,
+                progress=progress,
+            )
+
+            return res
+
+        res = apply_pandas_op_parallel(
+            self._series,
+            op=differentiate_parallel_helper,
+            n_processes=n_processes,
+            progress=progress,
+            progress_nested=progress_nested,
+            mp_context=mp_context,
+        )
+
+        return res
+
     def plot(
         self,
         label: str | tuple[str, ...] | None = None,
@@ -215,8 +331,7 @@ def get_timeseries_parallel_helper(
         tqdm_kwargs = dict(position=progress_bar_position)
         tqdm.pandas(**tqdm_kwargs)
         meth_to_call = "progress_apply"
-        # No-one knows why this is needed, but it is
-        # jupyter notebooks
+        # No-one knows why this is needed, but it is in jupyter notebooks
         print(end=" ")
 
     else:
@@ -288,56 +403,21 @@ def to_timeseries(  # noqa: PLR0913
 
             return res
 
-        # I think it should be possible to split out a
-        # `apply_pandas_op_parallel` or similar function.
-        iterator = get_chunks(self._df, n_chunks=n_processes)
-        if progress:
-            try:
-                from tqdm.auto import tqdm
-            except ImportError as exc:
-                raise MissingOptionalDependencyError(  # noqa: TRY003
-                    "to_timeseries(..., progress=True)", requirement="tdqm"
-                ) from exc
-
-            iterator = tqdm(iterator, desc="submitting to pool")
-
-        with concurrent.futures.ProcessPoolExecutor(
-            max_workers=n_processes, mp_context=mp_context
-        ) as pool:
-            futures = [
-                pool.submit(
-                    get_timeseries_parallel_helper,
-                    chunk,
-                    interpolation=interpolation,
-                    time_units=time_units,
-                    units_col=units_col,
-                    idx_separator=idx_separator,
-                    ur=ur,
-                    progress=progress_nested,
-                    progress_bar_position=i,
-                )
-                for i, chunk in enumerate(iterator)
-            ]
-
-            iterator_results = concurrent.futures.as_completed(futures)
-            if progress:
-                iterator_results = tqdm(
-                    iterator_results,
-                    desc="Retrieving parallel results",
-                    total=len(futures),
-                )
-
-            res_l = [future.result() for future in iterator_results]
-
-        # Late import to avoid hard dependency on pandas
-        try:
-            import pandas as pd
-        except ImportError as exc:
-            raise MissingOptionalDependencyError(
-                "interpolate", requirement="pandas"
-            ) from exc
-
-        res = pd.concat(res_l)
+        res = apply_pandas_op_parallel(
+            self._df,
+            op=partial(
+                get_timeseries_parallel_helper,
+                interpolation=interpolation,
+                time_units=time_units,
+                units_col=units_col,
+                idx_separator=idx_separator,
+                ur=ur,
+            ),
+            n_processes=n_processes,
+            progress=progress,
+            progress_nested=progress_nested,
+            mp_context=mp_context,
+        )
 
         return res
 

From 56ef4578a57479361c2426bcf365d7395d3d4210 Mon Sep 17 00:00:00 2001
From: Zebedee Nicholls
Date: Wed, 22 Jan 2025 11:16:59 +0100
Subject: [PATCH 6/7] Finish sketching out major APIs

---
 docs/tutorials/pandas_accessor_tutorial.py    | 321 ++++++++++++++++--
 src/continuous_timeseries/pandas_accessors.py |  30 ++
 2 files changed, 316 insertions(+), 35 deletions(-)

diff --git a/docs/tutorials/pandas_accessor_tutorial.py b/docs/tutorials/pandas_accessor_tutorial.py
index 3745712..7c982b7 100644
--- a/docs/tutorials/pandas_accessor_tutorial.py
+++ b/docs/tutorials/pandas_accessor_tutorial.py
@@ -374,53 +374,304 @@ def create_df(
 )
 
 # %%
-plumes_over = ["run"]
-increase_resolution = 100
+from itertools import cycle
+import warnings
+
+import matplotlib.lines as mlines
+import matplotlib.patches as mpatches
+
+fig, ax = plt.subplots()
+in_ts = small_ts.loc[pix.isin(variable="variable_0")]
+quantile_over = "run"
+pre_calculated = False
+observed = True
 quantiles_plumes = (
-    (0.5, 0.8),
+    ((0.5,), 0.8),
+    ((0.25, 0.75), 0.75),
     ((0.05, 0.95), 0.5),
 )
+hue_var = "scenario"
+hue_var_label = None
+style_var = "variable"
+style_var_label = None
+palette = None
+dashes = None
+increase_resolution = 100
+linewidth = 2
+
+# The joy of plotting, you create everything yourself.
+# TODO: split creation from use?
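+# An editor's sketch of the core step before the full scratch below:
+# each plume is just a `fill_between` of two quantiles computed over
+# `quantile_over`, using the `groupby_except` and
+# `fix_index_name_after_groupby_quantile` helpers added in this patch.
+core_q = (
+    in_ts.ct.to_df(increase_resolution=increase_resolution)
+    .ct.groupby_except(quantile_over)
+    .quantile([0.05, 0.95])
+    .ct.fix_index_name_after_groupby_quantile()
+)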
+if hue_var_label is None: + hue_var_label = hue_var.capitalize() +if style_var_label is None: + style_var_label = style_var.capitalize() + +quantiles = [] +for quantile_plot_def in quantiles_plumes: + q_def = quantile_plot_def[0] + try: + for q in q_def: + quantiles.append(q) + except TypeError: + quantiles.append(q_def) + +_palette = {} if palette is None else palette + +if dashes is None: + _dashes = {} + lines = ["-", "--", "-.", ":"] + linestyle_cycler = cycle(lines) +else: + _dashes = dashes + +# Need to keep track of this, just in case we end up plotting only plumes +_plotted_lines = False + +quantile_labels = {} +plotted_hues = [] +plotted_styles = [] +units_l = [] +for q, alpha in quantiles_plumes: + for hue_value, hue_ts in in_ts.groupby(hue_var, observed=observed): + for style_value, hue_style_ts in hue_ts.groupby(style_var, observed=observed): + # Remake in inner loop to avoid leaking between plots + pkwargs = {"alpha": alpha} + + if pre_calculated: + # Should add some checks here + raise NotImplementedError() + # Maybe something like the below + # missing_quantile = False + # for qt in q: + # if qt not in quantiles: + # warnings.warn( + # f"Quantile {qt} not available for {hue_value=} {style_value=}" + # ) + # missing_quantile = True + + # if missing_quantile: + # continue + else: + _pdf = ( + hue_ts.ct.to_df(increase_resolution=increase_resolution) + .ct.groupby_except(quantile_over) + .quantile(quantiles) + .ct.fix_index_name_after_groupby_quantile() + ) -fig, ax = plt.subplots() -for scenario, s_ts in small_ts.loc[pix.isin(variable="variable_0")].groupby( - "scenario", observed=True -): - for quantiles, alpha in quantiles_plumes: - s_quants = ( - s_ts.ct.to_df(increase_resolution=increase_resolution) - .groupby(small_ts.index.names.difference(plumes_over), observed=True) - .quantile(quantiles) - ) - if isinstance(quantiles, tuple): - ax.fill_between( - s_quants.columns.values.squeeze(), - # As long as there are only two rows, - # doesn't matter which way around you do this. - s_quants.iloc[0, :].values.squeeze(), - s_quants.iloc[1, :].values.squeeze(), - alpha=alpha, - # label=scenario, - ) - else: - ax.plot( - s_quants.columns.values.squeeze(), - s_quants.values.squeeze(), - alpha=alpha, - label=scenario, + if hue_value not in plotted_hues: + plotted_hues.append(hue_value) + + x_vals = _pdf.columns.values.squeeze() + # Require ur for this to work + # x_vals = get_plot_vals( + # self.time_axis.bounds, + # "self.time_axis.bounds", + # warn_if_magnitudes=warn_if_plotting_magnitudes, + # ) + + if palette is not None: + try: + pkwargs["color"] = _palette[hue_value] + except KeyError: + error_msg = f"{hue_value} not in palette. 
{palette=}" + raise KeyError(error_msg) + elif hue_value in _palette: + pkwargs["color"] = _palette[hue_value] + # else: + # # Let matplotlib default cycling do its thing + + n_q_for_plume = 2 + plot_plume = len(q) == n_q_for_plume + plot_line = len(q) == 1 + + if plot_plume: + label = f"{q[0] * 100:.0f}th - {q[1] * 100:.0f}th" + + y_lower_vals = _pdf.loc[pix.ismatch(quantile=q[0])].values.squeeze() + y_upper_vals = _pdf.loc[pix.ismatch(quantile=q[1])].values.squeeze() + # Require ur for this to work + # Also need the 1D check back in too + # y_lower_vals = get_plot_vals( + # self.time_axis.bounds, + # "self.time_axis.bounds", + # warn_if_magnitudes=warn_if_plotting_magnitudes, + # ) + p = ax.fill_between( + x_vals, + y_lower_vals, + y_upper_vals, + label=label, + **pkwargs, + ) + + if palette is None: + _palette[hue_value] = p.get_facecolor()[0] + + elif plot_line: + if style_value not in plotted_styles: + plotted_styles.append(style_value) + + _plotted_lines = True + + if dashes is not None: + try: + pkwargs["linestyle"] = _dashes[style_value] + except KeyError: + error_msg = f"{style_value} not in dashes. {dashes=}" + raise KeyError(error_msg) + else: + if style_value not in _dashes: + _dashes[style_value] = next(linestyle_cycler) + + pkwargs["linestyle"] = _dashes[style_value] + + if isinstance(q[0], str): + label = q[0] + else: + label = f"{q[0] * 100:.0f}th" + + y_vals = _pdf.loc[pix.ismatch(quantile=q[0])].values.squeeze() + # Require ur for this to work + # Also need the 1D check back in too + # y_vals = get_plot_vals( + # self.time_axis.bounds, + # "self.time_axis.bounds", + # warn_if_magnitudes=warn_if_plotting_magnitudes, + # ) + p = ax.plot( + x_vals, + y_vals, + label=label, + linewidth=linewidth, + **pkwargs, + )[0] + + if dashes is None: + _dashes[style_value] = p.get_linestyle() + + if palette is None: + _palette[hue_value] = p.get_color() + + else: + msg = f"quantiles to plot must be of length one or two, received: {q}" + raise ValueError(msg) + + if label not in quantile_labels: + quantile_labels[label] = p + + # Once we have unit handling with matplotlib, we can remove this + # (and if matplotlib isn't set up, we just don't do unit handling) + units_l.extend(_pdf.pix.unique("units").unique().tolist()) + + # Fake the line handles for the legend + hue_val_lines = [ + mlines.Line2D([0], [0], color=_palette[hue_value], label=hue_value) + for hue_value in plotted_hues + ] + + legend_items = [ + mpatches.Patch(alpha=0, label="Quantiles"), + *quantile_labels.values(), + mpatches.Patch(alpha=0, label=hue_var_label), + *hue_val_lines, + ] + + if _plotted_lines: + style_val_lines = [ + mlines.Line2D( + [0], + [0], + linestyle=_dashes[style_value], + label=style_value, + color="gray", + linewidth=linewidth, ) + for style_value in plotted_styles + ] + legend_items += [ + mpatches.Patch(alpha=0, label=style_var_label), + *style_val_lines, + ] + elif dashes is not None: + warnings.warn( + "`dashes` was passed but no lines were plotted, the style settings " + "will not be used" + ) -ax.legend() + ax.legend(handles=legend_items, loc="best") + + if len(set(units_l)) == 1: + ax.set_ylabel(units_l[0]) + + # return ax, legend_items + + +quantiles # %% -( +demo_q = ( small_ts.ct.to_df(increase_resolution=5) - .groupby(small_ts.index.names.difference(["run"]), observed=True) + .ct.groupby_except("run") .quantile([0.05, 0.5, 0.95]) + .ct.fix_index_name_after_groupby_quantile() +) +demo_q + +# %% +units_col = "units" +indf = demo_q +out_l = [] + +# The 'shortcut' +target_units = "Gt / yr" 
+locs_target_units = ((pix.ismatch(**{units_col: "**"}), target_units),) +locs_target_units = ( + (pix.ismatch(scenario="scenario_2"), "Gt / yr"), + (pix.ismatch(scenario="scenario_0"), "kt / yr"), + ( + demo_q.index.get_level_values("scenario").isin(["scenario_1"]) + & demo_q.index.get_level_values("variable").isin(["variable_1"]), + "t / yr", + ), ) +# locs_target_units = ( +# (pix.ismatch(scenario="*"), "t / yr"), +# ) + +converted = None +for locator, target_unit in locs_target_units: + if converted is None: + converted = locator + else: + converted = converted | locator + + def _convert_unit(idf: pd.DataFrame) -> pd.DataFrame: + start_units = idf.pix.unique(units_col).tolist() + if len(start_units) > 1: + msg = f"{start_units=}" + raise AssertionError(msg) + + start_units = start_units[0] + conversion_factor = UR.Quantity(1, start_units).to(target_unit).m + + return (idf * conversion_factor).pix.assign(**{units_col: target_unit}) + + out_l.append( + indf.loc[locator] + .groupby(units_col, observed=True, group_keys=False) + .apply(_convert_unit) + ) + +out = pix.concat([*out_l, indf.loc[~converted]]) +if isinstance(indf.index.dtypes[units_col], pd.CategoricalDtype): + # Make sure that units stay as a category, if it started as one. + out = out.reset_index(units_col) + out[units_col] = out[units_col].astype("category") + out = out.set_index(units_col, append=True).reorder_levels(indf.index.names) + +out # %% [markdown] -# - plot with basic control over labels -# - plot with grouping and plumes for ranges (basically reproduce scmdata API) # - convert with more fine-grained control over interpolation # (e.g. interpolation being passed as pd.Series) -# - unit conversion diff --git a/src/continuous_timeseries/pandas_accessors.py b/src/continuous_timeseries/pandas_accessors.py index df96d46..788a839 100644 --- a/src/continuous_timeseries/pandas_accessors.py +++ b/src/continuous_timeseries/pandas_accessors.py @@ -218,6 +218,16 @@ def differentiate( return res + def groupby_except( + self, non_groupers: str | list[str], observed: bool = True + ) -> pd.core.groupby.generic.SeriesGroupBy: + if isinstance(non_groupers, str): + non_groupers = [non_groupers] + + return self._series.groupby( + self._series.index.names.difference(non_groupers), observed=observed + ) + def plot( self, label: str | tuple[str, ...] | None = None, @@ -341,6 +351,9 @@ def get_timeseries_parallel_helper( res = getattr(df, meth_to_call)( # TODO: make this injectable too + # This will also allow us to introduce an extra layer + # to handle the case when interpolation is a Series, + # rather than the same across all rows. 
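+        # An editor's aside: a sketch of what such a Series might look
+        # like (hypothetical, nothing below exists in this patch):
+        #     interpolation = pd.Series(
+        #         InterpolationOption.Linear, index=df.index
+        #     )
+        # with each row's option then looked up via `row.name`.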
         Timeseries.from_pandas_series,
         axis="columns",
         interpolation=interpolation,
@@ -421,6 +434,23 @@ def to_timeseries(  # noqa: PLR0913
 
         return res
 
+    def groupby_except(
+        self, non_groupers: str | list[str], observed: bool = True
+    ) -> pd.core.groupby.generic.DataFrameGroupBy:
+        if isinstance(non_groupers, str):
+            non_groupers = [non_groupers]
+
+        return self._df.groupby(
+            self._df.index.names.difference(non_groupers), observed=observed
+        )
+
+    def fix_index_name_after_groupby_quantile(self) -> pd.DataFrame:
+        # TODO: think about doing in place
+        res = self._df.copy()
+        res.index = res.index.rename({None: "quantile"})
+
+        return res
+
 
 def register_pandas_accessor(namespace: str = "ct") -> None:
     """

From 6ffdffcda1ccb89c1a0b5a0fded84f0423f0a31b Mon Sep 17 00:00:00 2001
From: Zebedee Nicholls
Date: Wed, 12 Feb 2025 20:37:50 +0100
Subject: [PATCH 7/7] Add TODO

---
 tests/conftest.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index d461d38..f09a2a2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -23,3 +23,5 @@ def setup_pandas_accessor() -> None:
     pd.Series._accessors.discard("ct")
     if hasattr(pd.Series, "ct"):
         del pd.Series.ct
+
+    # Add DataFrame here too
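+    # An editor's sketch of that TODO, mirroring the pd.Series teardown
+    # above (assuming the DataFrame accessor also registers as "ct"):
+    #     pd.DataFrame._accessors.discard("ct")
+    #     if hasattr(pd.DataFrame, "ct"):
+    #         del pd.DataFrame.ct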