From 3f1e97d60396347770e644923fbd9219ac2d8850 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:04:11 +0200 Subject: [PATCH 01/39] chore: Change to more explicit class name and add docs --- .../_sklearn/_comparison/metrics_accessor.py | 6 +++++- .../_cross_validation/metrics_accessor.py | 6 +++++- skore/src/skore/sklearn/_plot/metrics/__init__.py | 15 +++++++++++++++ sphinx/reference/report/displays.rst | 1 + sphinx/user_guide/reporters.rst | 7 +++++++ 5 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 skore/src/skore/sklearn/_plot/metrics/__init__.py diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 8adebfb93a..6bb8702fdd 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -7,7 +7,7 @@ from numpy.typing import ArrayLike from sklearn.metrics import make_scorer from sklearn.utils.metaestimators import available_if - +from skore.externals._pandas_accessors import DirNamesMixin from skore._sklearn._base import ( _BaseAccessor, _BaseMetricsAccessor, @@ -60,7 +60,11 @@ def summarize( pos_label: PositiveLabel | None = _DEFAULT, indicator_favorability: bool = False, flat_index: bool = False, +<<<<<<< HEAD:skore/src/skore/_sklearn/_comparison/metrics_accessor.py aggregate: Aggregate | None = ("mean", "std"), +======= + aggregate: Optional[Aggregate] = ("mean", "std"), +>>>>>>> 1a751cbb (chore: Change to more explicit class name and add docs):skore/src/skore/sklearn/_comparison/metrics_accessor.py ) -> MetricsSummaryDisplay: """Report a set of metrics for the estimators. diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index 76564ac3e2..4c4a8473d2 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -7,7 +7,7 @@ from numpy.typing import ArrayLike from sklearn.metrics import make_scorer from sklearn.utils.metaestimators import available_if - +from skore.externals._pandas_accessors import DirNamesMixin from skore._sklearn._base import ( _BaseAccessor, _BaseMetricsAccessor, @@ -61,7 +61,11 @@ def summarize( pos_label: PositiveLabel | None = _DEFAULT, indicator_favorability: bool = False, flat_index: bool = False, +<<<<<<< HEAD:skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py aggregate: Aggregate | None = ("mean", "std"), +======= + aggregate: Optional[Aggregate] = ("mean", "std"), +>>>>>>> 1a751cbb (chore: Change to more explicit class name and add docs):skore/src/skore/sklearn/_cross_validation/metrics_accessor.py ) -> MetricsSummaryDisplay: """Report a set of metrics for our estimator. 
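The two hunks above change `summarize()` so that it returns a `MetricsSummaryDisplay` object instead of a bare dataframe. A minimal consumer-side sketch of that new API, assuming a `ComparisonReport` built from two `EstimatorReport`s; the dataset and estimator choices are illustrative only and not part of this patch:

    from sklearn.datasets import load_breast_cancer
    from sklearn.ensemble import HistGradientBoostingClassifier
    from sklearn.linear_model import LogisticRegression
    from skore import ComparisonReport, EstimatorReport, train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True)
    report_a = EstimatorReport(LogisticRegression(max_iter=10_000), **split_data)
    report_b = EstimatorReport(HistGradientBoostingClassifier(), **split_data)
    comparison = ComparisonReport({"report_a": report_a, "report_b": report_b})

    # summarize() now returns a display; frame() recovers the metrics dataframe
    display = comparison.metrics.summarize(indicator_favorability=True)
    metrics_df = display.frame()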
diff --git a/skore/src/skore/sklearn/_plot/metrics/__init__.py b/skore/src/skore/sklearn/_plot/metrics/__init__.py new file mode 100644 index 0000000000..e3ded1b085 --- /dev/null +++ b/skore/src/skore/sklearn/_plot/metrics/__init__.py @@ -0,0 +1,15 @@ +from skore.sklearn._plot.metrics.confusion_matrix import ConfusionMatrixDisplay +from skore.sklearn._plot.metrics.precision_recall_curve import ( + PrecisionRecallCurveDisplay, +) +from skore.sklearn._plot.metrics.prediction_error import PredictionErrorDisplay +from skore.sklearn._plot.metrics.roc_curve import RocCurveDisplay +from skore.sklearn._plot.metrics.summarize import MetricsSummaryDisplay + +__all__ = [ + "ConfusionMatrixDisplay", + "PrecisionRecallCurveDisplay", + "PredictionErrorDisplay", + "RocCurveDisplay", + "MetricsSummaryDisplay", +] diff --git a/sphinx/reference/report/displays.rst b/sphinx/reference/report/displays.rst index 7c4952de44..b32a7490d5 100644 --- a/sphinx/reference/report/displays.rst +++ b/sphinx/reference/report/displays.rst @@ -16,3 +16,4 @@ the API of each display. RocCurveDisplay PrecisionRecallCurveDisplay PredictionErrorDisplay + MetricsSummaryDisplay diff --git a/sphinx/user_guide/reporters.rst b/sphinx/user_guide/reporters.rst index 1cd338023b..06c925c78a 100644 --- a/sphinx/user_guide/reporters.rst +++ b/sphinx/user_guide/reporters.rst @@ -67,6 +67,13 @@ regression). Nevertheless, you can specify the metrics you want to compute thank scikit-learn scorer names or a built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed with :func:`sklearn.metrics.make_scorer`. +We provide the :class:`EstimatorReport.metrics.summarize` method that aggregates metrics in a single dataframe, available through a Display. By default, a set of metrics is computed based +on the type of target variable (e.g. classification or regression). Nevertheless, you +can specify the metrics you want to compute thanks to the `scoring` parameter. We accept +different types: (i) some strings that correspond to scikit-learn scorer names or a +built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed +with :func:`sklearn.metrics.make_scorer`. + Refer to the :ref:`displays` section for more details regarding the `skore` display API. Refer to the :ref:`estimator_metrics` section for more details on all the available metrics in `skore`. From 1e68910a55b37948f19fc600f5a859962f86e270 Mon Sep 17 00:00:00 2001 From: Marie Sacksick <79304610+MarieSacksick@users.noreply.github.com> Date: Wed, 11 Jun 2025 11:19:34 +0200 Subject: [PATCH 02/39] Update sphinx/user_guide/reporters.rst Co-authored-by: Guillaume Lemaitre --- sphinx/user_guide/reporters.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/user_guide/reporters.rst b/sphinx/user_guide/reporters.rst index 06c925c78a..299e4bd992 100644 --- a/sphinx/user_guide/reporters.rst +++ b/sphinx/user_guide/reporters.rst @@ -67,7 +67,7 @@ regression). Nevertheless, you can specify the metrics you want to compute thank scikit-learn scorer names or a built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed with :func:`sklearn.metrics.make_scorer`. -We provide the :class:`EstimatorReport.metrics.summarize` method that aggregates metrics in a single dataframe, available through a Display. By default, a set of metrics is computed based +We provide the :class:`EstimatorReport.metrics.summarize` method that aggregates metrics in a single dataframe, available through a :class:`~skore.Display`. 
By default, a set of metrics is computed based on the type of target variable (e.g. classification or regression). Nevertheless, you can specify the metrics you want to compute thanks to the `scoring` parameter. We accept different types: (i) some strings that correspond to scikit-learn scorer names or a From 39a66cb707708bde24efbb36e705bf3bb73dab33 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:21:17 +0200 Subject: [PATCH 03/39] alphabetic sorting --- sphinx/reference/report/displays.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/sphinx/reference/report/displays.rst b/sphinx/reference/report/displays.rst index b32a7490d5..7c4952de44 100644 --- a/sphinx/reference/report/displays.rst +++ b/sphinx/reference/report/displays.rst @@ -16,4 +16,3 @@ the API of each display. RocCurveDisplay PrecisionRecallCurveDisplay PredictionErrorDisplay - MetricsSummaryDisplay From 3158ace1a62772ff93015da7fc71590dce71a0f1 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:24:45 +0200 Subject: [PATCH 04/39] fix init for sphinx --- skore/src/skore/sklearn/__init__.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 skore/src/skore/sklearn/__init__.py diff --git a/skore/src/skore/sklearn/__init__.py b/skore/src/skore/sklearn/__init__.py new file mode 100644 index 0000000000..242669655f --- /dev/null +++ b/skore/src/skore/sklearn/__init__.py @@ -0,0 +1,25 @@ +"""Enhance `sklearn` functions.""" + +from skore.sklearn._comparison import ComparisonReport +from skore.sklearn._cross_validation import CrossValidationReport +from skore.sklearn._estimator import EstimatorReport +from skore.sklearn._plot import ( + MetricsSummaryDisplay, + PrecisionRecallCurveDisplay, + PredictionErrorDisplay, + RocCurveDisplay, +) +from skore.sklearn.find_estimators import find_estimators +from skore.sklearn.train_test_split.train_test_split import train_test_split + +__all__ = [ + "ComparisonReport", + "CrossValidationReport", + "EstimatorReport", + "PrecisionRecallCurveDisplay", + "PredictionErrorDisplay", + "RocCurveDisplay", + "MetricsSummaryDisplay", + "train_test_split", + "find_estimators", +] From 4665e9e751906bec599baa6b725b505d8ad5ca0b Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:27:12 +0200 Subject: [PATCH 05/39] add function to be consistant with a display --- .../_plot/metrics/metrics_summary_display.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py new file mode 100644 index 0000000000..42a587ce45 --- /dev/null +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -0,0 +1,27 @@ +from skore.sklearn._plot.style import StyleDisplayMixin +from skore.sklearn._plot.utils import HelpDisplayMixin + + +class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): + """Display for summarize. + + An instance of this class will be created by `Report.metrics.summarize()`. + This class should not be instantiated directly. + """ + + def __init__(self, summarize_data): + self.summarize_data = summarize_data + + def frame(self): + """Return the summarize as a dataframe. + + Returns + ------- + frame : pandas.DataFrame + The report metrics as a dataframe. 
+ """ + return self.summarize_data + + @StyleDisplayMixin.style_plot + def plot(self): + raise NotImplementedError From d778605a24330d7543310db1560eaf58d1e89678 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:28:03 +0200 Subject: [PATCH 06/39] fix init for sphinx --- skore/src/skore/sklearn/_plot/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/__init__.py b/skore/src/skore/sklearn/_plot/metrics/__init__.py index e3ded1b085..3e55d01b1f 100644 --- a/skore/src/skore/sklearn/_plot/metrics/__init__.py +++ b/skore/src/skore/sklearn/_plot/metrics/__init__.py @@ -1,10 +1,10 @@ from skore.sklearn._plot.metrics.confusion_matrix import ConfusionMatrixDisplay +from skore.sklearn._plot.metrics.metrics_summary_display import MetricsSummaryDisplay from skore.sklearn._plot.metrics.precision_recall_curve import ( PrecisionRecallCurveDisplay, ) from skore.sklearn._plot.metrics.prediction_error import PredictionErrorDisplay from skore.sklearn._plot.metrics.roc_curve import RocCurveDisplay -from skore.sklearn._plot.metrics.summarize import MetricsSummaryDisplay __all__ = [ "ConfusionMatrixDisplay", From 6a0b34de8a08b789e88bcbb74d09203fecec3ea8 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:36:34 +0200 Subject: [PATCH 07/39] docs: explain a bit more about display and their functions --- sphinx/user_guide/reporters.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sphinx/user_guide/reporters.rst b/sphinx/user_guide/reporters.rst index 299e4bd992..1cd338023b 100644 --- a/sphinx/user_guide/reporters.rst +++ b/sphinx/user_guide/reporters.rst @@ -67,13 +67,6 @@ regression). Nevertheless, you can specify the metrics you want to compute thank scikit-learn scorer names or a built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed with :func:`sklearn.metrics.make_scorer`. -We provide the :class:`EstimatorReport.metrics.summarize` method that aggregates metrics in a single dataframe, available through a :class:`~skore.Display`. By default, a set of metrics is computed based -on the type of target variable (e.g. classification or regression). Nevertheless, you -can specify the metrics you want to compute thanks to the `scoring` parameter. We accept -different types: (i) some strings that correspond to scikit-learn scorer names or a -built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed -with :func:`sklearn.metrics.make_scorer`. - Refer to the :ref:`displays` section for more details regarding the `skore` display API. Refer to the :ref:`estimator_metrics` section for more details on all the available metrics in `skore`. 
From 963bae09256c3bdb38d7690d506d48a48ce074b4 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 9 Jun 2025 15:44:46 +0200 Subject: [PATCH 08/39] merge --- .../plot_skore_getting_started.py | 5 ++ .../_sklearn/_comparison/metrics_accessor.py | 2 +- .../_cross_validation/metrics_accessor.py | 2 +- .../_sklearn/_estimator/metrics_accessor.py | 2 +- .../_plot/metrics/metrics_summary_display.py | 69 ++++++++++++++++--- 5 files changed, 67 insertions(+), 13 deletions(-) diff --git a/examples/getting_started/plot_skore_getting_started.py b/examples/getting_started/plot_skore_getting_started.py index e1ebdde86f..4c2ab09b33 100644 --- a/examples/getting_started/plot_skore_getting_started.py +++ b/examples/getting_started/plot_skore_getting_started.py @@ -207,6 +207,11 @@ # %% comparator.metrics.summarize(indicator_favorability=True).frame() +# %% +# To be more specific in our comparison, we can decide to compare the Brier score and the fitting time. + +# %% +comparator.metrics.report_metrics().plot(x="brier_score", y="fit_time") # %% # Thus, we easily have the result of our benchmark for several recommended metrics. diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 6bb8702fdd..7ef85d3049 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -178,7 +178,7 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(results) + return MetricsSummaryDisplay(results, report_type="comparison-cross-validation") @progress_decorator(description="Compute metric for each estimator") def _compute_metric_scores( diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index 4c4a8473d2..a73ec823f2 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -177,7 +177,7 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(summarize_data=results) + return MetricsSummaryDisplay(summarize_data=results, report_type="cross-validation") @progress_decorator(description="Compute metric for each split") def _compute_metric_scores( diff --git a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py index ff67d64b10..cdaf2e7342 100644 --- a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py @@ -426,7 +426,7 @@ class is set to the one provided when creating the report. 
If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(summarize_data=results) + return MetricsSummaryDisplay(summarize_data=results, report_type="estimator") def _compute_metric_scores( self, diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 42a587ce45..1d745a779e 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -1,5 +1,8 @@ +import matplotlib.pyplot as plt + from skore.sklearn._plot.style import StyleDisplayMixin from skore.sklearn._plot.utils import HelpDisplayMixin +from skore.sklearn.types import ReportType class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): @@ -9,19 +12,65 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): This class should not be instantiated directly. """ - def __init__(self, summarize_data): + def __init__( + self, + *, + summarize_data, + report_type: ReportType, + ): self.summarize_data = summarize_data + self.report_type = report_type def frame(self): - """Return the summarize as a dataframe. - - Returns - ------- - frame : pandas.DataFrame - The report metrics as a dataframe. - """ + """Return the summarize as a dataframe.""" return self.summarize_data @StyleDisplayMixin.style_plot - def plot(self): - raise NotImplementedError + def plot(self, x, y) -> None: + """Plot visualization. + + Extra keyword arguments will be passed to matplotlib's `plot`. + + Parameters + ---------- + x : str, default=None + The metric to display on x-axis. By default, the first column. + + y : str, default=None + The metric to display on y-axis. By default, the second column. + + Notes + ----- + The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) + in scikit-learn is computed without any interpolation. To be consistent + with this metric, the precision-recall curve is plotted without any + interpolation as well (step-wise style). + + You can change this style by passing the keyword argument + `drawstyle="default"`. However, the curve will not be strictly + consistent with the reported average precision. 
+ + Examples + -------- + >>> from sklearn.datasets import load_breast_cancer + >>> from sklearn.linear_model import LogisticRegression + >>> from skore import train_test_split + >>> from skore import EstimatorReport + >>> X, y = load_breast_cancer(return_X_y=True) + >>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) + >>> classifier = LogisticRegression(max_iter=10_000) + >>> report = EstimatorReport(classifier, **split_data) + >>> display = report.metrics.precision_recall() + >>> display.plot(pr_curve_kwargs={"color": "tab:red"}) + """ + self.figure_, self.ax_ = plt.subplots() + + if self.report_type in ( + ["estimator", "cross-validation", "comparison-cross-validation"] + ): + raise NotImplementedError("To come soon!") + elif self.report_type == "comparison-estimator": + self.plot_comparison_estimator() + + def plot_comparison_estimator(self): + self.report_metrics_data.scatter(x=0, y=1) From e9e8d5d9b0f1dd43b2e3c34a4ad80e6b49220aca Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 9 Jun 2025 16:29:19 +0200 Subject: [PATCH 09/39] value error based on literal --- .../skore/_sklearn/_comparison/metrics_accessor.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 7ef85d3049..7c361bac6a 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -24,6 +24,7 @@ _DEFAULT, Aggregate, PositiveLabel, + ReportType, Scoring, ScoringName, YPlotData, @@ -178,7 +179,18 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(results, report_type="comparison-cross-validation") + + report_type: ReportType + if self._parent._reports_type == "EstimatorReport": + report_type = "comparison-estimator" + elif self._parent._reports_type == "CrossValidationReport": + report_type = "comparison-cross-validation" + else: + raise ValueError( + "Comparison should only apply to EstimatorReport or " + "CrossValidationReport" + ) + return MetricsSummaryDisplay(summarize_data=results, report_type=report_type) @progress_decorator(description="Compute metric for each estimator") def _compute_metric_scores( From 20f7685f191a45bbb6412d8c45b8eec87cbc769b Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 9 Jun 2025 17:46:36 +0200 Subject: [PATCH 10/39] plot for comparison report for estimator --- .../_plot/metrics/metrics_summary_display.py | 111 +++++++++++++----- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 1d745a779e..8c9bdd8d7b 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -13,13 +13,11 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): """ def __init__( - self, - *, - summarize_data, - report_type: ReportType, + self, *, summarize_data, report_type: ReportType, data_source: str = "test" ): self.summarize_data = summarize_data self.report_type = report_type + self.data_source = data_source def frame(self): """Return the summarize as a dataframe.""" @@ -29,8 +27,6 @@ def frame(self): def plot(self, x, y) -> None: """Plot visualization. 
- Extra keyword arguments will be passed to matplotlib's `plot`. - Parameters ---------- x : str, default=None @@ -39,29 +35,9 @@ def plot(self, x, y) -> None: y : str, default=None The metric to display on y-axis. By default, the second column. - Notes - ----- - The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) - in scikit-learn is computed without any interpolation. To be consistent - with this metric, the precision-recall curve is plotted without any - interpolation as well (step-wise style). - - You can change this style by passing the keyword argument - `drawstyle="default"`. However, the curve will not be strictly - consistent with the reported average precision. - - Examples - -------- - >>> from sklearn.datasets import load_breast_cancer - >>> from sklearn.linear_model import LogisticRegression - >>> from skore import train_test_split - >>> from skore import EstimatorReport - >>> X, y = load_breast_cancer(return_X_y=True) - >>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) - >>> classifier = LogisticRegression(max_iter=10_000) - >>> report = EstimatorReport(classifier, **split_data) - >>> display = report.metrics.precision_recall() - >>> display.plot(pr_curve_kwargs={"color": "tab:red"}) + Returns + ------- + A matplotlib plot. """ self.figure_, self.ax_ = plt.subplots() @@ -70,7 +46,80 @@ def plot(self, x, y) -> None: ): raise NotImplementedError("To come soon!") elif self.report_type == "comparison-estimator": - self.plot_comparison_estimator() + self.plot_comparison_estimator(x, y) + + def plot_comparison_estimator(self, x, y): + fig, ax = plt.subplots() + + x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) + y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) + + # Check that the metrics are in the report + # If the metric is not in the report, help the user by suggesting + # supported metrics + reverse_score_info = { + value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() + } + available_columns = self.summarize_data.columns.get_level_values(0).to_list() + available_columns.remove("Estimator") + supported_metrics = [ + reverse_score_info.get(col, col) for col in available_columns + ] + if x not in supported_metrics: + raise ValueError( + f"Performance metric {x} not found in the report. " + f"Supported metrics are: {supported_metrics}." + ) + if y not in supported_metrics: + raise ValueError( + f"Performance metric {y} not found in the report. " + f"Supported metrics are: {supported_metrics}." + ) + + x_data = self.summarize_data[x_label] + y_data = self.summarize_data[y_label] + if len(x_data.shape) > 1: + raise ValueError( + "The perf metric x requires to add a positive label parameter." + ) + if len(y_data.shape) > 1: + raise ValueError( + "The perf metric y requires to add a positive label parameter." 
+ ) + + # Make it clear in the axis labels that we are using the train set + if x == "fit_time" and self.data_source != "train": + x_label_text = x_label + " on train set" + else: + x_label_text = x_label + if y == "fit_time" and self.data_source != "train": + y_label_text = y_label + " on train set" + else: + y_label_text = y_label + + title = f"{self.display_label_x} vs {self.display_label_x}" + if self.data_source is not None: + title += f" on {self.data_source} data" + + ax.scatter(x=x_data, y=self.summarize_data[y_data]) + ax.set_title(title) + ax.set_xlabel(x_label_text) + ax.set_ylabel(y_label_text) + + # Add labels to the points with a small offset + text = self.summarize_data["Estimator"] + for label, x_coord, y_coord in zip(text, x, y): + ax.annotate( + label, + (x_coord, y_coord), + textcoords="offset points", + xytext=(10, 0), + bbox=dict( + boxstyle="round,pad=0.3", + edgecolor="gray", + facecolor="white", + alpha=0.7, + ), + ) - def plot_comparison_estimator(self): self.report_metrics_data.scatter(x=0, y=1) From b29338959d5e40a3fcccb55a931c3cfe339ffbc8 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 14:57:08 +0200 Subject: [PATCH 11/39] linting --- .../src/skore/_sklearn/_cross_validation/metrics_accessor.py | 4 +++- .../skore/sklearn/_plot/metrics/metrics_summary_display.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index a73ec823f2..801a396c1d 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -177,7 +177,9 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(summarize_data=results, report_type="cross-validation") + return MetricsSummaryDisplay( + summarize_data=results, report_type="cross-validation" + ) @progress_decorator(description="Compute metric for each split") def _compute_metric_scores( diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 8c9bdd8d7b..550d6cdc87 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -49,7 +49,7 @@ def plot(self, x, y) -> None: self.plot_comparison_estimator(x, y) def plot_comparison_estimator(self, x, y): - fig, ax = plt.subplots() + _, ax = plt.subplots() x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) From 22c0e4981ecc2313ee55ea92112315c343e4a4f8 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 15:40:01 +0200 Subject: [PATCH 12/39] introduce temporarily _SCORE_OR_LOSS_INFO in class --- .../_plot/metrics/metrics_summary_display.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 550d6cdc87..17b8a9e52e 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -12,6 +12,22 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): This class should not be instantiated directly. 
""" + # should be removed once transformed into a utils + _SCORE_OR_LOSS_INFO: dict[str, dict[str, str]] = { + "fit_time": {"name": "Fit time (s)", "icon": "(↘︎)"}, + "predict_time": {"name": "Predict time (s)", "icon": "(↘︎)"}, + "accuracy": {"name": "Accuracy", "icon": "(↗︎)"}, + "precision": {"name": "Precision", "icon": "(↗︎)"}, + "recall": {"name": "Recall", "icon": "(↗︎)"}, + "brier_score": {"name": "Brier score", "icon": "(↘︎)"}, + "roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"}, + "log_loss": {"name": "Log loss", "icon": "(↘︎)"}, + "r2": {"name": "R²", "icon": "(↗︎)"}, + "rmse": {"name": "RMSE", "icon": "(↘︎)"}, + "custom_metric": {"name": "Custom metric", "icon": ""}, + "report_metrics": {"name": "Report metrics", "icon": ""}, + } + def __init__( self, *, summarize_data, report_type: ReportType, data_source: str = "test" ): From 00ae0d734f365ed37d91b205a510f36b4f15c0ff Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 12 Jun 2025 16:45:44 +0200 Subject: [PATCH 13/39] first version of plot for comp report ready --- .../_plot/metrics/metrics_summary_display.py | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 17b8a9e52e..bfc5517546 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -1,4 +1,5 @@ import matplotlib.pyplot as plt +import pandas as pd from skore.sklearn._plot.style import StyleDisplayMixin from skore.sklearn._plot.utils import HelpDisplayMixin @@ -55,8 +56,6 @@ def plot(self, x, y) -> None: ------- A matplotlib plot. """ - self.figure_, self.ax_ = plt.subplots() - if self.report_type in ( ["estimator", "cross-validation", "comparison-cross-validation"] ): @@ -65,7 +64,7 @@ def plot(self, x, y) -> None: self.plot_comparison_estimator(x, y) def plot_comparison_estimator(self, x, y): - _, ax = plt.subplots() + fig, ax = plt.subplots() x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) @@ -76,8 +75,11 @@ def plot_comparison_estimator(self, x, y): reverse_score_info = { value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() } - available_columns = self.summarize_data.columns.get_level_values(0).to_list() - available_columns.remove("Estimator") + index = self.summarize_data.index + if isinstance(index, pd.MultiIndex): + available_columns = index.get_level_values(0).to_list() + else: + available_columns = index.tolist() supported_metrics = [ reverse_score_info.get(col, col) for col in available_columns ] @@ -92,16 +94,23 @@ def plot_comparison_estimator(self, x, y): f"Supported metrics are: {supported_metrics}." ) - x_data = self.summarize_data[x_label] - y_data = self.summarize_data[y_label] - if len(x_data.shape) > 1: + x_data = self.summarize_data.loc[x_label] + y_data = self.summarize_data.loc[y_label] + if len(x_data.shape) > 1 and x_data.shape[0] > 1: + # case where we have multiIndex, and the metric is not a single value raise ValueError( "The perf metric x requires to add a positive label parameter." 
) - if len(y_data.shape) > 1: + elif len(x_data.shape) > 1 and x_data.shape[0] == 1: + # case where we have multiIndex, but the metric is not affected by the + # pos_label + x_data = x_data.squeeze() + if len(y_data.shape) > 1 and y_data.shape[0] > 1: raise ValueError( "The perf metric y requires to add a positive label parameter." ) + elif len(y_data.shape) > 1 and y_data.shape[0] == 1: + y_data = y_data.squeeze() # Make it clear in the axis labels that we are using the train set if x == "fit_time" and self.data_source != "train": @@ -113,18 +122,18 @@ def plot_comparison_estimator(self, x, y): else: y_label_text = y_label - title = f"{self.display_label_x} vs {self.display_label_x}" + title = f"{x_label} vs {y_label}" if self.data_source is not None: title += f" on {self.data_source} data" - ax.scatter(x=x_data, y=self.summarize_data[y_data]) + ax.scatter(x=x_data, y=y_data) ax.set_title(title) ax.set_xlabel(x_label_text) ax.set_ylabel(y_label_text) # Add labels to the points with a small offset - text = self.summarize_data["Estimator"] - for label, x_coord, y_coord in zip(text, x, y): + text = self.summarize_data.columns.tolist() + for label, x_coord, y_coord in zip(text, x_data, y_data): ax.annotate( label, (x_coord, y_coord), @@ -138,4 +147,5 @@ def plot_comparison_estimator(self, x, y): ), ) - self.report_metrics_data.scatter(x=0, y=1) + plt.tight_layout() + return fig From c557d1ec5877a64714ef42b72af8791cc3843cb6 Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 12 Jun 2025 16:49:30 +0200 Subject: [PATCH 14/39] linting --- .../src/skore/sklearn/_plot/metrics/metrics_summary_display.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index bfc5517546..d742e57581 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -133,7 +133,7 @@ def plot_comparison_estimator(self, x, y): # Add labels to the points with a small offset text = self.summarize_data.columns.tolist() - for label, x_coord, y_coord in zip(text, x_data, y_data): + for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): ax.annotate( label, (x_coord, y_coord), From 866f367708815a0c9c0d7b0de2aefcc36404e486 Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 12 Jun 2025 16:56:43 +0200 Subject: [PATCH 15/39] adapt to name change from report_metrics to summarize --- examples/getting_started/plot_skore_getting_started.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/getting_started/plot_skore_getting_started.py b/examples/getting_started/plot_skore_getting_started.py index 4c2ab09b33..568a37c31e 100644 --- a/examples/getting_started/plot_skore_getting_started.py +++ b/examples/getting_started/plot_skore_getting_started.py @@ -211,7 +211,7 @@ # To be more specific in our comparison, we can decide to compare the Brier score and the fitting time. # %% -comparator.metrics.report_metrics().plot(x="brier_score", y="fit_time") +comparator.metrics.summarize().plot(x="brier_score", y="fit_time") # %% # Thus, we easily have the result of our benchmark for several recommended metrics. 
From ddb734455af9e486fc79f5143dc14ff829ff4a2d Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:11:51 +0200 Subject: [PATCH 16/39] add some tests --- .../_plot/metrics/metrics_summary_display.py | 22 ++-- .../test_plot_comparison.py | 107 ++++++++++++++++++ 2 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index d742e57581..d4bef61b43 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -52,6 +52,14 @@ def plot(self, x, y) -> None: y : str, default=None The metric to display on y-axis. By default, the second column. + Attributes + ---------- + ax_ : matplotlib axes or ndarray of axes + The axes on which the precision-recall curve is plotted. + + figure_ : matplotlib figure + The figure on which the precision-recall curve is plotted. + Returns ------- A matplotlib plot. @@ -64,7 +72,7 @@ def plot(self, x, y) -> None: self.plot_comparison_estimator(x, y) def plot_comparison_estimator(self, x, y): - fig, ax = plt.subplots() + self.figure_, self.ax_ = plt.subplots() x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) @@ -126,15 +134,15 @@ def plot_comparison_estimator(self, x, y): if self.data_source is not None: title += f" on {self.data_source} data" - ax.scatter(x=x_data, y=y_data) - ax.set_title(title) - ax.set_xlabel(x_label_text) - ax.set_ylabel(y_label_text) + self.ax_.scatter(x=x_data, y=y_data) + self.ax_.set_title(title) + self.ax_.set_xlabel(x_label_text) + self.ax_.set_ylabel(y_label_text) # Add labels to the points with a small offset text = self.summarize_data.columns.tolist() for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): - ax.annotate( + self.ax_.annotate( label, (x_coord, y_coord), textcoords="offset points", @@ -148,4 +156,4 @@ def plot_comparison_estimator(self, x, y): ) plt.tight_layout() - return fig + return self.figure_, self.ax_ diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py new file mode 100644 index 0000000000..b2e1af3410 --- /dev/null +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -0,0 +1,107 @@ +import pytest +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.model_selection import train_test_split +from skore import ComparisonReport, EstimatorReport + + +@pytest.fixture +def multi_classification_comparator(): + X, y = make_classification( + n_samples=100, + n_features=5, + n_informative=3, + n_redundant=0, + n_classes=3, + random_state=42, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0 + ) + + report_1 = EstimatorReport( + estimator=HistGradientBoostingClassifier(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_2 = EstimatorReport( + estimator=LogisticRegression(max_iter=50), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + comp = 
ComparisonReport({"report_1": report_1, "report_2": report_2}) + return comp + + +@pytest.fixture +def binary_classification_comparator(): + X, y = make_classification(random_state=0) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0 + ) + + report_1 = EstimatorReport( + estimator=HistGradientBoostingClassifier(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_2 = EstimatorReport( + estimator=LogisticRegression(max_iter=50), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + comp = ComparisonReport({"report_1": report_1, "report_2": report_2}) + return comp + + +@pytest.fixture +def regression_comparator(): + X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0 + ) + + report_1 = EstimatorReport( + estimator=HistGradientBoostingRegressor(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_2 = EstimatorReport( + estimator=LinearRegression(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + comp = ComparisonReport({"report_1": report_1, "report_2": report_2}) + return comp + + +def test_regression_comparator(regression_comparator): + display_summary = regression_comparator.metrics.summarize() + display_summary.plot_comparison_estimator("r2", "fit_time") + assert display_summary.ax_.get_xlabel() == "R²" + assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" + assert len(display_summary.ax_.get_title()) > 4 + + +def test_error_invalid_metric(regression_comparator): + comp = regression_comparator + with pytest.raises(ValueError): + comp.metrics.summarize().plot_comparison_estimator( + "invalid_metric", "invalid_metric_bis" + ) From b2f2250b0e4f4582400ae0b00bed67e5a4c7f8e9 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:16:00 +0200 Subject: [PATCH 17/39] add test not implemented error --- .../metrics_summary_display/test_common.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py new file mode 100644 index 0000000000..bbf7d58b7a --- /dev/null +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py @@ -0,0 +1,34 @@ +import pytest +from sklearn.datasets import make_classification +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.model_selection import train_test_split +from skore import EstimatorReport + + +@pytest.fixture +def estimator_report_classification(): + X, y = make_classification(random_state=0) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0 + ) + + estimator_report = EstimatorReport( + estimator=HistGradientBoostingClassifier(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + return estimator_report + + +def test_not_implemented(estimator_report_classification): + """ + Test that the plot_comparison_estimator method raises NotImplementedError + when called with a binary classification comparator. 
+ """ + estimator_report_classification.metrics.summarize() + with pytest.raises(NotImplementedError): + estimator_report_classification.metrics.summarize().plot( + x="accuracy", y="f1_score" + ) From c6d063a263f6b001e51961c66ba1fdcd58ff19fa Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:34:20 +0200 Subject: [PATCH 18/39] add tests --- .../test_plot_comparison.py | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index b2e1af3410..ed6d92c56f 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -93,15 +93,37 @@ def regression_comparator(): def test_regression_comparator(regression_comparator): display_summary = regression_comparator.metrics.summarize() - display_summary.plot_comparison_estimator("r2", "fit_time") + display_summary.plot("r2", "fit_time") assert display_summary.ax_.get_xlabel() == "R²" assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" assert len(display_summary.ax_.get_title()) > 4 +def test_data_source_affect_title_and_axis(regression_comparator): + comp = regression_comparator + display_summary = comp.metrics.summarize(data_source="train") + display_summary.plot("r2", "fit_time") + assert "on train set" in display_summary.ax_.get_title() + assert "on test set" not in display_summary.ax_.get_ylabel() + + def test_error_invalid_metric(regression_comparator): comp = regression_comparator with pytest.raises(ValueError): - comp.metrics.summarize().plot_comparison_estimator( - "invalid_metric", "invalid_metric_bis" - ) + comp.metrics.summarize().plot("invalid_metric", "fit_time") + with pytest.raises(ValueError): + comp.metrics.summarize().plot("fit_time", "invalid_metric") + + +def test_needs_positive_label(binary_classification_comparator): + comp = binary_classification_comparator + with pytest.raises( + ValueError, + match="The perf metric x requires to add a positive label parameter.", + ): + comp.metrics.summarize().plot("precision", "fit_time") + with pytest.raises( + ValueError, + match="The perf metric y requires to add a positive label parameter.", + ): + comp.metrics.summarize().plot("fit_time", "precision") From 42fb3a31c3a573b077557ee1ec7800852e399b01 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:34:41 +0200 Subject: [PATCH 19/39] add data_source at display creation --- skore/src/skore/_sklearn/_comparison/metrics_accessor.py | 4 +++- .../skore/_sklearn/_cross_validation/metrics_accessor.py | 4 +++- skore/src/skore/_sklearn/_estimator/metrics_accessor.py | 5 ++++- .../skore/sklearn/_plot/metrics/metrics_summary_display.py | 6 +++--- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 7c361bac6a..cd41a4fb70 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -190,7 +190,9 @@ class is set to the one provided when creating the report. 
If `None`, "Comparison should only apply to EstimatorReport or " "CrossValidationReport" ) - return MetricsSummaryDisplay(summarize_data=results, report_type=report_type) + return MetricsSummaryDisplay( + summarize_data=results, report_type=report_type, data_source=data_source + ) @progress_decorator(description="Compute metric for each estimator") def _compute_metric_scores( diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index 801a396c1d..9a10d1ed7e 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -178,7 +178,9 @@ class is set to the one provided when creating the report. If `None`, r"\((.*)\)$", r"\1", regex=True ) return MetricsSummaryDisplay( - summarize_data=results, report_type="cross-validation" + summarize_data=results, + report_type="cross-validation", + data_source=data_source, ) @progress_decorator(description="Compute metric for each split") diff --git a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py index cdaf2e7342..3d81319c6f 100644 --- a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py @@ -426,7 +426,10 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(summarize_data=results, report_type="estimator") + + return MetricsSummaryDisplay( + summarize_data=results, report_type="estimator", data_source=data_source + ) def _compute_metric_scores( self, diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index d4bef61b43..f440c1602a 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -69,9 +69,9 @@ def plot(self, x, y) -> None: ): raise NotImplementedError("To come soon!") elif self.report_type == "comparison-estimator": - self.plot_comparison_estimator(x, y) + self._plot_comparison_estimator(x, y) - def plot_comparison_estimator(self, x, y): + def _plot_comparison_estimator(self, x, y): self.figure_, self.ax_ = plt.subplots() x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) @@ -132,7 +132,7 @@ def plot_comparison_estimator(self, x, y): title = f"{x_label} vs {y_label}" if self.data_source is not None: - title += f" on {self.data_source} data" + title += f" on {self.data_source} set" self.ax_.scatter(x=x_data, y=y_data) self.ax_.set_title(title) From 38a91bf4c5bd4b732b9599b0f368d38558253084 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:48:37 +0200 Subject: [PATCH 20/39] add new tests --- .../test_plot_comparison.py | 46 ++++++------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index ed6d92c56f..31dcc29d8e 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -9,38 +9,6 @@ from skore import ComparisonReport, EstimatorReport -@pytest.fixture -def multi_classification_comparator(): - X, y = make_classification( - 
n_samples=100, - n_features=5, - n_informative=3, - n_redundant=0, - n_classes=3, - random_state=42, - ) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=0 - ) - - report_1 = EstimatorReport( - estimator=HistGradientBoostingClassifier(), - X_train=X_train, - y_train=y_train, - X_test=X_test, - y_test=y_test, - ) - report_2 = EstimatorReport( - estimator=LogisticRegression(max_iter=50), - X_train=X_train, - y_train=y_train, - X_test=X_test, - y_test=y_test, - ) - comp = ComparisonReport({"report_1": report_1, "report_2": report_2}) - return comp - - @pytest.fixture def binary_classification_comparator(): X, y = make_classification(random_state=0) @@ -127,3 +95,17 @@ def test_needs_positive_label(binary_classification_comparator): match="The perf metric y requires to add a positive label parameter.", ): comp.metrics.summarize().plot("fit_time", "precision") + + +def test_no_positive_label_unrequired(binary_classification_comparator): + display_summary = binary_classification_comparator.metrics.summarize() + display_summary.plot("brier_score", "fit_time") + assert display_summary.ax_.get_xlabel() == "Brier score" + assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" + assert len(display_summary.ax_.get_title()) > 4 + + display_summary = binary_classification_comparator.metrics.summarize() + display_summary.plot("fit_time", "brier_score") + assert display_summary.ax_.get_xlabel() == "Fit time (s) on train set" + assert display_summary.ax_.get_ylabel() == "Brier score" + assert len(display_summary.ax_.get_title()) > 4 From 9274eda4ddfe59b9566784d1fbe39ae54c0ddf19 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 11:18:09 +0200 Subject: [PATCH 21/39] add description to tests --- .../metrics_summary_display/test_plot_comparison.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index 31dcc29d8e..b5d2c4ce7c 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -60,6 +60,7 @@ def regression_comparator(): def test_regression_comparator(regression_comparator): + """Test that the regression comparator can summarize metrics and plot them.""" display_summary = regression_comparator.metrics.summarize() display_summary.plot("r2", "fit_time") assert display_summary.ax_.get_xlabel() == "R²" @@ -68,6 +69,7 @@ def test_regression_comparator(regression_comparator): def test_data_source_affect_title_and_axis(regression_comparator): + """Test that the data source does change the title and axis labels.""" comp = regression_comparator display_summary = comp.metrics.summarize(data_source="train") display_summary.plot("r2", "fit_time") @@ -76,6 +78,7 @@ def test_data_source_affect_title_and_axis(regression_comparator): def test_error_invalid_metric(regression_comparator): + """Test the error raised when an invalid metric is used.""" comp = regression_comparator with pytest.raises(ValueError): comp.metrics.summarize().plot("invalid_metric", "fit_time") @@ -84,6 +87,10 @@ def test_error_invalid_metric(regression_comparator): def test_needs_positive_label(binary_classification_comparator): + """ + Test the error raised when a metric requiring a positive label is selected, + without giving the pos_label. 
+ """ comp = binary_classification_comparator with pytest.raises( ValueError, @@ -98,6 +105,10 @@ def test_needs_positive_label(binary_classification_comparator): def test_no_positive_label_unrequired(binary_classification_comparator): + """ + Test that no error is raised when a metric not requiring a positive label is + selected. + """ display_summary = binary_classification_comparator.metrics.summarize() display_summary.plot("brier_score", "fit_time") assert display_summary.ax_.get_xlabel() == "Brier score" From d1888281c8668140e2a1d7b072c8833aa17a24c3 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 11:18:17 +0200 Subject: [PATCH 22/39] add example --- .../_plot/metrics/metrics_summary_display.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index f440c1602a..2fe64572fa 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -63,6 +63,25 @@ def plot(self, x, y) -> None: Returns ------- A matplotlib plot. + + Example + ------- + >>> from sklearn.datasets import load_breast_cancer + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.ensemble import HistGradientBoostingClassifier + >>> from skore import train_test_split + >>> from skore import EstimatorReport, ComparisonReport + >>> X, y = load_breast_cancer(return_X_y=True) + >>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) + >>> classifier = LogisticRegression() + >>> report_a = EstimatorReport(classifier,pos_label=1, **split_data) + >>> classifier = HistGradientBoostingClassifier() + >>> report_b = EstimatorReport(classifier,pos_label=1, **split_data) + >>> comparison_report = ComparisonReport( + {"report_a": report_a, "report_b": report_b} + ) + >>> display = comparison_report.metrics.summarize() + >>> display.plot(x="accuracy", y="roc_auc") """ if self.report_type in ( ["estimator", "cross-validation", "comparison-cross-validation"] From a21ca0e5907304c85b58ac54f54134d094439045 Mon Sep 17 00:00:00 2001 From: Marie Sacksick <79304610+MarieSacksick@users.noreply.github.com> Date: Mon, 16 Jun 2025 14:39:35 +0200 Subject: [PATCH 23/39] Update skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py Co-authored-by: Guillaume Lemaitre --- .../skore/sklearn/_plot/metrics/metrics_summary_display.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 2fe64572fa..ecd16ef608 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -154,9 +154,7 @@ def _plot_comparison_estimator(self, x, y): title += f" on {self.data_source} set" self.ax_.scatter(x=x_data, y=y_data) - self.ax_.set_title(title) - self.ax_.set_xlabel(x_label_text) - self.ax_.set_ylabel(y_label_text) + self.ax_.set(title=title, xlabel=x_label_text, y_label=y_label_text) # Add labels to the points with a small offset text = self.summarize_data.columns.tolist() From 7fa3615a352a754609c31b6363eccd674b63421c Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 16 Jun 2025 14:45:12 +0200 Subject: [PATCH 24/39] change not implemented error for estimator --- .../_plot/metrics/metrics_summary_display.py | 6 +++--- 
.../metrics_summary_display/test_common.py | 20 ++++++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index ecd16ef608..6a8fef25d1 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -83,9 +83,9 @@ def plot(self, x, y) -> None: >>> display = comparison_report.metrics.summarize() >>> display.plot(x="accuracy", y="roc_auc") """ - if self.report_type in ( - ["estimator", "cross-validation", "comparison-cross-validation"] - ): + if self.report_type == "estimator": + raise NotImplementedError() + elif self.report_type in ["cross-validation", "comparison-cross-validation"]: raise NotImplementedError("To come soon!") elif self.report_type == "comparison-estimator": self._plot_comparison_estimator(x, y) diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py index bbf7d58b7a..8ad2b01ffe 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py @@ -2,7 +2,7 @@ from sklearn.datasets import make_classification from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.model_selection import train_test_split -from skore import EstimatorReport +from skore import CrossValidationReport, EstimatorReport @pytest.fixture @@ -22,13 +22,27 @@ def estimator_report_classification(): return estimator_report -def test_not_implemented(estimator_report_classification): +def test_not_implemented_estimator(estimator_report_classification): """ Test that the plot_comparison_estimator method raises NotImplementedError when called with a binary classification comparator. """ - estimator_report_classification.metrics.summarize() with pytest.raises(NotImplementedError): estimator_report_classification.metrics.summarize().plot( x="accuracy", y="f1_score" ) + + +def test_not_implemented_other_categories(): + """ + Test that the plot_comparison_estimator method raises NotImplementedError + when called with a binary classification comparator. 
+ """ + X, y = make_classification(random_state=0) + cv_report = CrossValidationReport( + estimator=HistGradientBoostingClassifier(), + X=X, + y=y, + ) + with pytest.raises(NotImplementedError, match="To come soon!"): + cv_report.metrics.summarize().plot(x="accuracy", y="f1_score") From 77c84f19c42373fd2d315f7aafb8040b636fce1d Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 16 Jun 2025 14:47:04 +0200 Subject: [PATCH 25/39] bugfix matplotlib ax set --- .../src/skore/sklearn/_plot/metrics/metrics_summary_display.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 6a8fef25d1..7ff67697c6 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -154,7 +154,7 @@ def _plot_comparison_estimator(self, x, y): title += f" on {self.data_source} set" self.ax_.scatter(x=x_data, y=y_data) - self.ax_.set(title=title, xlabel=x_label_text, y_label=y_label_text) + self.ax_.set(title=title, xlabel=x_label_text, ylabel=y_label_text) # Add labels to the points with a small offset text = self.summarize_data.columns.tolist() From d348ca1b56eb8c1fc19e8cab3186b3f83b200ff7 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 16 Jun 2025 14:48:46 +0200 Subject: [PATCH 26/39] remove useless line --- skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 7ff67697c6..3581b0de16 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -172,5 +172,4 @@ def _plot_comparison_estimator(self, x, y): ), ) - plt.tight_layout() return self.figure_, self.ax_ From bf7455455762a5cf69d01895c20a4348d1256551 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 16 Jun 2025 15:56:13 +0200 Subject: [PATCH 27/39] change annotation to legend --- .../_plot/metrics/metrics_summary_display.py | 30 +++++++++---------- test.py | 18 +++++++++++ 2 files changed, 33 insertions(+), 15 deletions(-) create mode 100644 test.py diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 3581b0de16..5e366bd166 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -1,3 +1,5 @@ +import itertools + import matplotlib.pyplot as plt import pandas as pd @@ -153,23 +155,21 @@ def _plot_comparison_estimator(self, x, y): if self.data_source is not None: title += f" on {self.data_source} set" - self.ax_.scatter(x=x_data, y=y_data) - self.ax_.set(title=title, xlabel=x_label_text, ylabel=y_label_text) - - # Add labels to the points with a small offset + # Use a set of markers and colors for each data point text = self.summarize_data.columns.tolist() + markers = itertools.cycle(("o", "s", "^", "D", "v", "P", "*", "X", "h", "8")) + colors = itertools.cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"]) + + handles = [] for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): - self.ax_.annotate( - label, - (x_coord, y_coord), - textcoords="offset points", - xytext=(10, 0), - bbox=dict( - boxstyle="round,pad=0.3", - edgecolor="gray", - 
facecolor="white", - alpha=0.7, - ), + marker = next(markers) + color = next(colors) + sc = self.ax_.scatter( + x_coord, y_coord, marker=marker, color=color, label=label ) + handles.append(sc) + + self.ax_.set(title=title, xlabel=x_label_text, ylabel=y_label_text) + self.ax_.legend(title="Models", loc="best") return self.figure_, self.ax_ diff --git a/test.py b/test.py new file mode 100644 index 0000000000..dfd55be02b --- /dev/null +++ b/test.py @@ -0,0 +1,18 @@ +# %% +from skore import EstimatorReport, ComparisonReport +from sklearn.datasets import load_breast_cancer +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import HistGradientBoostingClassifier +from skore import train_test_split +X, y = load_breast_cancer(return_X_y=True) +split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) +classifier = LogisticRegression() +report_a = EstimatorReport(classifier, pos_label=1, **split_data) +classifier = HistGradientBoostingClassifier() +report_b = EstimatorReport(classifier, pos_label=1, **split_data) +comparison_report = ComparisonReport( + {"report_a": report_a, "report_b": report_b} +) +display = comparison_report.metrics.summarize() +display.plot(x="roc_auc", y="fit_time") +# %% From d3abdffa008949290dda273ba3d8e24bbe9d9e9e Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 28 Jul 2025 17:40:09 +0200 Subject: [PATCH 28/39] linting --- .../_sklearn/_comparison/metrics_accessor.py | 3 +- .../_cross_validation/metrics_accessor.py | 3 +- .../_plot/metrics/metrics_summary_display.py | 4 +- skore/src/skore/sklearn/__init__.py | 25 --- .../skore/sklearn/_plot/metrics/__init__.py | 15 -- .../_plot/metrics/metrics_summary_display.py | 175 ------------------ 6 files changed, 7 insertions(+), 218 deletions(-) delete mode 100644 skore/src/skore/sklearn/__init__.py delete mode 100644 skore/src/skore/sklearn/_plot/metrics/__init__.py delete mode 100644 skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 459859e445..8ebcfd759f 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -7,6 +7,7 @@ from numpy.typing import ArrayLike from sklearn.metrics import make_scorer from sklearn.utils.metaestimators import available_if + from skore._externals._pandas_accessors import DirNamesMixin from skore._sklearn._base import ( _BaseAccessor, @@ -60,7 +61,7 @@ def summarize( pos_label: PositiveLabel | None = _DEFAULT, indicator_favorability: bool = False, flat_index: bool = False, - aggregate: Optional[Aggregate] = ("mean", "std"), + aggregate: Aggregate | None = ("mean", "std"), ) -> MetricsSummaryDisplay: """Report a set of metrics for the estimators. 
diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index fab7d9e748..22b96d1be2 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -7,6 +7,7 @@ from numpy.typing import ArrayLike from sklearn.metrics import make_scorer from sklearn.utils.metaestimators import available_if + from skore._externals._pandas_accessors import DirNamesMixin from skore._sklearn._base import ( _BaseAccessor, @@ -60,7 +61,7 @@ def summarize( pos_label: PositiveLabel | None = _DEFAULT, indicator_favorability: bool = False, flat_index: bool = False, - aggregate: Optional[Aggregate] = ("mean", "std"), + aggregate: Aggregate | None = ("mean", "std"), ) -> MetricsSummaryDisplay: """Report a set of metrics for our estimator. diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index a6846294ff..9cd889b51a 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -9,8 +9,10 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): This class should not be instantiated directly. """ - def __init__(self, summarize_data): + def __init__(self, summarize_data, report_type, data_source): self.summarize_data = summarize_data + self.report_type = report_type + self.data_source = data_source def frame(self): """Return the summarize as a dataframe. diff --git a/skore/src/skore/sklearn/__init__.py b/skore/src/skore/sklearn/__init__.py deleted file mode 100644 index 242669655f..0000000000 --- a/skore/src/skore/sklearn/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Enhance `sklearn` functions.""" - -from skore.sklearn._comparison import ComparisonReport -from skore.sklearn._cross_validation import CrossValidationReport -from skore.sklearn._estimator import EstimatorReport -from skore.sklearn._plot import ( - MetricsSummaryDisplay, - PrecisionRecallCurveDisplay, - PredictionErrorDisplay, - RocCurveDisplay, -) -from skore.sklearn.find_estimators import find_estimators -from skore.sklearn.train_test_split.train_test_split import train_test_split - -__all__ = [ - "ComparisonReport", - "CrossValidationReport", - "EstimatorReport", - "PrecisionRecallCurveDisplay", - "PredictionErrorDisplay", - "RocCurveDisplay", - "MetricsSummaryDisplay", - "train_test_split", - "find_estimators", -] diff --git a/skore/src/skore/sklearn/_plot/metrics/__init__.py b/skore/src/skore/sklearn/_plot/metrics/__init__.py deleted file mode 100644 index 3e55d01b1f..0000000000 --- a/skore/src/skore/sklearn/_plot/metrics/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from skore.sklearn._plot.metrics.confusion_matrix import ConfusionMatrixDisplay -from skore.sklearn._plot.metrics.metrics_summary_display import MetricsSummaryDisplay -from skore.sklearn._plot.metrics.precision_recall_curve import ( - PrecisionRecallCurveDisplay, -) -from skore.sklearn._plot.metrics.prediction_error import PredictionErrorDisplay -from skore.sklearn._plot.metrics.roc_curve import RocCurveDisplay - -__all__ = [ - "ConfusionMatrixDisplay", - "PrecisionRecallCurveDisplay", - "PredictionErrorDisplay", - "RocCurveDisplay", - "MetricsSummaryDisplay", -] diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py deleted file mode 100644 
index 5e366bd166..0000000000 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ /dev/null @@ -1,175 +0,0 @@ -import itertools - -import matplotlib.pyplot as plt -import pandas as pd - -from skore.sklearn._plot.style import StyleDisplayMixin -from skore.sklearn._plot.utils import HelpDisplayMixin -from skore.sklearn.types import ReportType - - -class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): - """Display for summarize. - - An instance of this class will be created by `Report.metrics.summarize()`. - This class should not be instantiated directly. - """ - - # should be removed once transformed into a utils - _SCORE_OR_LOSS_INFO: dict[str, dict[str, str]] = { - "fit_time": {"name": "Fit time (s)", "icon": "(↘︎)"}, - "predict_time": {"name": "Predict time (s)", "icon": "(↘︎)"}, - "accuracy": {"name": "Accuracy", "icon": "(↗︎)"}, - "precision": {"name": "Precision", "icon": "(↗︎)"}, - "recall": {"name": "Recall", "icon": "(↗︎)"}, - "brier_score": {"name": "Brier score", "icon": "(↘︎)"}, - "roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"}, - "log_loss": {"name": "Log loss", "icon": "(↘︎)"}, - "r2": {"name": "R²", "icon": "(↗︎)"}, - "rmse": {"name": "RMSE", "icon": "(↘︎)"}, - "custom_metric": {"name": "Custom metric", "icon": ""}, - "report_metrics": {"name": "Report metrics", "icon": ""}, - } - - def __init__( - self, *, summarize_data, report_type: ReportType, data_source: str = "test" - ): - self.summarize_data = summarize_data - self.report_type = report_type - self.data_source = data_source - - def frame(self): - """Return the summarize as a dataframe.""" - return self.summarize_data - - @StyleDisplayMixin.style_plot - def plot(self, x, y) -> None: - """Plot visualization. - - Parameters - ---------- - x : str, default=None - The metric to display on x-axis. By default, the first column. - - y : str, default=None - The metric to display on y-axis. By default, the second column. - - Attributes - ---------- - ax_ : matplotlib axes or ndarray of axes - The axes on which the precision-recall curve is plotted. - - figure_ : matplotlib figure - The figure on which the precision-recall curve is plotted. - - Returns - ------- - A matplotlib plot. 
- - Example - ------- - >>> from sklearn.datasets import load_breast_cancer - >>> from sklearn.linear_model import LogisticRegression - >>> from sklearn.ensemble import HistGradientBoostingClassifier - >>> from skore import train_test_split - >>> from skore import EstimatorReport, ComparisonReport - >>> X, y = load_breast_cancer(return_X_y=True) - >>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) - >>> classifier = LogisticRegression() - >>> report_a = EstimatorReport(classifier,pos_label=1, **split_data) - >>> classifier = HistGradientBoostingClassifier() - >>> report_b = EstimatorReport(classifier,pos_label=1, **split_data) - >>> comparison_report = ComparisonReport( - {"report_a": report_a, "report_b": report_b} - ) - >>> display = comparison_report.metrics.summarize() - >>> display.plot(x="accuracy", y="roc_auc") - """ - if self.report_type == "estimator": - raise NotImplementedError() - elif self.report_type in ["cross-validation", "comparison-cross-validation"]: - raise NotImplementedError("To come soon!") - elif self.report_type == "comparison-estimator": - self._plot_comparison_estimator(x, y) - - def _plot_comparison_estimator(self, x, y): - self.figure_, self.ax_ = plt.subplots() - - x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) - y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) - - # Check that the metrics are in the report - # If the metric is not in the report, help the user by suggesting - # supported metrics - reverse_score_info = { - value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() - } - index = self.summarize_data.index - if isinstance(index, pd.MultiIndex): - available_columns = index.get_level_values(0).to_list() - else: - available_columns = index.tolist() - supported_metrics = [ - reverse_score_info.get(col, col) for col in available_columns - ] - if x not in supported_metrics: - raise ValueError( - f"Performance metric {x} not found in the report. " - f"Supported metrics are: {supported_metrics}." - ) - if y not in supported_metrics: - raise ValueError( - f"Performance metric {y} not found in the report. " - f"Supported metrics are: {supported_metrics}." - ) - - x_data = self.summarize_data.loc[x_label] - y_data = self.summarize_data.loc[y_label] - if len(x_data.shape) > 1 and x_data.shape[0] > 1: - # case where we have multiIndex, and the metric is not a single value - raise ValueError( - "The perf metric x requires to add a positive label parameter." - ) - elif len(x_data.shape) > 1 and x_data.shape[0] == 1: - # case where we have multiIndex, but the metric is not affected by the - # pos_label - x_data = x_data.squeeze() - if len(y_data.shape) > 1 and y_data.shape[0] > 1: - raise ValueError( - "The perf metric y requires to add a positive label parameter." 
- ) - elif len(y_data.shape) > 1 and y_data.shape[0] == 1: - y_data = y_data.squeeze() - - # Make it clear in the axis labels that we are using the train set - if x == "fit_time" and self.data_source != "train": - x_label_text = x_label + " on train set" - else: - x_label_text = x_label - if y == "fit_time" and self.data_source != "train": - y_label_text = y_label + " on train set" - else: - y_label_text = y_label - - title = f"{x_label} vs {y_label}" - if self.data_source is not None: - title += f" on {self.data_source} set" - - # Use a set of markers and colors for each data point - text = self.summarize_data.columns.tolist() - markers = itertools.cycle(("o", "s", "^", "D", "v", "P", "*", "X", "h", "8")) - colors = itertools.cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"]) - - handles = [] - for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): - marker = next(markers) - color = next(colors) - sc = self.ax_.scatter( - x_coord, y_coord, marker=marker, color=color, label=label - ) - handles.append(sc) - - self.ax_.set(title=title, xlabel=x_label_text, ylabel=y_label_text) - self.ax_.legend(title="Models", loc="best") - - return self.figure_, self.ax_ From c7fb645dadd9e87f591aa2ca35a61b9b3da4e524 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Tue, 29 Jul 2025 09:55:13 +0200 Subject: [PATCH 29/39] docs(skore): Change the name of features after preprocessing (#1901) closes #1421 Created a separated dataframe for features importances and rename the features by string replace operations. Changes were made for two figures: `engineered_ridge_report` --- examples/use_cases/plot_feature_importance.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/examples/use_cases/plot_feature_importance.py b/examples/use_cases/plot_feature_importance.py index 18a055ed30..2082eb5e08 100644 --- a/examples/use_cases/plot_feature_importance.py +++ b/examples/use_cases/plot_feature_importance.py @@ -452,9 +452,22 @@ def unscale_coefficients(df, feature_mean, feature_std): # Let us display the 15 largest absolute coefficients: # %% -engineered_ridge_report.feature_importance.coefficients().sort_values( - by="Coefficient", key=abs, ascending=True -).tail(15).plot.barh( +engineered_rigde_report_feature_importance = ( + engineered_ridge_report.feature_importance.coefficients() + .sort_values(by="Coefficient", key=abs, ascending=True) + .tail(15) +) + +engineered_rigde_report_feature_importance.index = ( + engineered_rigde_report_feature_importance.index.str.replace("remainder__", "") +) +engineered_rigde_report_feature_importance.index = ( + engineered_rigde_report_feature_importance.index.str.replace( + "kmeans__", "geospatial__" + ) +) + +engineered_rigde_report_feature_importance.plot.barh( title="Model weights", xlabel="Coefficient", ylabel="Feature", From bc221c108a35e905228eb1ab56d7b1abf02157fc Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 31 Jul 2025 17:23:23 +0200 Subject: [PATCH 30/39] finish merge --- .../_plot/metrics/metrics_summary_display.py | 135 +++++++++++++++++- .../test_plot_comparison.py | 16 +-- 2 files changed, 137 insertions(+), 14 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 9cd889b51a..19ceb52525 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -1,15 +1,37 @@ +import matplotlib.pyplot as plt +import pandas as 
pd + from skore._sklearn._plot.style import StyleDisplayMixin -from skore._sklearn._plot.utils import HelpDisplayMixin +from skore._sklearn._plot.utils import HelpDisplayMixin, PlotBackendMixin +from skore._sklearn.types import ReportType -class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): +class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin, PlotBackendMixin): """Display for summarize. An instance of this class will be created by `Report.metrics.summarize()`. This class should not be instantiated directly. """ - def __init__(self, summarize_data, report_type, data_source): + # should be removed once transformed into a utils + _SCORE_OR_LOSS_INFO: dict[str, dict[str, str]] = { + "fit_time": {"name": "Fit time (s)", "icon": "(↘︎)"}, + "predict_time": {"name": "Predict time (s)", "icon": "(↘︎)"}, + "accuracy": {"name": "Accuracy", "icon": "(↗︎)"}, + "precision": {"name": "Precision", "icon": "(↗︎)"}, + "recall": {"name": "Recall", "icon": "(↗︎)"}, + "brier_score": {"name": "Brier score", "icon": "(↘︎)"}, + "roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"}, + "log_loss": {"name": "Log loss", "icon": "(↘︎)"}, + "r2": {"name": "R²", "icon": "(↗︎)"}, + "rmse": {"name": "RMSE", "icon": "(↘︎)"}, + "custom_metric": {"name": "Custom metric", "icon": ""}, + "report_metrics": {"name": "Report metrics", "icon": ""}, + } + + def __init__( + self, *, summarize_data, report_type: ReportType, data_source: str = "test" + ): self.summarize_data = summarize_data self.report_type = report_type self.data_source = data_source @@ -24,6 +46,107 @@ def frame(self): """ return self.summarize_data - @StyleDisplayMixin.style_plot - def plot(self): - raise NotImplementedError + def _plot_matplotlib(self, x: str, y: str) -> None: + """Plot visualization. + + Parameters + ---------- + x : str, default=None + The metric to display on x-axis. By default, the first column. + + y : str, default=None + The metric to display on y-axis. By default, the second column. + """ + self.figure_, self.ax_ = plt.subplots() + + if self.report_type in ( + ["estimator", "cross-validation", "comparison-cross-validation"] + ): + raise NotImplementedError("To come soon!") + elif self.report_type == "comparison-estimator": + self._plot_comparison_estimator(x, y) + + def _plot_comparison_estimator(self, x, y): + _, ax = plt.subplots() + + x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) + y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) + + # Check that the metrics are in the report + # If the metric is not in the report, help the user by suggesting + # supported metrics + reverse_score_info = { + value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() + } + # available_columns = self.summarize_data.columns.get_level_values(0).to_list() + # available_columns.remove("Estimator") + available_columns = self.summarize_data.index + if isinstance(available_columns, pd.MultiIndex): + available_columns = available_columns.get_level_values(0).to_list() + supported_metrics = [ + reverse_score_info.get(col, col) for col in available_columns + ] + if x not in supported_metrics: + raise ValueError( + f"Performance metric {x} not found in the report. " + f"Supported metrics are: {supported_metrics}." + ) + if y not in supported_metrics: + raise ValueError( + f"Performance metric {y} not found in the report. " + f"Supported metrics are: {supported_metrics}." 
+ ) + + x_data = self.summarize_data.loc[x_label] + y_data = self.summarize_data.loc[y_label] + if len(x_data.shape) > 1: + if x_data.shape[0] == 1: + x_data = x_data.reset_index(drop=True).values + else: + raise ValueError( + "The perf metric x requires to add a positive label parameter." + ) + if len(y_data.shape) > 1: + if y_data.shape[0] == 1: + y_data = y_data.reset_index(drop=True).values + else: + raise ValueError( + "The perf metric y requires to add a positive label parameter." + ) + + # Make it clear in the axis labels that we are using the train set + if x == "fit_time" and self.data_source != "train": + x_label_text = x_label + " on train set" + else: + x_label_text = x_label + if y == "fit_time" and self.data_source != "train": + y_label_text = y_label + " on train set" + else: + y_label_text = y_label + + title = f"{x_label} vs {y_label}" + if self.data_source is not None: + title += f" on {self.data_source} set" + + ax.scatter(x=x_data, y=y_data) + ax.set_title(title) + ax.set_xlabel(x_label_text) + ax.set_ylabel(y_label_text) + + # Add labels to the points with a small offset + text = self.summarize_data.columns + for label, x_coord, y_coord in zip(text, x, y, strict=False): + ax.annotate( + label, + (x_coord, y_coord), + textcoords="offset points", + xytext=(10, 0), + bbox=dict( + boxstyle="round,pad=0.3", + edgecolor="gray", + facecolor="white", + alpha=0.7, + ), + ) + + self.ax_ = ax diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index b5d2c4ce7c..f7c9e132df 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -62,7 +62,7 @@ def regression_comparator(): def test_regression_comparator(regression_comparator): """Test that the regression comparator can summarize metrics and plot them.""" display_summary = regression_comparator.metrics.summarize() - display_summary.plot("r2", "fit_time") + display_summary.plot(x="r2", y="fit_time") assert display_summary.ax_.get_xlabel() == "R²" assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" assert len(display_summary.ax_.get_title()) > 4 @@ -72,7 +72,7 @@ def test_data_source_affect_title_and_axis(regression_comparator): """Test that the data source does change the title and axis labels.""" comp = regression_comparator display_summary = comp.metrics.summarize(data_source="train") - display_summary.plot("r2", "fit_time") + display_summary.plot(x="r2", y="fit_time") assert "on train set" in display_summary.ax_.get_title() assert "on test set" not in display_summary.ax_.get_ylabel() @@ -81,9 +81,9 @@ def test_error_invalid_metric(regression_comparator): """Test the error raised when an invalid metric is used.""" comp = regression_comparator with pytest.raises(ValueError): - comp.metrics.summarize().plot("invalid_metric", "fit_time") + comp.metrics.summarize().plot(x="invalid_metric", y="fit_time") with pytest.raises(ValueError): - comp.metrics.summarize().plot("fit_time", "invalid_metric") + comp.metrics.summarize().plot(x="fit_time", y="invalid_metric") def test_needs_positive_label(binary_classification_comparator): @@ -96,12 +96,12 @@ def test_needs_positive_label(binary_classification_comparator): ValueError, match="The perf metric x requires to add a positive label parameter.", ): - comp.metrics.summarize().plot("precision", "fit_time") + 
comp.metrics.summarize().plot(x="precision", y="fit_time") with pytest.raises( ValueError, match="The perf metric y requires to add a positive label parameter.", ): - comp.metrics.summarize().plot("fit_time", "precision") + comp.metrics.summarize().plot(x="fit_time", y="precision") def test_no_positive_label_unrequired(binary_classification_comparator): @@ -110,13 +110,13 @@ def test_no_positive_label_unrequired(binary_classification_comparator): selected. """ display_summary = binary_classification_comparator.metrics.summarize() - display_summary.plot("brier_score", "fit_time") + display_summary.plot(x="brier_score", y="fit_time") assert display_summary.ax_.get_xlabel() == "Brier score" assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" assert len(display_summary.ax_.get_title()) > 4 display_summary = binary_classification_comparator.metrics.summarize() - display_summary.plot("fit_time", "brier_score") + display_summary.plot(x="fit_time", y="brier_score") assert display_summary.ax_.get_xlabel() == "Fit time (s) on train set" assert display_summary.ax_.get_ylabel() == "Brier score" assert len(display_summary.ax_.get_title()) > 4 From 8cd4dbfbc0cc0a6673d29970162ba357c2a30ffc Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 31 Jul 2025 17:39:15 +0200 Subject: [PATCH 31/39] annot to legend --- .../_plot/metrics/metrics_summary_display.py | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 19ceb52525..1b25ddd1ff 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -1,3 +1,5 @@ +import itertools + import matplotlib.pyplot as plt import pandas as pd @@ -128,25 +130,17 @@ def _plot_comparison_estimator(self, x, y): if self.data_source is not None: title += f" on {self.data_source} set" - ax.scatter(x=x_data, y=y_data) - ax.set_title(title) - ax.set_xlabel(x_label_text) - ax.set_ylabel(y_label_text) - - # Add labels to the points with a small offset + # Add legend text = self.summarize_data.columns - for label, x_coord, y_coord in zip(text, x, y, strict=False): - ax.annotate( - label, - (x_coord, y_coord), - textcoords="offset points", - xytext=(10, 0), - bbox=dict( - boxstyle="round,pad=0.3", - edgecolor="gray", - facecolor="white", - alpha=0.7, - ), - ) + markers = itertools.cycle(("o", "s", "^", "D", "v", "P", "*", "X", "h", "8")) + colors = itertools.cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"]) + + for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): + marker = next(markers) + color = next(colors) + ax.scatter(x_coord, y_coord, marker=marker, color=color, label=label) + + ax.set(title=title, xlabel=x_label_text, ylabel=y_label_text) + ax.legend(title="Models", loc="best") self.ax_ = ax From 509d4affa72bd3b1cf261be6a935ae6b6d745f6d Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 31 Jul 2025 17:51:15 +0200 Subject: [PATCH 32/39] change scale according to data range --- .../_plot/metrics/metrics_summary_display.py | 24 +++++++++++++++++-- skore/src/skore/_sklearn/_plot/utils.py | 11 +++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 1b25ddd1ff..721bfd5f5b 100644 --- 
a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -4,7 +4,11 @@ import pandas as pd from skore._sklearn._plot.style import StyleDisplayMixin -from skore._sklearn._plot.utils import HelpDisplayMixin, PlotBackendMixin +from skore._sklearn._plot.utils import ( + HelpDisplayMixin, + PlotBackendMixin, + _interval_max_min_ratio, +) from skore._sklearn.types import ReportType @@ -140,7 +144,23 @@ def _plot_comparison_estimator(self, x, y): color = next(colors) ax.scatter(x_coord, y_coord, marker=marker, color=color, label=label) - ax.set(title=title, xlabel=x_label_text, ylabel=y_label_text) + if _interval_max_min_ratio(x_data) > 5: + xscale = "symlog" if x_data.min() <= 0 else "log" + else: + xscale = "linear" + + if _interval_max_min_ratio(y_data) > 5: + yscale = "symlog" if y_data.min() <= 0 else "log" + else: + yscale = "linear" + + ax.set( + title=title, + xlabel=x_label_text, + ylabel=y_label_text, + xscale=xscale, + yscale=yscale, + ) ax.legend(title="Models", loc="best") self.ax_ = ax diff --git a/skore/src/skore/_sklearn/_plot/utils.py b/skore/src/skore/_sklearn/_plot/utils.py index fa2876e65b..c53475f0d9 100644 --- a/skore/src/skore/_sklearn/_plot/utils.py +++ b/skore/src/skore/_sklearn/_plot/utils.py @@ -466,3 +466,14 @@ def sample_mpl_colormap( """ indices = np.linspace(0, 1, n) return [cmap(i) for i in indices] + + +def _interval_max_min_ratio(data): + """Compute the ratio between the largest and smallest inter-point distances. + + A value larger than 5 typically indicates that the parameter range would + better be displayed with a log scale while a linear scale would be more + suitable otherwise. + """ + diff = np.diff(np.sort(data)) + return diff.max() / diff.min() From 7abf9dc27ae5b00aeecbc2b00626de9027d520e0 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 1 Aug 2025 12:43:26 +0200 Subject: [PATCH 33/39] extend to support custom metrics --- .../_sklearn/_comparison/metrics_accessor.py | 6 +- .../_cross_validation/metrics_accessor.py | 1 + .../_sklearn/_estimator/metrics_accessor.py | 5 +- .../_plot/metrics/metrics_summary_display.py | 80 +++++++++---------- .../test_plot_comparison.py | 16 ++++ 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 8ebcfd759f..1b09c31450 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -187,7 +187,11 @@ class is set to the one provided when creating the report. If `None`, "CrossValidationReport" ) return MetricsSummaryDisplay( - summarize_data=results, report_type=report_type, data_source=data_source + summarize_data=results, + report_type=report_type, + data_source=data_source, + scoring_names=scoring_names, + default_verbose_metric_names=self._score_or_loss_info, ) @progress_decorator(description="Compute metric for each estimator") diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index 22b96d1be2..ad231ec4dc 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -177,6 +177,7 @@ class is set to the one provided when creating the report. 
If `None`, summarize_data=results, report_type="cross-validation", data_source=data_source, + default_verbose_metric_names=self._score_or_loss_info, ) @progress_decorator(description="Compute metric for each split") diff --git a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py index d2e5672112..953b5f6a25 100644 --- a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py @@ -429,7 +429,10 @@ class is set to the one provided when creating the report. If `None`, ) return MetricsSummaryDisplay( - summarize_data=results, report_type="estimator", data_source=data_source + summarize_data=results, + report_type="estimator", + data_source=data_source, + default_verbose_metric_names=self._score_or_loss_info, ) def _compute_metric_scores( diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 721bfd5f5b..87dbbe2804 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -9,7 +9,7 @@ PlotBackendMixin, _interval_max_min_ratio, ) -from skore._sklearn.types import ReportType +from skore._sklearn.types import ReportType, ScoringName class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin, PlotBackendMixin): @@ -19,28 +19,20 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin, PlotBackendMixi This class should not be instantiated directly. """ - # should be removed once transformed into a utils - _SCORE_OR_LOSS_INFO: dict[str, dict[str, str]] = { - "fit_time": {"name": "Fit time (s)", "icon": "(↘︎)"}, - "predict_time": {"name": "Predict time (s)", "icon": "(↘︎)"}, - "accuracy": {"name": "Accuracy", "icon": "(↗︎)"}, - "precision": {"name": "Precision", "icon": "(↗︎)"}, - "recall": {"name": "Recall", "icon": "(↗︎)"}, - "brier_score": {"name": "Brier score", "icon": "(↘︎)"}, - "roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"}, - "log_loss": {"name": "Log loss", "icon": "(↘︎)"}, - "r2": {"name": "R²", "icon": "(↗︎)"}, - "rmse": {"name": "RMSE", "icon": "(↘︎)"}, - "custom_metric": {"name": "Custom metric", "icon": ""}, - "report_metrics": {"name": "Report metrics", "icon": ""}, - } - def __init__( - self, *, summarize_data, report_type: ReportType, data_source: str = "test" + self, + *, + summarize_data, + report_type: ReportType, + data_source: str = "test", + default_verbose_metric_names: dict[str, dict[str, str]], + scoring_names: ScoringName | list[ScoringName] | None = None, ): self.summarize_data = summarize_data self.report_type = report_type self.data_source = data_source + self.scoring_names = scoring_names + self.default_verbose_metric_names = default_verbose_metric_names def frame(self): """Return the summarize as a dataframe. 
@@ -70,41 +62,49 @@ def _plot_matplotlib(self, x: str, y: str) -> None: ): raise NotImplementedError("To come soon!") elif self.report_type == "comparison-estimator": - self._plot_comparison_estimator(x, y) + self._plot_matplotlib_comparison_estimator(x, y) - def _plot_comparison_estimator(self, x, y): + def _plot_matplotlib_comparison_estimator(self, x, y): _, ax = plt.subplots() - x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) - y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) + # Get verbose name from x and y + # if they are not verbose already + x_verbose = self.default_verbose_metric_names.get(x, {}).get("name", x) + y_verbose = self.default_verbose_metric_names.get(y, {}).get("name", y) # Check that the metrics are in the report # If the metric is not in the report, help the user by suggesting # supported metrics reverse_score_info = { - value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() + value["name"]: key + for key, value in self.default_verbose_metric_names.items() } - # available_columns = self.summarize_data.columns.get_level_values(0).to_list() - # available_columns.remove("Estimator") - available_columns = self.summarize_data.index - if isinstance(available_columns, pd.MultiIndex): - available_columns = available_columns.get_level_values(0).to_list() - supported_metrics = [ - reverse_score_info.get(col, col) for col in available_columns - ] + available_metrics = self.summarize_data.index + if isinstance(available_metrics, pd.MultiIndex): + available_metrics = available_metrics.get_level_values(0).to_list() + + # if scoring_names is provided, they are the supported metrics + # otherwise, the default verbose names apply. + if self.scoring_names is not None: + supported_metrics = self.scoring_names + else: + supported_metrics = [ + reverse_score_info.get(col, col) for col in available_metrics + ] + if x not in supported_metrics: raise ValueError( - f"Performance metric {x} not found in the report. " + f"Performance metric '{x}' not found in the report. " f"Supported metrics are: {supported_metrics}." ) if y not in supported_metrics: raise ValueError( - f"Performance metric {y} not found in the report. " + f"Performance metric '{y}' not found in the report. " f"Supported metrics are: {supported_metrics}." 
) - x_data = self.summarize_data.loc[x_label] - y_data = self.summarize_data.loc[y_label] + x_data = self.summarize_data.loc[x_verbose] + y_data = self.summarize_data.loc[y_verbose] if len(x_data.shape) > 1: if x_data.shape[0] == 1: x_data = x_data.reset_index(drop=True).values @@ -122,15 +122,15 @@ def _plot_comparison_estimator(self, x, y): # Make it clear in the axis labels that we are using the train set if x == "fit_time" and self.data_source != "train": - x_label_text = x_label + " on train set" + x_label_text = x_verbose + " on train set" else: - x_label_text = x_label + x_label_text = x_verbose if y == "fit_time" and self.data_source != "train": - y_label_text = y_label + " on train set" + y_label_text = y_verbose + " on train set" else: - y_label_text = y_label + y_label_text = y_verbose - title = f"{x_label} vs {y_label}" + title = f"{x_verbose} vs {y_verbose}" if self.data_source is not None: title += f" on {self.data_source} set" diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index f7c9e132df..d3011eb738 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -120,3 +120,19 @@ def test_no_positive_label_unrequired(binary_classification_comparator): assert display_summary.ax_.get_xlabel() == "Fit time (s) on train set" assert display_summary.ax_.get_ylabel() == "Brier score" assert len(display_summary.ax_.get_title()) > 4 + + +def test_custom_metrics(binary_classification_comparator): + """ + Test that custom metric names are used in the plot. + """ + comp = binary_classification_comparator + display_summary = comp.metrics.summarize( + scoring=["precision", "recall"], + scoring_names=["My Precision", "My Recall"], + pos_label=1, + ) + display_summary.plot(x="My Precision", y="My Recall") + assert display_summary.ax_.get_xlabel() == "My Precision" + assert display_summary.ax_.get_ylabel() == "My Recall" + assert len(display_summary.ax_.get_title()) > 4 From 1d4b0136c42aaa4d3104c51878ae1777dbf50719 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 1 Aug 2025 14:49:09 +0200 Subject: [PATCH 34/39] fix: remove useless subplot --- .../src/skore/_sklearn/_plot/metrics/metrics_summary_display.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 87dbbe2804..392f505033 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -55,8 +55,6 @@ def _plot_matplotlib(self, x: str, y: str) -> None: y : str, default=None The metric to display on y-axis. By default, the second column. 
""" - self.figure_, self.ax_ = plt.subplots() - if self.report_type in ( ["estimator", "cross-validation", "comparison-cross-validation"] ): From 589ddb84ce27ea9f6cdb13ea1b96df1adefc7e18 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 4 Aug 2025 16:36:38 +0200 Subject: [PATCH 35/39] add test for axis --- skore/src/skore/_sklearn/_plot/utils.py | 2 +- .../test_plot_comparison.py | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/skore/src/skore/_sklearn/_plot/utils.py b/skore/src/skore/_sklearn/_plot/utils.py index c53475f0d9..d8e7c2ccc5 100644 --- a/skore/src/skore/_sklearn/_plot/utils.py +++ b/skore/src/skore/_sklearn/_plot/utils.py @@ -475,5 +475,5 @@ def _interval_max_min_ratio(data): better be displayed with a log scale while a linear scale would be more suitable otherwise. """ - diff = np.diff(np.sort(data)) + diff = np.diff(np.sort(data), axis=0) return diff.max() / diff.min() diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index d3011eb738..759430cf76 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -1,5 +1,7 @@ +import numpy as np import pytest from sklearn.datasets import make_classification, make_regression +from sklearn.dummy import DummyRegressor from sklearn.ensemble import ( HistGradientBoostingClassifier, HistGradientBoostingRegressor, @@ -59,6 +61,40 @@ def regression_comparator(): return comp +@pytest.fixture +def high_error_regression(): + X_train = np.random.rand(100, 5) + y_train = np.random.normal(0.1, 0.1, 100) + X_test = np.random.rand(100, 5) + y_test = np.random.normal(100, 1, 100) + + report_1 = EstimatorReport( + estimator=DummyRegressor(strategy="mean"), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_2 = EstimatorReport( + estimator=DummyRegressor(strategy="constant", constant=0.99), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_3 = EstimatorReport( + estimator=DummyRegressor(strategy="constant", constant=100), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + comp = ComparisonReport( + {"report_1": report_1, "report_2": report_2, "report_3": report_3} + ) + return comp + + def test_regression_comparator(regression_comparator): """Test that the regression comparator can summarize metrics and plot them.""" display_summary = regression_comparator.metrics.summarize() @@ -136,3 +172,16 @@ def test_custom_metrics(binary_classification_comparator): assert display_summary.ax_.get_xlabel() == "My Precision" assert display_summary.ax_.get_ylabel() == "My Recall" assert len(display_summary.ax_.get_title()) > 4 + + +def test_various_scales(high_error_regression): + """ + Test that the plot can handle metrics with different scales. 
+ """ + comp = high_error_regression + display_summary = comp.metrics.summarize(scoring=["fit_time", "rmse"]) + display_summary.plot(x="rmse", y="fit_time") + assert display_summary.ax_.get_xscale() == "log" + + display_summary.plot(x="fit_time", y="rmse") + assert display_summary.ax_.get_yscale() == "log" From e5ff840ecabb86a8d9b3692a3076db55b7080775 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 4 Aug 2025 17:11:32 +0200 Subject: [PATCH 36/39] fix test --- .../skore/_sklearn/_plot/metrics/metrics_summary_display.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 392f505033..4ce5225db0 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -105,14 +105,14 @@ def _plot_matplotlib_comparison_estimator(self, x, y): y_data = self.summarize_data.loc[y_verbose] if len(x_data.shape) > 1: if x_data.shape[0] == 1: - x_data = x_data.reset_index(drop=True).values + x_data = x_data.reset_index(drop=True).values[0] else: raise ValueError( "The perf metric x requires to add a positive label parameter." ) if len(y_data.shape) > 1: if y_data.shape[0] == 1: - y_data = y_data.reset_index(drop=True).values + y_data = y_data.reset_index(drop=True).values[0] else: raise ValueError( "The perf metric y requires to add a positive label parameter." From 65f0c9accc0a192b6ade4121028873ec3f812266 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 8 Oct 2025 11:15:39 +0200 Subject: [PATCH 37/39] linting and update import to be consistant with refactor --- .../_sklearn/_plot/metrics/metrics_summary_display.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index a61198779f..0153ee0975 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -3,16 +3,12 @@ import matplotlib.pyplot as plt import pandas as pd -from skore._sklearn._plot.style import StyleDisplayMixin -from skore._sklearn._plot.utils import ( - HelpDisplayMixin, - PlotBackendMixin, - _interval_max_min_ratio, -) +from skore._sklearn._plot.base import DisplayMixin +from skore._sklearn._plot.utils import _interval_max_min_ratio from skore._sklearn.types import ReportType, ScoringName -class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin, PlotBackendMixin): +class MetricsSummaryDisplay(DisplayMixin): """Display for summarize. An instance of this class will be created by `Report.metrics.summarize()`. 
@@ -162,6 +158,7 @@ def _plot_matplotlib_comparison_estimator(self, x, y): ax.legend(title="Models", loc="best") self.ax_ = ax + @DisplayMixin.style_plot def plot(self): """Not yet implemented.""" From 2bfc789d45dbb08cb7e0d8c24b5bbd3102a44642 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 8 Oct 2025 16:18:49 +0200 Subject: [PATCH 38/39] add default plot function in metrics summary display --- .../_plot/metrics/metrics_summary_display.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 0153ee0975..1549fa5ce4 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -51,10 +51,11 @@ def _plot_matplotlib(self, x: str, y: str) -> None: y : str, default=None The metric to display on y-axis. By default, the second column. """ - if self.report_type in ( - ["estimator", "cross-validation", "comparison-cross-validation"] - ): + if self.report_type in (["cross-validation", "comparison-cross-validation"]): raise NotImplementedError("To come soon!") + elif self.report_type == "estimator": + raise NotImplementedError() + # it does not make sense to plot the metrics for a single estimator elif self.report_type == "comparison-estimator": self._plot_matplotlib_comparison_estimator(x, y) @@ -160,6 +161,5 @@ def _plot_matplotlib_comparison_estimator(self, x, y): self.ax_ = ax @DisplayMixin.style_plot - def plot(self): - """Not yet implemented.""" - raise NotImplementedError + def plot(self, x: str, y: str): + self._plot(**{"x": x, "y": y}) From 95eeca975b08d27690ca2e958730b30b98534856 Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 9 Oct 2025 15:56:02 +0200 Subject: [PATCH 39/39] fix docs with available metrics --- examples/getting_started/plot_skore_getting_started.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/getting_started/plot_skore_getting_started.py b/examples/getting_started/plot_skore_getting_started.py index 24a016cfb0..0acdcd53c6 100644 --- a/examples/getting_started/plot_skore_getting_started.py +++ b/examples/getting_started/plot_skore_getting_started.py @@ -214,10 +214,10 @@ comparator.metrics.summarize(indicator_favorability=True).frame() # %% -# To be more specific in our comparison, we can decide to compare the Brier score and the fitting time. +# To be more specific in our comparison, we can decide to compare the ROC AUC and the fitting time. # %% -comparator.metrics.summarize().plot(x="brier_score", y="fit_time") +comparator.metrics.summarize().plot(x="roc_auc", y="fit_time") # %% # Thus, we easily have the result of our benchmark for several recommended metrics.