From 3f1e97d60396347770e644923fbd9219ac2d8850 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:04:11 +0200 Subject: [PATCH 01/39] chore: Change to more explicit class name and add docs --- .../_sklearn/_comparison/metrics_accessor.py | 6 +++++- .../_cross_validation/metrics_accessor.py | 6 +++++- skore/src/skore/sklearn/_plot/metrics/__init__.py | 15 +++++++++++++++ sphinx/reference/report/displays.rst | 1 + sphinx/user_guide/reporters.rst | 7 +++++++ 5 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 skore/src/skore/sklearn/_plot/metrics/__init__.py diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 8adebfb93a..6bb8702fdd 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -7,7 +7,7 @@ from numpy.typing import ArrayLike from sklearn.metrics import make_scorer from sklearn.utils.metaestimators import available_if - +from skore.externals._pandas_accessors import DirNamesMixin from skore._sklearn._base import ( _BaseAccessor, _BaseMetricsAccessor, @@ -60,7 +60,11 @@ def summarize( pos_label: PositiveLabel | None = _DEFAULT, indicator_favorability: bool = False, flat_index: bool = False, +<<<<<<< HEAD:skore/src/skore/_sklearn/_comparison/metrics_accessor.py aggregate: Aggregate | None = ("mean", "std"), +======= + aggregate: Optional[Aggregate] = ("mean", "std"), +>>>>>>> 1a751cbb (chore: Change to more explicit class name and add docs):skore/src/skore/sklearn/_comparison/metrics_accessor.py ) -> MetricsSummaryDisplay: """Report a set of metrics for the estimators. diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index 76564ac3e2..4c4a8473d2 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -7,7 +7,7 @@ from numpy.typing import ArrayLike from sklearn.metrics import make_scorer from sklearn.utils.metaestimators import available_if - +from skore.externals._pandas_accessors import DirNamesMixin from skore._sklearn._base import ( _BaseAccessor, _BaseMetricsAccessor, @@ -61,7 +61,11 @@ def summarize( pos_label: PositiveLabel | None = _DEFAULT, indicator_favorability: bool = False, flat_index: bool = False, +<<<<<<< HEAD:skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py aggregate: Aggregate | None = ("mean", "std"), +======= + aggregate: Optional[Aggregate] = ("mean", "std"), +>>>>>>> 1a751cbb (chore: Change to more explicit class name and add docs):skore/src/skore/sklearn/_cross_validation/metrics_accessor.py ) -> MetricsSummaryDisplay: """Report a set of metrics for our estimator. 
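The two hunks above change `summarize()` so that it returns a `MetricsSummaryDisplay` object instead of a bare dataframe. A minimal consumer-side sketch of that new API, assuming a `ComparisonReport` built from two `EstimatorReport`s; the dataset and estimator choices are illustrative only and not part of this patch:

    from sklearn.datasets import load_breast_cancer
    from sklearn.ensemble import HistGradientBoostingClassifier
    from sklearn.linear_model import LogisticRegression
    from skore import ComparisonReport, EstimatorReport, train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True)
    report_a = EstimatorReport(LogisticRegression(max_iter=10_000), **split_data)
    report_b = EstimatorReport(HistGradientBoostingClassifier(), **split_data)
    comparison = ComparisonReport({"report_a": report_a, "report_b": report_b})

    # summarize() now returns a display; frame() recovers the metrics dataframe
    display = comparison.metrics.summarize(indicator_favorability=True)
    metrics_df = display.frame()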
diff --git a/skore/src/skore/sklearn/_plot/metrics/__init__.py b/skore/src/skore/sklearn/_plot/metrics/__init__.py new file mode 100644 index 0000000000..e3ded1b085 --- /dev/null +++ b/skore/src/skore/sklearn/_plot/metrics/__init__.py @@ -0,0 +1,15 @@ +from skore.sklearn._plot.metrics.confusion_matrix import ConfusionMatrixDisplay +from skore.sklearn._plot.metrics.precision_recall_curve import ( + PrecisionRecallCurveDisplay, +) +from skore.sklearn._plot.metrics.prediction_error import PredictionErrorDisplay +from skore.sklearn._plot.metrics.roc_curve import RocCurveDisplay +from skore.sklearn._plot.metrics.summarize import MetricsSummaryDisplay + +__all__ = [ + "ConfusionMatrixDisplay", + "PrecisionRecallCurveDisplay", + "PredictionErrorDisplay", + "RocCurveDisplay", + "MetricsSummaryDisplay", +] diff --git a/sphinx/reference/report/displays.rst b/sphinx/reference/report/displays.rst index 7c4952de44..b32a7490d5 100644 --- a/sphinx/reference/report/displays.rst +++ b/sphinx/reference/report/displays.rst @@ -16,3 +16,4 @@ the API of each display. RocCurveDisplay PrecisionRecallCurveDisplay PredictionErrorDisplay + MetricsSummaryDisplay diff --git a/sphinx/user_guide/reporters.rst b/sphinx/user_guide/reporters.rst index 1cd338023b..06c925c78a 100644 --- a/sphinx/user_guide/reporters.rst +++ b/sphinx/user_guide/reporters.rst @@ -67,6 +67,13 @@ regression). Nevertheless, you can specify the metrics you want to compute thank scikit-learn scorer names or a built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed with :func:`sklearn.metrics.make_scorer`. +We provide the :class:`EstimatorReport.metrics.summarize` method that aggregates metrics in a single dataframe, available through a Display. By default, a set of metrics is computed based +on the type of target variable (e.g. classification or regression). Nevertheless, you +can specify the metrics you want to compute thanks to the `scoring` parameter. We accept +different types: (i) some strings that correspond to scikit-learn scorer names or a +built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed +with :func:`sklearn.metrics.make_scorer`. + Refer to the :ref:`displays` section for more details regarding the `skore` display API. Refer to the :ref:`estimator_metrics` section for more details on all the available metrics in `skore`. From 1e68910a55b37948f19fc600f5a859962f86e270 Mon Sep 17 00:00:00 2001 From: Marie Sacksick <79304610+MarieSacksick@users.noreply.github.com> Date: Wed, 11 Jun 2025 11:19:34 +0200 Subject: [PATCH 02/39] Update sphinx/user_guide/reporters.rst Co-authored-by: Guillaume Lemaitre --- sphinx/user_guide/reporters.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/user_guide/reporters.rst b/sphinx/user_guide/reporters.rst index 06c925c78a..299e4bd992 100644 --- a/sphinx/user_guide/reporters.rst +++ b/sphinx/user_guide/reporters.rst @@ -67,7 +67,7 @@ regression). Nevertheless, you can specify the metrics you want to compute thank scikit-learn scorer names or a built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed with :func:`sklearn.metrics.make_scorer`. -We provide the :class:`EstimatorReport.metrics.summarize` method that aggregates metrics in a single dataframe, available through a Display. By default, a set of metrics is computed based +We provide the :class:`EstimatorReport.metrics.summarize` method that aggregates metrics in a single dataframe, available through a :class:`~skore.Display`. 
By default, a set of metrics is computed based on the type of target variable (e.g. classification or regression). Nevertheless, you can specify the metrics you want to compute thanks to the `scoring` parameter. We accept different types: (i) some strings that correspond to scikit-learn scorer names or a From 39a66cb707708bde24efbb36e705bf3bb73dab33 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:21:17 +0200 Subject: [PATCH 03/39] alphabetic sorting --- sphinx/reference/report/displays.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/sphinx/reference/report/displays.rst b/sphinx/reference/report/displays.rst index b32a7490d5..7c4952de44 100644 --- a/sphinx/reference/report/displays.rst +++ b/sphinx/reference/report/displays.rst @@ -16,4 +16,3 @@ the API of each display. RocCurveDisplay PrecisionRecallCurveDisplay PredictionErrorDisplay - MetricsSummaryDisplay From 3158ace1a62772ff93015da7fc71590dce71a0f1 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:24:45 +0200 Subject: [PATCH 04/39] fix init for sphinx --- skore/src/skore/sklearn/__init__.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 skore/src/skore/sklearn/__init__.py diff --git a/skore/src/skore/sklearn/__init__.py b/skore/src/skore/sklearn/__init__.py new file mode 100644 index 0000000000..242669655f --- /dev/null +++ b/skore/src/skore/sklearn/__init__.py @@ -0,0 +1,25 @@ +"""Enhance `sklearn` functions.""" + +from skore.sklearn._comparison import ComparisonReport +from skore.sklearn._cross_validation import CrossValidationReport +from skore.sklearn._estimator import EstimatorReport +from skore.sklearn._plot import ( + MetricsSummaryDisplay, + PrecisionRecallCurveDisplay, + PredictionErrorDisplay, + RocCurveDisplay, +) +from skore.sklearn.find_estimators import find_estimators +from skore.sklearn.train_test_split.train_test_split import train_test_split + +__all__ = [ + "ComparisonReport", + "CrossValidationReport", + "EstimatorReport", + "PrecisionRecallCurveDisplay", + "PredictionErrorDisplay", + "RocCurveDisplay", + "MetricsSummaryDisplay", + "train_test_split", + "find_estimators", +] From 4665e9e751906bec599baa6b725b505d8ad5ca0b Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:27:12 +0200 Subject: [PATCH 05/39] add function to be consistant with a display --- .../_plot/metrics/metrics_summary_display.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py new file mode 100644 index 0000000000..42a587ce45 --- /dev/null +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -0,0 +1,27 @@ +from skore.sklearn._plot.style import StyleDisplayMixin +from skore.sklearn._plot.utils import HelpDisplayMixin + + +class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): + """Display for summarize. + + An instance of this class will be created by `Report.metrics.summarize()`. + This class should not be instantiated directly. + """ + + def __init__(self, summarize_data): + self.summarize_data = summarize_data + + def frame(self): + """Return the summarize as a dataframe. + + Returns + ------- + frame : pandas.DataFrame + The report metrics as a dataframe. 
+ """ + return self.summarize_data + + @StyleDisplayMixin.style_plot + def plot(self): + raise NotImplementedError From d778605a24330d7543310db1560eaf58d1e89678 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:28:03 +0200 Subject: [PATCH 06/39] fix init for sphinx --- skore/src/skore/sklearn/_plot/metrics/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/__init__.py b/skore/src/skore/sklearn/_plot/metrics/__init__.py index e3ded1b085..3e55d01b1f 100644 --- a/skore/src/skore/sklearn/_plot/metrics/__init__.py +++ b/skore/src/skore/sklearn/_plot/metrics/__init__.py @@ -1,10 +1,10 @@ from skore.sklearn._plot.metrics.confusion_matrix import ConfusionMatrixDisplay +from skore.sklearn._plot.metrics.metrics_summary_display import MetricsSummaryDisplay from skore.sklearn._plot.metrics.precision_recall_curve import ( PrecisionRecallCurveDisplay, ) from skore.sklearn._plot.metrics.prediction_error import PredictionErrorDisplay from skore.sklearn._plot.metrics.roc_curve import RocCurveDisplay -from skore.sklearn._plot.metrics.summarize import MetricsSummaryDisplay __all__ = [ "ConfusionMatrixDisplay", From 6a0b34de8a08b789e88bcbb74d09203fecec3ea8 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 11:36:34 +0200 Subject: [PATCH 07/39] docs: explain a bit more about display and their functions --- sphinx/user_guide/reporters.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sphinx/user_guide/reporters.rst b/sphinx/user_guide/reporters.rst index 299e4bd992..1cd338023b 100644 --- a/sphinx/user_guide/reporters.rst +++ b/sphinx/user_guide/reporters.rst @@ -67,13 +67,6 @@ regression). Nevertheless, you can specify the metrics you want to compute thank scikit-learn scorer names or a built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed with :func:`sklearn.metrics.make_scorer`. -We provide the :class:`EstimatorReport.metrics.summarize` method that aggregates metrics in a single dataframe, available through a :class:`~skore.Display`. By default, a set of metrics is computed based -on the type of target variable (e.g. classification or regression). Nevertheless, you -can specify the metrics you want to compute thanks to the `scoring` parameter. We accept -different types: (i) some strings that correspond to scikit-learn scorer names or a -built-in `skore` metric name, (ii) a callable or a (iii) scikit-learn scorer constructed -with :func:`sklearn.metrics.make_scorer`. - Refer to the :ref:`displays` section for more details regarding the `skore` display API. Refer to the :ref:`estimator_metrics` section for more details on all the available metrics in `skore`. 
From 963bae09256c3bdb38d7690d506d48a48ce074b4 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 9 Jun 2025 15:44:46 +0200 Subject: [PATCH 08/39] merge --- .../plot_skore_getting_started.py | 5 ++ .../_sklearn/_comparison/metrics_accessor.py | 2 +- .../_cross_validation/metrics_accessor.py | 2 +- .../_sklearn/_estimator/metrics_accessor.py | 2 +- .../_plot/metrics/metrics_summary_display.py | 69 ++++++++++++++++--- 5 files changed, 67 insertions(+), 13 deletions(-) diff --git a/examples/getting_started/plot_skore_getting_started.py b/examples/getting_started/plot_skore_getting_started.py index e1ebdde86f..4c2ab09b33 100644 --- a/examples/getting_started/plot_skore_getting_started.py +++ b/examples/getting_started/plot_skore_getting_started.py @@ -207,6 +207,11 @@ # %% comparator.metrics.summarize(indicator_favorability=True).frame() +# %% +# To be more specific in our comparison, we can decide to compare the Brier score and the fitting time. + +# %% +comparator.metrics.report_metrics().plot(x="brier_score", y="fit_time") # %% # Thus, we easily have the result of our benchmark for several recommended metrics. diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 6bb8702fdd..7ef85d3049 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -178,7 +178,7 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(results) + return MetricsSummaryDisplay(results, report_type="comparison-cross-validation") @progress_decorator(description="Compute metric for each estimator") def _compute_metric_scores( diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index 4c4a8473d2..a73ec823f2 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -177,7 +177,7 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(summarize_data=results) + return MetricsSummaryDisplay(summarize_data=results, report_type="cross-validation") @progress_decorator(description="Compute metric for each split") def _compute_metric_scores( diff --git a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py index ff67d64b10..cdaf2e7342 100644 --- a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py @@ -426,7 +426,7 @@ class is set to the one provided when creating the report. 
If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(summarize_data=results) + return MetricsSummaryDisplay(summarize_data=results, report_type="estimator") def _compute_metric_scores( self, diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 42a587ce45..1d745a779e 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -1,5 +1,8 @@ +import matplotlib.pyplot as plt + from skore.sklearn._plot.style import StyleDisplayMixin from skore.sklearn._plot.utils import HelpDisplayMixin +from skore.sklearn.types import ReportType class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): @@ -9,19 +12,65 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): This class should not be instantiated directly. """ - def __init__(self, summarize_data): + def __init__( + self, + *, + summarize_data, + report_type: ReportType, + ): self.summarize_data = summarize_data + self.report_type = report_type def frame(self): - """Return the summarize as a dataframe. - - Returns - ------- - frame : pandas.DataFrame - The report metrics as a dataframe. - """ + """Return the summarize as a dataframe.""" return self.summarize_data @StyleDisplayMixin.style_plot - def plot(self): - raise NotImplementedError + def plot(self, x, y) -> None: + """Plot visualization. + + Extra keyword arguments will be passed to matplotlib's `plot`. + + Parameters + ---------- + x : str, default=None + The metric to display on x-axis. By default, the first column. + + y : str, default=None + The metric to display on y-axis. By default, the second column. + + Notes + ----- + The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) + in scikit-learn is computed without any interpolation. To be consistent + with this metric, the precision-recall curve is plotted without any + interpolation as well (step-wise style). + + You can change this style by passing the keyword argument + `drawstyle="default"`. However, the curve will not be strictly + consistent with the reported average precision. 
+ + Examples + -------- + >>> from sklearn.datasets import load_breast_cancer + >>> from sklearn.linear_model import LogisticRegression + >>> from skore import train_test_split + >>> from skore import EstimatorReport + >>> X, y = load_breast_cancer(return_X_y=True) + >>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) + >>> classifier = LogisticRegression(max_iter=10_000) + >>> report = EstimatorReport(classifier, **split_data) + >>> display = report.metrics.precision_recall() + >>> display.plot(pr_curve_kwargs={"color": "tab:red"}) + """ + self.figure_, self.ax_ = plt.subplots() + + if self.report_type in ( + ["estimator", "cross-validation", "comparison-cross-validation"] + ): + raise NotImplementedError("To come soon!") + elif self.report_type == "comparison-estimator": + self.plot_comparison_estimator() + + def plot_comparison_estimator(self): + self.report_metrics_data.scatter(x=0, y=1) From e9e8d5d9b0f1dd43b2e3c34a4ad80e6b49220aca Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 9 Jun 2025 16:29:19 +0200 Subject: [PATCH 09/39] value error based on literal --- .../skore/_sklearn/_comparison/metrics_accessor.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 7ef85d3049..7c361bac6a 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -24,6 +24,7 @@ _DEFAULT, Aggregate, PositiveLabel, + ReportType, Scoring, ScoringName, YPlotData, @@ -178,7 +179,18 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(results, report_type="comparison-cross-validation") + + report_type: ReportType + if self._parent._reports_type == "EstimatorReport": + report_type = "comparison-estimator" + elif self._parent._reports_type == "CrossValidationReport": + report_type = "comparison-cross-validation" + else: + raise ValueError( + "Comparison should only apply to EstimatorReport or " + "CrossValidationReport" + ) + return MetricsSummaryDisplay(summarize_data=results, report_type=report_type) @progress_decorator(description="Compute metric for each estimator") def _compute_metric_scores( From 20f7685f191a45bbb6412d8c45b8eec87cbc769b Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 9 Jun 2025 17:46:36 +0200 Subject: [PATCH 10/39] plot for comparison report for estimator --- .../_plot/metrics/metrics_summary_display.py | 111 +++++++++++++----- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 1d745a779e..8c9bdd8d7b 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -13,13 +13,11 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): """ def __init__( - self, - *, - summarize_data, - report_type: ReportType, + self, *, summarize_data, report_type: ReportType, data_source: str = "test" ): self.summarize_data = summarize_data self.report_type = report_type + self.data_source = data_source def frame(self): """Return the summarize as a dataframe.""" @@ -29,8 +27,6 @@ def frame(self): def plot(self, x, y) -> None: """Plot visualization. 
- Extra keyword arguments will be passed to matplotlib's `plot`. - Parameters ---------- x : str, default=None @@ -39,29 +35,9 @@ def plot(self, x, y) -> None: y : str, default=None The metric to display on y-axis. By default, the second column. - Notes - ----- - The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) - in scikit-learn is computed without any interpolation. To be consistent - with this metric, the precision-recall curve is plotted without any - interpolation as well (step-wise style). - - You can change this style by passing the keyword argument - `drawstyle="default"`. However, the curve will not be strictly - consistent with the reported average precision. - - Examples - -------- - >>> from sklearn.datasets import load_breast_cancer - >>> from sklearn.linear_model import LogisticRegression - >>> from skore import train_test_split - >>> from skore import EstimatorReport - >>> X, y = load_breast_cancer(return_X_y=True) - >>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) - >>> classifier = LogisticRegression(max_iter=10_000) - >>> report = EstimatorReport(classifier, **split_data) - >>> display = report.metrics.precision_recall() - >>> display.plot(pr_curve_kwargs={"color": "tab:red"}) + Returns + ------- + A matplotlib plot. """ self.figure_, self.ax_ = plt.subplots() @@ -70,7 +46,80 @@ def plot(self, x, y) -> None: ): raise NotImplementedError("To come soon!") elif self.report_type == "comparison-estimator": - self.plot_comparison_estimator() + self.plot_comparison_estimator(x, y) + + def plot_comparison_estimator(self, x, y): + fig, ax = plt.subplots() + + x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) + y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) + + # Check that the metrics are in the report + # If the metric is not in the report, help the user by suggesting + # supported metrics + reverse_score_info = { + value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() + } + available_columns = self.summarize_data.columns.get_level_values(0).to_list() + available_columns.remove("Estimator") + supported_metrics = [ + reverse_score_info.get(col, col) for col in available_columns + ] + if x not in supported_metrics: + raise ValueError( + f"Performance metric {x} not found in the report. " + f"Supported metrics are: {supported_metrics}." + ) + if y not in supported_metrics: + raise ValueError( + f"Performance metric {y} not found in the report. " + f"Supported metrics are: {supported_metrics}." + ) + + x_data = self.summarize_data[x_label] + y_data = self.summarize_data[y_label] + if len(x_data.shape) > 1: + raise ValueError( + "The perf metric x requires to add a positive label parameter." + ) + if len(y_data.shape) > 1: + raise ValueError( + "The perf metric y requires to add a positive label parameter." 
+ ) + + # Make it clear in the axis labels that we are using the train set + if x == "fit_time" and self.data_source != "train": + x_label_text = x_label + " on train set" + else: + x_label_text = x_label + if y == "fit_time" and self.data_source != "train": + y_label_text = y_label + " on train set" + else: + y_label_text = y_label + + title = f"{self.display_label_x} vs {self.display_label_x}" + if self.data_source is not None: + title += f" on {self.data_source} data" + + ax.scatter(x=x_data, y=self.summarize_data[y_data]) + ax.set_title(title) + ax.set_xlabel(x_label_text) + ax.set_ylabel(y_label_text) + + # Add labels to the points with a small offset + text = self.summarize_data["Estimator"] + for label, x_coord, y_coord in zip(text, x, y): + ax.annotate( + label, + (x_coord, y_coord), + textcoords="offset points", + xytext=(10, 0), + bbox=dict( + boxstyle="round,pad=0.3", + edgecolor="gray", + facecolor="white", + alpha=0.7, + ), + ) - def plot_comparison_estimator(self): self.report_metrics_data.scatter(x=0, y=1) From b29338959d5e40a3fcccb55a931c3cfe339ffbc8 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 14:57:08 +0200 Subject: [PATCH 11/39] linting --- .../src/skore/_sklearn/_cross_validation/metrics_accessor.py | 4 +++- .../skore/sklearn/_plot/metrics/metrics_summary_display.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index a73ec823f2..801a396c1d 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -177,7 +177,9 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(summarize_data=results, report_type="cross-validation") + return MetricsSummaryDisplay( + summarize_data=results, report_type="cross-validation" + ) @progress_decorator(description="Compute metric for each split") def _compute_metric_scores( diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 8c9bdd8d7b..550d6cdc87 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -49,7 +49,7 @@ def plot(self, x, y) -> None: self.plot_comparison_estimator(x, y) def plot_comparison_estimator(self, x, y): - fig, ax = plt.subplots() + _, ax = plt.subplots() x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) From 22c0e4981ecc2313ee55ea92112315c343e4a4f8 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 11 Jun 2025 15:40:01 +0200 Subject: [PATCH 12/39] introduce temporarily _SCORE_OR_LOSS_INFO in class --- .../_plot/metrics/metrics_summary_display.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 550d6cdc87..17b8a9e52e 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -12,6 +12,22 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): This class should not be instantiated directly. 
""" + # should be removed once transformed into a utils + _SCORE_OR_LOSS_INFO: dict[str, dict[str, str]] = { + "fit_time": {"name": "Fit time (s)", "icon": "(↘︎)"}, + "predict_time": {"name": "Predict time (s)", "icon": "(↘︎)"}, + "accuracy": {"name": "Accuracy", "icon": "(↗︎)"}, + "precision": {"name": "Precision", "icon": "(↗︎)"}, + "recall": {"name": "Recall", "icon": "(↗︎)"}, + "brier_score": {"name": "Brier score", "icon": "(↘︎)"}, + "roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"}, + "log_loss": {"name": "Log loss", "icon": "(↘︎)"}, + "r2": {"name": "R²", "icon": "(↗︎)"}, + "rmse": {"name": "RMSE", "icon": "(↘︎)"}, + "custom_metric": {"name": "Custom metric", "icon": ""}, + "report_metrics": {"name": "Report metrics", "icon": ""}, + } + def __init__( self, *, summarize_data, report_type: ReportType, data_source: str = "test" ): From 00ae0d734f365ed37d91b205a510f36b4f15c0ff Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 12 Jun 2025 16:45:44 +0200 Subject: [PATCH 13/39] first version of plot for comp report ready --- .../_plot/metrics/metrics_summary_display.py | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 17b8a9e52e..bfc5517546 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -1,4 +1,5 @@ import matplotlib.pyplot as plt +import pandas as pd from skore.sklearn._plot.style import StyleDisplayMixin from skore.sklearn._plot.utils import HelpDisplayMixin @@ -55,8 +56,6 @@ def plot(self, x, y) -> None: ------- A matplotlib plot. """ - self.figure_, self.ax_ = plt.subplots() - if self.report_type in ( ["estimator", "cross-validation", "comparison-cross-validation"] ): @@ -65,7 +64,7 @@ def plot(self, x, y) -> None: self.plot_comparison_estimator(x, y) def plot_comparison_estimator(self, x, y): - _, ax = plt.subplots() + fig, ax = plt.subplots() x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) @@ -76,8 +75,11 @@ def plot_comparison_estimator(self, x, y): reverse_score_info = { value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() } - available_columns = self.summarize_data.columns.get_level_values(0).to_list() - available_columns.remove("Estimator") + index = self.summarize_data.index + if isinstance(index, pd.MultiIndex): + available_columns = index.get_level_values(0).to_list() + else: + available_columns = index.tolist() supported_metrics = [ reverse_score_info.get(col, col) for col in available_columns ] @@ -92,16 +94,23 @@ def plot_comparison_estimator(self, x, y): f"Supported metrics are: {supported_metrics}." ) - x_data = self.summarize_data[x_label] - y_data = self.summarize_data[y_label] - if len(x_data.shape) > 1: + x_data = self.summarize_data.loc[x_label] + y_data = self.summarize_data.loc[y_label] + if len(x_data.shape) > 1 and x_data.shape[0] > 1: + # case where we have multiIndex, and the metric is not a single value raise ValueError( "The perf metric x requires to add a positive label parameter." 
) - if len(y_data.shape) > 1: + elif len(x_data.shape) > 1 and x_data.shape[0] == 1: + # case where we have multiIndex, but the metric is not affected by the + # pos_label + x_data = x_data.squeeze() + if len(y_data.shape) > 1 and y_data.shape[0] > 1: raise ValueError( "The perf metric y requires to add a positive label parameter." ) + elif len(y_data.shape) > 1 and y_data.shape[0] == 1: + y_data = y_data.squeeze() # Make it clear in the axis labels that we are using the train set if x == "fit_time" and self.data_source != "train": @@ -113,18 +122,18 @@ def plot_comparison_estimator(self, x, y): else: y_label_text = y_label - title = f"{self.display_label_x} vs {self.display_label_x}" + title = f"{x_label} vs {y_label}" if self.data_source is not None: title += f" on {self.data_source} data" - ax.scatter(x=x_data, y=self.summarize_data[y_data]) + ax.scatter(x=x_data, y=y_data) ax.set_title(title) ax.set_xlabel(x_label_text) ax.set_ylabel(y_label_text) # Add labels to the points with a small offset - text = self.summarize_data["Estimator"] - for label, x_coord, y_coord in zip(text, x, y): + text = self.summarize_data.columns.tolist() + for label, x_coord, y_coord in zip(text, x_data, y_data): ax.annotate( label, (x_coord, y_coord), @@ -138,4 +147,5 @@ def plot_comparison_estimator(self, x, y): ), ) - self.report_metrics_data.scatter(x=0, y=1) + plt.tight_layout() + return fig From c557d1ec5877a64714ef42b72af8791cc3843cb6 Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 12 Jun 2025 16:49:30 +0200 Subject: [PATCH 14/39] linting --- .../src/skore/sklearn/_plot/metrics/metrics_summary_display.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index bfc5517546..d742e57581 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -133,7 +133,7 @@ def plot_comparison_estimator(self, x, y): # Add labels to the points with a small offset text = self.summarize_data.columns.tolist() - for label, x_coord, y_coord in zip(text, x_data, y_data): + for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): ax.annotate( label, (x_coord, y_coord), From 866f367708815a0c9c0d7b0de2aefcc36404e486 Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 12 Jun 2025 16:56:43 +0200 Subject: [PATCH 15/39] adapt to name change from report_metrics to summarize --- examples/getting_started/plot_skore_getting_started.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/getting_started/plot_skore_getting_started.py b/examples/getting_started/plot_skore_getting_started.py index 4c2ab09b33..568a37c31e 100644 --- a/examples/getting_started/plot_skore_getting_started.py +++ b/examples/getting_started/plot_skore_getting_started.py @@ -211,7 +211,7 @@ # To be more specific in our comparison, we can decide to compare the Brier score and the fitting time. # %% -comparator.metrics.report_metrics().plot(x="brier_score", y="fit_time") +comparator.metrics.summarize().plot(x="brier_score", y="fit_time") # %% # Thus, we easily have the result of our benchmark for several recommended metrics. 
From ddb734455af9e486fc79f5143dc14ff829ff4a2d Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:11:51 +0200 Subject: [PATCH 16/39] add some tests --- .../_plot/metrics/metrics_summary_display.py | 22 ++-- .../test_plot_comparison.py | 107 ++++++++++++++++++ 2 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index d742e57581..d4bef61b43 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -52,6 +52,14 @@ def plot(self, x, y) -> None: y : str, default=None The metric to display on y-axis. By default, the second column. + Attributes + ---------- + ax_ : matplotlib axes or ndarray of axes + The axes on which the precision-recall curve is plotted. + + figure_ : matplotlib figure + The figure on which the precision-recall curve is plotted. + Returns ------- A matplotlib plot. @@ -64,7 +72,7 @@ def plot(self, x, y) -> None: self.plot_comparison_estimator(x, y) def plot_comparison_estimator(self, x, y): - fig, ax = plt.subplots() + self.figure_, self.ax_ = plt.subplots() x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) @@ -126,15 +134,15 @@ def plot_comparison_estimator(self, x, y): if self.data_source is not None: title += f" on {self.data_source} data" - ax.scatter(x=x_data, y=y_data) - ax.set_title(title) - ax.set_xlabel(x_label_text) - ax.set_ylabel(y_label_text) + self.ax_.scatter(x=x_data, y=y_data) + self.ax_.set_title(title) + self.ax_.set_xlabel(x_label_text) + self.ax_.set_ylabel(y_label_text) # Add labels to the points with a small offset text = self.summarize_data.columns.tolist() for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): - ax.annotate( + self.ax_.annotate( label, (x_coord, y_coord), textcoords="offset points", @@ -148,4 +156,4 @@ def plot_comparison_estimator(self, x, y): ) plt.tight_layout() - return fig + return self.figure_, self.ax_ diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py new file mode 100644 index 0000000000..b2e1af3410 --- /dev/null +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -0,0 +1,107 @@ +import pytest +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.model_selection import train_test_split +from skore import ComparisonReport, EstimatorReport + + +@pytest.fixture +def multi_classification_comparator(): + X, y = make_classification( + n_samples=100, + n_features=5, + n_informative=3, + n_redundant=0, + n_classes=3, + random_state=42, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0 + ) + + report_1 = EstimatorReport( + estimator=HistGradientBoostingClassifier(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_2 = EstimatorReport( + estimator=LogisticRegression(max_iter=50), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + comp = 
ComparisonReport({"report_1": report_1, "report_2": report_2}) + return comp + + +@pytest.fixture +def binary_classification_comparator(): + X, y = make_classification(random_state=0) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0 + ) + + report_1 = EstimatorReport( + estimator=HistGradientBoostingClassifier(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_2 = EstimatorReport( + estimator=LogisticRegression(max_iter=50), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + comp = ComparisonReport({"report_1": report_1, "report_2": report_2}) + return comp + + +@pytest.fixture +def regression_comparator(): + X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0 + ) + + report_1 = EstimatorReport( + estimator=HistGradientBoostingRegressor(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_2 = EstimatorReport( + estimator=LinearRegression(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + comp = ComparisonReport({"report_1": report_1, "report_2": report_2}) + return comp + + +def test_regression_comparator(regression_comparator): + display_summary = regression_comparator.metrics.summarize() + display_summary.plot_comparison_estimator("r2", "fit_time") + assert display_summary.ax_.get_xlabel() == "R²" + assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" + assert len(display_summary.ax_.get_title()) > 4 + + +def test_error_invalid_metric(regression_comparator): + comp = regression_comparator + with pytest.raises(ValueError): + comp.metrics.summarize().plot_comparison_estimator( + "invalid_metric", "invalid_metric_bis" + ) From b2f2250b0e4f4582400ae0b00bed67e5a4c7f8e9 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:16:00 +0200 Subject: [PATCH 17/39] add test not implemented error --- .../metrics_summary_display/test_common.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py new file mode 100644 index 0000000000..bbf7d58b7a --- /dev/null +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py @@ -0,0 +1,34 @@ +import pytest +from sklearn.datasets import make_classification +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.model_selection import train_test_split +from skore import EstimatorReport + + +@pytest.fixture +def estimator_report_classification(): + X, y = make_classification(random_state=0) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0 + ) + + estimator_report = EstimatorReport( + estimator=HistGradientBoostingClassifier(), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + return estimator_report + + +def test_not_implemented(estimator_report_classification): + """ + Test that the plot_comparison_estimator method raises NotImplementedError + when called with a binary classification comparator. 
+ """ + estimator_report_classification.metrics.summarize() + with pytest.raises(NotImplementedError): + estimator_report_classification.metrics.summarize().plot( + x="accuracy", y="f1_score" + ) From c6d063a263f6b001e51961c66ba1fdcd58ff19fa Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:34:20 +0200 Subject: [PATCH 18/39] add tests --- .../test_plot_comparison.py | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index b2e1af3410..ed6d92c56f 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -93,15 +93,37 @@ def regression_comparator(): def test_regression_comparator(regression_comparator): display_summary = regression_comparator.metrics.summarize() - display_summary.plot_comparison_estimator("r2", "fit_time") + display_summary.plot("r2", "fit_time") assert display_summary.ax_.get_xlabel() == "R²" assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" assert len(display_summary.ax_.get_title()) > 4 +def test_data_source_affect_title_and_axis(regression_comparator): + comp = regression_comparator + display_summary = comp.metrics.summarize(data_source="train") + display_summary.plot("r2", "fit_time") + assert "on train set" in display_summary.ax_.get_title() + assert "on test set" not in display_summary.ax_.get_ylabel() + + def test_error_invalid_metric(regression_comparator): comp = regression_comparator with pytest.raises(ValueError): - comp.metrics.summarize().plot_comparison_estimator( - "invalid_metric", "invalid_metric_bis" - ) + comp.metrics.summarize().plot("invalid_metric", "fit_time") + with pytest.raises(ValueError): + comp.metrics.summarize().plot("fit_time", "invalid_metric") + + +def test_needs_positive_label(binary_classification_comparator): + comp = binary_classification_comparator + with pytest.raises( + ValueError, + match="The perf metric x requires to add a positive label parameter.", + ): + comp.metrics.summarize().plot("precision", "fit_time") + with pytest.raises( + ValueError, + match="The perf metric y requires to add a positive label parameter.", + ): + comp.metrics.summarize().plot("fit_time", "precision") From 42fb3a31c3a573b077557ee1ec7800852e399b01 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:34:41 +0200 Subject: [PATCH 19/39] add data_source at display creation --- skore/src/skore/_sklearn/_comparison/metrics_accessor.py | 4 +++- .../skore/_sklearn/_cross_validation/metrics_accessor.py | 4 +++- skore/src/skore/_sklearn/_estimator/metrics_accessor.py | 5 ++++- .../skore/sklearn/_plot/metrics/metrics_summary_display.py | 6 +++--- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 7c361bac6a..cd41a4fb70 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -190,7 +190,9 @@ class is set to the one provided when creating the report. 
If `None`, "Comparison should only apply to EstimatorReport or " "CrossValidationReport" ) - return MetricsSummaryDisplay(summarize_data=results, report_type=report_type) + return MetricsSummaryDisplay( + summarize_data=results, report_type=report_type, data_source=data_source + ) @progress_decorator(description="Compute metric for each estimator") def _compute_metric_scores( diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index 801a396c1d..9a10d1ed7e 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -178,7 +178,9 @@ class is set to the one provided when creating the report. If `None`, r"\((.*)\)$", r"\1", regex=True ) return MetricsSummaryDisplay( - summarize_data=results, report_type="cross-validation" + summarize_data=results, + report_type="cross-validation", + data_source=data_source, ) @progress_decorator(description="Compute metric for each split") diff --git a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py index cdaf2e7342..3d81319c6f 100644 --- a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py @@ -426,7 +426,10 @@ class is set to the one provided when creating the report. If `None`, results.index = results.index.str.replace( r"\((.*)\)$", r"\1", regex=True ) - return MetricsSummaryDisplay(summarize_data=results, report_type="estimator") + + return MetricsSummaryDisplay( + summarize_data=results, report_type="estimator", data_source=data_source + ) def _compute_metric_scores( self, diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index d4bef61b43..f440c1602a 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -69,9 +69,9 @@ def plot(self, x, y) -> None: ): raise NotImplementedError("To come soon!") elif self.report_type == "comparison-estimator": - self.plot_comparison_estimator(x, y) + self._plot_comparison_estimator(x, y) - def plot_comparison_estimator(self, x, y): + def _plot_comparison_estimator(self, x, y): self.figure_, self.ax_ = plt.subplots() x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) @@ -132,7 +132,7 @@ def plot_comparison_estimator(self, x, y): title = f"{x_label} vs {y_label}" if self.data_source is not None: - title += f" on {self.data_source} data" + title += f" on {self.data_source} set" self.ax_.scatter(x=x_data, y=y_data) self.ax_.set_title(title) From 38a91bf4c5bd4b732b9599b0f368d38558253084 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 10:48:37 +0200 Subject: [PATCH 20/39] add new tests --- .../test_plot_comparison.py | 46 ++++++------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index ed6d92c56f..31dcc29d8e 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -9,38 +9,6 @@ from skore import ComparisonReport, EstimatorReport -@pytest.fixture -def multi_classification_comparator(): - X, y = make_classification( - 
n_samples=100, - n_features=5, - n_informative=3, - n_redundant=0, - n_classes=3, - random_state=42, - ) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=0 - ) - - report_1 = EstimatorReport( - estimator=HistGradientBoostingClassifier(), - X_train=X_train, - y_train=y_train, - X_test=X_test, - y_test=y_test, - ) - report_2 = EstimatorReport( - estimator=LogisticRegression(max_iter=50), - X_train=X_train, - y_train=y_train, - X_test=X_test, - y_test=y_test, - ) - comp = ComparisonReport({"report_1": report_1, "report_2": report_2}) - return comp - - @pytest.fixture def binary_classification_comparator(): X, y = make_classification(random_state=0) @@ -127,3 +95,17 @@ def test_needs_positive_label(binary_classification_comparator): match="The perf metric y requires to add a positive label parameter.", ): comp.metrics.summarize().plot("fit_time", "precision") + + +def test_no_positive_label_unrequired(binary_classification_comparator): + display_summary = binary_classification_comparator.metrics.summarize() + display_summary.plot("brier_score", "fit_time") + assert display_summary.ax_.get_xlabel() == "Brier score" + assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" + assert len(display_summary.ax_.get_title()) > 4 + + display_summary = binary_classification_comparator.metrics.summarize() + display_summary.plot("fit_time", "brier_score") + assert display_summary.ax_.get_xlabel() == "Fit time (s) on train set" + assert display_summary.ax_.get_ylabel() == "Brier score" + assert len(display_summary.ax_.get_title()) > 4 From 9274eda4ddfe59b9566784d1fbe39ae54c0ddf19 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 11:18:09 +0200 Subject: [PATCH 21/39] add description to tests --- .../metrics_summary_display/test_plot_comparison.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index 31dcc29d8e..b5d2c4ce7c 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -60,6 +60,7 @@ def regression_comparator(): def test_regression_comparator(regression_comparator): + """Test that the regression comparator can summarize metrics and plot them.""" display_summary = regression_comparator.metrics.summarize() display_summary.plot("r2", "fit_time") assert display_summary.ax_.get_xlabel() == "R²" @@ -68,6 +69,7 @@ def test_regression_comparator(regression_comparator): def test_data_source_affect_title_and_axis(regression_comparator): + """Test that the data source does change the title and axis labels.""" comp = regression_comparator display_summary = comp.metrics.summarize(data_source="train") display_summary.plot("r2", "fit_time") @@ -76,6 +78,7 @@ def test_data_source_affect_title_and_axis(regression_comparator): def test_error_invalid_metric(regression_comparator): + """Test the error raised when an invalid metric is used.""" comp = regression_comparator with pytest.raises(ValueError): comp.metrics.summarize().plot("invalid_metric", "fit_time") @@ -84,6 +87,10 @@ def test_error_invalid_metric(regression_comparator): def test_needs_positive_label(binary_classification_comparator): + """ + Test the error raised when a metric requiring a positive label is selected, + without giving the pos_label. 
+ """ comp = binary_classification_comparator with pytest.raises( ValueError, @@ -98,6 +105,10 @@ def test_needs_positive_label(binary_classification_comparator): def test_no_positive_label_unrequired(binary_classification_comparator): + """ + Test that no error is raised when a metric not requiring a positive label is + selected. + """ display_summary = binary_classification_comparator.metrics.summarize() display_summary.plot("brier_score", "fit_time") assert display_summary.ax_.get_xlabel() == "Brier score" From d1888281c8668140e2a1d7b072c8833aa17a24c3 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 13 Jun 2025 11:18:17 +0200 Subject: [PATCH 22/39] add example --- .../_plot/metrics/metrics_summary_display.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index f440c1602a..2fe64572fa 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -63,6 +63,25 @@ def plot(self, x, y) -> None: Returns ------- A matplotlib plot. + + Example + ------- + >>> from sklearn.datasets import load_breast_cancer + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.ensemble import HistGradientBoostingClassifier + >>> from skore import train_test_split + >>> from skore import EstimatorReport, ComparisonReport + >>> X, y = load_breast_cancer(return_X_y=True) + >>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) + >>> classifier = LogisticRegression() + >>> report_a = EstimatorReport(classifier,pos_label=1, **split_data) + >>> classifier = HistGradientBoostingClassifier() + >>> report_b = EstimatorReport(classifier,pos_label=1, **split_data) + >>> comparison_report = ComparisonReport( + {"report_a": report_a, "report_b": report_b} + ) + >>> display = comparison_report.metrics.summarize() + >>> display.plot(x="accuracy", y="roc_auc") """ if self.report_type in ( ["estimator", "cross-validation", "comparison-cross-validation"] From a21ca0e5907304c85b58ac54f54134d094439045 Mon Sep 17 00:00:00 2001 From: Marie Sacksick <79304610+MarieSacksick@users.noreply.github.com> Date: Mon, 16 Jun 2025 14:39:35 +0200 Subject: [PATCH 23/39] Update skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py Co-authored-by: Guillaume Lemaitre --- .../skore/sklearn/_plot/metrics/metrics_summary_display.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 2fe64572fa..ecd16ef608 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -154,9 +154,7 @@ def _plot_comparison_estimator(self, x, y): title += f" on {self.data_source} set" self.ax_.scatter(x=x_data, y=y_data) - self.ax_.set_title(title) - self.ax_.set_xlabel(x_label_text) - self.ax_.set_ylabel(y_label_text) + self.ax_.set(title=title, xlabel=x_label_text, y_label=y_label_text) # Add labels to the points with a small offset text = self.summarize_data.columns.tolist() From 7fa3615a352a754609c31b6363eccd674b63421c Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 16 Jun 2025 14:45:12 +0200 Subject: [PATCH 24/39] change not implemented error for estimator --- .../_plot/metrics/metrics_summary_display.py | 6 +++--- 
.../metrics_summary_display/test_common.py | 20 ++++++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index ecd16ef608..6a8fef25d1 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -83,9 +83,9 @@ def plot(self, x, y) -> None: >>> display = comparison_report.metrics.summarize() >>> display.plot(x="accuracy", y="roc_auc") """ - if self.report_type in ( - ["estimator", "cross-validation", "comparison-cross-validation"] - ): + if self.report_type == "estimator": + raise NotImplementedError() + elif self.report_type in ["cross-validation", "comparison-cross-validation"]: raise NotImplementedError("To come soon!") elif self.report_type == "comparison-estimator": self._plot_comparison_estimator(x, y) diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py index bbf7d58b7a..8ad2b01ffe 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_common.py @@ -2,7 +2,7 @@ from sklearn.datasets import make_classification from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.model_selection import train_test_split -from skore import EstimatorReport +from skore import CrossValidationReport, EstimatorReport @pytest.fixture @@ -22,13 +22,27 @@ def estimator_report_classification(): return estimator_report -def test_not_implemented(estimator_report_classification): +def test_not_implemented_estimator(estimator_report_classification): """ Test that the plot_comparison_estimator method raises NotImplementedError when called with a binary classification comparator. """ - estimator_report_classification.metrics.summarize() with pytest.raises(NotImplementedError): estimator_report_classification.metrics.summarize().plot( x="accuracy", y="f1_score" ) + + +def test_not_implemented_other_categories(): + """ + Test that the plot_comparison_estimator method raises NotImplementedError + when called with a binary classification comparator. 
+ """ + X, y = make_classification(random_state=0) + cv_report = CrossValidationReport( + estimator=HistGradientBoostingClassifier(), + X=X, + y=y, + ) + with pytest.raises(NotImplementedError, match="To come soon!"): + cv_report.metrics.summarize().plot(x="accuracy", y="f1_score") From 77c84f19c42373fd2d315f7aafb8040b636fce1d Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 16 Jun 2025 14:47:04 +0200 Subject: [PATCH 25/39] bugfix matplotlib ax set --- .../src/skore/sklearn/_plot/metrics/metrics_summary_display.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 6a8fef25d1..7ff67697c6 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -154,7 +154,7 @@ def _plot_comparison_estimator(self, x, y): title += f" on {self.data_source} set" self.ax_.scatter(x=x_data, y=y_data) - self.ax_.set(title=title, xlabel=x_label_text, y_label=y_label_text) + self.ax_.set(title=title, xlabel=x_label_text, ylabel=y_label_text) # Add labels to the points with a small offset text = self.summarize_data.columns.tolist() From d348ca1b56eb8c1fc19e8cab3186b3f83b200ff7 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 16 Jun 2025 14:48:46 +0200 Subject: [PATCH 26/39] remove useless line --- skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 7ff67697c6..3581b0de16 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -172,5 +172,4 @@ def _plot_comparison_estimator(self, x, y): ), ) - plt.tight_layout() return self.figure_, self.ax_ From bf7455455762a5cf69d01895c20a4348d1256551 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 16 Jun 2025 15:56:13 +0200 Subject: [PATCH 27/39] change annotation to legend --- .../_plot/metrics/metrics_summary_display.py | 30 +++++++++---------- test.py | 18 +++++++++++ 2 files changed, 33 insertions(+), 15 deletions(-) create mode 100644 test.py diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py index 3581b0de16..5e366bd166 100644 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py @@ -1,3 +1,5 @@ +import itertools + import matplotlib.pyplot as plt import pandas as pd @@ -153,23 +155,21 @@ def _plot_comparison_estimator(self, x, y): if self.data_source is not None: title += f" on {self.data_source} set" - self.ax_.scatter(x=x_data, y=y_data) - self.ax_.set(title=title, xlabel=x_label_text, ylabel=y_label_text) - - # Add labels to the points with a small offset + # Use a set of markers and colors for each data point text = self.summarize_data.columns.tolist() + markers = itertools.cycle(("o", "s", "^", "D", "v", "P", "*", "X", "h", "8")) + colors = itertools.cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"]) + + handles = [] for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): - self.ax_.annotate( - label, - (x_coord, y_coord), - textcoords="offset points", - xytext=(10, 0), - bbox=dict( - boxstyle="round,pad=0.3", - edgecolor="gray", - 
facecolor="white", - alpha=0.7, - ), + marker = next(markers) + color = next(colors) + sc = self.ax_.scatter( + x_coord, y_coord, marker=marker, color=color, label=label ) + handles.append(sc) + + self.ax_.set(title=title, xlabel=x_label_text, ylabel=y_label_text) + self.ax_.legend(title="Models", loc="best") return self.figure_, self.ax_ diff --git a/test.py b/test.py new file mode 100644 index 0000000000..dfd55be02b --- /dev/null +++ b/test.py @@ -0,0 +1,18 @@ +# %% +from skore import EstimatorReport, ComparisonReport +from sklearn.datasets import load_breast_cancer +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import HistGradientBoostingClassifier +from skore import train_test_split +X, y = load_breast_cancer(return_X_y=True) +split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) +classifier = LogisticRegression() +report_a = EstimatorReport(classifier, pos_label=1, **split_data) +classifier = HistGradientBoostingClassifier() +report_b = EstimatorReport(classifier, pos_label=1, **split_data) +comparison_report = ComparisonReport( + {"report_a": report_a, "report_b": report_b} +) +display = comparison_report.metrics.summarize() +display.plot(x="roc_auc", y="fit_time") +# %% From d3abdffa008949290dda273ba3d8e24bbe9d9e9e Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 28 Jul 2025 17:40:09 +0200 Subject: [PATCH 28/39] linting --- .../_sklearn/_comparison/metrics_accessor.py | 3 +- .../_cross_validation/metrics_accessor.py | 3 +- .../_plot/metrics/metrics_summary_display.py | 4 +- skore/src/skore/sklearn/__init__.py | 25 --- .../skore/sklearn/_plot/metrics/__init__.py | 15 -- .../_plot/metrics/metrics_summary_display.py | 175 ------------------ 6 files changed, 7 insertions(+), 218 deletions(-) delete mode 100644 skore/src/skore/sklearn/__init__.py delete mode 100644 skore/src/skore/sklearn/_plot/metrics/__init__.py delete mode 100644 skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 459859e445..8ebcfd759f 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -7,6 +7,7 @@ from numpy.typing import ArrayLike from sklearn.metrics import make_scorer from sklearn.utils.metaestimators import available_if + from skore._externals._pandas_accessors import DirNamesMixin from skore._sklearn._base import ( _BaseAccessor, @@ -60,7 +61,7 @@ def summarize( pos_label: PositiveLabel | None = _DEFAULT, indicator_favorability: bool = False, flat_index: bool = False, - aggregate: Optional[Aggregate] = ("mean", "std"), + aggregate: Aggregate | None = ("mean", "std"), ) -> MetricsSummaryDisplay: """Report a set of metrics for the estimators. 
diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index fab7d9e748..22b96d1be2 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -7,6 +7,7 @@ from numpy.typing import ArrayLike from sklearn.metrics import make_scorer from sklearn.utils.metaestimators import available_if + from skore._externals._pandas_accessors import DirNamesMixin from skore._sklearn._base import ( _BaseAccessor, @@ -60,7 +61,7 @@ def summarize( pos_label: PositiveLabel | None = _DEFAULT, indicator_favorability: bool = False, flat_index: bool = False, - aggregate: Optional[Aggregate] = ("mean", "std"), + aggregate: Aggregate | None = ("mean", "std"), ) -> MetricsSummaryDisplay: """Report a set of metrics for our estimator. diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index a6846294ff..9cd889b51a 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -9,8 +9,10 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): This class should not be instantiated directly. """ - def __init__(self, summarize_data): + def __init__(self, summarize_data, report_type, data_source): self.summarize_data = summarize_data + self.report_type = report_type + self.data_source = data_source def frame(self): """Return the summarize as a dataframe. diff --git a/skore/src/skore/sklearn/__init__.py b/skore/src/skore/sklearn/__init__.py deleted file mode 100644 index 242669655f..0000000000 --- a/skore/src/skore/sklearn/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Enhance `sklearn` functions.""" - -from skore.sklearn._comparison import ComparisonReport -from skore.sklearn._cross_validation import CrossValidationReport -from skore.sklearn._estimator import EstimatorReport -from skore.sklearn._plot import ( - MetricsSummaryDisplay, - PrecisionRecallCurveDisplay, - PredictionErrorDisplay, - RocCurveDisplay, -) -from skore.sklearn.find_estimators import find_estimators -from skore.sklearn.train_test_split.train_test_split import train_test_split - -__all__ = [ - "ComparisonReport", - "CrossValidationReport", - "EstimatorReport", - "PrecisionRecallCurveDisplay", - "PredictionErrorDisplay", - "RocCurveDisplay", - "MetricsSummaryDisplay", - "train_test_split", - "find_estimators", -] diff --git a/skore/src/skore/sklearn/_plot/metrics/__init__.py b/skore/src/skore/sklearn/_plot/metrics/__init__.py deleted file mode 100644 index 3e55d01b1f..0000000000 --- a/skore/src/skore/sklearn/_plot/metrics/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from skore.sklearn._plot.metrics.confusion_matrix import ConfusionMatrixDisplay -from skore.sklearn._plot.metrics.metrics_summary_display import MetricsSummaryDisplay -from skore.sklearn._plot.metrics.precision_recall_curve import ( - PrecisionRecallCurveDisplay, -) -from skore.sklearn._plot.metrics.prediction_error import PredictionErrorDisplay -from skore.sklearn._plot.metrics.roc_curve import RocCurveDisplay - -__all__ = [ - "ConfusionMatrixDisplay", - "PrecisionRecallCurveDisplay", - "PredictionErrorDisplay", - "RocCurveDisplay", - "MetricsSummaryDisplay", -] diff --git a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py deleted file mode 100644 
index 5e366bd166..0000000000 --- a/skore/src/skore/sklearn/_plot/metrics/metrics_summary_display.py +++ /dev/null @@ -1,175 +0,0 @@ -import itertools - -import matplotlib.pyplot as plt -import pandas as pd - -from skore.sklearn._plot.style import StyleDisplayMixin -from skore.sklearn._plot.utils import HelpDisplayMixin -from skore.sklearn.types import ReportType - - -class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): - """Display for summarize. - - An instance of this class will be created by `Report.metrics.summarize()`. - This class should not be instantiated directly. - """ - - # should be removed once transformed into a utils - _SCORE_OR_LOSS_INFO: dict[str, dict[str, str]] = { - "fit_time": {"name": "Fit time (s)", "icon": "(↘︎)"}, - "predict_time": {"name": "Predict time (s)", "icon": "(↘︎)"}, - "accuracy": {"name": "Accuracy", "icon": "(↗︎)"}, - "precision": {"name": "Precision", "icon": "(↗︎)"}, - "recall": {"name": "Recall", "icon": "(↗︎)"}, - "brier_score": {"name": "Brier score", "icon": "(↘︎)"}, - "roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"}, - "log_loss": {"name": "Log loss", "icon": "(↘︎)"}, - "r2": {"name": "R²", "icon": "(↗︎)"}, - "rmse": {"name": "RMSE", "icon": "(↘︎)"}, - "custom_metric": {"name": "Custom metric", "icon": ""}, - "report_metrics": {"name": "Report metrics", "icon": ""}, - } - - def __init__( - self, *, summarize_data, report_type: ReportType, data_source: str = "test" - ): - self.summarize_data = summarize_data - self.report_type = report_type - self.data_source = data_source - - def frame(self): - """Return the summarize as a dataframe.""" - return self.summarize_data - - @StyleDisplayMixin.style_plot - def plot(self, x, y) -> None: - """Plot visualization. - - Parameters - ---------- - x : str, default=None - The metric to display on x-axis. By default, the first column. - - y : str, default=None - The metric to display on y-axis. By default, the second column. - - Attributes - ---------- - ax_ : matplotlib axes or ndarray of axes - The axes on which the precision-recall curve is plotted. - - figure_ : matplotlib figure - The figure on which the precision-recall curve is plotted. - - Returns - ------- - A matplotlib plot. 
- - Example - ------- - >>> from sklearn.datasets import load_breast_cancer - >>> from sklearn.linear_model import LogisticRegression - >>> from sklearn.ensemble import HistGradientBoostingClassifier - >>> from skore import train_test_split - >>> from skore import EstimatorReport, ComparisonReport - >>> X, y = load_breast_cancer(return_X_y=True) - >>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True) - >>> classifier = LogisticRegression() - >>> report_a = EstimatorReport(classifier,pos_label=1, **split_data) - >>> classifier = HistGradientBoostingClassifier() - >>> report_b = EstimatorReport(classifier,pos_label=1, **split_data) - >>> comparison_report = ComparisonReport( - {"report_a": report_a, "report_b": report_b} - ) - >>> display = comparison_report.metrics.summarize() - >>> display.plot(x="accuracy", y="roc_auc") - """ - if self.report_type == "estimator": - raise NotImplementedError() - elif self.report_type in ["cross-validation", "comparison-cross-validation"]: - raise NotImplementedError("To come soon!") - elif self.report_type == "comparison-estimator": - self._plot_comparison_estimator(x, y) - - def _plot_comparison_estimator(self, x, y): - self.figure_, self.ax_ = plt.subplots() - - x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) - y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) - - # Check that the metrics are in the report - # If the metric is not in the report, help the user by suggesting - # supported metrics - reverse_score_info = { - value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() - } - index = self.summarize_data.index - if isinstance(index, pd.MultiIndex): - available_columns = index.get_level_values(0).to_list() - else: - available_columns = index.tolist() - supported_metrics = [ - reverse_score_info.get(col, col) for col in available_columns - ] - if x not in supported_metrics: - raise ValueError( - f"Performance metric {x} not found in the report. " - f"Supported metrics are: {supported_metrics}." - ) - if y not in supported_metrics: - raise ValueError( - f"Performance metric {y} not found in the report. " - f"Supported metrics are: {supported_metrics}." - ) - - x_data = self.summarize_data.loc[x_label] - y_data = self.summarize_data.loc[y_label] - if len(x_data.shape) > 1 and x_data.shape[0] > 1: - # case where we have multiIndex, and the metric is not a single value - raise ValueError( - "The perf metric x requires to add a positive label parameter." - ) - elif len(x_data.shape) > 1 and x_data.shape[0] == 1: - # case where we have multiIndex, but the metric is not affected by the - # pos_label - x_data = x_data.squeeze() - if len(y_data.shape) > 1 and y_data.shape[0] > 1: - raise ValueError( - "The perf metric y requires to add a positive label parameter." 
- ) - elif len(y_data.shape) > 1 and y_data.shape[0] == 1: - y_data = y_data.squeeze() - - # Make it clear in the axis labels that we are using the train set - if x == "fit_time" and self.data_source != "train": - x_label_text = x_label + " on train set" - else: - x_label_text = x_label - if y == "fit_time" and self.data_source != "train": - y_label_text = y_label + " on train set" - else: - y_label_text = y_label - - title = f"{x_label} vs {y_label}" - if self.data_source is not None: - title += f" on {self.data_source} set" - - # Use a set of markers and colors for each data point - text = self.summarize_data.columns.tolist() - markers = itertools.cycle(("o", "s", "^", "D", "v", "P", "*", "X", "h", "8")) - colors = itertools.cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"]) - - handles = [] - for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): - marker = next(markers) - color = next(colors) - sc = self.ax_.scatter( - x_coord, y_coord, marker=marker, color=color, label=label - ) - handles.append(sc) - - self.ax_.set(title=title, xlabel=x_label_text, ylabel=y_label_text) - self.ax_.legend(title="Models", loc="best") - - return self.figure_, self.ax_ From c7fb645dadd9e87f591aa2ca35a61b9b3da4e524 Mon Sep 17 00:00:00 2001 From: mrastgoo Date: Tue, 29 Jul 2025 09:55:13 +0200 Subject: [PATCH 29/39] docs(skore): Change the name of features after preprocessing (#1901) closes #1421 Created a separated dataframe for features importances and rename the features by string replace operations. Changes were made for two figures: `engineered_ridge_report` --- examples/use_cases/plot_feature_importance.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/examples/use_cases/plot_feature_importance.py b/examples/use_cases/plot_feature_importance.py index 18a055ed30..2082eb5e08 100644 --- a/examples/use_cases/plot_feature_importance.py +++ b/examples/use_cases/plot_feature_importance.py @@ -452,9 +452,22 @@ def unscale_coefficients(df, feature_mean, feature_std): # Let us display the 15 largest absolute coefficients: # %% -engineered_ridge_report.feature_importance.coefficients().sort_values( - by="Coefficient", key=abs, ascending=True -).tail(15).plot.barh( +engineered_rigde_report_feature_importance = ( + engineered_ridge_report.feature_importance.coefficients() + .sort_values(by="Coefficient", key=abs, ascending=True) + .tail(15) +) + +engineered_rigde_report_feature_importance.index = ( + engineered_rigde_report_feature_importance.index.str.replace("remainder__", "") +) +engineered_rigde_report_feature_importance.index = ( + engineered_rigde_report_feature_importance.index.str.replace( + "kmeans__", "geospatial__" + ) +) + +engineered_rigde_report_feature_importance.plot.barh( title="Model weights", xlabel="Coefficient", ylabel="Feature", From bc221c108a35e905228eb1ab56d7b1abf02157fc Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 31 Jul 2025 17:23:23 +0200 Subject: [PATCH 30/39] finish merge --- .../_plot/metrics/metrics_summary_display.py | 135 +++++++++++++++++- .../test_plot_comparison.py | 16 +-- 2 files changed, 137 insertions(+), 14 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 9cd889b51a..19ceb52525 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -1,15 +1,37 @@ +import matplotlib.pyplot as plt +import pandas as 
pd + from skore._sklearn._plot.style import StyleDisplayMixin -from skore._sklearn._plot.utils import HelpDisplayMixin +from skore._sklearn._plot.utils import HelpDisplayMixin, PlotBackendMixin +from skore._sklearn.types import ReportType -class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin): +class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin, PlotBackendMixin): """Display for summarize. An instance of this class will be created by `Report.metrics.summarize()`. This class should not be instantiated directly. """ - def __init__(self, summarize_data, report_type, data_source): + # should be removed once transformed into a utils + _SCORE_OR_LOSS_INFO: dict[str, dict[str, str]] = { + "fit_time": {"name": "Fit time (s)", "icon": "(↘︎)"}, + "predict_time": {"name": "Predict time (s)", "icon": "(↘︎)"}, + "accuracy": {"name": "Accuracy", "icon": "(↗︎)"}, + "precision": {"name": "Precision", "icon": "(↗︎)"}, + "recall": {"name": "Recall", "icon": "(↗︎)"}, + "brier_score": {"name": "Brier score", "icon": "(↘︎)"}, + "roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"}, + "log_loss": {"name": "Log loss", "icon": "(↘︎)"}, + "r2": {"name": "R²", "icon": "(↗︎)"}, + "rmse": {"name": "RMSE", "icon": "(↘︎)"}, + "custom_metric": {"name": "Custom metric", "icon": ""}, + "report_metrics": {"name": "Report metrics", "icon": ""}, + } + + def __init__( + self, *, summarize_data, report_type: ReportType, data_source: str = "test" + ): self.summarize_data = summarize_data self.report_type = report_type self.data_source = data_source @@ -24,6 +46,107 @@ def frame(self): """ return self.summarize_data - @StyleDisplayMixin.style_plot - def plot(self): - raise NotImplementedError + def _plot_matplotlib(self, x: str, y: str) -> None: + """Plot visualization. + + Parameters + ---------- + x : str, default=None + The metric to display on x-axis. By default, the first column. + + y : str, default=None + The metric to display on y-axis. By default, the second column. + """ + self.figure_, self.ax_ = plt.subplots() + + if self.report_type in ( + ["estimator", "cross-validation", "comparison-cross-validation"] + ): + raise NotImplementedError("To come soon!") + elif self.report_type == "comparison-estimator": + self._plot_comparison_estimator(x, y) + + def _plot_comparison_estimator(self, x, y): + _, ax = plt.subplots() + + x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) + y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) + + # Check that the metrics are in the report + # If the metric is not in the report, help the user by suggesting + # supported metrics + reverse_score_info = { + value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() + } + # available_columns = self.summarize_data.columns.get_level_values(0).to_list() + # available_columns.remove("Estimator") + available_columns = self.summarize_data.index + if isinstance(available_columns, pd.MultiIndex): + available_columns = available_columns.get_level_values(0).to_list() + supported_metrics = [ + reverse_score_info.get(col, col) for col in available_columns + ] + if x not in supported_metrics: + raise ValueError( + f"Performance metric {x} not found in the report. " + f"Supported metrics are: {supported_metrics}." + ) + if y not in supported_metrics: + raise ValueError( + f"Performance metric {y} not found in the report. " + f"Supported metrics are: {supported_metrics}." 
+ ) + + x_data = self.summarize_data.loc[x_label] + y_data = self.summarize_data.loc[y_label] + if len(x_data.shape) > 1: + if x_data.shape[0] == 1: + x_data = x_data.reset_index(drop=True).values + else: + raise ValueError( + "The perf metric x requires to add a positive label parameter." + ) + if len(y_data.shape) > 1: + if y_data.shape[0] == 1: + y_data = y_data.reset_index(drop=True).values + else: + raise ValueError( + "The perf metric y requires to add a positive label parameter." + ) + + # Make it clear in the axis labels that we are using the train set + if x == "fit_time" and self.data_source != "train": + x_label_text = x_label + " on train set" + else: + x_label_text = x_label + if y == "fit_time" and self.data_source != "train": + y_label_text = y_label + " on train set" + else: + y_label_text = y_label + + title = f"{x_label} vs {y_label}" + if self.data_source is not None: + title += f" on {self.data_source} set" + + ax.scatter(x=x_data, y=y_data) + ax.set_title(title) + ax.set_xlabel(x_label_text) + ax.set_ylabel(y_label_text) + + # Add labels to the points with a small offset + text = self.summarize_data.columns + for label, x_coord, y_coord in zip(text, x, y, strict=False): + ax.annotate( + label, + (x_coord, y_coord), + textcoords="offset points", + xytext=(10, 0), + bbox=dict( + boxstyle="round,pad=0.3", + edgecolor="gray", + facecolor="white", + alpha=0.7, + ), + ) + + self.ax_ = ax diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index b5d2c4ce7c..f7c9e132df 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -62,7 +62,7 @@ def regression_comparator(): def test_regression_comparator(regression_comparator): """Test that the regression comparator can summarize metrics and plot them.""" display_summary = regression_comparator.metrics.summarize() - display_summary.plot("r2", "fit_time") + display_summary.plot(x="r2", y="fit_time") assert display_summary.ax_.get_xlabel() == "R²" assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" assert len(display_summary.ax_.get_title()) > 4 @@ -72,7 +72,7 @@ def test_data_source_affect_title_and_axis(regression_comparator): """Test that the data source does change the title and axis labels.""" comp = regression_comparator display_summary = comp.metrics.summarize(data_source="train") - display_summary.plot("r2", "fit_time") + display_summary.plot(x="r2", y="fit_time") assert "on train set" in display_summary.ax_.get_title() assert "on test set" not in display_summary.ax_.get_ylabel() @@ -81,9 +81,9 @@ def test_error_invalid_metric(regression_comparator): """Test the error raised when an invalid metric is used.""" comp = regression_comparator with pytest.raises(ValueError): - comp.metrics.summarize().plot("invalid_metric", "fit_time") + comp.metrics.summarize().plot(x="invalid_metric", y="fit_time") with pytest.raises(ValueError): - comp.metrics.summarize().plot("fit_time", "invalid_metric") + comp.metrics.summarize().plot(x="fit_time", y="invalid_metric") def test_needs_positive_label(binary_classification_comparator): @@ -96,12 +96,12 @@ def test_needs_positive_label(binary_classification_comparator): ValueError, match="The perf metric x requires to add a positive label parameter.", ): - comp.metrics.summarize().plot("precision", "fit_time") + 
comp.metrics.summarize().plot(x="precision", y="fit_time") with pytest.raises( ValueError, match="The perf metric y requires to add a positive label parameter.", ): - comp.metrics.summarize().plot("fit_time", "precision") + comp.metrics.summarize().plot(x="fit_time", y="precision") def test_no_positive_label_unrequired(binary_classification_comparator): @@ -110,13 +110,13 @@ def test_no_positive_label_unrequired(binary_classification_comparator): selected. """ display_summary = binary_classification_comparator.metrics.summarize() - display_summary.plot("brier_score", "fit_time") + display_summary.plot(x="brier_score", y="fit_time") assert display_summary.ax_.get_xlabel() == "Brier score" assert display_summary.ax_.get_ylabel() == "Fit time (s) on train set" assert len(display_summary.ax_.get_title()) > 4 display_summary = binary_classification_comparator.metrics.summarize() - display_summary.plot("fit_time", "brier_score") + display_summary.plot(x="fit_time", y="brier_score") assert display_summary.ax_.get_xlabel() == "Fit time (s) on train set" assert display_summary.ax_.get_ylabel() == "Brier score" assert len(display_summary.ax_.get_title()) > 4 From 8cd4dbfbc0cc0a6673d29970162ba357c2a30ffc Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 31 Jul 2025 17:39:15 +0200 Subject: [PATCH 31/39] annot to legend --- .../_plot/metrics/metrics_summary_display.py | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 19ceb52525..1b25ddd1ff 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -1,3 +1,5 @@ +import itertools + import matplotlib.pyplot as plt import pandas as pd @@ -128,25 +130,17 @@ def _plot_comparison_estimator(self, x, y): if self.data_source is not None: title += f" on {self.data_source} set" - ax.scatter(x=x_data, y=y_data) - ax.set_title(title) - ax.set_xlabel(x_label_text) - ax.set_ylabel(y_label_text) - - # Add labels to the points with a small offset + # Add legend text = self.summarize_data.columns - for label, x_coord, y_coord in zip(text, x, y, strict=False): - ax.annotate( - label, - (x_coord, y_coord), - textcoords="offset points", - xytext=(10, 0), - bbox=dict( - boxstyle="round,pad=0.3", - edgecolor="gray", - facecolor="white", - alpha=0.7, - ), - ) + markers = itertools.cycle(("o", "s", "^", "D", "v", "P", "*", "X", "h", "8")) + colors = itertools.cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"]) + + for label, x_coord, y_coord in zip(text, x_data, y_data, strict=False): + marker = next(markers) + color = next(colors) + ax.scatter(x_coord, y_coord, marker=marker, color=color, label=label) + + ax.set(title=title, xlabel=x_label_text, ylabel=y_label_text) + ax.legend(title="Models", loc="best") self.ax_ = ax From 509d4affa72bd3b1cf261be6a935ae6b6d745f6d Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 31 Jul 2025 17:51:15 +0200 Subject: [PATCH 32/39] change scale according to data range --- .../_plot/metrics/metrics_summary_display.py | 24 +++++++++++++++++-- skore/src/skore/_sklearn/_plot/utils.py | 11 +++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 1b25ddd1ff..721bfd5f5b 100644 --- 
a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -4,7 +4,11 @@ import pandas as pd from skore._sklearn._plot.style import StyleDisplayMixin -from skore._sklearn._plot.utils import HelpDisplayMixin, PlotBackendMixin +from skore._sklearn._plot.utils import ( + HelpDisplayMixin, + PlotBackendMixin, + _interval_max_min_ratio, +) from skore._sklearn.types import ReportType @@ -140,7 +144,23 @@ def _plot_comparison_estimator(self, x, y): color = next(colors) ax.scatter(x_coord, y_coord, marker=marker, color=color, label=label) - ax.set(title=title, xlabel=x_label_text, ylabel=y_label_text) + if _interval_max_min_ratio(x_data) > 5: + xscale = "symlog" if x_data.min() <= 0 else "log" + else: + xscale = "linear" + + if _interval_max_min_ratio(y_data) > 5: + yscale = "symlog" if y_data.min() <= 0 else "log" + else: + yscale = "linear" + + ax.set( + title=title, + xlabel=x_label_text, + ylabel=y_label_text, + xscale=xscale, + yscale=yscale, + ) ax.legend(title="Models", loc="best") self.ax_ = ax diff --git a/skore/src/skore/_sklearn/_plot/utils.py b/skore/src/skore/_sklearn/_plot/utils.py index fa2876e65b..c53475f0d9 100644 --- a/skore/src/skore/_sklearn/_plot/utils.py +++ b/skore/src/skore/_sklearn/_plot/utils.py @@ -466,3 +466,14 @@ def sample_mpl_colormap( """ indices = np.linspace(0, 1, n) return [cmap(i) for i in indices] + + +def _interval_max_min_ratio(data): + """Compute the ratio between the largest and smallest inter-point distances. + + A value larger than 5 typically indicates that the parameter range would + better be displayed with a log scale while a linear scale would be more + suitable otherwise. + """ + diff = np.diff(np.sort(data)) + return diff.max() / diff.min() From 7abf9dc27ae5b00aeecbc2b00626de9027d520e0 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 1 Aug 2025 12:43:26 +0200 Subject: [PATCH 33/39] extend to support custom metrics --- .../_sklearn/_comparison/metrics_accessor.py | 6 +- .../_cross_validation/metrics_accessor.py | 1 + .../_sklearn/_estimator/metrics_accessor.py | 5 +- .../_plot/metrics/metrics_summary_display.py | 80 +++++++++---------- .../test_plot_comparison.py | 16 ++++ 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py index 8ebcfd759f..1b09c31450 100644 --- a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py @@ -187,7 +187,11 @@ class is set to the one provided when creating the report. If `None`, "CrossValidationReport" ) return MetricsSummaryDisplay( - summarize_data=results, report_type=report_type, data_source=data_source + summarize_data=results, + report_type=report_type, + data_source=data_source, + scoring_names=scoring_names, + default_verbose_metric_names=self._score_or_loss_info, ) @progress_decorator(description="Compute metric for each estimator") diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py index 22b96d1be2..ad231ec4dc 100644 --- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py @@ -177,6 +177,7 @@ class is set to the one provided when creating the report. 
If `None`, summarize_data=results, report_type="cross-validation", data_source=data_source, + default_verbose_metric_names=self._score_or_loss_info, ) @progress_decorator(description="Compute metric for each split") diff --git a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py index d2e5672112..953b5f6a25 100644 --- a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py +++ b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py @@ -429,7 +429,10 @@ class is set to the one provided when creating the report. If `None`, ) return MetricsSummaryDisplay( - summarize_data=results, report_type="estimator", data_source=data_source + summarize_data=results, + report_type="estimator", + data_source=data_source, + default_verbose_metric_names=self._score_or_loss_info, ) def _compute_metric_scores( diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 721bfd5f5b..87dbbe2804 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -9,7 +9,7 @@ PlotBackendMixin, _interval_max_min_ratio, ) -from skore._sklearn.types import ReportType +from skore._sklearn.types import ReportType, ScoringName class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin, PlotBackendMixin): @@ -19,28 +19,20 @@ class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin, PlotBackendMixi This class should not be instantiated directly. """ - # should be removed once transformed into a utils - _SCORE_OR_LOSS_INFO: dict[str, dict[str, str]] = { - "fit_time": {"name": "Fit time (s)", "icon": "(↘︎)"}, - "predict_time": {"name": "Predict time (s)", "icon": "(↘︎)"}, - "accuracy": {"name": "Accuracy", "icon": "(↗︎)"}, - "precision": {"name": "Precision", "icon": "(↗︎)"}, - "recall": {"name": "Recall", "icon": "(↗︎)"}, - "brier_score": {"name": "Brier score", "icon": "(↘︎)"}, - "roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"}, - "log_loss": {"name": "Log loss", "icon": "(↘︎)"}, - "r2": {"name": "R²", "icon": "(↗︎)"}, - "rmse": {"name": "RMSE", "icon": "(↘︎)"}, - "custom_metric": {"name": "Custom metric", "icon": ""}, - "report_metrics": {"name": "Report metrics", "icon": ""}, - } - def __init__( - self, *, summarize_data, report_type: ReportType, data_source: str = "test" + self, + *, + summarize_data, + report_type: ReportType, + data_source: str = "test", + default_verbose_metric_names: dict[str, dict[str, str]], + scoring_names: ScoringName | list[ScoringName] | None = None, ): self.summarize_data = summarize_data self.report_type = report_type self.data_source = data_source + self.scoring_names = scoring_names + self.default_verbose_metric_names = default_verbose_metric_names def frame(self): """Return the summarize as a dataframe. 
@@ -70,41 +62,49 @@ def _plot_matplotlib(self, x: str, y: str) -> None: ): raise NotImplementedError("To come soon!") elif self.report_type == "comparison-estimator": - self._plot_comparison_estimator(x, y) + self._plot_matplotlib_comparison_estimator(x, y) - def _plot_comparison_estimator(self, x, y): + def _plot_matplotlib_comparison_estimator(self, x, y): _, ax = plt.subplots() - x_label = self._SCORE_OR_LOSS_INFO.get(x, {}).get("name", x) - y_label = self._SCORE_OR_LOSS_INFO.get(y, {}).get("name", y) + # Get verbose name from x and y + # if they are not verbose already + x_verbose = self.default_verbose_metric_names.get(x, {}).get("name", x) + y_verbose = self.default_verbose_metric_names.get(y, {}).get("name", y) # Check that the metrics are in the report # If the metric is not in the report, help the user by suggesting # supported metrics reverse_score_info = { - value["name"]: key for key, value in self._SCORE_OR_LOSS_INFO.items() + value["name"]: key + for key, value in self.default_verbose_metric_names.items() } - # available_columns = self.summarize_data.columns.get_level_values(0).to_list() - # available_columns.remove("Estimator") - available_columns = self.summarize_data.index - if isinstance(available_columns, pd.MultiIndex): - available_columns = available_columns.get_level_values(0).to_list() - supported_metrics = [ - reverse_score_info.get(col, col) for col in available_columns - ] + available_metrics = self.summarize_data.index + if isinstance(available_metrics, pd.MultiIndex): + available_metrics = available_metrics.get_level_values(0).to_list() + + # if scoring_names is provided, they are the supported metrics + # otherwise, the default verbose names apply. + if self.scoring_names is not None: + supported_metrics = self.scoring_names + else: + supported_metrics = [ + reverse_score_info.get(col, col) for col in available_metrics + ] + if x not in supported_metrics: raise ValueError( - f"Performance metric {x} not found in the report. " + f"Performance metric '{x}' not found in the report. " f"Supported metrics are: {supported_metrics}." ) if y not in supported_metrics: raise ValueError( - f"Performance metric {y} not found in the report. " + f"Performance metric '{y}' not found in the report. " f"Supported metrics are: {supported_metrics}." 
) - x_data = self.summarize_data.loc[x_label] - y_data = self.summarize_data.loc[y_label] + x_data = self.summarize_data.loc[x_verbose] + y_data = self.summarize_data.loc[y_verbose] if len(x_data.shape) > 1: if x_data.shape[0] == 1: x_data = x_data.reset_index(drop=True).values @@ -122,15 +122,15 @@ def _plot_comparison_estimator(self, x, y): # Make it clear in the axis labels that we are using the train set if x == "fit_time" and self.data_source != "train": - x_label_text = x_label + " on train set" + x_label_text = x_verbose + " on train set" else: - x_label_text = x_label + x_label_text = x_verbose if y == "fit_time" and self.data_source != "train": - y_label_text = y_label + " on train set" + y_label_text = y_verbose + " on train set" else: - y_label_text = y_label + y_label_text = y_verbose - title = f"{x_label} vs {y_label}" + title = f"{x_verbose} vs {y_verbose}" if self.data_source is not None: title += f" on {self.data_source} set" diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index f7c9e132df..d3011eb738 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -120,3 +120,19 @@ def test_no_positive_label_unrequired(binary_classification_comparator): assert display_summary.ax_.get_xlabel() == "Fit time (s) on train set" assert display_summary.ax_.get_ylabel() == "Brier score" assert len(display_summary.ax_.get_title()) > 4 + + +def test_custom_metrics(binary_classification_comparator): + """ + Test that custom metric names are used in the plot. + """ + comp = binary_classification_comparator + display_summary = comp.metrics.summarize( + scoring=["precision", "recall"], + scoring_names=["My Precision", "My Recall"], + pos_label=1, + ) + display_summary.plot(x="My Precision", y="My Recall") + assert display_summary.ax_.get_xlabel() == "My Precision" + assert display_summary.ax_.get_ylabel() == "My Recall" + assert len(display_summary.ax_.get_title()) > 4 From 1d4b0136c42aaa4d3104c51878ae1777dbf50719 Mon Sep 17 00:00:00 2001 From: Marie Date: Fri, 1 Aug 2025 14:49:09 +0200 Subject: [PATCH 34/39] fix: remove useless subplot --- .../src/skore/_sklearn/_plot/metrics/metrics_summary_display.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 87dbbe2804..392f505033 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -55,8 +55,6 @@ def _plot_matplotlib(self, x: str, y: str) -> None: y : str, default=None The metric to display on y-axis. By default, the second column. 
""" - self.figure_, self.ax_ = plt.subplots() - if self.report_type in ( ["estimator", "cross-validation", "comparison-cross-validation"] ): From 589ddb84ce27ea9f6cdb13ea1b96df1adefc7e18 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 4 Aug 2025 16:36:38 +0200 Subject: [PATCH 35/39] add test for axis --- skore/src/skore/_sklearn/_plot/utils.py | 2 +- .../test_plot_comparison.py | 49 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/skore/src/skore/_sklearn/_plot/utils.py b/skore/src/skore/_sklearn/_plot/utils.py index c53475f0d9..d8e7c2ccc5 100644 --- a/skore/src/skore/_sklearn/_plot/utils.py +++ b/skore/src/skore/_sklearn/_plot/utils.py @@ -475,5 +475,5 @@ def _interval_max_min_ratio(data): better be displayed with a log scale while a linear scale would be more suitable otherwise. """ - diff = np.diff(np.sort(data)) + diff = np.diff(np.sort(data), axis=0) return diff.max() / diff.min() diff --git a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py index d3011eb738..759430cf76 100644 --- a/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py +++ b/skore/tests/unit/sklearn/plot/metrics_summary_display/test_plot_comparison.py @@ -1,5 +1,7 @@ +import numpy as np import pytest from sklearn.datasets import make_classification, make_regression +from sklearn.dummy import DummyRegressor from sklearn.ensemble import ( HistGradientBoostingClassifier, HistGradientBoostingRegressor, @@ -59,6 +61,40 @@ def regression_comparator(): return comp +@pytest.fixture +def high_error_regression(): + X_train = np.random.rand(100, 5) + y_train = np.random.normal(0.1, 0.1, 100) + X_test = np.random.rand(100, 5) + y_test = np.random.normal(100, 1, 100) + + report_1 = EstimatorReport( + estimator=DummyRegressor(strategy="mean"), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_2 = EstimatorReport( + estimator=DummyRegressor(strategy="constant", constant=0.99), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + report_3 = EstimatorReport( + estimator=DummyRegressor(strategy="constant", constant=100), + X_train=X_train, + y_train=y_train, + X_test=X_test, + y_test=y_test, + ) + comp = ComparisonReport( + {"report_1": report_1, "report_2": report_2, "report_3": report_3} + ) + return comp + + def test_regression_comparator(regression_comparator): """Test that the regression comparator can summarize metrics and plot them.""" display_summary = regression_comparator.metrics.summarize() @@ -136,3 +172,16 @@ def test_custom_metrics(binary_classification_comparator): assert display_summary.ax_.get_xlabel() == "My Precision" assert display_summary.ax_.get_ylabel() == "My Recall" assert len(display_summary.ax_.get_title()) > 4 + + +def test_various_scales(high_error_regression): + """ + Test that the plot can handle metrics with different scales. 
+ """ + comp = high_error_regression + display_summary = comp.metrics.summarize(scoring=["fit_time", "rmse"]) + display_summary.plot(x="rmse", y="fit_time") + assert display_summary.ax_.get_xscale() == "log" + + display_summary.plot(x="fit_time", y="rmse") + assert display_summary.ax_.get_yscale() == "log" From e5ff840ecabb86a8d9b3692a3076db55b7080775 Mon Sep 17 00:00:00 2001 From: Marie Date: Mon, 4 Aug 2025 17:11:32 +0200 Subject: [PATCH 36/39] fix test --- .../skore/_sklearn/_plot/metrics/metrics_summary_display.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 392f505033..4ce5225db0 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -105,14 +105,14 @@ def _plot_matplotlib_comparison_estimator(self, x, y): y_data = self.summarize_data.loc[y_verbose] if len(x_data.shape) > 1: if x_data.shape[0] == 1: - x_data = x_data.reset_index(drop=True).values + x_data = x_data.reset_index(drop=True).values[0] else: raise ValueError( "The perf metric x requires to add a positive label parameter." ) if len(y_data.shape) > 1: if y_data.shape[0] == 1: - y_data = y_data.reset_index(drop=True).values + y_data = y_data.reset_index(drop=True).values[0] else: raise ValueError( "The perf metric y requires to add a positive label parameter." From 65f0c9accc0a192b6ade4121028873ec3f812266 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 8 Oct 2025 11:15:39 +0200 Subject: [PATCH 37/39] linting and update import to be consistant with refactor --- .../_sklearn/_plot/metrics/metrics_summary_display.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index a61198779f..0153ee0975 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -3,16 +3,12 @@ import matplotlib.pyplot as plt import pandas as pd -from skore._sklearn._plot.style import StyleDisplayMixin -from skore._sklearn._plot.utils import ( - HelpDisplayMixin, - PlotBackendMixin, - _interval_max_min_ratio, -) +from skore._sklearn._plot.base import DisplayMixin +from skore._sklearn._plot.utils import _interval_max_min_ratio from skore._sklearn.types import ReportType, ScoringName -class MetricsSummaryDisplay(HelpDisplayMixin, StyleDisplayMixin, PlotBackendMixin): +class MetricsSummaryDisplay(DisplayMixin): """Display for summarize. An instance of this class will be created by `Report.metrics.summarize()`. 
@@ -162,6 +158,7 @@ def _plot_matplotlib_comparison_estimator(self, x, y): ax.legend(title="Models", loc="best") self.ax_ = ax + @DisplayMixin.style_plot def plot(self): """Not yet implemented.""" From 2bfc789d45dbb08cb7e0d8c24b5bbd3102a44642 Mon Sep 17 00:00:00 2001 From: Marie Date: Wed, 8 Oct 2025 16:18:49 +0200 Subject: [PATCH 38/39] add default plot function in metrics summary display --- .../_plot/metrics/metrics_summary_display.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py index 0153ee0975..1549fa5ce4 100644 --- a/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py +++ b/skore/src/skore/_sklearn/_plot/metrics/metrics_summary_display.py @@ -51,10 +51,11 @@ def _plot_matplotlib(self, x: str, y: str) -> None: y : str, default=None The metric to display on y-axis. By default, the second column. """ - if self.report_type in ( - ["estimator", "cross-validation", "comparison-cross-validation"] - ): + if self.report_type in (["cross-validation", "comparison-cross-validation"]): raise NotImplementedError("To come soon!") + elif self.report_type == "estimator": + raise NotImplementedError() + # it does not make sense to plot the metrics for a single estimator elif self.report_type == "comparison-estimator": self._plot_matplotlib_comparison_estimator(x, y) @@ -160,6 +161,5 @@ def _plot_matplotlib_comparison_estimator(self, x, y): self.ax_ = ax @DisplayMixin.style_plot - def plot(self): - """Not yet implemented.""" - raise NotImplementedError + def plot(self, x: str, y: str): + self._plot(**{"x": x, "y": y}) From 95eeca975b08d27690ca2e958730b30b98534856 Mon Sep 17 00:00:00 2001 From: Marie Date: Thu, 9 Oct 2025 15:56:02 +0200 Subject: [PATCH 39/39] fix docs with available metrics --- examples/getting_started/plot_skore_getting_started.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/getting_started/plot_skore_getting_started.py b/examples/getting_started/plot_skore_getting_started.py index 24a016cfb0..0acdcd53c6 100644 --- a/examples/getting_started/plot_skore_getting_started.py +++ b/examples/getting_started/plot_skore_getting_started.py @@ -214,10 +214,10 @@ comparator.metrics.summarize(indicator_favorability=True).frame() # %% -# To be more specific in our comparison, we can decide to compare the Brier score and the fitting time. +# To be more specific in our comparison, we can decide to compare the ROC AUC and the fitting time. # %% -comparator.metrics.summarize().plot(x="brier_score", y="fit_time") +comparator.metrics.summarize().plot(x="roc_auc", y="fit_time") # %% # Thus, we easily have the result of our benchmark for several recommended metrics.