
Commit d35228d

waridrox and glemaitre authored
fix: Show consistent favorability (↘︎) for Brier score with non-probabilistic classifiers (#1758)
Co-authored-by: Guillaume Lemaitre <[email protected]>
1 parent 8d08dac · commit d35228d

File tree

2 files changed: +52, -4 lines

skore/src/skore/sklearn/_comparison/utils.py

Lines changed: 12 additions & 4 deletions
```diff
@@ -76,7 +76,10 @@ def _combine_estimator_results(
     # - not use it in the aggregate operation
     # - later to only report a single column and not by split columns
     if indicator_favorability:
-        favorability = results.pop("Favorability").iloc[:, 0]
+        # Some metrics can be undefined for some estimators and NaN are
+        # introduced after the concatenation. We fill the NaN using the
+        # valid favorability
+        favorability = results.pop("Favorability").bfill(axis=1).iloc[:, 0]
     else:
         favorability = None
```
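For context, here is a minimal sketch (not part of the commit, with made-up estimator names) of what the back-fill achieves: when one estimator's "Favorability" column contains NaN because a metric is undefined for it, `bfill(axis=1)` pulls the label from a neighboring estimator's column before the first column is kept.

```python
import numpy as np
import pandas as pd

# Hypothetical concatenated "Favorability" columns, one per estimator.
# Brier score is NaN for LinearSVC, which has no predict_proba.
favorability = pd.DataFrame(
    {
        "LinearSVC": ["(↗︎)", np.nan],
        "LogisticRegression": ["(↗︎)", "(↘︎)"],
    },
    index=["Accuracy", "Brier score"],
)

# Back-fill along the columns so each NaN cell takes the next valid value
# to its right, then keep the first column as the single favorability label.
print(favorability.bfill(axis=1).iloc[:, 0])
# Accuracy       (↗︎)
# Brier score    (↘︎)
```

Back-filling is safe here because favorability is a property of the metric, not of the estimator, so any non-NaN cell in a row carries the correct label.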

```diff
@@ -301,9 +304,14 @@ def sort_by_split(df: pd.DataFrame) -> pd.DataFrame:
     # - not use it in the aggregate operation
     # - later to only report a single column and not by split columns
     if indicator_favorability:
-        favorability = results[0]["Favorability"]
-        for result in results:
-            result.pop("Favorability")
+        # Some metrics can be undefined for some estimators and NaN are
+        # introduced after the concatenation. We fill the NaN using the
+        # valid favorability
+        favorability = (
+            pd.concat([result.pop("Favorability") for result in results], axis=1)
+            .bfill(axis=1)
+            .iloc[:, 0]
+        )
     else:
         favorability = None
```
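The cross-validation path applies the same idea, except each report's "Favorability" column is popped and concatenated first. A small illustrative sketch, with toy frames standing in for the per-report results:

```python
import numpy as np
import pandas as pd

# Toy stand-ins for the per-report results; pop() removes the
# "Favorability" column from each frame and returns it as a Series.
index = ["Accuracy", "Brier score"]
results = [
    pd.DataFrame({"Favorability": ["(↗︎)", np.nan]}, index=index),
    pd.DataFrame({"Favorability": ["(↗︎)", "(↘︎)"]}, index=index),
]

favorability = (
    pd.concat([result.pop("Favorability") for result in results], axis=1)
    .bfill(axis=1)
    .iloc[:, 0]
)
print(favorability)  # the "Brier score" row resolves to (↘︎) instead of NaN
```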

skore/tests/unit/sklearn/comparison/test_report_common.py

Lines changed: 40 additions & 0 deletions
```diff
@@ -8,6 +8,10 @@
 
 import joblib
 import pytest
+from sklearn.datasets import make_classification
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import LinearSVC
+from skore import ComparisonReport, CrossValidationReport, EstimatorReport
 
 
 @pytest.fixture(params=["report_estimator_reports", "report_cv_reports"])
```
```diff
@@ -67,3 +71,39 @@ def test_metrics_help(capsys, report):
     report.metrics.help()
     captured = capsys.readouterr()
     assert "Available metrics methods" in captured.out
+
+
+@pytest.mark.parametrize("report", [EstimatorReport, CrossValidationReport])
+def test_comparison_report_favorability_undefined_metrics(report):
+    """Check that we don't introduce NaN in the favorability column when the
+    metric is undefined for some estimators.
+
+    Non-regression test for:
+    https://github.com/probabl-ai/skore/issues/1755
+    """
+
+    X, y = make_classification(random_state=0)
+    estimators = {"LinearSVC": LinearSVC(), "LogisticRegression": LogisticRegression()}
+
+    if report is EstimatorReport:
+        reports = {
+            name: EstimatorReport(est, X_train=X, X_test=X, y_train=y, y_test=y)
+            for name, est in estimators.items()
+        }
+    else:
+        reports = {
+            name: CrossValidationReport(est, X=X, y=y)
+            for name, est in estimators.items()
+        }
+
+    comparison_report = ComparisonReport(reports)
+    metrics = comparison_report.metrics.report_metrics(
+        pos_label=1, indicator_favorability=True
+    )
+
+    assert "Brier score" in metrics.index
+    assert "Favorability" in metrics.columns
+    assert not metrics["Favorability"].isna().any()
+    expected_values = {"(↗︎)", "(↘︎)"}
+    actual_values = set(metrics["Favorability"].to_numpy())
+    assert actual_values.issubset(expected_values)
```
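Why this estimator pairing: LinearSVC exposes no predict_proba, so the Brier score (which needs probability estimates) is undefined for it, while LogisticRegression reports one. Concatenating the two per-estimator results is what previously left a NaN beside the valid (↘︎) in the Favorability column, which this test now guards against.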
