DoubleML · akihiroshimoda · Apr 10, 2026 · Apr 18, 2026
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
@@ -674,3 +674,98 @@ def policy_tree(self, features, depth=2, **tree_params):
         model = DoubleMLPolicyTree(orth_signal, depth=depth, features=features, **tree_params).fit()
 
         return model
+
+    def plot_overlap_common_support(
+        self,
+        idx_treatment: int = 0,
+        i_rep: int = 0,
+        bins=10,
+        density: bool = False,
+        palette: str = "colorblind",
+    ):
+        """Plot propensity score distributions and binned calibration curves.
+
+        Visualizes the distribution of estimated propensity scores :math:`\\hat{m}_0(X) = \\hat{E}[D|X]`
+        split by treatment status using histograms, together with calibration curves comparing
+        predicted propensity scores against observed treatment fractions.
+
+        Parameters
+        ----------
+        idx_treatment : int
+            Index of the treatment variable (for multi-treatment settings).
+            Default is ``0``.
+
+        i_rep : int
+            Index of the repetition to use for the propensity score predictions.
+            Default is ``0``.
+
+        bins : int or array-like
+            Number of bins or explicit bin edges for the histograms and calibration curves.
+            Default is ``10``.
+
+        density : bool
+            If ``True``, histogram heights are normalized to density.
+            Default is ``False``.
+
+        palette : str or sequence
+            Seaborn palette name or explicit colors.
+            Default is ``"colorblind"``.
+
+        Returns
+        -------
+        fig : :class:`matplotlib.figure.Figure`
+            Matplotlib figure.
+        axes : :class:`numpy.ndarray`
+            2x2 axes array.
+
+        Raises
+        ------
+        ValueError
+            If ``fit()`` has not been called or predictions are not stored.
+        """
+        from doubleml.utils.plots import plot_propensity_score_calibration
+
+        # Input validation
+        if self._framework is None:
+            raise ValueError("Apply fit() before plot_overlap_common_support().")
+
+        if self.predictions is None:
+            raise ValueError(
+                "Predictions are not stored. Call fit() with store_predictions=True "
+                "before plot_overlap_common_support()."
+            )
+
+        if "ml_m" not in self.predictions:
+            raise ValueError(
+                "Propensity score predictions ('ml_m') are not available. "
+                "Ensure fit() was called with store_predictions=True."
+            )
+
+        if not isinstance(idx_treatment, int):
+            raise TypeError(f"idx_treatment must be an integer. Got {type(idx_treatment)}.")
+        if idx_treatment < 0 or idx_treatment >= self._dml_data.n_treat:
+            raise ValueError(
+                f"idx_treatment must be in [0, {self._dml_data.n_treat - 1}]. Got {idx_treatment}."
+            )
+
+        if not isinstance(i_rep, int):
+            raise TypeError(f"i_rep must be an integer. Got {type(i_rep)}.")
+        if i_rep < 0 or i_rep >= self.n_rep:
+            raise ValueError(f"i_rep must be in [0, {self.n_rep - 1}]. Got {i_rep}.")
+
+        # Extract propensity scores and treatment indicator
+        ps_scores = self.predictions["ml_m"][:, i_rep, idx_treatment]
+        treatment = self._dml_data.d
+
+        # Generate plot
+        fig, axes = plot_propensity_score_calibration(
+            propensity_score=ps_scores,
+            treatment=treatment,
+            bins=bins,
+            density=density,
+            palette=palette,
+        )
+
+        return fig, axes
+
+
diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py
@@ -7,6 +7,7 @@
 from .dummy_learners import DMLDummyClassifier, DMLDummyRegressor
 from .gain_statistics import gain_statistics
 from .global_learner import GlobalClassifier, GlobalRegressor
+from .plots import plot_propensity_score_calibration
 from .policytree import DoubleMLPolicyTree
 from .propensity_score_processing import PSProcessor, PSProcessorConfig
 from .resampling import DoubleMLClusterResampling, DoubleMLResampling
@@ -22,6 +23,7 @@
     "gain_statistics",
     "GlobalClassifier",
     "GlobalRegressor",
+    "plot_propensity_score_calibration",
     "PSProcessor",
     "PSProcessorConfig",
 ]
diff --git a/doubleml/utils/_plots.py b/doubleml/utils/_plots.py
@@ -92,3 +92,6 @@ def _sensitivity_contour_plot(
     fig.update_yaxes(range=[0, np.max(y)])
 
     return fig
+
+
+
diff --git a/doubleml/utils/plots.py b/doubleml/utils/plots.py
@@ -0,0 +1,132 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+
+
+def plot_propensity_score_calibration(
+    propensity_score,
+    treatment,
+    bins=10,
+    density=False,
+    palette="colorblind",
+):
+    """
+    Plot propensity score distributions and binned calibration curves.
+
+    Parameters
+    ----------
+    propensity_score : array-like
+        Predicted propensity scores of shape (n_samples,).
+    treatment : array-like
+        Binary treatment indicator of shape (n_samples,).
+    bins : int or array-like
+        Number of bins or explicit bin edges.
+    density : bool
+        If True, histogram heights are normalized.
+    palette : str or sequence
+        Seaborn palette name or explicit colors.
+
+    Returns
+    -------
+    fig : :class:`matplotlib.figure.Figure`
+        Matplotlib figure.
+    axes : :class:`numpy.ndarray`
+        2x2 axes array.
+    """
+    ps = np.asarray(propensity_score, dtype=float).reshape(-1)
+    tr = np.asarray(treatment).reshape(-1)
+
+    if ps.shape != tr.shape:
+        raise ValueError("propensity_score and treatment must have the same shape.")
+    if ps.ndim != 1:
+        raise ValueError("propensity_score and treatment must be one-dimensional.")
+    if not np.isin(tr, [0, 1]).all():
+        raise ValueError("treatment must be binary with values 0 and 1.")
+    if np.any((ps < 0) | (ps > 1)):
+        raise ValueError("propensity_score must lie in [0, 1].")
+
+    tr = tr.astype(int)
+
+    if isinstance(bins, int):
+        if bins < 2:
+            raise ValueError("bins must be at least 2.")
+        bins = np.linspace(0.0, 1.0, bins + 1)
+    else:
+        bins = np.asarray(bins, dtype=float)
+        if bins.ndim != 1 or len(bins) < 2:
+            raise ValueError("bins must contain at least two edges.")
+        if np.any(np.diff(bins) <= 0):
+            raise ValueError("bins must be strictly increasing.")
+
+    x_min, x_max = float(bins[0]), float(bins[-1])
+    centers = 0.5 * (bins[:-1] + bins[1:])
+    widths = np.diff(bins)
+
+    treated_frac = []
+    control_frac = []
+    for i in range(len(bins) - 1):
+        if i < len(bins) - 2:
+            mask = (ps >= bins[i]) & (ps < bins[i + 1])
+        else:
+            mask = (ps >= bins[i]) & (ps <= bins[i + 1])
+        if np.sum(mask) == 0:
+            treated_frac.append(np.nan)
+            control_frac.append(np.nan)
+        else:
+            p_treated = np.mean(tr[mask] == 1)
+            treated_frac.append(p_treated)
+            control_frac.append(1.0 - p_treated)
+
+    colors = sns.color_palette(palette, n_colors=2)
+    fig, axes = plt.subplots(2, 2, figsize=(12, 10), gridspec_kw={"height_ratios": [2, 1]})
+
+    sns.histplot(
+        ps[tr == 1],
+        bins=bins,
+        stat="density" if density else "count",
+        kde=False,
+        color=colors[0],
+        ax=axes[0, 0],
+        label="Treated",
+    )
+    axes[0, 0].set_title("Treated: Propensity Score Distribution")
+    axes[0, 0].set_xlim(x_min, x_max)
+    axes[0, 0].set_ylabel("Density" if density else "Count")
+    axes[0, 0].legend()
+
+    sns.histplot(
+        ps[tr == 0],
+        bins=bins,
+        stat="density" if density else "count",
+        kde=False,
+        color=colors[1],
+        ax=axes[0, 1],
+        label="Control",
+    )
+    axes[0, 1].set_title("Control: Propensity Score Distribution")
+    axes[0, 1].set_xlim(x_min, x_max)
+    axes[0, 1].set_ylabel("Density" if density else "Count")
+    axes[0, 1].legend()
+
+    axes[1, 0].bar(centers, treated_frac, width=widths, color=colors[0], alpha=0.7)
+    axes[1, 0].plot([x_min, x_max], [x_min, x_max], "k--", label="Ideal calibration")
+    axes[1, 0].set_title("Treated: Calibration")
+    axes[1, 0].set_xlabel("Predicted propensity score")
+    axes[1, 0].set_ylabel("Observed treatment fraction")
+    axes[1, 0].set_xlim(x_min, x_max)
+    axes[1, 0].set_ylim(0, 1)
+    axes[1, 0].legend()
+
+    axes[1, 1].bar(centers, control_frac, width=widths, color=colors[1], alpha=0.7)
+    axes[1, 1].plot([x_min, x_max], [1 - x_min, 1 - x_max], "k--", label="Ideal calibration")
+    axes[1, 1].set_title("Control: Calibration")
+    axes[1, 1].set_xlabel("Predicted propensity score")
+    axes[1, 1].set_ylabel("Observed control fraction")
+    axes[1, 1].set_xlim(x_min, x_max)
+    axes[1, 1].set_ylim(0, 1)
+    axes[1, 1].legend()
+
+    fig.suptitle("Propensity Score Calibration")
+    plt.tight_layout()
+
+    return fig, axes
Original file line number	Diff line number	Diff line change
Expand Up		@@ -92,3 +92,6 @@ def _sensitivity_contour_plot(
		fig.update_yaxes(range=[0, np.max(y)])

		return fig