Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

5,788 changes: 3,378 additions & 2,410 deletions pixi.lock

Large diffs are not rendered by default.

69 changes: 69 additions & 0 deletions pyfixest/estimation/models/feiv_.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,75 @@ def get_fit(self) -> None:
D = np.linalg.inv(self._tXZ @ self._tZZinv @ self._tZX)
self._bread = H.T @ D @ H

def _bootstrap_one_rep(self, nz: np.ndarray, w_combined: np.ndarray) -> np.ndarray:
    """
    Fit a single 2SLS bootstrap replication.

    Restricts Y, X (exogenous + endogenous) and Z (instruments) to the
    positive-weight observations, demeans them with the bootstrap weights
    when fixed effects are present, rescales by sqrt(weight) for WLS, and
    evaluates the closed-form 2SLS estimator.
    """
    weights_rep = w_combined[nz]

    def _subset(df):
        # Keep only positive-weight rows and renumber the index.
        return df.iloc[nz].reset_index(drop=True)

    y_rep = _subset(self._Y_untransformed)
    x_rep = _subset(self._X_untransformed_df[list(self._coefnames)])
    z_rep = _subset(self._Z_untransformed_df[list(self._coefnames_z)])
    fe_rep = None if self._fe is None else _subset(self._fe)

    if self._has_fixef:
        # Shared demeaning arguments for the X and Z passes.
        demean_kwargs = dict(
            Y=y_rep,
            fe=fe_rep,
            weights=weights_rep,
            lookup_demeaned_data={},
            na_index=self._na_index,
            fixef_tol=self._fixef_tol,
            fixef_maxiter=self._fixef_maxiter,
            demean_func=self._demean_func,
        )
        y_dm, x_dm = demean_model(X=x_rep, **demean_kwargs)  # type: ignore[arg-type]
        # Y is passed again only as a placeholder — just the Z columns matter.
        _, z_dm = demean_model(X=z_rep, **demean_kwargs)  # type: ignore[arg-type]
        y_arr = y_dm.to_numpy().flatten()
        x_arr = x_dm.to_numpy()
        z_arr = z_dm.to_numpy()
    else:
        y_arr = y_rep.to_numpy().flatten()
        x_arr = x_rep.to_numpy()
        z_arr = z_rep.to_numpy()

    # WLS transform: scale rows by the square root of the combined weights.
    root_w = np.sqrt(weights_rep)
    y_w = y_arr * root_w
    x_w = x_arr * root_w[:, None]
    z_w = z_arr * root_w[:, None]

    # 2SLS formula: β = (X'Z (Z'Z)⁻¹ Z'X)⁻¹ (X'Z (Z'Z)⁻¹ Z'Y)
    ztz_inv = np.linalg.inv(z_w.T @ z_w)
    bread = (x_w.T @ z_w) @ ztz_inv
    return solve_ols(bread @ (z_w.T @ x_w), bread @ (z_w.T @ y_w), self._solver)

def first_stage(self) -> None:
"""Implement First stage regression."""
# Store names of instruments from Z matrix
Expand Down
231 changes: 231 additions & 0 deletions pyfixest/estimation/models/feols_.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,9 +412,17 @@ def prepare_model_matrix(self):
self._Y = model_matrix.dependent
self._Y_untransformed = model_matrix.dependent.copy()
self._X = model_matrix.independent
self._X_untransformed_df = (
model_matrix.independent.copy()
) # pre-demeaning DataFrame, for bootstrap
self._fe = model_matrix.fixed_effects
self._endogvar = model_matrix.endogenous
self._Z = model_matrix.instruments
self._Z_untransformed_df = (
model_matrix.instruments.copy()
if model_matrix.instruments is not None
else None
) # pre-demeaning instrument DataFrame; used by Feiv bootstrap
self._weights_df = model_matrix.weights
self._na_index = model_matrix.na_index
# TODO: set dynamically based on naming set in pyfixest.estimation.formula.factor_interaction._encode_i
Expand Down Expand Up @@ -1104,6 +1112,8 @@ def _clear_attributes(self):
"_Y_hat_link",
"_Y_hat_response",
"_Y_untransformed",
"_X_untransformed_df",
"_Z_untransformed_df",
]

for attr in attributes:
Expand Down Expand Up @@ -1459,6 +1469,227 @@ def wildboottest(
else:
return res_df

def weightingboottest(
self,
reps: int,
method: Literal["bayesian", "multinomial"] = "bayesian",
concentration: float = 1.0,
cluster: str | None = None,
ci_level: float = 0.95,
seed: int | None = None,
return_draws: bool = False,
) -> pd.DataFrame | tuple[pd.DataFrame, np.ndarray]:
"""
Run a weighting bootstrap (Bayesian or Multinomial) for inference on
all regression coefficients.

Unlike the wild bootstrap, which perturbs residuals, the weighting
bootstrap reweights observations. Each iteration draws a new weight
vector, re-demeans (if fixed effects are present) with those weights,
fits a weighted OLS, and collects the coefficient estimates. Standard
errors and confidence intervals are derived directly from the bootstrap
distribution.

When a cluster variable is provided (or set via vcov at estimation
time), weights are drawn once per unit and broadcast to all observations
of that unit, correctly handling unbalanced panels.

Parameters
----------
reps : int
Number of bootstrap iterations.
method : str, optional
Either ``"bayesian"`` (Dirichlet/Gamma weights) or
``"multinomial"`` (integer counts). Defaults to ``"bayesian"``.
concentration : float, optional
Concentration parameter for the Dirichlet distribution.
Only used when ``method="bayesian"``. ``concentration=1`` is the
standard Bayesian bootstrap of Rubin (1981); larger values shrink
weights toward uniform (less variable draws). Defaults to 1.0.
cluster : str or None, optional
Column name in the data used to define sampling units for
panel-aware weighting. If None, falls back to the model's
``clustervar`` if one was set at estimation time, otherwise
draws weights independently per observation.
ci_level : float, optional
Confidence level for percentile intervals. Defaults to 0.95.
seed : int or None, optional
Random seed for reproducibility. Defaults to None.
return_draws : bool, optional
If True, also returns the full ``(reps x k)`` matrix of bootstrap
coefficient draws. Defaults to False.

Returns
-------
pd.DataFrame
Indexed by coefficient name with columns:
``Estimate``, ``Bootstrap SE``, ``CI lower``, ``CI upper``,
``p-value``, ``method``.
tuple[pd.DataFrame, np.ndarray]
If ``return_draws=True``, returns a tuple of the DataFrame and
the ``(reps x k)`` bootstrap draws matrix.

Examples
--------
```{python}
import pyfixest as pf

data = pf.get_data()
fit = pf.feols("Y ~ X1 + X2 | f1", data)

fit.weightingboottest(reps=500, method="bayesian", concentration=1.0, seed=42)
fit.weightingboottest(reps=500, method="multinomial", seed=42)
```
"""
if method not in ("bayesian", "multinomial"):
raise ValueError(
f"method must be 'bayesian' or 'multinomial', got '{method}'."
)
if method == "bayesian" and concentration <= 0:
raise ValueError(f"concentration must be positive, got {concentration}.")

rng = np.random.default_rng(seed)

# --- resolve cluster variable (same logic as wildboottest) ---
cluster_var = None
if cluster is not None:
cluster_var = cluster
elif self._clustervar:
cluster_var = (
self._clustervar[0]
if isinstance(self._clustervar, list)
else self._clustervar
)

if cluster_var is not None and cluster_var not in self._data.columns:
raise ValueError(f"Cluster variable '{cluster_var}' not found in the data.")

# --- unit-to-observation mapping for panel-aware weighting ---
# np.unique returns sorted unique values and inverse maps each
# observation back to its unit index — handles unbalanced panels
# because mapping is by value, not by position.
if cluster_var is not None:
cluster_arr = self._data[cluster_var].to_numpy()
_, inverse = np.unique(cluster_arr, return_inverse=True)
n_units = int(inverse.max()) + 1
else:
N_obs = len(self._Y)
n_units = N_obs
inverse = np.arange(N_obs)

# --- original estimate on already-fitted arrays ---
# self._X and self._Y are post-demean, post-wls-transform numpy arrays.
# self._coefnames tracks the coefficient names after collinearity drop.
beta_hat = self._beta_hat
coefnames = self._coefnames

# --- original analytic weights (ones if no WLS) ---
w_original = self._weights.flatten() # shape (N_obs,)

# --- bootstrap loop ---
k = len(beta_hat)
beta_boots = np.empty((reps, k))

for b in range(reps):
# draw unit-level weights
if method == "multinomial":
w_unit = rng.multinomial(n_units, np.ones(n_units) / n_units).astype(
float
)
else: # bayesian
w_unit = rng.dirichlet(np.full(n_units, concentration)) * n_units

# broadcast to observations — handles unbalanced panels correctly
w_boot = w_unit[inverse]

# combine with original analytic weights if WLS
w_combined = w_original * w_boot

# With multinomial draws, whole FE groups can get zero total weight,
# causing division-by-zero in the demeaning kernel. Filter them out;
# zero-weight rows contribute nothing to the WLS objective anyway.
nz = w_combined > 0
beta_boots[b] = self._bootstrap_one_rep(nz, w_combined)

# --- inference from bootstrap distribution ---
boot_se = beta_boots.std(axis=0, ddof=1)
q_lo = (1 - ci_level) / 2
q_hi = 1 - q_lo
ci_lower = np.quantile(beta_boots, q_lo, axis=0)
ci_upper = np.quantile(beta_boots, q_hi, axis=0)
# symmetric p-value: fraction of draws at least as extreme as observed
p_values = np.mean(np.abs(beta_boots - beta_hat) >= np.abs(beta_hat), axis=0)

res_df = pd.DataFrame(
{
"Estimate": beta_hat,
"Bootstrap SE": boot_se,
f"CI {int(q_lo * 100)}%": ci_lower,
f"CI {int(q_hi * 100)}%": ci_upper,
"p-value": p_values,
"method": method,
},
index=coefnames,
)

if return_draws:
return res_df, beta_boots
return res_df

def _bootstrap_one_rep(self, nz: np.ndarray, w_combined: np.ndarray) -> np.ndarray:
    """
    Fit a single OLS/WLS bootstrap replication.

    Model-specific subclasses override this hook (e.g. 2SLS for Feiv,
    IRLS for Fepois/Feglm); the shared weight-drawing and inference logic
    stays in ``weightingboottest()``.

    Parameters
    ----------
    nz : np.ndarray
        Boolean mask selecting observations with positive combined weight.
    w_combined : np.ndarray
        Combined weights (bootstrap * original analytic weights), length N.

    Returns
    -------
    np.ndarray
        Bootstrap coefficient vector of shape ``(k,)``.
    """
    weights_rep = w_combined[nz]
    y_rep = self._Y_untransformed.iloc[nz].reset_index(drop=True)
    x_rep = (
        self._X_untransformed_df[list(self._coefnames)]
        .iloc[nz]
        .reset_index(drop=True)
    )
    fe_rep = None if self._fe is None else self._fe.iloc[nz].reset_index(drop=True)

    if not self._has_fixef:
        y_arr = y_rep.to_numpy().flatten()
        x_arr = x_rep.to_numpy()
    else:
        y_dm, x_dm = demean_model(
            Y=y_rep,  # type: ignore[arg-type]
            X=x_rep,
            fe=fe_rep,
            weights=weights_rep,
            lookup_demeaned_data={},
            na_index=self._na_index,
            fixef_tol=self._fixef_tol,
            fixef_maxiter=self._fixef_maxiter,
            demean_func=self._demean_func,
        )
        y_arr = y_dm.to_numpy().flatten()
        x_arr = x_dm.to_numpy()

    # WLS transform: scale rows by sqrt of the combined weights, then
    # solve the normal equations.
    root_w = np.sqrt(weights_rep)
    x_w = x_arr * root_w[:, None]
    y_w = y_arr * root_w
    return solve_ols(x_w.T @ x_w, x_w.T @ y_w, self._solver)

def ccv(
self,
treatment,
Expand Down
2 changes: 1 addition & 1 deletion pyfixest/utils/dgps.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def get_ivf_data(N=2000, seed=1234):
TRUE_EFFECT = -0.15

OVB formula for naive OLS (earnings ~ num_children):
bias ≈ γ_ambition * Cov(num_children, ambition) / Var(num_children)
bias ≈ γ_ambition * Cov(num_children, ambition) / Var(num_children)
≈ 0.6 * (-0.4) / 0.57 ≈ -0.42
β_OLS ≈ -0.15 + (-0.42) ≈ -0.57 (overstates the penalty)
β_IV ≈ -0.15 (recovers the true effect)
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ polars = ">=1.0.0"
pyarrow = ">=14.0"
doubleml = ">=0.10.1"
pytest-sugar = ">=1.1.1"
jupyterlab = ">=4.5.6,<5"

[tool.pixi.pypi-dependencies]
pyfixest = { path = ".", editable = true, extras = ["jax", "plots"] }
Expand Down
Loading
Loading