diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 0d5aabe81..22d4b6065 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -30,6 +30,7 @@ class in this module implements the general logic in a very versatile way from abc import abstractmethod import inspect from collections import defaultdict +from joblib import Parallel, delayed import re import numpy as np @@ -42,7 +43,7 @@ class in this module implements the general logic in a very versatile way from ._cate_estimator import (BaseCateEstimator, LinearCateEstimator, TreatmentExpansionMixin) from .inference import BootstrapInference -from .utilities import (_deprecate_positional, check_input_arrays, +from .utilities import (convertArg, _deprecate_positional, check_input_arrays, cross_product, filter_none_kwargs, inverse_onehot, jacify_featurizer, ndim, reshape, shape, transpose) @@ -195,7 +196,7 @@ def predict(self, X, y, W=None): CachedValues = namedtuple('CachedValues', ['nuisances', 'Y', 'T', 'X', 'W', 'Z', 'sample_weight', 'freq_weight', - 'sample_var', 'groups']) + 'sample_var', 'groups', 'output_T']) class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): @@ -443,6 +444,7 @@ def __init__(self, *, self.categories = categories self.mc_iters = mc_iters self.mc_agg = mc_agg + self._clone_model_finals = False super().__init__() @abstractmethod @@ -547,9 +549,118 @@ def _prefit(self, Y, T, *args, only_final=False, **kwargs): if not only_final: # generate an instance of the nuisance model self._ortho_learner_model_nuisance = self._gen_ortho_learner_model_nuisance() - super()._prefit(Y, T, *args, **kwargs) + def _gen_cloned_ortho_learner_model_finals(self, num_clone_final_models): + self._clone_model_finals = True + self._cloned_final_models = [clone(self._ortho_learner_model_final, safe=False) for _ in range(num_clone_final_models)] + self._current_cloned_index = 0 + self._current_cloned_final_model = self._cloned_final_models[self._current_cloned_index] + + def _set_current_cloned_ortho_learner_model_final(self, clone_index): + self._current_cloned_index = clone_index + self._current_cloned_final_model = self._cloned_final_models[self._current_cloned_index] + self._ortho_learner_model_final = self._current_cloned_final_model + + def _fit_compute_final_T(self, cached_values): + final_T = cached_values.T + if self.transformer: + if (self.discrete_treatment): + final_T = self.transformer.transform(final_T.reshape(-1, 1)) + else: # treatment featurizer case + final_T = cached_values.output_T + return cached_values._replace(T=final_T) + + def _fit_cached_values(self, Y, T, *, X=None, W=None, Z=None, sample_weight=None, freq_weight=None, sample_var=None, groups=None, + cache_values=False, inference=None, only_final=False, check_input=True): + if check_input: + Y, T, X, W, Z, sample_weight, freq_weight, sample_var, groups = check_input_arrays( + Y, T, X, W, Z, sample_weight, freq_weight, sample_var, groups) + self._check_input_dims(Y, T, X, W, Z, sample_weight, freq_weight, sample_var, groups) + output_T = None + if self.discrete_treatment: + categories = self.categories + if categories != 'auto': + categories = [categories] # OneHotEncoder expects a 2D array with features per column + self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first') + self.transformer.fit(reshape(T, (-1, 1))) + self._d_t = (len(self.transformer.categories_[0]) - 1,) + elif self.treatment_featurizer: + self._original_treatment_featurizer = clone(self.treatment_featurizer, safe=False) + 
self.transformer = jacify_featurizer(self.treatment_featurizer) + output_T = self.transformer.fit_transform(T) + self._d_t = np.shape(output_T)[1:] + else: + self.transformer = None + + if self.discrete_instrument: + self.z_transformer = OneHotEncoder(categories='auto', sparse=False, drop='first') + self.z_transformer.fit(reshape(Z, (-1, 1))) + else: + self.z_transformer = None + all_nuisances = [] + fitted_inds = None + if sample_weight is None: + if freq_weight is not None: + sample_weight_nuisances = freq_weight + else: + sample_weight_nuisances = None + else: + if freq_weight is not None: + sample_weight_nuisances = freq_weight * sample_weight + else: + sample_weight_nuisances = sample_weight + + self._models_nuisance = [] + for idx in range(self.mc_iters or 1): + nuisances, fitted_models, new_inds, scores = self._fit_nuisances( + Y, T, X, W, Z, sample_weight=sample_weight_nuisances, groups=groups) + all_nuisances.append(nuisances) + self._models_nuisance.append(fitted_models) + if scores is None: + self.nuisance_scores_ = None + else: + if idx == 0: + self.nuisance_scores_ = tuple([] for _ in scores) + for ind, score in enumerate(scores): + self.nuisance_scores_[ind].append(score) + if fitted_inds is None: + fitted_inds = new_inds + elif not np.array_equal(fitted_inds, new_inds): + raise AttributeError("Different indices were fit by different folds, so they cannot be aggregated") + + if self.mc_iters is not None: + if self.mc_agg == 'mean': + nuisances = tuple(np.mean(nuisance_mc_variants, axis=0) + for nuisance_mc_variants in zip(*all_nuisances)) + elif self.mc_agg == 'median': + nuisances = tuple(np.median(nuisance_mc_variants, axis=0) + for nuisance_mc_variants in zip(*all_nuisances)) + else: + raise ValueError( + "Parameter `mc_agg` must be one of {'mean', 'median'}. Got {}".format(self.mc_agg)) + + Y, T, X, W, Z, sample_weight, freq_weight, sample_var = (self._subinds_check_none(arr, fitted_inds) + for arr in (Y, T, X, W, Z, sample_weight, + freq_weight, sample_var)) + nuisances = tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]) + cached_values = CachedValues(nuisances=nuisances, + Y=Y, T=T, X=X, W=W, Z=Z, + sample_weight=sample_weight, + freq_weight=freq_weight, + sample_var=sample_var, + groups=groups, + output_T=output_T) + return cached_values + + def _fit_init(self, Y, T, *, X=None, W=None, Z=None, sample_weight=None, freq_weight=None, sample_var=None, groups=None, + cache_values=False, inference=None, check_input=True): + self._random_state = check_random_state(self.random_state) + assert (freq_weight is None) == ( + sample_var is None), "Sample variances and frequency weights must be provided together!" + assert not (self.discrete_treatment and self.treatment_featurizer), "Treatment featurization " \ + "is not supported when treatment is discrete" + @BaseCateEstimator._wrap_fit def fit(self, Y, T, *, X=None, W=None, Z=None, sample_weight=None, freq_weight=None, sample_var=None, groups=None, cache_values=False, inference=None, only_final=False, check_input=True): @@ -599,93 +710,19 @@ def fit(self, Y, T, *, X=None, W=None, Z=None, sample_weight=None, freq_weight=N ------- self : object """ - self._random_state = check_random_state(self.random_state) - assert (freq_weight is None) == ( - sample_var is None), "Sample variances and frequency weights must be provided together!" 
- assert not (self.discrete_treatment and self.treatment_featurizer), "Treatment featurization " \ - "is not supported when treatment is discrete" - if check_input: - Y, T, X, W, Z, sample_weight, freq_weight, sample_var, groups = check_input_arrays( - Y, T, X, W, Z, sample_weight, freq_weight, sample_var, groups) - self._check_input_dims(Y, T, X, W, Z, sample_weight, freq_weight, sample_var, groups) - + self._fit_init(Y=Y, T=T, X=X, W=W, Z=Z, sample_weight=sample_weight, + freq_weight=freq_weight, sample_var=sample_var, groups=groups, + cache_values=cache_values, inference=inference, check_input=check_input) + cached_values = None if not only_final: - - if self.discrete_treatment: - categories = self.categories - if categories != 'auto': - categories = [categories] # OneHotEncoder expects a 2D array with features per column - self.transformer = OneHotEncoder(categories=categories, sparse=False, drop='first') - self.transformer.fit(reshape(T, (-1, 1))) - self._d_t = (len(self.transformer.categories_[0]) - 1,) - elif self.treatment_featurizer: - self._original_treatment_featurizer = clone(self.treatment_featurizer, safe=False) - self.transformer = jacify_featurizer(self.treatment_featurizer) - output_T = self.transformer.fit_transform(T) - self._d_t = np.shape(output_T)[1:] - else: - self.transformer = None - - if self.discrete_instrument: - self.z_transformer = OneHotEncoder(categories='auto', sparse=False, drop='first') - self.z_transformer.fit(reshape(Z, (-1, 1))) - else: - self.z_transformer = None - - all_nuisances = [] - fitted_inds = None - if sample_weight is None: - if freq_weight is not None: - sample_weight_nuisances = freq_weight - else: - sample_weight_nuisances = None - else: - if freq_weight is not None: - sample_weight_nuisances = freq_weight * sample_weight - else: - sample_weight_nuisances = sample_weight - - self._models_nuisance = [] - for idx in range(self.mc_iters or 1): - nuisances, fitted_models, new_inds, scores = self._fit_nuisances( - Y, T, X, W, Z, sample_weight=sample_weight_nuisances, groups=groups) - all_nuisances.append(nuisances) - self._models_nuisance.append(fitted_models) - if scores is None: - self.nuisance_scores_ = None - else: - if idx == 0: - self.nuisance_scores_ = tuple([] for _ in scores) - for ind, score in enumerate(scores): - self.nuisance_scores_[ind].append(score) - if fitted_inds is None: - fitted_inds = new_inds - elif not np.array_equal(fitted_inds, new_inds): - raise AttributeError("Different indices were fit by different folds, so they cannot be aggregated") - - if self.mc_iters is not None: - if self.mc_agg == 'mean': - nuisances = tuple(np.mean(nuisance_mc_variants, axis=0) - for nuisance_mc_variants in zip(*all_nuisances)) - elif self.mc_agg == 'median': - nuisances = tuple(np.median(nuisance_mc_variants, axis=0) - for nuisance_mc_variants in zip(*all_nuisances)) - else: - raise ValueError( - "Parameter `mc_agg` must be one of {'mean', 'median'}. 
Got {}".format(self.mc_agg)) - - Y, T, X, W, Z, sample_weight, freq_weight, sample_var = (self._subinds_check_none(arr, fitted_inds) - for arr in (Y, T, X, W, Z, sample_weight, - freq_weight, sample_var)) - nuisances = tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]) - self._cached_values = CachedValues(nuisances=nuisances, - Y=Y, T=T, X=X, W=W, Z=Z, - sample_weight=sample_weight, - freq_weight=freq_weight, - sample_var=sample_var, - groups=groups) if cache_values else None + cached_values = self._fit_cached_values(Y=Y, T=T, X=X, W=W, Z=Z, + sample_weight=sample_weight, + freq_weight=freq_weight, + sample_var=sample_var, + groups=groups) + self._cached_values = cached_values if cache_values else None else: - nuisances = self._cached_values.nuisances + cached_values = self._cached_values # _d_t is altered by fit nuisances to what prefit does. So we need to perform the same # alteration even when we only want to fit_final. if self.transformer is not None: @@ -694,23 +731,8 @@ def fit(self, Y, T, *, X=None, W=None, Z=None, sample_weight=None, freq_weight=N else: output_T = self.transformer.fit_transform(T) self._d_t = np.shape(output_T)[1:] - - final_T = T - if self.transformer: - if (self.discrete_treatment): - final_T = self.transformer.transform(final_T.reshape(-1, 1)) - else: # treatment featurizer case - final_T = output_T - - self._fit_final(Y=Y, - T=final_T, - X=X, W=W, Z=Z, - nuisances=nuisances, - sample_weight=sample_weight, - freq_weight=freq_weight, - sample_var=sample_var, - groups=groups) - + cached_values = self._fit_compute_final_T(cached_values) + self._fit_final(cached_values) return self @property @@ -794,21 +816,43 @@ def _fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None, group Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, groups=groups) return nuisances, fitted_models, fitted_inds, scores - - def _fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, - freq_weight=None, sample_var=None, groups=None): - self._ortho_learner_model_final.fit(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, - nuisances=nuisances, - sample_weight=sample_weight, - freq_weight=freq_weight, - sample_var=sample_var, - groups=groups)) + + def _set_bootstrap_params(self, indices, n_jobs, verbose): + self._bootstrap_indices = indices + self._n_jobs = n_jobs + self._verbose = verbose + + def _fit_final(self, cached_values, final_model=None): + if final_model is None: + final_model = self._ortho_learner_model_final + if self._clone_model_finals: + def fit(x, **kwargs): + x.fit(**filter_none_kwargs(**kwargs)) + return x + cached_values_dict = cached_values._asdict() + del cached_values_dict["output_T"] + + Parallel(n_jobs=self._n_jobs, prefer='threads', verbose=self._verbose)( + delayed(fit)(cloned_final_model, + **{arg: convertArg(cached_values_dict[arg], inds) for arg in cached_values_dict}) + for inds, cloned_final_model in zip(self._bootstrap_indices, self._cloned_final_models) + ) + final_model.fit(cached_values.Y, cached_values.T, **filter_none_kwargs(X=cached_values.X, + W=cached_values.W, + Z=cached_values.Z, + nuisances=cached_values.nuisances, + sample_weight=cached_values.sample_weight, + freq_weight=cached_values.freq_weight, + sample_var=cached_values.sample_var, + groups=cached_values.groups)) self.score_ = None - if hasattr(self._ortho_learner_model_final, 'score'): - self.score_ = self._ortho_learner_model_final.score(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, - nuisances=nuisances, - sample_weight=sample_weight, - 
groups=groups)) + if hasattr(final_model, 'score'): + self.score_ = final_model.score(cached_values.Y, cached_values.T, **filter_none_kwargs(X=cached_values.X, + W=cached_values.W, + Z=cached_values.Z, + nuisances=cached_values.nuisances, + sample_weight=cached_values.sample_weight, + groups=cached_values.groups)) def const_marginal_effect(self, X=None): X, = check_input_arrays(X) diff --git a/econml/inference/_bootstrap.py b/econml/inference/_bootstrap.py index 640eeed0e..c130e58dc 100644 --- a/econml/inference/_bootstrap.py +++ b/econml/inference/_bootstrap.py @@ -9,6 +9,7 @@ from collections import OrderedDict import pandas as pd +from ..utilities import convertArg class BootstrapEstimator: """Estimator that uses bootstrap sampling to wrap an existing estimator. @@ -39,6 +40,10 @@ class BootstrapEstimator: n_jobs: int, default: None The maximum number of concurrently running jobs, as in joblib.Parallel. + only_final : bool, default True + Whether to bootstrap only the final model, for estimators that do cross-fitting. + Ignored for estimators where this does not apply. + verbose: int, default: 0 Verbosity level @@ -56,12 +61,16 @@ class BootstrapEstimator: def __init__(self, wrapped, n_bootstrap_samples=100, n_jobs=None, + only_final=True, verbose=0, compute_means=True, bootstrap_type='pivot'): + if not hasattr(wrapped, "_gen_ortho_learner_model_final"): + only_final = False self._instances = [clone(wrapped, safe=False) for _ in range(n_bootstrap_samples)] self._n_bootstrap_samples = n_bootstrap_samples self._n_jobs = n_jobs + self._only_final = only_final self._verbose = verbose self._compute_means = compute_means self._bootstrap_type = bootstrap_type @@ -79,51 +88,69 @@ def __stratified_indices(arr): indices.append(ind) return indices - def fit(self, *args, **named_args): - """ - Fit the model. - - The full signature of this method is the same as that of the wrapped object's `fit` method. - """ + def _compute_subsets(self, *args, **named_args): from .._cate_estimator import BaseCateEstimator # need to nest this here to avoid circular import + from ..panel.dml import DynamicDML index_chunks = None - if isinstance(self._instances[0], BaseCateEstimator): + indices = [] + if isinstance(self._wrapped, BaseCateEstimator): index_chunks = self._instances[0]._strata(*args, **named_args) - if index_chunks is not None: + if (index_chunks is not None): index_chunks = self.__stratified_indices(index_chunks) if index_chunks is None: n_samples = np.shape(args[0] if args else named_args[(*named_args,)[0]])[0] index_chunks = [np.arange(n_samples)] # one chunk with all indices + # For DynamicDML only + # Take n_bootstrap sets of samples of length n_panels among arange(n_panels) and then each sample corresponds with the chunk + if isinstance(self._wrapped, DynamicDML): + n_index_chunks = len(index_chunks) + bootstrapped_chunk_indices = np.random.choice(n_index_chunks, + size=(self._n_bootstrap_samples, n_index_chunks), + replace=True) + for i in range(self._n_bootstrap_samples): + samples = bootstrapped_chunk_indices[i] + sample_chunk_indices = [index_chunks[j] for j in samples] + indices_sample = np.hstack(sample_chunk_indices) + indices.append(indices_sample) + indices = np.array(indices) + else: + for chunk in index_chunks: + n_samples = len(chunk) + sample = chunk[np.random.choice(n_samples, + size=(self._n_bootstrap_samples, n_samples), + replace=True)] + indices.append(sample) + indices = np.hstack(indices) + return indices + + def fit(self, *args, **named_args): + """ + Fit the model. 
- indices = [] - for chunk in index_chunks: - n_samples = len(chunk) - indices.append(chunk[np.random.choice(n_samples, - size=(self._n_bootstrap_samples, n_samples), - replace=True)]) - - indices = np.hstack(indices) + The full signature of this method is the same as that of the wrapped object's `fit` method. + """ + if self._only_final: + self._wrapped._gen_cloned_ortho_learner_model_finals(self._n_bootstrap_samples) + def fit(x, *args, **kwargs): x.fit(*args, **kwargs) return x # Explicitly return x in case fit fails to return its target - def convertArg(arg, inds): - if arg is None: - return None - arr = np.asarray(arg) - if arr.ndim > 0: - return arr[inds] - else: # arg was a scalar, so we shouldn't have converted it - return arg - - self._instances = Parallel(n_jobs=self._n_jobs, prefer='threads', verbose=self._verbose)( - delayed(fit)(obj, - *[convertArg(arg, inds) for arg in args], - **{arg: convertArg(named_args[arg], inds) for arg in named_args}) - for obj, inds in zip(self._instances, indices) - ) + indices = self._compute_subsets(*args, **named_args) + + if not self._only_final: + self._instances = Parallel(n_jobs=self._n_jobs, prefer='threads', verbose=self._verbose)( + delayed(fit)(obj, + *[convertArg(arg, inds) for arg in args], + **{arg: convertArg(named_args[arg], inds) for arg in named_args}) + for obj, inds in zip(self._instances, indices) + ) + else: + self._wrapped._set_bootstrap_params(indices, self._n_bootstrap_samples, self._verbose) + self._wrapped.fit(*args, **named_args) + self._instances = [clone(self._wrapped, safe=False)] return self def __getattr__(self, name): @@ -139,8 +166,16 @@ def __getattr__(self, name): def proxy(make_call, name, summary): def summarize_with(f): - results = np.array(Parallel(n_jobs=self._n_jobs, prefer='threads', verbose=self._verbose)( - (f, (obj, name), {}) for obj in self._instances)), f(self._wrapped, name) + instance_results = [] + obj = clone(self._wrapped, safe=False) + for i in range(self._n_bootstrap_samples): + if self._only_final: + obj._set_current_cloned_ortho_learner_model_final(i) + else: + obj = self._instances[i] + instance_results.append(f(obj, name)) + instance_results = np.array(instance_results) + results = instance_results, f(self._wrapped, name) return summary(*results) if make_call: def call(*args, **kwargs): @@ -151,11 +186,11 @@ def call(*args, **kwargs): def get_mean(): # for attributes that exist on the wrapped object, just compute the mean of the wrapped calls - return proxy(callable(getattr(self._instances[0], name)), name, lambda arr, _: np.mean(arr, axis=0)) + return proxy(callable(getattr(self._wrapped, name)), name, lambda arr, _: np.mean(arr, axis=0)) def get_std(): prefix = name[: - len('_std')] - return proxy(callable(getattr(self._instances[0], prefix)), prefix, + return proxy(callable(getattr(self._wrapped, prefix)), prefix, lambda arr, _: np.std(arr, axis=0)) def get_interval(): @@ -182,7 +217,7 @@ def normal_bootstrap(arr, est): 'pivot': pivot_bootstrap}[self._bootstrap_type] return proxy(can_call, prefix, fn) - can_call = callable(getattr(self._instances[0], prefix)) + can_call = callable(getattr(self._wrapped, prefix)) if can_call: # collect extra arguments and pass them through, if the wrapped attribute was callable def call(*args, lower=5, upper=95, **kwargs): @@ -208,10 +243,10 @@ def fname_transformer(x): inf_type = 'effect' elif prefix == 'coef_': inf_type = 'coefficient' - if (hasattr(self._instances[0], 'cate_feature_names') and - callable(self._instances[0].cate_feature_names)): + if 
(hasattr(self._wrapped, 'cate_feature_names') and + callable(self._wrapped.cate_feature_names)): def fname_transformer(x): - return self._instances[0].cate_feature_names(x) + return self._wrapped.cate_feature_names(x) elif prefix == 'intercept_': inf_type = 'intercept' else: @@ -223,7 +258,7 @@ def fname_transformer(x): d_t = None d_y = self._wrapped._d_y[0] if self._wrapped._d_y else 1 - can_call = callable(getattr(self._instances[0], prefix)) + can_call = callable(getattr(self._wrapped, prefix)) kind = self._bootstrap_type if kind == 'percentile' or kind == 'pivot': diff --git a/econml/inference/_inference.py b/econml/inference/_inference.py index 671345550..e72b4fedd 100644 --- a/econml/inference/_inference.py +++ b/econml/inference/_inference.py @@ -71,6 +71,10 @@ class BootstrapInference(Inference): verbose: int, default: 0 Verbosity level + only_final : bool, default True + Whether to bootstrap only the final model, for estimators that do cross-fitting. + Ignored for estimators where this does not apply. + bootstrap_type: 'percentile', 'pivot', or 'normal', default 'pivot' Bootstrap method used to compute results. 'percentile' will result in using the empiracal CDF of the replicated computations of the statistics. @@ -79,14 +83,15 @@ class BootstrapInference(Inference): 'normal' will instead compute a pivot interval assuming the replicates are normally distributed. """ - def __init__(self, n_bootstrap_samples=100, n_jobs=-1, bootstrap_type='pivot', verbose=0): + def __init__(self, n_bootstrap_samples=100, n_jobs=-1, only_final=True, bootstrap_type='pivot', verbose=0): self._n_bootstrap_samples = n_bootstrap_samples self._n_jobs = n_jobs + self._only_final = only_final self._bootstrap_type = bootstrap_type self._verbose = verbose def fit(self, estimator, *args, **kwargs): - est = BootstrapEstimator(estimator, self._n_bootstrap_samples, self._n_jobs, compute_means=False, + est = BootstrapEstimator(estimator, self._n_bootstrap_samples, self._n_jobs, self._only_final, compute_means=False, bootstrap_type=self._bootstrap_type, verbose=self._verbose) filtered_kwargs = filter_none_kwargs(**kwargs) est.fit(*args, **filtered_kwargs) diff --git a/econml/panel/dml/_dml.py b/econml/panel/dml/_dml.py index 2cfd1f1bb..6ac2790cc 100644 --- a/econml/panel/dml/_dml.py +++ b/econml/panel/dml/_dml.py @@ -23,12 +23,21 @@ def _get_groups_period_filter(groups, n_periods): + """ + Computes a dictionary from time periods to corresponding rows. + + This assumes that there are a multiple of `n_periods` rows in each group. 
The corresponding rows are + then assumed to be in order within that group and are assigned to time periods accordingly + """ group_counts = {} group_period_filter = {i: [] for i in range(n_periods)} for i, g in enumerate(groups): if g not in group_counts: group_counts[g] = 0 - group_period_filter[group_counts[g]].append(i) + # Typically, we expect each group to occur exactly n_periods times; + # however, when bootstrapping, all of each group's entries may be copied and occur more than once + # so we use % to ensure each one ends up in the correct bucket + group_period_filter[group_counts[g] % n_periods].append(i) group_counts[g] += 1 return group_period_filter @@ -157,6 +166,10 @@ def __init__(self, model_final, n_periods): def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None, groups=None): # NOTE: sample weight, sample var are not passed in + _, group_counts = np.unique(groups, return_counts=True) + unique_group_counts = np.unique(group_counts) + assert np.all(unique_group_counts % self.n_periods == 0), \ + "Each group should appear in whole multiples in bootstrapping" period_filters = _get_groups_period_filter(groups, self.n_periods) Y_res, T_res = nuisances self._d_y = Y.shape[1:] diff --git a/econml/tests/test_bootstrap.py b/econml/tests/test_bootstrap.py index 3cb6dac78..c604a0a01 100644 --- a/econml/tests/test_bootstrap.py +++ b/econml/tests/test_bootstrap.py @@ -11,68 +11,70 @@ import numpy as np import unittest import joblib - +import scipy +import time class TestBootstrap(unittest.TestCase): def test_with_sklearn(self): """Test that we can bootstrap sklearn estimators.""" for n_jobs in [None, -1]: # test parallelism - for kind in ['percentile', 'pivot', 'normal']: # test both percentile and pivot intervals - x = np.random.normal(size=(1000, 1)) - y = x * 0.5 + np.random.normal(size=(1000, 1)) - y = y.flatten() - - est = LinearRegression() - est.fit(x, y) - - bs = BootstrapEstimator(est, 50, n_jobs=n_jobs, bootstrap_type=kind) - # test that we can fit with the same arguments as the base estimator - bs.fit(x, y) - - # test that we can get the same attribute for the bootstrap as the original, with the same shape - self.assertEqual(np.shape(est.coef_), np.shape(bs.coef_)) - - # test that we can get an interval for the same attribute for the bootstrap as the original, - # with the same shape for the lower and upper bounds - lower, upper = bs.coef__interval() - for bound in [lower, upper]: - self.assertEqual(np.shape(est.coef_), np.shape(bound)) - - # test that the lower and upper bounds differ - assert (lower <= upper).all() - assert (lower < upper).any() - - # test that we can do the same thing once we provide percentile bounds - lower, upper = bs.coef__interval(lower=10, upper=90) - for bound in [lower, upper]: - self.assertEqual(np.shape(est.coef_), np.shape(bound)) - - # test that the lower and upper bounds differ - assert (lower <= upper).all() - assert (lower < upper).any() - - # test that we can do the same thing with the results of a method, rather than an attribute - self.assertEqual(np.shape(est.predict(x)), np.shape(bs.predict(x))) - - # test that we can get an interval for the same attribute for the bootstrap as the original, - # with the same shape for the lower and upper bounds - lower, upper = bs.predict_interval(x) - for bound in [lower, upper]: - self.assertEqual(np.shape(est.predict(x)), np.shape(bound)) - - # test that the lower and upper bounds differ - assert (lower <= upper).all() - assert (lower < upper).any() - - # 
test that we can do the same thing once we provide percentile bounds - lower, upper = bs.predict_interval(x, lower=10, upper=90) - for bound in [lower, upper]: - self.assertEqual(np.shape(est.predict(x)), np.shape(bound)) - - # test that the lower and upper bounds differ - assert (lower <= upper).all() - assert (lower < upper).any() + for only_final in [False, True]: + for kind in ['percentile', 'pivot', 'normal']: # test both percentile and pivot intervals + x = np.random.normal(size=(1000, 1)) + y = x * 0.5 + np.random.normal(size=(1000, 1)) + y = y.flatten() + + est = LinearRegression() + est.fit(x, y) + + bs = BootstrapEstimator(est, 50, n_jobs=n_jobs, only_final=only_final, bootstrap_type=kind) + # test that we can fit with the same arguments as the base estimator + bs.fit(x, y) + + # test that we can get the same attribute for the bootstrap as the original, with the same shape + self.assertEqual(np.shape(est.coef_), np.shape(bs.coef_)) + + # test that we can get an interval for the same attribute for the bootstrap as the original, + # with the same shape for the lower and upper bounds + lower, upper = bs.coef__interval() + for bound in [lower, upper]: + self.assertEqual(np.shape(est.coef_), np.shape(bound)) + + # test that the lower and upper bounds differ + assert (lower <= upper).all() + assert (lower < upper).any() + + # test that we can do the same thing once we provide percentile bounds + lower, upper = bs.coef__interval(lower=10, upper=90) + for bound in [lower, upper]: + self.assertEqual(np.shape(est.coef_), np.shape(bound)) + + # test that the lower and upper bounds differ + assert (lower <= upper).all() + assert (lower < upper).any() + + # test that we can do the same thing with the results of a method, rather than an attribute + self.assertEqual(np.shape(est.predict(x)), np.shape(bs.predict(x))) + + # test that we can get an interval for the same attribute for the bootstrap as the original, + # with the same shape for the lower and upper bounds + lower, upper = bs.predict_interval(x) + for bound in [lower, upper]: + self.assertEqual(np.shape(est.predict(x)), np.shape(bound)) + + # test that the lower and upper bounds differ + assert (lower <= upper).all() + assert (lower < upper).any() + + # test that we can do the same thing once we provide percentile bounds + lower, upper = bs.predict_interval(x, lower=10, upper=90) + for bound in [lower, upper]: + self.assertEqual(np.shape(est.predict(x)), np.shape(bound)) + + # test that the lower and upper bounds differ + assert (lower <= upper).all() + assert (lower < upper).any() def test_with_econml(self): """Test that we can bootstrap econml estimators.""" @@ -329,3 +331,25 @@ def test_all_kinds(self): assert i[0].shape == i[1].shape == inf.point_estimate.shape assert np.allclose(i[0], inf.conf_int()[0]) assert np.allclose(i[1], inf.conf_int()[1]) + + def test_bootstrap_only_final_time(self): + X = np.random.normal(size=(1000, 5)) + T = np.random.binomial(1, scipy.special.expit(X[:, 0])) + y = (1 + .5*X[:, 0]) * T + X[:, 0] + np.random.normal(size=(1000,)) + n_bootstrap_samples = 10 + + est = LinearDML(discrete_treatment=True) + start_time = time.time() + est.fit(y, T, X=X, W=None, inference=BootstrapInference(n_bootstrap_samples=n_bootstrap_samples, only_final=False)) + end_time = time.time() + elapsed_time = end_time - start_time + all_bootstrapping_time = elapsed_time + + est = LinearDML(discrete_treatment=True) + start_time = time.time() + est.fit(y, T, X=X, W=None, 
inference=BootstrapInference(n_bootstrap_samples=n_bootstrap_samples, only_final=True)) + end_time = time.time() + elapsed_time = end_time - start_time + final_bootstrapping_time = elapsed_time + + assert (all_bootstrapping_time > final_bootstrapping_time) \ No newline at end of file diff --git a/econml/utilities.py b/econml/utilities.py index a7becc0ad..eabaad410 100644 --- a/econml/utilities.py +++ b/econml/utilities.py @@ -45,6 +45,21 @@ def transform(self, X): """Perform the identity transform, which returns the input unmodified.""" return X +def convertArg(arg, inds): + def convertArg_(arg, inds): + arr = np.asarray(arg) + if arr.ndim > 0: + return arr[inds] + else: # arg was a scalar, so we shouldn't have converted it + return arg + if arg is None: + return None + if isinstance(arg, tuple): + converted_arg = [] + for arg_param in arg: + converted_arg.append(convertArg_(arg_param, inds)) + return tuple(converted_arg) + return convertArg_(arg, inds) def parse_final_model_params(coef, intercept, d_y, d_t, d_t_in, bias_part_of_coef, fit_cate_intercept): dt = d_t
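
A quick illustration of the `convertArg` helper that the patch promotes into `econml/utilities.py` (previously a closure inside `BootstrapEstimator.fit`), now extended to recurse into tuples: it applies one set of bootstrap row indices to a fit argument, so the cached `nuisances` tuple can be resampled alongside the other cached arrays, while scalars and `None` pass through unchanged. The sketch below mirrors the patched function under the hypothetical name `convert_arg`; the demo inputs are invented.

```python
import numpy as np

# Sketch mirroring econml.utilities.convertArg as added by this patch (renamed convert_arg here).
def convert_arg(arg, inds):
    def convert_one(a):
        arr = np.asarray(a)
        # index row-wise when the argument is array-like, pass scalars through unchanged
        return arr[inds] if arr.ndim > 0 else a
    if arg is None:
        return None
    if isinstance(arg, tuple):
        # recurse into tuples, e.g. the cached `nuisances` tuple
        return tuple(convert_one(a) for a in arg)
    return convert_one(arg)

inds = np.array([0, 0, 3, 4, 2])                       # one bootstrap resample of 5 rows
Y = np.arange(5.0)
nuisances = (np.arange(10).reshape(5, 2), np.arange(5.0))
print(convert_arg(Y, inds))                             # [0. 0. 3. 4. 2.]
print(convert_arg(nuisances, inds)[0].shape)            # (5, 2), resampled row-wise
print(convert_arg(1.5, inds), convert_arg(None, inds))  # 1.5 None
```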
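
Under `only_final=True`, `BootstrapEstimator.fit` hands the bootstrap index sets to the wrapped estimator via `_set_bootstrap_params`, and `_OrthoLearner._fit_final` then fits every cloned final model on its own resample of the cached values, while the nuisance models are cross-fit only once. The sketch below is a hypothetical condensation of that flow, not the estimator's actual internals: `cached_values` is assumed to be a plain dict of fit arguments, and it reuses the `convert_arg` helper from the previous snippet.

```python
from joblib import Parallel, delayed
from sklearn.base import clone

# Hypothetical condensation of the only_final flow: nuisances/cached values are computed
# once, then every cloned final model is fit in parallel on one resample of those values.
def fit_cloned_finals(final_model, cached_values, bootstrap_indices, n_jobs=None, verbose=0):
    clones = [clone(final_model, safe=False) for _ in bootstrap_indices]

    def fit_one(model, inds):
        # resample every non-None cached argument (Y, T, X, nuisances, weights, ...) the same way,
        # dropping None entries in the spirit of filter_none_kwargs
        kwargs = {name: convert_arg(value, inds)
                  for name, value in cached_values.items() if value is not None}
        model.fit(**kwargs)
        return model

    return Parallel(n_jobs=n_jobs, prefer='threads', verbose=verbose)(
        delayed(fit_one)(model, inds) for model, inds in zip(clones, bootstrap_indices))
```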
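
From the caller's side, the fast path is selected (and on by default) through the new `only_final` flag on `BootstrapInference`, as exercised by `test_bootstrap_only_final_time`. The data-generating lines below are lifted from that test; the trailing `effect_interval` call is an assumed, typical follow-up rather than part of the patch.

```python
import numpy as np
import scipy.special
from econml.dml import LinearDML
from econml.inference import BootstrapInference

X = np.random.normal(size=(1000, 5))
T = np.random.binomial(1, scipy.special.expit(X[:, 0]))
y = (1 + .5 * X[:, 0]) * T + X[:, 0] + np.random.normal(size=(1000,))

est = LinearDML(discrete_treatment=True)
# only_final=True (the default): nuisances are cross-fit once on the full data and
# only the final model is refit on each bootstrap resample of the cached values.
est.fit(y, T, X=X, W=None,
        inference=BootstrapInference(n_bootstrap_samples=10, only_final=True))
lb, ub = est.effect_interval(X, alpha=0.1)  # assumed downstream use of the bootstrap CIs
```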
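
The panel estimators rely on the modulo change in `_get_groups_period_filter`: when the bootstrap duplicates an entire group, its repeated rows keep cycling through the periods in order instead of indexing a period that does not exist. Below is a standalone copy of the patched function with an invented two-period example.

```python
# Standalone copy of _get_groups_period_filter from econml/panel/dml/_dml.py as patched above:
# each row is assigned to a period by its within-group position modulo n_periods.
def get_groups_period_filter(groups, n_periods):
    group_counts = {}
    group_period_filter = {i: [] for i in range(n_periods)}
    for i, g in enumerate(groups):
        if g not in group_counts:
            group_counts[g] = 0
        # With bootstrapping, a group's rows can appear more than n_periods times;
        # without the modulo, the third row of group 7 below would index period 2 (KeyError).
        group_period_filter[group_counts[g] % n_periods].append(i)
        group_counts[g] += 1
    return group_period_filter

# Two periods, two panel groups; group 7 is duplicated as a whole by the bootstrap resample.
groups = [7, 7, 9, 9, 7, 7]
print(get_groups_period_filter(groups, n_periods=2))
# {0: [0, 2, 4], 1: [1, 3, 5]}
```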
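
Relatedly, `BootstrapEstimator._compute_subsets` resamples whole panel groups for `DynamicDML`, so each group's rows stay together and in period order. A minimal numpy sketch of that sampling scheme, with made-up index chunks:

```python
import numpy as np

# Made-up example: three panel groups of two periods each, one index chunk per group.
index_chunks = [np.array([0, 1]), np.array([2, 3]), np.array([4, 5])]
n_bootstrap_samples = 4

# Sample whole chunks (groups) with replacement, then concatenate their row indices,
# mirroring the DynamicDML branch of BootstrapEstimator._compute_subsets.
chunk_draws = np.random.choice(len(index_chunks),
                               size=(n_bootstrap_samples, len(index_chunks)),
                               replace=True)
indices = np.array([np.hstack([index_chunks[j] for j in draw]) for draw in chunk_draws])
print(indices.shape)  # (4, 6): one full-length resample of row indices per bootstrap replicate
```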