From bc35391b0ad9e5981ae8eddcce7ee356d3c22ee3 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 15 Nov 2024 22:07:47 -0500
Subject: [PATCH 01/34] Improved plotting acq ops

---
 src/mitim_tools/opt_tools/OPTtools.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mitim_tools/opt_tools/OPTtools.py b/src/mitim_tools/opt_tools/OPTtools.py
index 63c0da47..40bd926f 100644
--- a/src/mitim_tools/opt_tools/OPTtools.py
+++ b/src/mitim_tools/opt_tools/OPTtools.py
@@ -834,7 +834,7 @@ def plotInfo(
         )
         # ---------- Plot Residue
         GRAPHICStools.plotMultiVariate(
-            np.transpose(np.atleast_2d(infoOPT["y_res_start"])),
+            -np.transpose(np.atleast_2d(infoOPT["y_res_start"])),
             axs=axR,
             marker="s",
             markersize=ms,
@@ -869,7 +869,7 @@ def plotInfo(
         )
         # ---------- Plot Residue
         GRAPHICStools.plotMultiVariate(
-            np.transpose(np.atleast_2d(infoOPT["y_res"])),
+            -np.transpose(np.atleast_2d(infoOPT["y_res"])),
             axs=axR,
             marker="s",
             markersize=ms,
@@ -894,17 +894,17 @@ def plotInfo(
 
     if not plotStart:
         y = (
-            -infoOPT["acq_evaluated"]
+            infoOPT["acq_evaluated"]
             if "acq_evaluated" in infoOPT
-            else -infoOPT["y_res"]
+            else infoOPT["y_res"]
         )
     else:
-        y = -infoOPT["y_res_start"]
+        y = infoOPT["y_res_start"]
 
     if not plotStart:
-        yo = -infoOPT["y_res"][0]
+        yo = infoOPT["y_res"][0]
     else:
-        yo = -infoOPT["y_res_start"][0]
+        yo = infoOPT["y_res_start"][0]
 
     x_origin = 0 + it_start
     x_last = len(y) - 1 + it_start

From 439ffa0d3754ef44d6575feebaa40c05d8d950b6 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 15 Nov 2024 22:07:58 -0500
Subject: [PATCH 02/34] added non log ei

---
 src/mitim_tools/opt_tools/STEPtools.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index bffb5af4..0e8708b5 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -378,6 +378,16 @@ def residual(Y, X = None):
                 )
             )
 
+        elif self.acquisition_type == "ei_mc":
+            self.evaluators["acq_function"] = (
+                botorch.acquisition.monte_carlo.qExpectedImprovement(
+                    self.evaluators["GP"].gpmodel,
+                    objective=self.evaluators["objective"],
+                    best_f=self.evaluators["objective"](self.evaluators["GP"].train_Y.unsqueeze(1)).max(),
+                    sampler=sampler
+                )
+            )
+
         elif self.acquisition_type == "logei_mc":
             self.evaluators["acq_function"] = (
                 botorch.acquisition.logei.qLogExpectedImprovement(

From ff68bd6ec15afc7be42c66d48a3454125561b83b Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 15 Nov 2024 22:08:13 -0500
Subject: [PATCH 03/34] misc plotting

---
 src/mitim_tools/opt_tools/STRATEGYtools.py | 35 +++++++---------------
 1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/src/mitim_tools/opt_tools/STRATEGYtools.py b/src/mitim_tools/opt_tools/STRATEGYtools.py
index 5fb31e74..f12139ad 100644
--- a/src/mitim_tools/opt_tools/STRATEGYtools.py
+++ b/src/mitim_tools/opt_tools/STRATEGYtools.py
@@ -1662,7 +1662,7 @@ def plotAcquisitionOptimization(self, fn=None, step_from=0, step_to=-1):
 
         fig = fn.add_figure(label='Acquisition Convergence')
 
-        axs = GRAPHICStools.producePlotsGrid(len(step_num), fig=fig, hspace=0.6, wspace=0.3, sharex=False, sharey=False)
+        axs = GRAPHICStools.producePlotsGrid(len(step_num), fig=fig, hspace=0.6, wspace=0.3)
 
         for step in step_num:
 
@@ -1681,20 +1681,18 @@ def plotAcquisitionOptimization(self, fn=None, step_from=0, step_to=-1):
             for ix in range(self.steps[step].train_X.shape[0]):
                 acq_trained[ix] = acq(torch.Tensor(self.steps[step].train_X[ix,:]).unsqueeze(0)).item()
 
-
             # Plot
-            ax.plot(y_acq, c='b')
-            ax.axhline(y=acq_trained.max(), c='r', ls='--', lw=0.5, label='max acq of trained points')
+            ax.plot(y_acq,'-o', c='g', markersize=2, lw = 0.5, label='max of batch')
+            ax.axhline(y=acq_trained.max(), c='r', ls='--', lw=1.0, label='max trained')
+            ax.axhline(y=y_acq[0], c='b', ls='--', lw=1.0, label='max of guesses')
 
             ax.set_title(f'BO Step #{step}')
-            ax.set_ylabel('acquisition')
-            ax.set_xlabel('iteration')
+            ax.set_ylabel('$f_{acq}$ (to max)')
+            ax.set_xlabel('Evaluations')
             if step == step_num[0]:
-                ax.legend()
+                ax.legend(loc='best')
 
             GRAPHICStools.addDenseAxis(ax)
-            ax.set_ylim(top=0.0)
-
 
     def plotModelStatus(
         self, fn=None, boStep=-1, plotsPerFigure=20, stds=2, tab_color=None
@@ -1878,21 +1876,12 @@ def plotSurrogateOptimization(self, fig1=None, fig2=None, boStep=-1):
 
         xypair = np.array(xypair)
 
-        loga = True if xypair[:, 1].min() > 0 else False
-
         axsDVs[0].legend(prop={"size": 5})
-        if loga:
-            for p in range(len(axsOFs)):
-                axsOFs[p].set_xscale("log")
-                axsOFs[p].set_yscale("log")
-
         ax1_r.set_ylabel("DV values")
         GRAPHICStools.addDenseAxis(ax1_r)
         GRAPHICStools.autoscale_y(ax1_r)
 
-        ax2_r.set_ylabel("Residual values")
-        if loga:
-            ax2_r.set_yscale("log")
+        ax2_r.set_ylabel("Acquisition values")
         GRAPHICStools.addDenseAxis(ax2_r)
         GRAPHICStools.autoscale_y(ax2_r)
 
@@ -1901,20 +1890,18 @@ def plotSurrogateOptimization(self, fig1=None, fig2=None, boStep=-1):
         iinfo = info[-1]["info"]
         for i, y in enumerate(iinfo["y_res"]):
             ax0_r.axhline(
-                y=-y,
+                y=y,
                 c=colors[ipost + 1],
                 ls="--",
                 lw=2,
                 label=info[-1]["method"] if i == 0 else "",
             )
         iinfo = info[0]["info"]
-        ax0_r.axhline(y=-iinfo["y_res_start"][0], c="k", ls="--", lw=2)
+        ax0_r.axhline(y=iinfo["y_res_start"][0], c="k", ls="--", lw=2)
 
         ax0_r.set_xlabel("Optimization iterations")
-        ax0_r.set_ylabel("$-f_{acq}$")
+        ax0_r.set_ylabel("$f_{acq}$")
         GRAPHICStools.addDenseAxis(ax0_r)
-        if loga:
-            ax0_r.set_yscale("log")
         ax0_r.legend(loc="best", prop={"size": 8})
         ax0_r.set_title("Evolution of acquisition in optimization stages")
 

From e088f115ffe0e6a0563519eede043afee368e575 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 15 Nov 2024 22:08:40 -0500
Subject: [PATCH 04/34] Improved callable

---
 .../opt_tools/optimizers/BOTORCHoptim.py         | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
index 1cae35b1..f3c1b278 100644
--- a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
+++ b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
@@ -49,11 +49,17 @@ def findOptima(fun, optimization_params = {}, writeTrajectory=False):
 
     acq_evaluated = []
     if writeTrajectory:
-        def new_call(self, x, *args, v=acq_evaluated, **kwargs):
-            f = fun_opt(x, *args, **kwargs)
-            v.append(f.max().item())
-            return f
-        fun_opt.__call__ = types.MethodType(new_call, fun_opt)
+        class CustomFunctionWrapper:
+            def __init__(self, func, eval_list):
+                self.func = func
+                self.eval_list = eval_list
+
+            def __call__(self, x, *args, **kwargs):
+                f = self.func(x, *args, **kwargs)
+                self.eval_list.append(f.max().item())
+                return f
+
+        fun_opt = CustomFunctionWrapper(fun_opt, acq_evaluated)
 
     seq_message = f'({"sequential" if sequential_q else "joint"}) ' if q>1 else ''
     print(f"\t\t- Optimizing using optimize_acqf: {q = } {seq_message}, {num_restarts = }, {raw_samples = }")

From ff20a3185d1dd76ec4e9cd7c9c75e2ad6de0d009 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 15 Nov 2024 22:08:48 -0500
Subject: [PATCH 05/34] Increased default num_restarts

---
 templates/main.namelist.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/main.namelist.json b/templates/main.namelist.json
index 6f32673e..03351169 100644
--- a/templates/main.namelist.json
+++ b/templates/main.namelist.json
@@ -24,7 +24,7 @@
         "acquisition_params" : {
             "mc_samples": 1024,
             "acquisition_optimization" : {
-                "num_restarts": 64,
+                "num_restarts": 128,
                 "raw_samples": 1024
             }
         },

From 0da002e3fb7857b0750f0b9ed0546016c36a128b Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 15 Nov 2024 22:41:55 -0500
Subject: [PATCH 06/34] Generalization of torch device

---
 src/mitim_modules/freegsu/FREEGSUmain.py      |  3 ++-
 src/mitim_modules/portals/PORTALSmain.py      | 11 +++++-----
 .../portals/utils/PORTALSinit.py              |  9 +++++++-
 .../portals/utils/PORTALSoptimization.py      |  4 ++++
 src/mitim_modules/powertorch/STATEtools.py    | 16 +++++---------
 .../powertorch/utils/TRANSFORMtools.py        |  4 ++--
 src/mitim_modules/vitals/VITALSmain.py        |  3 ++-
 src/mitim_tools/misc_tools/FARMINGtools.py    |  5 -----
 src/mitim_tools/opt_tools/STRATEGYtools.py    | 21 +++++++------------
 src/mitim_tools/opt_tools/SURROGATEtools.py   | 14 +------------
 10 files changed, 38 insertions(+), 52 deletions(-)

diff --git a/src/mitim_modules/freegsu/FREEGSUmain.py b/src/mitim_modules/freegsu/FREEGSUmain.py
index 0830a019..adb281a9 100644
--- a/src/mitim_modules/freegsu/FREEGSUmain.py
+++ b/src/mitim_modules/freegsu/FREEGSUmain.py
@@ -33,7 +33,7 @@ def default_namelist(optimization_options):
 
 
 class freegsu(STRATEGYtools.opt_evaluator):
-    def __init__(self, folder, namelist=None, function_parameters={}):
+    def __init__(self, folder, namelist=None, function_parameters={}, **kwargs):
         print(
             "\n-----------------------------------------------------------------------------------------"
         )
@@ -49,6 +49,7 @@ def __init__(self, folder, namelist=None, function_parameters={}):
             folder,
             namelist=namelist,
             default_namelist_function=default_namelist if (namelist is None) else None,
+            **kwargs
         )
 
     def prep(
diff --git a/src/mitim_modules/portals/PORTALSmain.py b/src/mitim_modules/portals/PORTALSmain.py
index 320befdd..aec99d35 100644
--- a/src/mitim_modules/portals/PORTALSmain.py
+++ b/src/mitim_modules/portals/PORTALSmain.py
@@ -86,7 +86,10 @@ def __init__(
         self, 
         folder,                             # Folder where the PORTALS workflow will be run
         namelist=None,                      # If None, default namelist will be used. If not None, it will be read and used
-        TensorsType=torch.double,           # Type of tensors to be used (torch.float, torch.double)
+        tensor_opts = {
+            "dtype": torch.double,
+            "device": torch.device("cpu"),
+        },
         CGYROrun=False,                     # If True, use CGYRO defaults for best optimization practices
         portals_transformation_variables = None,          # If None, use defaults for both main and trace
         portals_transformation_variables_trace = None,
@@ -109,7 +112,7 @@ def __init__(
         super().__init__(
             folder,
             namelist=namelist,
-            TensorsType=TensorsType,
+            tensor_opts=tensor_opts,
             default_namelist_function=(
                 partial(default_namelist, CGYROrun=CGYROrun)
                 if (namelist is None)
@@ -186,8 +189,6 @@ def __init__(
 		---------------------------------------------
 		"""
 
-        
-
         (
             portals_transformation_variables,
             portals_transformation_variables_trace,
@@ -315,7 +316,7 @@ def prep(
             limitsAreRelative=limitsAreRelative,
             cold_start=cold_start,
             hardGradientLimits=hardGradientLimits,
-            dfT=self.dfT,
+            tensor_opts = self.tensor_opts,
             seedInitial=seedInitial,
             checkForSpecies=askQuestions,
             ModelOptions=ModelOptions,
diff --git a/src/mitim_modules/portals/utils/PORTALSinit.py b/src/mitim_modules/portals/utils/PORTALSinit.py
index 059022c2..09131416 100644
--- a/src/mitim_modules/portals/utils/PORTALSinit.py
+++ b/src/mitim_modules/portals/utils/PORTALSinit.py
@@ -26,10 +26,13 @@ def initializeProblem(
     dvs_fixed=None,
     start_from_folder=None,
     define_ranges_from_profiles=None,
-    dfT=torch.randn((2, 2), dtype=torch.double),
     ModelOptions=None,
     seedInitial=None,
     checkForSpecies=True,
+    tensor_opts = {
+        "dtype": torch.double,
+        "device": torch.device("cpu"),
+    }
     ):
     """
     Notes:
@@ -39,6 +42,8 @@ def initializeProblem(
         - define_ranges_from_profiles must be PROFILES class
     """
 
+    dfT = torch.randn((2, 2), **tensor_opts)
+
     if seedInitial is not None:
         torch.manual_seed(seed=seedInitial)
 
@@ -177,6 +182,7 @@ def initializeProblem(
                 "TypeTarget": portals_fun.MODELparameters["Physics_options"]["TypeTarget"],
                 "TargetCalc": portals_fun.PORTALSparameters["TargetCalc"]},
         },
+        tensor_opts = tensor_opts
     )
 
     # ***************************************************************************************************
@@ -227,6 +233,7 @@ def initializeProblem(
                     "TypeTarget": portals_fun.MODELparameters["Physics_options"]["TypeTarget"],
                     "TargetCalc": portals_fun.PORTALSparameters["TargetCalc"]},
             },
+            tensor_opts = tensor_opts
         )
 
         dictCPs_base_extra = {}
diff --git a/src/mitim_modules/portals/utils/PORTALSoptimization.py b/src/mitim_modules/portals/utils/PORTALSoptimization.py
index 61d66baa..5ac0c285 100644
--- a/src/mitim_modules/portals/utils/PORTALSoptimization.py
+++ b/src/mitim_modules/portals/utils/PORTALSoptimization.py
@@ -104,6 +104,7 @@ def flux_match_surrogate(step,profiles_new, plot_results=True, file_write_csv=No
 
 
     # Create powerstate with the same options as the original portals but with the new profiles
+    embed()
     powerstate = STATEtools.powerstate(
         profiles_new,
         EvolutionOptions={
@@ -115,6 +116,9 @@ def flux_match_surrogate(step,profiles_new, plot_results=True, file_write_csv=No
         },
         TransportOptions=TransportOptions,
         TargetOptions=step.surrogate_parameters["powerstate"].TargetOptions,
+        tensor_opts = {
+            "dtype": step.surrogate_parameters["powerstate"].dfT.dtype,
+            "device": step.surrogate_parameters["powerstate"].dfT.device},
     )
 
     # Pass powerstate as part of the surrogate_parameters such that transformations now occur with the new profiles
diff --git a/src/mitim_modules/powertorch/STATEtools.py b/src/mitim_modules/powertorch/STATEtools.py
index e4df8976..c024faec 100644
--- a/src/mitim_modules/powertorch/STATEtools.py
+++ b/src/mitim_modules/powertorch/STATEtools.py
@@ -10,8 +10,6 @@
 from mitim_tools.misc_tools.LOGtools import printMsg as print
 from IPython import embed
 
-UseCUDAifAvailable = True
-
 # ------------------------------------------------------------------
 # POWERSTATE Class
 # ------------------------------------------------------------------
@@ -32,6 +30,10 @@ def __init__(
                 "TargetCalc": "powerstate"
                 },
         },
+        tensor_opts = {
+            "dtype": torch.double,
+            "device": torch.device("cpu"),
+        }
     ):
         '''
         Inputs:
@@ -69,15 +71,7 @@ def _ensure_ne_before_nz(lst):
         self.ProfilesPredicted = _ensure_ne_before_nz(self.ProfilesPredicted)
 
         # Default type and device tensor
-        self.dfT = torch.randn(
-            (2, 2),
-            dtype=torch.double,
-            device=torch.device(
-                "cpu"
-                if ((not UseCUDAifAvailable) or (not torch.cuda.is_available()))
-                else "cuda"
-            ),
-        )
+        self.dfT = torch.randn((2, 2), **tensor_opts)
 
         '''
         Potential profiles to evolve (aLX) and their corresponding flux matching
diff --git a/src/mitim_modules/powertorch/utils/TRANSFORMtools.py b/src/mitim_modules/powertorch/utils/TRANSFORMtools.py
index 2b018594..c4c830c4 100644
--- a/src/mitim_modules/powertorch/utils/TRANSFORMtools.py
+++ b/src/mitim_modules/powertorch/utils/TRANSFORMtools.py
@@ -486,12 +486,12 @@ def parameterize_curve(
     aLy_coarse[-1, 1] = aLy_coarse[-2, 1]
 
     # Boundary condition at point moved by gridPointsAllowed
-    y_bc = torch.from_numpy(interpolation_function([x_coarse[-1]], x_coord, y_coord.cpu().numpy())).to(
+    y_bc = torch.from_numpy(interpolation_function([x_coarse[-1]], x_coord, y_coord.numpy())).to(
         ygrad_coord
     )
 
     # Boundary condition at point (ACTUAL THAT I WANT to keep fixed, i.e. rho=0.8)
-    y_bc_real = torch.from_numpy(interpolation_function([x_coarse[-2]], x_coord, y_coord.cpu().numpy())).to(
+    y_bc_real = torch.from_numpy(interpolation_function([x_coarse[-2]], x_coord, y_coord.numpy())).to(
         ygrad_coord
     )
 
diff --git a/src/mitim_modules/vitals/VITALSmain.py b/src/mitim_modules/vitals/VITALSmain.py
index 1827e945..0f345fe2 100644
--- a/src/mitim_modules/vitals/VITALSmain.py
+++ b/src/mitim_modules/vitals/VITALSmain.py
@@ -31,7 +31,7 @@ def default_namelist(optimization_options):
 
 
 class vitals(STRATEGYtools.opt_evaluator):
-    def __init__(self, folder, namelist=None):
+    def __init__(self, folder, namelist=None, **kwargs):
         print(
             "\n-----------------------------------------------------------------------------------------"
         )
@@ -45,6 +45,7 @@ def __init__(self, folder, namelist=None):
             folder,
             namelist=namelist,
             default_namelist_function=default_namelist if (namelist is None) else None,
+            **kwargs
         )
 
         # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/src/mitim_tools/misc_tools/FARMINGtools.py b/src/mitim_tools/misc_tools/FARMINGtools.py
index 2706bf8e..3f8b0f16 100644
--- a/src/mitim_tools/misc_tools/FARMINGtools.py
+++ b/src/mitim_tools/misc_tools/FARMINGtools.py
@@ -23,8 +23,6 @@
 from mitim_tools.misc_tools.CONFIGread import read_verbose_level
 from IPython import embed
 
-UseCUDAifAvailable = True
-
 """
 New handling of jobs in remote or local clusters. Example use:
 
@@ -908,9 +906,6 @@ def ParallelProcedure(
     else:
         import multiprocessing
 
-    if UseCUDAifAvailable and torch.cuda.is_available():
-        multiprocessing.set_start_method("spawn")
-
     """
 	This way of pooling passes a lock when initializing every child class. It handles
 	a global lock, and then every child can call lock.acquire() and lock.release()
diff --git a/src/mitim_tools/opt_tools/STRATEGYtools.py b/src/mitim_tools/opt_tools/STRATEGYtools.py
index f12139ad..bb527699 100644
--- a/src/mitim_tools/opt_tools/STRATEGYtools.py
+++ b/src/mitim_tools/opt_tools/STRATEGYtools.py
@@ -21,8 +21,6 @@
 from mitim_tools.misc_tools.LOGtools import printMsg as print
 from mitim_tools import __mitimroot__
 
-UseCUDAifAvailable = True
-
 """
 Example usage (see tutorials for actual examples and parameter definitions):
 
@@ -70,13 +68,18 @@ def __init__(
         self,
         folder,
         namelist=None,
-        TensorsType=torch.double,
         default_namelist_function=None,
+        tensor_opts = {
+            "dtype": torch.double,
+            "device": torch.device("cpu"),
+        }
     ):
         """
         Namelist file can be provided and will be copied to the folder
         """
 
+        self.tensor_opts = tensor_opts
+
         print("- Parent opt_evaluator function initialized")
 
         self.folder = folder
@@ -124,17 +127,9 @@ def __init__(
 
         # Determine type of tensors to work with
         torch.set_default_dtype(
-            TensorsType
+            self.tensor_opts["dtype"]
         )  # In case I forgot to specify a type explicitly, use as default (https://github.com/pytorch/botorch/discussions/1444)
-        self.dfT = torch.randn(
-            (2, 2),
-            dtype=TensorsType,
-            device=torch.device(
-                "cpu"
-                if ((not UseCUDAifAvailable) or (not torch.cuda.is_available()))
-                else "cuda"
-            ),
-        )
+        self.dfT = torch.randn( (2, 2), **tensor_opts)
 
         # Name of calibrated objectives (e.g. QiRes1 to represent the objective from Qi1-QiT1)
         self.name_objectives = None
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 52f40286..6bac3aab 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -13,10 +13,6 @@
 from mitim_tools.misc_tools.LOGtools import printMsg as print
 from IPython import embed
 
-
-
-UseCUDAifAvailable = True
-
 # ---------------------------------------------------------------------------------
 # 	Model Class
 # ---------------------------------------------------------------------------------
@@ -69,15 +65,7 @@ def __init__(
         self.losses = None
 
         if self.dfT is None:
-            self.dfT = torch.randn(
-                (2, 2),
-                dtype=torch.double,
-                device=torch.device(
-                    "cpu"
-                    if ((not UseCUDAifAvailable) or (not torch.cuda.is_available()))
-                    else "cuda"
-                ),
-            )
+            self.dfT = torch.randn((2, 2),dtype=torch.double,device=torch.device("cpu"))
 
         self.train_X = torch.from_numpy(Xor).to(self.dfT)
         self.train_Y = torch.from_numpy(Yor).to(self.dfT)

From 5a21ce84f321f3f81aa8a629b8e87b51631034dd Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 15 Nov 2024 23:12:20 -0500
Subject: [PATCH 07/34] fix

---
 src/mitim_modules/powertorch/utils/TRANSFORMtools.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mitim_modules/powertorch/utils/TRANSFORMtools.py b/src/mitim_modules/powertorch/utils/TRANSFORMtools.py
index c4c830c4..2b018594 100644
--- a/src/mitim_modules/powertorch/utils/TRANSFORMtools.py
+++ b/src/mitim_modules/powertorch/utils/TRANSFORMtools.py
@@ -486,12 +486,12 @@ def parameterize_curve(
     aLy_coarse[-1, 1] = aLy_coarse[-2, 1]
 
     # Boundary condition at point moved by gridPointsAllowed
-    y_bc = torch.from_numpy(interpolation_function([x_coarse[-1]], x_coord, y_coord.numpy())).to(
+    y_bc = torch.from_numpy(interpolation_function([x_coarse[-1]], x_coord, y_coord.cpu().numpy())).to(
         ygrad_coord
     )
 
     # Boundary condition at point (ACTUAL THAT I WANT to keep fixed, i.e. rho=0.8)
-    y_bc_real = torch.from_numpy(interpolation_function([x_coarse[-2]], x_coord, y_coord.numpy())).to(
+    y_bc_real = torch.from_numpy(interpolation_function([x_coarse[-2]], x_coord, y_coord.cpu().numpy())).to(
         ygrad_coord
     )
 

From dbd296ba86c4fe17804bf7c4d648be84a1198091 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 16 Nov 2024 12:52:32 -0500
Subject: [PATCH 08/34] explicit seed

---
 src/mitim_tools/opt_tools/SURROGATEtools.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 6bac3aab..29e7350d 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -42,6 +42,7 @@ def __init__(
         surrogateOptions={},
         FixedValue=False,
         fileTraining=None,
+        seed = 0
     ):
         """
         Noise is variance here (square of standard deviation).
@@ -50,7 +51,7 @@ def __init__(
         if avoidPoints is None:
             avoidPoints = []
 
-        torch.manual_seed(0)
+        torch.manual_seed(seed)
 
         self.avoidPoints = avoidPoints
         self.output = output

From 05121d83390af803c2600d6a882d31a8ce0a7816 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 16 Nov 2024 14:57:42 -0500
Subject: [PATCH 09/34] First cut (not working) to broadcast input transform

---
 src/mitim_modules/portals/PORTALStools.py   |   9 +-
 src/mitim_tools/opt_tools/BOTORCHtools.py   |  42 ++++----
 src/mitim_tools/opt_tools/STEPtools.py      |  23 +++++
 src/mitim_tools/opt_tools/SURROGATEtools.py | 107 ++++++++++++--------
 tests/PORTALS_workflow.py                   |   2 +-
 5 files changed, 119 insertions(+), 64 deletions(-)

diff --git a/src/mitim_modules/portals/PORTALStools.py b/src/mitim_modules/portals/PORTALStools.py
index 0308649a..3e556c88 100644
--- a/src/mitim_modules/portals/PORTALStools.py
+++ b/src/mitim_modules/portals/PORTALStools.py
@@ -112,7 +112,7 @@ def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformati
 	2. Calculate kinetic profiles to use during transformations and update powerstate with them
 	-------------------------------------------------------------------------------------------
 	"""
-
+    print(shape_orig, X.shape)
     powerstate = constructEvaluationProfiles(X, surrogate_parameters, recalculateTargets = True) # This is the only place where I recalculate targets, so that I have the target transformation
 
     """
@@ -131,6 +131,10 @@ def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformati
     for ikey in surrogate_transformation_variables[output]:
         xx = powerstate.plasma[ikey][: X.shape[0], index]
         xFit = torch.cat((xFit, xx.unsqueeze(1)), dim=1).to(X)
+    
+    #TO FIX
+    import torch.nn.functional as F
+    xFit = F.pad(xFit, (0, 3-xFit.shape[-1]))
 
     parameters_combined = {"powerstate": powerstate}
 
@@ -151,12 +155,13 @@ def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformati
 # ----------------------------------------------------------------------
 
 
-def transformPORTALS(X, surrogate_parameters, output):
+def transformPORTALS(X0, surrogate_parameters, output):
     """
     1. Make sure all batches are squeezed into a single dimension
     ------------------------------------------------------------------
             E.g.: (batch1,batch2,batch3,dim) -> (batch1*batch2*batch3,dim)
     """
+    X = X0[0,...] #TO FIX
     shape_orig = np.array(X.shape)
     X = X.view(np.prod(shape_orig[:-1]), shape_orig[-1])
 
diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index e72b75e6..0078f717 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -43,16 +43,16 @@ def __init__(
             f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}"
         )
 
-        self.store_training(
-            train_X,
-            train_X_added,
-            train_Y,
-            train_Y_added,
-            train_Yvar,
-            train_Yvar_added,
-            input_transform,
-            outcome_transform,
-        )
+        # self.store_training(
+        #     train_X,
+        #     train_X_added,
+        #     train_Y,
+        #     train_Y_added,
+        #     train_Yvar,
+        #     train_Yvar_added,
+        #     input_transform,
+        #     outcome_transform,
+        # )
 
         """
 		----------------------------------------------------------------------------------------
@@ -78,24 +78,24 @@ def __init__(
         if outcome_transform is not None:
             train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar)
 
-        # Added points are raw transformed, so I need to normalize them
-        if train_X_added.shape[0] > 0:
-            train_X_added = input_transform["tf2"](train_X_added)
-            train_Y_added, train_Yvar_added = outcome_transform["tf2"](
-                train_Y_added, train_Yvar_added
-            )
+        # # Added points are raw transformed, so I need to normalize them
+        # if train_X_added.shape[0] > 0:
+        #     train_X_added = input_transform["tf2"](train_X_added)
+        #     train_Y_added, train_Yvar_added = outcome_transform["tf2"](
+        #         train_Y_added, train_Yvar_added
+        #     )
         # -----
 
-        train_X_usedToTrain = torch.cat((transformed_X, train_X_added), axis=0)
-        train_Y_usedToTrain = torch.cat((train_Y, train_Y_added), axis=0)
-        train_Yvar_usedToTrain = torch.cat((train_Yvar, train_Yvar_added), axis=0)
+        train_X_usedToTrain = transformed_X #torch.cat((transformed_X, train_X_added), axis=0)
+        train_Y_usedToTrain = train_Y #torch.cat((train_Y, train_Y_added), axis=0)
+        train_Yvar_usedToTrain = train_Yvar #torch.cat((train_Yvar, train_Yvar_added), axis=0)
 
         self._input_batch_shape, self._aug_batch_shape = self.get_batch_dimensions(
             train_X=train_X_usedToTrain, train_Y=train_Y_usedToTrain
         )
 
-        train_Y_usedToTrain = train_Y_usedToTrain.squeeze(-1)
-        train_Yvar_usedToTrain = train_Yvar_usedToTrain.squeeze(-1)
+        # train_Y_usedToTrain = train_Y_usedToTrain.squeeze(-1)
+        # train_Yvar_usedToTrain = train_Yvar_usedToTrain.squeeze(-1)
 
         """
 		-----------------------------------------------------------------------
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 0e8708b5..4934aa44 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -158,6 +158,29 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
         print("--> Fitting multiple single-output models and creating composite model")
         time1 = datetime.datetime.now()
 
+        # full Multi-output model
+        gp_mo = SURROGATEtools.surrogate_model(
+            self.x,
+            self.y,
+            self.yvar,
+            self.surrogate_parameters,
+            outputs=self.outputs,
+            outputs_transformed=self.stepSettings["name_transformed_ofs"],
+            bounds=self.bounds,
+            dfT=self.dfT,
+            surrogateOptions=self.surrogateOptions,
+        )
+
+        # Fitting
+        gp_mo.fit()
+
+
+        aaaa
+
+
+
+
+
         for i in range(self.y.shape[-1]):
             outi = self.outputs[i] if (self.outputs is not None) else None
 
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 29e7350d..7bc66735 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -34,8 +34,8 @@ def __init__(
         Yor,
         Yvaror,
         surrogate_parameters,
-        output=None,
-        output_transformed=None,
+        outputs=None,
+        outputs_transformed=None,
         bounds=None,
         avoidPoints=None,
         dfT=None,
@@ -54,8 +54,8 @@ def __init__(
         torch.manual_seed(seed)
 
         self.avoidPoints = avoidPoints
-        self.output = output
-        self.output_transformed = output_transformed
+        self.outputs = outputs
+        self.outputs_transformed = outputs_transformed
         self.surrogateOptions = surrogateOptions
         self.dfT = dfT
         self.surrogate_parameters = surrogate_parameters
@@ -68,9 +68,9 @@ def __init__(
         if self.dfT is None:
             self.dfT = torch.randn((2, 2),dtype=torch.double,device=torch.device("cpu"))
 
-        self.train_X = torch.from_numpy(Xor).to(self.dfT)
         self.train_Y = torch.from_numpy(Yor).to(self.dfT)
-
+        self.train_X = torch.from_numpy(Xor).to(self.dfT).unsqueeze(0).repeat(self.train_Y.shape[-1], 1, 1)
+        
         # Extend noise if needed
         if isinstance(Yvaror, float) or len(Yvaror.shape) == 1:
             print(
@@ -249,23 +249,24 @@ def __init__(
             tf1=outcome_transform_physics, tf2=output_transformed_standardization
         ).to(self.dfT)
 
-        self.variables = (
-            self.surrogate_transformation_variables[self.output]
-            if (
-                (self.output is not None)
-                and ("surrogate_transformation_variables" in self.__dict__)
-                and (self.surrogate_transformation_variables is not None)
-            )
-            else None
-        )
+        self.variables = None
+        # self.variables = (
+        #     self.surrogate_transformation_variables[self.output]
+        #     if (
+        #         (self.output is not None)
+        #         and ("surrogate_transformation_variables" in self.__dict__)
+        #         and (self.surrogate_transformation_variables is not None)
+        #     )
+        #     else None
+        # )
 
         # *************************************************************************************
         # Model
         # *************************************************************************************
 
-        print(
-            f'\t- Initializing model{" for "+self.output_transformed if (self.output_transformed is not None) else ""}',
-        )
+        # print(
+        #     f'\t- Initializing model{" for "+self.output_transformed if (self.output_transformed is not None) else ""}',
+        # )
 
         """
         self.train_X contains the untransformed of this specific run:   (batch1, dimX)
@@ -286,23 +287,10 @@ def __init__(
 
     def _define_physics_transformation(self):
 
-        self._select_transition_physics_based_params()
-
-        # Input and Outcome transform (PHYSICS)
-        dimY = self.train_Y.shape[-1]
-        input_transform_physics = BOTORCHtools.Transformation_Inputs(
-            self.output, self.surrogate_parameters, self.surrogate_transformation_variables
-        ).to(self.dfT)
-        outcome_transform_physics = BOTORCHtools.Transformation_Outcomes(
-            dimY, self.output, self.surrogate_parameters
-        ).to(self.dfT)
-
-        dimTransformedDV_x = input_transform_physics(self.train_X).shape[-1]
-        dimTransformedDV_y = dimY
-
-        return input_transform_physics, outcome_transform_physics, dimTransformedDV_x, dimTransformedDV_y
+        # ------------------------------------------------------------------------------------
+        # Define individual transformations and then put together
+        # ------------------------------------------------------------------------------------
 
-    def _select_transition_physics_based_params(self, ):
         self.surrogate_transformation_variables = None
         if ("surrogate_transformation_variables_alltimes" in self.surrogate_parameters) and (self.surrogate_parameters["surrogate_transformation_variables_alltimes"] is not None):
 
@@ -321,6 +309,41 @@ def _select_transition_physics_based_params(self, ):
 
             self.surrogate_transformation_variables = self.surrogate_parameters["surrogate_transformation_variables_alltimes"][transition_position]
 
+        # ------------------------------------------------------------------------------------
+        # Input and Outcome transform (PHYSICS) of each output
+        # ------------------------------------------------------------------------------------
+
+        input_transformations_physics = []
+        outcome_transformations_physics = []
+
+        for ind_out in range(self.train_Y.shape[-1]):
+
+            dimY = 1
+
+            input_transform_physics = BOTORCHtools.Transformation_Inputs(
+                self.outputs[ind_out], self.surrogate_parameters, self.surrogate_transformation_variables
+            ).to(self.dfT)
+            outcome_transform_physics = BOTORCHtools.Transformation_Outcomes(
+                dimY, self.outputs[ind_out], self.surrogate_parameters
+            ).to(self.dfT)
+
+            input_transformations_physics.append(input_transform_physics)
+            outcome_transformations_physics.append(outcome_transform_physics)
+
+        # ------------------------------------------------------------------------------------
+        # Broadcast the input transformation to all outputs
+        # ------------------------------------------------------------------------------------
+
+        input_transformation_physics = botorch.models.transforms.input.BatchBroadcastedInputTransform(input_transformations_physics)
+        output_transformation_physics = outcome_transformations_physics[0] #TO FIX
+
+        dimX = input_transformation_physics(self.train_X).shape[-1]
+
+        dimTransformedDV_x = dimX
+        dimTransformedDV_y = self.train_Y.shape[-1]
+
+        return input_transformation_physics, output_transformation_physics, dimTransformedDV_x, dimTransformedDV_y
+
     def normalization_pass(
         self,
         input_transform_physics,
@@ -332,12 +355,15 @@ def normalization_pass(
         outcome_transform_normalization.training = True
         outcome_transform_normalization._is_trained = torch.tensor(False)
 
-        train_X_transformed = torch.cat(
-            (input_transform_physics(self.train_X), self.train_X_added), axis=0
-        )
-        y, yvar = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
-        train_Y_transformed = torch.cat((y, self.train_Y_added), axis=0)
-        train_Yvar_transformed = torch.cat((yvar, self.train_Yvar_added), axis=0)
+        train_X_transformed = input_transform_physics(self.train_X)
+        train_Y_transformed, train_Yvar_transformed = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
+
+        # train_X_transformed = torch.cat(
+        #     (input_transform_physics(self.train_X), self.train_X_added), axis=0
+        # )
+        # y, yvar = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
+        # train_Y_transformed = torch.cat((y, self.train_Y_added), axis=0)
+        # train_Yvar_transformed = torch.cat((yvar, self.train_Yvar_added), axis=0)
 
         train_X_transformed_norm = input_transform_normalization(train_X_transformed)
         (
@@ -420,6 +446,7 @@ def perform_model_fit(self, mll):
         # --------------------------------------------------
 
         # Store first MLL value
+        embed()
         track_fval = [
             -mll.forward(mll.model(*mll.model.train_inputs), mll.model.train_targets)
             .detach()
diff --git a/tests/PORTALS_workflow.py b/tests/PORTALS_workflow.py
index f1c8c0ac..9c504a27 100644
--- a/tests/PORTALS_workflow.py
+++ b/tests/PORTALS_workflow.py
@@ -4,7 +4,7 @@
 from mitim_modules.portals import PORTALSmain
 from mitim_tools import __mitimroot__
 
-cold_start = True
+cold_start = False
 
 (__mitimroot__ / "tests" / "scratch").mkdir(parents=True, exist_ok=True)
 

From 593c66b3c8000932596cc2047cfbdb825f4ca61a Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 16 Nov 2024 21:51:51 -0500
Subject: [PATCH 10/34] misc

---
 src/mitim_modules/portals/PORTALSmain.py    |  21 +-
 src/mitim_modules/portals/PORTALStools.py   |   6 +-
 src/mitim_tools/opt_tools/BOTORCHtools.py   | 153 +++++++++-
 src/mitim_tools/opt_tools/STEPtools.py      | 318 ++++++++++----------
 src/mitim_tools/opt_tools/SURROGATEtools.py | 102 ++++---
 5 files changed, 385 insertions(+), 215 deletions(-)

diff --git a/src/mitim_modules/portals/PORTALSmain.py b/src/mitim_modules/portals/PORTALSmain.py
index aec99d35..7d9b5f3e 100644
--- a/src/mitim_modules/portals/PORTALSmain.py
+++ b/src/mitim_modules/portals/PORTALSmain.py
@@ -412,7 +412,7 @@ def run(self, paramsfile, resultsfile):
             with open(self.optimization_extra, "wb") as handle:
                 pickle_dill.dump(dictStore, handle, protocol=4)
 
-    def scalarized_objective(self, Y):
+    def scalarized_objective(self, Y0):
         """
         Notes
         -----
@@ -422,6 +422,14 @@ def scalarized_objective(self, Y):
 
         ofs_ordered_names = np.array(self.optimization_options["ofs"])
 
+        # TO FIX: convert to dimensions such that the dimY is in -1
+        if Y0.shape[0] == len(ofs_ordered_names):
+            Y = Y0.transpose(0, -1)
+        elif Y0.shape[1] == len(ofs_ordered_names):
+            Y = Y0.transpose(1, -1)
+        else:
+            Y = Y0
+
         """
 		-------------------------------------------------------------------------
 		Prepare transport dictionary
@@ -448,6 +456,17 @@ def scalarized_objective(self, Y):
 
         of, cal, _, res = PORTALSinteraction.calculatePseudos(self.powerstate, self.PORTALSparameters,specific_vars=var_dict)
 
+        # TO FIX: convert dims back
+        print(Y.shape, of.shape, cal.shape, res.shape)
+        if Y0.shape[0] == len(ofs_ordered_names):
+            of = of.transpose(0, -1)
+            cal = cal.transpose(0, -1)
+            #res = res.transpose(0, -1)
+        elif Y0.shape[1] == len(ofs_ordered_names):
+            of = of.transpose(1, -1)
+            cal = cal.transpose(1, -1)
+            #res = res.transpose(1, -1)
+
         return of, cal, res
 
     def analyze_results(self, plotYN=True, fn=None, cold_start=False, analysis_level=2):
diff --git a/src/mitim_modules/portals/PORTALStools.py b/src/mitim_modules/portals/PORTALStools.py
index 3e556c88..dcb77f76 100644
--- a/src/mitim_modules/portals/PORTALStools.py
+++ b/src/mitim_modules/portals/PORTALStools.py
@@ -112,7 +112,7 @@ def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformati
 	2. Calculate kinetic profiles to use during transformations and update powerstate with them
 	-------------------------------------------------------------------------------------------
 	"""
-    print(shape_orig, X.shape)
+
     powerstate = constructEvaluationProfiles(X, surrogate_parameters, recalculateTargets = True) # This is the only place where I recalculate targets, so that I have the target transformation
 
     """
@@ -155,13 +155,13 @@ def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformati
 # ----------------------------------------------------------------------
 
 
-def transformPORTALS(X0, surrogate_parameters, output):
+def transformPORTALS(X, surrogate_parameters, output):
     """
     1. Make sure all batches are squeezed into a single dimension
     ------------------------------------------------------------------
             E.g.: (batch1,batch2,batch3,dim) -> (batch1*batch2*batch3,dim)
     """
-    X = X0[0,...] #TO FIX
+
     shape_orig = np.array(X.shape)
     X = X.view(np.prod(shape_orig[:-1]), shape_orig[-1])
 
diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 0078f717..cec5f6d6 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -14,8 +14,156 @@
 # SingleTaskGP needs to be modified because I want to input options and outcome transform taking X, otherwise it should be a copy
 # ----------------------------------------------------------------------------------------------------------------------------
 
+import torch
+from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
+from botorch.models.model import FantasizeMixin
+from botorch.models.transforms.input import InputTransform
+from botorch.models.transforms.outcome import OutcomeTransform, Standardize
+from botorch.models.utils import validate_input_scaling
+from botorch.models.utils.gpytorch_modules import (
+    get_covar_module_with_dim_scaled_prior,
+    get_gaussian_likelihood_with_lognormal_prior,
+)
+from botorch.utils.containers import BotorchContainer
+from botorch.utils.datasets import SupervisedDataset
+from botorch.utils.types import _DefaultType, DEFAULT
+from gpytorch.distributions.multivariate_normal import MultivariateNormal
+from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
+from gpytorch.likelihoods.likelihood import Likelihood
+from gpytorch.means.constant_mean import ConstantMean
+from gpytorch.means.mean import Mean
+from gpytorch.models.exact_gp import ExactGP
+from gpytorch.module import Module
+from torch import Tensor
+
+class SingleTaskGP_MITIM(botorch.models.gp_regression.SingleTaskGP):
+    def __init__(
+        self,
+        train_X,
+        train_Y,
+        train_Yvar = None,
+        likelihood = None,
+        covar_module = None,
+        mean_module = None,
+        outcome_transform = None,
+        input_transform = None,
+    ) -> None:
+
+        self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
+        if outcome_transform == DEFAULT:
+            outcome_transform = Standardize(
+                m=train_Y.shape[-1], batch_shape=train_X.shape[:-2]
+            )
+        with torch.no_grad():
+            transformed_X = self.transform_inputs(
+                X=train_X, input_transform=input_transform
+            )
+            embed()
+        if outcome_transform is not None:
+            train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar)
+        # Validate again after applying the transforms
+        self._validate_tensor_args(X=transformed_X, Y=train_Y, Yvar=train_Yvar)
+        ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None)
+        validate_input_scaling(
+            train_X=transformed_X,
+            train_Y=train_Y,
+            train_Yvar=train_Yvar,
+            ignore_X_dims=ignore_X_dims,
+        )
+        self._set_dimensions(train_X=train_X, train_Y=train_Y)
+        train_X, train_Y, train_Yvar = self._transform_tensor_args(
+            X=train_X, Y=train_Y, Yvar=train_Yvar
+        )
+        if likelihood is None:
+            if train_Yvar is None:
+                likelihood = get_gaussian_likelihood_with_lognormal_prior(
+                    batch_shape=self._aug_batch_shape
+                )
+            else:
+                likelihood = FixedNoiseGaussianLikelihood(
+                    noise=train_Yvar, batch_shape=self._aug_batch_shape
+                )
+        else:
+            self._is_custom_likelihood = True
+        ExactGP.__init__(
+            self, train_inputs=train_X, train_targets=train_Y, likelihood=likelihood
+        )
+        if mean_module is None:
+            mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
+        self.mean_module = mean_module
+        if covar_module is None:
+            covar_module = get_covar_module_with_dim_scaled_prior(
+                ard_num_dims=transformed_X.shape[-1],
+                batch_shape=self._aug_batch_shape,
+            )
+            # Used for subsetting along the output dimension. See Model.subset_output.
+            self._subset_batch_dict = {
+                "mean_module.raw_constant": -1,
+                "covar_module.raw_lengthscale": -3,
+            }
+            if train_Yvar is None:
+                self._subset_batch_dict["likelihood.noise_covar.raw_noise"] = -2
+        self.covar_module: Module = covar_module
+        # TODO: Allow subsetting of other covar modules
+        if outcome_transform is not None:
+            self.outcome_transform = outcome_transform
+        if input_transform is not None:
+            self.input_transform = input_transform
+        self.to(train_X)
+
+    def posterior(
+        self,
+        X,
+        output_indices=None,
+        observation_noise=False,
+        posterior_transform=None,
+        **kwargs,
+    ):
+
+        self.eval()  # make sure model is in eval mode
+        # input transforms are applied at `posterior` in `eval` mode, and at
+        # `model.forward()` at the training time
+        Xtr = self.transform_inputs(X)
+        with botorch.models.utils.gpt_posterior_settings():
+            # insert a dimension for the output dimension
+            if self._num_outputs > 1:
+                Xtr, output_dim_idx = botorch.models.utils.add_output_dim(
+                    X=Xtr, original_batch_shape=self._input_batch_shape
+                )
+            # NOTE: BoTorch's GPyTorchModels also inherit from GPyTorch's ExactGP, thus
+            # self(X) calls GPyTorch's ExactGP's __call__, which computes the posterior,
+            # rather than e.g. SingleTaskGP's forward, which computes the prior.
+            mvn = self(Xtr)
+            mvn = self._apply_noise(X=Xtr, mvn=mvn, observation_noise=observation_noise)
+            if self._num_outputs > 1:
+                mean_x = mvn.mean
+                covar_x = mvn.lazy_covariance_matrix
+                output_indices = output_indices or range(self._num_outputs)
+                mvns = [
+                    gpytorch.distributions.MultivariateNormal(
+                        mean_x.select(dim=output_dim_idx, index=t),
+                        covar_x[(slice(None),) * output_dim_idx + (t,)],
+                    )
+                    for t in output_indices
+                ]
+                mvn = gpytorch.distributions.MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
+
+        posterior = botorch.posteriors.gpytorch.GPyTorchPosterior(distribution=mvn)
+        if hasattr(self, "outcome_transform"):
+            posterior = self.outcome_transform.untransform_posterior(X, posterior)
+        if posterior_transform is not None:
+            return posterior_transform(posterior)
+        return posterior
+
+class BatchBroadcastedInputTransform_MITIM(botorch.models.transforms.input.BatchBroadcastedInputTransform):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+  
+    def _Xs_and_transforms(self, X):
+        Xs = (X,) * len(self.transforms)
+        return zip(Xs, self.transforms)
 
-class ExactGPcustom(botorch.models.gp_regression.SingleTaskGP):
+class SingleTaskGP_MITIM2(botorch.models.gp_regression.SingleTaskGP):
     def __init__(
         self,
         train_X,
@@ -107,7 +255,6 @@ def __init__(
 
         if FixedNoise:
             # Noise not inferred, given by data
-            
             likelihood = (
                 gpytorch.likelihoods.gaussian_likelihood.FixedNoiseGaussianLikelihood(
                     noise=train_Yvar_usedToTrain.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning)
@@ -272,6 +419,7 @@ def posterior(
         posterior_transform=None,
         **kwargs,
     ):
+
         self.eval()  # make sure model is in eval mode
         # input transforms are applied at `posterior` in `eval` mode, and at
         # `model.forward()` at the training time
@@ -369,7 +517,6 @@ def posterior(
 # I need my own transformation based on physics
 # ----------------------------------------------------------------------------------------------------------------------------
 
-
 class Transformation_Inputs(
     botorch.models.transforms.input.ReversibleInputTransform, torch.nn.Module
 ):
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 4934aa44..0e2af213 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -159,7 +159,7 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
         time1 = datetime.datetime.now()
 
         # full Multi-output model
-        gp_mo = SURROGATEtools.surrogate_model(
+        self.GP["combined_model"] = SURROGATEtools.surrogate_model(
             self.x,
             self.y,
             self.yvar,
@@ -172,166 +172,162 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
         )
 
         # Fitting
-        gp_mo.fit()
-
-
-        aaaa
-
-
-
-
-
-        for i in range(self.y.shape[-1]):
-            outi = self.outputs[i] if (self.outputs is not None) else None
-
-            # ----------------- specialTreatment is applied when I only want to use training data from a file, not from train_X
-            specialTreatment = (
-                (outi is not None)
-                and (fitWithTrainingDataIfContains is not None)
-                and (fitWithTrainingDataIfContains not in outi)
-            )
-            # -----------------------------------------------------------------------------------------------------------------------------------
-
-            outi_transformed = (
-                self.stepSettings["name_transformed_ofs"][i]
-                if (self.stepSettings["name_transformed_ofs"] is not None)
-                else outi
-            )
-
-            # ---------------------------------------------------------------------------------------------------
-            # Define model-specific functions for this output
-            # ---------------------------------------------------------------------------------------------------
-
-            surrogateOptions = copy.deepcopy(self.surrogateOptions)
-
-            # Then, depending on application (e.g. targets in mitim are fitted differently)
-            if (
-                "selectSurrogate" in surrogateOptions
-                and surrogateOptions["selectSurrogate"] is not None
-            ):
-                surrogateOptions = surrogateOptions["selectSurrogate"](
-                    outi, surrogateOptions
-                )
-
-            # ---------------------------------------------------------------------------------------------------
-            # To avoid problems with fixed values (e.g. calibration terms that are fixed)
-            # ---------------------------------------------------------------------------------------------------
-
-            threshold_to_consider_fixed = 1e-6
-            MaxRelativeDifference = np.abs(self.y.max() - self.y.min()) / np.abs(
-                self.y.mean()
-            )
-
-            if (
-                np.isnan(MaxRelativeDifference)
-                or (
-                    (self.y.shape[0] > 1)
-                    and ((MaxRelativeDifference < threshold_to_consider_fixed).all())
-                )
-            ) and (not specialTreatment):
-                print(
-                    f"\t- Identified that outputs did not change, utilizing constant kernel for {outi}",
-                    typeMsg="w",
-                )
-                FixedValue = True
-                surrogateOptions["TypeMean"] = 0
-                surrogateOptions["TypeKernel"] = 6  # Constant kernel
-
-            else:
-                FixedValue = False
-
-            # ---------------------------------------------------------------------------------------------------
-            # Fit individual output
-            # ---------------------------------------------------------------------------------------------------
-
-            # Data to train the surrogate
-            x = self.x
-            y = np.expand_dims(self.y[:, i], axis=1)
-            yvar = np.expand_dims(self.yvar[:, i], axis=1)
-
-            if specialTreatment:
-                x, y, yvar = (
-                    np.empty((0, x.shape[-1])),
-                    np.empty((0, y.shape[-1])),
-                    np.empty((0, y.shape[-1])),
-                )
-
-            # Surrogate
-
-            print(f"~ Model for output: {outi}")
-
-            GP = SURROGATEtools.surrogate_model(
-                x,
-                y,
-                yvar,
-                self.surrogate_parameters,
-                bounds=self.bounds,
-                output=outi,
-                output_transformed=outi_transformed,
-                avoidPoints=self.avoidPoints,
-                dfT=self.dfT,
-                surrogateOptions=surrogateOptions,
-                FixedValue=FixedValue,
-                fileTraining=fileTraining,
-            )
-
-            # Fitting
-            GP.fit()
-
-            self.GP["individual_models"][i] = GP
-
-        fileBackup.unlink(missing_ok=True)
-
-        # ------------------------------------------------------------------------------------------------------
-        # Combine them in a ModelListGP (create one single with MV but do not fit)
-        # ------------------------------------------------------------------------------------------------------
-
-        print("~ MV model to initialize combination")
-
-        self.GP["combined_model"] = SURROGATEtools.surrogate_model(
-            self.x,
-            self.y,
-            self.yvar,
-            self.surrogate_parameters,
-            avoidPoints=self.avoidPoints,
-            bounds=self.bounds,
-            dfT=self.dfT,
-            surrogateOptions=self.surrogateOptions,
-        )
-
-        models = ()
-        for GP in self.GP["individual_models"]:
-            models += (GP.gpmodel,)
-        self.GP["combined_model"].gpmodel = BOTORCHtools.ModifiedModelListGP(*models)
-
-        # ------------------------------------------------------------------------------------------------------
-        # Make sure each model has the right surrogate_transformation_variables inside the combined model
-        # ------------------------------------------------------------------------------------------------------
-        if self.GP["combined_model"].surrogate_transformation_variables is not None:
-            for i in range(self.y.shape[-1]):
-
-                outi = self.outputs[i] if (self.outputs is not None) else None
-
-                if outi is not None:
-                    self.GP["combined_model"].surrogate_transformation_variables[outi] = self.GP["individual_models"][i].surrogate_transformation_variables[outi]
-
-        print(f"--> Fitting of all models took {IOtools.getTimeDifference(time1)}")
-
-        """
-		*********************************************************************************************************************
-			Postprocessing
-		*********************************************************************************************************************
-		"""
-
-        # Test (if test could not be launched is likely because a singular matrix for Choleski decomposition)
-        print("--> Launching tests to assure batch evaluation accuracy")
-        TESTtools.testBatchCapabilities(self.GP["combined_model"])
-        print("--> Launching tests to assure model combination accuracy")
-        TESTtools.testCombinationCapabilities(
-            self.GP["individual_models"], self.GP["combined_model"]
-        )
-        print("--> Launching tests evaluate accuracy on training set (absolute units)")
-        self.GP["combined_model"].testTraining()
+        self.GP["combined_model"].fit()
+
+
+        # for i in range(self.y.shape[-1]):
+        #     outi = self.outputs[i] if (self.outputs is not None) else None
+
+        #     # ----------------- specialTreatment is applied when I only want to use training data from a file, not from train_X
+        #     specialTreatment = (
+        #         (outi is not None)
+        #         and (fitWithTrainingDataIfContains is not None)
+        #         and (fitWithTrainingDataIfContains not in outi)
+        #     )
+        #     # -----------------------------------------------------------------------------------------------------------------------------------
+
+        #     outi_transformed = (
+        #         self.stepSettings["name_transformed_ofs"][i]
+        #         if (self.stepSettings["name_transformed_ofs"] is not None)
+        #         else outi
+        #     )
+
+        #     # ---------------------------------------------------------------------------------------------------
+        #     # Define model-specific functions for this output
+        #     # ---------------------------------------------------------------------------------------------------
+
+        #     surrogateOptions = copy.deepcopy(self.surrogateOptions)
+
+        #     # Then, depending on application (e.g. targets in mitim are fitted differently)
+        #     if (
+        #         "selectSurrogate" in surrogateOptions
+        #         and surrogateOptions["selectSurrogate"] is not None
+        #     ):
+        #         surrogateOptions = surrogateOptions["selectSurrogate"](
+        #             outi, surrogateOptions
+        #         )
+
+        #     # ---------------------------------------------------------------------------------------------------
+        #     # To avoid problems with fixed values (e.g. calibration terms that are fixed)
+        #     # ---------------------------------------------------------------------------------------------------
+
+        #     threshold_to_consider_fixed = 1e-6
+        #     MaxRelativeDifference = np.abs(self.y.max() - self.y.min()) / np.abs(
+        #         self.y.mean()
+        #     )
+
+        #     if (
+        #         np.isnan(MaxRelativeDifference)
+        #         or (
+        #             (self.y.shape[0] > 1)
+        #             and ((MaxRelativeDifference < threshold_to_consider_fixed).all())
+        #         )
+        #     ) and (not specialTreatment):
+        #         print(
+        #             f"\t- Identified that outputs did not change, utilizing constant kernel for {outi}",
+        #             typeMsg="w",
+        #         )
+        #         FixedValue = True
+        #         surrogateOptions["TypeMean"] = 0
+        #         surrogateOptions["TypeKernel"] = 6  # Constant kernel
+
+        #     else:
+        #         FixedValue = False
+
+        #     # ---------------------------------------------------------------------------------------------------
+        #     # Fit individual output
+        #     # ---------------------------------------------------------------------------------------------------
+
+        #     # Data to train the surrogate
+        #     x = self.x
+        #     y = np.expand_dims(self.y[:, i], axis=1)
+        #     yvar = np.expand_dims(self.yvar[:, i], axis=1)
+
+        #     if specialTreatment:
+        #         x, y, yvar = (
+        #             np.empty((0, x.shape[-1])),
+        #             np.empty((0, y.shape[-1])),
+        #             np.empty((0, y.shape[-1])),
+        #         )
+
+        #     # Surrogate
+
+        #     print(f"~ Model for output: {outi}")
+
+        #     GP = SURROGATEtools.surrogate_model(
+        #         x,
+        #         y,
+        #         yvar,
+        #         self.surrogate_parameters,
+        #         bounds=self.bounds,
+        #         output=outi,
+        #         output_transformed=outi_transformed,
+        #         avoidPoints=self.avoidPoints,
+        #         dfT=self.dfT,
+        #         surrogateOptions=surrogateOptions,
+        #         FixedValue=FixedValue,
+        #         fileTraining=fileTraining,
+        #     )
+
+        #     # Fitting
+        #     GP.fit()
+
+        #     self.GP["individual_models"][i] = GP
+
+        # fileBackup.unlink(missing_ok=True)
+
+        # # ------------------------------------------------------------------------------------------------------
+        # # Combine them in a ModelListGP (create one single with MV but do not fit)
+        # # ------------------------------------------------------------------------------------------------------
+
+        # print("~ MV model to initialize combination")
+
+        # self.GP["combined_model"] = SURROGATEtools.surrogate_model(
+        #     self.x,
+        #     self.y,
+        #     self.yvar,
+        #     self.surrogate_parameters,
+        #     avoidPoints=self.avoidPoints,
+        #     bounds=self.bounds,
+        #     dfT=self.dfT,
+        #     surrogateOptions=self.surrogateOptions,
+        # )
+
+        # models = ()
+        # for GP in self.GP["individual_models"]:
+        #     models += (GP.gpmodel,)
+        # self.GP["combined_model"].gpmodel = BOTORCHtools.ModifiedModelListGP(*models)
+
+
+
+        # # ------------------------------------------------------------------------------------------------------
+        # # Make sure each model has the right surrogate_transformation_variables inside the combined model
+        # # ------------------------------------------------------------------------------------------------------
+        # if self.GP["combined_model"].surrogate_transformation_variables is not None:
+        #     for i in range(self.y.shape[-1]):
+
+        #         outi = self.outputs[i] if (self.outputs is not None) else None
+
+        #         if outi is not None:
+        #             self.GP["combined_model"].surrogate_transformation_variables[outi] = self.GP["individual_models"][i].surrogate_transformation_variables[outi]
+
+        # print(f"--> Fitting of all models took {IOtools.getTimeDifference(time1)}")
+
+        # """
+		# *********************************************************************************************************************
+		# 	Postprocessing
+		# *********************************************************************************************************************
+		# """
+
+        # # Test (if test could not be launched is likely because a singular matrix for Choleski decomposition)
+        # print("--> Launching tests to assure batch evaluation accuracy")
+        # TESTtools.testBatchCapabilities(self.GP["combined_model"])
+        # print("--> Launching tests to assure model combination accuracy")
+        # TESTtools.testCombinationCapabilities(
+        #     self.GP["individual_models"], self.GP["combined_model"]
+        # )
+        # print("--> Launching tests evaluate accuracy on training set (absolute units)")
+        # self.GP["combined_model"].testTraining()
 
         txt_time = IOtools.getTimeDifference(time1)
 
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 7bc66735..29d60fd1 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -68,9 +68,10 @@ def __init__(
         if self.dfT is None:
             self.dfT = torch.randn((2, 2),dtype=torch.double,device=torch.device("cpu"))
 
+        # I need: GPs, training, dim
         self.train_Y = torch.from_numpy(Yor).to(self.dfT)
-        self.train_X = torch.from_numpy(Xor).to(self.dfT).unsqueeze(0).repeat(self.train_Y.shape[-1], 1, 1)
-        
+        self.train_X = torch.from_numpy(Xor).to(self.dfT)
+
         # Extend noise if needed
         if isinstance(Yvaror, float) or len(Yvaror.shape) == 1:
             print(
@@ -85,31 +86,32 @@ def __init__(
         for i in self.surrogateOptions:
             print(f"\t\t{i:20} = {self.surrogateOptions[i]}")
 
-        # --------------------------------------------------------------------
-        # Eliminate points if needed (not from the "added" set)
-        # --------------------------------------------------------------------
+        # # --------------------------------------------------------------------
+        # # Eliminate points if needed (not from the "added" set)
+        # # --------------------------------------------------------------------
 
-        if len(self.avoidPoints) > 0:
-            print(
-                f"\t- Fitting without considering points: {self.avoidPoints}",
-                typeMsg="w",
-            )
+        # if len(self.avoidPoints) > 0:
+        #     print(
+        #         f"\t- Fitting without considering points: {self.avoidPoints}",
+        #         typeMsg="w",
+        #     )
 
-            self.train_X = torch.Tensor(
-                np.delete(self.train_X, self.avoidPoints, axis=0)
-            ).to(self.dfT)
-            self.train_Y = torch.Tensor(
-                np.delete(self.train_Y, self.avoidPoints, axis=0)
-            ).to(self.dfT)
-            self.train_Yvar = torch.Tensor(
-                np.delete(self.train_Yvar, self.avoidPoints, axis=0)
-            ).to(self.dfT)
+        #     self.train_X = torch.Tensor(
+        #         np.delete(self.train_X, self.avoidPoints, axis=0)
+        #     ).to(self.dfT)
+        #     self.train_Y = torch.Tensor(
+        #         np.delete(self.train_Y, self.avoidPoints, axis=0)
+        #     ).to(self.dfT)
+        #     self.train_Yvar = torch.Tensor(
+        #         np.delete(self.train_Yvar, self.avoidPoints, axis=0)
+        #     ).to(self.dfT)
 
         # -------------------------------------------------------------------------------------
         # Add points from file
         # -------------------------------------------------------------------------------------
 
         # Points to be added from file
+        continueAdding = False
         if ("extrapointsFile" in self.surrogateOptions) and (self.surrogateOptions["extrapointsFile"] is not None) and (self.output is not None) and (self.output in self.surrogateOptions["extrapointsModels"]):
 
             print(
@@ -124,8 +126,6 @@ def __init__(
                 continueAdding = False
             else:
                 continueAdding = True
-        else:
-            continueAdding = False
 
         if continueAdding:
 
@@ -173,7 +173,7 @@ def __init__(
             # --------------------------------------------------------------------------------------
             # Define transformation (here because I want to account for the added points)
             # --------------------------------------------------------------------------------------
-            self.num_training_points = self.train_X.shape[0]
+            self.num_training_points = self.train_X.shape[1]
             input_transform_physics, outcome_transform_physics, dimTransformedDV_x, dimTransformedDV_y = self._define_physics_transformation()
             # ------------------------------------------------------------------------------------------------------------
 
@@ -223,10 +223,10 @@ def __init__(
         # -------------------------------------------------------------------------------------
 
         input_transform_normalization = botorch.models.transforms.input.Normalize(
-            dimTransformedDV_x, bounds=None
+            d = dimTransformedDV_x, bounds=None
         ).to(self.dfT)
         output_transformed_standardization = (
-            botorch.models.transforms.outcome.Standardize((dimTransformedDV_y))
+            botorch.models.transforms.outcome.Standardize(m = dimTransformedDV_y)
         ).to(self.dfT)
 
         # Obtain normalization constants now (although during training this is messed up, so needed later too)
@@ -272,18 +272,26 @@ def __init__(
         self.train_X contains the untransformed of this specific run:   (batch1, dimX)
         self.train_X_added contains the transformed of the table:       (batch2, dimXtr)
         """
-        self.gpmodel = BOTORCHtools.ExactGPcustom(
-            self.train_X,
-            self.train_Y,
-            self.train_Yvar,
-            input_transform=input_transform,
-            outcome_transform=outcome_transform,
-            surrogateOptions=self.surrogateOptions,
-            variables=self.variables,
-            train_X_added=self.train_X_added,
-            train_Y_added=self.train_Y_added,
-            train_Yvar_added=self.train_Yvar_added,
+
+        embed()
+        self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM(
+            self.train_X, self.train_Y, train_Yvar = self.train_Yvar, input_transform = input_transform) #, outcome_transform=outcome_transform,
         )
+        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.gpmodel.likelihood, self.gpmodel)
+        botorch.fit.fit_gpytorch_mll(mll)
+
+        # self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM(
+        #     self.train_X,
+        #     self.train_Y,
+        #     self.train_Yvar,
+        #     input_transform=input_transform,
+        #     outcome_transform=outcome_transform,
+        #     surrogateOptions=self.surrogateOptions,
+        #     variables=self.variables,
+        #     train_X_added=self.train_X_added,
+        #     train_Y_added=self.train_Y_added,
+        #     train_Yvar_added=self.train_Yvar_added,
+        # )
 
     def _define_physics_transformation(self):
 
@@ -316,9 +324,9 @@ def _define_physics_transformation(self):
         input_transformations_physics = []
         outcome_transformations_physics = []
 
-        for ind_out in range(self.train_Y.shape[-1]):
+        for ind_out in range(self.train_Y.shape[0]):
 
-            dimY = 1
+            dimY = self.train_Y.shape[-1]
 
             input_transform_physics = BOTORCHtools.Transformation_Inputs(
                 self.outputs[ind_out], self.surrogate_parameters, self.surrogate_transformation_variables
@@ -334,7 +342,7 @@ def _define_physics_transformation(self):
         # Broadcast the input transformation to all outputs
         # ------------------------------------------------------------------------------------
 
-        input_transformation_physics = botorch.models.transforms.input.BatchBroadcastedInputTransform(input_transformations_physics)
+        input_transformation_physics = BOTORCHtools.BatchBroadcastedInputTransform_MITIM(input_transformations_physics)
         output_transformation_physics = outcome_transformations_physics[0] #TO FIX
 
         dimX = input_transformation_physics(self.train_X).shape[-1]
@@ -408,8 +416,8 @@ def fit(self):
 		"""
 
         # Train always in physics-transformed space, to enable mitim re-use training from file
-        with fundamental_model_context(self):
-            track_fval = self.perform_model_fit(mll)
+        #with fundamental_model_context(self):
+        track_fval = self.perform_model_fit(mll)
 
         # ---------------------------------------------------------------------------------------------------
         # Asses optimization
@@ -446,11 +454,9 @@ def perform_model_fit(self, mll):
         # --------------------------------------------------
 
         # Store first MLL value
-        embed()
         track_fval = [
             -mll.forward(mll.model(*mll.model.train_inputs), mll.model.train_targets)
             .detach()
-            .item()
         ]
 
         def callback(x, y, mll=mll):
@@ -472,9 +478,9 @@ def callback(x, y, mll=mll):
         self.gpmodel.likelihood.eval()
         mll.eval()
 
-        print(
-            f"\n\t- Marginal log likelihood went from {track_fval[0]:.3f} to {track_fval[-1]:.3f}"
-        )
+        # print(
+        #     f"\n\t- Marginal log likelihood went from {track_fval[0]:.3f} to {track_fval[-1]:.3f}"
+        # )
 
         return track_fval
 
@@ -898,13 +904,15 @@ def __init__(self, surrogate_model):
 
     def __enter__(self):
         # Works for individual models, not ModelList
-        self.surrogate_model.gpmodel.input_transform.tf1.flag_to_evaluate = False
+        for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
+            self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = False
         self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = False
 
         return self.surrogate_model
 
     def __exit__(self, *args):
-        self.surrogate_model.gpmodel.input_transform.tf1.flag_to_evaluate = True
+        for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
+            self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = True
         self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = True
 
 def create_df_portals(x, y, yvar, x_names, output, max_x = 20):

From d3ebc68b8e430798c47126829601a9e4d94d33b6 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Tue, 19 Nov 2024 23:29:23 -0500
Subject: [PATCH 11/34] First working version of transition from ModelList

---
 src/mitim_modules/portals/PORTALSmain.py      |  21 +-
 src/mitim_modules/portals/PORTALStools.py     |  33 +-
 .../portals/utils/PORTALSinit.py              |   4 +-
 src/mitim_tools/opt_tools/BOTORCHtools.py     | 358 ++++++++++++++++--
 src/mitim_tools/opt_tools/SURROGATEtools.py   | 309 +++++++--------
 5 files changed, 511 insertions(+), 214 deletions(-)

diff --git a/src/mitim_modules/portals/PORTALSmain.py b/src/mitim_modules/portals/PORTALSmain.py
index 7d9b5f3e..aec99d35 100644
--- a/src/mitim_modules/portals/PORTALSmain.py
+++ b/src/mitim_modules/portals/PORTALSmain.py
@@ -412,7 +412,7 @@ def run(self, paramsfile, resultsfile):
             with open(self.optimization_extra, "wb") as handle:
                 pickle_dill.dump(dictStore, handle, protocol=4)
 
-    def scalarized_objective(self, Y0):
+    def scalarized_objective(self, Y):
         """
         Notes
         -----
@@ -422,14 +422,6 @@ def scalarized_objective(self, Y0):
 
         ofs_ordered_names = np.array(self.optimization_options["ofs"])
 
-        # TO FIX: convert to dimensions such that the dimY is in -1
-        if Y0.shape[0] == len(ofs_ordered_names):
-            Y = Y0.transpose(0, -1)
-        elif Y0.shape[1] == len(ofs_ordered_names):
-            Y = Y0.transpose(1, -1)
-        else:
-            Y = Y0
-
         """
 		-------------------------------------------------------------------------
 		Prepare transport dictionary
@@ -456,17 +448,6 @@ def scalarized_objective(self, Y0):
 
         of, cal, _, res = PORTALSinteraction.calculatePseudos(self.powerstate, self.PORTALSparameters,specific_vars=var_dict)
 
-        # TO FIX: convert dims back
-        print(Y.shape, of.shape, cal.shape, res.shape)
-        if Y0.shape[0] == len(ofs_ordered_names):
-            of = of.transpose(0, -1)
-            cal = cal.transpose(0, -1)
-            #res = res.transpose(0, -1)
-        elif Y0.shape[1] == len(ofs_ordered_names):
-            of = of.transpose(1, -1)
-            cal = cal.transpose(1, -1)
-            #res = res.transpose(1, -1)
-
         return of, cal, res
 
     def analyze_results(self, plotYN=True, fn=None, cold_start=False, analysis_level=2):
diff --git a/src/mitim_modules/portals/PORTALStools.py b/src/mitim_modules/portals/PORTALStools.py
index dcb77f76..d8299f34 100644
--- a/src/mitim_modules/portals/PORTALStools.py
+++ b/src/mitim_modules/portals/PORTALStools.py
@@ -91,7 +91,7 @@ def default_portals_transformation_variables(additional_params = []):
 
     return portals_transformation_variables, portals_transformation_variables_trace
 
-def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformation_variables):
+def input_transformation_portals(Xorig, output, surrogate_parameters, surrogate_transformation_variables):
 
     """
     - Xorig will be a tensor (batch1...N,dim) unnormalized (with or without gradients).
@@ -155,11 +155,12 @@ def produceNewInputs(Xorig, output, surrogate_parameters, surrogate_transformati
 # ----------------------------------------------------------------------
 
 
-def transformPORTALS(X, surrogate_parameters, output):
+def output_transformation_portals(X, surrogate_parameters, outputs):
     """
     1. Make sure all batches are squeezed into a single dimension
     ------------------------------------------------------------------
             E.g.: (batch1,batch2,batch3,dim) -> (batch1*batch2*batch3,dim)
+    Output is a factor that accounts for all the outputs
     """
 
     shape_orig = np.array(X.shape)
@@ -174,14 +175,16 @@ def transformPORTALS(X, surrogate_parameters, output):
     # Produce relevant quantities here (in particular, GB will be used)
     powerstate = constructEvaluationProfiles(X, surrogate_parameters)
 
-    # --- Original model output is in real units, transform to GB here b/c that's how GK codes work
-    factorGB = GBfromXnorm(X, output, powerstate)
-    # --- Ratio of fluxes (quasilinear)
-    factorRat = ratioFactor(X, surrogate_parameters, output, powerstate)
-    # --- Specific to output
-    factorImp = ImpurityGammaTrick(X, surrogate_parameters, output, powerstate)
+    compounded = torch.Tensor().to(X)
+    for output in outputs:
+        # --- Original model output is in real units, transform to GB here b/c that's how GK codes work
+        factorGB = GBfromXnorm(X, output, powerstate)
+        # --- Ratio of fluxes (quasilinear)
+        factorRat = ratioFactor(X, surrogate_parameters, output, powerstate)
+        # --- Specific to output
+        factorImp = ImpurityGammaTrick(X, surrogate_parameters, output, powerstate)
 
-    compounded = factorGB * factorRat * factorImp
+        compounded = torch.cat((compounded, factorGB * factorRat * factorImp), dim=-1)
 
     """
 	3. Go back to the original batching system
@@ -190,7 +193,7 @@ def transformPORTALS(X, surrogate_parameters, output):
 	"""
     shape_orig[-1] = compounded.shape[-1]
     compounded = compounded.view(tuple(shape_orig))
-
+    
     return compounded
 
 
@@ -231,7 +234,7 @@ def computeTurbExchangeIndividual(PexchTurb, powerstate):
     return PexchTurb_integrated
 
 
-# def transformPORTALS(X,Y,Yvar,surrogate_parameters,output):
+# def output_transformation_portals(X,Y,Yvar,surrogate_parameters,output):
 # 	'''
 # 	Transform direct evaluation output to something that the model understands better.
 
@@ -250,14 +253,14 @@ def computeTurbExchangeIndividual(PexchTurb, powerstate):
 # 	return Ytr,Ytr_var
 
 
-# def untransformPORTALS(X, mean, upper, lower, surrogate_parameters, output):
+# def unoutput_transformation_portals(X, mean, upper, lower, surrogate_parameters, output):
 # 	'''
-# 	Transform direct model output to the actual evaluation output (must be the opposite to transformPORTALS)
+# 	Transform direct model output to the actual evaluation output (must be the opposite to output_transformation_portals)
 
 # 		- Receives unnormalized X (batch1,...,dim) to construct QGB (batch1,...,1) corresponding to what output I'm looking at
 # 		- Transforms and produces Y and confidence bounds (batch1,...,)
 
-# 	This untransforms whatever has happened in the transformPORTALS function
+# 	This untransforms whatever has happened in the output_transformation_portals function
 # 	'''
 
 # 	factor = factorProducer(X,surrogate_parameters,output).squeeze(-1)
@@ -380,9 +383,11 @@ def constructEvaluationProfiles(X, surrogate_parameters, recalculateTargets=Fals
     if ("parameters_combined" in surrogate_parameters) and (
         "powerstate" in surrogate_parameters["parameters_combined"]
     ):
+
         powerstate = surrogate_parameters["parameters_combined"]["powerstate"]
 
     else:
+
         powerstate = surrogate_parameters["powerstate"]
 
         if X.shape[0] > 0:
diff --git a/src/mitim_modules/portals/utils/PORTALSinit.py b/src/mitim_modules/portals/utils/PORTALSinit.py
index 09131416..46ae9e65 100644
--- a/src/mitim_modules/portals/utils/PORTALSinit.py
+++ b/src/mitim_modules/portals/utils/PORTALSinit.py
@@ -340,8 +340,8 @@ def initializeProblem(
         Variables[ikey] = prepportals_transformation_variables(portals_fun, ikey)
 
     portals_fun.surrogate_parameters = {
-        "transformationInputs": PORTALStools.produceNewInputs,
-        "transformationOutputs": PORTALStools.transformPORTALS,
+        "transformationInputs": PORTALStools.input_transformation_portals,
+        "transformationOutputs": PORTALStools.output_transformation_portals,
         "powerstate": portals_fun.powerstate,
         "applyImpurityGammaTrick": portals_fun.PORTALSparameters[
             "applyImpurityGammaTrick"
diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index cec5f6d6..d651ae3f 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -14,9 +14,6 @@
 # SingleTaskGP needs to be modified because I want to input options and outcome transform taking X, otherwise it should be a copy
 # ----------------------------------------------------------------------------------------------------------------------------
 
-import torch
-from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
-from botorch.models.model import FantasizeMixin
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import OutcomeTransform, Standardize
 from botorch.models.utils import validate_input_scaling
@@ -24,18 +21,15 @@
     get_covar_module_with_dim_scaled_prior,
     get_gaussian_likelihood_with_lognormal_prior,
 )
-from botorch.utils.containers import BotorchContainer
-from botorch.utils.datasets import SupervisedDataset
-from botorch.utils.types import _DefaultType, DEFAULT
-from gpytorch.distributions.multivariate_normal import MultivariateNormal
+from botorch.utils.types import DEFAULT
 from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
-from gpytorch.likelihoods.likelihood import Likelihood
 from gpytorch.means.constant_mean import ConstantMean
-from gpytorch.means.mean import Mean
 from gpytorch.models.exact_gp import ExactGP
 from gpytorch.module import Module
 from torch import Tensor
 
+from linear_operator.operators import CholLinearOperator, DiagLinearOperator
+
 class SingleTaskGP_MITIM(botorch.models.gp_regression.SingleTaskGP):
     def __init__(
         self,
@@ -58,7 +52,7 @@ def __init__(
             transformed_X = self.transform_inputs(
                 X=train_X, input_transform=input_transform
             )
-            embed()
+
         if outcome_transform is not None:
             train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar)
         # Validate again after applying the transforms
@@ -111,7 +105,7 @@ def __init__(
             self.input_transform = input_transform
         self.to(train_X)
 
-    def posterior(
+    def posterior_full(
         self,
         X,
         output_indices=None,
@@ -459,7 +453,6 @@ def posterior(
 # ModelListGP needs to be modified to allow me to have "common" parameters to models, to not run at every transformation again
 # ----------------------------------------------------------------------------------------------------------------------------
 
-
 class ModifiedModelListGP(botorch.models.model_list_gp_regression.ModelListGP):
     def __init__(self, *gp_models):
         super().__init__(*gp_models)
@@ -518,8 +511,7 @@ def posterior(
 # ----------------------------------------------------------------------------------------------------------------------------
 
 class Transformation_Inputs(
-    botorch.models.transforms.input.ReversibleInputTransform, torch.nn.Module
-):
+    botorch.models.transforms.input.ReversibleInputTransform, torch.nn.Module):
     def __init__(
         self,
         output,
@@ -580,54 +572,104 @@ def _untransform(self, X):
 
 # Copy standardize but modify in untransform the "std" which is my factor!
 class Transformation_Outcomes(botorch.models.transforms.outcome.Standardize):
-    def __init__(self, m, output, surrogate_parameters):
+    def __init__(self, m, outputs_names, surrogate_parameters):
         super().__init__(m)
 
-        self.output = output
+        self.outputs_names = outputs_names
         self.surrogate_parameters = surrogate_parameters
         self.flag_to_evaluate = True
 
     def forward(self, X, Y, Yvar):
-        if (self.output is not None) and (self.flag_to_evaluate):
+        if (self.outputs_names is not None) and (self.flag_to_evaluate):
             factor = self.surrogate_parameters["transformationOutputs"](
-                X, self.surrogate_parameters, self.output
+                X, self.surrogate_parameters, self.outputs_names
             ).to(X.device)
         else:
             factor = Y.mean(dim=-2, keepdim=True).to(Y.device) * 0.0 + 1.0
 
+        # This occurs in Standardize, now I'm tricking it
         self.stdvs = factor
         self.means = self.stdvs * 0.0
         self._stdvs_sq = self.stdvs.pow(2)
+        self._is_trained = torch.tensor(True)
 
         # When calling the forward method of Standardize, do not recalculate mean and stdvs (never be on training)
-        self._is_trained = torch.tensor(True)
         self.training = False
         # ----------------------------------------
 
         return super().forward(Y, Yvar)
 
     def untransform_posterior(self, X, posterior):
-        if (self.output is not None) and (self.flag_to_evaluate):
+        if (self.outputs_names is not None) and (self.flag_to_evaluate):
             factor = self.surrogate_parameters["transformationOutputs"](
-                X, self.surrogate_parameters, self.output
+                X, self.surrogate_parameters, self.outputs_names
             ).to(X.device)
 
             self.stdvs = factor
             self.means = self.stdvs * 0.0
             self._stdvs_sq = self.stdvs.pow(2)
-            return super().untransform_posterior(posterior)
-
+            return self.untransform_posterior_mod(posterior)
         else:
             return posterior
 
     def untransform(self, Y, Yvar):
         raise NotImplementedError("[MITIM] This situation has not been implemented yet")
 
+    def untransform_posterior_mod(self, posterior):
+        '''
+        PRF: I modified this because I cannot make the squeeze operation in the posterior, otherwise
+        I miss the element of the batch dimension 
+        '''
+        is_mtgp_posterior = False
+        if type(posterior) is GPyTorchPosterior:
+            is_mtgp_posterior = posterior._is_mt
+        if not self._m == posterior._extended_shape()[-1] and not is_mtgp_posterior:
+            raise RuntimeError(
+                "Incompatible output dimensions encountered. Transform has output "
+                f"dimension {self._m} and posterior has "
+                f"{posterior._extended_shape()[-1]}."
+            )
+
+        mvn = posterior.distribution
+        offset = self.means
+        scale_fac = self.stdvs
+        if not posterior._is_mt:
+            mean_tf = offset.squeeze(-1) + scale_fac.squeeze(-1) * mvn.mean
+            scale_fac = scale_fac.squeeze(-1).expand_as(mean_tf)
+        else:
+            mean_tf = offset + scale_fac * mvn.mean
+            # reps = mean_tf.shape[-2:].numel() // scale_fac.size(-1)
+            # scale_fac = scale_fac.squeeze(-2)
+
+            reps = mean_tf.shape[-2:].numel() // scale_fac.size(-1)
+            scale_fac = scale_fac.view(-1)
+            
+            # if mvn._interleaved:
+            #     scale_fac = scale_fac.repeat(*[1 for _ in scale_fac.shape[:-1]], reps)
+            # else:
+            #     scale_fac = torch.repeat_interleave(scale_fac, reps, dim=-1)
+
+        if (
+            not mvn.islazy
+            # TODO: Figure out attribute naming weirdness here
+            or mvn._MultivariateNormal__unbroadcasted_scale_tril is not None
+        ):
+            # if already computed, we can save a lot of time using scale_tril
+            covar_tf = CholLinearOperator(mvn.scale_tril * scale_fac.unsqueeze(-1))
+        else:
+            lcv = mvn.lazy_covariance_matrix
+            #scale_fac = scale_fac.expand(lcv.shape[:-1])
+            scale_mat = DiagLinearOperator(scale_fac)
+            covar_tf = scale_mat @ lcv @ scale_mat
+
+        kwargs = {"interleaved": mvn._interleaved} if posterior._is_mt else {}
+        mvn_tf = mvn.__class__(mean=mean_tf, covariance_matrix=covar_tf, **kwargs)
+        return GPyTorchPosterior(mvn_tf)
+
 
 # Because I need it to take X too (for physics only, which is always the first tf)
 class ChainedOutcomeTransform(
-    botorch.models.transforms.outcome.ChainedOutcomeTransform
-):
+    botorch.models.transforms.outcome.ChainedOutcomeTransform):
     def __init__(self, **transforms):
         super().__init__(**transforms)
 
@@ -643,7 +685,7 @@ def untransform_posterior(self, X, posterior):
         for i, tf in enumerate(reversed(self.values())):
             posterior = (
                 tf.untransform_posterior(X, posterior)
-                if i == 1
+                if i == len(self.values())-1
                 else tf.untransform_posterior(posterior)
             )  # Only physics transformation (tf1) takes X
 
@@ -898,3 +940,267 @@ def forward(self, x):
             + self.bias
         )
         return res
+
+
+#!/usr/bin/env -S grimaldi --kernel bento_kernel_automl
+# fmt: off
+
+""":py"""
+# %local-changes
+
+""":py"""
+import botorch
+import torch
+from botorch.fit import fit_gpytorch_mll
+from botorch.models import SingleTaskGP
+from botorch.models.transforms.input import Normalize
+from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
+from torch import Tensor
+
+""":py"""
+from typing import Iterable
+
+from botorch.models.transforms.input import InputTransform
+from torch.nn import ModuleDict
+
+
+class BatchBroadcastedInputTransform(InputTransform, ModuleDict):
+    r"""An input transform representing a list of transforms to be broadcasted."""
+
+    def __init__(
+        self,
+        transforms: list[InputTransform],
+        broadcast_index: int = -3,
+    ) -> None:
+        r"""A transform list that is broadcasted across a batch dimension specified by
+        `broadcast_index`. This allows using a batched Gaussian process model when
+        the input transforms are different for different batch dimensions.
+
+        Args:
+            transforms: The transforms to broadcast across the first batch dimension.
+                The transform at position i in the list will be applied to `X[i]` for
+                a given input tensor `X` in the forward pass.
+            broadcast_index: The tensor index at which the transforms are broadcasted.
+
+        Example:
+            >>> tf1 = Normalize(d=2)
+            >>> tf2 = InputStandardize(d=2)
+            >>> tf = BatchBroadcastedInputTransform(transforms=[tf1, tf2])
+        """
+        super().__init__()
+        self.transform_on_train = False
+        self.transform_on_eval = False
+        self.transform_on_fantasize = False
+        self.transforms = transforms
+        if broadcast_index >= 0:
+            raise ValueError("A non-negative broadcast index is not supported yet.")
+        if broadcast_index in (-2, -1):
+            raise ValueError(
+                "The broadcast index cannot be -2 and -1, as these indices are reserved"
+                " for non-batch, data and input dimensions."
+            )
+        self.broadcast_index = broadcast_index
+        self.is_one_to_many = self.transforms[0].is_one_to_many
+        if not all(tf.is_one_to_many == self.is_one_to_many for tf in self.transforms):
+            raise ValueError(  # output shapes of transforms must be the same
+                "All transforms must have the same is_one_to_many property."
+            )
+        for tf in self.transforms:
+            self.transform_on_train |= tf.transform_on_train
+            self.transform_on_eval |= tf.transform_on_eval
+            self.transform_on_fantasize |= tf.transform_on_fantasize
+
+    def transform(self, X: Tensor) -> Tensor:
+        r"""Transform the inputs to a model.
+
+        Individual transforms are applied in sequence and results are returned as
+        a batched tensor.
+
+        Args:
+            X: A `batch_shape x n x d`-dim tensor of inputs.
+
+        Returns:
+            A `batch_shape x n x d`-dim tensor of transformed inputs.
+        """
+        return torch.stack(
+            [t.forward(Xi) for Xi, t in self._Xs_and_transforms(X)],
+            dim=self.broadcast_index,
+        )
+
+    def untransform(self, X: Tensor) -> Tensor:
+        r"""Un-transform the inputs to a model.
+
+        Each transform's un-transform is applied to the same `X`; results must coincide.
+
+        Args:
+            X: A `batch_shape x n x d`-dim tensor of transformed inputs.
+
+        Returns:
+            A `batch_shape x n x d`-dim tensor of un-transformed inputs.
+        """
+        # return torch.stack(
+        #     [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)],
+        #     dim=self.broadcast_index,
+        # )
+        #
+        # return self.transforms[0].untransform(X)
+        Xt = torch.stack(
+            [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)],
+            dim=self.broadcast_index,
+        )
+        Xt = Xt.unique(dim=self.broadcast_index)
+        # since we are assuming that this batch dimension was added solely
+        # because of different transforms, rather than different original inputs X.
+        assert Xt.shape[self.broadcast_index] == 1
+        return Xt.squeeze(self.broadcast_index)
+
+    def equals(self, other: InputTransform) -> bool:
+        r"""Check if another input transform is equivalent.
+
+        Args:
+            other: Another input transform.
+
+        Returns:
+            A boolean indicating if the other transform is equivalent.
+        """
+        return (
+            super().equals(other=other)
+            and all(t1.equals(t2) for t1, t2 in zip(self.transforms, other.transforms))
+            and (self.broadcast_index == other.broadcast_index)
+        )
+
+    def preprocess_transform(self, X: Tensor) -> Tensor:
+        r"""Apply transforms for preprocessing inputs.
+
+        The main use cases for this method are 1) to preprocess training data
+        before calling `set_train_data` and 2) preprocess `X_baseline` for noisy
+        acquisition functions so that `X_baseline` is "preprocessed" with the
+        same transformations as the cached training inputs.
+
+        Args:
+            X: A `batch_shape x n x d`-dim tensor of inputs.
+
+        Returns:
+            A `batch_shape x n x d`-dim tensor of (transformed) inputs.
+        """
+        return torch.stack(
+            [t.preprocess_transform(Xi) for Xi, t in self._Xs_and_transforms(X)],
+            dim=self.broadcast_index,
+        )
+
+    def _Xs_and_transforms(self, X: Tensor) -> Iterable[tuple[Tensor, InputTransform]]:
+        r"""Returns an iterable pairing the input X with each of the transforms.
+
+        Args:
+            X: A `batch_shape x n x d`-dim tensor of inputs.
+
+        Returns:
+            An iterable of `(X, transform)` tuples; every tuple holds the same X.
+        """
+        # transform_shape = (
+        #     len(input_transform.transforms),
+        #     *(1 for _ in range(abs(self.broadcast_index) - 1)),
+        # )
+        # print(f"{transform_shape = }")
+        # print(f"{X.shape = }")
+        # TODO: Add dimension rather than broadcasting over the inputs.
+        # NOTE: the expand/unbind variant below is disabled; X is passed whole to each transform.
+        # broadcast_shape = torch.broadcast_shapes(transform_shape, X.shape)
+        # X_expanded = X.expand(broadcast_shape)
+        # Xs = X_expanded.unbind(dim=self.broadcast_index)
+        # return zip(Xs, self.transforms)
+        return zip([X for _ in self.transforms], self.transforms)
+
+
+
+
+""":py"""
+from botorch.models.transforms.outcome import OutcomeTransform
+from botorch.posteriors.gpytorch import GPyTorchPosterior
+from botorch.posteriors.posterior import Posterior
+from gpytorch.distributions import MultitaskMultivariateNormal
+from linear_operator.operators import BlockDiagLinearOperator
+
+
+class OutcomeToBatchDimension(OutcomeTransform):
+    """Transform moving the outcome dimension `m` of the targets into a batch dimension."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(
+        self, Y: Tensor, Yvar: Tensor | None = None
+    ) -> tuple[Tensor, Tensor | None]:
+        r"""Transform the outcomes in a model's training targets
+
+        Args:
+            Y: A `batch_shape x n x m`-dim tensor of training targets.
+            Yvar: A `batch_shape x n x m`-dim tensor of observation noises
+                associated with the training targets (if applicable).
+
+        Returns:
+            A two-tuple with the transformed outcomes (batch_shape x m x n x 1).
+
+            - The transformed outcome observations.
+            - The transformed observation noise, or `None` when `Yvar` is `None`.
+        """
+        return Y.unsqueeze(-3).transpose(-3, -1), (
+            Yvar.unsqueeze(-3).transpose(-3, -1) if Yvar is not None else None
+        )
+
+    def untransform(
+        self, Y: Tensor, Yvar: Tensor | None = None
+    ) -> tuple[Tensor, Tensor | None]:
+        r"""Un-transform previously transformed outcomes
+
+        Args:
+            Y: A `batch_shape x m x n x 1`-dim tensor of transformed training targets.
+            Yvar: A `batch_shape x m x n x 1`-dim tensor of transformed observation
+                noises associated with the training targets (if applicable).
+
+        Returns:
+            A two-tuple with the un-transformed outcomes (batch_shape x n x m):
+
+            - The un-transformed outcome observations.
+            - The un-transformed observation noise, or `None` when `Yvar` is `None`.
+        """
+        assert Y.shape[-1] == 1
+        Y_perm = Y.transpose(-3, -1).squeeze(-3)
+        Yvar_perm = Yvar.transpose(-3, -1).squeeze(-3) if Yvar is not None else None
+        return Y_perm, Yvar_perm
+
+    @property
+    def _is_linear(self) -> bool:
+        """
+        True for transformations such as `Standardize`; these should be able to apply
+        `untransform_posterior` to a GPyTorchPosterior and return a GPyTorchPosterior,
+        because a multivariate normal distribution should remain multivariate normal
+        after applying the transform.
+        """
+        return True
+
+    def untransform_posterior(self, posterior: Posterior) -> Posterior:
+        r"""Un-transform a posterior.
+
+        Posteriors with `_is_linear=True` should return a `GPyTorchPosterior` when
+        `posterior` is a `GPyTorchPosterior`. Posteriors with `_is_linear=False`
+        likely return a `TransformedPosterior` instead.
+
+        Args:
+            posterior: A posterior in the transformed space.
+
+        Returns:
+            The un-transformed posterior.
+        """
+        mvn = posterior.mvn
+        # print(f"{posterior.mean.shape = }")
+        # print(f"{mvn.mean.shape = }")
+        mean = self.untransform(posterior.mean)[0]
+        # print(f"{mean.shape = }")
+        covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
+        # could potentially use from_independent_mvns
+        # print(f"{mvn._covar.shape = }")
+        # print(f"{covar.shape=}")
+        dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
+        return GPyTorchPosterior(distribution=dis)
+
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 29d60fd1..4ae30a3d 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -48,12 +48,9 @@ def __init__(
         Noise is variance here (square of standard deviation).
         """
 
-        if avoidPoints is None:
-            avoidPoints = []
-
         torch.manual_seed(seed)
 
-        self.avoidPoints = avoidPoints
+        self.avoidPoints = avoidPoints if avoidPoints is not None else []
         self.outputs = outputs
         self.outputs_transformed = outputs_transformed
         self.surrogateOptions = surrogateOptions
@@ -62,23 +59,15 @@ def __init__(
         self.bounds = bounds
         self.FixedValue = FixedValue
         self.fileTraining = fileTraining
-
-        self.losses = None
-
         if self.dfT is None:
             self.dfT = torch.randn((2, 2),dtype=torch.double,device=torch.device("cpu"))
-
-        # I need: GPs, training, dim
         self.train_Y = torch.from_numpy(Yor).to(self.dfT)
         self.train_X = torch.from_numpy(Xor).to(self.dfT)
 
         # Extend noise if needed
         if isinstance(Yvaror, float) or len(Yvaror.shape) == 1:
-            print(
-                f"\t- Noise (variance) has one value only ({Yvaror}), assuming constant for all samples and outputs in absolute terms",
-            )
+            print(f"\t- Noise (variance) has one value only ({Yvaror}), assuming constant for all samples and outputs in absolute terms")
             Yvaror = Yor * 0.0 + Yvaror
-
         self.train_Yvar = torch.from_numpy(Yvaror).to(self.dfT)
 
         # ---------- Print ----------
@@ -86,6 +75,8 @@ def __init__(
         for i in self.surrogateOptions:
             print(f"\t\t{i:20} = {self.surrogateOptions[i]}")
 
+        self.losses = None
+
         # # --------------------------------------------------------------------
         # # Eliminate points if needed (not from the "added" set)
         # # --------------------------------------------------------------------
@@ -111,125 +102,119 @@ def __init__(
         # -------------------------------------------------------------------------------------
 
         # Points to be added from file
-        continueAdding = False
-        if ("extrapointsFile" in self.surrogateOptions) and (self.surrogateOptions["extrapointsFile"] is not None) and (self.output is not None) and (self.output in self.surrogateOptions["extrapointsModels"]):
+        # continueAdding = False
+        # if ("extrapointsFile" in self.surrogateOptions) and (self.surrogateOptions["extrapointsFile"] is not None) and (self.output is not None) and (self.output in self.surrogateOptions["extrapointsModels"]):
 
-            print(
-                f"\t* Requested extension of training set by points in file {self.surrogateOptions['extrapointsFile']}"
-            )
+        #     print(
+        #         f"\t* Requested extension of training set by points in file {self.surrogateOptions['extrapointsFile']}"
+        #     )
 
-            df = pd.read_csv(self.surrogateOptions["extrapointsFile"])
-            df_model = df[df['Model'] == self.output]
+        #     df = pd.read_csv(self.surrogateOptions["extrapointsFile"])
+        #     df_model = df[df['Model'] == self.output]
 
-            if len(df_model) == 0:
-                print("\t- No points for this output in the file, nothing to add", typeMsg="i")
-                continueAdding = False
-            else:
-                continueAdding = True
+        #     if len(df_model) == 0:
+        #         print("\t- No points for this output in the file, nothing to add", typeMsg="i")
+        #         continueAdding = False
+        #     else:
+        #         continueAdding = True
 
-        if continueAdding:
+        # if continueAdding:
 
-            # Check 1: Do the points for this output share the same x_names?
-            if df_model['x_names'].nunique() > 1:
-                print("Different x_names for points in the file, prone to errors", typeMsg='q')
+        #     # Check 1: Do the points for this output share the same x_names?
+        #     if df_model['x_names'].nunique() > 1:
+        #         print("Different x_names for points in the file, prone to errors", typeMsg='q')
 
-            # Check 2: Is it consistent with the x_names of this run?
-            x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0]
-            x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.output]
-            if x_names != x_names_check:
-                print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q')            
+        #     # Check 2: Is it consistent with the x_names of this run?
+        #     x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0]
+        #     x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.output]
+        #     if x_names != x_names_check:
+        #         print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q')            
 
-            self.train_Y_added = torch.from_numpy(df_model['y'].to_numpy()).unsqueeze(-1).to(self.dfT)
-            self.train_Yvar_added = torch.from_numpy(df_model['yvar'].to_numpy()).unsqueeze(-1).to(self.dfT)
+        #     self.train_Y_added = torch.from_numpy(df_model['y'].to_numpy()).unsqueeze(-1).to(self.dfT)
+        #     self.train_Yvar_added = torch.from_numpy(df_model['yvar'].to_numpy()).unsqueeze(-1).to(self.dfT)
     
-            x = []
-            for i in range(len(x_names)):
-                x.append(df_model[f'x{i}'].to_numpy())
-            self.train_X_added_full = torch.from_numpy(np.array(x).T).to(self.dfT)
-
-            # ------------------------------------------------------------------------------------------------------------
-            # Define transformation (here because I want to account for the added points)
-            # ------------------------------------------------------------------------------------------------------------
-            self.num_training_points = self.train_X.shape[0] + self.train_X_added_full.shape[0]
-            input_transform_physics, outcome_transform_physics, dimTransformedDV_x, dimTransformedDV_y = self._define_physics_transformation()
-            # ------------------------------------------------------------------------------------------------------------
-
-            self.train_X_added = (
-                self.train_X_added_full[:, :dimTransformedDV_x] if self.train_X_added_full.shape[-1] > dimTransformedDV_x else self.train_X_added_full
-            ).to(self.dfT)
-
-        else:
-            if self.fileTraining is not None:
-                train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
-                    self.train_X,
-                    self.output,
-                    self.surrogate_parameters,
-                    self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
-                )
-                dimTransformedDV_x_full = train_X_Complete.shape[-1]
-            else:
-                dimTransformedDV_x_full = self.train_X.shape[-1]
-
-            # --------------------------------------------------------------------------------------
-            # Define transformation (here because I want to account for the added points)
-            # --------------------------------------------------------------------------------------
-            self.num_training_points = self.train_X.shape[1]
-            input_transform_physics, outcome_transform_physics, dimTransformedDV_x, dimTransformedDV_y = self._define_physics_transformation()
-            # ------------------------------------------------------------------------------------------------------------
-
-            self.train_X_added_full = torch.empty((0, dimTransformedDV_x_full)).to(self.dfT)
-            self.train_X_added = torch.empty((0, dimTransformedDV_x)).to(self.dfT)
-            self.train_Y_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
-            self.train_Yvar_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
-
-        # --------------------------------------------------------------------------------------
-        # Make sure that very small variations are not captured
-        # --------------------------------------------------------------------------------------
+        #     x = []
+        #     for i in range(len(x_names)):
+        #         x.append(df_model[f'x{i}'].to_numpy())
+        #     self.train_X_added_full = torch.from_numpy(np.array(x).T).to(self.dfT)
+
+        #     # ------------------------------------------------------------------------------------------------------------
+        #     # Define transformation (here because I want to account for the added points)
+        #     # ------------------------------------------------------------------------------------------------------------
+        #     self.num_training_points = self.train_X.shape[0] + self.train_X_added_full.shape[0]
+        #     input_transform_physics, outcome_transform_physics, \
+        #     input_transform_normalization, output_transformed_standardization, \
+        #     dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
+        #     # ------------------------------------------------------------------------------------------------------------
+
+        #     self.train_X_added = (
+        #         self.train_X_added_full[:, :dimTransformedDV_x] if self.train_X_added_full.shape[-1] > dimTransformedDV_x else self.train_X_added_full
+        #     ).to(self.dfT)
 
-        if (self.train_X_added.shape[0] > 0) and (self.train_X.shape[0] > 1):
-            self.ensureMinimalVariationSuppressed(input_transform_physics)
+        # else:
+        # if self.fileTraining is not None:
+        #     train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
+        #         self.train_X,
+        #         self.output,
+        #         self.surrogate_parameters,
+        #         self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
+        #     )
+        #     dimTransformedDV_x_full = train_X_Complete.shape[-1]
+        # else:
+        #     dimTransformedDV_x_full = self.train_X.shape[-1]
 
         # --------------------------------------------------------------------------------------
-        # Make sure at least 2 points
+        # Define transformation (here because I want to account for the added points)
         # --------------------------------------------------------------------------------------
+        self.num_training_points = self.train_X.shape[0]
+        input_transform_physics, outcome_transform_physics,\
+        input_transform_normalization, output_transformed_standardization,\
+        dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
+        # ------------------------------------------------------------------------------------------------------------
+
+        # self.train_X_added_full = torch.empty((0, dimTransformedDV_x_full)).to(self.dfT)
+        # self.train_X_added = torch.empty((0, dimTransformedDV_x)).to(self.dfT)
+        # self.train_Y_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
+        # self.train_Yvar_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
+
+        # # --------------------------------------------------------------------------------------
+        # # Make sure that very small variations are not captured
+        # # --------------------------------------------------------------------------------------
+
+        # if (self.train_X_added.shape[0] > 0) and (self.train_X.shape[0] > 1):
+        #     self.ensureMinimalVariationSuppressed(input_transform_physics)
+
+        # # --------------------------------------------------------------------------------------
+        # # Make sure at least 2 points
+        # # --------------------------------------------------------------------------------------
+
+        # if self.train_X.shape[0] + self.train_X_added.shape[0] == 1:
+        #     factor = 1.2
+        #     print(
+        #         f"\t- This dataset had only one point, adding a point with linear interpolation (trick for PORTALS targets only), {factor}",
+        #         typeMsg="w",
+        #     )
+        #     self.train_X = torch.cat((self.train_X, self.train_X * factor))
+        #     self.train_Y = torch.cat((self.train_Y, self.train_Y * factor))
+        #     self.train_Yvar = torch.cat((self.train_Yvar, self.train_Yvar * factor))
 
-        if self.train_X.shape[0] + self.train_X_added.shape[0] == 1:
-            factor = 1.2
-            print(
-                f"\t- This objective had only one point, adding a point with linear interpolation (trick for mitim targets only), {factor}",
-                typeMsg="w",
-            )
-            self.train_X = torch.cat((self.train_X, self.train_X * factor))
-            self.train_Y = torch.cat((self.train_Y, self.train_Y * factor))
-            self.train_Yvar = torch.cat((self.train_Yvar, self.train_Yvar * factor))
+        # # -------------------------------------------------------------------------------------
+        # # Check minimum noises
+        # # -------------------------------------------------------------------------------------
 
-        # -------------------------------------------------------------------------------------
-        # Check minimum noises
-        # -------------------------------------------------------------------------------------
+        # self.ensureMinimumNoise()
 
-        self.ensureMinimumNoise()
+        # # -------------------------------------------------------------------------------------
+        # # Write file with surrogate if there are transformations
+        # # -------------------------------------------------------------------------------------
 
-        # -------------------------------------------------------------------------------------
-        # Write file with surrogate if there are transformations
-        # -------------------------------------------------------------------------------------
-
-        if (self.fileTraining is not None) and (
-            self.train_X.shape[0] + self.train_X_added.shape[0] > 0
-        ):
-            self.writeFileTraining(input_transform_physics, outcome_transform_physics)
+        # if (self.fileTraining is not None) and (self.train_X.shape[0] + self.train_X_added.shape[0] > 0):
+        #     self.writeFileTraining(input_transform_physics, outcome_transform_physics)
 
         # -------------------------------------------------------------------------------------
-        # Input and Outcome transform (NORMALIZATIONS)
+        # Obtain normalization constants now (although during training this is messed up, so needed later too)
         # -------------------------------------------------------------------------------------
 
-        input_transform_normalization = botorch.models.transforms.input.Normalize(
-            d = dimTransformedDV_x, bounds=None
-        ).to(self.dfT)
-        output_transformed_standardization = (
-            botorch.models.transforms.outcome.Standardize(m = dimTransformedDV_y)
-        ).to(self.dfT)
-
-        # Obtain normalization constants now (although during training this is messed up, so needed later too)
         self.normalization_pass(
             input_transform_physics,
             input_transform_normalization,
@@ -246,7 +231,7 @@ def __init__(
         ).to(self.dfT)
 
         outcome_transform = BOTORCHtools.ChainedOutcomeTransform(
-            tf1=outcome_transform_physics, tf2=output_transformed_standardization
+            tf1=outcome_transform_physics, tf2=output_transformed_standardization, tf3=BOTORCHtools.OutcomeToBatchDimension()
         ).to(self.dfT)
 
         self.variables = None
@@ -273,12 +258,25 @@ def __init__(
         self.train_X_added contains the transformed of the table:       (batch2, dimXtr)
         """
 
-        embed()
         self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM(
-            self.train_X, self.train_Y, train_Yvar = self.train_Yvar, input_transform = input_transform) #, outcome_transform=outcome_transform,
+            self.train_X,
+            self.train_Y,
+            train_Yvar = self.train_Yvar,
+            input_transform = input_transform,
+            outcome_transform=outcome_transform,
         )
         mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.gpmodel.likelihood, self.gpmodel)
         botorch.fit.fit_gpytorch_mll(mll)
+        #self.gpmodel.posterior(self.train_X)
+
+        x = torch.rand(64, self.train_X.shape[-1]).to(self.dfT)
+        from mitim_tools.misc_tools import IOtools
+        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
+            self.gpmodel.posterior(x)
+
+
+        embed()
+
 
         # self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM(
         #     self.train_X,
@@ -293,64 +291,71 @@ def __init__(
         #     train_Yvar_added=self.train_Yvar_added,
         # )
 
-    def _define_physics_transformation(self):
+    def _define_MITIM_transformations(self):
 
-        # ------------------------------------------------------------------------------------
-        # Define individual transformations and then put together
-        # ------------------------------------------------------------------------------------
+        '''
+        ********************************************************************************
+        Define individual output transformations and then put together
+            X is [batch, dimX]
+            Xtr is [batch, dimXtr] of each individual output
+            Xtr_full is [dimY, batch, dimXtr] of the broadcasted input transformation
+
+            Y is [batch, dimY]
+            Ytr is [batch, dimY]
+        ********************************************************************************
+        '''
 
         self.surrogate_transformation_variables = None
-        if ("surrogate_transformation_variables_alltimes" in self.surrogate_parameters) and (self.surrogate_parameters["surrogate_transformation_variables_alltimes"] is not None):
+        if ("surrogate_transformation_variables_alltimes" in self.surrogate_parameters) and \
+           (self.surrogate_parameters["surrogate_transformation_variables_alltimes"] is not None):
 
             transition_position = list(self.surrogate_parameters["surrogate_transformation_variables_alltimes"].keys())[
-                    np.where(
-                        self.num_training_points
-                        < np.array(
-                            list(
-                                self.surrogate_parameters[
-                                    "surrogate_transformation_variables_alltimes"
-                                ].keys()
-                            )
-                        )
-                    )[0][0]
-                ]
+                np.where(
+                    self.num_training_points < np.array(list(self.surrogate_parameters["surrogate_transformation_variables_alltimes"].keys())))[0][0]
+                    ]
 
             self.surrogate_transformation_variables = self.surrogate_parameters["surrogate_transformation_variables_alltimes"][transition_position]
 
-        # ------------------------------------------------------------------------------------
-        # Input and Outcome transform (PHYSICS) of each output
-        # ------------------------------------------------------------------------------------
-
         input_transformations_physics = []
-        outcome_transformations_physics = []
-
-        for ind_out in range(self.train_Y.shape[0]):
 
-            dimY = self.train_Y.shape[-1]
+        for ind_out in range(self.train_Y.shape[-1]):
 
             input_transform_physics = BOTORCHtools.Transformation_Inputs(
                 self.outputs[ind_out], self.surrogate_parameters, self.surrogate_transformation_variables
             ).to(self.dfT)
-            outcome_transform_physics = BOTORCHtools.Transformation_Outcomes(
-                dimY, self.outputs[ind_out], self.surrogate_parameters
-            ).to(self.dfT)
 
             input_transformations_physics.append(input_transform_physics)
-            outcome_transformations_physics.append(outcome_transform_physics)
+        
+        dimY = self.train_Y.shape[-1]
+        output_transformation_physics = BOTORCHtools.Transformation_Outcomes(
+                dimY, self.outputs, self.surrogate_parameters
+            ).to(self.dfT)
 
         # ------------------------------------------------------------------------------------
         # Broadcast the input transformation to all outputs
         # ------------------------------------------------------------------------------------
 
-        input_transformation_physics = BOTORCHtools.BatchBroadcastedInputTransform_MITIM(input_transformations_physics)
-        output_transformation_physics = outcome_transformations_physics[0] #TO FIX
+        input_transformation_physics = BOTORCHtools.BatchBroadcastedInputTransform(input_transformations_physics)
 
-        dimX = input_transformation_physics(self.train_X).shape[-1]
-
-        dimTransformedDV_x = dimX
+        dimTransformedDV_x = input_transformation_physics(self.train_X).shape[-1]
         dimTransformedDV_y = self.train_Y.shape[-1]
 
-        return input_transformation_physics, output_transformation_physics, dimTransformedDV_x, dimTransformedDV_y
+        # ------------------------------------------------------------------------------------
+        # Normalizations
+        # ------------------------------------------------------------------------------------
+
+        input_transform_normalization = botorch.models.transforms.input.Normalize(
+            d = dimTransformedDV_x, bounds=None
+        ).to(self.dfT)
+        output_transformed_standardization = (
+            botorch.models.transforms.outcome.Standardize(m = dimTransformedDV_y)
+        ).to(self.dfT)
+
+        return  input_transformation_physics, \
+                output_transformation_physics, \
+                input_transform_normalization, \
+                output_transformed_standardization, \
+                dimTransformedDV_x, dimTransformedDV_y
 
     def normalization_pass(
         self,
@@ -385,9 +390,9 @@ def normalization_pass(
         outcome_transform_normalization._is_trained = torch.tensor(True)
 
     def fit(self):
-        print(
-            f"\t- Fitting model to {self.train_X.shape[0]+self.train_X_added.shape[0]} points"
-        )
+        # print(
+        #     f"\t- Fitting model to {self.train_X.shape[0]+self.train_X_added.shape[0]} points"
+        # )
 
         # ---------------------------------------------------------------------------------------------------
         # Define loss Function to minimize

From 744014b3db59ccc727f57650604bfe03e61e83b6 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Tue, 19 Nov 2024 23:30:38 -0500
Subject: [PATCH 12/34] misc

---
 src/mitim_tools/opt_tools/SURROGATEtools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 4ae30a3d..198019b7 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -269,7 +269,7 @@ def __init__(
         botorch.fit.fit_gpytorch_mll(mll)
         #self.gpmodel.posterior(self.train_X)
 
-        x = torch.rand(64, self.train_X.shape[-1]).to(self.dfT)
+        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
         from mitim_tools.misc_tools import IOtools
         with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
             self.gpmodel.posterior(x)

From 3e840f4da6e935536b863ebe9bc105789819a4e9 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Tue, 19 Nov 2024 23:51:23 -0500
Subject: [PATCH 13/34] time checker added

---
 src/mitim_tools/opt_tools/SURROGATEtools.py | 43 ++++++++-------------
 1 file changed, 16 insertions(+), 27 deletions(-)

diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 198019b7..8204b6d1 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -258,38 +258,19 @@ def __init__(
         self.train_X_added contains the transformed of the table:       (batch2, dimXtr)
         """
 
+
         self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM(
             self.train_X,
             self.train_Y,
-            train_Yvar = self.train_Yvar,
-            input_transform = input_transform,
+            self.train_Yvar,
+            input_transform=input_transform,
             outcome_transform=outcome_transform,
+            # surrogateOptions=self.surrogateOptions,
+            # variables=self.variables,
+            # train_X_added=self.train_X_added,
+            # train_Y_added=self.train_Y_added,
+            # train_Yvar_added=self.train_Yvar_added,
         )
-        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.gpmodel.likelihood, self.gpmodel)
-        botorch.fit.fit_gpytorch_mll(mll)
-        #self.gpmodel.posterior(self.train_X)
-
-        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
-        from mitim_tools.misc_tools import IOtools
-        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
-            self.gpmodel.posterior(x)
-
-
-        embed()
-
-
-        # self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM(
-        #     self.train_X,
-        #     self.train_Y,
-        #     self.train_Yvar,
-        #     input_transform=input_transform,
-        #     outcome_transform=outcome_transform,
-        #     surrogateOptions=self.surrogateOptions,
-        #     variables=self.variables,
-        #     train_X_added=self.train_X_added,
-        #     train_Y_added=self.train_Y_added,
-        #     train_Yvar_added=self.train_Yvar_added,
-        # )
 
     def _define_MITIM_transformations(self):
 
@@ -424,6 +405,14 @@ def fit(self):
         #with fundamental_model_context(self):
         track_fval = self.perform_model_fit(mll)
 
+        embed()
+        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
+        from mitim_tools.misc_tools import IOtools
+        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof1") as s:
+            self.gpmodel.posterior(x)
+
+
+
         # ---------------------------------------------------------------------------------------------------
         # Asses optimization
         # ---------------------------------------------------------------------------------------------------

From 7f2deb96aaf1f1e9f891522026844de0404b7f58 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Tue, 19 Nov 2024 23:53:16 -0500
Subject: [PATCH 14/34] time checker added

---
 src/mitim_tools/opt_tools/STEPtools.py      | 7 +++++++
 src/mitim_tools/opt_tools/SURROGATEtools.py | 8 --------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 0e2af213..d22cb498 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -335,6 +335,13 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
             "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
         )
 
+        embed()
+        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
+        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
+            self.GP["combined_model"].gpmodel.posterior(x)
+
+
+
         if self.fileOutputs is not None:
             with open(self.fileOutputs, "a") as f:
                 f.write(f" (took total of {txt_time})")
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 8204b6d1..3babe632 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -405,14 +405,6 @@ def fit(self):
         #with fundamental_model_context(self):
         track_fval = self.perform_model_fit(mll)
 
-        embed()
-        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
-        from mitim_tools.misc_tools import IOtools
-        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof1") as s:
-            self.gpmodel.posterior(x)
-
-
-
         # ---------------------------------------------------------------------------------------------------
         # Asses optimization
         # ---------------------------------------------------------------------------------------------------

From 74ba8c64517e4506c3769d310d9597a8290f2d45 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Wed, 20 Nov 2024 08:35:03 -0500
Subject: [PATCH 15/34] Speed up checked and working well

---
 src/mitim_tools/opt_tools/BOTORCHtools.py | 64 +++++------------------
 src/mitim_tools/opt_tools/STEPtools.py    | 16 ++++--
 2 files changed, 25 insertions(+), 55 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index d651ae3f..6c4e546f 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -1,6 +1,6 @@
 """
 **************************************************************************************************************
-This set of tools are custom modifications to BOTORCH or GPYTORCH ones to satisfy my needs
+This set of tools are custom modifications to BOTORCH or GPYTORCH ones to satisfy MITIM/PORTALS needs
 **************************************************************************************************************
 """
 
@@ -30,6 +30,14 @@
 
 from linear_operator.operators import CholLinearOperator, DiagLinearOperator
 
+from typing import Iterable
+from torch.nn import ModuleDict
+from botorch.posteriors.gpytorch import GPyTorchPosterior
+from botorch.posteriors.posterior import Posterior
+from gpytorch.distributions import MultitaskMultivariateNormal
+from linear_operator.operators import BlockDiagLinearOperator
+
+
 class SingleTaskGP_MITIM(botorch.models.gp_regression.SingleTaskGP):
     def __init__(
         self,
@@ -65,6 +73,8 @@ def __init__(
             ignore_X_dims=ignore_X_dims,
         )
         self._set_dimensions(train_X=train_X, train_Y=train_Y)
+        self._aug_batch_shape = train_Y.shape[:-2] #<----- New
+
         train_X, train_Y, train_Yvar = self._transform_tensor_args(
             X=train_X, Y=train_Y, Yvar=train_Yvar
         )
@@ -482,30 +492,14 @@ def transform_inputs(self, X):
         self.prepareToGenerateCommons()
         X_tr = super().transform_inputs(X)
         self.cold_startCommons()
-
         return X_tr
 
-    def posterior(
-        self,
-        X,
-        output_indices=None,
-        observation_noise=False,
-        posterior_transform=None,
-        **kwargs,
-    ):
+    def posterior(self, *args, **kwargs):
         self.prepareToGenerateCommons()
-        posterior = super().posterior(
-            X,
-            output_indices=output_indices,
-            observation_noise=observation_noise,
-            posterior_transform=posterior_transform,
-            **kwargs,
-        )
+        posterior = super().posterior(*args, **kwargs)
         self.cold_startCommons()
-
         return posterior
 
-
 # ----------------------------------------------------------------------------------------------------------------------------
 # I need my own transformation based on physics
 # ----------------------------------------------------------------------------------------------------------------------------
@@ -942,27 +936,6 @@ def forward(self, x):
         return res
 
 
-#!/usr/bin/env -S grimaldi --kernel bento_kernel_automl
-# fmt: off
-
-""":py"""
-# %local-changes
-
-""":py"""
-import botorch
-import torch
-from botorch.fit import fit_gpytorch_mll
-from botorch.models import SingleTaskGP
-from botorch.models.transforms.input import Normalize
-from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
-from torch import Tensor
-
-""":py"""
-from typing import Iterable
-
-from botorch.models.transforms.input import InputTransform
-from torch.nn import ModuleDict
-
 
 class BatchBroadcastedInputTransform(InputTransform, ModuleDict):
     r"""An input transform representing a list of transforms to be broadcasted."""
@@ -1111,17 +1084,6 @@ def _Xs_and_transforms(self, X: Tensor) -> Iterable[tuple[Tensor, InputTransform
         # return zip(Xs, self.transforms)
         return zip([X for _ in self.transforms], self.transforms)
 
-
-
-
-""":py"""
-from botorch.models.transforms.outcome import OutcomeTransform
-from botorch.posteriors.gpytorch import GPyTorchPosterior
-from botorch.posteriors.posterior import Posterior
-from gpytorch.distributions import MultitaskMultivariateNormal
-from linear_operator.operators import BlockDiagLinearOperator
-
-
 class OutcomeToBatchDimension(OutcomeTransform):
     """Transform permuting dimensions in the outcome tensor."""
 
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index d22cb498..bcf3ac68 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -335,10 +335,10 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
             "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
         )
 
-        embed()
-        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
-        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
-            self.GP["combined_model"].gpmodel.posterior(x)
+        # embed()
+        # x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
+        # with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
+        #     self.GP["combined_model"].gpmodel.posterior(x)
 
 
 
@@ -438,6 +438,14 @@ def residual(Y, X = None):
         # around best, needs the raw one! (for noisy it is automatic)
         self.evaluators["acq_function"].X_baseline = self.evaluators["GP"].train_X
 
+
+        embed()
+        x = torch.rand(128, self.train_X.shape[-1]).to(self.dfT)
+        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
+            self.evaluators["acq_function"](x)
+
+
+
         # **************************************************************************************************
         # Quick function to return components (I need this for ROOT too, since I need the components)
         # **************************************************************************************************

From 0d2a76c72ec9337281b0640fb3a2eca2b1b47ea2 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Wed, 20 Nov 2024 08:35:17 -0500
Subject: [PATCH 16/34] misc

---
 src/mitim_tools/opt_tools/STEPtools.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index bcf3ac68..b731b875 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -440,8 +440,8 @@ def residual(Y, X = None):
 
 
         embed()
-        x = torch.rand(128, self.train_X.shape[-1]).to(self.dfT)
-        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
+        x = torch.rand(64, self.train_X.shape[-1]).to(self.dfT)
+        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_acq64.prof") as s:
             self.evaluators["acq_function"](x)
 
 

From 9a9b3c2c0ba41dfb8389ca7ddb81307c8e491d37 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Wed, 20 Nov 2024 11:15:10 -0500
Subject: [PATCH 17/34] check of standard mean and kernel

---
 src/mitim_tools/opt_tools/BOTORCHtools.py   | 156 ++++++++++++++++----
 src/mitim_tools/opt_tools/STEPtools.py      |  17 ++-
 src/mitim_tools/opt_tools/SURROGATEtools.py |   4 +-
 3 files changed, 136 insertions(+), 41 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 6c4e546f..375af794 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -43,13 +43,30 @@ def __init__(
         self,
         train_X,
         train_Y,
-        train_Yvar = None,
-        likelihood = None,
-        covar_module = None,
-        mean_module = None,
-        outcome_transform = None,
-        input_transform = None,
-    ) -> None:
+        train_Yvar,
+        input_transform=None,
+        outcome_transform=None,
+        surrogateOptions={},
+        variables=None,
+        train_X_added=torch.Tensor([]),
+        train_Y_added=torch.Tensor([]),
+        train_Yvar_added=torch.Tensor([]),
+    ):
+        """
+        _added refers to already-transformed variables that are added from table
+        """
+
+        TypeMean = surrogateOptions.get("TypeMean", 0)
+        TypeKernel = surrogateOptions.get("TypeKernel", 0)
+        FixedNoise = surrogateOptions.get("FixedNoise", False)
+        ConstrainNoise = surrogateOptions.get("ConstrainNoise", -1e-4)
+        learn_additional_noise = surrogateOptions.get("ExtraNoise", False)
+        print("\t\t* Surrogate model options:")
+        print(
+            f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}"
+        )
+
+
 
         self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
         if outcome_transform == DEFAULT:
@@ -61,6 +78,8 @@ def __init__(
                 X=train_X, input_transform=input_transform
             )
 
+        self.ard_num_dims = transformed_X.shape[-1]
+
         if outcome_transform is not None:
             train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar)
         # Validate again after applying the transforms
@@ -78,36 +97,109 @@ def __init__(
         train_X, train_Y, train_Yvar = self._transform_tensor_args(
             X=train_X, Y=train_Y, Yvar=train_Yvar
         )
-        if likelihood is None:
-            if train_Yvar is None:
-                likelihood = get_gaussian_likelihood_with_lognormal_prior(
-                    batch_shape=self._aug_batch_shape
-                )
-            else:
-                likelihood = FixedNoiseGaussianLikelihood(
-                    noise=train_Yvar, batch_shape=self._aug_batch_shape
-                )
-        else:
-            self._is_custom_likelihood = True
+
+        self._subset_batch_dict = {}
+
+        likelihood = (
+            gpytorch.likelihoods.gaussian_likelihood.FixedNoiseGaussianLikelihood(
+                noise=train_Yvar.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning)
+                batch_shape=self._aug_batch_shape,
+                learn_additional_noise=learn_additional_noise,
+            )
+        )
+        self._is_custom_likelihood = True
+
         ExactGP.__init__(
             self, train_inputs=train_X, train_targets=train_Y, likelihood=likelihood
         )
-        if mean_module is None:
-            mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
-        self.mean_module = mean_module
-        if covar_module is None:
-            covar_module = get_covar_module_with_dim_scaled_prior(
-                ard_num_dims=transformed_X.shape[-1],
+
+        """
+		-----------------------------------------------------------------------
+		GP Mean
+		-----------------------------------------------------------------------
+		"""
+
+        if TypeMean == 0:
+            self.mean_module = gpytorch.means.constant_mean.ConstantMean(
+                batch_shape=self._aug_batch_shape
+            )
+        elif TypeMean == 1:
+            self.mean_module = gpytorch.means.linear_mean.LinearMean(
+                self.ard_num_dims, batch_shape=self._aug_batch_shape, bias=True
+            )
+        elif TypeMean == 2:
+            self.mean_module = PRF_LinearMeanGradients(
+                batch_shape=self._aug_batch_shape, variables=variables
+            )
+        elif TypeMean == 3:
+            self.mean_module = PRF_CriticalGradient(
+                batch_shape=self._aug_batch_shape, variables=variables
+            )
+
+
+        """
+		-----------------------------------------------------------------------
+		GP Kernel - Covariance
+		-----------------------------------------------------------------------
+		"""
+
+        # Priors
+        lengthscale_prior = gpytorch.priors.torch_priors.GammaPrior(3.0, 6.0)
+        outputscale_prior = gpytorch.priors.torch_priors.GammaPrior(2.0, 0.15)
+
+        # Do not allow too small lengthscales?
+        lengthscale_constraint = (
+            None  # gpytorch.constraints.constraints.GreaterThan(0.05)
+        )
+
+        self._subset_batch_dict["covar_module.raw_outputscale"] = -1
+        self._subset_batch_dict["covar_module.base_kernel.raw_lengthscale"] = -3
+
+        if TypeKernel == 0:
+            self.covar_module = gpytorch.kernels.scale_kernel.ScaleKernel(
+                base_kernel=gpytorch.kernels.matern_kernel.MaternKernel(
+                    nu=2.5,
+                    ard_num_dims=self.ard_num_dims,
+                    batch_shape=self._aug_batch_shape,
+                    lengthscale_prior=lengthscale_prior,
+                    lengthscale_constraint=lengthscale_constraint,
+                ),
                 batch_shape=self._aug_batch_shape,
+                outputscale_prior=outputscale_prior,
             )
-            # Used for subsetting along the output dimension. See Model.subset_output.
-            self._subset_batch_dict = {
-                "mean_module.raw_constant": -1,
-                "covar_module.raw_lengthscale": -3,
-            }
-            if train_Yvar is None:
-                self._subset_batch_dict["likelihood.noise_covar.raw_noise"] = -2
-        self.covar_module: Module = covar_module
+        elif TypeKernel == 1:
+            self.covar_module = gpytorch.kernels.scale_kernel.ScaleKernel(
+                base_kernel=gpytorch.kernels.rbf_kernel.RBFKernel(
+                    ard_num_dims=self.ard_num_dims,
+                    batch_shape=self._aug_batch_shape,
+                    lengthscale_prior=lengthscale_prior,
+                    lengthscale_constraint=lengthscale_constraint,
+                ),
+                batch_shape=self._aug_batch_shape,
+                outputscale_prior=outputscale_prior,
+            )
+        elif TypeKernel == 2:
+            self.covar_module = PRF_ConstantKernel(
+                ard_num_dims=self.ard_num_dims,
+                batch_shape=self._aug_batch_shape,
+                lengthscale_prior=lengthscale_prior,
+                lengthscale_constraint=lengthscale_constraint,
+            )
+        elif TypeKernel == 3:
+            self.covar_module = gpytorch.kernels.scale_kernel.ScaleKernel(
+                base_kernel=PRF_NNKernel(
+                    ard_num_dims=self.ard_num_dims,
+                    batch_shape=self._aug_batch_shape,
+                    lengthscale_prior=lengthscale_prior,
+                    lengthscale_constraint=lengthscale_constraint,
+                ),
+                batch_shape=self._aug_batch_shape,
+                outputscale_prior=outputscale_prior,
+            )
+
+
+
+
         # TODO: Allow subsetting of other covar modules
         if outcome_transform is not None:
             self.outcome_transform = outcome_transform
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index b731b875..5e187632 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -158,6 +158,11 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
         print("--> Fitting multiple single-output models and creating composite model")
         time1 = datetime.datetime.now()
 
+
+        surrogateOptions = self.surrogateOptions["selectSurrogate"](
+                'QeTurb_1', self.surrogateOptions
+            )
+
         # full Multi-output model
         self.GP["combined_model"] = SURROGATEtools.surrogate_model(
             self.x,
@@ -168,7 +173,7 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
             outputs_transformed=self.stepSettings["name_transformed_ofs"],
             bounds=self.bounds,
             dfT=self.dfT,
-            surrogateOptions=self.surrogateOptions,
+            surrogateOptions=surrogateOptions,
         )
 
         # Fitting
@@ -335,12 +340,10 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
             "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
         )
 
-        # embed()
-        # x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
-        # with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
-        #     self.GP["combined_model"].gpmodel.posterior(x)
-
-
+        embed()
+        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
+        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
+            self.GP["combined_model"].gpmodel.posterior(x)
 
         if self.fileOutputs is not None:
             with open(self.fileOutputs, "a") as f:
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 3babe632..b9f371a2 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -265,8 +265,8 @@ def __init__(
             self.train_Yvar,
             input_transform=input_transform,
             outcome_transform=outcome_transform,
-            # surrogateOptions=self.surrogateOptions,
-            # variables=self.variables,
+            surrogateOptions=self.surrogateOptions,
+            variables=self.variables,
             # train_X_added=self.train_X_added,
             # train_Y_added=self.train_Y_added,
             # train_Yvar_added=self.train_Yvar_added,

From 31f3d81087b1480d3b3111d86a087db91c13dbcb Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Wed, 20 Nov 2024 16:15:16 -0500
Subject: [PATCH 18/34] misc testing

---
 src/mitim_modules/portals/PORTALStools.py   |   4 +-
 src/mitim_tools/opt_tools/BOTORCHtools.py   |  28 +-
 src/mitim_tools/opt_tools/STEPtools.py      | 353 ++++++++++----------
 src/mitim_tools/opt_tools/SURROGATEtools.py |  57 ++--
 4 files changed, 217 insertions(+), 225 deletions(-)

diff --git a/src/mitim_modules/portals/PORTALStools.py b/src/mitim_modules/portals/PORTALStools.py
index d8299f34..11442334 100644
--- a/src/mitim_modules/portals/PORTALStools.py
+++ b/src/mitim_modules/portals/PORTALStools.py
@@ -9,9 +9,7 @@
 
 def selectSurrogate(output, surrogateOptions, CGYROrun=False):
 
-    print(
-        f'\t- Selecting surrogate options for "{output}" to be run'
-    )
+    print(f'\t- Selecting surrogate options for "{output}" to be run')
 
     if output is not None:
         # If it's a target, just linear
diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 375af794..6c9b30c3 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -17,15 +17,8 @@
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import OutcomeTransform, Standardize
 from botorch.models.utils import validate_input_scaling
-from botorch.models.utils.gpytorch_modules import (
-    get_covar_module_with_dim_scaled_prior,
-    get_gaussian_likelihood_with_lognormal_prior,
-)
 from botorch.utils.types import DEFAULT
-from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
-from gpytorch.means.constant_mean import ConstantMean
 from gpytorch.models.exact_gp import ExactGP
-from gpytorch.module import Module
 from torch import Tensor
 
 from linear_operator.operators import CholLinearOperator, DiagLinearOperator
@@ -136,7 +129,6 @@ def __init__(
                 batch_shape=self._aug_batch_shape, variables=variables
             )
 
-
         """
 		-----------------------------------------------------------------------
 		GP Kernel - Covariance
@@ -562,23 +554,15 @@ def __init__(self, *gp_models):
     def prepareToGenerateCommons(self):
         self.models[0].input_transform.tf1.flag_to_store = True
         # Make sure that this ModelListGP evaluation is fresh
-        if (
-            "parameters_combined"
-            in self.models[0].input_transform.tf1.surrogate_parameters
-        ):
-            del self.models[0].input_transform.tf1.surrogate_parameters[
-                "parameters_combined"
-            ]
+        if ("surrogate_parameters" in self.models[0].input_transform.tf1.__dict__) and \
+            ("parameters_combined" in self.models[0].input_transform.tf1.surrogate_parameters):
+            del self.models[0].input_transform.tf1.surrogate_parameters["parameters_combined"]
 
     def cold_startCommons(self):
         self.models[0].input_transform.tf1.flag_to_store = False
-        if (
-            "parameters_combined"
-            in self.models[0].input_transform.tf1.surrogate_parameters
-        ):
-            del self.models[0].input_transform.tf1.surrogate_parameters[
-                "parameters_combined"
-            ]
+        if ("surrogate_parameters" in self.models[0].input_transform.tf1.__dict__) and \
+            ("parameters_combined" in self.models[0].input_transform.tf1.surrogate_parameters):
+            del self.models[0].input_transform.tf1.surrogate_parameters["parameters_combined"]
 
     def transform_inputs(self, X):
         self.prepareToGenerateCommons()
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 5e187632..8d5df959 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -149,205 +149,210 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
 		*********************************************************************************************************************
 		"""
 
-        self.GP = {"individual_models": [None] * self.y.shape[-1]}
+        self.GP = {}
+
+        time1 = datetime.datetime.now()
+
+        self._fit_multioutput_model()
+
+        #self._fit_individual_models(fitWithTrainingDataIfContains=fitWithTrainingDataIfContains)
+        
+        txt_time = IOtools.getTimeDifference(time1)
+        print(f"--> Fitting of all models took {txt_time}")
+        if self.fileOutputs is not None:
+            with open(self.fileOutputs, "a") as f:
+                f.write(f" (took total of {txt_time})")
+
+        embed()
+        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
+        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
+            self.GP["combined_model"].gpmodel.posterior(x)
+
+    def _fit_multioutput_model(self):
+
+        surrogateOptions = self.surrogateOptions["selectSurrogate"]('AllMITIM', self.surrogateOptions)
+
+        self.GP["mo_model"] = SURROGATEtools.surrogate_model(
+            self.x,
+            self.y,
+            self.yvar,
+            self.surrogate_parameters,
+            outputs=self.outputs,
+            outputs_transformed=self.stepSettings["name_transformed_ofs"],
+            bounds=self.bounds,
+            dfT=self.dfT,
+            surrogateOptions=surrogateOptions,
+        )
+
+        # Fitting
+        self.GP["mo_model"].fit()
+
+    def _fit_individual_models(self, fitWithTrainingDataIfContains=None):
+
         fileTraining = IOtools.expandPath(self.stepSettings['folderOutputs']) / "surrogate_data.csv"
         fileBackup = fileTraining.parent / "surrogate_data.csv.bak"
         if fileTraining.exists():
             fileTraining.replace(fileBackup)
 
         print("--> Fitting multiple single-output models and creating composite model")
-        time1 = datetime.datetime.now()
 
+        self.GP["individual_models"] = [None] * self.y.shape[-1]
+
+        for i in range(self.y.shape[-1]):
+            outi = self.outputs[i] if (self.outputs is not None) else None
+
+            # ----------------- specialTreatment is applied when I only want to use training data from a file, not from train_X
+            specialTreatment = (
+                (outi is not None)
+                and (fitWithTrainingDataIfContains is not None)
+                and (fitWithTrainingDataIfContains not in outi)
+            )
+            # -----------------------------------------------------------------------------------------------------------------------------------
+
+            outi_transformed = (
+                self.stepSettings["name_transformed_ofs"][i]
+                if (self.stepSettings["name_transformed_ofs"] is not None)
+                else outi
+            )
+
+            # ---------------------------------------------------------------------------------------------------
+            # Define model-specific functions for this output
+            # ---------------------------------------------------------------------------------------------------
+
+            surrogateOptions = copy.deepcopy(self.surrogateOptions)
 
-        surrogateOptions = self.surrogateOptions["selectSurrogate"](
-                'QeTurb_1', self.surrogateOptions
+            # Then, depending on application (e.g. targets in mitim are fitted differently)
+            if (
+                "selectSurrogate" in surrogateOptions
+                and surrogateOptions["selectSurrogate"] is not None
+            ):
+                surrogateOptions = surrogateOptions["selectSurrogate"](
+                    outi, surrogateOptions
+                )
+
+            # ---------------------------------------------------------------------------------------------------
+            # To avoid problems with fixed values (e.g. calibration terms that are fixed)
+            # ---------------------------------------------------------------------------------------------------
+
+            threshold_to_consider_fixed = 1e-6
+            MaxRelativeDifference = np.abs(self.y.max() - self.y.min()) / np.abs(
+                self.y.mean()
             )
 
-        # full Multi-output model
+            if (
+                np.isnan(MaxRelativeDifference)
+                or (
+                    (self.y.shape[0] > 1)
+                    and ((MaxRelativeDifference < threshold_to_consider_fixed).all())
+                )
+            ) and (not specialTreatment):
+                print(
+                    f"\t- Identified that outputs did not change, utilizing constant kernel for {outi}",
+                    typeMsg="w",
+                )
+                FixedValue = True
+                surrogateOptions["TypeMean"] = 0
+                surrogateOptions["TypeKernel"] = 6  # Constant kernel
+
+            else:
+                FixedValue = False
+
+            # ---------------------------------------------------------------------------------------------------
+            # Fit individual output
+            # ---------------------------------------------------------------------------------------------------
+
+            # Data to train the surrogate
+            x = self.x
+            y = np.expand_dims(self.y[:, i], axis=1)
+            yvar = np.expand_dims(self.yvar[:, i], axis=1)
+
+            if specialTreatment:
+                x, y, yvar = (
+                    np.empty((0, x.shape[-1])),
+                    np.empty((0, y.shape[-1])),
+                    np.empty((0, y.shape[-1])),
+                )
+
+            # Surrogate
+
+            print(f"~ Model for output: {outi}")
+
+            GP = SURROGATEtools.surrogate_model(
+                x,
+                y,
+                yvar,
+                self.surrogate_parameters,
+                bounds=self.bounds,
+                outputs=[outi],
+                outputs_transformed=[outi_transformed],
+                dfT=self.dfT,
+                surrogateOptions=surrogateOptions,
+                # avoidPoints=self.avoidPoints,
+                # FixedValue=FixedValue,
+                # fileTraining=fileTraining,
+            )
+
+            # Fitting
+            GP.fit()
+
+            self.GP["individual_models"][i] = GP
+
+        fileBackup.unlink(missing_ok=True)
+
+        # ------------------------------------------------------------------------------------------------------
+        # Combine them in a ModelListGP (create one single with MV but do not fit)
+        # ------------------------------------------------------------------------------------------------------
+
+        print("~ MV model to initialize combination")
+
         self.GP["combined_model"] = SURROGATEtools.surrogate_model(
             self.x,
             self.y,
             self.yvar,
             self.surrogate_parameters,
-            outputs=self.outputs,
-            outputs_transformed=self.stepSettings["name_transformed_ofs"],
             bounds=self.bounds,
             dfT=self.dfT,
-            surrogateOptions=surrogateOptions,
+            outputs=self.outputs,
+            surrogateOptions=self.surrogateOptions,
+            avoidPoints=self.avoidPoints,
         )
 
-        # Fitting
-        self.GP["combined_model"].fit()
-
-
-        # for i in range(self.y.shape[-1]):
-        #     outi = self.outputs[i] if (self.outputs is not None) else None
-
-        #     # ----------------- specialTreatment is applied when I only want to use training data from a file, not from train_X
-        #     specialTreatment = (
-        #         (outi is not None)
-        #         and (fitWithTrainingDataIfContains is not None)
-        #         and (fitWithTrainingDataIfContains not in outi)
-        #     )
-        #     # -----------------------------------------------------------------------------------------------------------------------------------
-
-        #     outi_transformed = (
-        #         self.stepSettings["name_transformed_ofs"][i]
-        #         if (self.stepSettings["name_transformed_ofs"] is not None)
-        #         else outi
-        #     )
-
-        #     # ---------------------------------------------------------------------------------------------------
-        #     # Define model-specific functions for this output
-        #     # ---------------------------------------------------------------------------------------------------
-
-        #     surrogateOptions = copy.deepcopy(self.surrogateOptions)
-
-        #     # Then, depending on application (e.g. targets in mitim are fitted differently)
-        #     if (
-        #         "selectSurrogate" in surrogateOptions
-        #         and surrogateOptions["selectSurrogate"] is not None
-        #     ):
-        #         surrogateOptions = surrogateOptions["selectSurrogate"](
-        #             outi, surrogateOptions
-        #         )
-
-        #     # ---------------------------------------------------------------------------------------------------
-        #     # To avoid problems with fixed values (e.g. calibration terms that are fixed)
-        #     # ---------------------------------------------------------------------------------------------------
-
-        #     threshold_to_consider_fixed = 1e-6
-        #     MaxRelativeDifference = np.abs(self.y.max() - self.y.min()) / np.abs(
-        #         self.y.mean()
-        #     )
-
-        #     if (
-        #         np.isnan(MaxRelativeDifference)
-        #         or (
-        #             (self.y.shape[0] > 1)
-        #             and ((MaxRelativeDifference < threshold_to_consider_fixed).all())
-        #         )
-        #     ) and (not specialTreatment):
-        #         print(
-        #             f"\t- Identified that outputs did not change, utilizing constant kernel for {outi}",
-        #             typeMsg="w",
-        #         )
-        #         FixedValue = True
-        #         surrogateOptions["TypeMean"] = 0
-        #         surrogateOptions["TypeKernel"] = 6  # Constant kernel
-
-        #     else:
-        #         FixedValue = False
-
-        #     # ---------------------------------------------------------------------------------------------------
-        #     # Fit individual output
-        #     # ---------------------------------------------------------------------------------------------------
-
-        #     # Data to train the surrogate
-        #     x = self.x
-        #     y = np.expand_dims(self.y[:, i], axis=1)
-        #     yvar = np.expand_dims(self.yvar[:, i], axis=1)
-
-        #     if specialTreatment:
-        #         x, y, yvar = (
-        #             np.empty((0, x.shape[-1])),
-        #             np.empty((0, y.shape[-1])),
-        #             np.empty((0, y.shape[-1])),
-        #         )
-
-        #     # Surrogate
-
-        #     print(f"~ Model for output: {outi}")
-
-        #     GP = SURROGATEtools.surrogate_model(
-        #         x,
-        #         y,
-        #         yvar,
-        #         self.surrogate_parameters,
-        #         bounds=self.bounds,
-        #         output=outi,
-        #         output_transformed=outi_transformed,
-        #         avoidPoints=self.avoidPoints,
-        #         dfT=self.dfT,
-        #         surrogateOptions=surrogateOptions,
-        #         FixedValue=FixedValue,
-        #         fileTraining=fileTraining,
-        #     )
-
-        #     # Fitting
-        #     GP.fit()
-
-        #     self.GP["individual_models"][i] = GP
-
-        # fileBackup.unlink(missing_ok=True)
-
-        # # ------------------------------------------------------------------------------------------------------
-        # # Combine them in a ModelListGP (create one single with MV but do not fit)
-        # # ------------------------------------------------------------------------------------------------------
-
-        # print("~ MV model to initialize combination")
-
-        # self.GP["combined_model"] = SURROGATEtools.surrogate_model(
-        #     self.x,
-        #     self.y,
-        #     self.yvar,
-        #     self.surrogate_parameters,
-        #     avoidPoints=self.avoidPoints,
-        #     bounds=self.bounds,
-        #     dfT=self.dfT,
-        #     surrogateOptions=self.surrogateOptions,
-        # )
-
-        # models = ()
-        # for GP in self.GP["individual_models"]:
-        #     models += (GP.gpmodel,)
-        # self.GP["combined_model"].gpmodel = BOTORCHtools.ModifiedModelListGP(*models)
-
-
-
-        # # ------------------------------------------------------------------------------------------------------
-        # # Make sure each model has the right surrogate_transformation_variables inside the combined model
-        # # ------------------------------------------------------------------------------------------------------
-        # if self.GP["combined_model"].surrogate_transformation_variables is not None:
-        #     for i in range(self.y.shape[-1]):
-
-        #         outi = self.outputs[i] if (self.outputs is not None) else None
-
-        #         if outi is not None:
-        #             self.GP["combined_model"].surrogate_transformation_variables[outi] = self.GP["individual_models"][i].surrogate_transformation_variables[outi]
-
-        # print(f"--> Fitting of all models took {IOtools.getTimeDifference(time1)}")
-
-        # """
-		# *********************************************************************************************************************
-		# 	Postprocessing
-		# *********************************************************************************************************************
-		# """
-
-        # # Test (if test could not be launched is likely because a singular matrix for Choleski decomposition)
-        # print("--> Launching tests to assure batch evaluation accuracy")
-        # TESTtools.testBatchCapabilities(self.GP["combined_model"])
-        # print("--> Launching tests to assure model combination accuracy")
-        # TESTtools.testCombinationCapabilities(
-        #     self.GP["individual_models"], self.GP["combined_model"]
-        # )
-        # print("--> Launching tests evaluate accuracy on training set (absolute units)")
-        # self.GP["combined_model"].testTraining()
+        models = ()
+        for GP in self.GP["individual_models"]:
+            models += (GP.gpmodel,)
+        self.GP["combined_model"].gpmodel = BOTORCHtools.ModifiedModelListGP(*models)
 
-        txt_time = IOtools.getTimeDifference(time1)
+        # ------------------------------------------------------------------------------------------------------
+        # Make sure each model has the right surrogate_transformation_variables inside the combined model
+        # ------------------------------------------------------------------------------------------------------
+        if self.GP["combined_model"].surrogate_transformation_variables is not None:
+            for i in range(self.y.shape[-1]):
+
+                outi = self.outputs[i] if (self.outputs is not None) else None
+
+                if outi is not None:
+                    self.GP["combined_model"].surrogate_transformation_variables[outi] = self.GP["individual_models"][i].surrogate_transformation_variables[outi]
+
+        """
+        *********************************************************************************************************************
+        	Postprocessing
+        *********************************************************************************************************************
+        """
+
+        # Test (if test could not be launched is likely because a singular matrix for Choleski decomposition)
+        print("--> Launching tests to assure batch evaluation accuracy")
+        TESTtools.testBatchCapabilities(self.GP["combined_model"])
+        print("--> Launching tests to assure model combination accuracy")
+        TESTtools.testCombinationCapabilities(
+            self.GP["individual_models"], self.GP["combined_model"]
+        )
+        print("--> Launching tests evaluate accuracy on training set (absolute units)")
+        self.GP["combined_model"].testTraining()
 
         print(
             "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
         )
 
-        embed()
-        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
-        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
-            self.GP["combined_model"].gpmodel.posterior(x)
-
-        if self.fileOutputs is not None:
-            with open(self.fileOutputs, "a") as f:
-                f.write(f" (took total of {txt_time})")
 
     def defineFunctions(self, scalarized_objective):
         """
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index b9f371a2..fedeb50e 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -215,12 +215,12 @@ def __init__(
         # Obtain normalization constants now (although during training this is messed up, so needed later too)
         # -------------------------------------------------------------------------------------
 
-        self.normalization_pass(
-            input_transform_physics,
-            input_transform_normalization,
-            outcome_transform_physics,
-            output_transformed_standardization,
-        )
+        # self.normalization_pass(
+        #     input_transform_physics,
+        #     input_transform_normalization,
+        #     outcome_transform_physics,
+        #     output_transformed_standardization,
+        # )
         
         # ------------------------------------------------------------------------------------
         # Combine transformations in chain of PHYSICS + NORMALIZATION
@@ -234,16 +234,16 @@ def __init__(
             tf1=outcome_transform_physics, tf2=output_transformed_standardization, tf3=BOTORCHtools.OutcomeToBatchDimension()
         ).to(self.dfT)
 
-        self.variables = None
-        # self.variables = (
-        #     self.surrogate_transformation_variables[self.output]
-        #     if (
-        #         (self.output is not None)
-        #         and ("surrogate_transformation_variables" in self.__dict__)
-        #         and (self.surrogate_transformation_variables is not None)
-        #     )
-        #     else None
-        # )
+        self.output = 'QeTurb_1'
+        self.variables = (
+            self.surrogate_transformation_variables[self.output]
+            if (
+                (self.output is not None)
+                and ("surrogate_transformation_variables" in self.__dict__)
+                and (self.surrogate_transformation_variables is not None)
+            )
+            else None
+        )
 
         # *************************************************************************************
         # Model
@@ -318,7 +318,9 @@ def _define_MITIM_transformations(self):
 
         input_transformation_physics = BOTORCHtools.BatchBroadcastedInputTransform(input_transformations_physics)
 
-        dimTransformedDV_x = input_transformation_physics(self.train_X).shape[-1]
+        transformed_X = input_transformation_physics(self.train_X)
+
+        dimTransformedDV_x = transformed_X.shape[-1]
         dimTransformedDV_y = self.train_Y.shape[-1]
 
         # ------------------------------------------------------------------------------------
@@ -326,10 +328,12 @@ def _define_MITIM_transformations(self):
         # ------------------------------------------------------------------------------------
 
         input_transform_normalization = botorch.models.transforms.input.Normalize(
-            d = dimTransformedDV_x, bounds=None
+            d = dimTransformedDV_x, bounds=None, batch_shape=transformed_X.shape[:-2]
         ).to(self.dfT)
         output_transformed_standardization = (
-            botorch.models.transforms.outcome.Standardize(m = dimTransformedDV_y)
+            botorch.models.transforms.outcome.Standardize(
+                m = dimTransformedDV_y, #batch_shape=self.train_Y.transpose(0,1).shape
+            )
         ).to(self.dfT)
 
         return  input_transformation_physics, \
@@ -414,12 +418,12 @@ def fit(self):
         # Go back to definining the right normalizations, because the optimizer has to work on training mode...
         # ---------------------------------------------------------------------------------------------------
 
-        self.normalization_pass(
-            self.gpmodel.input_transform["tf1"],
-            self.gpmodel.input_transform["tf2"],
-            self.gpmodel.outcome_transform["tf1"],
-            self.gpmodel.outcome_transform["tf2"],
-        )
+        # self.normalization_pass(
+        #     self.gpmodel.input_transform["tf1"],
+        #     self.gpmodel.input_transform["tf2"],
+        #     self.gpmodel.outcome_transform["tf1"],
+        #     self.gpmodel.outcome_transform["tf2"],
+        # )
 
     def perform_model_fit(self, mll):
         self.gpmodel.train()
@@ -432,7 +436,7 @@ def perform_model_fit(self, mll):
 
         # Approx MLL ---------------------------------------
         (train_x,) = mll.model.train_inputs
-        approx_mll = len(train_x) > 2000
+        approx_mll = False #len(train_x) > 2000
         if approx_mll:
             print(
                 f"\t* Using approximate MLL because x has {len(train_x)} elements",
@@ -444,6 +448,7 @@ def perform_model_fit(self, mll):
             -mll.forward(mll.model(*mll.model.train_inputs), mll.model.train_targets)
             .detach()
         ]
+        embed()
 
         def callback(x, y, mll=mll):
             track_fval.append(y.fval)

From adaabd0115ec383b3e491eed5e7c8d2babedc59c Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Wed, 20 Nov 2024 18:15:08 -0500
Subject: [PATCH 19/34] Working version but grad calculation extremely slow

---
 src/mitim_tools/opt_tools/BOTORCHtools.py     |  2 --
 src/mitim_tools/opt_tools/STEPtools.py        |  7 +-----
 src/mitim_tools/opt_tools/SURROGATEtools.py   |  1 -
 .../opt_tools/optimizers/BOTORCHoptim.py      | 25 +++++++++++--------
 4 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 6c9b30c3..d8773c63 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -190,8 +190,6 @@ def __init__(
             )
 
 
-
-
         # TODO: Allow subsetting of other covar modules
         if outcome_transform is not None:
             self.outcome_transform = outcome_transform
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 8d5df959..ed433431 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -163,11 +163,6 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
             with open(self.fileOutputs, "a") as f:
                 f.write(f" (took total of {txt_time})")
 
-        embed()
-        x = torch.rand(10_000, self.train_X.shape[-1]).to(self.dfT)
-        with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_gp64.prof") as s:
-            self.GP["combined_model"].gpmodel.posterior(x)
-
     def _fit_multioutput_model(self):
 
         surrogateOptions = self.surrogateOptions["selectSurrogate"]('AllMITIM', self.surrogateOptions)
@@ -359,7 +354,7 @@ def defineFunctions(self, scalarized_objective):
         I create this so that, upon reading a pickle, I re-call it. Otherwise, it is very heavy to store lambdas
         """
 
-        self.evaluators = {"GP": self.GP["combined_model"]}
+        self.evaluators = {"GP": self.GP["mo_model"]}
 
         # **************************************************************************************************
         # Objective (multi-objective model -> single objective residual)
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index fedeb50e..1be25e14 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -448,7 +448,6 @@ def perform_model_fit(self, mll):
             -mll.forward(mll.model(*mll.model.train_inputs), mll.model.train_targets)
             .detach()
         ]
-        embed()
 
         def callback(x, y, mll=mll):
             track_fval.append(y.fval)
diff --git a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
index f3c1b278..0065cac7 100644
--- a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
+++ b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
@@ -1,5 +1,4 @@
 import torch
-import types
 import botorch
 import random
 from mitim_tools.opt_tools import OPTtools
@@ -37,6 +36,7 @@ def findOptima(fun, optimization_params = {}, writeTrajectory=False):
         "sample_around_best": True,
         "disp": 50 if read_verbose_level() == 5 else False,
         "seed": fun.seed,
+        "maxiter": 100,
     }
 
     """
@@ -64,16 +64,19 @@ def __call__(self, x, *args, **kwargs):
     seq_message = f'({"sequential" if sequential_q else "joint"}) ' if q>1 else ''
     print(f"\t\t- Optimizing using optimize_acqf: {q = } {seq_message}, {num_restarts = }, {raw_samples = }")
 
-    with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
-        x_opt, _ = botorch.optim.optimize_acqf(
-            acq_function=fun_opt,
-            bounds=fun.bounds_mod,
-            raw_samples=raw_samples,
-            q=q,
-            sequential=sequential_q,
-            num_restarts=num_restarts,
-            options=options,
-        )
+   
+    #with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
+    #with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_opt.prof") as s:
+    x_opt, _ = botorch.optim.optimize_acqf(
+        acq_function=fun_opt,
+        bounds=fun.bounds_mod,
+        raw_samples=raw_samples,
+        q=q,
+        sequential=sequential_q,
+        num_restarts=num_restarts,
+        options=options,
+    )
+    embed()
 
     acq_evaluated = torch.Tensor(acq_evaluated)
 

From 4ead1b6eb9bec92f3f9e087c4bb39d38c9c6b54b Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Thu, 21 Nov 2024 20:11:54 -0500
Subject: [PATCH 20/34] General cleanup with the goal to recover old ModelList
 implementation

---
 src/mitim_tools/opt_tools/BOTORCHtools.py   |   8 +-
 src/mitim_tools/opt_tools/STEPtools.py      | 161 +++++-----
 src/mitim_tools/opt_tools/SURROGATEtools.py | 317 ++++++++++----------
 3 files changed, 234 insertions(+), 252 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index d8773c63..a013eb22 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -121,11 +121,11 @@ def __init__(
                 self.ard_num_dims, batch_shape=self._aug_batch_shape, bias=True
             )
         elif TypeMean == 2:
-            self.mean_module = PRF_LinearMeanGradients(
+            self.mean_module = MITIM_LinearMeanGradients(
                 batch_shape=self._aug_batch_shape, variables=variables
             )
         elif TypeMean == 3:
-            self.mean_module = PRF_CriticalGradient(
+            self.mean_module = MITIM_CriticalGradient(
                 batch_shape=self._aug_batch_shape, variables=variables
             )
 
@@ -171,7 +171,7 @@ def __init__(
                 outputscale_prior=outputscale_prior,
             )
         elif TypeKernel == 2:
-            self.covar_module = PRF_ConstantKernel(
+            self.covar_module = MITIM_ConstantKernel(
                 ard_num_dims=self.ard_num_dims,
                 batch_shape=self._aug_batch_shape,
                 lengthscale_prior=lengthscale_prior,
@@ -179,7 +179,7 @@ def __init__(
             )
         elif TypeKernel == 3:
             self.covar_module = gpytorch.kernels.scale_kernel.ScaleKernel(
-                base_kernel=PRF_NNKernel(
+                base_kernel=MITIM_NNKernel(
                     ard_num_dims=self.ard_num_dims,
                     batch_shape=self._aug_batch_shape,
                     lengthscale_prior=lengthscale_prior,
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index ed433431..064bc65c 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -30,7 +30,7 @@ def __init__(
         StrategyOptions={},
         BOmetrics=None,
         currentIteration=1,
-    ):
+        ):
         """
         train_Ystd is in standard deviations (square root of the variance), absolute magnitude
         Rule: X_Y are provided in absolute units. Normalization has to happen inside each surrogate_model,
@@ -38,60 +38,13 @@ def __init__(
         """
 
         self.train_X, self.train_Y, self.train_Ystd = train_X, train_Y, train_Ystd
-
-        """
-		Check dimensions
-			- train_X should be (num_train,dimX)
-			- train_Y should be (num_train,dimY)
-			- train_Ystd should be (num_train,dimY) or just one float representing all values
-		"""
-
-        if len(self.train_X.shape) < 2:
-            print(
-                "--> train x only had 1 dimension, assuming that it has only 1 dimension"
-            )
-            self.train_X = np.transpose(np.atleast_2d(self.train_X))
-
-        if len(self.train_Y.shape) < 2:
-            print(
-                "--> train y only had 1 dimension, assuming that it has only 1 dimension"
-            )
-            self.train_Y = np.transpose(np.atleast_2d(self.train_Y))
-
-        if (
-            isinstance(self.train_Ystd, float)
-            or isinstance(self.train_Ystd, int)
-            or len(self.train_Ystd.shape) < 2
-        ):
-            print(
-                "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in absolute terms"
-            )
-            if self.train_Ystd > 0:
-                print(
-                    "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in absolute terms"
-                )
-                self.train_Ystd = self.train_Y * 0.0 + self.train_Ystd
-            else:
-                print(
-                    "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in relative terms"
-                )
-                self.train_Ystd = self.train_Y * np.abs(self.train_Ystd)
-
-        if len(self.train_Ystd.shape) < 2:
-            print(
-                "--> train y noise only had 1 dimension, assuming that it has only 1 dimension"
-            )
-            self.train_Ystd = np.transpose(np.atleast_2d(self.train_Ystd))
-
-        # **** Get argumnets into this class
-
         self.bounds = bounds
         self.stepSettings = stepSettings
         self.BOmetrics = BOmetrics
         self.currentIteration = currentIteration
         self.StrategyOptions = StrategyOptions
 
-        # **** Step settings
+        # **** Step Settings
         self.surrogateOptions = self.stepSettings["optimization_options"]["surrogateOptions"]
         self.acquisition_type = self.stepSettings["optimization_options"]["acquisition_type"]
         self.acquisition_params = self.stepSettings["optimization_options"]["acquisition_params"]
@@ -103,27 +56,21 @@ def __init__(
         self.fileOutputs = self.stepSettings["fileOutputs"]
         self.surrogate_parameters = surrogate_parameters
 
+        # **** Check dimensions
+        self._check_dimensions()
+
         # **** From standard deviation to variance
         self.train_Yvar = self.train_Ystd**2
 
-    def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
+    def fit_step(self, avoidPoints=None, fit_output_contains=None):
         """
         Notes:
-            - Note that fitWithTrainingDataIfContains = 'Tar' would only use the train_X,Y,Yvar tensors
+            - Note that fit_output_contains = 'Tar' would only use the train_X,Y,Yvar tensors
                     to fit those surrogate variables that contain 'Tar' in their names. This is useful when in
                     PORTALS I want to simply use the training in a file and not directly from train_X,Y,Yvar for
-                    the fluxes but I do want *new* target calculation
+                    the fluxes but I do want new target calculation
         """
 
-        if avoidPoints is None:
-            avoidPoints = []
-
-        """
-		*********************************************************************************************************************
-			Preparing for fit
-		*********************************************************************************************************************
-		"""
-
         # Prepare case information. Copy because I'll be removing outliers
         self.x, self.y, self.yvar = (
             copy.deepcopy(self.train_X),
@@ -132,30 +79,30 @@ def fit_step(self, avoidPoints=None, fitWithTrainingDataIfContains=None):
         )
 
         # Add outliers to avoid points (it cannot happen inside of SURROGATEtools or it will fail at combining)
-        self.avoidPoints = copy.deepcopy(avoidPoints)
+        self.avoidPoints = copy.deepcopy(avoidPoints) if avoidPoints is not None else []
         self.curate_outliers()
 
         if self.fileOutputs is not None:
             with open(self.fileOutputs, "a") as f:
                 f.write("\n\n-----------------------------------------------------")
                 f.write("\n * Fitting GP models to training data...")
+
+        """
+        *********************************************************************************************************************
+            Performing Fit
+        *********************************************************************************************************************
+        """
+
         print(
             f"\n~~~~~~~ Performing fitting with {len(self.train_X)-len(self.avoidPoints)} training points ({len(self.avoidPoints)} avoided from {len(self.train_X)} total) ~~~~~~~~~~\n"
         )
 
-        """
-		*********************************************************************************************************************
-			Performing Fit
-		*********************************************************************************************************************
-		"""
-
         self.GP = {}
 
         time1 = datetime.datetime.now()
 
-        self._fit_multioutput_model()
-
-        #self._fit_individual_models(fitWithTrainingDataIfContains=fitWithTrainingDataIfContains)
+        #self._fit_multioutput_model()
+        self._fit_individual_models(fit_output_contains=fit_output_contains)
         
         txt_time = IOtools.getTimeDifference(time1)
         print(f"--> Fitting of all models took {txt_time}")
@@ -182,7 +129,7 @@ def _fit_multioutput_model(self):
         # Fitting
         self.GP["mo_model"].fit()
 
-    def _fit_individual_models(self, fitWithTrainingDataIfContains=None):
+    def _fit_individual_models(self, fit_output_contains=None):
 
         fileTraining = IOtools.expandPath(self.stepSettings['folderOutputs']) / "surrogate_data.csv"
         fileBackup = fileTraining.parent / "surrogate_data.csv.bak"
@@ -199,8 +146,8 @@ def _fit_individual_models(self, fitWithTrainingDataIfContains=None):
             # ----------------- specialTreatment is applied when I only want to use training data from a file, not from train_X
             specialTreatment = (
                 (outi is not None)
-                and (fitWithTrainingDataIfContains is not None)
-                and (fitWithTrainingDataIfContains not in outi)
+                and (fit_output_contains is not None)
+                and (fit_output_contains not in outi)
             )
             # -----------------------------------------------------------------------------------------------------------------------------------
 
@@ -282,9 +229,9 @@ def _fit_individual_models(self, fitWithTrainingDataIfContains=None):
                 outputs_transformed=[outi_transformed],
                 dfT=self.dfT,
                 surrogateOptions=surrogateOptions,
-                # avoidPoints=self.avoidPoints,
-                # FixedValue=FixedValue,
-                # fileTraining=fileTraining,
+                avoidPoints=self.avoidPoints,
+                FixedValue=FixedValue,
+                fileTraining=fileTraining,
             )
 
             # Fitting
@@ -330,7 +277,7 @@ def _fit_individual_models(self, fitWithTrainingDataIfContains=None):
 
         """
         *********************************************************************************************************************
-        	Postprocessing
+            Postprocessing
         *********************************************************************************************************************
         """
 
@@ -344,10 +291,7 @@ def _fit_individual_models(self, fitWithTrainingDataIfContains=None):
         print("--> Launching tests evaluate accuracy on training set (absolute units)")
         self.GP["combined_model"].testTraining()
 
-        print(
-            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
-        )
-
+        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
 
     def defineFunctions(self, scalarized_objective):
         """
@@ -364,9 +308,7 @@ def defineFunctions(self, scalarized_objective):
         def residual(Y, X = None):
             return scalarized_objective(Y)[2]
 
-        self.evaluators["objective"] = botorch.acquisition.objective.GenericMCObjective(
-            residual
-        )
+        self.evaluators["objective"] = botorch.acquisition.objective.GenericMCObjective(residual)
 
         # **************************************************************************************************
         # Acquisition functions (following BoTorch assumption of maximization)
@@ -491,10 +433,10 @@ def optimize(
         self.defineFunctions(scalarized_objective)
 
         """
-		***********************************************
-		Peform optimization
-		***********************************************
-		"""
+        ***********************************************
+        Peform optimization
+        ***********************************************
+        """
 
         if self.fileOutputs is not None:
             with open(self.fileOutputs, "a") as f:
@@ -552,6 +494,47 @@ def curate_outliers(self):
         if len(self.avoidPoints) > 0:
             print(f"\t ~~ Avoiding {len(self.avoidPoints)} points: ", self.avoidPoints)
 
+    def _check_dimensions(self):
+        """
+        Check dimensions
+            - train_X should be (num_train,dimX)
+            - train_Y should be (num_train,dimY)
+            - train_Ystd should be (num_train,dimY) or just one float representing all values
+        """
+
+        if len(self.train_X.shape) < 2:
+            print("--> train x only had 1 dimension, assuming that it has only 1 dimension")
+            self.train_X = np.transpose(np.atleast_2d(self.train_X))
+
+        if len(self.train_Y.shape) < 2:
+            print("--> train y only had 1 dimension, assuming that it has only 1 dimension")
+            self.train_Y = np.transpose(np.atleast_2d(self.train_Y))
+
+        if (
+            isinstance(self.train_Ystd, float)
+            or isinstance(self.train_Ystd, int)
+            or len(self.train_Ystd.shape) < 2
+        ):
+            print(
+                "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in absolute terms"
+            )
+            if self.train_Ystd > 0:
+                print(
+                    "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in absolute terms"
+                )
+                self.train_Ystd = self.train_Y * 0.0 + self.train_Ystd
+            else:
+                print(
+                    "--> train y noise only had 1 value only, assuming constant (std dev) for all samples in relative terms"
+                )
+                self.train_Ystd = self.train_Y * np.abs(self.train_Ystd)
+
+        if len(self.train_Ystd.shape) < 2:
+            print(
+                "--> train y noise only had 1 dimension, assuming that it has only 1 dimension"
+            )
+            self.train_Ystd = np.transpose(np.atleast_2d(self.train_Ystd))
+
 
 def removeOutliers(y, stds_outside=5, stds_outside_checker=1, alreadyAvoided=[]):
     """
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 1be25e14..88eca7fc 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -16,8 +16,6 @@
 # ---------------------------------------------------------------------------------
 # 	Model Class
 # ---------------------------------------------------------------------------------
-
-
 class surrogate_model:
     """
     This is where each of the fittings take place.
@@ -77,150 +75,137 @@ def __init__(
 
         self.losses = None
 
-        # # --------------------------------------------------------------------
-        # # Eliminate points if needed (not from the "added" set)
-        # # --------------------------------------------------------------------
-
-        # if len(self.avoidPoints) > 0:
-        #     print(
-        #         f"\t- Fitting without considering points: {self.avoidPoints}",
-        #         typeMsg="w",
-        #     )
-
-        #     self.train_X = torch.Tensor(
-        #         np.delete(self.train_X, self.avoidPoints, axis=0)
-        #     ).to(self.dfT)
-        #     self.train_Y = torch.Tensor(
-        #         np.delete(self.train_Y, self.avoidPoints, axis=0)
-        #     ).to(self.dfT)
-        #     self.train_Yvar = torch.Tensor(
-        #         np.delete(self.train_Yvar, self.avoidPoints, axis=0)
-        #     ).to(self.dfT)
+        # --------------------------------------------------------------------
+        # Eliminate points if needed (not from the "added" set)
+        # --------------------------------------------------------------------
+
+        self._remove_points()
 
         # -------------------------------------------------------------------------------------
         # Add points from file
         # -------------------------------------------------------------------------------------
 
         # Points to be added from file
-        # continueAdding = False
-        # if ("extrapointsFile" in self.surrogateOptions) and (self.surrogateOptions["extrapointsFile"] is not None) and (self.output is not None) and (self.output in self.surrogateOptions["extrapointsModels"]):
+        continueAdding = False
+        if ("extrapointsFile" in self.surrogateOptions) and (self.surrogateOptions["extrapointsFile"] is not None) and (self.output is not None) and (self.output in self.surrogateOptions["extrapointsModels"]):
 
-        #     print(
-        #         f"\t* Requested extension of training set by points in file {self.surrogateOptions['extrapointsFile']}"
-        #     )
+            print(
+                f"\t* Requested extension of training set by points in file {self.surrogateOptions['extrapointsFile']}"
+            )
 
-        #     df = pd.read_csv(self.surrogateOptions["extrapointsFile"])
-        #     df_model = df[df['Model'] == self.output]
+            df = pd.read_csv(self.surrogateOptions["extrapointsFile"])
+            df_model = df[df['Model'] == self.output]
 
-        #     if len(df_model) == 0:
-        #         print("\t- No points for this output in the file, nothing to add", typeMsg="i")
-        #         continueAdding = False
-        #     else:
-        #         continueAdding = True
+            if len(df_model) == 0:
+                print("\t- No points for this output in the file, nothing to add", typeMsg="i")
+                continueAdding = False
+            else:
+                continueAdding = True
 
-        # if continueAdding:
+        if continueAdding:
 
-        #     # Check 1: Do the points for this output share the same x_names?
-        #     if df_model['x_names'].nunique() > 1:
-        #         print("Different x_names for points in the file, prone to errors", typeMsg='q')
+            # Check 1: Do the points for this output share the same x_names?
+            if df_model['x_names'].nunique() > 1:
+                print("Different x_names for points in the file, prone to errors", typeMsg='q')
 
-        #     # Check 2: Is it consistent with the x_names of this run?
-        #     x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0]
-        #     x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.output]
-        #     if x_names != x_names_check:
-        #         print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q')            
+            # Check 2: Is it consistent with the x_names of this run?
+            x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0]
+            x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.output]
+            if x_names != x_names_check:
+                print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q')            
 
-        #     self.train_Y_added = torch.from_numpy(df_model['y'].to_numpy()).unsqueeze(-1).to(self.dfT)
-        #     self.train_Yvar_added = torch.from_numpy(df_model['yvar'].to_numpy()).unsqueeze(-1).to(self.dfT)
+            self.train_Y_added = torch.from_numpy(df_model['y'].to_numpy()).unsqueeze(-1).to(self.dfT)
+            self.train_Yvar_added = torch.from_numpy(df_model['yvar'].to_numpy()).unsqueeze(-1).to(self.dfT)
     
-        #     x = []
-        #     for i in range(len(x_names)):
-        #         x.append(df_model[f'x{i}'].to_numpy())
-        #     self.train_X_added_full = torch.from_numpy(np.array(x).T).to(self.dfT)
-
-        #     # ------------------------------------------------------------------------------------------------------------
-        #     # Define transformation (here because I want to account for the added points)
-        #     # ------------------------------------------------------------------------------------------------------------
-        #     self.num_training_points = self.train_X.shape[0] + self.train_X_added_full.shape[0]
-        #     input_transform_physics, outcome_transform_physics, \
-        #     input_transform_normalization, output_transformed_standardization, \
-        #     dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
-        #     # ------------------------------------------------------------------------------------------------------------
-
-        #     self.train_X_added = (
-        #         self.train_X_added_full[:, :dimTransformedDV_x] if self.train_X_added_full.shape[-1] > dimTransformedDV_x else self.train_X_added_full
-        #     ).to(self.dfT)
-
-        # else:
-        # if self.fileTraining is not None:
-        #     train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
-        #         self.train_X,
-        #         self.output,
-        #         self.surrogate_parameters,
-        #         self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
-        #     )
-        #     dimTransformedDV_x_full = train_X_Complete.shape[-1]
-        # else:
-        #     dimTransformedDV_x_full = self.train_X.shape[-1]
+            x = []
+            for i in range(len(x_names)):
+                x.append(df_model[f'x{i}'].to_numpy())
+            self.train_X_added_full = torch.from_numpy(np.array(x).T).to(self.dfT)
+
+            # ------------------------------------------------------------------------------------------------------------
+            # Define transformation (here because I want to account for the added points)
+            # ------------------------------------------------------------------------------------------------------------
+            self.num_training_points = self.train_X.shape[0] + self.train_X_added_full.shape[0]
+            input_transform_physics, outcome_transform_physics, \
+            input_transform_normalization, output_transformed_standardization, \
+            dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
+            # ------------------------------------------------------------------------------------------------------------
+
+            self.train_X_added = (
+                self.train_X_added_full[:, :dimTransformedDV_x] if self.train_X_added_full.shape[-1] > dimTransformedDV_x else self.train_X_added_full
+            ).to(self.dfT)
+
+        else:
+            if self.fileTraining is not None:
+                train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
+                    self.train_X,
+                    self.output,
+                    self.surrogate_parameters,
+                    self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
+                )
+                dimTransformedDV_x_full = train_X_Complete.shape[-1]
+            else:
+                dimTransformedDV_x_full = self.train_X.shape[-1]
+
+            # --------------------------------------------------------------------------------------
+            # Define transformation (here because I want to account for the added points)
+            # --------------------------------------------------------------------------------------
+            self.num_training_points = self.train_X.shape[0]
+
+            input_transform_physics, outcome_transform_physics,\
+            input_transform_normalization, output_transformed_standardization,\
+            dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
+            # ------------------------------------------------------------------------------------------------------------
+
+            self.train_X_added_full = torch.empty((0, dimTransformedDV_x_full)).to(self.dfT)
+            self.train_X_added = torch.empty((0, dimTransformedDV_x)).to(self.dfT)
+            self.train_Y_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
+            self.train_Yvar_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
 
         # --------------------------------------------------------------------------------------
-        # Define transformation (here because I want to account for the added points)
+        # Make sure that very small variations are not captured
         # --------------------------------------------------------------------------------------
-        self.num_training_points = self.train_X.shape[0]
-        input_transform_physics, outcome_transform_physics,\
-        input_transform_normalization, output_transformed_standardization,\
-        dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
-        # ------------------------------------------------------------------------------------------------------------
-
-        # self.train_X_added_full = torch.empty((0, dimTransformedDV_x_full)).to(self.dfT)
-        # self.train_X_added = torch.empty((0, dimTransformedDV_x)).to(self.dfT)
-        # self.train_Y_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
-        # self.train_Yvar_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
-
-        # # --------------------------------------------------------------------------------------
-        # # Make sure that very small variations are not captured
-        # # --------------------------------------------------------------------------------------
-
-        # if (self.train_X_added.shape[0] > 0) and (self.train_X.shape[0] > 1):
-        #     self.ensureMinimalVariationSuppressed(input_transform_physics)
-
-        # # --------------------------------------------------------------------------------------
-        # # Make sure at least 2 points
-        # # --------------------------------------------------------------------------------------
-
-        # if self.train_X.shape[0] + self.train_X_added.shape[0] == 1:
-        #     factor = 1.2
-        #     print(
-        #         f"\t- This dataset had only one point, adding a point with linear interpolation (trick for PORTALS targets only), {factor}",
-        #         typeMsg="w",
-        #     )
-        #     self.train_X = torch.cat((self.train_X, self.train_X * factor))
-        #     self.train_Y = torch.cat((self.train_Y, self.train_Y * factor))
-        #     self.train_Yvar = torch.cat((self.train_Yvar, self.train_Yvar * factor))
-
-        # # -------------------------------------------------------------------------------------
-        # # Check minimum noises
-        # # -------------------------------------------------------------------------------------
-
-        # self.ensureMinimumNoise()
-
-        # # -------------------------------------------------------------------------------------
-        # # Write file with surrogate if there are transformations
-        # # -------------------------------------------------------------------------------------
-
-        # if (self.fileTraining is not None) and (self.train_X.shape[0] + self.train_X_added.shape[0] > 0):
-        #     self.writeFileTraining(input_transform_physics, outcome_transform_physics)
+
+        if (self.train_X_added.shape[0] > 0) and (self.train_X.shape[0] > 1):
+            self._ensure_small_variation_suppressed(input_transform_physics)
+
+        # --------------------------------------------------------------------------------------
+        # Make sure at least 2 points
+        # --------------------------------------------------------------------------------------
+
+        if self.train_X.shape[0] + self.train_X_added.shape[0] == 1:
+            factor = 1.2
+            print(
+                f"\t- This dataset had only one point, adding a point with linear interpolation (trick for PORTALS targets only), {factor}",
+                typeMsg="w",
+            )
+            self.train_X = torch.cat((self.train_X, self.train_X * factor))
+            self.train_Y = torch.cat((self.train_Y, self.train_Y * factor))
+            self.train_Yvar = torch.cat((self.train_Yvar, self.train_Yvar * factor))
+
+        # -------------------------------------------------------------------------------------
+        # Check minimum noises
+        # -------------------------------------------------------------------------------------
+
+        self._ensure_minimum_noise()
+
+        # -------------------------------------------------------------------------------------
+        # Write file with surrogate if there are transformations
+        # -------------------------------------------------------------------------------------
+
+        if (self.fileTraining is not None) and (self.train_X.shape[0] + self.train_X_added.shape[0] > 0):
+            self.writeFileTraining(input_transform_physics, outcome_transform_physics)
 
         # -------------------------------------------------------------------------------------
         # Obtain normalization constants now (although during training this is messed up, so needed later too)
         # -------------------------------------------------------------------------------------
 
-        # self.normalization_pass(
-        #     input_transform_physics,
-        #     input_transform_normalization,
-        #     outcome_transform_physics,
-        #     output_transformed_standardization,
-        # )
+        self.normalization_pass(
+            input_transform_physics,
+            input_transform_normalization,
+            outcome_transform_physics,
+            output_transformed_standardization,
+        )
         
         # ------------------------------------------------------------------------------------
         # Combine transformations in chain of PHYSICS + NORMALIZATION
@@ -249,16 +234,15 @@ def __init__(
         # Model
         # *************************************************************************************
 
-        # print(
-        #     f'\t- Initializing model{" for "+self.output_transformed if (self.output_transformed is not None) else ""}',
-        # )
+        print(
+            f'\t- Initializing model{" for "+self.output_transformed if (self.output_transformed is not None) else ""}',
+        )
 
         """
         self.train_X contains the untransformed of this specific run:   (batch1, dimX)
         self.train_X_added contains the transformed of the table:       (batch2, dimXtr)
         """
 
-
         self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM(
             self.train_X,
             self.train_Y,
@@ -267,9 +251,9 @@ def __init__(
             outcome_transform=outcome_transform,
             surrogateOptions=self.surrogateOptions,
             variables=self.variables,
-            # train_X_added=self.train_X_added,
-            # train_Y_added=self.train_Y_added,
-            # train_Yvar_added=self.train_Yvar_added,
+            train_X_added=self.train_X_added,
+            train_Y_added=self.train_Y_added,
+            train_Yvar_added=self.train_Yvar_added,
         )
 
     def _define_MITIM_transformations(self):
@@ -332,7 +316,7 @@ def _define_MITIM_transformations(self):
         ).to(self.dfT)
         output_transformed_standardization = (
             botorch.models.transforms.outcome.Standardize(
-                m = dimTransformedDV_y, #batch_shape=self.train_Y.transpose(0,1).shape
+                m = dimTransformedDV_y,
             )
         ).to(self.dfT)
 
@@ -356,12 +340,12 @@ def normalization_pass(
         train_X_transformed = input_transform_physics(self.train_X)
         train_Y_transformed, train_Yvar_transformed = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
 
-        # train_X_transformed = torch.cat(
-        #     (input_transform_physics(self.train_X), self.train_X_added), axis=0
-        # )
-        # y, yvar = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
-        # train_Y_transformed = torch.cat((y, self.train_Y_added), axis=0)
-        # train_Yvar_transformed = torch.cat((yvar, self.train_Yvar_added), axis=0)
+        train_X_transformed = torch.cat(
+            (input_transform_physics(self.train_X), self.train_X_added), axis=0
+        )
+        y, yvar = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
+        train_Y_transformed = torch.cat((y, self.train_Y_added), axis=0)
+        train_Yvar_transformed = torch.cat((yvar, self.train_Yvar_added), axis=0)
 
         train_X_transformed_norm = input_transform_normalization(train_X_transformed)
         (
@@ -375,9 +359,9 @@ def normalization_pass(
         outcome_transform_normalization._is_trained = torch.tensor(True)
 
     def fit(self):
-        # print(
-        #     f"\t- Fitting model to {self.train_X.shape[0]+self.train_X_added.shape[0]} points"
-        # )
+        print(
+            f"\t- Fitting model to {self.train_X.shape[0]+self.train_X_added.shape[0]} points"
+        )
 
         # ---------------------------------------------------------------------------------------------------
         # Define loss Function to minimize
@@ -406,8 +390,8 @@ def fit(self):
 		"""
 
         # Train always in physics-transformed space, to enable mitim re-use training from file
-        #with fundamental_model_context(self):
-        track_fval = self.perform_model_fit(mll)
+        with fundamental_model_context(self):
+            track_fval = self.perform_model_fit(mll)
 
         # ---------------------------------------------------------------------------------------------------
         # Asses optimization
@@ -418,12 +402,12 @@ def fit(self):
         # Go back to definining the right normalizations, because the optimizer has to work on training mode...
         # ---------------------------------------------------------------------------------------------------
 
-        # self.normalization_pass(
-        #     self.gpmodel.input_transform["tf1"],
-        #     self.gpmodel.input_transform["tf2"],
-        #     self.gpmodel.outcome_transform["tf1"],
-        #     self.gpmodel.outcome_transform["tf2"],
-        # )
+        self.normalization_pass(
+            self.gpmodel.input_transform["tf1"],
+            self.gpmodel.input_transform["tf2"],
+            self.gpmodel.outcome_transform["tf1"],
+            self.gpmodel.outcome_transform["tf2"],
+        )
 
     def perform_model_fit(self, mll):
         self.gpmodel.train()
@@ -468,9 +452,9 @@ def callback(x, y, mll=mll):
         self.gpmodel.likelihood.eval()
         mll.eval()
 
-        # print(
-        #     f"\n\t- Marginal log likelihood went from {track_fval[0]:.3f} to {track_fval[-1]:.3f}"
-        # )
+        print(
+            f"\n\t- Marginal log likelihood went from {track_fval[0]:.3f} to {track_fval[-1]:.3f}"
+        )
 
         return track_fval
 
@@ -494,11 +478,7 @@ def predict(self, X, produceFundamental=False, nSamples=None):
         # with 	gpytorch.settings.fast_computations(log_prob=False, solves=False, covar_root_decomposition=False), \
         # 		gpytorch.settings.eval_cg_tolerance(1E-6), gpytorch.settings.fast_pred_samples(state=False), gpytorch.settings.num_trace_samples(0):
 
-        with (
-            fundamental_model_context(self)
-            if produceFundamental
-            else contextlib.nullcontext(self)
-        ) as surrogate_model:
+        with (fundamental_model_context(self) if produceFundamental else contextlib.nullcontext(self)) as surrogate_model:
             posterior = surrogate_model.gpmodel.posterior(X)
 
         mean = posterior.mean
@@ -782,7 +762,26 @@ def testTraining(
 
             return axs
 
-    def ensureMinimalVariationSuppressed(self, input_transform_physics, thr=1e-6):
+    def _remove_points(self):
+        # Drop the samples whose row indices are listed in self.avoidPoints from train_X / train_Y / train_Yvar (nothing happens when the list is empty).
+        if len(self.avoidPoints) > 0:
+            print(
+                f"\t- Fitting without considering points: {self.avoidPoints}",
+                typeMsg="w",
+            )
+            # np.delete removes those indices along axis 0; each result is re-wrapped as a torch.Tensor and passed through .to(self.dfT) (presumably the working dtype/device - confirm).
+            self.train_X = torch.Tensor(
+                np.delete(self.train_X, self.avoidPoints, axis=0)
+            ).to(self.dfT)
+            self.train_Y = torch.Tensor(
+                np.delete(self.train_Y, self.avoidPoints, axis=0)
+            ).to(self.dfT)
+            self.train_Yvar = torch.Tensor(
+                np.delete(self.train_Yvar, self.avoidPoints, axis=0)
+            ).to(self.dfT)
+
+
+    def _ensure_small_variation_suppressed(self, input_transform_physics, thr=1e-6):
         """
         In some cases, the added data from file might have extremely small variations in some of the fixed
         inputs, as compared to the trained data of this run. In such a case, modify this variation
@@ -812,7 +811,7 @@ def ensureMinimalVariationSuppressed(self, input_transform_physics, thr=1e-6):
                 typeMsg="w",
             )
 
-    def ensureMinimumNoise(self):
+    def _ensure_minimum_noise(self):
         if ("MinimumRelativeNoise" in self.surrogateOptions) and (
             self.surrogateOptions["MinimumRelativeNoise"] is not None
         ):

From 95cd30a5c59ca8d67bc158bd4248a8380e7cdb11 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Thu, 21 Nov 2024 22:17:08 -0500
Subject: [PATCH 21/34] Recovering previous combined_model

---
 .../maestro/utils/PORTALSbeat.py              |   2 +-
 src/mitim_tools/opt_tools/BOTORCHtools.py     | 299 +-----------------
 src/mitim_tools/opt_tools/STEPtools.py        |   4 +-
 src/mitim_tools/opt_tools/SURROGATEtools.py   | 192 +++++------
 src/mitim_tools/opt_tools/utils/TESTtools.py  |   5 +
 templates/main.namelist.json                  |   2 +-
 6 files changed, 113 insertions(+), 391 deletions(-)

diff --git a/src/mitim_modules/maestro/utils/PORTALSbeat.py b/src/mitim_modules/maestro/utils/PORTALSbeat.py
index 57b14aeb..1a95ec91 100644
--- a/src/mitim_modules/maestro/utils/PORTALSbeat.py
+++ b/src/mitim_modules/maestro/utils/PORTALSbeat.py
@@ -253,7 +253,7 @@ def _inform(self, use_previous_residual = True, use_previous_surrogate_data = Tr
         if use_previous_surrogate_data and ('portals_surrogate_data_file' in self.maestro_instance.parameters_trans_beat):
             if 'surrogateOptions' not in self.optimization_options:
                 self.optimization_options['surrogateOptions'] = {}
-            self.optimization_options['surrogateOptions']["extrapointsFile"] = self.maestro_instance.parameters_trans_beat['portals_surrogate_data_file']
+            self.optimization_options['surrogateOptions']["add_data_from_file"] = self.maestro_instance.parameters_trans_beat['portals_surrogate_data_file']
 
             self.folder_starting_point = self.maestro_instance.parameters_trans_beat['portals_last_run_folder']
 
diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index a013eb22..0243de4c 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -197,7 +197,7 @@ def __init__(
             self.input_transform = input_transform
         self.to(train_X)
 
-    def posterior_full(
+    def posterior(
         self,
         X,
         output_indices=None,
@@ -249,303 +249,11 @@ def _Xs_and_transforms(self, X):
         Xs = (X,) * len(self.transforms)
         return zip(Xs, self.transforms)
 
-class SingleTaskGP_MITIM2(botorch.models.gp_regression.SingleTaskGP):
-    def __init__(
-        self,
-        train_X,
-        train_Y,
-        train_Yvar,
-        input_transform=None,
-        outcome_transform=None,
-        surrogateOptions={},
-        variables=None,
-        train_X_added=torch.Tensor([]),
-        train_Y_added=torch.Tensor([]),
-        train_Yvar_added=torch.Tensor([]),
-    ):
-        """
-        _added refers to already-transformed variables that are added from table
-        """
-
-        TypeMean = surrogateOptions.get("TypeMean", 0)
-        TypeKernel = surrogateOptions.get("TypeKernel", 0)
-        FixedNoise = surrogateOptions.get("FixedNoise", False)
-        ConstrainNoise = surrogateOptions.get("ConstrainNoise", -1e-4)
-        learn_additional_noise = surrogateOptions.get("ExtraNoise", False)
-        print("\t\t* Surrogate model options:")
-        print(
-            f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}"
-        )
-
-        # self.store_training(
-        #     train_X,
-        #     train_X_added,
-        #     train_Y,
-        #     train_Y_added,
-        #     train_Yvar,
-        #     train_Yvar_added,
-        #     input_transform,
-        #     outcome_transform,
-        # )
-
-        """
-		----------------------------------------------------------------------------------------
-		What set_dimensions did, and select things to train (already transformed and normalized)
-		----------------------------------------------------------------------------------------
-		"""
-
-        # Grab num_outputs
-        self._num_outputs = train_Y.shape[-1]
-
-        # Grab ard_num_dims
-        if train_X.shape[0] > 0:
-            with torch.no_grad():
-                transformed_X = self.transform_inputs(
-                    X=train_X, input_transform=input_transform
-                )
-            self.ard_num_dims = transformed_X.shape[-1]
-        else:
-            self.ard_num_dims = train_X_added.shape[-1]
-            transformed_X = torch.empty((0, self.ard_num_dims)).to(train_X)
-
-        # Transform outcomes
-        if outcome_transform is not None:
-            train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar)
-
-        # # Added points are raw transformed, so I need to normalize them
-        # if train_X_added.shape[0] > 0:
-        #     train_X_added = input_transform["tf2"](train_X_added)
-        #     train_Y_added, train_Yvar_added = outcome_transform["tf2"](
-        #         train_Y_added, train_Yvar_added
-        #     )
-        # -----
-
-        train_X_usedToTrain = transformed_X #torch.cat((transformed_X, train_X_added), axis=0)
-        train_Y_usedToTrain = train_Y #torch.cat((train_Y, train_Y_added), axis=0)
-        train_Yvar_usedToTrain = train_Yvar #torch.cat((train_Yvar, train_Yvar_added), axis=0)
-
-        self._input_batch_shape, self._aug_batch_shape = self.get_batch_dimensions(
-            train_X=train_X_usedToTrain, train_Y=train_Y_usedToTrain
-        )
-
-        # train_Y_usedToTrain = train_Y_usedToTrain.squeeze(-1)
-        # train_Yvar_usedToTrain = train_Yvar_usedToTrain.squeeze(-1)
-
-        """
-		-----------------------------------------------------------------------
-		Likelihood and Noise
-		-----------------------------------------------------------------------
-		"""
-
-        self._subset_batch_dict = {}
-
-        if FixedNoise:
-            # Noise not inferred, given by data
-            likelihood = (
-                gpytorch.likelihoods.gaussian_likelihood.FixedNoiseGaussianLikelihood(
-                    noise=train_Yvar_usedToTrain.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning)
-                    batch_shape=self._aug_batch_shape,
-                    learn_additional_noise=learn_additional_noise,
-                )
-            )
-
-        else:
-            # Infer Noise
-
-            noise_prior = gpytorch.priors.torch_priors.GammaPrior(1.1, 0.05)
-            noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
-
-            if ConstrainNoise < 0:
-                noise_constraint = gpytorch.constraints.constraints.GreaterThan(
-                    -ConstrainNoise, transform=None, initial_value=noise_prior_mode
-                )
-            else:
-                noise_constraint = gpytorch.constraints.constraints.Interval(
-                    1e-6, ConstrainNoise, transform=None, initial_value=noise_prior_mode
-                )
-
-            likelihood = gpytorch.likelihoods.gaussian_likelihood.GaussianLikelihood(
-                noise_prior=noise_prior,
-                batch_shape=self._aug_batch_shape,
-                noise_constraint=noise_constraint,
-            )
-
-            self._subset_batch_dict["likelihood.noise_covar.raw_noise"] = -2
-
-        """
-		-----------------------------------------------------------------------
-		Initialize ExactGP
-		-----------------------------------------------------------------------
-		"""
-
-        gpytorch.models.exact_gp.ExactGP.__init__(
-            self,
-            train_inputs=train_X_usedToTrain,
-            train_targets=train_Y_usedToTrain,
-            likelihood=likelihood,
-        )
-
-        """
-		-----------------------------------------------------------------------
-		GP Mean
-		-----------------------------------------------------------------------
-		"""
-
-        if TypeMean == 0:
-            self.mean_module = gpytorch.means.constant_mean.ConstantMean(
-                batch_shape=self._aug_batch_shape
-            )
-        elif TypeMean == 1:
-            self.mean_module = gpytorch.means.linear_mean.LinearMean(
-                self.ard_num_dims, batch_shape=self._aug_batch_shape, bias=True
-            )
-        elif TypeMean == 2:
-            self.mean_module = MITIM_LinearMeanGradients(
-                batch_shape=self._aug_batch_shape, variables=variables
-            )
-        elif TypeMean == 3:
-            self.mean_module = MITIM_CriticalGradient(
-                batch_shape=self._aug_batch_shape, variables=variables
-            )
-
-        """
-		-----------------------------------------------------------------------
-		GP Kernel - Covariance
-		-----------------------------------------------------------------------
-		"""
-
-        # Priors
-        lengthscale_prior = gpytorch.priors.torch_priors.GammaPrior(3.0, 6.0)
-        outputscale_prior = gpytorch.priors.torch_priors.GammaPrior(2.0, 0.15)
-
-        # Do not allow too small lengthscales?
-        lengthscale_constraint = (
-            None  # gpytorch.constraints.constraints.GreaterThan(0.05)
-        )
-
-        self._subset_batch_dict["covar_module.raw_outputscale"] = -1
-        self._subset_batch_dict["covar_module.base_kernel.raw_lengthscale"] = -3
-
-        if TypeKernel == 0:
-            self.covar_module = gpytorch.kernels.scale_kernel.ScaleKernel(
-                base_kernel=gpytorch.kernels.matern_kernel.MaternKernel(
-                    nu=2.5,
-                    ard_num_dims=self.ard_num_dims,
-                    batch_shape=self._aug_batch_shape,
-                    lengthscale_prior=lengthscale_prior,
-                    lengthscale_constraint=lengthscale_constraint,
-                ),
-                batch_shape=self._aug_batch_shape,
-                outputscale_prior=outputscale_prior,
-            )
-        elif TypeKernel == 1:
-            self.covar_module = gpytorch.kernels.scale_kernel.ScaleKernel(
-                base_kernel=gpytorch.kernels.rbf_kernel.RBFKernel(
-                    ard_num_dims=self.ard_num_dims,
-                    batch_shape=self._aug_batch_shape,
-                    lengthscale_prior=lengthscale_prior,
-                    lengthscale_constraint=lengthscale_constraint,
-                ),
-                batch_shape=self._aug_batch_shape,
-                outputscale_prior=outputscale_prior,
-            )
-        elif TypeKernel == 2:
-            self.covar_module = MITIM_ConstantKernel(
-                ard_num_dims=self.ard_num_dims,
-                batch_shape=self._aug_batch_shape,
-                lengthscale_prior=lengthscale_prior,
-                lengthscale_constraint=lengthscale_constraint,
-            )
-        elif TypeKernel == 3:
-            self.covar_module = gpytorch.kernels.scale_kernel.ScaleKernel(
-                base_kernel=MITIM_NNKernel(
-                    ard_num_dims=self.ard_num_dims,
-                    batch_shape=self._aug_batch_shape,
-                    lengthscale_prior=lengthscale_prior,
-                    lengthscale_constraint=lengthscale_constraint,
-                ),
-                batch_shape=self._aug_batch_shape,
-                outputscale_prior=outputscale_prior,
-            )
-
-        if outcome_transform is not None:
-            self.outcome_transform = outcome_transform
-        if input_transform is not None:
-            self.input_transform = input_transform
-
-        self.to(train_X)
-
-    def store_training(self, x, xa, y, ya, yv, yva, input_transform, outcome_transform):
-
-        # x, y are raw untransformed, and I want raw transformed
-        if input_transform is not None:
-            x_tr = input_transform["tf1"](x)
-        else:
-            x_tr = x
-        if outcome_transform is not None:
-            y_tr, yv_tr = outcome_transform["tf1"](x, y, yv)
-        else:
-            y_tr, yv_tr = y, yv
-
-        # xa, ya are raw transformed
-        xa_tr = xa
-        ya_tr, yva_tr = ya, yva
-
-        self.train_X_usedToTrain = torch.cat((xa_tr, x_tr), axis=0)
-        self.train_Y_usedToTrain = torch.cat((ya_tr, y_tr), axis=0)
-        self.train_Yvar_usedToTrain = torch.cat((yva_tr, yv_tr), axis=0)
-
-    # Modify posterior call from BatchedMultiOutputGPyTorchModel to call posterior untransform with "X"
-
-    def posterior(
-        self,
-        X,
-        output_indices=None,
-        observation_noise=False,
-        posterior_transform=None,
-        **kwargs,
-    ):
-
-        self.eval()  # make sure model is in eval mode
-        # input transforms are applied at `posterior` in `eval` mode, and at
-        # `model.forward()` at the training time
-        Xtr = self.transform_inputs(X)
-        with botorch.models.utils.gpt_posterior_settings():
-            # insert a dimension for the output dimension
-            if self._num_outputs > 1:
-                Xtr, output_dim_idx = botorch.models.utils.add_output_dim(
-                    X=Xtr, original_batch_shape=self._input_batch_shape
-                )
-            # NOTE: BoTorch's GPyTorchModels also inherit from GPyTorch's ExactGP, thus
-            # self(X) calls GPyTorch's ExactGP's __call__, which computes the posterior,
-            # rather than e.g. SingleTaskGP's forward, which computes the prior.
-            mvn = self(Xtr)
-            mvn = self._apply_noise(X=Xtr, mvn=mvn, observation_noise=observation_noise)
-            if self._num_outputs > 1:
-                mean_x = mvn.mean
-                covar_x = mvn.lazy_covariance_matrix
-                output_indices = output_indices or range(self._num_outputs)
-                mvns = [
-                    gpytorch.distributions.MultivariateNormal(
-                        mean_x.select(dim=output_dim_idx, index=t),
-                        covar_x[(slice(None),) * output_dim_idx + (t,)],
-                    )
-                    for t in output_indices
-                ]
-                mvn = gpytorch.distributions.MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
-
-        posterior = botorch.posteriors.gpytorch.GPyTorchPosterior(distribution=mvn)
-        if hasattr(self, "outcome_transform"):
-            posterior = self.outcome_transform.untransform_posterior(X, posterior)
-        if posterior_transform is not None:
-            return posterior_transform(posterior)
-        return posterior
-
 # ----------------------------------------------------------------------------------------------------------------------------
 # ModelListGP needs to be modified to allow me to have "common" parameters to models, to not run at every transformation again
 # ----------------------------------------------------------------------------------------------------------------------------
 
-class ModifiedModelListGP(botorch.models.model_list_gp_regression.ModelListGP):
+class ModelListGP_MITIM(botorch.models.model_list_gp_regression.ModelListGP):
     def __init__(self, *gp_models):
         super().__init__(*gp_models)
 
@@ -1237,6 +945,7 @@ def untransform_posterior(self, posterior: Posterior) -> Posterior:
         # could potentially use from_independent_mvns
         # print(f"{mvn._covar.shape = }")
         # print(f"{covar.shape=}")
-        dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
+        from gpytorch.distributions import MultivariateNormal
+        dis = MultivariateNormal(mean=mean, covariance_matrix=covar)
         return GPyTorchPosterior(distribution=dis)
 
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 064bc65c..b4258839 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -262,7 +262,7 @@ def _fit_individual_models(self, fit_output_contains=None):
         models = ()
         for GP in self.GP["individual_models"]:
             models += (GP.gpmodel,)
-        self.GP["combined_model"].gpmodel = BOTORCHtools.ModifiedModelListGP(*models)
+        self.GP["combined_model"].gpmodel = BOTORCHtools.ModelListGP_MITIM(*models)
 
         # ------------------------------------------------------------------------------------------------------
         # Make sure each model has the right surrogate_transformation_variables inside the combined model
@@ -298,7 +298,7 @@ def defineFunctions(self, scalarized_objective):
         I create this so that, upon reading a pickle, I re-call it. Otherwise, it is very heavy to store lambdas
         """
 
-        self.evaluators = {"GP": self.GP["mo_model"]}
+        self.evaluators = {"GP": self.GP["combined_model"]}#mo_model"]}
 
         # **************************************************************************************************
         # Objective (multi-objective model -> single objective residual)
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 88eca7fc..e9db228b 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -85,16 +85,22 @@ def __init__(
         # Add points from file
         # -------------------------------------------------------------------------------------
 
+        addition_of_points = ("add_data_from_file" in self.surrogateOptions) and (self.surrogateOptions["add_data_from_file"] is not None)
+        is_this_single_output = (self.outputs is not None) and (len(self.outputs) == 1)
+
+        if addition_of_points and is_this_single_output:
+            raise Exception("[MITIM] add_data_from_file can only be used for single output models as of now...")
+
         # Points to be added from file
         continueAdding = False
-        if ("extrapointsFile" in self.surrogateOptions) and (self.surrogateOptions["extrapointsFile"] is not None) and (self.output is not None) and (self.output in self.surrogateOptions["extrapointsModels"]):
+        if addition_of_points and (self.outputs is not None) and (self.outputs[0] in self.surrogateOptions["extrapointsModels"]):
 
             print(
-                f"\t* Requested extension of training set by points in file {self.surrogateOptions['extrapointsFile']}"
+                f"\t* Requested extension of training set by points in file {self.surrogateOptions['add_data_from_file']}"
             )
 
-            df = pd.read_csv(self.surrogateOptions["extrapointsFile"])
-            df_model = df[df['Model'] == self.output]
+            df = pd.read_csv(self.surrogateOptions["add_data_from_file"])
+            df_model = df[df['Model'] == self.outputs[0]]
 
             if len(df_model) == 0:
                 print("\t- No points for this output in the file, nothing to add", typeMsg="i")
@@ -110,7 +116,7 @@ def __init__(
 
             # Check 2: Is it consistent with the x_names of this run?
             x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0]
-            x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.output]
+            x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.outputs[0]]
             if x_names != x_names_check:
                 print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q')            
 
@@ -139,7 +145,7 @@ def __init__(
             if self.fileTraining is not None:
                 train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
                     self.train_X,
-                    self.output,
+                    self.outputs[0],
                     self.surrogate_parameters,
                     self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
                 )
@@ -194,18 +200,18 @@ def __init__(
         # -------------------------------------------------------------------------------------
 
         if (self.fileTraining is not None) and (self.train_X.shape[0] + self.train_X_added.shape[0] > 0):
-            self.writeFileTraining(input_transform_physics, outcome_transform_physics)
+            self.write_datafile(input_transform_physics, outcome_transform_physics)
 
         # -------------------------------------------------------------------------------------
         # Obtain normalization constants now (although during training this is messed up, so needed later too)
         # -------------------------------------------------------------------------------------
 
-        self.normalization_pass(
-            input_transform_physics,
-            input_transform_normalization,
-            outcome_transform_physics,
-            output_transformed_standardization,
-        )
+        # self.normalization_pass(
+        #     input_transform_physics,
+        #     input_transform_normalization,
+        #     outcome_transform_physics,
+        #     output_transformed_standardization,
+        # )
         
         # ------------------------------------------------------------------------------------
         # Combine transformations in chain of PHYSICS + NORMALIZATION
@@ -216,14 +222,13 @@ def __init__(
         ).to(self.dfT)
 
         outcome_transform = BOTORCHtools.ChainedOutcomeTransform(
-            tf1=outcome_transform_physics, tf2=output_transformed_standardization, tf3=BOTORCHtools.OutcomeToBatchDimension()
+            tf1=outcome_transform_physics, tf2=output_transformed_standardization #, tf3=BOTORCHtools.OutcomeToBatchDimension()
         ).to(self.dfT)
 
-        self.output = 'QeTurb_1'
         self.variables = (
-            self.surrogate_transformation_variables[self.output]
+            self.surrogate_transformation_variables[self.outputs[0]]
             if (
-                (self.output is not None)
+                (self.outputs is not None)
                 and ("surrogate_transformation_variables" in self.__dict__)
                 and (self.surrogate_transformation_variables is not None)
             )
@@ -235,7 +240,7 @@ def __init__(
         # *************************************************************************************
 
         print(
-            f'\t- Initializing model{" for "+self.output_transformed if (self.output_transformed is not None) else ""}',
+            f'\t- Initializing model{" for "+self.outputs_transformed[0] if (self.outputs_transformed is not None and (len(self.outputs)==1)) else ""}',
         )
 
         """
@@ -300,7 +305,7 @@ def _define_MITIM_transformations(self):
         # Broadcast the input transformation to all outputs
         # ------------------------------------------------------------------------------------
 
-        input_transformation_physics = BOTORCHtools.BatchBroadcastedInputTransform(input_transformations_physics)
+        input_transformation_physics = input_transformations_physics[0] #BOTORCHtools.BatchBroadcastedInputTransform(input_transformations_physics)
 
         transformed_X = input_transformation_physics(self.train_X)
 
@@ -390,8 +395,8 @@ def fit(self):
 		"""
 
         # Train always in physics-transformed space, to enable mitim re-use training from file
-        with fundamental_model_context(self):
-            track_fval = self.perform_model_fit(mll)
+        #with fundamental_model_context(self):
+        track_fval = self.perform_model_fit(mll)
 
         # ---------------------------------------------------------------------------------------------------
         # Asses optimization
@@ -402,12 +407,12 @@ def fit(self):
         # Go back to definining the right normalizations, because the optimizer has to work on training mode...
         # ---------------------------------------------------------------------------------------------------
 
-        self.normalization_pass(
-            self.gpmodel.input_transform["tf1"],
-            self.gpmodel.input_transform["tf2"],
-            self.gpmodel.outcome_transform["tf1"],
-            self.gpmodel.outcome_transform["tf2"],
-        )
+        # self.normalization_pass(
+        #     self.gpmodel.input_transform["tf1"],
+        #     self.gpmodel.input_transform["tf2"],
+        #     self.gpmodel.outcome_transform["tf1"],
+        #     self.gpmodel.outcome_transform["tf2"],
+        # )
 
     def perform_model_fit(self, mll):
         self.gpmodel.train()
@@ -453,7 +458,7 @@ def callback(x, y, mll=mll):
         mll.eval()
 
         print(
-            f"\n\t- Marginal log likelihood went from {track_fval[0]:.3f} to {track_fval[-1]:.3f}"
+            f"\n\t- Marginal log likelihood went from {track_fval[0]} to {track_fval[-1]:.3f}"
         )
 
         return track_fval
@@ -494,94 +499,97 @@ def predict(self, X, produceFundamental=False, nSamples=None):
 
         return mean, upper, lower, samples
 
-    def writeFileTraining(self, input_transform_physics, outcome_transform_physics):
+    def write_datafile(self, input_transform_physics, outcome_transform_physics):
         """
         --------------------------------------------------------------------
         Write file with surrogate if there are transformations
-                Note: USE TRANSFORMATIONS AT COMPLETE NUMBER (AFTER TRANSITIONS) for those in this run, but
-                simply use the info that was in extra_points_file
+                Note: USE TRANSFORMATIONS AT COMPLETE NUMBER (AFTER TRANSITIONS)
+                for those in this run, but simply use the info that was in
+                extra_points_file
         --------------------------------------------------------------------
         """
 
-        # ------------------------------------------------------------------------------------------------------------------------
-        # Transform the points without the added from file
-        # ------------------------------------------------------------------------------------------------------------------------
+        for i,output in enumerate(self.outputs):
 
-        # I do not use directly input_transform_physics because I need all the columns, not of this specif iteration
-        train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
-            self.train_X,
-            self.output,
-            self.surrogate_parameters,
-            self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
-        )
+            # ------------------------------------------------------------------------------------------------------------------------
+            # Transform the points without the added from file
+            # ------------------------------------------------------------------------------------------------------------------------
 
-        train_Y, train_Yvar = outcome_transform_physics(
-            self.train_X, self.train_Y, self.train_Yvar
-        )
+            # I do not use directly input_transform_physics because I need all the columns, not of this specif iteration
+            train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
+                self.train_X,
+                output,
+                self.surrogate_parameters,
+                self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
+            )
 
-        dv_names_Complete = (
-            self.surrogate_parameters["surrogate_transformation_variables_lasttime"][self.output]
-            if (
-                "surrogate_transformation_variables_lasttime" in self.surrogate_parameters
-                and self.surrogate_parameters["surrogate_transformation_variables_lasttime"]
-                is not None
+            train_Y, train_Yvar = outcome_transform_physics(
+                self.train_X, self.train_Y[...,i].unsqueeze(-1), self.train_Yvar[...,i].unsqueeze(-1)
             )
-            else [i for i in self.bounds]
-        )
 
-        if self.train_X_added_full.shape[-1] < train_X_Complete.shape[-1]:
-            print(
-                "\t\t- Points from file have less input dimensions, extending with NaNs for writing new file",
-                typeMsg="w",
+            dv_names_Complete = (
+                self.surrogate_parameters["surrogate_transformation_variables_lasttime"][output]
+                if (
+                    "surrogate_transformation_variables_lasttime" in self.surrogate_parameters
+                    and self.surrogate_parameters["surrogate_transformation_variables_lasttime"]
+                    is not None
+                )
+                else [i for i in self.bounds]
             )
-            self.train_X_added_full = torch.cat(
-                (
-                    self.train_X_added_full,
-                    torch.full(
-                        (
-                            self.train_X_added_full.shape[0],
-                            train_X_Complete.shape[-1]
-                            - self.train_X_added_full.shape[-1],
+
+            if self.train_X_added_full.shape[-1] < train_X_Complete.shape[-1]:
+                print(
+                    "\t\t- Points from file have less input dimensions, extending with NaNs for writing new file",
+                    typeMsg="w",
+                )
+                self.train_X_added_full = torch.cat(
+                    (
+                        self.train_X_added_full,
+                        torch.full(
+                            (
+                                self.train_X_added_full.shape[0],
+                                train_X_Complete.shape[-1]
+                                - self.train_X_added_full.shape[-1],
+                            ),
+                            torch.nan,
                         ),
-                        torch.nan,
                     ),
-                ),
-                axis=-1,
-            )
-        elif self.train_X_added_full.shape[-1] > train_X_Complete.shape[-1]:
-            print(
-                "\t\t- Points from file have more input dimensions, removing last dimensions for writing new file",
-                typeMsg="w",
-            )
-            self.train_X_added_full = self.train_X_added_full[
-                :, : train_X_Complete.shape[-1]
-            ]
+                    axis=-1,
+                )
+            elif self.train_X_added_full.shape[-1] > train_X_Complete.shape[-1]:
+                print(
+                    "\t\t- Points from file have more input dimensions, removing last dimensions for writing new file",
+                    typeMsg="w",
+                )
+                self.train_X_added_full = self.train_X_added_full[
+                    :, : train_X_Complete.shape[-1]
+                ]
 
-        x = torch.cat((self.train_X_added_full, train_X_Complete), axis=0)
-        y = torch.cat((self.train_Y_added, train_Y), axis=0)
-        yvar = torch.cat((self.train_Yvar_added, train_Yvar), axis=0)
+            x = torch.cat((self.train_X_added_full, train_X_Complete), axis=0)
+            y = torch.cat((self.train_Y_added, train_Y), axis=0)
+            yvar = torch.cat((self.train_Yvar_added, train_Yvar), axis=0)
 
 
-        # ------------------------------------------------------------------------------------------------------------------------
-        # Merged data with existing data frame and write
-        # ------------------------------------------------------------------------------------------------------------------------
+            # ------------------------------------------------------------------------------------------------------------------------
+            # Merged data with existing data frame and write
+            # ------------------------------------------------------------------------------------------------------------------------
 
-        new_df = create_df_portals(x,y,yvar,dv_names_Complete,self.output)
+            new_df = create_df_portals(x,y,yvar,dv_names_Complete,output)
 
-        if self.fileTraining.exists():
+            if self.fileTraining.exists():
 
-            # Load the existing DataFrame from the HDF5 file
-            existing_df = pd.read_csv(self.fileTraining)
+                # Load the existing DataFrame from the HDF5 file
+                existing_df = pd.read_csv(self.fileTraining)
 
-            # Concatenate the existing DataFrame with the new DataFrame
-            combined_df = pd.concat([existing_df, new_df], ignore_index=True)
+                # Concatenate the existing DataFrame with the new DataFrame
+                combined_df = pd.concat([existing_df, new_df], ignore_index=True)
 
-        else:
+            else:
 
-            combined_df = new_df
+                combined_df = new_df
 
-        # Save the combined DataFrame back to the file
-        combined_df.to_csv(self.fileTraining, index=False)
+            # Save the combined DataFrame back to the file
+            combined_df.to_csv(self.fileTraining, index=False)
 
     # --------------------------
     # PLOTTING AND POST-ANALYSIS
diff --git a/src/mitim_tools/opt_tools/utils/TESTtools.py b/src/mitim_tools/opt_tools/utils/TESTtools.py
index cab8a417..c23bb619 100644
--- a/src/mitim_tools/opt_tools/utils/TESTtools.py
+++ b/src/mitim_tools/opt_tools/utils/TESTtools.py
@@ -47,9 +47,14 @@ def testBatchCapabilities(GPs, combinations=[2, 100, 1000]):
     It stops running if the error gets larger than thrPercent in those cases
     """
 
+    from mitim_tools.misc_tools import IOtools
+    with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_evv.prof") as s:
+        GPs.predict(GPs.train_X[0:1, :].repeat(50, 1))
+
     for i in combinations:
         x = GPs.train_X[0:1, :].repeat(i, 1)
 
+        
         y1 = GPs.predict(x)[0]
         y2 = GPs.predict(x[0:1, :])[0]
 
diff --git a/templates/main.namelist.json b/templates/main.namelist.json
index 03351169..73837d0b 100644
--- a/templates/main.namelist.json
+++ b/templates/main.namelist.json
@@ -43,7 +43,7 @@
         "MinimumRelativeNoise": null,
         "stds_outside": null,
         "stds_outside_checker": 5,
-        "extrapointsFile": null,
+        "add_data_from_file": null,
         "extrapointsModels": null,
         "extrapointsModelsAvoidContent": null
     },

From 55e9ff3196b1d89cfb0e7a96b61e69c220130906 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Thu, 21 Nov 2024 22:44:40 -0500
Subject: [PATCH 22/34] misc

---
 src/mitim_tools/opt_tools/BOTORCHtools.py    | 139 ++++++++++++++++---
 src/mitim_tools/opt_tools/SURROGATEtools.py  |  50 ++++---
 src/mitim_tools/opt_tools/utils/TESTtools.py |   4 -
 3 files changed, 149 insertions(+), 44 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 0243de4c..402d0236 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -59,22 +59,58 @@ def __init__(
             f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}"
         )
 
+        self.store_training(
+            train_X,
+            train_X_added,
+            train_Y,
+            train_Y_added,
+            train_Yvar,
+            train_Yvar_added,
+            input_transform,
+            outcome_transform,
+        )
 
+        # Grab num_outputs
+        self._num_outputs = train_Y.shape[-1]
 
-        self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
-        if outcome_transform == DEFAULT:
-            outcome_transform = Standardize(
-                m=train_Y.shape[-1], batch_shape=train_X.shape[:-2]
-            )
-        with torch.no_grad():
-            transformed_X = self.transform_inputs(
-                X=train_X, input_transform=input_transform
-            )
-
-        self.ard_num_dims = transformed_X.shape[-1]
+        # Grab ard_num_dims
+        if train_X.shape[0] > 0:
+            with torch.no_grad():
+                transformed_X = self.transform_inputs(
+                    X=train_X, input_transform=input_transform
+                )
+            self.ard_num_dims = transformed_X.shape[-1]
+        else:
+            self.ard_num_dims = train_X_added.shape[-1]
+            transformed_X = torch.empty((0, self.ard_num_dims)).to(train_X)
 
+        # Transform outcomes
         if outcome_transform is not None:
             train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar)
+
+        # Added points are raw transformed, so I need to normalize them
+        if train_X_added.shape[0] > 0:
+            train_X_added = input_transform["tf2"](train_X_added)
+            train_Y_added, train_Yvar_added = outcome_transform["tf2"](
+                train_Y_added, train_Yvar_added
+            )
+        # -----
+
+        train_X_usedToTrain = torch.cat((transformed_X, train_X_added), axis=0)
+        train_Y_usedToTrain = torch.cat((train_Y, train_Y_added), axis=0)
+        train_Yvar_usedToTrain = torch.cat((train_Yvar, train_Yvar_added), axis=0)
+
+        self._input_batch_shape, self._aug_batch_shape = self.get_batch_dimensions(
+            train_X=train_X_usedToTrain, train_Y=train_Y_usedToTrain
+        )
+
+        train_Y_usedToTrain = train_Y_usedToTrain.squeeze(-1)
+        train_Yvar_usedToTrain = train_Yvar_usedToTrain.squeeze(-1)
+
+        #self._aug_batch_shape = train_Y.shape[:-2] #<----- New
+
+
+
         # Validate again after applying the transforms
         self._validate_tensor_args(X=transformed_X, Y=train_Y, Yvar=train_Yvar)
         ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None)
@@ -85,25 +121,65 @@ def __init__(
             ignore_X_dims=ignore_X_dims,
         )
         self._set_dimensions(train_X=train_X, train_Y=train_Y)
-        self._aug_batch_shape = train_Y.shape[:-2] #<----- New
+        
 
         train_X, train_Y, train_Yvar = self._transform_tensor_args(
             X=train_X, Y=train_Y, Yvar=train_Yvar
         )
 
+        """
+		-----------------------------------------------------------------------
+		Likelihood and Noise
+		-----------------------------------------------------------------------
+		"""
+
         self._subset_batch_dict = {}
 
-        likelihood = (
-            gpytorch.likelihoods.gaussian_likelihood.FixedNoiseGaussianLikelihood(
-                noise=train_Yvar.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning)
+        if FixedNoise:
+            # Noise not inferred, given by data
+            
+            likelihood = (
+                gpytorch.likelihoods.gaussian_likelihood.FixedNoiseGaussianLikelihood(
+                    noise=train_Yvar_usedToTrain.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning)
+                    batch_shape=self._aug_batch_shape,
+                    learn_additional_noise=learn_additional_noise,
+                )
+            )
+
+        else:
+            # Infer Noise
+
+            noise_prior = gpytorch.priors.torch_priors.GammaPrior(1.1, 0.05)
+            noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
+
+            if ConstrainNoise < 0:
+                noise_constraint = gpytorch.constraints.constraints.GreaterThan(
+                    -ConstrainNoise, transform=None, initial_value=noise_prior_mode
+                )
+            else:
+                noise_constraint = gpytorch.constraints.constraints.Interval(
+                    1e-6, ConstrainNoise, transform=None, initial_value=noise_prior_mode
+                )
+
+            likelihood = gpytorch.likelihoods.gaussian_likelihood.GaussianLikelihood(
+                noise_prior=noise_prior,
                 batch_shape=self._aug_batch_shape,
-                learn_additional_noise=learn_additional_noise,
+                noise_constraint=noise_constraint,
             )
-        )
-        self._is_custom_likelihood = True
 
-        ExactGP.__init__(
-            self, train_inputs=train_X, train_targets=train_Y, likelihood=likelihood
+            self._subset_batch_dict["likelihood.noise_covar.raw_noise"] = -2
+
+        """
+		-----------------------------------------------------------------------
+		Initialize ExactGP
+		-----------------------------------------------------------------------
+		"""
+
+        gpytorch.models.exact_gp.ExactGP.__init__(
+            self,
+            train_inputs=train_X_usedToTrain,
+            train_targets=train_Y_usedToTrain,
+            likelihood=likelihood,
         )
 
         """
@@ -189,14 +265,33 @@ def __init__(
                 outputscale_prior=outputscale_prior,
             )
 
-
-        # TODO: Allow subsetting of other covar modules
         if outcome_transform is not None:
             self.outcome_transform = outcome_transform
         if input_transform is not None:
             self.input_transform = input_transform
         self.to(train_X)
 
+    def store_training(self, x, xa, y, ya, yv, yva, input_transform, outcome_transform):
+
+        # x, y are raw untransformed, and I want raw transformed
+        if input_transform is not None:
+            x_tr = input_transform["tf1"](x)
+        else:
+            x_tr = x
+        if outcome_transform is not None:
+            y_tr, yv_tr = outcome_transform["tf1"](x, y, yv)
+        else:
+            y_tr, yv_tr = y, yv
+
+        # xa, ya are raw transformed
+        xa_tr = xa
+        ya_tr, yva_tr = ya, yva
+
+        self.train_X_usedToTrain = torch.cat((xa_tr, x_tr), axis=0)
+        self.train_Y_usedToTrain = torch.cat((ya_tr, y_tr), axis=0)
+        self.train_Yvar_usedToTrain = torch.cat((yva_tr, yv_tr), axis=0)
+
+    # Modify posterior call from BatchedMultiOutputGPyTorchModel to call posterior untransform with "X"
     def posterior(
         self,
         X,
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index e9db228b..e936ebce 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -206,12 +206,12 @@ def __init__(
         # Obtain normalization constants now (although during training this is messed up, so needed later too)
         # -------------------------------------------------------------------------------------
 
-        # self.normalization_pass(
-        #     input_transform_physics,
-        #     input_transform_normalization,
-        #     outcome_transform_physics,
-        #     output_transformed_standardization,
-        # )
+        self.normalization_pass(
+            input_transform_physics,
+            input_transform_normalization,
+            outcome_transform_physics,
+            output_transformed_standardization,
+        )
         
         # ------------------------------------------------------------------------------------
         # Combine transformations in chain of PHYSICS + NORMALIZATION
@@ -395,8 +395,10 @@ def fit(self):
 		"""
 
         # Train always in physics-transformed space, to enable mitim re-use training from file
-        #with fundamental_model_context(self):
-        track_fval = self.perform_model_fit(mll)
+        with fundamental_model_context(self):
+            track_fval = self.perform_model_fit(mll)
+
+        embed()
 
         # ---------------------------------------------------------------------------------------------------
         # Asses optimization
@@ -407,12 +409,12 @@ def fit(self):
         # Go back to definining the right normalizations, because the optimizer has to work on training mode...
         # ---------------------------------------------------------------------------------------------------
 
-        # self.normalization_pass(
-        #     self.gpmodel.input_transform["tf1"],
-        #     self.gpmodel.input_transform["tf2"],
-        #     self.gpmodel.outcome_transform["tf1"],
-        #     self.gpmodel.outcome_transform["tf2"],
-        # )
+        self.normalization_pass(
+            self.gpmodel.input_transform["tf1"],
+            self.gpmodel.input_transform["tf2"],
+            self.gpmodel.outcome_transform["tf1"],
+            self.gpmodel.outcome_transform["tf2"],
+        )
 
     def perform_model_fit(self, mll):
         self.gpmodel.train()
@@ -901,17 +903,29 @@ def __init__(self, surrogate_model):
 
     def __enter__(self):
         # Works for individual models, not ModelList
-        for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
-            self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = False
+        self.surrogate_model.gpmodel.input_transform.tf1.flag_to_evaluate = False
         self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = False
 
         return self.surrogate_model
 
     def __exit__(self, *args):
-        for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
-            self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = True
+        self.surrogate_model.gpmodel.input_transform.tf1.flag_to_evaluate = True
         self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = True
 
+    # def __enter__(self):
+    #     # Works for individual models, not ModelList
+    #     embed()
+    #     for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
+    #         self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = False
+    #     self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = False
+
+    #     return self.surrogate_model
+
+    # def __exit__(self, *args):
+    #     for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
+    #         self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = True
+    #     self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = True
+
 def create_df_portals(x, y, yvar, x_names, output, max_x = 20):
 
     new_data = []
diff --git a/src/mitim_tools/opt_tools/utils/TESTtools.py b/src/mitim_tools/opt_tools/utils/TESTtools.py
index c23bb619..cee62b1d 100644
--- a/src/mitim_tools/opt_tools/utils/TESTtools.py
+++ b/src/mitim_tools/opt_tools/utils/TESTtools.py
@@ -47,10 +47,6 @@ def testBatchCapabilities(GPs, combinations=[2, 100, 1000]):
     It stops running if the error gets larger than thrPercent in those cases
     """
 
-    from mitim_tools.misc_tools import IOtools
-    with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_evv.prof") as s:
-        GPs.predict(GPs.train_X[0:1, :].repeat(50, 1))
-
     for i in combinations:
         x = GPs.train_X[0:1, :].repeat(i, 1)
 

From d541748bbf753f1759d3a4e08e04b43b5a798efc Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 22 Nov 2024 00:31:15 -0500
Subject: [PATCH 23/34] good progress

---
 src/mitim_tools/opt_tools/BOTORCHtools.py     |  85 ++++---------
 src/mitim_tools/opt_tools/STEPtools.py        |   6 +-
 src/mitim_tools/opt_tools/SURROGATEtools.py   | 118 ++++++++----------
 .../opt_tools/optimizers/BOTORCHoptim.py      |  23 ++--
 4 files changed, 88 insertions(+), 144 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 402d0236..c08cca5f 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -15,19 +15,14 @@
 # ----------------------------------------------------------------------------------------------------------------------------
 
 from botorch.models.transforms.input import InputTransform
-from botorch.models.transforms.outcome import OutcomeTransform, Standardize
+from botorch.models.transforms.outcome import OutcomeTransform
 from botorch.models.utils import validate_input_scaling
-from botorch.utils.types import DEFAULT
-from gpytorch.models.exact_gp import ExactGP
 from torch import Tensor
-
 from linear_operator.operators import CholLinearOperator, DiagLinearOperator
-
 from typing import Iterable
 from torch.nn import ModuleDict
 from botorch.posteriors.gpytorch import GPyTorchPosterior
 from botorch.posteriors.posterior import Posterior
-from gpytorch.distributions import MultitaskMultivariateNormal
 from linear_operator.operators import BlockDiagLinearOperator
 
 
@@ -59,16 +54,14 @@ def __init__(
             f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}"
         )
 
-        self.store_training(
-            train_X,
-            train_X_added,
-            train_Y,
-            train_Y_added,
-            train_Yvar,
-            train_Yvar_added,
-            input_transform,
-            outcome_transform,
-        )
+        # ** Store training data
+
+        # x, y are raw untransformed, and I want raw transformed. xa, ya are raw transformed
+        #x_tr = input_transform["tf1"](train_X)if input_transform is not None else train_X
+        #y_tr, yv_tr = outcome_transform["tf1"](train_X, train_Y, train_Yvar) if outcome_transform is not None else train_Y, train_Yvar
+        #self.train_X_usedToTrain = torch.cat((train_X_added, x_tr), axis=-2)
+        #self.train_Y_usedToTrain = torch.cat((train_Y_added, y_tr), axis=-2)
+        #self.train_Yvar_usedToTrain = torch.cat((train_Yvar_added, yv_tr), axis=-2)
 
         # Grab num_outputs
         self._num_outputs = train_Y.shape[-1]
@@ -91,40 +84,29 @@ def __init__(
         # Added points are raw transformed, so I need to normalize them
         if train_X_added.shape[0] > 0:
             train_X_added = input_transform["tf2"](train_X_added)
-            train_Y_added, train_Yvar_added = outcome_transform["tf2"](
-                train_Y_added, train_Yvar_added
-            )
-        # -----
-
-        train_X_usedToTrain = torch.cat((transformed_X, train_X_added), axis=0)
-        train_Y_usedToTrain = torch.cat((train_Y, train_Y_added), axis=0)
-        train_Yvar_usedToTrain = torch.cat((train_Yvar, train_Yvar_added), axis=0)
-
-        self._input_batch_shape, self._aug_batch_shape = self.get_batch_dimensions(
-            train_X=train_X_usedToTrain, train_Y=train_Y_usedToTrain
-        )
-
-        train_Y_usedToTrain = train_Y_usedToTrain.squeeze(-1)
-        train_Yvar_usedToTrain = train_Yvar_usedToTrain.squeeze(-1)
-
-        #self._aug_batch_shape = train_Y.shape[:-2] #<----- New
+            train_Y_added = outcome_transform["tf3"].untransform(train_Y_added)[0]
+            train_Yvar_added = outcome_transform["tf3"].untransform(train_Yvar_added)[0]
+            train_Y_added, train_Yvar_added = outcome_transform["tf3"](*outcome_transform["tf2"](train_Y_added, train_Yvar_added))
 
+        # -----
 
+        train_X_usedToTrain = torch.cat((transformed_X, train_X_added), axis=-2)
+        train_Y_usedToTrain = torch.cat((train_Y, train_Y_added), axis=-2)
+        train_Yvar_usedToTrain = torch.cat((train_Yvar, train_Yvar_added), axis=-2)
 
         # Validate again after applying the transforms
-        self._validate_tensor_args(X=transformed_X, Y=train_Y, Yvar=train_Yvar)
+        self._validate_tensor_args(X=train_X_usedToTrain, Y=train_Y_usedToTrain, Yvar=train_Yvar_usedToTrain)
         ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None)
         validate_input_scaling(
-            train_X=transformed_X,
-            train_Y=train_Y,
-            train_Yvar=train_Yvar,
+            train_X=train_X_usedToTrain,
+            train_Y=train_Y_usedToTrain,
+            train_Yvar=train_Yvar_usedToTrain,
             ignore_X_dims=ignore_X_dims,
         )
-        self._set_dimensions(train_X=train_X, train_Y=train_Y)
+        self._set_dimensions(train_X=train_X_usedToTrain, train_Y=train_Y_usedToTrain)
         
-
-        train_X, train_Y, train_Yvar = self._transform_tensor_args(
-            X=train_X, Y=train_Y, Yvar=train_Yvar
+        train_X_usedToTrain, train_Y_usedToTrain, train_Yvar_usedToTrain = self._transform_tensor_args(
+            X=train_X_usedToTrain, Y=train_Y_usedToTrain, Yvar=train_Yvar_usedToTrain
         )
 
         """
@@ -271,26 +253,6 @@ def __init__(
             self.input_transform = input_transform
         self.to(train_X)
 
-    def store_training(self, x, xa, y, ya, yv, yva, input_transform, outcome_transform):
-
-        # x, y are raw untransformed, and I want raw transformed
-        if input_transform is not None:
-            x_tr = input_transform["tf1"](x)
-        else:
-            x_tr = x
-        if outcome_transform is not None:
-            y_tr, yv_tr = outcome_transform["tf1"](x, y, yv)
-        else:
-            y_tr, yv_tr = y, yv
-
-        # xa, ya are raw transformed
-        xa_tr = xa
-        ya_tr, yva_tr = ya, yva
-
-        self.train_X_usedToTrain = torch.cat((xa_tr, x_tr), axis=0)
-        self.train_Y_usedToTrain = torch.cat((ya_tr, y_tr), axis=0)
-        self.train_Yvar_usedToTrain = torch.cat((yva_tr, yv_tr), axis=0)
-
     # Modify posterior call from BatchedMultiOutputGPyTorchModel to call posterior untransform with "X"
     def posterior(
         self,
@@ -559,6 +521,7 @@ def untransform_posterior(self, X, posterior):
                 if i == len(self.values())-1
                 else tf.untransform_posterior(posterior)
             )  # Only physics transformation (tf1) takes X
+            
 
         return posterior
 
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index b4258839..5992c00c 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -101,9 +101,9 @@ def fit_step(self, avoidPoints=None, fit_output_contains=None):
 
         time1 = datetime.datetime.now()
 
-        #self._fit_multioutput_model()
-        self._fit_individual_models(fit_output_contains=fit_output_contains)
-        
+        self._fit_multioutput_model(); self.GP["combined_model"] = self.GP["mo_model"]
+        #self._fit_individual_models(fit_output_contains=fit_output_contains)
+
         txt_time = IOtools.getTimeDifference(time1)
         print(f"--> Fitting of all models took {txt_time}")
         if self.fileOutputs is not None:
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index e936ebce..24410b4a 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -163,10 +163,17 @@ def __init__(
             dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
             # ------------------------------------------------------------------------------------------------------------
 
-            self.train_X_added_full = torch.empty((0, dimTransformedDV_x_full)).to(self.dfT)
-            self.train_X_added = torch.empty((0, dimTransformedDV_x)).to(self.dfT)
-            self.train_Y_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
-            self.train_Yvar_added = torch.empty((0, dimTransformedDV_y)).to(self.dfT)
+            x_transformed = input_transform_physics(self.train_X)
+            shape = list(x_transformed.shape)
+            shape[-2] = 0
+            shape[-1] = dimTransformedDV_x_full
+
+            self.train_X_added_full = torch.empty(*shape).to(self.dfT)
+            shape[-1] = dimTransformedDV_x
+            self.train_X_added = torch.empty(*shape).to(self.dfT)
+            shape[-1] = 1
+            self.train_Y_added = torch.empty(*shape).to(self.dfT)
+            self.train_Yvar_added = torch.empty(*shape).to(self.dfT)
 
         # --------------------------------------------------------------------------------------
         # Make sure that very small variations are not captured
@@ -202,17 +209,7 @@ def __init__(
         if (self.fileTraining is not None) and (self.train_X.shape[0] + self.train_X_added.shape[0] > 0):
             self.write_datafile(input_transform_physics, outcome_transform_physics)
 
-        # -------------------------------------------------------------------------------------
-        # Obtain normalization constants now (although during training this is messed up, so needed later too)
-        # -------------------------------------------------------------------------------------
 
-        self.normalization_pass(
-            input_transform_physics,
-            input_transform_normalization,
-            outcome_transform_physics,
-            output_transformed_standardization,
-        )
-        
         # ------------------------------------------------------------------------------------
         # Combine transformations in chain of PHYSICS + NORMALIZATION
         # ------------------------------------------------------------------------------------
@@ -222,9 +219,15 @@ def __init__(
         ).to(self.dfT)
 
         outcome_transform = BOTORCHtools.ChainedOutcomeTransform(
-            tf1=outcome_transform_physics, tf2=output_transformed_standardization #, tf3=BOTORCHtools.OutcomeToBatchDimension()
+            tf1=outcome_transform_physics, tf2=output_transformed_standardization, tf3=BOTORCHtools.OutcomeToBatchDimension()
         ).to(self.dfT)
 
+        # -------------------------------------------------------------------------------------
+        # Obtain normalization constants now (although during training this is messed up, so needed later too)
+        # -------------------------------------------------------------------------------------
+
+        self.normalization_pass(input_transform, outcome_transform)
+
         self.variables = (
             self.surrogate_transformation_variables[self.outputs[0]]
             if (
@@ -305,7 +308,7 @@ def _define_MITIM_transformations(self):
         # Broadcast the input transformation to all outputs
         # ------------------------------------------------------------------------------------
 
-        input_transformation_physics = input_transformations_physics[0] #BOTORCHtools.BatchBroadcastedInputTransform(input_transformations_physics)
+        input_transformation_physics = BOTORCHtools.BatchBroadcastedInputTransform(input_transformations_physics)
 
         transformed_X = input_transformation_physics(self.train_X)
 
@@ -331,41 +334,41 @@ def _define_MITIM_transformations(self):
                 output_transformed_standardization, \
                 dimTransformedDV_x, dimTransformedDV_y
 
-    def normalization_pass(
-        self,
-        input_transform_physics,
-        input_transform_normalization,
-        outcome_transform_physics,
-        outcome_transform_normalization,
-    ):
-        input_transform_normalization.training = True
-        outcome_transform_normalization.training = True
-        outcome_transform_normalization._is_trained = torch.tensor(False)
+    def normalization_pass(self,input_transform, outcome_transform):
+        '''
+        The goal of this is to capture NOW the normalization and standardization constants,
+        by account for both the actual data and the added data from file 
+        '''
 
-        train_X_transformed = input_transform_physics(self.train_X)
-        train_Y_transformed, train_Yvar_transformed = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
+        # Get input normalization and outcome standardization in training mode
+        input_transform['tf2'].training = True
+        outcome_transform['tf2'].training = True
+        outcome_transform['tf2']._is_trained = torch.tensor(False)
 
-        train_X_transformed = torch.cat(
-            (input_transform_physics(self.train_X), self.train_X_added), axis=0
-        )
-        y, yvar = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
-        train_Y_transformed = torch.cat((y, self.train_Y_added), axis=0)
-        train_Yvar_transformed = torch.cat((yvar, self.train_Yvar_added), axis=0)
+        # Get the input normalization constants by physics-transforming the train_x and adding the data from file
+        train_X_transformed = input_transform['tf1'](self.train_X)
+        train_X_transformed = torch.cat((train_X_transformed, self.train_X_added), axis=-2)
+        _ = input_transform['tf2'](train_X_transformed)
 
-        train_X_transformed_norm = input_transform_normalization(train_X_transformed)
-        (
-            train_Y_transformed_norm,
-            train_Yvar_transformed_norm,
-        ) = outcome_transform_normalization(train_Y_transformed, train_Yvar_transformed)
+        # Get the outcome standardization constants by physics-transforming the train_y and adding the data from file
+        # With the caveat that the added points have to not be batched
+        train_Y_transformed, train_Yvar_transformed = outcome_transform['tf1'](self.train_X, self.train_Y, self.train_Yvar)
+        y, yvar = outcome_transform['tf1'](self.train_X, self.train_Y, self.train_Yvar)
+        
+        train_Y_transformed = torch.cat((y, outcome_transform['tf3'].untransform(self.train_Y_added)[0]), axis=-2)
+        train_Yvar_transformed = torch.cat((yvar, outcome_transform['tf3'].untransform(self.train_Yvar_added)[0]), axis=0)
+
+        train_Y_transformed_norm, train_Yvar_transformed_norm = outcome_transform['tf2'](train_Y_transformed, train_Yvar_transformed)
 
         # Make sure they are not on training mode
-        input_transform_normalization.training = False
-        outcome_transform_normalization.training = False
-        outcome_transform_normalization._is_trained = torch.tensor(True)
+        input_transform['tf2'].training = False
+        outcome_transform['tf2'].training = False
+        outcome_transform['tf2']._is_trained = torch.tensor(True)
+
 
     def fit(self):
         print(
-            f"\t- Fitting model to {self.train_X.shape[0]+self.train_X_added.shape[0]} points"
+            f"\t- Fitting model to {self.train_X.shape[-2]+self.train_X_added.shape[-2]} points"
         )
 
         # ---------------------------------------------------------------------------------------------------
@@ -398,8 +401,6 @@ def fit(self):
         with fundamental_model_context(self):
             track_fval = self.perform_model_fit(mll)
 
-        embed()
-
         # ---------------------------------------------------------------------------------------------------
         # Asses optimization
         # ---------------------------------------------------------------------------------------------------
@@ -409,12 +410,7 @@ def fit(self):
         # Go back to definining the right normalizations, because the optimizer has to work on training mode...
         # ---------------------------------------------------------------------------------------------------
 
-        self.normalization_pass(
-            self.gpmodel.input_transform["tf1"],
-            self.gpmodel.input_transform["tf2"],
-            self.gpmodel.outcome_transform["tf1"],
-            self.gpmodel.outcome_transform["tf2"],
-        )
+        self.normalization_pass(self.gpmodel.input_transform, self.gpmodel.outcome_transform)
 
     def perform_model_fit(self, mll):
         self.gpmodel.train()
@@ -903,29 +899,17 @@ def __init__(self, surrogate_model):
 
     def __enter__(self):
         # Works for individual models, not ModelList
-        self.surrogate_model.gpmodel.input_transform.tf1.flag_to_evaluate = False
+        for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
+            self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = False
         self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = False
 
         return self.surrogate_model
 
     def __exit__(self, *args):
-        self.surrogate_model.gpmodel.input_transform.tf1.flag_to_evaluate = True
+        for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
+            self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = True
         self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = True
 
-    # def __enter__(self):
-    #     # Works for individual models, not ModelList
-    #     embed()
-    #     for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
-    #         self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = False
-    #     self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = False
-
-    #     return self.surrogate_model
-
-    # def __exit__(self, *args):
-    #     for i in range(len(self.surrogate_model.gpmodel.input_transform.tf1.transforms)):
-    #         self.surrogate_model.gpmodel.input_transform.tf1.transforms[i].flag_to_evaluate = True
-    #     self.surrogate_model.gpmodel.outcome_transform.tf1.flag_to_evaluate = True
-
 def create_df_portals(x, y, yvar, x_names, output, max_x = 20):
 
     new_data = []
diff --git a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
index 0065cac7..27dc1313 100644
--- a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
+++ b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
@@ -36,7 +36,6 @@ def findOptima(fun, optimization_params = {}, writeTrajectory=False):
         "sample_around_best": True,
         "disp": 50 if read_verbose_level() == 5 else False,
         "seed": fun.seed,
-        "maxiter": 100,
     }
 
     """
@@ -64,18 +63,16 @@ def __call__(self, x, *args, **kwargs):
     seq_message = f'({"sequential" if sequential_q else "joint"}) ' if q>1 else ''
     print(f"\t\t- Optimizing using optimize_acqf: {q = } {seq_message}, {num_restarts = }, {raw_samples = }")
 
-   
-    #with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
-    #with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/profiler_opt.prof") as s:
-    x_opt, _ = botorch.optim.optimize_acqf(
-        acq_function=fun_opt,
-        bounds=fun.bounds_mod,
-        raw_samples=raw_samples,
-        q=q,
-        sequential=sequential_q,
-        num_restarts=num_restarts,
-        options=options,
-    )
+    with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
+        x_opt, _ = botorch.optim.optimize_acqf(
+            acq_function=fun_opt,
+            bounds=fun.bounds_mod,
+            raw_samples=raw_samples,
+            q=q,
+            sequential=sequential_q,
+            num_restarts=num_restarts,
+            options=options,
+        )
     embed()
 
     acq_evaluated = torch.Tensor(acq_evaluated)

From 44c8a3cf8954db2ab499b94dcc950cc87973a9b4 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 22 Nov 2024 14:28:08 -0500
Subject: [PATCH 24/34] Cleaned up SURROGATEtools

---
 .../maestro/utils/PORTALSbeat.py              |   2 +-
 src/mitim_modules/portals/PORTALSmain.py      |  20 +-
 src/mitim_tools/misc_tools/IOtools.py         |   1 +
 src/mitim_tools/opt_tools/STEPtools.py        | 127 ++---
 src/mitim_tools/opt_tools/SURROGATEtools.py   | 471 ++++++++++--------
 templates/main.namelist.json                  |   4 +-
 6 files changed, 316 insertions(+), 309 deletions(-)

diff --git a/src/mitim_modules/maestro/utils/PORTALSbeat.py b/src/mitim_modules/maestro/utils/PORTALSbeat.py
index 1a95ec91..217fc5d8 100644
--- a/src/mitim_modules/maestro/utils/PORTALSbeat.py
+++ b/src/mitim_modules/maestro/utils/PORTALSbeat.py
@@ -295,7 +295,7 @@ def _inform(self, use_previous_residual = True, use_previous_surrogate_data = Tr
         # In the situation where the last radial location moves, I cannot reuse that surrogate data
         if last_radial_location_moved and reusing_surrogate_data:
             print('\t\t- Last radial location was moved, so surrogate data will not be reused for that specific location')
-            self.optimization_options['surrogateOptions']["extrapointsModelsAvoidContent"] = ['Tar',f'_{len(self.MODELparameters[strKeys])}']
+            self.optimization_options['surrogateOptions']["add_data_to_modelsAvoidContent"] = ['Tar',f'_{len(self.MODELparameters[strKeys])}']
             self.try_flux_match_only_for_first_point = False
 
     def _inform_save(self):
diff --git a/src/mitim_modules/portals/PORTALSmain.py b/src/mitim_modules/portals/PORTALSmain.py
index aec99d35..f281f00e 100644
--- a/src/mitim_modules/portals/PORTALSmain.py
+++ b/src/mitim_modules/portals/PORTALSmain.py
@@ -336,14 +336,14 @@ def prep(
         # Ignore targets in surrogate_data.csv
         # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-        if 'extrapointsModels' not in self.optimization_options['surrogateOptions'] or \
-            self.optimization_options['surrogateOptions']['extrapointsModels'] is None or \
-            len(self.optimization_options['surrogateOptions']['extrapointsModels'])==0:
+        if 'add_data_to_models' not in self.optimization_options['surrogateOptions'] or \
+            self.optimization_options['surrogateOptions']['add_data_to_models'] is None or \
+            len(self.optimization_options['surrogateOptions']['add_data_to_models'])==0:
 
             self._define_reuse_models()
 
         else:
-            print("\t- extrapointsModels already defined, not changing")
+            print("\t- add_data_to_models already defined, not changing")
 
     def _define_reuse_models(self):
         '''
@@ -353,21 +353,21 @@ def _define_reuse_models(self):
             '_5' to avoid reusing position 5
         '''
 
-        self.optimization_options['surrogateOptions']['extrapointsModels'] = []
+        self.optimization_options['surrogateOptions']['add_data_to_models'] = []
 
         # Define avoiders
-        if self.optimization_options['surrogateOptions']['extrapointsModelsAvoidContent'] is None:
-            self.optimization_options['surrogateOptions']['extrapointsModelsAvoidContent'] = ['Tar']
+        if self.optimization_options['surrogateOptions']['add_data_to_modelsAvoidContent'] is None:
+            self.optimization_options['surrogateOptions']['add_data_to_modelsAvoidContent'] = ['Tar']
 
-        # Define extrapointsModels
+        # Define add_data_to_models
         for key in self.surrogate_parameters['surrogate_transformation_variables_lasttime'].keys():
             add_key = True
-            for avoid in self.optimization_options['surrogateOptions']['extrapointsModelsAvoidContent']:
+            for avoid in self.optimization_options['surrogateOptions']['add_data_to_modelsAvoidContent']:
                 if avoid in key:
                     add_key = False
                     break
             if add_key:
-                self.optimization_options['surrogateOptions']['extrapointsModels'].append(key)
+                self.optimization_options['surrogateOptions']['add_data_to_models'].append(key)
 
     def run(self, paramsfile, resultsfile):
         # Read what PORTALS sends
diff --git a/src/mitim_tools/misc_tools/IOtools.py b/src/mitim_tools/misc_tools/IOtools.py
index e808bd15..f6430dd8 100644
--- a/src/mitim_tools/misc_tools/IOtools.py
+++ b/src/mitim_tools/misc_tools/IOtools.py
@@ -82,6 +82,7 @@ def __exit__(self, *args):
     def _get_time(self):
 
         self.timeDiff = getTimeDifference(self.timeBeginning, niceText=False)
+        self.timeDiff_txt = createTimeTXT(self.timeDiff)
 
         print(f'{self.name} took {createTimeTXT(self.timeDiff)}')
 
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 5992c00c..24b5689d 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -32,9 +32,10 @@ def __init__(
         currentIteration=1,
         ):
         """
-        train_Ystd is in standard deviations (square root of the variance), absolute magnitude
-        Rule: X_Y are provided in absolute units. Normalization has to happen inside each surrogate_model,
-                and de-normalized before giving results to the outside of the function
+        Notes:
+            - train_Ystd is in standard deviations (square root of the variance), absolute magnitude
+            - X_Y are provided untransformed and unnormalized. Normalization has to happen inside each
+              surrogate_model, and de-normalized before giving results to the outside of the function
         """
 
         self.train_X, self.train_Y, self.train_Ystd = train_X, train_Y, train_Ystd
@@ -51,6 +52,7 @@ def __init__(
         self.favor_proximity_type = self.stepSettings["optimization_options"]["favor_proximity_type"]
         self.optimizers = self.stepSettings["optimization_options"]["optimizers"]
         self.outputs = self.stepSettings["outputs"]
+        self.outputs_transformed = self.stepSettings["name_transformed_ofs"]
         self.dfT = self.stepSettings["dfT"]
         self.best_points_sequence = self.stepSettings["best_points_sequence"]
         self.fileOutputs = self.stepSettings["fileOutputs"]
@@ -65,50 +67,39 @@ def __init__(
     def fit_step(self, avoidPoints=None, fit_output_contains=None):
         """
         Notes:
-            - Note that fit_output_contains = 'Tar' would only use the train_X,Y,Yvar tensors
-                    to fit those surrogate variables that contain 'Tar' in their names. This is useful when in
-                    PORTALS I want to simply use the training in a file and not directly from train_X,Y,Yvar for
-                    the fluxes but I do want new target calculation
+            - fit_output_contains = 'Tar' would only use the train_X,Y,Yvar tensors
+              to fit those surrogate variables that contain 'Tar' in their names. This is useful when in
+              PORTALS I want to simply use the training in a file and not directly from train_X,Y,Yvar for
+              the fluxes but I do want new target calculation
         """
 
         # Prepare case information. Copy because I'll be removing outliers
-        self.x, self.y, self.yvar = (
-            copy.deepcopy(self.train_X),
-            copy.deepcopy(self.train_Y),
-            copy.deepcopy(self.train_Yvar),
-        )
+        self.x = copy.deepcopy(self.train_X)
+        self.y = copy.deepcopy(self.train_Y)
+        self.yvar = copy.deepcopy(self.train_Yvar)
 
         # Add outliers to avoid points (it cannot happen inside of SURROGATEtools or it will fail at combining)
         self.avoidPoints = copy.deepcopy(avoidPoints) if avoidPoints is not None else []
-        self.curate_outliers()
+        self._curate_outliers()
 
         if self.fileOutputs is not None:
             with open(self.fileOutputs, "a") as f:
                 f.write("\n\n-----------------------------------------------------")
                 f.write("\n * Fitting GP models to training data...")
 
-        """
-        *********************************************************************************************************************
-            Performing Fit
-        *********************************************************************************************************************
-        """
+        # Performing Fit
 
-        print(
-            f"\n~~~~~~~ Performing fitting with {len(self.train_X)-len(self.avoidPoints)} training points ({len(self.avoidPoints)} avoided from {len(self.train_X)} total) ~~~~~~~~~~\n"
-        )
-
-        self.GP = {}
+        print(f"\n~~~~~~~ Fitting with {len(self.train_X)-len(self.avoidPoints)} training points ({len(self.avoidPoints)} avoided from {len(self.train_X)} total) ~~~~~~~~~~\n")
 
-        time1 = datetime.datetime.now()
+        with IOtools.timer(name = "\n\t- Fitting", name_timer = '\t\t- Time: ') as t:
 
-        self._fit_multioutput_model(); self.GP["combined_model"] = self.GP["mo_model"]
-        #self._fit_individual_models(fit_output_contains=fit_output_contains)
+            self.GP = {}
+            #self._fit_multioutput_model(); self.GP["combined_model"] = self.GP["mo_model"]
+            self._fit_individual_models(fit_output_contains=fit_output_contains)
 
-        txt_time = IOtools.getTimeDifference(time1)
-        print(f"--> Fitting of all models took {txt_time}")
         if self.fileOutputs is not None:
             with open(self.fileOutputs, "a") as f:
-                f.write(f" (took total of {txt_time})")
+                f.write(f" (took total of {t.timeDiff_txt})")
 
     def _fit_multioutput_model(self):
 
@@ -120,7 +111,7 @@ def _fit_multioutput_model(self):
             self.yvar,
             self.surrogate_parameters,
             outputs=self.outputs,
-            outputs_transformed=self.stepSettings["name_transformed_ofs"],
+            outputs_transformed=self.outputs_transformed,
             bounds=self.bounds,
             dfT=self.dfT,
             surrogateOptions=surrogateOptions,
@@ -141,22 +132,14 @@ def _fit_individual_models(self, fit_output_contains=None):
         self.GP["individual_models"] = [None] * self.y.shape[-1]
 
         for i in range(self.y.shape[-1]):
-            outi = self.outputs[i] if (self.outputs is not None) else None
 
-            # ----------------- specialTreatment is applied when I only want to use training data from a file, not from train_X
-            specialTreatment = (
-                (outi is not None)
-                and (fit_output_contains is not None)
-                and (fit_output_contains not in outi)
-            )
-            # -----------------------------------------------------------------------------------------------------------------------------------
-
-            outi_transformed = (
-                self.stepSettings["name_transformed_ofs"][i]
-                if (self.stepSettings["name_transformed_ofs"] is not None)
-                else outi
-            )
+            # Grab name of output (raw and transformed)
+            output_this = self.outputs[i] if (self.outputs is not None) else None
+            output_this_tr = self.outputs_transformed[i] if (self.outputs_transformed is not None) else None
 
+            #  specialTreatment is applied when I only want to use training data from a file, not from train_X
+            specialTreatment = (output_this is not None) and (fit_output_contains is not None) and (fit_output_contains not in output_this)
+            
             # ---------------------------------------------------------------------------------------------------
             # Define model-specific functions for this output
             # ---------------------------------------------------------------------------------------------------
@@ -164,41 +147,25 @@ def _fit_individual_models(self, fit_output_contains=None):
             surrogateOptions = copy.deepcopy(self.surrogateOptions)
 
             # Then, depending on application (e.g. targets in mitim are fitted differently)
-            if (
-                "selectSurrogate" in surrogateOptions
-                and surrogateOptions["selectSurrogate"] is not None
-            ):
-                surrogateOptions = surrogateOptions["selectSurrogate"](
-                    outi, surrogateOptions
-                )
+            if ("selectSurrogate" in surrogateOptions) and (surrogateOptions["selectSurrogate"] is not None):
+                surrogateOptions = surrogateOptions["selectSurrogate"](output_this, surrogateOptions)
 
             # ---------------------------------------------------------------------------------------------------
             # To avoid problems with fixed values (e.g. calibration terms that are fixed)
             # ---------------------------------------------------------------------------------------------------
 
             threshold_to_consider_fixed = 1e-6
-            MaxRelativeDifference = np.abs(self.y.max() - self.y.min()) / np.abs(
-                self.y.mean()
-            )
+            MaxRelativeDifference = np.abs(self.y.max() - self.y.min()) / np.abs(self.y.mean())
 
-            if (
-                np.isnan(MaxRelativeDifference)
-                or (
-                    (self.y.shape[0] > 1)
-                    and ((MaxRelativeDifference < threshold_to_consider_fixed).all())
-                )
-            ) and (not specialTreatment):
-                print(
-                    f"\t- Identified that outputs did not change, utilizing constant kernel for {outi}",
-                    typeMsg="w",
-                )
+            FixedValue = False
+            if (np.isnan(MaxRelativeDifference) or \
+                ((self.y.shape[0] > 1) and ((MaxRelativeDifference < threshold_to_consider_fixed).all()))
+                ) and (not specialTreatment):
+                print(f"\t- Identified that outputs did not change, utilizing constant kernel for {output_this}",typeMsg="w",)
                 FixedValue = True
                 surrogateOptions["TypeMean"] = 0
                 surrogateOptions["TypeKernel"] = 6  # Constant kernel
-
-            else:
-                FixedValue = False
-
+                
             # ---------------------------------------------------------------------------------------------------
             # Fit individual output
             # ---------------------------------------------------------------------------------------------------
@@ -209,15 +176,13 @@ def _fit_individual_models(self, fit_output_contains=None):
             yvar = np.expand_dims(self.yvar[:, i], axis=1)
 
             if specialTreatment:
-                x, y, yvar = (
-                    np.empty((0, x.shape[-1])),
-                    np.empty((0, y.shape[-1])),
-                    np.empty((0, y.shape[-1])),
-                )
+                x = np.empty((0, x.shape[-1]))
+                y = np.empty((0, y.shape[-1]))
+                yvar = np.empty((0, y.shape[-1]))
 
             # Surrogate
 
-            print(f"~ Model for output: {outi}")
+            print(f"~ Model for output: {output_this}")
 
             GP = SURROGATEtools.surrogate_model(
                 x,
@@ -225,8 +190,8 @@ def _fit_individual_models(self, fit_output_contains=None):
                 yvar,
                 self.surrogate_parameters,
                 bounds=self.bounds,
-                outputs=[outi],
-                outputs_transformed=[outi_transformed],
+                outputs=[output_this],
+                outputs_transformed=[output_this_tr],
                 dfT=self.dfT,
                 surrogateOptions=surrogateOptions,
                 avoidPoints=self.avoidPoints,
@@ -270,10 +235,10 @@ def _fit_individual_models(self, fit_output_contains=None):
         if self.GP["combined_model"].surrogate_transformation_variables is not None:
             for i in range(self.y.shape[-1]):
 
-                outi = self.outputs[i] if (self.outputs is not None) else None
+                output_this = self.outputs[i] if (self.outputs is not None) else None
 
-                if outi is not None:
-                    self.GP["combined_model"].surrogate_transformation_variables[outi] = self.GP["individual_models"][i].surrogate_transformation_variables[outi]
+                if output_this is not None:
+                    self.GP["combined_model"].surrogate_transformation_variables[output_this] = self.GP["individual_models"][i].surrogate_transformation_variables[output_this]
 
         """
         *********************************************************************************************************************
@@ -471,7 +436,7 @@ def optimize(
             f"\n~~ Complete acquisition workflows found {self.x_next.shape[0]} points"
         )
 
-    def curate_outliers(self):
+    def _curate_outliers(self):
         # Remove outliers
         self.outliers = removeOutliers(
             self.y,
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 24410b4a..105a7a5d 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -43,11 +43,16 @@ def __init__(
         seed = 0
     ):
         """
-        Noise is variance here (square of standard deviation).
+        Note:
+            - noise is variance (square of standard deviation).
         """
 
         torch.manual_seed(seed)
 
+        # --------------------------------------------------------------------
+        # Input parameters
+        # --------------------------------------------------------------------
+
         self.avoidPoints = avoidPoints if avoidPoints is not None else []
         self.outputs = outputs
         self.outputs_transformed = outputs_transformed
@@ -68,133 +73,76 @@ def __init__(
             Yvaror = Yor * 0.0 + Yvaror
         self.train_Yvar = torch.from_numpy(Yvaror).to(self.dfT)
 
-        # ---------- Print ----------
+        self.losses = None
+
+        # Print options
         print("\t- Surrogate options:")
         for i in self.surrogateOptions:
             print(f"\t\t{i:20} = {self.surrogateOptions[i]}")
 
-        self.losses = None
-
         # --------------------------------------------------------------------
         # Eliminate points if needed (not from the "added" set)
         # --------------------------------------------------------------------
 
         self._remove_points()
 
+        # ------------------------------------------------------------------------------------------
+        # Retrieve points from file -> Xtr[batch, dimXtr], Ytr[batch, dimYtr], Yvartr[batch, dimYtr]
+        # ------------------------------------------------------------------------------------------
+
+        train_X_added_full, train_Y_added, train_Yvar_added, dx_tr_full = self._add_points_from_file()
+
         # -------------------------------------------------------------------------------------
-        # Add points from file
+        # Define transformations
         # -------------------------------------------------------------------------------------
 
-        addition_of_points = ("add_data_from_file" in self.surrogateOptions) and (self.surrogateOptions["add_data_from_file"] is not None)
-        is_this_single_output = (self.outputs is not None) and (len(self.outputs) == 1)
+        num_training_points = self.train_X.shape[0] + (train_X_added_full.shape[0] if train_X_added_full is not None else 0)
 
-        if addition_of_points and is_this_single_output:
-            raise Exception("[MITIM] add_data_from_file can only be used for single output models as of now...")
+        input_transform, outcome_transform, dx_tr, dy_tr = self._define_MITIM_transformations(num_training_points = num_training_points)
 
-        # Points to be added from file
-        continueAdding = False
-        if addition_of_points and (self.outputs is not None) and (self.outputs[0] in self.surrogateOptions["extrapointsModels"]):
+        # For easy future use
+        input_transform_physics = input_transform['tf1']
+        outcome_transform_physics = outcome_transform['tf1']
 
-            print(
-                f"\t* Requested extension of training set by points in file {self.surrogateOptions['add_data_from_file']}"
-            )
-
-            df = pd.read_csv(self.surrogateOptions["add_data_from_file"])
-            df_model = df[df['Model'] == self.outputs[0]]
-
-            if len(df_model) == 0:
-                print("\t- No points for this output in the file, nothing to add", typeMsg="i")
-                continueAdding = False
-            else:
-                continueAdding = True
+        # --------------------------------------------------------------------------------------------
+        # Add points from file (provided as if tf1 was used -> I need to broadcast Xtr to all outputs)
+        # --------------------------------------------------------------------------------------------
 
-        if continueAdding:
+        if train_X_added_full is not None:
 
-            # Check 1: Do the points for this output share the same x_names?
-            if df_model['x_names'].nunique() > 1:
-                print("Different x_names for points in the file, prone to errors", typeMsg='q')
+            raise Exception("[PRF] This is not working, I need to broadcast the input transformation to all outputs")
+            self.train_X_added_full = train_X_added_full.to(self.dfT)
+            self.train_X_added = (self.train_X_added_full[:, :dx_tr] if self.train_X_added_full.shape[-1] > dx_tr else self.train_X_added_full).to(self.dfT)
+            self.train_Y_added = train_Y_added.to(self.dfT)
+            self.train_Yvar_added = train_Yvar_added.to(self.dfT)
+        
+        else:
 
-            # Check 2: Is it consistent with the x_names of this run?
-            x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0]
-            x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.outputs[0]]
-            if x_names != x_names_check:
-                print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q')            
+            x_transformed = input_transform_physics(self.train_X) # [batch, dimX] -> [batch, dimXtr] -> [dimY, batch, dimXtr]
 
-            self.train_Y_added = torch.from_numpy(df_model['y'].to_numpy()).unsqueeze(-1).to(self.dfT)
-            self.train_Yvar_added = torch.from_numpy(df_model['yvar'].to_numpy()).unsqueeze(-1).to(self.dfT)
-    
-            x = []
-            for i in range(len(x_names)):
-                x.append(df_model[f'x{i}'].to_numpy())
-            self.train_X_added_full = torch.from_numpy(np.array(x).T).to(self.dfT)
-
-            # ------------------------------------------------------------------------------------------------------------
-            # Define transformation (here because I want to account for the added points)
-            # ------------------------------------------------------------------------------------------------------------
-            self.num_training_points = self.train_X.shape[0] + self.train_X_added_full.shape[0]
-            input_transform_physics, outcome_transform_physics, \
-            input_transform_normalization, output_transformed_standardization, \
-            dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
-            # ------------------------------------------------------------------------------------------------------------
-
-            self.train_X_added = (
-                self.train_X_added_full[:, :dimTransformedDV_x] if self.train_X_added_full.shape[-1] > dimTransformedDV_x else self.train_X_added_full
-            ).to(self.dfT)
+            shape_xtr = list(x_transformed.shape)
+            shape_xtr[-2] = 0
+            shape_xtr[-1] = dx_tr_full
+            self.train_X_added_full = torch.empty(*shape_xtr).to(self.dfT) # [dimY, 0, dimXtr]
+            self.train_X_added = torch.empty(*shape_xtr).to(self.dfT)
 
-        else:
-            if self.fileTraining is not None:
-                train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
-                    self.train_X,
-                    self.outputs[0],
-                    self.surrogate_parameters,
-                    self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
-                )
-                dimTransformedDV_x_full = train_X_Complete.shape[-1]
-            else:
-                dimTransformedDV_x_full = self.train_X.shape[-1]
-
-            # --------------------------------------------------------------------------------------
-            # Define transformation (here because I want to account for the added points)
-            # --------------------------------------------------------------------------------------
-            self.num_training_points = self.train_X.shape[0]
-
-            input_transform_physics, outcome_transform_physics,\
-            input_transform_normalization, output_transformed_standardization,\
-            dimTransformedDV_x, dimTransformedDV_y = self._define_MITIM_transformations()
-            # ------------------------------------------------------------------------------------------------------------
-
-            x_transformed = input_transform_physics(self.train_X)
-            shape = list(x_transformed.shape)
-            shape[-2] = 0
-            shape[-1] = dimTransformedDV_x_full
-
-            self.train_X_added_full = torch.empty(*shape).to(self.dfT)
-            shape[-1] = dimTransformedDV_x
-            self.train_X_added = torch.empty(*shape).to(self.dfT)
-            shape[-1] = 1
-            self.train_Y_added = torch.empty(*shape).to(self.dfT)
-            self.train_Yvar_added = torch.empty(*shape).to(self.dfT)
+            y_transformed, yvar_transformed = outcome_transform_physics(self.train_X, self.train_Y, self.train_Yvar)
+            shape_ytr = list(y_transformed.shape)
+            shape_ytr[-2] = 0
+            self.train_Y_added = torch.empty(*shape_ytr).to(self.dfT)
+            self.train_Yvar_added = torch.empty(*shape_ytr).to(self.dfT)
 
         # --------------------------------------------------------------------------------------
         # Make sure that very small variations are not captured
         # --------------------------------------------------------------------------------------
 
-        if (self.train_X_added.shape[0] > 0) and (self.train_X.shape[0] > 1):
-            self._ensure_small_variation_suppressed(input_transform_physics)
+        self._ensure_small_variation_suppressed(input_transform_physics)
 
         # --------------------------------------------------------------------------------------
         # Make sure at least 2 points
         # --------------------------------------------------------------------------------------
 
-        if self.train_X.shape[0] + self.train_X_added.shape[0] == 1:
-            factor = 1.2
-            print(
-                f"\t- This dataset had only one point, adding a point with linear interpolation (trick for PORTALS targets only), {factor}",
-                typeMsg="w",
-            )
-            self.train_X = torch.cat((self.train_X, self.train_X * factor))
-            self.train_Y = torch.cat((self.train_Y, self.train_Y * factor))
-            self.train_Yvar = torch.cat((self.train_Yvar, self.train_Yvar * factor))
+        self._ensure_minimum_dataset()
 
         # -------------------------------------------------------------------------------------
         # Check minimum noises
@@ -206,21 +154,7 @@ def __init__(
         # Write file with surrogate if there are transformations
         # -------------------------------------------------------------------------------------
 
-        if (self.fileTraining is not None) and (self.train_X.shape[0] + self.train_X_added.shape[0] > 0):
-            self.write_datafile(input_transform_physics, outcome_transform_physics)
-
-
-        # ------------------------------------------------------------------------------------
-        # Combine transformations in chain of PHYSICS + NORMALIZATION
-        # ------------------------------------------------------------------------------------
-
-        input_transform = botorch.models.transforms.input.ChainedInputTransform(
-            tf1=input_transform_physics, tf2=input_transform_normalization
-        ).to(self.dfT)
-
-        outcome_transform = BOTORCHtools.ChainedOutcomeTransform(
-            tf1=outcome_transform_physics, tf2=output_transformed_standardization, tf3=BOTORCHtools.OutcomeToBatchDimension()
-        ).to(self.dfT)
+        #self._write_datafile(input_transform_physics, outcome_transform_physics)
 
         # -------------------------------------------------------------------------------------
         # Obtain normalization constants now (although during training this is messed up, so needed later too)
@@ -242,9 +176,7 @@ def __init__(
         # Model
         # *************************************************************************************
 
-        print(
-            f'\t- Initializing model{" for "+self.outputs_transformed[0] if (self.outputs_transformed is not None and (len(self.outputs)==1)) else ""}',
-        )
+        print(f'\t- Initializing model{" for "+self.outputs_transformed[0] if (self.outputs_transformed is not None and (len(self.outputs)==1)) else ""}',)
 
         """
         self.train_X contains the untransformed of this specific run:   (batch1, dimX)
@@ -264,7 +196,20 @@ def __init__(
             train_Yvar_added=self.train_Yvar_added,
         )
 
-    def _define_MITIM_transformations(self):
+    def _ensure_minimum_dataset(self):
+
+        if self.train_X.shape[0] + self.train_X_added.shape[0] == 1:
+            factor = 1.2
+            print(
+                f"\t- This dataset had only one point, adding a point with linear interpolation (trick for PORTALS targets only), {factor}",
+                typeMsg="w",
+            )
+            self.train_X = torch.cat((self.train_X, self.train_X * factor))
+            self.train_Y = torch.cat((self.train_Y, self.train_Y * factor))
+            self.train_Yvar = torch.cat((self.train_Yvar, self.train_Yvar * factor))
+
+
+    def _define_MITIM_transformations(self, num_training_points):
 
         '''
         ********************************************************************************
@@ -284,7 +229,7 @@ def _define_MITIM_transformations(self):
 
             transition_position = list(self.surrogate_parameters["surrogate_transformation_variables_alltimes"].keys())[
                 np.where(
-                    self.num_training_points < np.array(list(self.surrogate_parameters["surrogate_transformation_variables_alltimes"].keys())))[0][0]
+                    num_training_points < np.array(list(self.surrogate_parameters["surrogate_transformation_variables_alltimes"].keys())))[0][0]
                     ]
 
             self.surrogate_transformation_variables = self.surrogate_parameters["surrogate_transformation_variables_alltimes"][transition_position]
@@ -312,55 +257,149 @@ def _define_MITIM_transformations(self):
 
         transformed_X = input_transformation_physics(self.train_X)
 
-        dimTransformedDV_x = transformed_X.shape[-1]
-        dimTransformedDV_y = self.train_Y.shape[-1]
+        dx_tr = transformed_X.shape[-1]
+        dy_tr = self.train_Y.shape[-1]
 
         # ------------------------------------------------------------------------------------
         # Normalizations
         # ------------------------------------------------------------------------------------
 
         input_transform_normalization = botorch.models.transforms.input.Normalize(
-            d = dimTransformedDV_x, bounds=None, batch_shape=transformed_X.shape[:-2]
+            d = dx_tr, bounds=None, batch_shape=transformed_X.shape[:-2]
         ).to(self.dfT)
         output_transformed_standardization = (
             botorch.models.transforms.outcome.Standardize(
-                m = dimTransformedDV_y,
+                m = dy_tr,
             )
         ).to(self.dfT)
 
-        return  input_transformation_physics, \
-                output_transformation_physics, \
-                input_transform_normalization, \
-                output_transformed_standardization, \
-                dimTransformedDV_x, dimTransformedDV_y
+        # ------------------------------------------------------------------------------------
+        # Combine transformations in chain of PHYSICS + NORMALIZATION + BATCHING
+        # ------------------------------------------------------------------------------------
+
+        input_transform = botorch.models.transforms.input.ChainedInputTransform(
+            tf1=input_transformation_physics, tf2=input_transform_normalization ).to(self.dfT)
+
+        outcome_transform = BOTORCHtools.ChainedOutcomeTransform(
+            tf1=output_transformation_physics, tf2=output_transformed_standardization, tf3=BOTORCHtools.OutcomeToBatchDimension() ).to(self.dfT)
+
+        return input_transform, outcome_transform, dx_tr, dy_tr
+
+    def _add_points_from_file(self):
+
+        is_this_single_output = (self.outputs is not None) and (len(self.outputs) == 1)
+        potential_addition_of_points = ("add_data_from_file" in self.surrogateOptions) and (self.surrogateOptions["add_data_from_file"] is not None)
+        
+        if potential_addition_of_points:
+            if is_this_single_output:
+                addition_of_points = self.outputs[0] in self.surrogateOptions["add_data_to_models"]
+            else:
+                raise Exception("[MITIM] add_data_from_file can only be used for single output models as of now...")
+        else:
+            addition_of_points = False
+
+        # Points to be added from file
+        continueAdding = False
+        if addition_of_points:
+
+            print(f"\t* Extending training set by points in file {self.surrogateOptions['add_data_from_file']}")
+
+            df = pd.read_csv(self.surrogateOptions["add_data_from_file"])
+            df_model = df[df['Model'] == self.outputs[0]]
+
+            if len(df_model) == 0:
+                print("\t- No points for this output in the file, nothing to add", typeMsg="i")
+                continueAdding = False
+            else:
+                continueAdding = True
+
+        if continueAdding:
+
+            # Check 1: Do the points for this output share the same x_names?
+            if df_model['x_names'].nunique() > 1:
+                print("Different x_names for points in the file, prone to errors", typeMsg='q')
+
+            # Check 2: Is it consistent with the x_names of this run?
+            x_names = df_model['x_names'].apply(ast.literal_eval).iloc[0]
+            x_names_check = self.surrogate_parameters['surrogate_transformation_variables_lasttime'][self.outputs[0]]
+            if x_names != x_names_check:
+                print("x_names in file do not match the ones in this run, prone to errors", typeMsg='q')            
+
+            train_Y_added = torch.from_numpy(df_model['y'].to_numpy()).unsqueeze(-1).to(self.dfT)
+            train_Yvar_added = torch.from_numpy(df_model['yvar'].to_numpy()).unsqueeze(-1).to(self.dfT)
+    
+            x = []
+            for i in range(len(x_names)):
+                x.append(df_model[f'x{i}'].to_numpy())
+            train_X_added_full = torch.from_numpy(np.array(x).T).to(self.dfT)
+
+            dx_tr_full = train_X_added_full.shape[-1]
+
+        else:
+
+            train_X_added_full = None
+            train_Y_added = None
+            train_Yvar_added = None
+
+            if self.fileTraining is not None:
+                train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
+                    self.train_X,
+                    self.outputs[0],
+                    self.surrogate_parameters,
+                    self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
+                )
+                dx_tr_full = train_X_Complete.shape[-1]
+            else:
+                dx_tr_full = self.train_X.shape[-1]
+
+
+        return train_X_added_full, train_Y_added, train_Yvar_added, dx_tr_full
 
     def normalization_pass(self,input_transform, outcome_transform):
         '''
-        The goal of this is to capture NOW the normalization and standardization constants,
-        by account for both the actual data and the added data from file 
+        Notes:
+            - The goal of this is to capture NOW the normalization and standardization constants,
+              by accounting for both the actual data and the added data from file
         '''
 
+        # -------------------------------------------------------------------------------------
         # Get input normalization and outcome standardization in training mode
+        # -------------------------------------------------------------------------------------
+
         input_transform['tf2'].training = True
         outcome_transform['tf2'].training = True
         outcome_transform['tf2']._is_trained = torch.tensor(False)
 
+        # -------------------------------------------------------------------------------------------------------
         # Get the input normalization constants by physics-transforming the train_x and adding the data from file
+        # -------------------------------------------------------------------------------------------------------
+
+        # Physics-transform (tf1) the training inputs of this run
         train_X_transformed = input_transform['tf1'](self.train_X)
+
+        # Concatenate the training data and the data from file
         train_X_transformed = torch.cat((train_X_transformed, self.train_X_added), axis=-2)
+
+        # Get the normalization constants
         _ = input_transform['tf2'](train_X_transformed)
 
+        # -----------------------------------------------------------------------------------------------------------
         # Get the outcome standardization constants by physics-transforming the train_y and adding the data from file
-        # With the caveat that the added points have to not be batched
+        # -----------------------------------------------------------------------------------------------------------
+
+        # Physics-transform (tf1) the training outcomes of this run
         train_Y_transformed, train_Yvar_transformed = outcome_transform['tf1'](self.train_X, self.train_Y, self.train_Yvar)
-        y, yvar = outcome_transform['tf1'](self.train_X, self.train_Y, self.train_Yvar)
         
-        train_Y_transformed = torch.cat((y, outcome_transform['tf3'].untransform(self.train_Y_added)[0]), axis=-2)
-        train_Yvar_transformed = torch.cat((yvar, outcome_transform['tf3'].untransform(self.train_Yvar_added)[0]), axis=0)
+        # Concatenate the training data and the data from file
+        train_Y_transformed = torch.cat((train_Y_transformed, self.train_Y_added), axis=-2)
+        train_Yvar_transformed = torch.cat((train_Yvar_transformed, self.train_Yvar_added), axis=0)
 
+        # Get the standardization constants
         train_Y_transformed_norm, train_Yvar_transformed_norm = outcome_transform['tf2'](train_Y_transformed, train_Yvar_transformed)
 
+        # -------------------------------------------------------------------------------------
         # Make sure they are not on training mode
+        # -------------------------------------------------------------------------------------
         input_transform['tf2'].training = False
         outcome_transform['tf2'].training = False
         outcome_transform['tf2']._is_trained = torch.tensor(True)
@@ -497,7 +536,7 @@ def predict(self, X, produceFundamental=False, nSamples=None):
 
         return mean, upper, lower, samples
 
-    def write_datafile(self, input_transform_physics, outcome_transform_physics):
+    def _write_datafile(self, input_transform_physics, outcome_transform_physics):
         """
         --------------------------------------------------------------------
         Write file with surrogate if there are transformations
@@ -507,87 +546,87 @@ def write_datafile(self, input_transform_physics, outcome_transform_physics):
         --------------------------------------------------------------------
         """
 
-        for i,output in enumerate(self.outputs):
+        if (self.fileTraining is not None) and (self.train_X.shape[-2] + self.train_X_added.shape[-2] > 0):
 
-            # ------------------------------------------------------------------------------------------------------------------------
-            # Transform the points without the added from file
-            # ------------------------------------------------------------------------------------------------------------------------
+            for i,output in enumerate(self.outputs):
 
-            # I do not use directly input_transform_physics because I need all the columns, not of this specif iteration
-            train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
-                self.train_X,
-                output,
-                self.surrogate_parameters,
-                self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
-            )
+                # ------------------------------------------------------------------------------------------------------------------------
+                # Transform the points without the added from file
+                # ------------------------------------------------------------------------------------------------------------------------
 
-            train_Y, train_Yvar = outcome_transform_physics(
-                self.train_X, self.train_Y[...,i].unsqueeze(-1), self.train_Yvar[...,i].unsqueeze(-1)
-            )
+                # I do not use input_transform_physics directly because I need all the columns, not only those of this specific iteration
+                train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
+                    self.train_X,
+                    output,
+                    self.surrogate_parameters,
+                    self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
+                )
 
-            dv_names_Complete = (
-                self.surrogate_parameters["surrogate_transformation_variables_lasttime"][output]
-                if (
-                    "surrogate_transformation_variables_lasttime" in self.surrogate_parameters
-                    and self.surrogate_parameters["surrogate_transformation_variables_lasttime"]
-                    is not None
+                train_Y, train_Yvar = outcome_transform_physics(
+                    self.train_X, self.train_Y[...,i].unsqueeze(-1), self.train_Yvar[...,i].unsqueeze(-1)
                 )
-                else [i for i in self.bounds]
-            )
 
-            if self.train_X_added_full.shape[-1] < train_X_Complete.shape[-1]:
-                print(
-                    "\t\t- Points from file have less input dimensions, extending with NaNs for writing new file",
-                    typeMsg="w",
+                dv_names_Complete = (
+                    self.surrogate_parameters["surrogate_transformation_variables_lasttime"][output]
+                    if (
+                        "surrogate_transformation_variables_lasttime" in self.surrogate_parameters
+                        and self.surrogate_parameters["surrogate_transformation_variables_lasttime"]
+                        is not None
+                    )
+                    else [i for i in self.bounds]
                 )
-                self.train_X_added_full = torch.cat(
-                    (
-                        self.train_X_added_full,
-                        torch.full(
-                            (
-                                self.train_X_added_full.shape[0],
-                                train_X_Complete.shape[-1]
-                                - self.train_X_added_full.shape[-1],
+
+                if self.train_X_added_full.shape[-1] < train_X_Complete.shape[-1]:
+                    print(
+                        "\t\t- Points from file have less input dimensions, extending with NaNs for writing new file",
+                        typeMsg="w",
+                    )
+                    self.train_X_added_full = torch.cat(
+                        (
+                            self.train_X_added_full,
+                            torch.full(
+                                (
+                                    self.train_X_added_full.shape[-2],
+                                    train_X_Complete.shape[-1]
+                                    - self.train_X_added_full.shape[-1],
+                                ),
+                                torch.nan,
                             ),
-                            torch.nan,
                         ),
-                    ),
-                    axis=-1,
-                )
-            elif self.train_X_added_full.shape[-1] > train_X_Complete.shape[-1]:
-                print(
-                    "\t\t- Points from file have more input dimensions, removing last dimensions for writing new file",
-                    typeMsg="w",
-                )
-                self.train_X_added_full = self.train_X_added_full[
-                    :, : train_X_Complete.shape[-1]
-                ]
+                        axis=-1,
+                    )
+                elif self.train_X_added_full.shape[-1] > train_X_Complete.shape[-1]:
+                    print(
+                        "\t\t- Points from file have more input dimensions, removing last dimensions for writing new file",
+                        typeMsg="w",
+                    )
+                    self.train_X_added_full = self.train_X_added_full[..., : train_X_Complete.shape[-1]]
 
-            x = torch.cat((self.train_X_added_full, train_X_Complete), axis=0)
-            y = torch.cat((self.train_Y_added, train_Y), axis=0)
-            yvar = torch.cat((self.train_Yvar_added, train_Yvar), axis=0)
+                x = torch.cat((self.train_X_added_full, train_X_Complete), axis=-2)
+                y = torch.cat((self.train_Y_added, train_Y), axis=-2)
+                yvar = torch.cat((self.train_Yvar_added, train_Yvar), axis=-2)
 
 
-            # ------------------------------------------------------------------------------------------------------------------------
-            # Merged data with existing data frame and write
-            # ------------------------------------------------------------------------------------------------------------------------
+                # ------------------------------------------------------------------------------------------------------------------------
+                # Merged data with existing data frame and write
+                # ------------------------------------------------------------------------------------------------------------------------
 
-            new_df = create_df_portals(x,y,yvar,dv_names_Complete,output)
+                new_df = create_df_portals(x,y,yvar,dv_names_Complete,output)
 
-            if self.fileTraining.exists():
+                if self.fileTraining.exists():
 
-                # Load the existing DataFrame from the HDF5 file
-                existing_df = pd.read_csv(self.fileTraining)
+                    # Load the existing DataFrame from the CSV file
+                    existing_df = pd.read_csv(self.fileTraining)
 
-                # Concatenate the existing DataFrame with the new DataFrame
-                combined_df = pd.concat([existing_df, new_df], ignore_index=True)
+                    # Concatenate the existing DataFrame with the new DataFrame
+                    combined_df = pd.concat([existing_df, new_df], ignore_index=True)
 
-            else:
+                else:
 
-                combined_df = new_df
+                    combined_df = new_df
 
-            # Save the combined DataFrame back to the file
-            combined_df.to_csv(self.fileTraining, index=False)
+                # Save the combined DataFrame back to the file
+                combined_df.to_csv(self.fileTraining, index=False)
 
     # --------------------------
     # PLOTTING AND POST-ANALYSIS
@@ -793,29 +832,31 @@ def _ensure_small_variation_suppressed(self, input_transform_physics, thr=1e-6):
         inputs, as compared to the trained data of this run. In such a case, modify this variation
         """
 
-        # Do dimensions of the non-added points change?
-        x_transform = input_transform_physics(self.train_X)
-        indecesUnchanged = torch.where(
-            (x_transform.max(axis=0)[0] - x_transform.min(axis=0)[0])
-            / x_transform.mean(axis=0)[0]
-            < thr
-        )[0]
+        if (self.train_X_added.shape[-2] > 0) and (self.train_X.shape[-2] > 1):
 
-        HasThisBeenApplied = 0
+            # Do dimensions of the non-added points change?
+            x_transform = input_transform_physics(self.train_X)
+            indecesUnchanged = torch.where(
+                (x_transform.max(axis=-2)[0] - x_transform.min(axis=-2)[0])
+                / x_transform.mean(axis=-2)[0]
+                < thr
+            )[0]
 
-        for i in indecesUnchanged:
-            if (
-                (self.train_X_added[:, i] - x_transform[0, i]) / x_transform[0, i]
-            ).abs().max() < thr:
-                HasThisBeenApplied += 1
-                for j in range(self.train_X_added.shape[0]):
-                    self.train_X_added[j, i] = x_transform[0, i]
+            HasThisBeenApplied = 0
 
-        if HasThisBeenApplied > 0:
-            print(
-                f"\t- Supression of small variations {thr:.1e} in added data applied to {HasThisBeenApplied} dims",
-                typeMsg="w",
-            )
+            for i in indecesUnchanged:
+                if (
+                    (self.train_X_added[:, i] - x_transform[0, i]) / x_transform[0, i]
+                ).abs().max() < thr:
+                    HasThisBeenApplied += 1
+                    for j in range(self.train_X_added.shape[-2]):
+                        self.train_X_added[...,j, i] = x_transform[...,0, i]
+
+            if HasThisBeenApplied > 0:
+                print(
+                    f"\t- Supression of small variations {thr:.1e} in added data applied to {HasThisBeenApplied} dims",
+                    typeMsg="w",
+                )
 
     def _ensure_minimum_noise(self):
         if ("MinimumRelativeNoise" in self.surrogateOptions) and (
diff --git a/templates/main.namelist.json b/templates/main.namelist.json
index 73837d0b..4e834c18 100644
--- a/templates/main.namelist.json
+++ b/templates/main.namelist.json
@@ -44,8 +44,8 @@
         "stds_outside": null,
         "stds_outside_checker": 5,
         "add_data_from_file": null,
-        "extrapointsModels": null,
-        "extrapointsModelsAvoidContent": null
+        "add_data_to_models": null,
+        "add_data_to_modelsAvoidContent": null
     },
     "StrategyOptions": {
         "boundsRefine": null,

From 3f13cc82395c8ba15e4968a2600b4dd00bbf4c70 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Fri, 22 Nov 2024 16:15:20 -0500
Subject: [PATCH 25/34] good progress

---
 .../portals/utils/PORTALSanalysis.py          |   8 +-
 src/mitim_tools/opt_tools/BOTORCHtools.py     | 618 +++++++++---------
 src/mitim_tools/opt_tools/STEPtools.py        |   4 +-
 src/mitim_tools/opt_tools/SURROGATEtools.py   |  29 +-
 4 files changed, 317 insertions(+), 342 deletions(-)

diff --git a/src/mitim_modules/portals/utils/PORTALSanalysis.py b/src/mitim_modules/portals/utils/PORTALSanalysis.py
index 69218a7f..901eaf76 100644
--- a/src/mitim_modules/portals/utils/PORTALSanalysis.py
+++ b/src/mitim_modules/portals/utils/PORTALSanalysis.py
@@ -730,15 +730,15 @@ def __init__(self, gpdict):
                     self._output_variables.append(key)
         for key in self._models:
             if hasattr(self._models[key], 'gpmodel'):
-                if hasattr(self._models[key].gpmodel, 'train_X_usedToTrain'):
-                    xtrain = self._models[key].gpmodel.train_X_usedToTrain.detach().cpu().numpy()
+                if hasattr(self._models[key].gpmodel, 'train_X_use'):
+                    xtrain = self._models[key].gpmodel.train_X_use.detach().cpu().numpy()
                     if len(xtrain.shape) < 2:
                         xtrain = np.atleast_2d(xtrain)
                     if xtrain.shape[1] != len(self._input_variables):
                         xtrain = xtrain.T
                     self._training_inputs[key] = pd.DataFrame(xtrain, columns=self._input_variables)
-                if hasattr(self._models[key].gpmodel, 'train_Y_usedToTrain'):
-                    ytrain = self._models[key].gpmodel.train_Y_usedToTrain.detach().cpu().numpy()
+                if hasattr(self._models[key].gpmodel, 'train_Y_use'):
+                    ytrain = self._models[key].gpmodel.train_Y_use.detach().cpu().numpy()
                     if len(ytrain.shape) < 2:
                         ytrain = np.atleast_2d(ytrain)
                     if ytrain.shape[1] != 1:
diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index c08cca5f..ac8e8596 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -10,10 +10,6 @@
 from IPython import embed
 from mitim_tools.misc_tools.LOGtools import printMsg as print
 
-# ----------------------------------------------------------------------------------------------------------------------------
-# SingleTaskGP needs to be modified because I want to input options and outcome transform taking X, otherwise it should be a copy
-# ----------------------------------------------------------------------------------------------------------------------------
-
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import OutcomeTransform
 from botorch.models.utils import validate_input_scaling
@@ -24,8 +20,16 @@
 from botorch.posteriors.gpytorch import GPyTorchPosterior
 from botorch.posteriors.posterior import Posterior
 from linear_operator.operators import BlockDiagLinearOperator
-
-
+from gpytorch.distributions import MultitaskMultivariateNormal
+
+'''
+*******************************************************************************
+SingleTaskGP needs to be custom in MITIM because:
+    - Training occurs in transformed space directly, to allow for _added points
+    - Outcome transform calls are modified to take X 
+    - Options are provided (secondary)
+*******************************************************************************
+'''
 class SingleTaskGP_MITIM(botorch.models.gp_regression.SingleTaskGP):
     def __init__(
         self,
@@ -41,27 +45,36 @@ def __init__(
         train_Yvar_added=torch.Tensor([]),
     ):
         """
-        _added refers to already-transformed variables that are added from table
+        Notes:
+            - train_X is raw untransformed,     [batch, dx]
+            - train_Y is raw untransformed,     [batch, dy]
+            - train_Yvar is raw untransformed,  [batch, dy]
+            - _added refers to already-transformed variables (tf1) that are added from table:
+                    train_X_added is raw transformed,   [dytr, batch, dxtr]
+                    train_Y_added is raw transformed,   [batch, dytr]
+                    train_Yvar_added is raw transformed,[batch, dytr]
         """
 
+        # -----------------------------------------------------------------------
+        # Surrogate model options
+        # -----------------------------------------------------------------------
+
         TypeMean = surrogateOptions.get("TypeMean", 0)
         TypeKernel = surrogateOptions.get("TypeKernel", 0)
         FixedNoise = surrogateOptions.get("FixedNoise", False)
         ConstrainNoise = surrogateOptions.get("ConstrainNoise", -1e-4)
         learn_additional_noise = surrogateOptions.get("ExtraNoise", False)
         print("\t\t* Surrogate model options:")
-        print(
-            f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}"
-        )
+        print(f"\t\t\t- FixedNoise: {FixedNoise} (extra noise: {learn_additional_noise}), TypeMean: {TypeMean}, TypeKernel: {TypeKernel}, ConstrainNoise: {ConstrainNoise:.1e}")
 
         # ** Store training data
 
         # x, y are raw untransformed, and I want raw transformed. xa, ya are raw transformed
         #x_tr = input_transform["tf1"](train_X)if input_transform is not None else train_X
         #y_tr, yv_tr = outcome_transform["tf1"](train_X, train_Y, train_Yvar) if outcome_transform is not None else train_Y, train_Yvar
-        #self.train_X_usedToTrain = torch.cat((train_X_added, x_tr), axis=-2)
-        #self.train_Y_usedToTrain = torch.cat((train_Y_added, y_tr), axis=-2)
-        #self.train_Yvar_usedToTrain = torch.cat((train_Yvar_added, yv_tr), axis=-2)
+        #self.train_X_use = torch.cat((train_X_added, x_tr), axis=-2)
+        #self.train_Y_use = torch.cat((train_Y_added, y_tr), axis=-2)
+        #self.train_Yvar_use = torch.cat((train_Yvar_added, yv_tr), axis=-2)
 
         # Grab num_outputs
         self._num_outputs = train_Y.shape[-1]
@@ -69,9 +82,7 @@ def __init__(
         # Grab ard_num_dims
         if train_X.shape[0] > 0:
             with torch.no_grad():
-                transformed_X = self.transform_inputs(
-                    X=train_X, input_transform=input_transform
-                )
+                transformed_X = self.transform_inputs(X=train_X, input_transform=input_transform)
             self.ard_num_dims = transformed_X.shape[-1]
         else:
             self.ard_num_dims = train_X_added.shape[-1]
@@ -84,29 +95,26 @@ def __init__(
         # Added points are raw transformed, so I need to normalize them
         if train_X_added.shape[0] > 0:
             train_X_added = input_transform["tf2"](train_X_added)
-            train_Y_added = outcome_transform["tf3"].untransform(train_Y_added)[0]
-            train_Yvar_added = outcome_transform["tf3"].untransform(train_Yvar_added)[0]
             train_Y_added, train_Yvar_added = outcome_transform["tf3"](*outcome_transform["tf2"](train_Y_added, train_Yvar_added))
 
-        # -----
-
-        train_X_usedToTrain = torch.cat((transformed_X, train_X_added), axis=-2)
-        train_Y_usedToTrain = torch.cat((train_Y, train_Y_added), axis=-2)
-        train_Yvar_usedToTrain = torch.cat((train_Yvar, train_Yvar_added), axis=-2)
+        # Concatenate the added points
+        train_X_use = torch.cat((transformed_X, train_X_added), axis=-2)
+        train_Y_use = torch.cat((train_Y, train_Y_added), axis=-2)
+        train_Yvar_use = torch.cat((train_Yvar, train_Yvar_added), axis=-2)
 
         # Validate again after applying the transforms
-        self._validate_tensor_args(X=train_X_usedToTrain, Y=train_Y_usedToTrain, Yvar=train_Yvar_usedToTrain)
+        self._validate_tensor_args(X=train_X_use, Y=train_Y_use, Yvar=train_Yvar_use)
         ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None)
         validate_input_scaling(
-            train_X=train_X_usedToTrain,
-            train_Y=train_Y_usedToTrain,
-            train_Yvar=train_Yvar_usedToTrain,
+            train_X=train_X_use,
+            train_Y=train_Y_use,
+            train_Yvar=train_Yvar_use,
             ignore_X_dims=ignore_X_dims,
         )
-        self._set_dimensions(train_X=train_X_usedToTrain, train_Y=train_Y_usedToTrain)
+        self._set_dimensions(train_X=train_X_use, train_Y=train_Y_use)
         
-        train_X_usedToTrain, train_Y_usedToTrain, train_Yvar_usedToTrain = self._transform_tensor_args(
-            X=train_X_usedToTrain, Y=train_Y_usedToTrain, Yvar=train_Yvar_usedToTrain
+        train_X_use, train_Y_use, train_Yvar_use = self._transform_tensor_args(
+            X=train_X_use, Y=train_Y_use, Yvar=train_Yvar_use
         )
 
         """
@@ -122,7 +130,7 @@ def __init__(
             
             likelihood = (
                 gpytorch.likelihoods.gaussian_likelihood.FixedNoiseGaussianLikelihood(
-                    noise=train_Yvar_usedToTrain.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning)
+                    noise=train_Yvar_use.clip(1e-6), # I clip the noise to avoid numerical issues (gpytorch would do it anyway, but this way it doesn't throw a warning)
                     batch_shape=self._aug_batch_shape,
                     learn_additional_noise=learn_additional_noise,
                 )
@@ -157,12 +165,7 @@ def __init__(
 		-----------------------------------------------------------------------
 		"""
 
-        gpytorch.models.exact_gp.ExactGP.__init__(
-            self,
-            train_inputs=train_X_usedToTrain,
-            train_targets=train_Y_usedToTrain,
-            likelihood=likelihood,
-        )
+        gpytorch.models.exact_gp.ExactGP.__init__(self, train_inputs=train_X_use, train_targets=train_Y_use, likelihood=likelihood)
 
         """
 		-----------------------------------------------------------------------
@@ -172,20 +175,16 @@ def __init__(
 
         if TypeMean == 0:
             self.mean_module = gpytorch.means.constant_mean.ConstantMean(
-                batch_shape=self._aug_batch_shape
-            )
+                batch_shape=self._aug_batch_shape )
         elif TypeMean == 1:
             self.mean_module = gpytorch.means.linear_mean.LinearMean(
-                self.ard_num_dims, batch_shape=self._aug_batch_shape, bias=True
-            )
+                self.ard_num_dims, batch_shape=self._aug_batch_shape, bias=True )
         elif TypeMean == 2:
             self.mean_module = MITIM_LinearMeanGradients(
-                batch_shape=self._aug_batch_shape, variables=variables
-            )
+                batch_shape=self._aug_batch_shape, variables=variables )
         elif TypeMean == 3:
             self.mean_module = MITIM_CriticalGradient(
-                batch_shape=self._aug_batch_shape, variables=variables
-            )
+                batch_shape=self._aug_batch_shape, variables=variables )
 
         """
 		-----------------------------------------------------------------------
@@ -198,9 +197,7 @@ def __init__(
         outputscale_prior = gpytorch.priors.torch_priors.GammaPrior(2.0, 0.15)
 
         # Do not allow too small lengthscales?
-        lengthscale_constraint = (
-            None  # gpytorch.constraints.constraints.GreaterThan(0.05)
-        )
+        lengthscale_constraint = None  # gpytorch.constraints.constraints.GreaterThan(0.05)
 
         self._subset_batch_dict["covar_module.raw_outputscale"] = -1
         self._subset_batch_dict["covar_module.base_kernel.raw_lengthscale"] = -3
@@ -298,18 +295,14 @@ def posterior(
             return posterior_transform(posterior)
         return posterior
 
-class BatchBroadcastedInputTransform_MITIM(botorch.models.transforms.input.BatchBroadcastedInputTransform):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-  
-    def _Xs_and_transforms(self, X):
-        Xs = (X,) * len(self.transforms)
-        return zip(Xs, self.transforms)
-
-# ----------------------------------------------------------------------------------------------------------------------------
-# ModelListGP needs to be modified to allow me to have "common" parameters to models, to not run at every transformation again
-# ----------------------------------------------------------------------------------------------------------------------------
-
+'''
+*******************************************************************************
+ModelListGP needs to be custom in MITIM because:
+    - I shouldn't run the full transformation at every posterior call, only
+      once per ModelList. This will allow me to have "common"  parameters
+      to models, to not run at every transformation again
+*******************************************************************************
+'''
 class ModelListGP_MITIM(botorch.models.model_list_gp_regression.ModelListGP):
     def __init__(self, *gp_models):
         super().__init__(*gp_models)
@@ -339,10 +332,11 @@ def posterior(self, *args, **kwargs):
         self.cold_startCommons()
         return posterior
 
-# ----------------------------------------------------------------------------------------------------------------------------
-# I need my own transformation based on physics
-# ----------------------------------------------------------------------------------------------------------------------------
-
+'''
+*******************************************************************************
+Physics transformation for inputs
+*******************************************************************************
+'''
 class Transformation_Inputs(
     botorch.models.transforms.input.ReversibleInputTransform, torch.nn.Module):
     def __init__(
@@ -375,9 +369,7 @@ def __init__(
     @botorch.models.transforms.utils.subset_transform
     def _transform(self, X):
         if (self.output is not None) and (self.flag_to_evaluate):
-            Xtr, parameters_combined = self.surrogate_parameters[
-                "transformationInputs"
-            ](
+            Xtr, parameters_combined = self.surrogate_parameters["transformationInputs"](
                 X,
                 self.output,
                 self.surrogate_parameters,
@@ -397,13 +389,13 @@ def _transform(self, X):
     def _untransform(self, X):
         raise NotImplementedError("[MITIM] This situation has not been implemented yet")
 
-
-# ----------------------------------------------------------------------------------------------------------------------------
-# I need my own outcome transformation based on physics and that takes "X" as well
-# ----------------------------------------------------------------------------------------------------------------------------
-
-
-# Copy standardize but modify in untransform the "std" which is my factor!
+'''
+*******************************************************************************
+Physics transformation for outputs. Notes:
+    - It needs to take "X" as well
+    - I leverage what's built into Standardize to avoid repeating code
+*******************************************************************************
+'''
 class Transformation_Outcomes(botorch.models.transforms.outcome.Standardize):
     def __init__(self, m, outputs_names, surrogate_parameters):
         super().__init__(m)
@@ -499,8 +491,14 @@ def untransform_posterior_mod(self, posterior):
         mvn_tf = mvn.__class__(mean=mean_tf, covariance_matrix=covar_tf, **kwargs)
         return GPyTorchPosterior(mvn_tf)
 
-
 # Because I need it to take X too (for physics only, which is always the first tf)
+
+'''
+*******************************************************************************
+ChainedOutcomeTransform needs to be custom in MITIM because the first
+transformation (tf1) is assumed to be the physics one and  needs to take X
+*******************************************************************************
+'''
 class ChainedOutcomeTransform(
     botorch.models.transforms.outcome.ChainedOutcomeTransform):
     def __init__(self, **transforms):
@@ -528,6 +526,234 @@ def untransform_posterior(self, X, posterior):
     def untransform(self, X, Y, Yvar):
         raise NotImplementedError("[MITIM] This situation has not been implemented yet")
 
+class BatchBroadcastedInputTransform(InputTransform, ModuleDict):
+    r"""An input transform representing a list of transforms to be broadcasted."""
+
+    def __init__(
+        self,
+        transforms: list[InputTransform],
+        broadcast_index: int = -3,
+    ) -> None:
+        r"""A transform list that is broadcasted across a batch dimension specified by
+        `broadcast_index`. This allows using a batched Gaussian process model when
+        the input transforms are different for different batch dimensions.
+
+        Args:
+            transforms: The transforms to broadcast across the first batch dimension.
+                The transform at position i in the list will be applied to `X[i]` for
+                a given input tensor `X` in the forward pass.
+            broadcast_index: The tensor index at which the transforms are broadcasted.
+
+        Example:
+            >>> tf1 = Normalize(d=2)
+            >>> tf2 = InputStandardize(d=2)
+            >>> tf = BatchBroadcastedInputTransform(transforms=[tf1, tf2])
+        """
+        super().__init__()
+        self.transform_on_train = False
+        self.transform_on_eval = False
+        self.transform_on_fantasize = False
+        self.transforms = transforms
+        if broadcast_index >= 0:
+            raise ValueError("A non-negative broadcast index is not supported yet.")
+        if broadcast_index in (-2, -1):
+            raise ValueError(
+                "The broadcast index cannot be -2 and -1, as these indices are reserved"
+                " for non-batch, data and input dimensions."
+            )
+        self.broadcast_index = broadcast_index
+        self.is_one_to_many = self.transforms[0].is_one_to_many
+        if not all(tf.is_one_to_many == self.is_one_to_many for tf in self.transforms):
+            raise ValueError(  # output shapes of transforms must be the same
+                "All transforms must have the same is_one_to_many property."
+            )
+        for tf in self.transforms:
+            self.transform_on_train |= tf.transform_on_train
+            self.transform_on_eval |= tf.transform_on_eval
+            self.transform_on_fantasize |= tf.transform_on_fantasize
+
+    def transform(self, X: Tensor) -> Tensor:
+        r"""Transform the inputs to a model.
+
+        Individual transforms are applied in sequence and results are returned as
+        a batched tensor.
+
+        Args:
+            X: A `batch_shape x n x d`-dim tensor of inputs.
+
+        Returns:
+            A `batch_shape x n x d`-dim tensor of transformed inputs.
+        """
+        return torch.stack(
+            [t.forward(Xi) for Xi, t in self._Xs_and_transforms(X)],
+            dim=self.broadcast_index,
+        )
+
+    def untransform(self, X: Tensor) -> Tensor:
+        r"""Un-transform the inputs to a model.
+
+        Un-transforms of the individual transforms are applied in reverse sequence.
+
+        Args:
+            X: A `batch_shape x n x d`-dim tensor of transformed inputs.
+
+        Returns:
+            A `batch_shape x n x d`-dim tensor of un-transformed inputs.
+        """
+        # return torch.stack(
+        #     [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)],
+        #     dim=self.broadcast_index,
+        # )
+        #
+        # return self.transforms[0].untransform(X)
+        Xt = torch.stack(
+            [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)],
+            dim=self.broadcast_index,
+        )
+        Xt = Xt.unique(dim=self.broadcast_index)
+        # since we are assuming that this batch dimension was added solely
+        # because of different transforms, rather than different original inputs X.
+        assert Xt.shape[self.broadcast_index] == 1
+        return Xt.squeeze(self.broadcast_index)
+
+    def equals(self, other: InputTransform) -> bool:
+        r"""Check if another input transform is equivalent.
+
+        Args:
+            other: Another input transform.
+
+        Returns:
+            A boolean indicating if the other transform is equivalent.
+        """
+        return (
+            super().equals(other=other)
+            and all(t1.equals(t2) for t1, t2 in zip(self.transforms, other.transforms))
+            and (self.broadcast_index == other.broadcast_index)
+        )
+
+    def preprocess_transform(self, X: Tensor) -> Tensor:
+        r"""Apply transforms for preprocessing inputs.
+
+        The main use cases for this method are 1) to preprocess training data
+        before calling `set_train_data` and 2) preprocess `X_baseline` for noisy
+        acquisition functions so that `X_baseline` is "preprocessed" with the
+        same transformations as the cached training inputs.
+
+        Args:
+            X: A `batch_shape x n x d`-dim tensor of inputs.
+
+        Returns:
+            A `batch_shape x n x d`-dim tensor of (transformed) inputs.
+        """
+        return torch.stack(
+            [t.preprocess_transform(Xi) for Xi, t in self._Xs_and_transforms(X)],
+            dim=self.broadcast_index,
+        )
+
+    def _Xs_and_transforms(self, X: Tensor) -> Iterable[tuple[Tensor, InputTransform]]:
+        r"""Returns an iterable of sub-tensors of X and their associated transforms.
+
+        Args:
+            X: A `batch_shape x n x d`-dim tensor of inputs.
+
+        Returns:
+            An iterable containing tuples of sub-tensors of X and their transforms.
+        """
+        # transform_shape = (
+        #     len(input_transform.transforms),
+        #     *(1 for _ in range(abs(self.broadcast_index) - 1)),
+        # )
+        # print(f"{transform_shape = }")
+        # print(f"{X.shape = }")
+        # TODO: Add dimension rather than broadcasting over the inputs.
+
+        # broadcast_shape = torch.broadcast_shapes(transform_shape, X.shape)
+        # X_expanded = X.expand(broadcast_shape)
+        # Xs = X_expanded.unbind(dim=self.broadcast_index)
+        # return zip(Xs, self.transforms)
+        return zip([X for _ in self.transforms], self.transforms)
+
+class OutcomeToBatchDimension(OutcomeTransform):
+    """Transform permuting dimensions in the outcome tensor."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(
+        self, Y: Tensor, Yvar: Tensor | None = None
+    ) -> tuple[Tensor, Tensor | None]:
+        r"""Transform the outcomes in a model's training targets
+
+        Args:
+            Y: A `batch_shape x n x m`-dim tensor of training targets.
+            Yvar: A `batch_shape x n x m`-dim tensor of observation noises
+                associated with the training targets (if applicable).
+
+        Returns:
+            A two-tuple with the transformed outcomes (batch_shape x m x n x 1).
+
+            - The transformed outcome observations.
+            - The transformed observation noise (if applicable).
+        """
+        return Y.unsqueeze(-3).transpose(-3, -1), (
+            Yvar.unsqueeze(-3).transpose(-3, -1) #if Yvar else None
+        )
+
+    def untransform(
+        self, Y: Tensor, Yvar: Tensor | None = None
+    ) -> tuple[Tensor, Tensor | None]:
+        r"""Un-transform previously transformed outcomes
+
+        Args:
+            Y: A `batch_shape x n x m`-dim tensor of transformed training targets.
+            Yvar: A `batch_shape x n x m`-dim tensor of transformed observation
+                noises associated with the training targets (if applicable).
+
+        Returns:
+            A two-tuple with the un-transformed outcomes:
+
+            - The un-transformed outcome observations.
+            - The un-transformed observation noise (if applicable).
+        """
+        assert Y.shape[-1] == 1
+        Y_perm = Y.transpose(-3, -1).squeeze(-3)
+        Yvar_perm = Yvar.transpose(-3, -1).squeeze(-3) if Yvar else None
+        return Y_perm, Yvar_perm
+
+    @property
+    def _is_linear(self) -> bool:
+        """
+        True for transformations such as `Standardize`; these should be able to apply
+        `untransform_posterior` to a GPyTorchPosterior and return a GPyTorchPosterior,
+        because a multivariate normal distribution should remain multivariate normal
+        after applying the transform.
+        """
+        return True
+
+    def untransform_posterior(self, posterior: Posterior) -> Posterior:
+        r"""Un-transform a posterior.
+
+        Posteriors with `_is_linear=True` should return a `GPyTorchPosterior` when
+        `posterior` is a `GPyTorchPosterior`. Posteriors with `_is_linear=False`
+        likely return a `TransformedPosterior` instead.
+
+        Args:
+            posterior: A posterior in the transformed space.
+
+        Returns:
+            The un-transformed posterior.
+        """
+        mvn = posterior.mvn
+        # print(f"{posterior.mean.shape = }")
+        # print(f"{mvn.mean.shape = }")
+        mean = self.untransform(posterior.mean)[0]
+        # print(f"{mean.shape = }")
+        covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
+        # could potentially use from_independent_mvns
+        # print(f"{mvn._covar.shape = }")
+        # print(f"{covar.shape=}")
+        dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
+        return GPyTorchPosterior(distribution=dis)
 
 # ----------------------------------------------------------------------------------------------------------------------------
 # Mean acquisition function in botorch doesn't allow objectives because it's analytic
@@ -575,7 +801,7 @@ def forward(self, X):
         return acq
 
 # ----------------------------------------------------------------------------------------------------------------------------
-# Custom kernels
+# Custom kernels and means
 # ----------------------------------------------------------------------------------------------------------------------------
 
 class MITIM_NNKernel(gpytorch.kernels.Kernel):
@@ -664,7 +890,6 @@ def forward(
 
         return val
 
-
 class MITIM_ConstantKernel(gpytorch.kernels.Kernel):
     has_lengthscale = False
 
@@ -695,13 +920,8 @@ def forward(
 
         return val
 
-
-# ----------------------------------------------------------------------------------------------------------------------------
-# Custom means
-# ----------------------------------------------------------------------------------------------------------------------------
-
-# mitim application: If a variable is a gradient, do linear, if not, do just bias
 class MITIM_LinearMeanGradients(gpytorch.means.mean.Mean):
+    # PORTALS application: If a variable is a gradient, do linear, if not, do just bias
     def __init__(self, batch_shape=torch.Size(), variables=None, **kwargs):
         super().__init__()
 
@@ -728,7 +948,6 @@ def forward(self, x):
         res = x[..., self.indeces_grad].matmul(self.weights_lin).squeeze(-1) + self.bias
         return res
 
-
 class MITIM_CriticalGradient(gpytorch.means.mean.Mean):
     def __init__(self, batch_shape=torch.Size(), variables=None, **kwargs):
         super().__init__()
@@ -776,234 +995,3 @@ def forward(self, x):
         return res
 
 
-
-class BatchBroadcastedInputTransform(InputTransform, ModuleDict):
-    r"""An input transform representing a list of transforms to be broadcasted."""
-
-    def __init__(
-        self,
-        transforms: list[InputTransform],
-        broadcast_index: int = -3,
-    ) -> None:
-        r"""A transform list that is broadcasted across a batch dimension specified by
-        `broadcast_index`. This is allows using a batched Gaussian process model when
-        the input transforms are different for different batch dimensions.
-
-        Args:
-            transforms: The transforms to broadcast across the first batch dimension.
-                The transform at position i in the list will be applied to `X[i]` for
-                a given input tensor `X` in the forward pass.
-            broadcast_index: The tensor index at which the transforms are broadcasted.
-
-        Example:
-            >>> tf1 = Normalize(d=2)
-            >>> tf2 = InputStandardize(d=2)
-            >>> tf = BatchBroadcastedTransformList(transforms=[tf1, tf2])
-        """
-        super().__init__()
-        self.transform_on_train = False
-        self.transform_on_eval = False
-        self.transform_on_fantasize = False
-        self.transforms = transforms
-        if broadcast_index >= 0:
-            raise ValueError("A non-negative broadcast index is not supported yet.")
-        if broadcast_index in (-2, -1):
-            raise ValueError(
-                "The broadcast index cannot be -2 and -1, as these indices are reserved"
-                " for non-batch, data and input dimensions."
-            )
-        self.broadcast_index = broadcast_index
-        self.is_one_to_many = self.transforms[0].is_one_to_many
-        if not all(tf.is_one_to_many == self.is_one_to_many for tf in self.transforms):
-            raise ValueError(  # output shapes of transforms must be the same
-                "All transforms must have the same is_one_to_many property."
-            )
-        for tf in self.transforms:
-            self.transform_on_train |= tf.transform_on_train
-            self.transform_on_eval |= tf.transform_on_eval
-            self.transform_on_fantasize |= tf.transform_on_fantasize
-
-    def transform(self, X: Tensor) -> Tensor:
-        r"""Transform the inputs to a model.
-
-        Individual transforms are applied in sequence and results are returned as
-        a batched tensor.
-
-        Args:
-            X: A `batch_shape x n x d`-dim tensor of inputs.
-
-        Returns:
-            A `batch_shape x n x d`-dim tensor of transformed inputs.
-        """
-        return torch.stack(
-            [t.forward(Xi) for Xi, t in self._Xs_and_transforms(X)],
-            dim=self.broadcast_index,
-        )
-
-    def untransform(self, X: Tensor) -> Tensor:
-        r"""Un-transform the inputs to a model.
-
-        Un-transforms of the individual transforms are applied in reverse sequence.
-
-        Args:
-            X: A `batch_shape x n x d`-dim tensor of transformed inputs.
-
-        Returns:
-            A `batch_shape x n x d`-dim tensor of un-transformed inputs.
-        """
-        # return torch.stack(
-        #     [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)],
-        #     dim=self.broadcast_index,
-        # )
-        #
-        # return self.transforms[0].untransform(X)
-        Xt = torch.stack(
-            [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)],
-            dim=self.broadcast_index,
-        )
-        Xt = Xt.unique(dim=self.broadcast_index)
-        # since we are assuming that this batch dimension was added solely
-        # because of different transforms, rather than different original inputs X.
-        assert Xt.shape[self.broadcast_index] == 1
-        return Xt.squeeze(self.broadcast_index)
-
-    def equals(self, other: InputTransform) -> bool:
-        r"""Check if another input transform is equivalent.
-
-        Args:
-            other: Another input transform.
-
-        Returns:
-            A boolean indicating if the other transform is equivalent.
-        """
-        return (
-            super().equals(other=other)
-            and all(t1.equals(t2) for t1, t2 in zip(self.transforms, other.transforms))
-            and (self.broadcast_index == other.broadcast_index)
-        )
-
-    def preprocess_transform(self, X: Tensor) -> Tensor:
-        r"""Apply transforms for preprocessing inputs.
-
-        The main use cases for this method are 1) to preprocess training data
-        before calling `set_train_data` and 2) preprocess `X_baseline` for noisy
-        acquisition functions so that `X_baseline` is "preprocessed" with the
-        same transformations as the cached training inputs.
-
-        Args:
-            X: A `batch_shape x n x d`-dim tensor of inputs.
-
-        Returns:
-            A `batch_shape x n x d`-dim tensor of (transformed) inputs.
-        """
-        return torch.stack(
-            [t.preprocess_transform(Xi) for Xi, t in self._Xs_and_transforms(X)],
-            dim=self.broadcast_index,
-        )
-
-    def _Xs_and_transforms(self, X: Tensor) -> Iterable[tuple[Tensor, InputTransform]]:
-        r"""Returns an iterable of sub-tensors of X and their associated transforms.
-
-        Args:
-            X: A `batch_shape x n x d`-dim tensor of inputs.
-
-        Returns:
-            An iterable containing tuples of sub-tensors of X and their transforms.
-        """
-        # transform_shape = (
-        #     len(input_transform.transforms),
-        #     *(1 for _ in range(abs(self.broadcast_index) - 1)),
-        # )
-        # print(f"{transform_shape = }")
-        # print(f"{X.shape = }")
-        # TODO: Add dimension rather than broadcasting over the inputs.
-
-        # broadcast_shape = torch.broadcast_shapes(transform_shape, X.shape)
-        # X_expanded = X.expand(broadcast_shape)
-        # Xs = X_expanded.unbind(dim=self.broadcast_index)
-        # return zip(Xs, self.transforms)
-        return zip([X for _ in self.transforms], self.transforms)
-
-class OutcomeToBatchDimension(OutcomeTransform):
-    """Transform permuting dimensions in the outcome tensor."""
-
-    def __init__(self):
-        super().__init__()
-
-    def forward(
-        self, Y: Tensor, Yvar: Tensor | None = None
-    ) -> tuple[Tensor, Tensor | None]:
-        r"""Transform the outcomes in a model's training targets
-
-        Args:
-            Y: A `batch_shape x n x m`-dim tensor of training targets.
-            Yvar: A `batch_shape x n x m`-dim tensor of observation noises
-                associated with the training targets (if applicable).
-
-        Returns:
-            A two-tuple with the transformed outcomes (batch_shape x m x n x 1).
-
-            - The transformed outcome observations.
-            - The transformed observation noise (if applicable).
-        """
-        return Y.unsqueeze(-3).transpose(-3, -1), (
-            Yvar.unsqueeze(-3).transpose(-3, -1) #if Yvar else None
-        )
-
-    def untransform(
-        self, Y: Tensor, Yvar: Tensor | None = None
-    ) -> tuple[Tensor, Tensor | None]:
-        r"""Un-transform previously transformed outcomes
-
-        Args:
-            Y: A `batch_shape x n x m`-dim tensor of transfomred training targets.
-            Yvar: A `batch_shape x n x m`-dim tensor of transformed observation
-                noises associated with the training targets (if applicable).
-
-        Returns:
-            A two-tuple with the un-transformed outcomes:
-
-            - The un-transformed outcome observations.
-            - The un-transformed observation noise (if applicable).
-        """
-        assert Y.shape[-1] == 1
-        Y_perm = Y.transpose(-3, -1).squeeze(-3)
-        Yvar_perm = Yvar.transpose(-3, -1).squeeze(-3) if Yvar else None
-        return Y_perm, Yvar_perm
-
-    @property
-    def _is_linear(self) -> bool:
-        """
-        True for transformations such as `Standardize`; these should be able to apply
-        `untransform_posterior` to a GPyTorchPosterior and return a GPyTorchPosterior,
-        because a multivariate normal distribution should remain multivariate normal
-        after applying the transform.
-        """
-        return True
-
-    def untransform_posterior(self, posterior: Posterior) -> Posterior:
-        r"""Un-transform a posterior.
-
-        Posteriors with `_is_linear=True` should return a `GPyTorchPosterior` when
-        `posterior` is a `GPyTorchPosterior`. Posteriors with `_is_linear=False`
-        likely return a `TransformedPosterior` instead.
-
-        Args:
-            posterior: A posterior in the transformed space.
-
-        Returns:
-            The un-transformed posterior.
-        """
-        mvn = posterior.mvn
-        # print(f"{posterior.mean.shape = }")
-        # print(f"{mvn.mean.shape = }")
-        mean = self.untransform(posterior.mean)[0]
-        # print(f"{mean.shape = }")
-        covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
-        # could potentially use from_independent_mvns
-        # print(f"{mvn._covar.shape = }")
-        # print(f"{covar.shape=}")
-        from gpytorch.distributions import MultivariateNormal
-        dis = MultivariateNormal(mean=mean, covariance_matrix=covar)
-        return GPyTorchPosterior(distribution=dis)
-
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 24b5689d..c98fec94 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -94,8 +94,8 @@ def fit_step(self, avoidPoints=None, fit_output_contains=None):
         with IOtools.timer(name = "\n\t- Fitting", name_timer = '\t\t- Time: ') as t:
 
             self.GP = {}
-            #self._fit_multioutput_model(); self.GP["combined_model"] = self.GP["mo_model"]
-            self._fit_individual_models(fit_output_contains=fit_output_contains)
+            self._fit_multioutput_model(); self.GP["combined_model"] = self.GP["mo_model"]
+            #self._fit_individual_models(fit_output_contains=fit_output_contains)
 
         if self.fileOutputs is not None:
             with open(self.fileOutputs, "a") as f:
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 105a7a5d..0b4089f3 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -178,11 +178,6 @@ def __init__(
 
         print(f'\t- Initializing model{" for "+self.outputs_transformed[0] if (self.outputs_transformed is not None and (len(self.outputs)==1)) else ""}',)
 
-        """
-        self.train_X contains the untransformed of this specific run:   (batch1, dimX)
-        self.train_X_added contains the transformed of the table:       (batch2, dimXtr)
-        """
-
         self.gpmodel = BOTORCHtools.SingleTaskGP_MITIM(
             self.train_X,
             self.train_Y,
@@ -267,11 +262,7 @@ def _define_MITIM_transformations(self, num_training_points):
         input_transform_normalization = botorch.models.transforms.input.Normalize(
             d = dx_tr, bounds=None, batch_shape=transformed_X.shape[:-2]
         ).to(self.dfT)
-        output_transformed_standardization = (
-            botorch.models.transforms.outcome.Standardize(
-                m = dy_tr,
-            )
-        ).to(self.dfT)
+        output_transformed_standardization = botorch.models.transforms.outcome.Standardize(m = dy_tr).to(self.dfT)
 
         # ------------------------------------------------------------------------------------
         # Combine transformations in chain of PHYSICS + NORMALIZATION + BATCHING
@@ -341,17 +332,13 @@ def _add_points_from_file(self):
             train_Y_added = None
             train_Yvar_added = None
 
-            if self.fileTraining is not None:
-                train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
-                    self.train_X,
-                    self.outputs[0],
-                    self.surrogate_parameters,
-                    self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
-                )
-                dx_tr_full = train_X_Complete.shape[-1]
-            else:
-                dx_tr_full = self.train_X.shape[-1]
-
+            train_X_Complete, _ = self.surrogate_parameters["transformationInputs"](
+                self.train_X,
+                self.outputs[0],
+                self.surrogate_parameters,
+                self.surrogate_parameters["surrogate_transformation_variables_lasttime"],
+            )
+            dx_tr_full = train_X_Complete.shape[-1]
 
         return train_X_added_full, train_Y_added, train_Yvar_added, dx_tr_full
 

From 9484c4dd5eb169a85d4a4c6fdabb9e1e19d67577 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 23 Nov 2024 18:16:17 -0500
Subject: [PATCH 26/34] Working fine without _added and fundamental context.
 Still expensive acq

---
 src/mitim_tools/opt_tools/BOTORCHtools.py     | 311 +++++++++---------
 src/mitim_tools/opt_tools/STEPtools.py        |   1 -
 src/mitim_tools/opt_tools/SURROGATEtools.py   |  20 +-
 .../opt_tools/optimizers/BOTORCHoptim.py      |   9 +-
 tests/PORTALS_workflow.py                     |   2 +-
 5 files changed, 183 insertions(+), 160 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index ac8e8596..84c9352a 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -53,6 +53,10 @@ def __init__(
                     train_X_added is raw transformed,   [dytr, batch, dxtr]
                     train_Y_added is raw transformed,   [batch, dytr]
                     train_Yvar_added is raw transformed,[batch, dytr]
+            - The original SingleTaskGP receives train_X and train_Y completely raw,
+              and inside its __init__ it transforms train_Y and uses the transformed
+              version throughout. For train_X, it passes the original, but calculates
+              dimensions on the transformed version.
         """
 
         # -----------------------------------------------------------------------
@@ -76,47 +80,77 @@ def __init__(
         #self.train_Y_use = torch.cat((train_Y_added, y_tr), axis=-2)
         #self.train_Yvar_use = torch.cat((train_Yvar_added, yv_tr), axis=-2)
 
-        # Grab num_outputs
-        self._num_outputs = train_Y.shape[-1]
 
-        # Grab ard_num_dims
-        if train_X.shape[0] > 0:
-            with torch.no_grad():
-                transformed_X = self.transform_inputs(X=train_X, input_transform=input_transform)
-            self.ard_num_dims = transformed_X.shape[-1]
-        else:
-            self.ard_num_dims = train_X_added.shape[-1]
-            transformed_X = torch.empty((0, self.ard_num_dims)).to(train_X)
-
-        # Transform outcomes
+        self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
+        with torch.no_grad():
+            transformed_X = self.transform_inputs(
+                X=train_X, input_transform=input_transform
+            )
         if outcome_transform is not None:
             train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar)
-
-        # Added points are raw transformed, so I need to normalize them
-        if train_X_added.shape[0] > 0:
-            train_X_added = input_transform["tf2"](train_X_added)
-            train_Y_added, train_Yvar_added = outcome_transform["tf3"](*outcome_transform["tf2"](train_Y_added, train_Yvar_added))
-
-        # Concatenate the added points
-        train_X_use = torch.cat((transformed_X, train_X_added), axis=-2)
-        train_Y_use = torch.cat((train_Y, train_Y_added), axis=-2)
-        train_Yvar_use = torch.cat((train_Yvar, train_Yvar_added), axis=-2)
-
         # Validate again after applying the transforms
-        self._validate_tensor_args(X=train_X_use, Y=train_Y_use, Yvar=train_Yvar_use)
+        self._validate_tensor_args(X=transformed_X, Y=train_Y, Yvar=train_Yvar)
         ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None)
         validate_input_scaling(
-            train_X=train_X_use,
-            train_Y=train_Y_use,
-            train_Yvar=train_Yvar_use,
+            train_X=transformed_X,
+            train_Y=train_Y,
+            train_Yvar=train_Yvar,
             ignore_X_dims=ignore_X_dims,
         )
-        self._set_dimensions(train_X=train_X_use, train_Y=train_Y_use)
-        
-        train_X_use, train_Y_use, train_Yvar_use = self._transform_tensor_args(
-            X=train_X_use, Y=train_Y_use, Yvar=train_Yvar_use
+        self._set_dimensions(train_X=train_X, train_Y=train_Y)
+        train_X, train_Y, train_Yvar = self._transform_tensor_args(
+            X=train_X, Y=train_Y, Yvar=train_Yvar
         )
 
+        self._aug_batch_shape = transformed_X.shape[:-2]
+        train_X_use = train_X
+        train_Y_use = train_Y
+        train_Yvar_use = train_Yvar
+        self.ard_num_dims = transformed_X.shape[-1]
+
+
+
+        # # Grab num_outputs
+        # self._num_outputs = train_Y.shape[-1]
+
+        # # Grab ard_num_dims
+        # if train_X.shape[0] > 0:
+        #     with torch.no_grad():
+        #         transformed_X = self.transform_inputs(X=train_X, input_transform=input_transform)
+        #     self.ard_num_dims = transformed_X.shape[-1]
+        # else:
+        #     self.ard_num_dims = train_X_added.shape[-1]
+        #     transformed_X = torch.empty((0, self.ard_num_dims)).to(train_X)
+
+        # # Transform outcomes
+        # if outcome_transform is not None:
+        #     train_Y, train_Yvar = outcome_transform(train_X, train_Y, train_Yvar)
+
+        # # Added points are raw transformed, so I need to normalize them
+        # if train_X_added.shape[0] > 0:
+        #     train_X_added = input_transform["tf2"](train_X_added)
+        #     train_Y_added, train_Yvar_added = outcome_transform["tf3"](*outcome_transform["tf2"](train_Y_added, train_Yvar_added))
+
+        # # Concatenate the added points
+        # train_X_use = torch.cat((transformed_X, train_X_added), axis=-2)
+        # train_Y_use = torch.cat((train_Y, train_Y_added), axis=-2)
+        # train_Yvar_use = torch.cat((train_Yvar, train_Yvar_added), axis=-2)
+
+        # # Validate again after applying the transforms
+        # self._validate_tensor_args(X=train_X_use, Y=train_Y_use, Yvar=train_Yvar_use)
+        # ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None)
+        # validate_input_scaling(
+        #     train_X=train_X_use,
+        #     train_Y=train_Y_use,
+        #     train_Yvar=train_Yvar_use,
+        #     ignore_X_dims=ignore_X_dims,
+        # )
+        # self._set_dimensions(train_X=train_X_use, train_Y=train_Y_use)
+        
+        # train_X_use, train_Y_use, train_Yvar_use = self._transform_tensor_args(
+        #     X=train_X_use, Y=train_Y_use, Yvar=train_Yvar_use
+        # )
+
         """
 		-----------------------------------------------------------------------
 		Likelihood and Noise
@@ -248,7 +282,7 @@ def __init__(
             self.outcome_transform = outcome_transform
         if input_transform is not None:
             self.input_transform = input_transform
-        self.to(train_X)
+        self.to(train_X_use)
 
     # Modify posterior call from BatchedMultiOutputGPyTorchModel to call posterior untransform with "X"
     def posterior(
@@ -295,49 +329,12 @@ def posterior(
             return posterior_transform(posterior)
         return posterior
 
-'''
-*******************************************************************************
-ModelListGP needs to be custom in MITIM because:
-    - I shouldn't run the full transformation at every posterior call, only
-      once per ModelList. This will allow me to have "common"  parameters
-      to models, to not run at every transformation again
-*******************************************************************************
-'''
-class ModelListGP_MITIM(botorch.models.model_list_gp_regression.ModelListGP):
-    def __init__(self, *gp_models):
-        super().__init__(*gp_models)
-
-    def prepareToGenerateCommons(self):
-        self.models[0].input_transform.tf1.flag_to_store = True
-        # Make sure that this ModelListGP evaluation is fresh
-        if ("surrogate_parameters" in self.models[0].input_transform.tf1.__dict__) and \
-            ("parameters_combined" in self.models[0].input_transform.tf1.surrogate_parameters):
-            del self.models[0].input_transform.tf1.surrogate_parameters["parameters_combined"]
-
-    def cold_startCommons(self):
-        self.models[0].input_transform.tf1.flag_to_store = False
-        if ("surrogate_parameters" in self.models[0].input_transform.tf1.__dict__) and \
-            ("parameters_combined" in self.models[0].input_transform.tf1.surrogate_parameters):
-            del self.models[0].input_transform.tf1.surrogate_parameters["parameters_combined"]
-
-    def transform_inputs(self, X):
-        self.prepareToGenerateCommons()
-        X_tr = super().transform_inputs(X)
-        self.cold_startCommons()
-        return X_tr
-
-    def posterior(self, *args, **kwargs):
-        self.prepareToGenerateCommons()
-        posterior = super().posterior(*args, **kwargs)
-        self.cold_startCommons()
-        return posterior
-
 '''
 *******************************************************************************
 Physics transformation for inputs
 *******************************************************************************
 '''
-class Transformation_Inputs(
+class input_physics_transform(
     botorch.models.transforms.input.ReversibleInputTransform, torch.nn.Module):
     def __init__(
         self,
@@ -393,105 +390,58 @@ def _untransform(self, X):
 *******************************************************************************
 Physics transformation for outputs. Notes:
     - It needs to take "X" as well
-    - I leverage what's build in standardize to avoid repeating code
 *******************************************************************************
 '''
-class Transformation_Outcomes(botorch.models.transforms.outcome.Standardize):
-    def __init__(self, m, outputs_names, surrogate_parameters):
-        super().__init__(m)
 
+class outcome_physics_transform(botorch.models.transforms.outcome.OutcomeTransform):
+    def __init__(self, m, outputs_names, surrogate_parameters):
+        super().__init__()
         self.outputs_names = outputs_names
         self.surrogate_parameters = surrogate_parameters
         self.flag_to_evaluate = True
 
-    def forward(self, X, Y, Yvar):
+    def _is_linear(self):
+        return True
+
+    def grab_factor(self, X):
         if (self.outputs_names is not None) and (self.flag_to_evaluate):
             factor = self.surrogate_parameters["transformationOutputs"](
                 X, self.surrogate_parameters, self.outputs_names
             ).to(X.device)
         else:
-            factor = Y.mean(dim=-2, keepdim=True).to(Y.device) * 0.0 + 1.0
-
-        # This occurs in Standardize, now I'm tricking it
-        self.stdvs = factor
-        self.means = self.stdvs * 0.0
-        self._stdvs_sq = self.stdvs.pow(2)
-        self._is_trained = torch.tensor(True)
+            factor = torch.ones_like(X)
+        return factor
 
-        # When calling the forward method of Standardize, do not recalculate mean and stdvs (never be on training)
-        self.training = False
-        # ----------------------------------------
+    def forward(self, X, Y, Yvar):
+        factor = self.grab_factor(X)
+        return Y / factor, Yvar / factor.pow(2) if Yvar is not None else None
 
-        return super().forward(Y, Yvar)
+    def untransform(self, X, Y, Yvar):
+        factor = self.grab_factor(X)
+        return Y * factor, Yvar * factor.pow(2) if Yvar is not None else None
 
     def untransform_posterior(self, X, posterior):
-        if (self.outputs_names is not None) and (self.flag_to_evaluate):
-            factor = self.surrogate_parameters["transformationOutputs"](
-                X, self.surrogate_parameters, self.outputs_names
-            ).to(X.device)
-
-            self.stdvs = factor
-            self.means = self.stdvs * 0.0
-            self._stdvs_sq = self.stdvs.pow(2)
-            return self.untransform_posterior_mod(posterior)
-        else:
-            return posterior
-
-    def untransform(self, Y, Yvar):
-        raise NotImplementedError("[MITIM] This situation has not been implemented yet")
-
-    def untransform_posterior_mod(self, posterior):
         '''
-        PRF: I modified this because I cannot make the squeeze operation in the posterior, otherwise
-        I miss the element of the batch dimension 
+        PRF: Please check, this is an attempt to 
+        replicate the untransform_posterior method from Standardize
         '''
-        is_mtgp_posterior = False
-        if type(posterior) is GPyTorchPosterior:
-            is_mtgp_posterior = posterior._is_mt
-        if not self._m == posterior._extended_shape()[-1] and not is_mtgp_posterior:
-            raise RuntimeError(
-                "Incompatible output dimensions encountered. Transform has output "
-                f"dimension {self._m} and posterior has "
-                f"{posterior._extended_shape()[-1]}."
-            )
 
+        # Grab linear factor
+        factor = self.grab_factor(X)
+
+        # Grab the posterior distribution to modify
         mvn = posterior.distribution
-        offset = self.means
-        scale_fac = self.stdvs
-        if not posterior._is_mt:
-            mean_tf = offset.squeeze(-1) + scale_fac.squeeze(-1) * mvn.mean
-            scale_fac = scale_fac.squeeze(-1).expand_as(mean_tf)
-        else:
-            mean_tf = offset + scale_fac * mvn.mean
-            # reps = mean_tf.shape[-2:].numel() // scale_fac.size(-1)
-            # scale_fac = scale_fac.squeeze(-2)
+        lcv = mvn.lazy_covariance_matrix
 
-            reps = mean_tf.shape[-2:].numel() // scale_fac.size(-1)
-            scale_fac = scale_fac.view(-1)
-            
-            # if mvn._interleaved:
-            #     scale_fac = scale_fac.repeat(*[1 for _ in scale_fac.shape[:-1]], reps)
-            # else:
-            #     scale_fac = torch.repeat_interleave(scale_fac, reps, dim=-1)
-
-        if (
-            not mvn.islazy
-            # TODO: Figure out attribute namming weirdness here
-            or mvn._MultivariateNormal__unbroadcasted_scale_tril is not None
-        ):
-            # if already computed, we can save a lot of time using scale_tril
-            covar_tf = CholLinearOperator(mvn.scale_tril * scale_fac.unsqueeze(-1))
-        else:
-            lcv = mvn.lazy_covariance_matrix
-            #scale_fac = scale_fac.expand(lcv.shape[:-1])
-            scale_mat = DiagLinearOperator(scale_fac)
-            covar_tf = scale_mat @ lcv @ scale_mat
+        # Calculate the new mean and covariance
+        mean_tf = factor * mvn.mean
+        scale_mat = DiagLinearOperator(factor)
+        covar_tf = scale_mat @ lcv @ scale_mat
 
+        # Recreate the untranformed posterior
         kwargs = {"interleaved": mvn._interleaved} if posterior._is_mt else {}
         mvn_tf = mvn.__class__(mean=mean_tf, covariance_matrix=covar_tf, **kwargs)
-        return GPyTorchPosterior(mvn_tf)
-
-# Because I need it to take X too (for physics only, which is always the first tf)
+        return botorch.posteriors.gpytorch.GPyTorchPosterior(mvn_tf)
 
 '''
 *******************************************************************************
@@ -520,7 +470,6 @@ def untransform_posterior(self, X, posterior):
                 else tf.untransform_posterior(posterior)
             )  # Only physics transformation (tf1) takes X
             
-
         return posterior
 
     def untransform(self, X, Y, Yvar):
@@ -584,10 +533,25 @@ def transform(self, X: Tensor) -> Tensor:
         Returns:
             A `batch_shape x n x d`-dim tensor of transformed inputs.
         """
-        return torch.stack(
-            [t.forward(Xi) for Xi, t in self._Xs_and_transforms(X)],
+
+        self.prepare_expensive_parameters()
+        v = torch.stack(
+            [t.forward(Xi) for Xi, t in self._Xs_and_transforms(X)],                #PRF
             dim=self.broadcast_index,
         )
+        self.restart_expensive_parameters()
+        return v
+
+    def prepare_expensive_parameters(self):
+        self.transforms[0].flag_to_store = True
+        # Make sure that this evaluation is fresh
+        if "parameters_combined" in self.transforms[0].surrogate_parameters:
+            del self.transforms[0].surrogate_parameters["parameters_combined"]
+
+    def restart_expensive_parameters(self):
+        self.transforms[0].flag_to_store = False
+        if "parameters_combined" in self.transforms[0].surrogate_parameters:
+            del self.transforms[0].surrogate_parameters["parameters_combined"]
 
     def untransform(self, X: Tensor) -> Tensor:
         r"""Un-transform the inputs to a model.
@@ -606,10 +570,12 @@ def untransform(self, X: Tensor) -> Tensor:
         # )
         #
         # return self.transforms[0].untransform(X)
+        self.prepare_expensive_parameters()
         Xt = torch.stack(
             [t.untransform(Xi) for Xi, t in self._Xs_and_transforms(X)],
             dim=self.broadcast_index,
         )
+        self.restart_expensive_parameters()
         Xt = Xt.unique(dim=self.broadcast_index)
         # since we are assuming that this batch dimension was added solely
         # because of different transforms, rather than different original inputs X.
@@ -645,10 +611,15 @@ def preprocess_transform(self, X: Tensor) -> Tensor:
         Returns:
             A `batch_shape x n x d`-dim tensor of (transformed) inputs.
         """
-        return torch.stack(
+
+
+        self.prepare_expensive_parameters()
+        v = torch.stack(
             [t.preprocess_transform(Xi) for Xi, t in self._Xs_and_transforms(X)],
             dim=self.broadcast_index,
         )
+        self.restart_expensive_parameters()
+        return v
 
     def _Xs_and_transforms(self, X: Tensor) -> Iterable[tuple[Tensor, InputTransform]]:
         r"""Returns an iterable of sub-tensors of X and their associated transforms.
@@ -752,9 +723,51 @@ def untransform_posterior(self, posterior: Posterior) -> Posterior:
         # could potentially use from_independent_mvns
         # print(f"{mvn._covar.shape = }")
         # print(f"{covar.shape=}")
+        
+        #dis = gpytorch.distributions.MultivariateNormal(mean=mean, covariance_matrix=covar)
+        #dis = mvn.__class__(mean=mean, covariance_matrix=covar) #PRF <----- CHANGED <<<<<<<<<<<<< CHGECK
+        #dis = gpytorch.distributions.multivariate_normal.MultivariateNormal(mean=mean, covariance_matrix=covar)
         dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
         return GPyTorchPosterior(distribution=dis)
 
+'''
+*******************************************************************************
+ModelListGP needs to be custom in MITIM because:
+    - I shouldn't run the full transformation at every posterior call, only
+      once per ModelList. This will allow me to have "common"  parameters
+      to models, to not run at every transformation again
+*******************************************************************************
+'''
+class ModelListGP_MITIM(botorch.models.model_list_gp_regression.ModelListGP):
+    def __init__(self, *gp_models):
+        super().__init__(*gp_models)
+
+    def prepare_expensive_parameters(self):
+        self.models[0].input_transform.tf1.flag_to_store = True
+        # Make sure that this ModelListGP evaluation is fresh
+        if ("surrogate_parameters" in self.models[0].input_transform.tf1.__dict__) and \
+            ("parameters_combined" in self.models[0].input_transform.tf1.surrogate_parameters):
+            del self.models[0].input_transform.tf1.surrogate_parameters["parameters_combined"]
+
+    def restart_expensive_parameters(self):
+        self.models[0].input_transform.tf1.flag_to_store = False
+        if ("surrogate_parameters" in self.models[0].input_transform.tf1.__dict__) and \
+            ("parameters_combined" in self.models[0].input_transform.tf1.surrogate_parameters):
+            del self.models[0].input_transform.tf1.surrogate_parameters["parameters_combined"]
+
+    def transform_inputs(self, X):
+        self.prepare_expensive_parameters()
+        X_tr = super().transform_inputs(X)
+        self.restart_expensive_parameters()
+        return X_tr
+
+    def posterior(self, *args, **kwargs):
+        self.prepare_expensive_parameters()
+        posterior = super().posterior(*args, **kwargs)
+        self.restart_expensive_parameters()
+        return posterior
+
+
 # ----------------------------------------------------------------------------------------------------------------------------
 # Mean acquisition function in botorch doesn't allow objectives because it's analytic
 # ----------------------------------------------------------------------------------------------------------------------------
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index c98fec94..bd86d34e 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -1,5 +1,4 @@
 import copy
-import datetime
 import torch
 import botorch
 import numpy as np
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 0b4089f3..dda91b9d 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -233,14 +233,14 @@ def _define_MITIM_transformations(self, num_training_points):
 
         for ind_out in range(self.train_Y.shape[-1]):
 
-            input_transform_physics = BOTORCHtools.Transformation_Inputs(
+            input_transform_physics = BOTORCHtools.input_physics_transform(
                 self.outputs[ind_out], self.surrogate_parameters, self.surrogate_transformation_variables
             ).to(self.dfT)
 
             input_transformations_physics.append(input_transform_physics)
         
         dimY = self.train_Y.shape[-1]
-        output_transformation_physics = BOTORCHtools.Transformation_Outcomes(
+        output_transformation_physics = BOTORCHtools.outcome_physics_transform(
                 dimY, self.outputs, self.surrogate_parameters
             ).to(self.dfT)
 
@@ -262,7 +262,9 @@ def _define_MITIM_transformations(self, num_training_points):
         input_transform_normalization = botorch.models.transforms.input.Normalize(
             d = dx_tr, bounds=None, batch_shape=transformed_X.shape[:-2]
         ).to(self.dfT)
-        output_transformed_standardization = botorch.models.transforms.outcome.Standardize(m = dy_tr).to(self.dfT)
+        output_transformed_standardization = botorch.models.transforms.outcome.Standardize(
+            m = dy_tr, batch_shape=self.train_Y.shape[:-2]
+        ).to(self.dfT)
 
         # ------------------------------------------------------------------------------------
         # Combine transformations in chain of PHYSICS + NORMALIZATION + BATCHING
@@ -346,7 +348,7 @@ def normalization_pass(self,input_transform, outcome_transform):
         '''
         Notes:
             - The goal of this is to capture NOW the normalization and standardization constants,
-              by account for both the actual data and the added data from file 
+              by accounting for both the actual data and the added data from file 
         '''
 
         # -------------------------------------------------------------------------------------
@@ -424,8 +426,8 @@ def fit(self):
 		"""
 
         # Train always in physics-transformed space, to enable mitim re-use training from file
-        with fundamental_model_context(self):
-            track_fval = self.perform_model_fit(mll)
+        #with fundamental_model_context(self):
+        track_fval = self.perform_model_fit(mll)
 
         # ---------------------------------------------------------------------------------------------------
         # Asses optimization
@@ -919,9 +921,11 @@ def assess_optimization(self, track_fval):
 
             BOgraphics.printParam(param_name, param, extralab="\t\t\t")
 
-
-# Class to call the model posterior directly on transformed space (x and y)
 class fundamental_model_context(object):
+    '''
+    This is a context manager that will temporarily disable the physics transformations (tf1)
+    in the surrogate model
+    '''
     def __init__(self, surrogate_model):
         self.surrogate_model = surrogate_model
 
diff --git a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
index 27dc1313..966692b4 100644
--- a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
+++ b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
@@ -63,7 +63,9 @@ def __call__(self, x, *args, **kwargs):
     seq_message = f'({"sequential" if sequential_q else "joint"}) ' if q>1 else ''
     print(f"\t\t- Optimizing using optimize_acqf: {q = } {seq_message}, {num_restarts = }, {raw_samples = }")
 
-    with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
+    options["maxiter"] = 100
+    with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/ev.prof") as s:
+    #with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
         x_opt, _ = botorch.optim.optimize_acqf(
             acq_function=fun_opt,
             bounds=fun.bounds_mod,
@@ -73,6 +75,11 @@ def __call__(self, x, *args, **kwargs):
             num_restarts=num_restarts,
             options=options,
         )
+
+        # 
+        #     self.gpmodel.posterior(x).mean
+        #     #torch.autograd.grad(loss, X)[0]
+
     embed()
 
     acq_evaluated = torch.Tensor(acq_evaluated)
diff --git a/tests/PORTALS_workflow.py b/tests/PORTALS_workflow.py
index 9c504a27..e4ac0ba4 100644
--- a/tests/PORTALS_workflow.py
+++ b/tests/PORTALS_workflow.py
@@ -16,7 +16,7 @@
     os.system(f"rm -r {folderWork.resolve()}")
 
 # Let's not consume the entire computer resources when running test... limit to 4 threads
-torch.set_num_threads(8)
+# torch.set_num_threads(8)
 
 # --------------------------------------------------------------------------------------------
 # Optimization Class

From 8d1fbeb0a0f27afa38ca098184cc90f27b097f32 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 23 Nov 2024 20:28:24 -0500
Subject: [PATCH 27/34] Resume next time to increase speed,
 from_independent_mvns didn't work

---
 src/mitim_tools/opt_tools/BOTORCHtools.py | 51 ++++++++++++++++-------
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 84c9352a..8e9b4c6f 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -19,8 +19,9 @@
 from torch.nn import ModuleDict
 from botorch.posteriors.gpytorch import GPyTorchPosterior
 from botorch.posteriors.posterior import Posterior
-from linear_operator.operators import BlockDiagLinearOperator
+from linear_operator.operators import BlockDiagLinearOperator, CatLinearOperator, BlockInterleavedLinearOperator
 from gpytorch.distributions import MultitaskMultivariateNormal
+from gpytorch.distributions import MultivariateNormal
 
 '''
 *******************************************************************************
@@ -435,7 +436,7 @@ def untransform_posterior(self, X, posterior):
 
         # Calculate the new mean and covariance
         mean_tf = factor * mvn.mean
-        scale_mat = DiagLinearOperator(factor)
+        scale_mat = DiagLinearOperator(factor.squeeze(-1))
         covar_tf = scale_mat @ lcv @ scale_mat
 
         # Recreate the untranformed posterior
@@ -474,7 +475,12 @@ def untransform_posterior(self, X, posterior):
 
     def untransform(self, X, Y, Yvar):
         raise NotImplementedError("[MITIM] This situation has not been implemented yet")
-
+'''
+*******************************************************************************
+BatchBroadcastedInputTransform needs to be custom in MITIM because of the no
+repetition of expensive parameters
+******************************************************************************* 
+'''
 class BatchBroadcastedInputTransform(InputTransform, ModuleDict):
     r"""An input transform representing a list of transforms to be broadcasted."""
 
@@ -536,7 +542,7 @@ def transform(self, X: Tensor) -> Tensor:
 
         self.prepare_expensive_parameters()
         v = torch.stack(
-            [t.forward(Xi) for Xi, t in self._Xs_and_transforms(X)],                #PRF
+            [t.forward(Xi) for Xi, t in self._Xs_and_transforms(X)],
             dim=self.broadcast_index,
         )
         self.restart_expensive_parameters()
@@ -714,20 +720,33 @@ def untransform_posterior(self, posterior: Posterior) -> Posterior:
         Returns:
             The un-transformed posterior.
         """
+        # mvn = posterior.mvn
+        # mean = self.untransform(posterior.mean)[0]
+        # covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
+        # dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
+        # return GPyTorchPosterior(distribution=dis)
+
         mvn = posterior.mvn
-        # print(f"{posterior.mean.shape = }")
-        # print(f"{mvn.mean.shape = }")
         mean = self.untransform(posterior.mean)[0]
-        # print(f"{mean.shape = }")
-        covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
-        # could potentially use from_independent_mvns
-        # print(f"{mvn._covar.shape = }")
-        # print(f"{covar.shape=}")
-        
-        #dis = gpytorch.distributions.MultivariateNormal(mean=mean, covariance_matrix=covar)
-        #dis = mvn.__class__(mean=mean, covariance_matrix=covar) #PRF <----- CHANGED <<<<<<<<<<<<< CHGECK
-        #dis = gpytorch.distributions.multivariate_normal.MultivariateNormal(mean=mean, covariance_matrix=covar)
-        dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
+        #covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
+
+        _num_outputs = mean.shape[-1]
+
+        output_indices = range(_num_outputs)
+        mvns = []
+        for t in output_indices:
+            slices = [slice(None)] * mvn._covar.ndim
+            slices[-3] = 0
+            mvns.append(MultivariateNormal(
+                mean.select(dim=-1, index=t),
+                mvn._covar[tuple(slices)],
+            )
+            )
+        if len(mvns) == 1:
+            dis = mvns[0]
+        else:
+            dis = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
+
         return GPyTorchPosterior(distribution=dis)
 
 '''

From 698ef693f6e38d6d982b6e37c4121f71d93568a8 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 23 Nov 2024 20:41:32 -0500
Subject: [PATCH 28/34] misc

---
 src/mitim_tools/opt_tools/BOTORCHtools.py     |  2 +-
 .../opt_tools/optimizers/BOTORCHoptim.py      | 20 +++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 8e9b4c6f..0f20e396 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -725,7 +725,7 @@ def untransform_posterior(self, posterior: Posterior) -> Posterior:
         # covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
         # dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
         # return GPyTorchPosterior(distribution=dis)
-
+        # ========================================================+++++++++++++++++++++++++++PRF Check
         mvn = posterior.mvn
         mean = self.untransform(posterior.mean)[0]
         #covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
diff --git a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
index 966692b4..987c76df 100644
--- a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
+++ b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
@@ -64,17 +64,17 @@ def __call__(self, x, *args, **kwargs):
     print(f"\t\t- Optimizing using optimize_acqf: {q = } {seq_message}, {num_restarts = }, {raw_samples = }")
 
     options["maxiter"] = 100
-    with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/ev.prof") as s:
+    #with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/ev.prof") as s:
     #with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
-        x_opt, _ = botorch.optim.optimize_acqf(
-            acq_function=fun_opt,
-            bounds=fun.bounds_mod,
-            raw_samples=raw_samples,
-            q=q,
-            sequential=sequential_q,
-            num_restarts=num_restarts,
-            options=options,
-        )
+    x_opt, _ = botorch.optim.optimize_acqf(
+        acq_function=fun_opt,
+        bounds=fun.bounds_mod,
+        raw_samples=raw_samples,
+        q=q,
+        sequential=sequential_q,
+        num_restarts=num_restarts,
+        options=options,
+    )
 
         # 
         #     self.gpmodel.posterior(x).mean

From b1e2fc3e24857c043bb102fe089ea1d3531f7728 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 23 Nov 2024 21:01:42 -0500
Subject: [PATCH 29/34] misc

---
 templates/main.namelist.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/main.namelist.json b/templates/main.namelist.json
index 4e834c18..24591c5a 100644
--- a/templates/main.namelist.json
+++ b/templates/main.namelist.json
@@ -24,7 +24,7 @@
         "acquisition_params" : {
             "mc_samples": 1024,
             "acquisition_optimization" : {
-                "num_restarts": 128,
+                "num_restarts": 64,
                 "raw_samples": 1024
             }
         },

From 0e52addabffdbb3918a993dd1320b35339ddeccd Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 23 Nov 2024 21:13:54 -0500
Subject: [PATCH 30/34] fixing samples

---
 src/mitim_tools/opt_tools/STEPtools.py        |  2 +-
 .../opt_tools/optimizers/BOTORCHoptim.py      | 26 ++++++++-----------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index bd86d34e..86201d4d 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -339,7 +339,7 @@ def residual(Y, X = None):
                     self.evaluators["GP"].gpmodel,
                     objective=self.evaluators["objective"],
                     X_baseline=self.evaluators["GP"].train_X,
-                    sampler=sampler
+                    #sampler=sampler
                 )
             )
 
diff --git a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
index 987c76df..c0016ca7 100644
--- a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
+++ b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
@@ -63,22 +63,18 @@ def __call__(self, x, *args, **kwargs):
     seq_message = f'({"sequential" if sequential_q else "joint"}) ' if q>1 else ''
     print(f"\t\t- Optimizing using optimize_acqf: {q = } {seq_message}, {num_restarts = }, {raw_samples = }")
 
-    options["maxiter"] = 100
+    # options["maxiter"] = 100
     #with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/ev.prof") as s:
-    #with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
-    x_opt, _ = botorch.optim.optimize_acqf(
-        acq_function=fun_opt,
-        bounds=fun.bounds_mod,
-        raw_samples=raw_samples,
-        q=q,
-        sequential=sequential_q,
-        num_restarts=num_restarts,
-        options=options,
-    )
-
-        # 
-        #     self.gpmodel.posterior(x).mean
-        #     #torch.autograd.grad(loss, X)[0]
+    with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
+        x_opt, _ = botorch.optim.optimize_acqf(
+            acq_function=fun_opt,
+            bounds=fun.bounds_mod,
+            raw_samples=raw_samples,
+            q=q,
+            sequential=sequential_q,
+            num_restarts=num_restarts,
+            options=options,
+        )
 
     embed()
 

From df3819bc0505f3dc9cbf2bac2ea482016c8d4a27 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 23 Nov 2024 21:27:50 -0500
Subject: [PATCH 31/34] from_independent didn't really help, go back to original

---
 src/mitim_tools/opt_tools/BOTORCHtools.py | 71 +----------------------
 1 file changed, 2 insertions(+), 69 deletions(-)

diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index 0f20e396..db79b4ce 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -659,19 +659,6 @@ def __init__(self):
     def forward(
         self, Y: Tensor, Yvar: Tensor | None = None
     ) -> tuple[Tensor, Tensor | None]:
-        r"""Transform the outcomes in a model's training targets
-
-        Args:
-            Y: A `batch_shape x n x m`-dim tensor of training targets.
-            Yvar: A `batch_shape x n x m`-dim tensor of observation noises
-                associated with the training targets (if applicable).
-
-        Returns:
-            A two-tuple with the transformed outcomes (batch_shape x m x n x 1).
-
-            - The transformed outcome observations.
-            - The transformed observation noise (if applicable).
-        """
         return Y.unsqueeze(-3).transpose(-3, -1), (
             Yvar.unsqueeze(-3).transpose(-3, -1) #if Yvar else None
         )
@@ -679,19 +666,6 @@ def forward(
     def untransform(
         self, Y: Tensor, Yvar: Tensor | None = None
     ) -> tuple[Tensor, Tensor | None]:
-        r"""Un-transform previously transformed outcomes
-
-        Args:
-            Y: A `batch_shape x n x m`-dim tensor of transfomred training targets.
-            Yvar: A `batch_shape x n x m`-dim tensor of transformed observation
-                noises associated with the training targets (if applicable).
-
-        Returns:
-            A two-tuple with the un-transformed outcomes:
-
-            - The un-transformed outcome observations.
-            - The un-transformed observation noise (if applicable).
-        """
         assert Y.shape[-1] == 1
         Y_perm = Y.transpose(-3, -1).squeeze(-3)
         Yvar_perm = Yvar.transpose(-3, -1).squeeze(-3) if Yvar else None
@@ -699,54 +673,13 @@ def untransform(
 
     @property
     def _is_linear(self) -> bool:
-        """
-        True for transformations such as `Standardize`; these should be able to apply
-        `untransform_posterior` to a GPyTorchPosterior and return a GPyTorchPosterior,
-        because a multivariate normal distribution should remain multivariate normal
-        after applying the transform.
-        """
         return True
 
     def untransform_posterior(self, posterior: Posterior) -> Posterior:
-        r"""Un-transform a posterior.
-
-        Posteriors with `_is_linear=True` should return a `GPyTorchPosterior` when
-        `posterior` is a `GPyTorchPosterior`. Posteriors with `_is_linear=False`
-        likely return a `TransformedPosterior` instead.
-
-        Args:
-            posterior: A posterior in the transformed space.
-
-        Returns:
-            The un-transformed posterior.
-        """
-        # mvn = posterior.mvn
-        # mean = self.untransform(posterior.mean)[0]
-        # covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
-        # dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
-        # return GPyTorchPosterior(distribution=dis)
-        # ========================================================+++++++++++++++++++++++++++PRF Check
         mvn = posterior.mvn
         mean = self.untransform(posterior.mean)[0]
-        #covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
-
-        _num_outputs = mean.shape[-1]
-
-        output_indices = range(_num_outputs)
-        mvns = []
-        for t in output_indices:
-            slices = [slice(None)] * mvn._covar.ndim
-            slices[-3] = 0
-            mvns.append(MultivariateNormal(
-                mean.select(dim=-1, index=t),
-                mvn._covar[tuple(slices)],
-            )
-            )
-        if len(mvns) == 1:
-            dis = mvns[0]
-        else:
-            dis = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
-
+        covar = BlockDiagLinearOperator(base_linear_op=mvn._covar, block_dim=-3)
+        dis = MultitaskMultivariateNormal(mean=mean, covariance_matrix=covar)
         return GPyTorchPosterior(distribution=dis)
 
 '''

From 042e6540d0f4c9761434b3d4421686222135847d Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sat, 23 Nov 2024 21:42:32 -0500
Subject: [PATCH 32/34] Re-enable sampler argument; remove leftover embed() and debug comments

---
 src/mitim_tools/opt_tools/STEPtools.py               | 2 +-
 src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 86201d4d..bd86d34e 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -339,7 +339,7 @@ def residual(Y, X = None):
                     self.evaluators["GP"].gpmodel,
                     objective=self.evaluators["objective"],
                     X_baseline=self.evaluators["GP"].train_X,
-                    #sampler=sampler
+                    sampler=sampler
                 )
             )
 
diff --git a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
index c0016ca7..5c78f32a 100644
--- a/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
+++ b/src/mitim_tools/opt_tools/optimizers/BOTORCHoptim.py
@@ -63,8 +63,6 @@ def __call__(self, x, *args, **kwargs):
     seq_message = f'({"sequential" if sequential_q else "joint"}) ' if q>1 else ''
     print(f"\t\t- Optimizing using optimize_acqf: {q = } {seq_message}, {num_restarts = }, {raw_samples = }")
 
-    # options["maxiter"] = 100
-    #with IOtools.speeder("/Users/pablorf/PROJECTS/project_2024_PORTALSdevelopment/speed/ev.prof") as s:
     with IOtools.timer(name = "\n\t- Optimization", name_timer = '\t\t- Time: '):
         x_opt, _ = botorch.optim.optimize_acqf(
             acq_function=fun_opt,
@@ -76,8 +74,6 @@ def __call__(self, x, *args, **kwargs):
             options=options,
         )
 
-    embed()
-
     acq_evaluated = torch.Tensor(acq_evaluated)
 
     """

From 7156b7f2cd58b463cc3cfd93d6c5e90ea046bd6a Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sun, 24 Nov 2024 01:35:22 -0500
Subject: [PATCH 33/34] Fit multi-output surrogate as per-group GPs joined in a ModelList (selectSurrogates)

---
 src/mitim_modules/portals/PORTALSmain.py      |  4 +-
 src/mitim_modules/portals/PORTALStools.py     | 54 +++++++++------
 .../portals/utils/PORTALSinit.py              | 61 ++++++++++-------
 src/mitim_tools/opt_tools/STEPtools.py        | 39 +++++++++--
 src/mitim_tools/opt_tools/SURROGATEtools.py   | 68 +++++++++++++++----
 templates/main.namelist.json                  |  2 +-
 6 files changed, 160 insertions(+), 68 deletions(-)

diff --git a/src/mitim_modules/portals/PORTALSmain.py b/src/mitim_modules/portals/PORTALSmain.py
index f281f00e..2570d452 100644
--- a/src/mitim_modules/portals/PORTALSmain.py
+++ b/src/mitim_modules/portals/PORTALSmain.py
@@ -63,8 +63,8 @@ def default_namelist(optimization_options, CGYROrun=False):
             }
 
     # Surrogate
-    optimization_options["surrogateOptions"]["selectSurrogate"] = partial(
-        PORTALStools.selectSurrogate, CGYROrun=CGYROrun
+    optimization_options["surrogateOptions"]["selectSurrogates"] = partial(
+        PORTALStools.selectSurrogates, CGYROrun=CGYROrun
     )
 
     optimization_options["surrogateOptions"]["ensure_within_bounds"] = True
diff --git a/src/mitim_modules/portals/PORTALStools.py b/src/mitim_modules/portals/PORTALStools.py
index 11442334..70ecce0a 100644
--- a/src/mitim_modules/portals/PORTALStools.py
+++ b/src/mitim_modules/portals/PORTALStools.py
@@ -7,22 +7,42 @@
 from mitim_tools.misc_tools.LOGtools import printMsg as print
 from IPython import embed
 
-def selectSurrogate(output, surrogateOptions, CGYROrun=False):
+def selectSurrogates(outputs, surrogateOptions, CGYROrun=False):
+    '''
+    This divides potentially different outputs into different
+    surrogates to be joined with ModelList 
+    '''
+
+    # Find transition to Targets
+    for iTar, output in enumerate(outputs):
+        if output[2:5] == "Tar":
+            break
 
-    print(f'\t- Selecting surrogate options for "{output}" to be run')
+    # Find transition to last location of transport
+    for iTra, output in enumerate(outputs):
+        if output.split('_')[-1] == outputs[iTar-1].split('_')[-1]:
+            break
 
-    if output is not None:
-        # If it's a target, just linear
-        if output[2:5] == "Tar":
-            surrogateOptions["TypeMean"] = 1
-            surrogateOptions["TypeKernel"] = 2  # Constant kernel
-        # If it's not, stndard
-        else:
-            surrogateOptions["TypeMean"] = 2  # Linear in gradients, constant in rest
-            surrogateOptions["TypeKernel"] = 1  # RBF
-            # surrogateOptions['ExtraNoise']  = True
+    surrogateOptions_dict = {}
+
+    # Turbulent and Neoclassical
+
+    surrogateOptions_dict[iTra] = copy.deepcopy(surrogateOptions)
+    surrogateOptions_dict[iTra]["TypeMean"] = 2  # Linear in gradients, constant in rest
+    surrogateOptions_dict[iTra]["TypeKernel"] = 1  # RBF
+    # surrogateOptions_dict[len(output)]['ExtraNoise']  = True
 
-    return surrogateOptions
+    surrogateOptions_dict[iTar] = copy.deepcopy(surrogateOptions)
+    surrogateOptions_dict[iTar]["TypeMean"] = 2  # Linear in gradients, constant in rest
+    surrogateOptions_dict[iTar]["TypeKernel"] = 1  # RBF
+    # surrogateOptions_dict[len(output)]['ExtraNoise']  = True
+
+    # Targets (If it's a target, just linear)
+    surrogateOptions_dict[len(outputs)] = copy.deepcopy(surrogateOptions)
+    surrogateOptions_dict[len(outputs)]["TypeMean"] = 1
+    surrogateOptions_dict[len(outputs)]["TypeKernel"] = 2  # Constant kernel
+
+    return surrogateOptions_dict
 
 def default_portals_transformation_variables(additional_params = []):
     """
@@ -121,18 +141,12 @@ def input_transformation_portals(Xorig, output, surrogate_parameters, surrogate_
 	"""
 
     _, num = output.split("_")
-    index = powerstate.indexes_simulation[
-        int(num)
-    ]  # num=1 -> pos=1, so that it takes the second value in vectors
+    index = powerstate.indexes_simulation[int(num)]  # num=1 -> pos=1, so that it takes the second value in vectors
 
     xFit = torch.Tensor().to(X)
     for ikey in surrogate_transformation_variables[output]:
         xx = powerstate.plasma[ikey][: X.shape[0], index]
         xFit = torch.cat((xFit, xx.unsqueeze(1)), dim=1).to(X)
-    
-    #TO FIX
-    import torch.nn.functional as F
-    xFit = F.pad(xFit, (0, 3-xFit.shape[-1]))
 
     parameters_combined = {"powerstate": powerstate}
 
diff --git a/src/mitim_modules/portals/utils/PORTALSinit.py b/src/mitim_modules/portals/utils/PORTALSinit.py
index 46ae9e65..4b94a156 100644
--- a/src/mitim_modules/portals/utils/PORTALSinit.py
+++ b/src/mitim_modules/portals/utils/PORTALSinit.py
@@ -277,33 +277,46 @@ def initializeProblem(
                 dictDVs[name] = [dvs_fixed[name][0], base_gradient, dvs_fixed[name][1]]
 
     # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    # Define output dictionaries
+    # Define output dictionaries (order is important, consistent with selectSurrogates)
     # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
     ofs, name_objectives = [], []
-    for ikey in dictCPs_base:
-        if ikey == "te":
-            var = "Qe"
-        elif ikey == "ti":
-            var = "Qi"
-        elif ikey == "ne":
-            var = "Ge"
-        elif ikey == "nZ":
-            var = "GZ"
-        elif ikey == "w0":
-            var = "Mt"
-
-        for i in range(len(portals_fun.MODELparameters["RhoLocations"])):
-            ofs.append(f"{var}Turb_{i+1}")
-            ofs.append(f"{var}Neo_{i+1}")
-
-            ofs.append(f"{var}Tar_{i+1}")
-
-            name_objectives.append(f"{var}Res_{i+1}")
-
-    if portals_fun.PORTALSparameters["surrogateForTurbExch"]:
-        for i in range(len(portals_fun.MODELparameters["RhoLocations"])):
-            ofs.append(f"PexchTurb_{i+1}")
+
+    # Turb and neo, ending with last location
+
+    for i in range(len(portals_fun.MODELparameters["RhoLocations"])):
+        for model in ["Turb", "Neo"]:
+            for ikey in dictCPs_base:
+                if ikey == "te":    var = "Qe"
+                elif ikey == "ti":  var = "Qi"
+                elif ikey == "ne":  var = "Ge"
+                elif ikey == "nZ":  var = "GZ"
+                elif ikey == "w0":  var = "Mt"
+
+                ofs.append(f"{var}{model}_{i+1}")
+
+                # One residual objective per (variable, location); the check avoids adding it again for Neo
+                if f"{var}Res_{i+1}" not in name_objectives:
+                    name_objectives.append(f"{var}Res_{i+1}")
+    # Turbulent exchange: appended once, outside the location loop, so PexchTurb_* names are not duplicated
+    if portals_fun.PORTALSparameters["surrogateForTurbExch"]:
+        for i in range(len(portals_fun.MODELparameters["RhoLocations"])):
+            ofs.append(f"PexchTurb_{i+1}")
+
+    # Tar
+
+    for i in range(len(portals_fun.MODELparameters["RhoLocations"])):
+        model = "Tar"
+        for ikey in dictCPs_base:
+            if ikey == "te":    var = "Qe"
+            elif ikey == "ti":  var = "Qi"
+            elif ikey == "ne":  var = "Ge"
+            elif ikey == "nZ":  var = "GZ"
+            elif ikey == "w0":  var = "Mt"
+
+            ofs.append(f"{var}{model}_{i+1}")
+
+
 
     name_transformed_ofs = []
     for of in ofs:
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index bd86d34e..4f494a62 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -102,9 +102,8 @@ def fit_step(self, avoidPoints=None, fit_output_contains=None):
 
     def _fit_multioutput_model(self):
 
-        surrogateOptions = self.surrogateOptions["selectSurrogate"]('AllMITIM', self.surrogateOptions)
-
-        self.GP["mo_model"] = SURROGATEtools.surrogate_model(
+        # Base model
+        self.GP["mo_model"] = SURROGATEtools.surrogate_model.simple(
             self.x,
             self.y,
             self.yvar,
@@ -113,11 +112,37 @@ def _fit_multioutput_model(self):
             outputs_transformed=self.outputs_transformed,
             bounds=self.bounds,
             dfT=self.dfT,
-            surrogateOptions=surrogateOptions,
+            surrogateOptions=self.surrogateOptions,
         )
 
+        # Per-group surrogate options; each key is the end column (exclusive) of that group's output slice
+        surrogateOptions_dict = self.surrogateOptions["selectSurrogates"](self.outputs, self.surrogateOptions)
+
+        gps, j = [], 0
+        for i in surrogateOptions_dict:
+            gps.append(
+                SURROGATEtools.surrogate_model(
+                    self.x,
+                    self.y[:, j:i],
+                    self.yvar[:, j:i],
+                    self.surrogate_parameters,
+                    outputs=self.outputs[j:i],
+                    outputs_transformed=self.outputs_transformed[j:i],
+                    bounds=self.bounds,
+                    dfT=self.dfT,
+                    surrogateOptions=surrogateOptions_dict[i],
+                )
+            )
+            j = i
+
         # Fitting
-        self.GP["mo_model"].fit()
+        gpmodels = []
+        for gp in gps:
+            gp.fit()
+            gpmodels.append(gp.gpmodel)
+
+        # Joining    
+        self.GP["mo_model"].gpmodel = BOTORCHtools.ModelListGP_MITIM(*gpmodels)
 
     def _fit_individual_models(self, fit_output_contains=None):
 
@@ -146,8 +171,8 @@ def _fit_individual_models(self, fit_output_contains=None):
             surrogateOptions = copy.deepcopy(self.surrogateOptions)
 
             # Then, depending on application (e.g. targets in mitim are fitted differently)
-            if ("selectSurrogate" in surrogateOptions) and (surrogateOptions["selectSurrogate"] is not None):
-                surrogateOptions = surrogateOptions["selectSurrogate"](output_this, surrogateOptions)
+            if ("selectSurrogates" in surrogateOptions) and (surrogateOptions["selectSurrogates"] is not None):
+                surrogateOptions = surrogateOptions["selectSurrogates"](output_this, surrogateOptions)
 
             # ---------------------------------------------------------------------------------------------------
             # To avoid problems with fixed values (e.g. calibration terms that are fixed)
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index dda91b9d..9f94f805 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -26,8 +26,15 @@ class surrogate_model:
 
     """
 
-    def __init__(
-        self,
+    @classmethod
+    def simple(cls, *args, **kwargs):
+        # Create an instance of the class
+        instance = cls.__new__(cls)
+        # Initialize the parameters manually
+        instance._init_parameters(*args, **kwargs)
+        return instance
+
+    def _init_parameters(self,
         Xor,
         Yor,
         Yvaror,
@@ -42,17 +49,6 @@ def __init__(
         fileTraining=None,
         seed = 0
     ):
-        """
-        Note:
-            - noise is variance (square of standard deviation).
-        """
-
-        torch.manual_seed(seed)
-
-        # --------------------------------------------------------------------
-        # Input parameters
-        # --------------------------------------------------------------------
-
         self.avoidPoints = avoidPoints if avoidPoints is not None else []
         self.outputs = outputs
         self.outputs_transformed = outputs_transformed
@@ -75,6 +71,50 @@ def __init__(
 
         self.losses = None
 
+
+    def __init__(
+        self,
+        Xor,
+        Yor,
+        Yvaror,
+        surrogate_parameters,
+        outputs=None,
+        outputs_transformed=None,
+        bounds=None,
+        avoidPoints=None,
+        dfT=None,
+        surrogateOptions={},
+        FixedValue=False,
+        fileTraining=None,
+        seed = 0
+    ):
+        """
+        Note:
+            - noise is variance (square of standard deviation).
+        """
+
+        torch.manual_seed(seed)
+
+        # --------------------------------------------------------------------
+        # Input parameters
+        # --------------------------------------------------------------------
+
+        self._init_parameters(
+            Xor,
+            Yor,
+            Yvaror,
+            surrogate_parameters,
+            outputs=outputs,
+            outputs_transformed=outputs_transformed,
+            bounds=bounds,
+            avoidPoints=avoidPoints,
+            dfT=dfT,
+            surrogateOptions=surrogateOptions,
+            FixedValue=FixedValue,
+            fileTraining=fileTraining,
+            seed=seed
+        )
+
         # Print options
         print("\t- Surrogate options:")
         for i in self.surrogateOptions:
@@ -367,7 +407,7 @@ def normalization_pass(self,input_transform, outcome_transform):
         train_X_transformed = input_transform['tf1'](self.train_X)
 
         # Concatenate the training data and the data from file
-        train_X_transformed = torch.cat((train_X_transformed, self.train_X_added), axis=-2)
+        #train_X_transformed = torch.cat((train_X_transformed, self.train_X_added), axis=-2)
 
         # Get the normalization constants
         _ = input_transform['tf2'](train_X_transformed)
diff --git a/templates/main.namelist.json b/templates/main.namelist.json
index 24591c5a..ed9ecaf6 100644
--- a/templates/main.namelist.json
+++ b/templates/main.namelist.json
@@ -36,7 +36,7 @@
     "surrogateOptions": {
         "TypeKernel": 0,
         "TypeMean": 0,
-        "selectSurrogate": null,
+        "selectSurrogates": null,
         "FixedNoise": true,
         "ExtraNoise": false,
         "ConstrainNoise": -1e-3,

From 8ad1247a3e96c7503a5c592ba88d4cfb9a3fb015 Mon Sep 17 00:00:00 2001
From: Pablo RF <pablorf@mit.edu>
Date: Sun, 24 Nov 2024 11:59:49 -0500
Subject: [PATCH 34/34] trying to recover the modellist

---
 src/mitim_modules/portals/PORTALStools.py   | 38 ++++++++++++---------
 src/mitim_tools/opt_tools/BOTORCHtools.py   | 16 ++++-----
 src/mitim_tools/opt_tools/STEPtools.py      |  2 +-
 src/mitim_tools/opt_tools/SURROGATEtools.py | 22 ++++++------
 tests/PORTALS_workflow.py                   |  2 +-
 5 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/src/mitim_modules/portals/PORTALStools.py b/src/mitim_modules/portals/PORTALStools.py
index 70ecce0a..52662332 100644
--- a/src/mitim_modules/portals/PORTALStools.py
+++ b/src/mitim_modules/portals/PORTALStools.py
@@ -25,22 +25,28 @@ def selectSurrogates(outputs, surrogateOptions, CGYROrun=False):
 
     surrogateOptions_dict = {}
 
-    # Turbulent and Neoclassical
-
-    surrogateOptions_dict[iTra] = copy.deepcopy(surrogateOptions)
-    surrogateOptions_dict[iTra]["TypeMean"] = 2  # Linear in gradients, constant in rest
-    surrogateOptions_dict[iTra]["TypeKernel"] = 1  # RBF
-    # surrogateOptions_dict[len(output)]['ExtraNoise']  = True
-
-    surrogateOptions_dict[iTar] = copy.deepcopy(surrogateOptions)
-    surrogateOptions_dict[iTar]["TypeMean"] = 2  # Linear in gradients, constant in rest
-    surrogateOptions_dict[iTar]["TypeKernel"] = 1  # RBF
-    # surrogateOptions_dict[len(output)]['ExtraNoise']  = True
-
-    # Targets (If it's a target, just linear)
-    surrogateOptions_dict[len(outputs)] = copy.deepcopy(surrogateOptions)
-    surrogateOptions_dict[len(outputs)]["TypeMean"] = 1
-    surrogateOptions_dict[len(outputs)]["TypeKernel"] = 2  # Constant kernel
+    for i in range(len(outputs)):
+        # Turbulent and Neoclassical at inner locations
+        surrogateOptions_dict[i+1] = copy.deepcopy(surrogateOptions)
+        surrogateOptions_dict[i+1]["TypeMean"] = 1  # Linear
+        surrogateOptions_dict[i+1]["TypeKernel"] = 1  # RBF
+
+
+
+    # # Turbulent and Neoclassical at inner locations
+    # surrogateOptions_dict[iTra] = copy.deepcopy(surrogateOptions)
+    # surrogateOptions_dict[iTra]["TypeMean"] = 1  # Linear
+    # surrogateOptions_dict[iTra]["TypeKernel"] = 1  # RBF
+
+    # # Turbulent and Neoclassical at outer location (generally less variables)
+    # surrogateOptions_dict[iTar] = copy.deepcopy(surrogateOptions)
+    # surrogateOptions_dict[iTar]["TypeMean"] = 1  # Linear
+    # surrogateOptions_dict[iTar]["TypeKernel"] = 1  # RBF
+
+    # # Targets (If it's a target, just linear)
+    # surrogateOptions_dict[len(outputs)] = copy.deepcopy(surrogateOptions)
+    # surrogateOptions_dict[len(outputs)]["TypeMean"] = 1 # Linear
+    # surrogateOptions_dict[len(outputs)]["TypeKernel"] = 2  # Constant kernel
 
     return surrogateOptions_dict
 
diff --git a/src/mitim_tools/opt_tools/BOTORCHtools.py b/src/mitim_tools/opt_tools/BOTORCHtools.py
index db79b4ce..2a982ed2 100644
--- a/src/mitim_tools/opt_tools/BOTORCHtools.py
+++ b/src/mitim_tools/opt_tools/BOTORCHtools.py
@@ -40,7 +40,7 @@ def __init__(
         input_transform=None,
         outcome_transform=None,
         surrogateOptions={},
-        variables=None,
+        #variables=None,
         train_X_added=torch.Tensor([]),
         train_Y_added=torch.Tensor([]),
         train_Yvar_added=torch.Tensor([]),
@@ -214,12 +214,12 @@ def __init__(
         elif TypeMean == 1:
             self.mean_module = gpytorch.means.linear_mean.LinearMean(
                 self.ard_num_dims, batch_shape=self._aug_batch_shape, bias=True )
-        elif TypeMean == 2:
-            self.mean_module = MITIM_LinearMeanGradients(
-                batch_shape=self._aug_batch_shape, variables=variables )
-        elif TypeMean == 3:
-            self.mean_module = MITIM_CriticalGradient(
-                batch_shape=self._aug_batch_shape, variables=variables )
+        # elif TypeMean == 2:
+        #     self.mean_module = MITIM_LinearMeanGradients(
+        #         batch_shape=self._aug_batch_shape, variables=variables )
+        # elif TypeMean == 3:
+        #     self.mean_module = MITIM_CriticalGradient(
+        #         batch_shape=self._aug_batch_shape, variables=variables )
 
         """
 		-----------------------------------------------------------------------
@@ -890,7 +890,7 @@ class MITIM_LinearMeanGradients(gpytorch.means.mean.Mean):
     def __init__(self, batch_shape=torch.Size(), variables=None, **kwargs):
         super().__init__()
 
-        # Indeces of variables that are gradient, so subject to CG behavior
+        # Indices of variables that are gradients, so subject to critical gradient behavior
         grad_vector = []
         if variables is not None:
             for i, variable in enumerate(variables):
diff --git a/src/mitim_tools/opt_tools/STEPtools.py b/src/mitim_tools/opt_tools/STEPtools.py
index 4f494a62..77a0f67d 100644
--- a/src/mitim_tools/opt_tools/STEPtools.py
+++ b/src/mitim_tools/opt_tools/STEPtools.py
@@ -103,7 +103,7 @@ def fit_step(self, avoidPoints=None, fit_output_contains=None):
     def _fit_multioutput_model(self):
 
         # Base model
-        self.GP["mo_model"] = SURROGATEtools.surrogate_model.simple(
+        self.GP["mo_model"] = SURROGATEtools.surrogate_model.only_define(
             self.x,
             self.y,
             self.yvar,
diff --git a/src/mitim_tools/opt_tools/SURROGATEtools.py b/src/mitim_tools/opt_tools/SURROGATEtools.py
index 9f94f805..6679c8da 100644
--- a/src/mitim_tools/opt_tools/SURROGATEtools.py
+++ b/src/mitim_tools/opt_tools/SURROGATEtools.py
@@ -27,7 +27,7 @@ class surrogate_model:
     """
 
     @classmethod
-    def simple(cls, *args, **kwargs):
+    def only_define(cls, *args, **kwargs):
         # Create an instance of the class
         instance = cls.__new__(cls)
         # Initialize the parameters manually
@@ -202,15 +202,15 @@ def __init__(
 
         self.normalization_pass(input_transform, outcome_transform)
 
-        self.variables = (
-            self.surrogate_transformation_variables[self.outputs[0]]
-            if (
-                (self.outputs is not None)
-                and ("surrogate_transformation_variables" in self.__dict__)
-                and (self.surrogate_transformation_variables is not None)
-            )
-            else None
-        )
+        # self.variables = (
+        #     self.surrogate_transformation_variables[self.outputs[0]]
+        #     if (
+        #         (self.outputs is not None)
+        #         and ("surrogate_transformation_variables" in self.__dict__)
+        #         and (self.surrogate_transformation_variables is not None)
+        #     )
+        #     else None
+        # )
 
         # *************************************************************************************
         # Model
@@ -225,7 +225,7 @@ def __init__(
             input_transform=input_transform,
             outcome_transform=outcome_transform,
             surrogateOptions=self.surrogateOptions,
-            variables=self.variables,
+            #variables=self.variables,
             train_X_added=self.train_X_added,
             train_Y_added=self.train_Y_added,
             train_Yvar_added=self.train_Yvar_added,
diff --git a/tests/PORTALS_workflow.py b/tests/PORTALS_workflow.py
index e4ac0ba4..92c37990 100644
--- a/tests/PORTALS_workflow.py
+++ b/tests/PORTALS_workflow.py
@@ -24,7 +24,7 @@
 
 # Initialize class
 portals_fun = PORTALSmain.portals(folderWork)
-portals_fun.optimization_options["BO_iterations"] = 1
+portals_fun.optimization_options["BO_iterations"] = 5
 portals_fun.optimization_options["initial_training"] = 3
 portals_fun.MODELparameters["RhoLocations"] = [0.25, 0.45, 0.65, 0.85]
 portals_fun.INITparameters["removeFast"] = True