Completes OPEN-3563 (add a model property indicating whether a model is baseline, shell, or non-shell, i.e. "full") and closes OPEN-3616 (replace the 'baseline-model' resource name with the common 'model' resource)

gustavocidornelas · whoseoyster · commit 190ed2e5301c · 2023-02-25T12:51:13.000-08:00
diff --git a/openlayer/__init__.py b/openlayer/__init__.py
@@ -16,7 +16,7 @@
 from .version import __version__  # noqa: F401
 
 OPENLAYER_DIR = os.path.join(os.path.expanduser("~"), ".openlayer")
-VALID_RESOURCE_NAMES = {"baseline-model", "model", "training", "validation"}
+VALID_RESOURCE_NAMES = {"model", "training", "validation"}
 
 
 class OpenlayerClient(object):
@@ -411,6 +411,9 @@ def add_model(
             if model_package_dir:
                 shutil.copytree(model_package_dir, temp_dir, dirs_exist_ok=True)
                 utils.write_python_version(temp_dir)
+                model_data["modelType"] = "full"
+            else:
+                model_data["modelType"] = "shell"
 
             utils.write_yaml(model_data, f"{temp_dir}/model_config.yaml")
 
@@ -479,14 +482,15 @@ def add_baseline_model(
         model_config = {}
         if model_config_file_path is not None:
             model_config = utils.read_yaml(model_config_file_path)
+        model_config["modelType"] = "baseline"
         model_data = BaselineModelSchema().load(model_config)
 
         # Copy relevant resources to temp directory
         with tempfile.TemporaryDirectory() as temp_dir:
             utils.write_yaml(model_data, f"{temp_dir}/model_config.yaml")
 
             self._stage_resource(
-                resource_name="baseline-model",
+                resource_name="model",
                 resource_dir=temp_dir,
                 project_id=project_id,
                 force=force,
@@ -1182,30 +1186,14 @@ def _stage_resource(
         """
         if resource_name not in VALID_RESOURCE_NAMES:
             raise ValueError(
-                "Resource name must be one of 'baseline-model', 'model', 'training', or"
+                "Resource name must be one of 'model', 'training', or"
                 f" 'validation', but got '{resource_name}'."
             )
 
         project_dir = f"{OPENLAYER_DIR}/{project_id}/staging"
 
         resources_staged = utils.list_resources_in_bundle(project_dir)
 
-        if resource_name == "model" and "baseline-model" in resources_staged:
-            raise exceptions.OpenlayerException(
-                "Trying to stage a `model` when there is a `baseline-model` already staged."
-                + " You can either add a `model` or a `baseline-model`, but not both at the"
-                + " same time. Please remove one of them from the staging area using the"
-                + " `restore` method."
-            ) from None
-
-        if resource_name == "baseline-model" and "model" in resources_staged:
-            raise exceptions.OpenlayerException(
-                "Trying to stage a `baseline-model` when there is a `model` already staged."
-                + " You can either add a `model` or a `baseline-model`, but not both at the"
-                + " same time. Please remove one of them from the staging area using the"
-                + " `restore` method."
-            ) from None
-
         if resource_name in resources_staged:
             print(f"Found an existing `{resource_name}` resource staged.")
 
diff --git a/openlayer/schemas.py b/openlayer/schemas.py
@@ -23,6 +23,7 @@ class BaselineModelSchema(ma.Schema):
     """Schema for baseline models."""
 
     metadata = ma.fields.Dict(allow_none=True, load_default={})
+    modelType = ma.fields.Str()
 
 
 class CommitSchema(ma.Schema):
@@ -119,6 +120,7 @@ class ModelSchema(ma.Schema):
         allow_none=True,
         load_default={},
     )
+    modelType = ma.fields.Str()
     architectureType = ma.fields.Str(
         validate=ma.validate.OneOf(
             [model_framework.value for model_framework in ModelType],
diff --git a/openlayer/validators.py b/openlayer/validators.py
@@ -147,8 +147,8 @@ def _validate_bundle_state(self):
         """Checks whether the bundle is in a valid state.
 
         This includes:
-        - When a "model" is included, you always need to provide predictions for both
-          "validation" and "training" (regardless of artifact or no artifact).
+        - When a "model" (shell or full) is included, you always need to provide predictions for both
+          "validation" and "training".
         - When a "baseline-model" is included, you always need to provide a "training"
           and "validation" set without predictions.
         - When a "model" nor a "baseline-model" are included, you always need to NOT
@@ -186,33 +186,35 @@ def _validate_bundle_state(self):
             )
 
         if "model" in self._bundle_resources:
+            model_config = self._load_model_config_from_bundle()
+            model_type = model_config.get("modelType")
             if (
                 training_predictions_column_name is None
                 or validation_predictions_column_name is None
-            ):
+            ) and model_type != "baseline":
                 bundle_state_failed_validations.append(
                     "To push a model to the platform, you must provide "
                     "training and a validation sets with predictions in the column "
                     "`predictions_column_name`."
                 )
-        elif "baseline-model" in self._bundle_resources:
-            if (
-                "training" not in self._bundle_resources
-                or "validation" not in self._bundle_resources
-            ):
-                bundle_state_failed_validations.append(
-                    "To push a baseline model to the platform, you must provide "
-                    "training and validation sets."
-                )
-            elif (
-                training_predictions_column_name is not None
-                and validation_predictions_column_name is not None
-            ):
-                bundle_state_failed_validations.append(
-                    "To push a baseline model to the platform, you must not provide "
-                    "training and a validation sets without predictions in the column "
-                    "`predictions_column_name`."
-                )
+            if model_type == "baseline":
+                if (
+                    "training" not in self._bundle_resources
+                    or "validation" not in self._bundle_resources
+                ):
+                    bundle_state_failed_validations.append(
+                        "To push a baseline model to the platform, you must provide "
+                        "training and validation sets."
+                    )
+                elif (
+                    training_predictions_column_name is not None
+                    and validation_predictions_column_name is not None
+                ):
+                    bundle_state_failed_validations.append(
+                        "To push a baseline model to the platform, you must provide "
+                        "training and validation sets without predictions in the column "
+                        "`predictions_column_name`."
+                    )
         else:
             if (
                 "training" in self._bundle_resources
@@ -260,26 +262,15 @@ def _validate_bundle_resources(self):
                 validation_set_validator.validate()
             )
 
-        if (
-            "baseline-model" in self._bundle_resources
-            and not self._skip_model_validation
-        ):
-            baseline_model_validator = BaselineModelValidator(
-                model_config_file_path=f"{self.bundle_path}/baseline-model/model_config.yaml"
-            )
-            bundle_resources_failed_validations.extend(
-                baseline_model_validator.validate()
-            )
-
         if "model" in self._bundle_resources and not self._skip_model_validation:
-            model_files = os.listdir(f"{self.bundle_path}/model")
-            # Shell model
-            if len(model_files) == 1:
+            model_config_file_path = f"{self.bundle_path}/model/model_config.yaml"
+            model_config = self._load_model_config_from_bundle()
+
+            if model_config["modelType"] == "shell":
                 model_validator = ModelValidator(
-                    model_config_file_path=f"{self.bundle_path}/model/model_config.yaml"
+                    model_config_file_path=model_config_file_path
                 )
-            # Model package
-            else:
+            elif model_config["modelType"] == "full":
                 # Use data from the validation as test data
                 validation_dataset_df = self._load_dataset_from_bundle("validation")
                 validation_dataset_config = self._load_dataset_config_from_bundle(
@@ -298,12 +289,21 @@ def _validate_bundle_resources(self):
                     ].head()
 
                 model_validator = ModelValidator(
-                    model_config_file_path=f"{self.bundle_path}/model/model_config.yaml",
+                    model_config_file_path=model_config_file_path,
                     model_package_dir=f"{self.bundle_path}/model",
                     sample_data=sample_data,
                     use_runner=self._use_runner,
                 )
-                bundle_resources_failed_validations.extend(model_validator.validate())
+            elif model_config["modelType"] == "baseline":
+                model_validator = BaselineModelValidator(
+                    model_config_file_path=model_config_file_path
+                )
+            else:
+                raise ValueError(
+                    f"Invalid model type: {model_config['modelType']}. "
+                    "The model type must be one of 'shell', 'full' or 'baseline'."
+                )
+            bundle_resources_failed_validations.extend(model_validator.validate())
 
         # Add the bundle resources failed validations to the list of all failed validations
         self.failed_validations.extend(bundle_resources_failed_validations)
@@ -347,6 +347,21 @@ def _load_dataset_config_from_bundle(self, label: str) -> Dict[str, Any]:
 
         return dataset_config
 
+    def _load_model_config_from_bundle(self) -> Dict[str, Any]:
+        """Loads a model config from a commit bundle.
+
+        Returns
+        -------
+        Dict[str, Any]
+            The model config.
+        """
+        model_config_file_path = f"{self.bundle_path}/model/model_config.yaml"
+
+        with open(model_config_file_path, "r", encoding="UTF-8") as stream:
+            model_config = yaml.safe_load(stream)
+
+        return model_config
+
     def validate(self) -> List[str]:
         """Validates the commit bundle.