16
16
from .version import __version__ # noqa: F401
17
17
18
18
# Root of the local Openlayer working area, located under the user's home
# directory. Per-project staging directories are built from this path.
OPENLAYER_DIR = os.path.join(os.path.expanduser("~"), ".openlayer")
# Resource names accepted by the staging area; shared by the staging and
# status-reporting code so both agree on what counts as a resource.
VALID_RESOURCE_NAMES = {"baseline-model", "model", "training", "validation"}
19
20
20
21
21
22
class OpenlayerClient (object ):
@@ -441,6 +442,85 @@ def add_model(
441
442
force = force ,
442
443
)
443
444
445
def add_baseline_model(
    self,
    project_id: int,
    task_type: TaskType,
    model_config_file_path: Optional[str] = None,
    force: bool = False,
):
    """
    **Coming soon...**

    Add a baseline model to the project.

    Baseline models should be added together with training and validation
    sets. A model will then be trained on the platform using AutoML, using
    the parameters provided in the model config file.

    .. important::
        This feature is experimental and currently under development. Only
        tabular classification tasks are supported for now.

    Parameters
    ----------
    project_id : int
        Id of the project whose staging area receives the baseline model.
    task_type : TaskType
        Task type of the project. Must be ``TaskType.TabularClassification``.
    model_config_file_path : str, optional
        Path to the model configuration YAML file. If not provided, the default
        model config will be used.

        .. admonition:: What's on the model config file?

            For baseline models, the content of the YAML file should contain:

            - ``ensembleSize`` : int, default 10
                Number of models ensembled.
            - ``randomSeed`` : int, default 42
                Random seed to be used for model training.
            - ``timeout`` : int, default 60
                Maximum time (in seconds) to train all the models.
            - ``perRunLimit`` : int, optional
                Maximum time (in seconds) to train each model.
            - ``metadata`` : Dict[str, any], default {}
                Dictionary containing metadata about the model. This is the
                metadata that will be displayed on the Openlayer platform.
    force : bool, optional
        Whether to force the addition of the baseline model to the project.
        If set to True, any existing staged baseline model will be overwritten.

    Raises
    ------
    exceptions.OpenlayerException
        If ``task_type`` is not ``TaskType.TabularClassification``.
    exceptions.OpenlayerValidationError
        If the baseline model validator reports any failed validation.
    """
    if task_type is not TaskType.TabularClassification:
        raise exceptions.OpenlayerException(
            "Only tabular classification is supported for model baseline for now."
        )

    # Validate the baseline model
    baseline_model_validator = validators.BaselineModelValidator(
        model_config_file_path=model_config_file_path,
    )
    failed_validations = baseline_model_validator.validate()

    if failed_validations:
        raise exceptions.OpenlayerValidationError(
            "There are issues with the baseline model. \n"
            "Make sure to fix all of the issues listed above before the upload.",
        ) from None

    # Load model config and augment with defaults. With no config file, the
    # schema is loaded from an empty dict.
    model_config = {}
    if model_config_file_path is not None:
        model_config = utils.read_yaml(model_config_file_path)
    model_data = schemas.BaselineModelSchema().load(model_config)

    # Copy relevant resources to temp directory. Staging happens inside the
    # `with` block because the directory is deleted on exit.
    with tempfile.TemporaryDirectory() as temp_dir:
        # No whitespace between the directory and the file name: any literal
        # space here would point at a directory that does not exist.
        utils.write_yaml(model_data, f"{temp_dir}/model_config.yaml")

        self._stage_resource(
            resource_name="baseline-model",
            resource_dir=temp_dir,
            project_id=project_id,
            force=force,
        )
+
444
524
def add_dataset (
445
525
self ,
446
526
file_path : str ,
@@ -1034,7 +1114,6 @@ def status(self, project_id: int):
1034
1114
:obj:`commit` method).
1035
1115
"""
1036
1116
project_dir = f"{ OPENLAYER_DIR } /{ project_id } /staging"
1037
- valid_resource_names = ["model" , "training" , "validation" ]
1038
1117
1039
1118
if not os .listdir (project_dir ):
1040
1119
print (
@@ -1046,7 +1125,7 @@ def status(self, project_id: int):
1046
1125
if not os .path .exists (f"{ project_dir } /commit.yaml" ):
1047
1126
print ("The following resources are staged, waiting to be committed:" )
1048
1127
for file in os .listdir (project_dir ):
1049
- if file in valid_resource_names :
1128
+ if file in VALID_RESOURCE_NAMES :
1050
1129
print (f"\t - { file } " )
1051
1130
print ("Use the `commit` method to add a commit message to your changes." )
1052
1131
return
@@ -1055,7 +1134,7 @@ def status(self, project_id: int):
1055
1134
commit = yaml .safe_load (commit_file )
1056
1135
print ("The following resources are committed, waiting to be pushed:" )
1057
1136
for file in os .listdir (project_dir ):
1058
- if file != "commit.yaml" :
1137
+ if file in VALID_RESOURCE_NAMES :
1059
1138
print (f"\t - { file } " )
1060
1139
print (f"Commit message from { commit ['date' ]} :" )
1061
1140
print (f"\t { commit ['message' ]} " )
@@ -1128,31 +1207,43 @@ def _stage_resource(
1128
1207
force : bool
1129
1208
Whether to overwrite the resource if it already exists in the staging area.
1130
1209
"""
1131
- if resource_name not in [ "model" , "training" , "validation" ] :
1210
+ if resource_name not in VALID_RESOURCE_NAMES :
1132
1211
raise ValueError (
1133
- f"Resource name must be one of 'model', 'training', or 'validation',"
1134
- f" but got { resource_name } ."
1212
+ f"Resource name must be one of 'baseline-model', ' model', 'training', or 'validation',"
1213
+ f" but got ' { resource_name } ' ."
1135
1214
)
1136
1215
1137
- staging_dir = f"{ OPENLAYER_DIR } /{ project_id } /staging/{ resource_name } "
1216
+ project_dir = f"{ OPENLAYER_DIR } /{ project_id } /staging"
1217
+
1218
+ resources_staged = utils .list_resources_in_bundle (project_dir )
1138
1219
1139
- # Append 'dataset' to the end of the resource name for the prints
1140
- if resource_name in ["training" , "validation" ]:
1141
- resource_name += " dataset"
1220
+ if resource_name == "model" and "baseline-model" in resources_staged :
1221
+ raise exceptions .OpenlayerException (
1222
+ "Trying to stage a `model` when there is a `baseline-model` already staged."
1223
+ + " You can either add a `model` or a `baseline-model`, but not both at the same time."
1224
+ + " Please remove one of them from the staging area using the `restore` method."
1225
+ ) from None
1142
1226
1143
- if os .path .exists (staging_dir ):
1144
- print (f"Found an existing { resource_name } staged." )
1145
- overwrite = "n"
1227
+ if resource_name == "baseline-model" and "model" in resources_staged :
1228
+ raise exceptions .OpenlayerException (
1229
+ "Trying to stage a `baseline-model` when there is a `model` already staged."
1230
+ + " You can either add a `model` or a `baseline-model`, but not both at the same time."
1231
+ + " Please remove one of them from the staging area using the `restore` method."
1232
+ ) from None
1233
+
1234
+ if resource_name in resources_staged :
1235
+ print (f"Found an existing `{ resource_name } ` resource staged." )
1146
1236
1237
+ overwrite = "n"
1147
1238
if not force :
1148
1239
overwrite = input ("Do you want to overwrite it? [y/n] " )
1149
1240
if overwrite .lower () == "y" or force :
1150
- print (f"Overwriting previously staged { resource_name } ..." )
1151
- shutil .rmtree (staging_dir )
1241
+ print (f"Overwriting previously staged ` { resource_name } ` resource ..." )
1242
+ shutil .rmtree (project_dir + "/" + resource_name )
1152
1243
else :
1153
- print (f"Keeping the existing { resource_name } staged." )
1244
+ print (f"Keeping the existing ` { resource_name } ` resource staged." )
1154
1245
return
1155
1246
1156
- shutil .copytree (resource_dir , staging_dir )
1247
+ shutil .copytree (resource_dir , project_dir + "/" + resource_name )
1157
1248
1158
- print (f"Staged the { resource_name } !" )
1249
+ print (f"Staged the ` { resource_name } ` resource !" )
0 commit comments