7
7
import uuid
8
8
from typing import Callable , List , Optional
9
9
10
+ import marshmallow as ma
10
11
import pandas as pd
11
- from bentoml .saved_bundle .bundler import _write_bento_content_to_dir
12
- from bentoml .utils .tempdir import TempDirectory
13
- from marshmallow import ValidationError
12
+ from bentoml .saved_bundle import bundler
13
+ from bentoml .utils import tempdir
14
14
15
- from .api import Api
15
+ from . import api , exceptions , schemas , utils
16
16
from .datasets import Dataset
17
- from .exceptions import (
18
- UnboxDatasetInconsistencyError ,
19
- UnboxDuplicateTask ,
20
- UnboxResourceError ,
21
- UnboxSubscriptionPlanException ,
22
- UnboxValidationError ,
23
- )
24
17
from .models import Model , ModelType , create_template_model
25
18
from .projects import Project
26
- from .schemas import DatasetSchema , ModelSchema , ProjectSchema
27
19
from .tasks import TaskType
28
- from .utils import HidePrints
29
20
from .version import __version__ # noqa: F401
30
21
31
22
@@ -46,7 +37,7 @@ class UnboxClient(object):
46
37
"""
47
38
48
39
def __init__(self, api_key: Optional[str] = None):
    """Create the API handle and fetch the account's subscription plan.

    Parameters
    ----------
    api_key : Optional[str]
        Key passed through unchanged to ``api.Api``. Defaults to ``None``;
        how a missing key is treated is decided by ``api.Api`` and is not
        visible from here (TODO confirm).
    """
    self.api = api.Api(api_key)
    # Requested once, at construction time, through the handle created
    # above; the result is kept on the instance for later plan checks
    # (e.g. the ``datasetSize`` lookup done when uploading a dataset).
    self.subscription_plan = self.api.get_request("me/subscription-plan")
51
42
52
43
def create_project (
@@ -94,11 +85,13 @@ def create_project(
94
85
obj:`add_dataframe` for detailed examples.
95
86
"""
96
87
# ----------------------------- Schema validation ---------------------------- #
97
- project_schema = ProjectSchema ()
88
+ project_schema = schemas . ProjectSchema ()
98
89
try :
99
90
project_schema .load ({"name" : name , "description" : description })
100
- except ValidationError as err :
101
- raise UnboxValidationError (self ._format_error_message (err )) from None
91
+ except ma .ValidationError as err :
92
+ raise exceptions .UnboxValidationError (
93
+ self ._format_error_message (err )
94
+ ) from None
102
95
103
96
endpoint = "projects"
104
97
payload = dict (name = name , description = description , taskType = task_type .value )
@@ -195,7 +188,7 @@ def create_or_load_project(
195
188
return self .create_project (
196
189
name = name , task_type = task_type , description = description
197
190
)
198
- except UnboxDuplicateTask :
191
+ except exceptions . UnboxDuplicateTask :
199
192
return self .load_project (name )
200
193
201
194
def add_model (
@@ -502,17 +495,17 @@ def add_model(
502
495
TaskType .TabularClassification ,
503
496
TaskType .TextClassification ,
504
497
]:
505
- raise UnboxValidationError (
498
+ raise exceptions . UnboxValidationError (
506
499
"`task_type` must be either TaskType.TabularClassification or "
507
500
"TaskType.TextClassification. \n "
508
501
) from None
509
502
if model_type not in [model_framework for model_framework in ModelType ]:
510
- raise UnboxValidationError (
503
+ raise exceptions . UnboxValidationError (
511
504
"`model_type` must be one of the supported ModelTypes. Check out "
512
505
"our API reference for a full list "
513
506
"https://reference.unbox.ai/reference/api/unboxapi.ModelType.html. \n "
514
507
) from None
515
- model_schema = ModelSchema ()
508
+ model_schema = schemas . ModelSchema ()
516
509
try :
517
510
model_schema .load (
518
511
{
@@ -530,27 +523,29 @@ def add_model(
530
523
"dependent_dir" : dependent_dir ,
531
524
}
532
525
)
533
- except ValidationError as err :
534
- raise UnboxValidationError (self ._format_error_message (err )) from None
526
+ except ma .ValidationError as err :
527
+ raise exceptions .UnboxValidationError (
528
+ self ._format_error_message (err )
529
+ ) from None
535
530
536
531
# --------------------------- Resource validations --------------------------- #
537
532
# Requirements check
538
533
if requirements_txt_file and not os .path .isfile (
539
534
os .path .expanduser (requirements_txt_file )
540
535
):
541
- raise UnboxResourceError (
536
+ raise exceptions . UnboxResourceError (
542
537
f"File `{ requirements_txt_file } ` does not exist. \n "
543
538
) from None
544
539
545
540
# Setup script
546
541
if setup_script and not os .path .isfile (os .path .expanduser (setup_script )):
547
- raise UnboxResourceError (
542
+ raise exceptions . UnboxResourceError (
548
543
f"File `{ setup_script } ` does not exist. \n "
549
544
) from None
550
545
551
546
# Dependent dir
552
547
if dependent_dir and dependent_dir == os .getcwd ():
553
- raise UnboxResourceError (
548
+ raise exceptions . UnboxResourceError (
554
549
"`dependent_dir` cannot be the working directory. \n " ,
555
550
mitigation = "Make sure that the specified `dependent_dir` is different "
556
551
f"from `{ os .getcwd ()} `." ,
@@ -559,13 +554,13 @@ def add_model(
559
554
# Training set
560
555
if task_type in [TaskType .TabularClassification , TaskType .TabularRegression ]:
561
556
if len (train_sample_df .index ) < 100 :
562
- raise UnboxResourceError (
557
+ raise exceptions . UnboxResourceError (
563
558
context = "There's an issue with the specified `train_sample_df`. \n " ,
564
559
message = f"Only { len (train_sample_df .index )} rows were found. \n " ,
565
560
mitigation = "Make sure to upload a training sample with 100+ rows." ,
566
561
) from None
567
562
if train_sample_df .isnull ().values .any ():
568
- raise UnboxResourceError (
563
+ raise exceptions . UnboxResourceError (
569
564
context = "There's an issue with the specified `train_sample_df`. \n " ,
570
565
message = f"The `train_sample_df` contains null values, which is "
571
566
"currently not supported. \n " ,
@@ -579,14 +574,14 @@ def add_model(
579
574
580
575
# predict_proba
581
576
if not isinstance (function , Callable ):
582
- raise UnboxValidationError (
577
+ raise exceptions . UnboxValidationError (
583
578
f"- `{ function } ` specified as `function` is not callable. \n "
584
579
) from None
585
580
586
581
user_args = function .__code__ .co_varnames [: function .__code__ .co_argcount ][2 :]
587
582
kwarg_keys = tuple (kwargs )
588
583
if user_args != kwarg_keys :
589
- raise UnboxResourceError (
584
+ raise exceptions . UnboxResourceError (
590
585
context = "There's an issue with the speficied `function`. \n " ,
591
586
message = f"Your function's additional args { user_args } do not match the "
592
587
f"kwargs you specifed { kwarg_keys } . \n " ,
@@ -601,20 +596,20 @@ def add_model(
601
596
TaskType .TabularRegression ,
602
597
]:
603
598
test_input = train_sample_df [:3 ][feature_names ].to_numpy ()
604
- with HidePrints ():
599
+ with utils . HidePrints ():
605
600
function (model , test_input , ** kwargs )
606
601
else :
607
602
test_input = [
608
603
"Unbox is great!" ,
609
604
"Let's see if this function is ready for some error analysis" ,
610
605
]
611
- with HidePrints ():
606
+ with utils . HidePrints ():
612
607
function (model , test_input , ** kwargs )
613
608
except Exception as e :
614
609
exception_stack = "" .join (
615
610
traceback .format_exception (type (e ), e , e .__traceback__ )
616
611
)
617
- raise UnboxResourceError (
612
+ raise exceptions . UnboxResourceError (
618
613
context = "There's an issue with the specified `function`. \n " ,
619
614
message = f"It is failing with the following error: \n "
620
615
f"{ exception_stack } " ,
@@ -626,7 +621,7 @@ def add_model(
626
621
# Transformers resources
627
622
if model_type is ModelType .transformers :
628
623
if "tokenizer" not in kwargs :
629
- raise UnboxResourceError (
624
+ raise exceptions . UnboxResourceError (
630
625
context = "There's a missing kwarg for the specified model type. \n " ,
631
626
message = "`tokenizer` must be specified in kwargs when using a "
632
627
"transformers model. \n " ,
@@ -648,7 +643,7 @@ def add_model(
648
643
for feature in feature_names + [train_sample_label_column_name ]
649
644
if feature not in headers
650
645
]
651
- raise UnboxDatasetInconsistencyError (
646
+ raise exceptions . UnboxDatasetInconsistencyError (
652
647
f"Features { features_not_in_dataset } specified in `feature_names` "
653
648
"are not on the training sample. \n "
654
649
) from None
@@ -660,13 +655,13 @@ def add_model(
660
655
]
661
656
for value , field in required_fields :
662
657
if value is None :
663
- raise UnboxDatasetInconsistencyError (
658
+ raise exceptions . UnboxDatasetInconsistencyError (
664
659
message = f"TabularClassification task missing `{ field } `.\n " ,
665
660
mitigation = f"Make sure to specify `{ field } ` for tabular "
666
661
"classification tasks." ,
667
662
) from None
668
663
669
- with TempDirectory () as dir :
664
+ with tempdir . TempDirectory () as dir :
670
665
bento_service = create_template_model (
671
666
model_type ,
672
667
task_type ,
@@ -686,9 +681,9 @@ def add_model(
686
681
bento_service .pack ("function" , function )
687
682
bento_service .pack ("kwargs" , kwargs )
688
683
689
- with TempDirectory () as temp_dir :
684
+ with tempdir . TempDirectory () as temp_dir :
690
685
print ("Bundling model and artifacts..." )
691
- _write_bento_content_to_dir (bento_service , temp_dir )
686
+ bundler . _write_bento_content_to_dir (bento_service , temp_dir )
692
687
693
688
if model_type is ModelType .rasa :
694
689
dependent_dir = model .model_metadata .model_dir
@@ -715,7 +710,7 @@ def add_model(
715
710
)
716
711
717
712
# Tar the model bundle with its artifacts and upload
718
- with TempDirectory () as tarfile_dir :
713
+ with tempdir . TempDirectory () as tarfile_dir :
719
714
tarfile_path = f"{ tarfile_dir } /model"
720
715
721
716
with tarfile .open (tarfile_path , mode = "w:gz" ) as tar :
@@ -899,11 +894,11 @@ def add_dataset(
899
894
TaskType .TabularClassification ,
900
895
TaskType .TextClassification ,
901
896
]:
902
- raise UnboxValidationError (
897
+ raise exceptions . UnboxValidationError (
903
898
"`task_type` must be either TaskType.TabularClassification or "
904
899
"TaskType.TextClassification. \n "
905
900
) from None
906
- dataset_schema = DatasetSchema ()
901
+ dataset_schema = schemas . DatasetSchema ()
907
902
try :
908
903
dataset_schema .load (
909
904
{
@@ -920,14 +915,16 @@ def add_dataset(
920
915
"categorical_feature_names" : categorical_feature_names ,
921
916
}
922
917
)
923
- except ValidationError as err :
924
- raise UnboxValidationError (self ._format_error_message (err )) from None
918
+ except ma .ValidationError as err :
919
+ raise exceptions .UnboxValidationError (
920
+ self ._format_error_message (err )
921
+ ) from None
925
922
926
923
# --------------------------- Resource validations --------------------------- #
927
924
exp_file_path = os .path .expanduser (file_path )
928
925
object_name = "original.csv"
929
926
if not os .path .isfile (exp_file_path ):
930
- raise UnboxResourceError (
927
+ raise exceptions . UnboxResourceError (
931
928
f"File at path `{ file_path } ` does not contain the dataset. \n "
932
929
) from None
933
930
@@ -939,7 +936,7 @@ def add_dataset(
939
936
df = pd .read_csv (file_path , sep = sep )
940
937
941
938
if df .isnull ().values .any ():
942
- raise UnboxResourceError (
939
+ raise exceptions . UnboxResourceError (
943
940
context = "There's an issue with the specified dataset. \n " ,
944
941
message = "The dataset contains null values, which is currently "
945
942
"not supported. \n " ,
@@ -951,14 +948,14 @@ def add_dataset(
951
948
try :
952
949
headers .index (label_column_name )
953
950
except ValueError :
954
- raise UnboxDatasetInconsistencyError (
951
+ raise exceptions . UnboxDatasetInconsistencyError (
955
952
f"`{ label_column_name } ` specified as `label_column_name` is not "
956
953
"in the dataset. \n "
957
954
) from None
958
955
959
956
dataset_classes = list (df [label_column_name ].unique ())
960
957
if len (dataset_classes ) > len (class_names ):
961
- raise UnboxDatasetInconsistencyError (
958
+ raise exceptions . UnboxDatasetInconsistencyError (
962
959
f"There are { len (dataset_classes )} classes represented in the dataset, "
963
960
f"but only { len (class_names )} items in your `class_names`. \n " ,
964
961
mitigation = f"Make sure that there are at most { len (class_names )} "
@@ -973,15 +970,15 @@ def add_dataset(
973
970
headers .index (feature_name )
974
971
except ValueError :
975
972
if text_column_name :
976
- raise UnboxDatasetInconsistencyError (
973
+ raise exceptions . UnboxDatasetInconsistencyError (
977
974
f"`{ text_column_name } ` specified as `text_column_name` is not in "
978
975
"the dataset. \n "
979
976
) from None
980
977
else :
981
978
features_not_in_dataset = [
982
979
feature for feature in feature_names if feature not in headers
983
980
]
984
- raise UnboxDatasetInconsistencyError (
981
+ raise exceptions . UnboxDatasetInconsistencyError (
985
982
f"Features { features_not_in_dataset } specified in `feature_names` "
986
983
"are not in the dataset. \n "
987
984
) from None
@@ -991,22 +988,22 @@ def add_dataset(
991
988
if tag_column_name :
992
989
headers .index (tag_column_name )
993
990
except ValueError :
994
- raise UnboxDatasetInconsistencyError (
991
+ raise exceptions . UnboxDatasetInconsistencyError (
995
992
f"`{ tag_column_name } ` specified as `tag_column_name` is not in "
996
993
"the dataset. \n "
997
994
) from None
998
995
999
996
# ----------------------- Subscription plan validations ---------------------- #
1000
997
if row_count > self .subscription_plan ["datasetSize" ]:
1001
- raise UnboxSubscriptionPlanException (
998
+ raise exceptions . UnboxSubscriptionPlanException (
1002
999
f"The dataset your are trying to upload contains { row_count } rows, "
1003
1000
"which exceeds your plan's limit of "
1004
1001
f"{ self .subscription_plan ['datasetSize' ]} . \n "
1005
1002
) from None
1006
1003
if task_type == TaskType .TextClassification :
1007
1004
max_text_size = df [text_column_name ].str .len ().max ()
1008
1005
if max_text_size > 100000 :
1009
- raise UnboxSubscriptionPlanException (
1006
+ raise exceptions . UnboxSubscriptionPlanException (
1010
1007
"The dataset you are trying to upload contains rows with "
1011
1008
f"{ max_text_size } characters, which exceeds the 100,000 character "
1012
1009
"limit."
@@ -1182,7 +1179,7 @@ def add_dataframe(
1182
1179
"""
1183
1180
# --------------------------- Resource validations --------------------------- #
1184
1181
if not isinstance (df , pd .DataFrame ):
1185
- raise UnboxValidationError (
1182
+ raise exceptions . UnboxValidationError (
1186
1183
f"- `df` is a `{ type (df )} `, but it must be of type `pd.DataFrame`. \n "
1187
1184
) from None
1188
1185
with tempfile .TemporaryDirectory () as tmp_dir :
0 commit comments