Skip to content

Commit 6aaf4ad

Browse files
gustavocidornelas authored and whoseoyster committed
Completes UNB-2579 and UNB-2644 - repeated feature names for NLP datasets and shrink character limit for NLP dataset upload
1 parent a4e8cdf commit 6aaf4ad

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

unboxapi/__init__.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -965,7 +965,7 @@ def add_dataset(
965965
# Feature validations
966966
try:
967967
if text_column_name:
968-
feature_names.append(text_column_name)
968+
feature_names = [text_column_name]
969969
for feature_name in feature_names:
970970
headers.index(feature_name)
971971
except ValueError:
@@ -982,7 +982,6 @@ def add_dataset(
982982
f"Features {features_not_in_dataset} specified in `feature_names` "
983983
"are not in the dataset. \n"
984984
) from None
985-
986985
# Tag column validation
987986
try:
988987
if tag_column_name:
@@ -1002,10 +1001,10 @@ def add_dataset(
10021001
) from None
10031002
if task_type == TaskType.TextClassification:
10041003
max_text_size = df[text_column_name].str.len().max()
1005-
if max_text_size > 100000:
1004+
if max_text_size > 1000:
10061005
raise exceptions.UnboxSubscriptionPlanException(
10071006
"The dataset you are trying to upload contains rows with "
1008-
f"{max_text_size} characters, which exceeds the 100,000 character "
1007+
f"{max_text_size} characters, which exceeds the 1000 character "
10091008
"limit."
10101009
) from None
10111010

0 commit comments

Comments
 (0)