5 changes: 5 additions & 0 deletions api/common/config.py
@@ -1,3 +1,7 @@
# Copyright (c) MLCommons and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
@@ -42,4 +46,5 @@
"ssl_org_pem_file_path": os.environ["SSL_ORG_PEM_FILE"],
"trial_jwtexp": 900,
"frontend_ip": os.environ["FRONTEND_IP"],
"runpod_api_key": os.environ.get("RUNPOD_API_KEY", ""),
}
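The new "runpod_api_key" entry falls back to an empty string when RUNPOD_API_KEY is unset, unlike the required keys above that are read with os.environ[...]. A minimal sketch of how a caller might guard against a blank key before building the RunPod auth header (the helper name is hypothetical, not part of this PR):

import os

# Mirrors the config pattern above: a missing env var yields "", not a KeyError.
runpod_api_key = os.environ.get("RUNPOD_API_KEY", "")

def runpod_headers(api_key: str) -> dict:
    # Fail fast if the key was never configured rather than sending an empty Bearer token.
    if not api_key:
        raise RuntimeError("RUNPOD_API_KEY is not set; cannot call the RunPod endpoint")
    return {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }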
121 changes: 79 additions & 42 deletions api/controllers/models.py
@@ -344,10 +344,14 @@ def do_upload_via_train_files(credentials, tid, model_name):

else:
current_upload = json.loads(upload.file.read().decode("utf-8"))
upload.file.seek(0)
payload = {
"id_json": current_upload,
"bucket_name": task.s3_bucket,
"key": name,
"input": {
"id_json": current_upload,
"bucket_name": task.s3_bucket,
"key": name,
"model_id": model[1], # Add model_id for backend processing
}
}
s3_client.upload_fileobj(
upload.file,
@@ -356,63 +360,96 @@ def do_upload_via_train_files(credentials, tid, model_name):
)

light_model_endpoint = task.lambda_model
r = requests.post(light_model_endpoint, json=payload)

try:
score = r.json()["score"]
except Exception as ex:
logger.exception(ex)
subject = f"Model {model_name} failed training as {r.json()['detail']}"
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {config['runpod_api_key']}",
}

r = requests.post(light_model_endpoint, json=payload, headers=headers)

if r.status_code != 200:
logger.error(
f"RunPod request failed with status {r.status_code}: {r.text}"
)
Email().send(
contact=user.email,
cc_contact="[email protected]",
template_name="model_train_failed.txt",
msg_dict={"name": model_name},
subject=subject,
msg_dict={"name": model_name, "model_id": model[1]},
subject=f"Model {model_name} submission failed",
)
bottle.abort(400)
bottle.abort(400, f"RunPod request failed: {r.text}")

logger.info(f"RunPod job submitted successfully for model {model_name}")
score = None # Will be set by the async evaluation

did = dm.getByName(name).id
r_realid = rm.getByTid(tid)[0].rid
if isinstance(task_config.get("perf_metric"), list):
metric = task_config.get("perf_metric")[0].get("type")
elif isinstance(task_config.get("perf_metric"), dict):
metric = task_config.get("perf_metric").get("type")
new_score = {
metric: score,
"perf": score,
"perf_std": 0.0,
"perf_by_tag": [
{
"tag": str(name),
"pretty_perf": f"{score} %",
"perf": score,
"perf_std": 0.0,
"perf_dict": {metric: score},
}
],
}

new_score_string = json.dumps(new_score)
if score is not None:
new_score = {
metric: score,
"perf": score,
"perf_std": 0.0,
"perf_by_tag": [
{
"tag": str(name),
"pretty_perf": f"{score} %",
"perf": score,
"perf_std": 0.0,
"perf_dict": {metric: score},
}
],
}

new_score_string = json.dumps(new_score)

sm.create(
model_id=model[1],
r_realid=r_realid,
did=did,
pretty_perf=f"{score} %",
perf=score,
metadata_json=new_score_string,
)

sm.create(
model_id=model[1],
r_realid=r_realid,
did=did,
pretty_perf=f"{score} %",
perf=score,
metadata_json=new_score_string,
if any(upload.content_type != "text/plain" for upload in train_files.values()):
Email().send(
contact=user.email,
cc_contact="[email protected]",
template_name="model_train_successful.txt",
msg_dict={"name": model_name, "model_id": model[1]},
subject=f"Model {model_name} submitted for evaluation.",
)

Email().send(
contact=user.email,
cc_contact="[email protected]",
template_name="model_train_successful.txt",
msg_dict={"name": model_name, "model_id": model[1]},
subject=f"Model {model_name} training succeeded.",
)
return util.json_encode(
{
"success": "ok",
"model_id": model[1],
"message": "Model submitted for evaluation. You will receive an email when evaluation is complete.",
}
)
else:
Email().send(
contact=user.email,
cc_contact="[email protected]",
template_name="model_train_successful.txt",
msg_dict={"name": model_name, "model_id": model[1]},
subject=f"Model {model_name} evaluation completed.",
)

return util.json_encode({"success": "ok", "model_id": model[1]})
return util.json_encode(
{
"success": "ok",
"model_id": model[1],
"message": "Model evaluation completed successfully.",
}
)


@bottle.post("/models/upload_predictions/<tid:int>/<model_name>")
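The controller change above wraps the job parameters in RunPod's {"input": {...}} envelope, authenticates with a Bearer token, and no longer blocks on a synchronous score (score stays None until the async evaluation reports back). A minimal standalone sketch of that submission flow, assuming the same payload fields; the function name and timeout are illustrative, not part of this PR:

import logging

import requests

logger = logging.getLogger(__name__)


def submit_runpod_job(endpoint, api_key, id_json, bucket_name, key, model_id):
    # Wrap the job in the "input" envelope expected by the serverless endpoint.
    payload = {
        "input": {
            "id_json": id_json,
            "bucket_name": bucket_name,
            "key": key,
            "model_id": model_id,
        }
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    r = requests.post(endpoint, json=payload, headers=headers, timeout=30)
    if r.status_code != 200:
        logger.error(f"RunPod request failed with status {r.status_code}: {r.text}")
        raise RuntimeError(f"RunPod request failed: {r.text}")
    # The response only acknowledges the job; the score arrives later via the async pipeline.
    return r.json()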
54 changes: 50 additions & 4 deletions backend/app/domain/services/base/score.py
@@ -401,7 +401,7 @@ def add_scores_and_update_model(
self.email_helper.send(
contact=user.email,
cc_contact=self.email_sender,
template_name="model_inference_failed.txt",
template_name="model_evaluation_failed.txt",
msg_dict={"name": model["name"], "message": message},
subject=f"Model {model['name']} evaluation failed.",
)
@@ -418,11 +418,57 @@ def add_scores_and_update_model(
round_info = self.round_repository.get_round_info_by_round_and_task(
model["tid"], round_id
)

# Get task configuration to determine score handling
task_config = self.task_repository.get_config_file_by_task_id(
model["tid"]
)[0]
task_config = yaml.safe_load(task_config)

metadata_json = dict(scores)

# Determine the main performance metric based on task configuration
perf_metric = task_config.get("perf_metric", {})
if isinstance(perf_metric, list):
main_metric = perf_metric[0].get("type", "score")
elif isinstance(perf_metric, dict):
main_metric = perf_metric.get("type", "score")
else:
main_metric = "score" # Default fallback

# Extract the score value - handle different formats
score_value = None

# First, try to extract from nested results (for RunPod format)
if "results" in metadata_json and "score" in metadata_json["results"]:
score_value = metadata_json["results"]["score"]
elif "score" in metadata_json:
score_value = metadata_json["score"]
elif main_metric in metadata_json:
score_value = metadata_json[main_metric]
elif "Standard_CER_15_WORSE" in metadata_json:
# Backward compatibility for speech tasks
score_value = metadata_json["Standard_CER_15_WORSE"]
main_metric = "Standard_CER_15_WORSE"
else:
raise ValueError(f"No score found in metadata: {metadata_json}")

# Format the score appropriately based on metric type
if main_metric == "Standard_CER_15_WORSE":
# Speech recognition - percentage format
pretty_perf = f"{100 * score_value:.2f}%"
else:
# Other tasks - decimal format
pretty_perf = f"{score_value:.4f}"

# Store the main metric type in metadata for reference
metadata_json["main_metric"] = main_metric
metadata_json["task_perf_metric"] = perf_metric

# Build the score structure with proper metric information
new_score = {
"perf": metadata_json["Standard_CER_15_WORSE"],
"pretty_perf": f"{100*metadata_json['Standard_CER_15_WORSE']:.2f}%",
"perf": score_value,
"pretty_perf": pretty_perf,
"mid": model_id,
"r_realid": round_info.id,
"did": datasets[0]["id"],
@@ -437,7 +483,7 @@
cc_contact=self.email_sender,
template_name="model_evaluation_sucessful.txt",
msg_dict={"name": model["name"], "model_id": model["id"]},
subject=f"Model {model['name']} evaluation succeeded.",
subject=f"Model {model['name']} evaluation completed successfully.",
)
print(
f"sent email evaluation sucessful to {user.email} model {model['name']} "
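The score-extraction block added above walks a fallback chain: nested RunPod results.score, then a flat score key, then the task's configured metric, then the legacy Standard_CER_15_WORSE speech key, and formats pretty_perf accordingly. A compact sketch of that chain in isolation (the function name and return shape are hypothetical):

def extract_score(metadata_json, main_metric):
    # Preference order mirrors the diff: RunPod nested payload, flat payload,
    # task-configured metric key, then the legacy speech metric.
    if "results" in metadata_json and "score" in metadata_json["results"]:
        score_value = metadata_json["results"]["score"]
    elif "score" in metadata_json:
        score_value = metadata_json["score"]
    elif main_metric in metadata_json:
        score_value = metadata_json[main_metric]
    elif "Standard_CER_15_WORSE" in metadata_json:
        score_value = metadata_json["Standard_CER_15_WORSE"]
        main_metric = "Standard_CER_15_WORSE"
    else:
        raise ValueError(f"No score found in metadata: {metadata_json}")

    # Percentage formatting for the speech metric, plain decimal otherwise.
    if main_metric == "Standard_CER_15_WORSE":
        pretty_perf = f"{100 * score_value:.2f}%"
    else:
        pretty_perf = f"{score_value:.4f}"
    return score_value, pretty_perf, main_metric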