@@ -63,16 +63,16 @@ def extract_evaluator_scores(evaluation_run_results: Any) -> Dict[str, float]:
        evaluation_run_results: EvaluationRunResult object containing evaluation results

    Returns:
-        Dictionary mapping evaluator IDs to their normalized scores (0-100)
+        Dictionary mapping evaluator names to their normalized scores (0-100)
    """
    scores: Dict[str, float] = {}
    if not evaluation_run_results.evaluation_run_results:
        return scores

    for result in evaluation_run_results.evaluation_run_results:
-        evaluator_id = result.evaluator_id
+        evaluator_name = result.evaluator_name
        score = result.result.score
-        scores[evaluator_id] = normalize_score_to_100(score)
+        scores[evaluator_name] = normalize_score_to_100(score)

    return scores
7878
@@ -89,7 +89,7 @@ def set_eval_set_run_output_and_metadata(

    Args:
        span: The OpenTelemetry span to set attributes on
-        evaluator_scores: Dictionary mapping evaluator IDs to their average scores (0-100)
+        evaluator_scores: Dictionary mapping evaluator names to their average scores (0-100)
        execution_id: The execution ID for the evaluation set run
        input_schema: The input schema from the runtime
        output_schema: The output schema from the runtime
@@ -137,7 +137,7 @@ def set_evaluation_output_and_metadata(

    Args:
        span: The OpenTelemetry span to set attributes on
-        evaluator_scores: Dictionary mapping evaluator IDs to their scores (0-100)
+        evaluator_scores: Dictionary mapping evaluator names to their scores (0-100)
        execution_id: The execution ID for this evaluation
        input_data: The input data for this evaluation
        has_error: Whether the evaluation had an error
@@ -212,15 +212,15 @@ async def configure_eval_set_run_span(

    Args:
        span: The OpenTelemetry span to configure
-        evaluator_averages: Dictionary mapping evaluator IDs to their average scores
+        evaluator_averages: Dictionary mapping evaluator names to their average scores
        execution_id: The execution ID for the evaluation set run
        schema: The runtime schema
        success: Whether the evaluation set run was successful
    """
    # Normalize all scores to 0-100 range
    evaluator_scores = {
-        evaluator_id: normalize_score_to_100(score)
-        for evaluator_id, score in evaluator_averages.items()
+        evaluator_name: normalize_score_to_100(score)
+        for evaluator_name, score in evaluator_averages.items()
    }

    # Get runtime schemas
226226 # Get runtime schemas
0 commit comments