
Commit a4c51af

fix: change score type to float to prevent truncation when evaluator score range is 0-1
The bug was in span_utils.py, where the EvalSetRunOutput and EvaluationOutput models typed their score field as int instead of float. Float scores (e.g., 0.85) were truncated to 0 when converted to int.

Changes:
- Changed the score field from int to float in the EvalSetRunOutput model
- Changed the score field from int to float in the EvaluationOutput model
- Removed the int() conversion when creating the output objects
- Updated tests to expect float scores instead of int

Fixes: AE-1000
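For context, a minimal sketch of the truncation the commit describes. The model shapes mirror span_utils.py, but this standalone snippet is illustrative only, not the actual module:

    from pydantic import BaseModel, ConfigDict, Field

    class OldOutput(BaseModel):
        """Pre-fix shape: score typed as int."""
        model_config = ConfigDict(populate_by_name=True)
        score: int = Field(..., alias="score")

    class NewOutput(BaseModel):
        """Post-fix shape: score typed as float."""
        model_config = ConfigDict(populate_by_name=True)
        score: float = Field(..., alias="score")

    # The old call sites wrapped the value in int(), so a score in the
    # 0-1 range collapsed to 0 before it ever reached the model:
    print(int(0.85))                         # 0
    print(OldOutput(score=int(0.85)).score)  # 0
    print(NewOutput(score=0.85).score)       # 0.85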
1 parent c120bef commit a4c51af

File tree

3 files changed: +12 / -12 lines


src/uipath/_cli/_evals/_span_utils.py

Lines changed: 4 additions & 4 deletions

@@ -18,15 +18,15 @@ class EvalSetRunOutput(BaseModel):
 
     model_config = ConfigDict(populate_by_name=True)
 
-    score: int = Field(..., alias="score")
+    score: float = Field(..., alias="score")
 
 
 class EvaluationOutput(BaseModel):
     """Output model for Evaluation span."""
 
     model_config = ConfigDict(populate_by_name=True)
 
-    score: int = Field(..., alias="score")
+    score: float = Field(..., alias="score")
 
 
 class EvaluationOutputSpanOutput(BaseModel):
@@ -91,7 +91,7 @@ def set_eval_set_run_output_and_metadata(
         success: Whether the evaluation set run was successful
     """
     # Set span output with overall score using Pydantic model (formatted for UI rendering)
-    output = EvalSetRunOutput(score=int(overall_score))
+    output = EvalSetRunOutput(score=overall_score)
     span.set_attribute("output", output.model_dump_json(by_alias=True, indent=2))
 
     # Set metadata attributes
@@ -139,7 +139,7 @@ def set_evaluation_output_and_metadata(
         error_message: Optional error message if has_error is True
     """
     # Set span output with average score using Pydantic model (formatted for UI rendering)
-    output = EvaluationOutput(score=int(avg_score))
+    output = EvaluationOutput(score=avg_score)
     span.set_attribute("output", output.model_dump_json(by_alias=True, indent=2))
 
     # Set input data if provided (formatted JSON for UI rendering)
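With the int() wrapper gone, a fractional score survives serialization into the span's "output" attribute. A small sketch of the round-trip, assuming the post-fix model; the span call itself is elided:

    import json
    from pydantic import BaseModel, ConfigDict, Field

    class EvalSetRunOutput(BaseModel):
        model_config = ConfigDict(populate_by_name=True)
        score: float = Field(..., alias="score")

    # Mirrors the serialization step in set_eval_set_run_output_and_metadata:
    output = EvalSetRunOutput(score=0.85)
    payload = output.model_dump_json(by_alias=True, indent=2)
    print(payload)                       # pretty-printed {"score": 0.85}
    print(json.loads(payload)["score"])  # 0.85, not 0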

tests/cli/eval/test_eval_span_utils.py

Lines changed: 7 additions & 7 deletions

@@ -45,7 +45,7 @@ def test_eval_set_run_output_model(self):
         data = json.loads(json_str)
 
         assert data == {"score": 85}
-        assert isinstance(data["score"], int)
+        assert isinstance(data["score"], (int, float))
 
     def test_evaluation_output_model(self):
         """Test EvaluationOutput model serialization."""
@@ -54,7 +54,7 @@ def test_evaluation_output_model(self):
         data = json.loads(json_str)
 
         assert data == {"score": 90}
-        assert isinstance(data["score"], int)
+        assert isinstance(data["score"], (int, float))
 
     def test_evaluation_output_span_output_model_with_justification(self):
         """Test EvaluationOutputSpanOutput model with justification."""
@@ -150,7 +150,7 @@ def test_set_eval_set_run_output_and_metadata(self):
         # Check output
         assert "output" in span.attributes
         output_data = json.loads(span.attributes["output"])
-        assert output_data == {"score": 82}
+        assert output_data == {"score": 82.5}
 
         # Check metadata
         assert span.attributes["agentId"] == "exec-123"
@@ -202,7 +202,7 @@ def test_set_evaluation_output_and_metadata(self):
         # Check output
         assert "output" in span.attributes
         output_data = json.loads(span.attributes["output"])
-        assert output_data == {"score": 88}
+        assert output_data == {"score": 88.3}
 
         # Check metadata
         assert span.attributes["agentId"] == "eval-789"
@@ -304,7 +304,7 @@ async def test_configure_eval_set_run_span(self):
 
         # Verify score calculation
         output_data = json.loads(span.attributes["output"])
-        assert output_data["score"] == 85  # (80 + 90) / 2
+        assert output_data["score"] == 85.0  # (80 + 90) / 2
 
         # Verify metadata
         assert span.attributes["agentId"] == "exec-complete"
@@ -377,7 +377,7 @@ async def test_configure_evaluation_span(self):
 
         # Verify score calculation
         output_data = json.loads(span.attributes["output"])
-        assert output_data["score"] == 80  # (70 + 90) / 2
+        assert output_data["score"] == 80.0  # (70 + 90) / 2
 
         # Verify metadata
         assert span.attributes["agentId"] == "eval-complete"
@@ -489,7 +489,7 @@ def test_set_evaluation_output_and_metadata_with_input_data(self):
 
         # Verify output is set
         output_data = json.loads(span.attributes["output"])
-        assert output_data == {"score": 92}
+        assert output_data == {"score": 92.0}
 
         # Verify other attributes
         assert span.attributes["agentId"] == "eval-input-test"
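The loosened isinstance checks reflect how json.loads types a number by its textual form: 85 parses as int while 85.0 parses as float, and either value satisfies the dict-equality asserts since 85 == 85.0 in Python. A quick illustration:

    import json

    print(type(json.loads('{"score": 85}')["score"]))    # <class 'int'>
    print(type(json.loads('{"score": 85.0}')["score"]))  # <class 'float'>
    print({"score": 85} == {"score": 85.0})              # True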

tests/cli/eval/test_eval_tracing_integration.py

Lines changed: 1 addition & 1 deletion

@@ -790,7 +790,7 @@ async def test_evaluation_set_run_span_has_output_attribute(
 
         output_data = json.loads(eval_span["attributes"]["output"])
         assert "score" in output_data
-        assert isinstance(output_data["score"], int)
+        assert isinstance(output_data["score"], (int, float))
 
     @pytest.mark.asyncio
     async def test_evaluation_span_has_metadata_attributes(
