diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index e7015b3e8309..eece18881253 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -9,12 +9,11 @@
 
 ### Features Added
 
 - Added support for Azure OpenAI Python grader via `AzureOpenAIPythonGrader` class, which serves as a wrapper around Azure Open AI Python grader configurations. This new grader object can be supplied to the main `evaluate` method as if it were a normal callable evaluator.
-
-### Features Added
 - Added `attack_success_thresholds` parameter to `RedTeam` class for configuring custom thresholds that determine attack success. This allows users to set specific threshold values for each risk category, with scores greater than the threshold considered successful attacks (i.e. higher threshold means higher tolerance for harmful responses).
 - Enhanced threshold reporting in RedTeam results to include default threshold values when custom thresholds aren't specified, providing better transparency about the evaluation criteria used.
+
 ### Bugs Fixed
 
 - Fixed red team scan `output_path` issue where individual evaluation results were overwriting each other instead of being preserved as separate files. Individual evaluations now create unique files while the user's `output_path` is reserved for final aggregated results.
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
index 12808bf576fe..c40e1b5286d2 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
@@ -178,7 +178,6 @@ def _log_metrics_and_instance_results_onedp(
 
     properties = {
         EvaluationRunProperties.RUN_TYPE: "eval_run",
-        EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
         EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
         "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
     }
@@ -191,6 +190,7 @@ def _log_metrics_and_instance_results_onedp(
     upload_run_response = client.start_evaluation_run(
         evaluation=EvaluationUpload(
             display_name=evaluation_name,
+            properties=properties,
        )
     )
 
@@ -202,7 +202,6 @@ def _log_metrics_and_instance_results_onedp(
            outputs={
                "evaluationResultId": create_evaluation_result_response.id,
            },
-            properties=properties,
        ),
    )
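
Reviewer note: below is a minimal, hedged usage sketch of the two changelog items above. The import paths and any parameter marked "assumed" are illustrative guesses based on the existing Azure OpenAI grader wrappers and the `RedTeam` API, not something confirmed by this diff.

```python
# Hedged sketch of the two changelog items above -- not taken from this diff.
# Anything marked "assumed" is an illustrative guess, not the exact SDK surface.
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import evaluate, AzureOpenAIPythonGrader  # assumed export location
from azure.ai.evaluation.red_team import RedTeam, RiskCategory     # assumed export location

model_config = {  # assumed Azure OpenAI model configuration shape
    "azure_endpoint": "https://<resource>.openai.azure.com",
    "azure_deployment": "<deployment>",
    "api_key": "<key>",
}

# 1) The new grader wraps an Azure OpenAI Python grader configuration and is
#    supplied to `evaluate` like any other callable evaluator.
python_grader = AzureOpenAIPythonGrader(
    model_config=model_config,
    name="custom_python_check",  # assumed parameter name
)
result = evaluate(
    data="data.jsonl",
    evaluators={"python_check": python_grader},
)

# 2) Per-risk-category attack-success thresholds: a score greater than the
#    threshold counts as a successful attack (higher threshold = higher tolerance).
red_team = RedTeam(
    azure_ai_project="<project endpoint>",  # assumed value shape
    credential=DefaultAzureCredential(),
    risk_categories=[RiskCategory.Violence],
    attack_success_thresholds={RiskCategory.Violence: 5},  # assumed key/value shape
)
```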