Merged

Changes from all commits
27 commits
08960f1
Add Task Success Evaluator V0
Aug 11, 2025
1f16681
Add samples for task success evaluator
Aug 12, 2025
d3c068e
Run black
Aug 14, 2025
c2519bb
Modify output format
Aug 17, 2025
25a7ef1
Modify output format in the examples
Aug 17, 2025
05d1520
Make Task Success a private preview evaluator
Sep 16, 2025
11de796
Merge remote-tracking branch 'upstream/main' into selshafey/task_succ…
Sep 16, 2025
bfd7cc4
Minor TaskSuccessEvaluator prompt update
Sep 16, 2025
3cb4adb
Fix path for importing Task Success Evaluator in samples
Sep 16, 2025
8599d28
Modify path for TaskSuccessEvaluator in eval mapping
Sep 16, 2025
281c93b
Remove sample notebook
Sep 17, 2025
6beafde
To retrigger build pipelines
Sep 17, 2025
91a15fb
Merge branch 'main' into selshafey/task_success_evaluator
Sep 17, 2025
c86d86c
Add credential to TaskSuccessEvaluator
Sep 17, 2025
06264be
Run Black
Sep 17, 2025
58ed291
Merge branch 'main' into selshafey/task_success_evaluator
Sep 18, 2025
6b2ccaa
To retrigger build pipeline
Sep 18, 2025
b5c65c6
Minor prompt modification
Sep 18, 2025
111a617
Change tool_definitions type in TaskSuccess prompt
Sep 18, 2025
0a61b6d
Mark model grader tests as skip
Sep 18, 2025
d030d4c
Remove task success evaluator from the samples notebook
Sep 18, 2025
dd73e37
Rename Task Success to Task Completion
Sep 30, 2025
1e23d5a
Minor definition modification
Sep 30, 2025
38fc2da
Minor rename
Sep 30, 2025
cc2c973
Merge branch 'main' into selshafey/task_success_evaluator
Sep 30, 2025
e0047b6
remove task_success
Sep 30, 2025
55ad29d
Fix merge issue
Oct 2, 2025
@@ -11,7 +11,7 @@

# Import all evals
from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
from azure.ai.evaluation._evaluators._task_success import TaskSuccessEvaluator
from azure.ai.evaluation._evaluators._task_completion import TaskCompletionEvaluator
from azure.ai.evaluation import (
BleuScoreEvaluator,
CodeVulnerabilityEvaluator,
@@ -68,7 +68,7 @@
SexualEvaluator: "sexual",
SimilarityEvaluator: "similarity",
TaskAdherenceEvaluator: "task_adherence",
TaskSuccessEvaluator: "task_success",
TaskCompletionEvaluator: "task_completion",
ToolCallAccuracyEvaluator: "tool_call_accuracy",
UngroundedAttributesEvaluator: "ungrounded_attributes",
ViolenceEvaluator: "violence",
@@ -2,6 +2,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

from ._task_success import TaskSuccessEvaluator
from ._task_completion import TaskCompletionEvaluator

__all__ = ["TaskSuccessEvaluator"]
__all__ = ["TaskCompletionEvaluator"]
@@ -18,8 +18,8 @@


@experimental
class TaskSuccessEvaluator(PromptyEvaluatorBase[Union[str, bool]]):
"""The Task Success evaluator determines whether an AI agent successfully completed the requested task based on:
class TaskCompletionEvaluator(PromptyEvaluatorBase[Union[str, bool]]):
"""The Task Completion evaluator determines whether an AI agent successfully completed the requested task based on:

- Final outcome and deliverable of the task
- Completeness of task requirements
@@ -39,29 +39,29 @@ class TaskSuccessEvaluator(PromptyEvaluatorBase[Union[str, bool]]):

.. admonition:: Example:
.. literalinclude:: ../samples/evaluation_samples_evaluate.py
:start-after: [START task_success_evaluator]
:end-before: [END task_success_evaluator]
:start-after: [START task_completion_evaluator]
:end-before: [END task_completion_evaluator]
:language: python
:dedent: 8
:caption: Initialize and call a TaskSuccessEvaluator with a query and response.
:caption: Initialize and call a TaskCompletionEvaluator with a query and response.

.. admonition:: Example using Azure AI Project URL:

.. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
:start-after: [START task_success_evaluator]
:end-before: [END task_success_evaluator]
:start-after: [START task_completion_evaluator]
:end-before: [END task_completion_evaluator]
:language: python
:dedent: 8
:caption: Initialize and call TaskSuccessEvaluator using Azure AI Project URL in the following format
:caption: Initialize and call TaskCompletionEvaluator using Azure AI Project URL in the following format
https://{resource_name}.services.ai.azure.com/api/projects/{project_name}

"""

_PROMPTY_FILE = "task_success.prompty"
_RESULT_KEY = "task_success"
_PROMPTY_FILE = "task_completion.prompty"
_RESULT_KEY = "task_completion"
_OPTIONAL_PARAMS = ["tool_definitions"]

id = "azureai://built-in/evaluators/task_success"
id = "azureai://built-in/evaluators/task_completion"
"""Evaluator identifier, experimental and to be used only with evaluation in cloud."""

@override
@@ -84,19 +84,19 @@ def __call__(
response: Union[str, List[dict]],
tool_definitions: Optional[Union[dict, List[dict]]] = None,
) -> Dict[str, Union[str, bool]]:
"""Evaluate task success for a given query, response, and optionally tool definitions.
"""Evaluate task completion for a given query, response, and optionally tool definitions.
The query and response can be either a string or a list of messages.


Example with string inputs and no tools:
evaluator = TaskSuccessEvaluator(model_config)
evaluator = TaskCompletionEvaluator(model_config)
query = "Plan a 3-day itinerary for Paris with cultural landmarks and local cuisine."
response = "**Day 1:** Morning: Louvre Museum, Lunch: Le Comptoir du Relais..."

result = evaluator(query=query, response=response)

Example with list of messages:
evaluator = TaskSuccessEvaluator(model_config)
evaluator = TaskCompletionEvaluator(model_config)
query = [{'role': 'system', 'content': 'You are a helpful travel planning assistant.'}, {'createdAt': 1700000060, 'role': 'user', 'content': [{'type': 'text', 'text': 'Plan a 3-day Paris itinerary with cultural landmarks and cuisine'}]}]
response = [{'createdAt': 1700000070, 'run_id': '0', 'role': 'assistant', 'content': [{'type': 'text', 'text': '**Day 1:** Morning: Visit Louvre Museum (9 AM - 12 PM)...'}]}]
tool_definitions = [{'name': 'get_attractions', 'description': 'Get tourist attractions for a city.', 'parameters': {'type': 'object', 'properties': {'city': {'type': 'string', 'description': 'The city name.'}}}}]
@@ -109,7 +109,7 @@ def __call__(
:paramtype response: Union[str, List[dict]]
:keyword tool_definitions: An optional list of messages containing the tool definitions the agent is aware of.
:paramtype tool_definitions: Optional[Union[dict, List[dict]]]
:return: A dictionary with the task success evaluation results.
:return: A dictionary with the task completion evaluation results.
:rtype: Dict[str, Union[str, bool]]
"""

@@ -128,7 +128,7 @@ def __call__( # pylint: disable=docstring-missing-param

@override
async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[bool, str]]: # type: ignore[override]
"""Do Task Success evaluation.
"""Do Task Completion evaluation.
:param eval_input: The input to the evaluator. Expected to contain whatever inputs are needed for the _flow method
:type eval_input: Dict
:return: The evaluation result.
@@ -138,11 +138,11 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[bool, str]]: # ty
# which is a different schema than _base_prompty_eval.py
if "query" not in eval_input and "response" not in eval_input:
raise EvaluationException(
message=f"Both query and response must be provided as input to the Task Success evaluator.",
internal_message=f"Both query and response must be provided as input to the Task Success evaluator.",
message=f"Both query and response must be provided as input to the Task Completion evaluator.",
internal_message=f"Both query and response must be provided as input to the Task Completion evaluator.",
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.MISSING_FIELD,
target=ErrorTarget.TASK_SUCCESS_EVALUATOR,
target=ErrorTarget.TASK_COMPLETION_EVALUATOR,
)
eval_input["query"] = reformat_conversation_history(eval_input["query"], logger, include_system_messages=True)
eval_input["response"] = reformat_agent_response(eval_input["response"], logger, include_tool_messages=True)
@@ -155,7 +155,7 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[bool, str]]: # ty
if isinstance(success, str):
success = success.upper() == "TRUE"

success_result = "pass" if success == True else "fail"
success_result = "pass" if success else "fail"
reason = llm_output.get("explanation", "")
return {
f"{self._result_key}": success,
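Note: a minimal usage sketch of the renamed evaluator, assuming the string-input path shown in the docstring above. The endpoint, key, and deployment come from environment variables as in the samples later in this diff, and any result keys beyond "task_completion" are inferred from the partially shown return statement rather than confirmed by it.

import os
from azure.ai.evaluation._evaluators._task_completion import TaskCompletionEvaluator

model_config = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.environ.get("AZURE_OPENAI_KEY"),
    "azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
}

evaluator = TaskCompletionEvaluator(model_config=model_config)

# String inputs, mirroring the docstring example; message lists and
# tool_definitions are also accepted per the __call__ signature above.
result = evaluator(
    query="Plan a 3-day itinerary for Paris with cultural landmarks and local cuisine.",
    response="**Day 1:** Morning: Louvre Museum, Lunch: Le Comptoir du Relais...",
)

print(result["task_completion"])  # bool, keyed by _RESULT_KEY = "task_completion"
# A "pass"/"fail" entry and an explanation string follow the pattern visible in
# _do_eval, but their exact key names are cut off in this hunk.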
@@ -1,5 +1,5 @@
---
name: Task Success
name: Task Completion
description: Evaluates whether a task was successfully completed
model:
api: chat
@@ -27,7 +27,7 @@ You are an expert evaluator who determines if an agent has successfully complete
user:
ROLE
====
You are a judge on Task Success who assesses the final outcome of a user-agent interaction. Your single focus is: **Was the user's task successfully and completely accomplished?**
You are a judge on Task Completion who assesses the final outcome of a user-agent interaction. Your single focus is: **Was the user's task successfully and completely accomplished?**

You are NOT evaluating:
- How well the agent followed instructions
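Note: the _do_eval hunks earlier in this diff parse the judge's JSON output and tolerate string booleans. A small sketch of that normalization; the "success" key name is an assumption, since only "explanation" is visible in the diff.

import json

# Hypothetical judge output; "explanation" is the key read by _do_eval above,
# "success" is an assumed name for the boolean field.
llm_output = json.loads('{"success": "TRUE", "explanation": "All requirements of the task were met."}')

success = llm_output.get("success")
if isinstance(success, str):
    # String booleans such as "TRUE"/"FALSE" are normalized, matching the evaluator.
    success = success.upper() == "TRUE"
success_result = "pass" if success else "fail"
print(success, success_result)  # True pass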
@@ -85,7 +85,7 @@ class ErrorTarget(Enum):
FLUENCY_EVALUATOR = "FluencyEvaluator"
RETRIEVAL_EVALUATOR = "RetrievalEvaluator"
TASK_ADHERENCE_EVALUATOR = "TaskAdherenceEvaluator"
TASK_SUCCESS_EVALUATOR = "TaskSuccessEvaluator"
TASK_COMPLETION_EVALUATOR = "TaskCompletionEvaluator"
INDIRECT_ATTACK_EVALUATOR = "IndirectAttackEvaluator"
INDIRECT_ATTACK_SIMULATOR = "IndirectAttackSimulator"
ADVERSARIAL_SIMULATOR = "AdversarialSimulator"
4 changes: 2 additions & 2 deletions sdk/evaluation/azure-ai-evaluation/cspell.json
@@ -33,8 +33,8 @@
"sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_intent_resolution/intent_resolution.prompty",
"sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_data_sources/grounding.json",
"sdk/evaluation/azure-ai-evaluation/samples/data/evaluate_test_data.jsonl",
"sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_success/task_success.prompty",
"sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_success/_task_success.py"
"sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_completion/task_completion.prompty",
"sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_task_completion/_task_completion.py"
],
"words": [
"Aoai",
@@ -422,17 +422,17 @@ def evaluation_evaluate_classes_methods(self):
task_adherence_evaluator(query=query, response=response, tool_definitions=tool_definitions)
# [END task_adherence_evaluator]

# [START task_success_evaluator]
# [START task_completion_evaluator]
import os
from azure.ai.evaluation._evaluators._task_success import TaskSuccessEvaluator
from azure.ai.evaluation._evaluators._task_completion import TaskCompletionEvaluator

model_config = {
"azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
"api_key": os.environ.get("AZURE_OPENAI_KEY"),
"azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
}

task_success_evaluator = TaskSuccessEvaluator(model_config=model_config)
task_completion_evaluator = TaskCompletionEvaluator(model_config=model_config)

query = [
{"role": "system", "content": "You are a travel booking assistant. Help users find and book flights."},
@@ -499,8 +499,8 @@ def evaluation_evaluate_classes_methods(self):
}
]

task_success_evaluator(query=query, response=response, tool_definitions=tool_definitions)
# [END task_success_evaluator]
task_completion_evaluator(query=query, response=response, tool_definitions=tool_definitions)
# [END task_completion_evaluator]

# [START indirect_attack_evaluator]
import os
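Note: beyond the one-off call in the sample above, the renamed evaluator could be wired into a batch run with azure.ai.evaluation.evaluate. A hedged sketch, assuming a JSONL file whose rows carry "query" and "response" columns; the file name is a placeholder and task_completion_evaluator is the instance constructed in the sample.

from azure.ai.evaluation import evaluate

results = evaluate(
    data="agent_runs.jsonl",  # placeholder path; each line needs "query" and "response"
    evaluators={"task_completion": task_completion_evaluator},
)
print(results["metrics"])  # aggregate metrics for the run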
@@ -432,17 +432,17 @@ def evaluation_evaluate_classes_methods(self):
task_adherence_evaluator(query=query, response=response, tool_definitions=tool_definitions)
# [END task_adherence_evaluator]

# [START task_success_evaluator]
# [START task_completion_evaluator]
import os
from azure.ai.evaluation._evaluators._task_success import TaskSuccessEvaluator
from azure.ai.evaluation._evaluators._task_completion import TaskCompletionEvaluator

model_config = {
"azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), # https://<account_name>.services.ai.azure.com
"api_key": os.environ.get("AZURE_OPENAI_KEY"),
"azure_deployment": os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
}

task_success_evaluator = TaskSuccessEvaluator(model_config=model_config)
task_completion_evaluator = TaskCompletionEvaluator(model_config=model_config)

query = [
{"role": "system", "content": "You are a travel booking assistant. Help users find and book flights."},
@@ -509,8 +509,8 @@ def evaluation_evaluate_classes_methods(self):
}
]

task_success_evaluator(query=query, response=response, tool_definitions=tool_definitions)
# [END task_success_evaluator]
task_completion_evaluator(query=query, response=response, tool_definitions=tool_definitions)
# [END task_completion_evaluator]

# [START indirect_attack_evaluator]
import os
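Note: commit c86d86c adds a credential to the evaluator. A hedged sketch of how the project-endpoint sample above might pass it; the keyword name "credential" and the placeholder endpoint and deployment values are assumptions, not confirmed by the visible hunks.

from azure.identity import DefaultAzureCredential
from azure.ai.evaluation._evaluators._task_completion import TaskCompletionEvaluator

task_completion_evaluator = TaskCompletionEvaluator(
    model_config={
        "azure_endpoint": "https://<account_name>.services.ai.azure.com",  # placeholder
        "azure_deployment": "<deployment_name>",  # placeholder
    },
    credential=DefaultAzureCredential(),  # keyword name assumed from commit c86d86c
)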