From 87e03141ee9277ae1d0ab854cd38b8a7d169a35a Mon Sep 17 00:00:00 2001 From: scuuy <912074188@qq.com> Date: Tue, 4 Nov 2025 18:12:22 +0800 Subject: [PATCH 1/4] Allow AnswerJudgePromptQuestion in ReasoningAnswerModelJudgeFilter and make it the default prompt template --- .../filter/reasoning_answer_model_judge_filter.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dataflow/operators/reasoning/filter/reasoning_answer_model_judge_filter.py b/dataflow/operators/reasoning/filter/reasoning_answer_model_judge_filter.py index 33f5c0f0..eeb4fecc 100644 --- a/dataflow/operators/reasoning/filter/reasoning_answer_model_judge_filter.py +++ b/dataflow/operators/reasoning/filter/reasoning_answer_model_judge_filter.py @@ -3,7 +3,7 @@ from dataflow.core import OperatorABC from dataflow.utils.storage import DataFlowStorage from dataflow.core import LLMServingABC -from dataflow.prompts.model_evaluation.general import AnswerJudgePrompt +from dataflow.prompts.model_evaluation.general import AnswerJudgePromptQuestion, AnswerJudgePrompt from dataflow.core.prompt import prompt_restrict, DIYPromptABC import re @@ -12,7 +12,8 @@ from typing import Union @prompt_restrict( - AnswerJudgePrompt + AnswerJudgePromptQuestion, + AnswerJudgePrompt, ) @OPERATOR_REGISTRY.register() @@ -20,7 +21,7 @@ class ReasoningAnswerModelJudgeFilter(OperatorABC): def __init__(self, system_prompt: str = "You are a helpful assistant specialized in evaluating answer correctness.", llm_serving: LLMServingABC = None, - prompt_template: Union[AnswerJudgePrompt, DIYPromptABC] = AnswerJudgePrompt, + prompt_template: Union[AnswerJudgePrompt, DIYPromptABC] = AnswerJudgePromptQuestion, keep_all_samples: bool = False, # 新增参数,控制是否保留所有样本 ): From 29bfe1fbe17305398a9bbea549e5002d4156653e Mon Sep 17 00:00:00 2001 From: scuuy <912074188@qq.com> Date: Tue, 4 Nov 2025 18:31:22 +0800 Subject: [PATCH 2/4] Add AnswerJudgePromptQuestion to the prompt_template type hint in ReasoningAnswerModelJudgeFilter --- .../reasoning/filter/reasoning_answer_model_judge_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dataflow/operators/reasoning/filter/reasoning_answer_model_judge_filter.py b/dataflow/operators/reasoning/filter/reasoning_answer_model_judge_filter.py index eeb4fecc..28d45457 100644 --- a/dataflow/operators/reasoning/filter/reasoning_answer_model_judge_filter.py +++ b/dataflow/operators/reasoning/filter/reasoning_answer_model_judge_filter.py @@ -21,7 +21,7 @@ class ReasoningAnswerModelJudgeFilter(OperatorABC): def __init__(self, system_prompt: str = "You are a helpful assistant specialized in evaluating answer correctness.", llm_serving: LLMServingABC = None, - prompt_template: Union[AnswerJudgePrompt, DIYPromptABC] = AnswerJudgePromptQuestion, + prompt_template: Union[AnswerJudgePromptQuestion,AnswerJudgePrompt, DIYPromptABC] = AnswerJudgePromptQuestion, keep_all_samples: bool = False, # 新增参数,控制是否保留所有样本 ): From 1559321cfc573c05529723aabad9416cbb8a8530 Mon Sep 17 00:00:00 2001 From: scuuy <912074188@qq.com> Date: Wed, 5 Nov 2025 16:04:00 +0800 Subject: [PATCH 3/4] fix bug in build_prompt --- dataflow/prompts/model_evaluation/general.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/prompts/model_evaluation/general.py b/dataflow/prompts/model_evaluation/general.py index 195ec7ee..6eabd925 100644 --- a/dataflow/prompts/model_evaluation/general.py +++ b/dataflow/prompts/model_evaluation/general.py @@ -13,7 +13,7 @@ class AnswerJudgePrompt(PromptABC): def __init__(self): pass - def build_prompt(self, answer, reference_answer): + def build_prompt(self, answer, reference_answer, question=None): prompt = f""" As an answer evaluation expert, please assess whether the following answer is correct. 
From 6d60e80ff2589beb4dd94074d288d317ac1f7ec6 Mon Sep 17 00:00:00 2001 From: scuuy <912074188@qq.com> Date: Wed, 5 Nov 2025 16:31:48 +0800 Subject: [PATCH 4/4] Fix the question-filter system prompt in the general and DIY reasoning pipelines (drop the math-only wording) --- .../statics/pipelines/api_pipelines/reasoning_diy_pipeline.py | 2 +- .../pipelines/api_pipelines/reasoning_general_pipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dataflow/statics/pipelines/api_pipelines/reasoning_diy_pipeline.py b/dataflow/statics/pipelines/api_pipelines/reasoning_diy_pipeline.py index f6b36d3d..e72b15b3 100644 --- a/dataflow/statics/pipelines/api_pipelines/reasoning_diy_pipeline.py +++ b/dataflow/statics/pipelines/api_pipelines/reasoning_diy_pipeline.py @@ -61,7 +61,7 @@ def __init__(self, llm_serving: LLMServingABC = None): ) self.question_filter_step1 = ReasoningQuestionFilter( - system_prompt="You are an expert in evaluating mathematical problems. Follow the user's instructions strictly and output your final judgment in the required JSON format.", + system_prompt="You are an expert in evaluating problems. Follow the user's instructions strictly and output your final judgment in the required JSON format.", llm_serving=self.llm_serving, prompt_template=DiyQuestionFilterPrompt(DIY_PROMPT_QUESTION) ) diff --git a/dataflow/statics/pipelines/api_pipelines/reasoning_general_pipeline.py b/dataflow/statics/pipelines/api_pipelines/reasoning_general_pipeline.py index cce72c63..38fd42a1 100644 --- a/dataflow/statics/pipelines/api_pipelines/reasoning_general_pipeline.py +++ b/dataflow/statics/pipelines/api_pipelines/reasoning_general_pipeline.py @@ -31,7 +31,7 @@ def __init__(self, llm_serving: LLMServingABC = None): ) self.question_filter_step1 = ReasoningQuestionFilter( - system_prompt="You are an expert in evaluating mathematical problems. Follow the user's instructions strictly and output your final judgment in the required JSON format.", + system_prompt="You are an expert in evaluating problems. 
Follow the user's instructions strictly and output your final judgment in the required JSON format.", llm_serving=self.llm_serving, prompt_template=GeneralQuestionFilterPrompt() )