Fix comment wording: a pairwise RM needs at least two rollouts (not "judgments") sampled

swarna · swarna · commit b1ba0e246273 · 2025-08-27T18:04:03.000Z
diff --git a/src/fairseq2/recipes/lm/_online_finetune/_grpo.py b/src/fairseq2/recipes/lm/_online_finetune/_grpo.py
@@ -194,7 +194,7 @@ def validate_reward(
             ) in self._config.loss_config.validation_vllm_sampling_params.items():
                 policy_sampling_params.__setattr__(k, v)
 
-            # For a pairwise RM, need to sample at least two judgments
+            # For a pairwise RM, need to sample at least two rollouts
             policy_sampling_params.n = (
                 2 if self._reward.reward_name == "generative_pairwise_verifier" else 1
             )
diff --git a/src/fairseq2/recipes/lm/_online_finetune/_online_dpo.py b/src/fairseq2/recipes/lm/_online_finetune/_online_dpo.py
@@ -140,7 +140,7 @@ def validate_reward(
             ) in self._config.loss_config.validation_vllm_sampling_params.items():
                 policy_sampling_params.__setattr__(k, v)
 
-            # For a pairwise RM, need to sample at least two judgments
+            # For a pairwise RM, need to sample at least two rollouts
             policy_sampling_params.n = (
                 2 if self._reward.reward_name == "generative_pairwise_verifier" else 1
             )

Original file line number	Diff line number	Diff line change
`@@ -194,7 +194,7 @@ def validate_reward(`
`194`	`194`	`) in self._config.loss_config.validation_vllm_sampling_params.items():`
`195`	`195`	`policy_sampling_params.__setattr__(k, v)`
`196`	`196`
`197`		`- # For a pairwise RM, need to sample at least two judgments`
	`197`	`+ # For a pairwise RM, need to sample at least two rollouts`
`198`	`198`	`policy_sampling_params.n = (`
`199`	`199`	`2 if self._reward.reward_name == "generative_pairwise_verifier" else 1`
`200`	`200`	`)`
Original file line number	Diff line number	Diff line change
`@@ -140,7 +140,7 @@ def validate_reward(`
`140`	`140`	`) in self._config.loss_config.validation_vllm_sampling_params.items():`
`141`	`141`	`policy_sampling_params.__setattr__(k, v)`
`142`	`142`
`143`		`- # For a pairwise RM, need to sample at least two judgments`
	`143`	`+ # For a pairwise RM, need to sample at least two rollouts`
`144`	`144`	`policy_sampling_params.n = (`
`145`	`145`	`2 if self._reward.reward_name == "generative_pairwise_verifier" else 1`
`146`	`146`	`)`