Remove fp8 kv cache option from grpo_fast (#1203)

finbarrtimbers · web-flow · commit 1986faa38de9 · 2025-11-17T18:20:07.000Z
diff --git a/open_instruct/benchmark_generators.py b/open_instruct/benchmark_generators.py
@@ -263,7 +263,6 @@ def setup_vllm_engines(
         prompt_queue=param_prompt_Q,
         results_queue=inference_results_Q,
         actor_manager=actor_manager,
-        use_fp8_kv_cache=args.use_fp8_kv_cache,
         inflight_updates=args.inflight_updates,
     )
 
diff --git a/open_instruct/grpo_fast.py b/open_instruct/grpo_fast.py
@@ -227,9 +227,6 @@ class Args:
     stop_strings: list[str] | None = None
     """List of strings that stop the generation when they are generated.
     The returned output will not contain the stop strings."""
-    use_fp8_kv_cache: bool = False
-    """Whether to use fp8 kv cache. This is useful for larger models or olmo."""
-
     # Algorithm
     async_steps: int = 1
     """Number of steps ahead to generate responses. Set to 0 to make the code synchronous. Values greater than 0 learn from a policy up to async_steps old like Cleanba (https://arxiv.org/abs/2310.00036)"""
@@ -2336,7 +2333,6 @@ def create_model_and_optimizer(
         results_queue=inference_results_Q,
         eval_results_queue=evaluation_inference_results_Q,
         actor_manager=actor_manager,
-        use_fp8_kv_cache=args.use_fp8_kv_cache,
         inflight_updates=args.inflight_updates,
     )
 
diff --git a/scripts/train/olmo3/32b_rl_smoke_test.sh b/scripts/train/olmo3/32b_rl_smoke_test.sh
@@ -69,7 +69,6 @@ python mason.py \
         --llm_judge_max_context_length 32768 \
         --clip_higher 0.272 \
         --allow_world_padding False \
-        --use_fp8_kv_cache False \
         --code_api_url https://p9f1719l7f.execute-api.us-west-2.amazonaws.com/prod/test_program \
         --code_pass_rate_reward_threshold 0.99 \
         --oe_eval_max_length 32768 \

Original file line number	Diff line number	Diff line change
`@@ -263,7 +263,6 @@ def setup_vllm_engines(`
`263`	`263`	`prompt_queue=param_prompt_Q,`
`264`	`264`	`results_queue=inference_results_Q,`
`265`	`265`	`actor_manager=actor_manager,`
`266`		`- use_fp8_kv_cache=args.use_fp8_kv_cache,`
`267`	`266`	`inflight_updates=args.inflight_updates,`
`268`	`267`	`)`
`269`	`268`