Skip to content

Commit 1986faa

Browse files
Remove fp8 kv cache option from grpo_fast (#1203)
1 parent: a152ba8 · commit: 1986faa

File tree

3 files changed: 0 additions and 6 deletions.

open_instruct/benchmark_generators.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,6 @@ def setup_vllm_engines(
263 | 263 |   prompt_queue=param_prompt_Q,
264 | 264 |   results_queue=inference_results_Q,
265 | 265 |   actor_manager=actor_manager,
266 |     | - use_fp8_kv_cache=args.use_fp8_kv_cache,
267 | 266 |   inflight_updates=args.inflight_updates,
268 | 267 |   )
269 | 268 |

open_instruct/grpo_fast.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -227,9 +227,6 @@ class Args:
227 | 227 |   stop_strings: list[str] | None = None
228 | 228 |   """List of strings that stop the generation when they are generated.
229 | 229 |   The returned output will not contain the stop strings."""
230 |     | - use_fp8_kv_cache: bool = False
231 |     | - """Whether to use fp8 kv cache. This is useful for larger models or olmo."""
232 |     | -
233 | 230 |   # Algorithm
234 | 231 |   async_steps: int = 1
235 | 232 |   """Number of steps ahead to generate responses. Set to 0 to make the code synchronous. Values greater than 0 learn from a policy up to async_steps old like Cleanba (https://arxiv.org/abs/2310.00036)"""
@@ -2336,7 +2333,6 @@ def create_model_and_optimizer(
2336 | 2333 |   results_queue=inference_results_Q,
2337 | 2334 |   eval_results_queue=evaluation_inference_results_Q,
2338 | 2335 |   actor_manager=actor_manager,
2339 |      | - use_fp8_kv_cache=args.use_fp8_kv_cache,
2340 | 2336 |   inflight_updates=args.inflight_updates,
2341 | 2337 |   )
2342 | 2338 |

scripts/train/olmo3/32b_rl_smoke_test.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,6 @@ python mason.py \
69 | 69 |   --llm_judge_max_context_length 32768 \
70 | 70 |   --clip_higher 0.272 \
71 | 71 |   --allow_world_padding False \
72 |    | - --use_fp8_kv_cache False \
73 | 72 |   --code_api_url https://p9f1719l7f.execute-api.us-west-2.amazonaws.com/prod/test_program \
74 | 73 |   --code_pass_rate_reward_threshold 0.99 \
75 | 74 |   --oe_eval_max_length 32768 \

0 commit comments

Comments
 (0)