-
Notifications
You must be signed in to change notification settings - Fork 765
Description
Here is the command I use for evaluation:
CUDA_VISIBLE_DEVICES=4,5,6,7,0,1,2,3 lm_eval --model vllm --model_args pretrained=/home/lizhangming/lzm_project/s1/ckpts/s1_20250625_075920,tokenizer=/home/lizhangming/lzm_project/s1/ckpts/s1_20250625_075920,dtype=float32,tensor_parallel_size=8 --tasks aime24_nofigures --batch_size auto --apply_chat_template --output_path results/aime24_nofigures_full_fintune --log_samples --gen_kwargs "max_gen_toks=32768,max_tokens_thinking=auto,thinking_n_ignore=2,thinking_n_ignore_str=Wait"
I used DeepSpeed ZeRO-3 (deepspeed_zero3) to fine-tune Qwen/Qwen2.5-32B-Instruct.
Here is my training script:
uid="$(date +%Y%m%d_%H%M%S)"
base_model="Qwen/Qwen2.5-32B-Instruct"
lr=1e-5
epochs=5
micro_batch_size=2
push_to_hub=false
gradient_accumulation_steps=1
max_steps=-1
gpu_count=8
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file train/deepspeed_zero3.yaml --deepspeed_config_file train/ds_config.json \
train/sft.py \
--per_device_train_batch_size=${micro_batch_size} \
--per_device_eval_batch_size=${micro_batch_size} \
--gradient_accumulation_steps=${gradient_accumulation_steps} \
--num_train_epochs=${epochs} \
--max_steps=${max_steps} \
--train_file_path="simplescaling/s1K_tokenized" \
--model_name=${base_model} \
--warmup_ratio=0.05 \
--bf16=True \
--eval_strategy="steps" \
--eval_steps=50 \
--logging_steps=1 \
--lr_scheduler_type="cosine" \
--learning_rate=${lr} \
--weight_decay=1e-4 \
--adam_beta1=0.9 \
--adam_beta2=0.95 \
--output_dir="ckpts/s1_${uid}" \
--save_only_model=True \
--gradient_checkpointing=True \
--save_strategy=no \
--dataset_text_field="text"
deepspeed_zero3.yaml
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
ds_config.json
{
"train_micro_batch_size_per_gpu": "auto",
"gradient_accumulation_steps": 1,
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 3,
"offload_optimizer": {
"device": "cpu",
"pin_memory": true
},
"offload_param": {
"device": "none"
},
"stage3_param_persistence_threshold": 1e6,
"stage3_max_live_parameters": 1e9,
"stage3_prefetch_bucket_size": 5e7
},
"bf16": {
"enabled": true
}
}
