LazyAGI · JingofXin · Jul 8, 2025 · Jul 9, 2025 · Jul 9, 2025 · Jul 9, 2025
diff --git a/lazyllm/common/common.py b/lazyllm/common/common.py
@@ -31,15 +31,19 @@ def absorb(self, item):
 
 
 class ArgsDict(dict):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, with_line=True, **kwargs):  # with_line is keyword-only and never becomes a dict entry
         super(ArgsDict, self).__init__(*args, **kwargs)
+        self._with_line = with_line  # read by parse_kwargs to choose the key prefix
 
     def check_and_update(self, kw):
         assert set(kw.keys()).issubset(set(self)), f'unexpected keys: {set(kw.keys()) - set(self)}'
         self.update(kw)
 
     def parse_kwargs(self):
-        string = ' '.join(f'--{k}={v}' if type(v) is not str else f'--{k}=\"{v}\"' for k, v in self.items())
+        if self._with_line:  # emit "--key=value" pairs; str values are wrapped in double quotes
+            string = ' '.join(f'--{k}={v}' if type(v) is not str else f'--{k}=\"{v}\"' for k, v in self.items())
+        else:  # same pairs without the leading "--"
+            string = ' '.join(f'{k}={v}' if type(v) is not str else f'{k}=\"{v}\"' for k, v in self.items())
         return string
 
 class CaseInsensitiveDict(dict):

diff --git a/lazyllm/components/deploy/vllm.py b/lazyllm/components/deploy/vllm.py
@@ -72,12 +72,17 @@ def __init__(self, trust_remote_code: bool = True, launcher: LazyLLMLaunchersBas
                 ray_launcher[0], post_action=(lazyllm.parallel(*parall_launcher) if len(parall_launcher) else None))
 
     def cmd(self, finetuned_model=None, base_model=None, master_ip=None):
-        if not os.path.exists(finetuned_model) or \
-            not any(filename.endswith('.bin') or filename.endswith('.safetensors')
-                    for filename in os.listdir(finetuned_model)):
-            if not finetuned_model:
-                LOG.warning(f"Note! That finetuned_model({finetuned_model}) is an invalid path, "
-                            f"base_model({base_model}) will be used")
+        if not finetuned_model:
+            LOG.warning(f"Note! finetuned_model is empty, using base_model({base_model}) instead.")
+            finetuned_model = base_model
+        elif not os.path.exists(finetuned_model):
+            LOG.warning(f"Warning! The finetuned_model path does not exist: {finetuned_model}. "
+                        f"Using base_model({base_model}) instead.")
+            finetuned_model = base_model
+        elif not any(filename.endswith(('.bin', '.safetensors', '.pt'))
+                     for filename in os.listdir(finetuned_model)):
+            LOG.warning(f"Warning! No valid model files (.bin, .safetensors or .pt) found in: {finetuned_model}. "
+                        f"Using base_model({base_model}) instead.")
             finetuned_model = base_model
 
         def impl():

diff --git a/lazyllm/components/finetune/__init__.py b/lazyllm/components/finetune/__init__.py
@@ -3,11 +3,13 @@
 from .collie import CollieFinetune
 from .llamafactory import LlamafactoryFinetune
 from .flagembedding import FlagembeddingFinetune
+from .easyr1 import EasyR1Finetune
 
 __all__ = [
     'LazyLLMFinetuneBase',
     'AlpacaloraFinetune',
     'CollieFinetune',
     'LlamafactoryFinetune',
     'FlagembeddingFinetune',
+    'EasyR1Finetune',
 ]
diff --git a/lazyllm/components/finetune/easy_r1/config.yaml b/lazyllm/components/finetune/easy_r1/config.yaml
@@ -0,0 +1,103 @@
+data:  # dataset sources, prompt/answer field names, length limits and batch sizes
+  train_files: hiyouga/math12k@train  # presumably <dataset>@<split> — confirm against the data loader
+  val_files: hiyouga/math12k@test
+  prompt_key: problem
+  answer_key: answer
+  image_key: images
+  video_key: videos
+  image_dir: null
+  video_fps: 2.0
+  max_prompt_length: 2048
+  max_response_length: 2048
+  rollout_batch_size: 512  # equivalent to verl's data.train_batch_size
+  mini_rollout_batch_size: null  # equivalent to verl's data.gen_batch_size
+  val_batch_size: 1024
+  format_prompt: ./examples/format_prompt/math.jinja  # NOTE(review): relative path; confirm it resolves from the launch directory
+  override_chat_template: null
+  shuffle: true
+  seed: 1
+  min_pixels: 262144  # 512 * 512
+  max_pixels: 4194304  # 2048 * 2048
+  filter_overlong_prompts: true
+
+algorithm:  # advantage estimator, KL settings and online filtering
+  adv_estimator: grpo
+  disable_kl: false
+  use_kl_loss: true
+  kl_penalty: low_var_kl
+  kl_coef: 1.0e-2
+  online_filtering: false  # dapo filter groups
+  filter_key: overall  # presumably filter_low/filter_high bound this key when online_filtering is on — confirm
+  filter_low: 0.01
+  filter_high: 0.99
+
+worker:  # per-role settings: actor, rollout, ref and reward
+  actor:
+    global_batch_size: 128  # equivalent to verl's actor.ppo_mini_batch_size
+    micro_batch_size_per_device_for_update: 4  # equivalent to verl's actor.ppo_micro_batch_size_per_gpu
+    micro_batch_size_per_device_for_experience: 16  # equivalent to verl's rollout.log_prob_micro_batch_size_per_gpu
+    max_grad_norm: 1.0
+    padding_free: true
+    ulysses_size: 1
+    model:
+      model_path: /mnt/lustre/share_data/sunxiaoye/models/qwen2.5-0.5b-instruct  # NOTE(review): machine-specific absolute path; must be overridden elsewhere
+      enable_gradient_checkpointing: true
+      trust_remote_code: false
+      freeze_vision_tower: false
+    optim:
+      lr: 1.0e-6
+      weight_decay: 1.0e-2
+      strategy: adamw  # {adamw, adamw_bf16}
+      lr_warmup_ratio: 0.0
+    fsdp:
+      enable_full_shard: true
+      enable_cpu_offload: false
+      enable_rank0_init: true
+    offload:
+      offload_params: true  # true: more CPU memory; false: more GPU memory
+      offload_optimizer: true  # true: more CPU memory; false: more GPU memory
+
+  rollout:
+    n: 5
+    temperature: 1.0
+    top_p: 0.99
+    limit_images: 0
+    gpu_memory_utilization: 0.6
+    enforce_eager: false
+    enable_chunked_prefill: false
+    tensor_parallel_size: 1
+    disable_tqdm: false
+    val_override_config:  # presumably replaces the rollout values above during validation — confirm
+      temperature: 0.5
+      n: 1
+
+  ref:
+    fsdp:
+      enable_full_shard: true
+      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
+      enable_rank0_init: true
+    offload:
+      offload_params: false
+
+  reward:
+    reward_type: batch
+    reward_function: ./examples/reward_function/math.py:compute_score  # NOTE(review): relative path, looks like <file>:<function> — confirm it resolves
+
+trainer:  # epochs, logging, validation cadence and checkpointing
+  total_epochs: 15
+  max_steps: null
+  project_name: easy_r1
+  experiment_name: qwen2_5_7b_math_grpo  # NOTE(review): name says 7b, but worker.actor.model.model_path names a 0.5b model
+  logger: ["console"]
+  nnodes: 1
+  n_gpus_per_node: 1
+  max_try_make_batch: 20  # -1 means no limit
+  val_freq: 5  # -1 to disable
+  val_before_train: true
+  val_only: false
+  val_generations_to_log: 3
+  save_freq: 5  # -1 to disable
+  save_limit: 3  # -1 to disable
+  save_model_only: false
+  save_checkpoint_path: null
+  load_checkpoint_path: null
diff --git a/lazyllm/components/finetune/easy_r1/format_prompt/dapo.jinja b/lazyllm/components/finetune/easy_r1/format_prompt/dapo.jinja
@@ -0,0 +1 @@
+Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\n{{ content | trim }}\n\nRemember to put your answer on its own line after "Answer:".
diff --git a/lazyllm/components/finetune/easy_r1/format_prompt/math.jinja b/lazyllm/components/finetune/easy_r1/format_prompt/math.jinja
@@ -0,0 +1 @@
+{{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.
diff --git a/lazyllm/components/finetune/easy_r1/format_prompt/r1v.jinja b/lazyllm/components/finetune/easy_r1/format_prompt/r1v.jinja
@@ -0,0 +1 @@
+{{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>
diff --git a/lazyllm/components/finetune/easy_r1/model_merger.py b/lazyllm/components/finetune/easy_r1/model_merger.py
@@ -0,0 +1,188 @@
+# flake8: noqa
+# Copyright 2024 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import re
+from concurrent.futures import ThreadPoolExecutor
+from typing import Dict, List, Tuple
+
+import numpy as np
+import torch
+from torch.distributed._tensor import DTensor, Placement, Shard
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoModelForTokenClassification,
+    AutoModelForVision2Seq,
+    PretrainedConfig,
+    PreTrainedModel,
+)
+
+
+def merge_by_placement(tensors: List[torch.Tensor], placement: Placement):
+    """Combine per-rank tensors into one tensor according to ``placement``.
+
+    A replicated placement yields the first tensor unchanged. A sharded
+    placement concatenates every tensor along ``placement.dim`` and returns a
+    contiguous result.
+
+    Raises:
+        NotImplementedError: if the placement is partial.
+        ValueError: if the placement is none of replicate, partial or shard.
+    """
+    if placement.is_replicate():
+        return tensors[0]
+    if placement.is_partial():
+        raise NotImplementedError("Partial placement is not supported yet")
+    if not placement.is_shard():
+        raise ValueError(f"Unsupported placement: {placement}")
+
+    merged = torch.cat(tensors, dim=placement.dim)
+    return merged.contiguous()
+
+
+def upload_model_to_huggingface(local_path: str, remote_path: str):
+    """Upload the folder ``local_path`` to the Hugging Face repo ``remote_path``.
+
+    The repo is created as a non-private repo if it does not exist yet
+    (``exist_ok=True``), then the folder is uploaded with ``repo_type="model"``.
+    """
+    # Imported lazily so huggingface_hub is only needed when uploading.
+    from huggingface_hub import HfApi
+
+    hub = HfApi()
+    hub.create_repo(repo_id=remote_path, private=False, exist_ok=True)
+    hub.upload_folder(repo_id=remote_path, folder_path=local_path, repo_type="model")
+
+
+if __name__ == "__main__":
+    # Merge the per-rank checkpoint shards found in --local_dir into a single
+    # state dict, save it under <local_dir>/huggingface via save_pretrained and,
+    # if --hf_upload_path is given, upload that folder to the Hugging Face Hub.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--local_dir", required=True, type=str, help="The path for your saved model")
+    parser.add_argument("--hf_upload_path", default=False, type=str, help="The path of the huggingface repo to upload")
+    args = parser.parse_args()
+    local_dir: str = args.local_dir
+
+    assert not local_dir.endswith("huggingface"), "The local_dir should not end with huggingface."
+
+    # copy rank zero to find the shape of (dp, fsdp)
+    rank = 0
+    world_size = 0
+    for filename in os.listdir(local_dir):
+        match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename)
+        if match:
+            # Kept as the matched string: it is re-used verbatim in shard filenames.
+            world_size = match.group(1)
+            break
+
+    assert world_size, "No model file with the proper format."
+
+    # NOTE: weights_only=False lets torch.load unpickle arbitrary objects; only
+    # run this script on checkpoints that come from a trusted source.
+    rank0_weight_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt")
+    state_dict = torch.load(rank0_weight_path, map_location="cpu", weights_only=False)
+    pivot_key = sorted(state_dict.keys())[0]
+    weight = state_dict[pivot_key]
+    if isinstance(weight, DTensor):
+        # get sharding info
+        device_mesh = weight.device_mesh
+        mesh = device_mesh.mesh
+        mesh_dim_names = device_mesh.mesh_dim_names
+    else:
+        # for non-DTensor
+        mesh = np.array([int(world_size)], dtype=np.int64)
+        mesh_dim_names = ("fsdp",)
+
+    print(f"Got device mesh {mesh}, mesh_dim_names {mesh_dim_names}")
+
+    assert mesh_dim_names in (("fsdp",), ("ddp", "fsdp")), f"Unsupported mesh_dim_names {mesh_dim_names}."
+
+    # NOTE: the assert above only admits ("fsdp",) and ("ddp", "fsdp"), so the
+    # "tp" branch below cannot currently be taken.
+    if "tp" in mesh_dim_names:
+        # fsdp * tp
+        total_shards = mesh.shape[-1] * mesh.shape[-2]
+        mesh_shape = (mesh.shape[-2], mesh.shape[-1])
+    else:
+        # fsdp
+        total_shards = mesh.shape[-1]
+        mesh_shape = (mesh.shape[-1],)
+
+    print(f"Processing {total_shards} model shards in total.")
+    # Slot 0 holds the already-loaded rank-0 state dict; the remaining slots are
+    # placeholders that process_one_shard overwrites by rank index.
+    model_state_dict_lst = []
+    model_state_dict_lst.append(state_dict)
+    model_state_dict_lst.extend([""] * (total_shards - 1))
+
+    def process_one_shard(rank, model_state_dict_lst):
+        """Load the checkpoint of ``rank`` on CPU and store it at index ``rank``."""
+        model_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt")
+        state_dict = torch.load(model_path, map_location="cpu", weights_only=False)
+        model_state_dict_lst[rank] = state_dict
+        return state_dict
+
+    # os.cpu_count() may return None, which min() cannot compare with an int.
+    with ThreadPoolExecutor(max_workers=min(32, os.cpu_count() or 1)) as executor:
+        futures = [
+            executor.submit(process_one_shard, shard_rank, model_state_dict_lst)
+            for shard_rank in range(1, total_shards)
+        ]
+        for future in futures:
+            # result() re-raises a failed load instead of leaving a placeholder behind.
+            future.result()
+
+    state_dict: Dict[str, List[torch.Tensor]] = {}
+    param_placements: Dict[str, List[Placement]] = {}
+    keys = set(model_state_dict_lst[0].keys())
+    for key in keys:
+        state_dict[key] = []
+        for shard_rank, model_state_dict in enumerate(model_state_dict_lst):
+            try:
+                tensor = model_state_dict.pop(key)
+            except KeyError as err:
+                # Carrying on would append the previous rank's tensor (or hit an
+                # unbound name), so stop and report the rank that lacks the key.
+                raise KeyError(f"Cannot find key {key} in rank {shard_rank}.") from err
+
+            # Every collected tensor is cast to bfloat16.
+            if isinstance(tensor, DTensor):
+                state_dict[key].append(tensor._local_tensor.bfloat16())
+                placements = tuple(tensor.placements)
+                # replicated placement at ddp dimension can be discarded
+                if mesh_dim_names[0] == "ddp":
+                    placements = placements[1:]
+
+                if key not in param_placements:
+                    param_placements[key] = placements
+                else:
+                    assert param_placements[key] == placements
+            else:
+                state_dict[key].append(tensor.bfloat16())
+
+    del model_state_dict_lst
+
+    for key in sorted(state_dict):
+        if not isinstance(state_dict[key], list):
+            print(f"No need to merge key {key}")
+            continue
+
+        if key in param_placements:
+            # merge shards
+            placements: Tuple[Shard] = param_placements[key]
+            if len(mesh_shape) == 1:
+                # 1-D list, FSDP without TP
+                assert len(placements) == 1
+                shards = state_dict[key]
+                state_dict[key] = merge_by_placement(shards, placements[0])
+            else:
+                # 2-D list, FSDP + TP
+                raise NotImplementedError("FSDP + TP is not supported yet.")
+        else:
+            # No recorded placement (plain tensors): concatenate along dim 0.
+            state_dict[key] = torch.cat(state_dict[key], dim=0)
+
+    print("Merge completed.")
+    hf_path = os.path.join(local_dir, "huggingface")
+    config: PretrainedConfig = AutoConfig.from_pretrained(hf_path)
+    architectures: List[str] = getattr(config, "architectures", ["Unknown"])
+
+    # Pick the Auto* class from the first architecture name in the config.
+    if "ForTokenClassification" in architectures[0]:
+        AutoClass = AutoModelForTokenClassification
+    elif "ForCausalLM" in architectures[0]:
+        AutoClass = AutoModelForCausalLM
+    elif "ForConditionalGeneration" in architectures[0]:
+        AutoClass = AutoModelForVision2Seq
+    else:
+        raise NotImplementedError(f"Unknown architecture {architectures}.")
+
+    # Build the model on the meta device; the weights written out come from
+    # the merged state_dict passed to save_pretrained below.
+    with torch.device("meta"):
+        model: PreTrainedModel = AutoClass.from_config(config, torch_dtype=torch.bfloat16)
+
+    assert isinstance(model, PreTrainedModel)
+    model.to_empty(device="cpu")
+
+    print(f"Saving model to {hf_path}...")
+    model.save_pretrained(hf_path, state_dict=state_dict)
+    del state_dict, model
+
+    if args.hf_upload_path:
+        upload_model_to_huggingface(hf_path, args.hf_upload_path)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\n{{ content \| trim }}\n\nRemember to put your answer on its own line after "Answer:".
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{{ content \| trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{{ content \| trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>