Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions lazyllm/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,19 @@ def absorb(self, item):


class ArgsDict(dict):
    """Dictionary of arguments that can render itself as a command-line string.

    Args:
        *args: Positional arguments forwarded unchanged to ``dict``.
        with_line: Keyword-only flag. When true (the default), ``parse_kwargs``
            renders every key with a leading ``--``; when false, keys are
            rendered without any prefix.
        **kwargs: Keyword arguments forwarded unchanged to ``dict``.
    """

    def __init__(self, *args, with_line=True, **kwargs):
        super(ArgsDict, self).__init__(*args, **kwargs)
        self._with_line = with_line

    def check_and_update(self, kw):
        """Update the dictionary with ``kw``, allowing only keys already present.

        Raises:
            AssertionError: if ``kw`` contains a key that is not in this dict.
        """
        assert set(kw.keys()).issubset(set(self)), f'unexpected keys: {set(kw.keys()) - set(self)}'
        self.update(kw)

    def parse_kwargs(self):
        """Return all items joined by single spaces as ``[--]key=value`` tokens.

        Values whose exact type is ``str`` are wrapped in double quotes; every
        other value is formatted as-is.
        """
        # The only difference between the two rendering modes is the key prefix,
        # so compute it once instead of duplicating the whole expression.
        prefix = '--' if self._with_line else ''
        return ' '.join(
            f'{prefix}{k}="{v}"' if type(v) is str else f'{prefix}{k}={v}'
            for k, v in self.items()
        )

class CaseInsensitiveDict(dict):
Expand Down
17 changes: 11 additions & 6 deletions lazyllm/components/deploy/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,17 @@ def __init__(self, trust_remote_code: bool = True, launcher: LazyLLMLaunchersBas
ray_launcher[0], post_action=(lazyllm.parallel(*parall_launcher) if len(parall_launcher) else None))

def cmd(self, finetuned_model=None, base_model=None, master_ip=None):
if not os.path.exists(finetuned_model) or \
not any(filename.endswith('.bin') or filename.endswith('.safetensors')
for filename in os.listdir(finetuned_model)):
if not finetuned_model:
LOG.warning(f"Note! That finetuned_model({finetuned_model}) is an invalid path, "
f"base_model({base_model}) will be used")
if not finetuned_model:
LOG.warning(f"Note! finetuned_model is empty, using base_model({base_model}) instead.")
finetuned_model = base_model
elif not os.path.exists(finetuned_model):
LOG.warning(f"Warning! The finetuned_model path does not exist: {finetuned_model}. "
f"Using base_model({base_model}) instead.")
finetuned_model = base_model
elif not any(filename.endswith(('.bin', '.safetensors', '.pt'))
for filename in os.listdir(finetuned_model)):
LOG.warning(f"Warning! No valid model files (.bin, .safetensors or .pt) found in: {finetuned_model}. "
f"Using base_model({base_model}) instead.")
finetuned_model = base_model

def impl():
Expand Down
2 changes: 2 additions & 0 deletions lazyllm/components/finetune/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
from .collie import CollieFinetune
from .llamafactory import LlamafactoryFinetune
from .flagembedding import FlagembeddingFinetune
from .easyr1 import EasyR1Finetune

__all__ = [
'LazyLLMFinetuneBase',
'AlpacaloraFinetune',
'CollieFinetune',
'LlamafactoryFinetune',
'FlagembeddingFinetune',
'EasyR1Finetune',
]
103 changes: 103 additions & 0 deletions lazyllm/components/finetune/easy_r1/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
data:
train_files: hiyouga/math12k@train
val_files: hiyouga/math12k@test
prompt_key: problem
answer_key: answer
image_key: images
video_key: videos
image_dir: null
video_fps: 2.0
max_prompt_length: 2048
max_response_length: 2048
rollout_batch_size: 512 # equivalent to verl's data.train_batch_size
mini_rollout_batch_size: null # equivalent to verl's data.gen_batch_size
val_batch_size: 1024
format_prompt: ./examples/format_prompt/math.jinja
override_chat_template: null
shuffle: true
seed: 1
min_pixels: 262144
max_pixels: 4194304
filter_overlong_prompts: true

algorithm:
adv_estimator: grpo
disable_kl: false
use_kl_loss: true
kl_penalty: low_var_kl
kl_coef: 1.0e-2
online_filtering: false # dapo filter groups
filter_key: overall
filter_low: 0.01
filter_high: 0.99

worker:
actor:
global_batch_size: 128 # equivalent to verl's actor.ppo_mini_batch_size
micro_batch_size_per_device_for_update: 4 # equivalent to verl's actor.ppo_micro_batch_size_per_gpu
micro_batch_size_per_device_for_experience: 16 # equivalent to verl's rollout.log_prob_micro_batch_size_per_gpu
max_grad_norm: 1.0
padding_free: true
ulysses_size: 1
model:
model_path: /mnt/lustre/share_data/sunxiaoye/models/qwen2.5-0.5b-instruct
enable_gradient_checkpointing: true
trust_remote_code: false
freeze_vision_tower: false
optim:
lr: 1.0e-6
weight_decay: 1.0e-2
strategy: adamw # {adamw, adamw_bf16}
lr_warmup_ratio: 0.0
fsdp:
enable_full_shard: true
enable_cpu_offload: false
enable_rank0_init: true
offload:
offload_params: true # true: more CPU memory; false: more GPU memory
offload_optimizer: true # true: more CPU memory; false: more GPU memory

rollout:
n: 5
temperature: 1.0
top_p: 0.99
limit_images: 0
gpu_memory_utilization: 0.6
enforce_eager: false
enable_chunked_prefill: false
tensor_parallel_size: 1
disable_tqdm: false
val_override_config:
temperature: 0.5
n: 1

ref:
fsdp:
enable_full_shard: true
enable_cpu_offload: true # true: more CPU memory; false: more GPU memory
enable_rank0_init: true
offload:
offload_params: false

reward:
reward_type: batch
reward_function: ./examples/reward_function/math.py:compute_score

trainer:
total_epochs: 15
max_steps: null
project_name: easy_r1
experiment_name: qwen2_5_7b_math_grpo
logger: ["console"]
nnodes: 1
n_gpus_per_node: 1
max_try_make_batch: 20 # -1 means no limit
val_freq: 5 # -1 to disable
val_before_train: true
val_only: false
val_generations_to_log: 3
save_freq: 5 # -1 to disable
save_limit: 3 # -1 to disable
save_model_only: false
save_checkpoint_path: null
load_checkpoint_path: null
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Solve the following math problem step by step. The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\n{{ content | trim }}\n\nRemember to put your answer on its own line after "Answer:".
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{ content | trim }} You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{ content | trim }} A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer. The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>
188 changes: 188 additions & 0 deletions lazyllm/components/finetune/easy_r1/model_merger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
# flake8: noqa
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import re
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Tuple

import numpy as np
import torch
from torch.distributed._tensor import DTensor, Placement, Shard
from transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoModelForTokenClassification,
AutoModelForVision2Seq,
PretrainedConfig,
PreTrainedModel,
)


def merge_by_placement(tensors: List[torch.Tensor], placement: Placement):
    """Combine per-rank tensors into one tensor according to ``placement``.

    Args:
        tensors: The local tensors collected from every shard, in rank order.
        placement: The placement describing how the tensors relate to each other.

    Returns:
        The first tensor for a replicated placement, or the contiguous
        concatenation along ``placement.dim`` for a sharded placement.

    Raises:
        NotImplementedError: for a partial placement.
        ValueError: for any placement that is none of replicate/partial/shard.
    """
    if placement.is_replicate():
        # Every rank holds the same data; any copy will do.
        return tensors[0]

    if placement.is_partial():
        raise NotImplementedError("Partial placement is not supported yet")

    if placement.is_shard():
        joined = torch.cat(tensors, dim=placement.dim)
        return joined.contiguous()

    raise ValueError(f"Unsupported placement: {placement}")


def upload_model_to_huggingface(local_path: str, remote_path: str):
    """Upload the folder ``local_path`` to the Hugging Face model repo ``remote_path``.

    The repository is created with ``private=False`` if it does not exist yet
    (``exist_ok=True`` makes an already existing repository acceptable).
    """
    # Imported lazily so that huggingface_hub is only needed when uploading.
    from huggingface_hub import HfApi

    hub = HfApi()
    hub.create_repo(
        repo_id=remote_path,
        private=False,
        exist_ok=True,
    )
    hub.upload_folder(
        repo_id=remote_path,
        folder_path=local_path,
        repo_type="model",
    )


if __name__ == "__main__":
    # Script entry point: load every per-rank checkpoint file
    # ``model_world_size_<N>_rank_<R>.pt`` found in --local_dir, merge the
    # shards into full bfloat16 tensors, save the result through transformers
    # into ``<local_dir>/huggingface`` and optionally upload that folder.
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", required=True, type=str, help="The path for your saved model")
    parser.add_argument("--hf_upload_path", default=False, type=str, help="The path of the huggingface repo to upload")
    args = parser.parse_args()
    local_dir: str = args.local_dir

    assert not local_dir.endswith("huggingface"), "The local_dir should not end with huggingface."

    # copy rank zero to find the shape of (dp, fsdp)
    rank = 0
    world_size = 0
    for filename in os.listdir(local_dir):
        match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename)
        if match:
            # Kept as the matched string: it is re-used verbatim in file names
            # and converted with int() only where a number is needed.
            world_size = match.group(1)
            break

    assert world_size, "No model file with the proper format."

    rank0_weight_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt")
    # NOTE(review): weights_only=False unpickles arbitrary objects; only run
    # this script on checkpoints from a trusted source.
    state_dict = torch.load(rank0_weight_path, map_location="cpu", weights_only=False)
    pivot_key = sorted(state_dict.keys())[0]
    weight = state_dict[pivot_key]
    if isinstance(weight, DTensor):
        # get sharding info
        device_mesh = weight.device_mesh
        mesh = device_mesh.mesh
        mesh_dim_names = device_mesh.mesh_dim_names
    else:
        # for non-DTensor
        mesh = np.array([int(world_size)], dtype=np.int64)
        mesh_dim_names = ("fsdp",)

    print(f"Got device mesh {mesh}, mesh_dim_names {mesh_dim_names}")

    assert mesh_dim_names in (("fsdp",), ("ddp", "fsdp")), f"Unsupported mesh_dim_names {mesh_dim_names}."

    # NOTE: with the assertion above the "tp" branch cannot currently be
    # reached; it is kept for when tensor-parallel meshes are allowed.
    if "tp" in mesh_dim_names:
        # fsdp * tp
        total_shards = mesh.shape[-1] * mesh.shape[-2]
        mesh_shape = (mesh.shape[-2], mesh.shape[-1])
    else:
        # fsdp
        total_shards = mesh.shape[-1]
        mesh_shape = (mesh.shape[-1],)

    print(f"Processing {total_shards} model shards in total.")
    # Slot 0 is the already loaded rank-0 state dict; the remaining slots are
    # filled in by the worker threads below.
    model_state_dict_lst = [state_dict]
    model_state_dict_lst.extend([None] * (total_shards - 1))

    def process_one_shard(rank, model_state_dict_lst):
        """Load the checkpoint file of ``rank`` and store it at index ``rank``."""
        model_path = os.path.join(local_dir, f"model_world_size_{world_size}_rank_{rank}.pt")
        state_dict = torch.load(model_path, map_location="cpu", weights_only=False)
        model_state_dict_lst[rank] = state_dict
        return state_dict

    # os.cpu_count() may return None, which min() cannot compare with an int.
    with ThreadPoolExecutor(max_workers=min(32, os.cpu_count() or 1)) as executor:
        futures = [
            executor.submit(process_one_shard, shard_rank, model_state_dict_lst)
            for shard_rank in range(1, total_shards)
        ]
        # Surface any loading error here instead of silently leaving an
        # unfilled placeholder in model_state_dict_lst.
        for future in futures:
            future.result()
    del futures

    state_dict: Dict[str, List[torch.Tensor]] = {}
    param_placements: Dict[str, List[Placement]] = {}
    keys = set(model_state_dict_lst[0].keys())
    for key in keys:
        state_dict[key] = []
        for shard_rank, model_state_dict in enumerate(model_state_dict_lst):
            try:
                tensor = model_state_dict.pop(key)
            except KeyError as err:
                # Continuing would append the previous shard's tensor again and
                # produce a silently corrupted merge, so fail loudly instead.
                raise KeyError(f"Cannot find key {key} in rank {shard_rank}.") from err

            if isinstance(tensor, DTensor):
                state_dict[key].append(tensor._local_tensor.bfloat16())
                placements = tuple(tensor.placements)
                # replicated placement at ddp dimension can be discarded
                if mesh_dim_names[0] == "ddp":
                    placements = placements[1:]

                if key not in param_placements:
                    param_placements[key] = placements
                else:
                    # Every shard must report the same placement for a given key.
                    assert param_placements[key] == placements
            else:
                state_dict[key].append(tensor.bfloat16())

    del model_state_dict_lst

    for key in sorted(state_dict):
        if not isinstance(state_dict[key], list):
            print(f"No need to merge key {key}")
            continue

        if key in param_placements:
            # merge shards
            placements: Tuple[Shard] = param_placements[key]
            if len(mesh_shape) == 1:
                # 1-D list, FSDP without TP
                assert len(placements) == 1
                shards = state_dict[key]
                state_dict[key] = merge_by_placement(shards, placements[0])
            else:
                # 2-D list, FSDP + TP
                raise NotImplementedError("FSDP + TP is not supported yet.")
        else:
            # Keys without recorded placements (non-DTensor entries) are
            # concatenated along the first dimension.
            state_dict[key] = torch.cat(state_dict[key], dim=0)

    print("Merge completed.")
    hf_path = os.path.join(local_dir, "huggingface")
    config: PretrainedConfig = AutoConfig.from_pretrained(hf_path)
    architectures: List[str] = getattr(config, "architectures", ["Unknown"])

    # Pick the transformers auto class from the first architecture name.
    if "ForTokenClassification" in architectures[0]:
        AutoClass = AutoModelForTokenClassification
    elif "ForCausalLM" in architectures[0]:
        AutoClass = AutoModelForCausalLM
    elif "ForConditionalGeneration" in architectures[0]:
        AutoClass = AutoModelForVision2Seq
    else:
        raise NotImplementedError(f"Unknown architecture {architectures}.")

    # Build the model on the meta device, then allocate uninitialised CPU
    # storage; the weights come from state_dict passed to save_pretrained.
    with torch.device("meta"):
        model: PreTrainedModel = AutoClass.from_config(config, torch_dtype=torch.bfloat16)

    assert isinstance(model, PreTrainedModel)
    model.to_empty(device="cpu")

    print(f"Saving model to {hf_path}...")
    model.save_pretrained(hf_path, state_dict=state_dict)
    del state_dict, model

    if args.hf_upload_path:
        upload_model_to_huggingface(hf_path, args.hf_upload_path)
Loading
Loading