Commit 2888a8b

Merge branch 'main' into sdk_dependency
2 parents: 930da54 + d37233e

20 files changed: +1186 / −684 lines

QEfficient/finetune/utils/train_utils.py

Lines changed: 12 additions & 11 deletions
@@ -124,10 +124,9 @@ def train(

     if train_config.use_peft and train_config.from_peft_checkpoint:
         intermediate_epoch = int(train_config.from_peft_checkpoint.split("/")[-2].split("_")[-1]) - 1
+        intermediate_step = int(train_config.from_peft_checkpoint.split("/")[-1].split("_")[-1])
         if epoch < intermediate_epoch:
             logger.log_rank_zero(f"Skipping epoch {epoch + 1} since fine tuning has already completed for it.")
-            # to bring the count of train_step in sync with where it left off
-            total_train_steps += len(train_dataloader)
             continue

     logger.log_rank_zero(f"Starting epoch {epoch + 1}/{train_config.num_epochs}")
@@ -149,20 +148,18 @@ def train(

     num_dummy_samples = 0
     for step, batch in enumerate(train_dataloader):
+        # total_train_steps indicates the cumulative number of training steps completed across all epochs.
+        # When resuming fine-tuning from previously saved checkpoints, total_train_steps indicates the total number of steps trained across the earlier session and the ongoing one.
+        total_train_steps = (epoch) * len(train_dataloader) + step
         # resume training from a particular checkpoint, assuming the dataset is not shuffled
         if train_config.use_peft and train_config.from_peft_checkpoint:
-            intermediate_step = int(train_config.from_peft_checkpoint.split("/")[-1].split("_")[-1])
-            intermediate_epoch = int(train_config.from_peft_checkpoint.split("/")[-2].split("_")[-1]) - 1
             # to bring the count of train_step in sync with where it left off
             if epoch == intermediate_epoch and step == 0:
-                total_train_steps += intermediate_step
                 logger.log_rank_zero(
                     f"Skipping first {intermediate_step} steps for epoch {epoch + 1}, since fine tuning has already completed for it."
                 )
             if epoch == intermediate_epoch and step < intermediate_step:
-                total_train_steps += 1
                 continue
-            total_train_steps += 1

         if train_config.max_train_step > 0 and total_train_steps >= train_config.max_train_step:
             max_steps_reached = True
@@ -235,12 +232,12 @@ def train(
             else:
                 num_samples_in_cur_update = len(train_dataloader) % train_config.gradient_accumulation_steps

-            loss = loss / num_samples_in_cur_update
+            normalized_loss = loss / num_samples_in_cur_update

             if train_config.grad_scaler:
-                scaler.scale(loss).backward()  # backward pass
+                scaler.scale(normalized_loss).backward()  # backward pass
             else:
-                loss.backward()  # backward pass
+                normalized_loss.backward()  # backward pass

             if is_optimizer_step:
                 if train_config.grad_scaler:
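The rename from `loss` to `normalized_loss` keeps the raw per-batch loss available for logging while the backward pass uses the loss divided by the number of samples in the current accumulation window. A minimal, self-contained sketch of that pattern (illustrative only, not the QEfficient trainer; the toy model, optimizer, and batch data are assumptions):

```python
import torch

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
accum_steps = 4
batches = [(torch.randn(2, 8), torch.randn(2, 1)) for _ in range(8)]

optimizer.zero_grad()
for step, (x, y) in enumerate(batches):
    loss = torch.nn.functional.mse_loss(model(x), y)  # raw loss, still useful for logging
    normalized_loss = loss / accum_steps  # scale so accumulated gradients average out
    normalized_loss.backward()            # gradients sum across micro-batches
    if (step + 1) % accum_steps == 0:
        optimizer.step()                  # one optimizer update per accumulation window
        optimizer.zero_grad()
```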
@@ -358,7 +355,6 @@ def train(
         logger.log_rank_zero(
             f"Epoch {epoch + 1}: Train epoch loss: {train_epoch_loss:.4f}, Train metric: {train_epoch_metric:.4f}, Epoch time {epoch_end_time:.2f} sec"
         )
-
         # Saving the results every epoch to plot later
         if train_config.save_metrics:
             save_to_json(
@@ -377,9 +373,14 @@ def train(

     results["last_epoch_train_loss"] = train_epoch_loss.cpu()
     results["last_epoch_train_metric"] = train_epoch_metric.cpu()
+    results["train_step_loss"] = train_step_loss
+    results["train_step_metric"] = train_step_metric
+
     if train_config.run_validation:
         results["last_epoch_eval_loss"] = eval_epoch_loss.cpu()
         results["last_epoch_eval_metric"] = eval_epoch_metric.cpu()
+        results["eval_step_loss"] = eval_step_loss
+        results["eval_step_metric"] = eval_step_metric
     results["avg_epoch_time"] = avg_epoch_time
     results["avg_checkpoint_time"] = avg_checkpoint_time
     if train_config.save_metrics:
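The key change in this file is that `total_train_steps` is now computed directly as `epoch * len(train_dataloader) + step` instead of being incremented in several branches, so the counter stays correct regardless of how many steps are skipped on resume. A small illustrative check, with assumed values for the dataloader length and checkpoint position:

```python
# Assumed values, for illustration only.
steps_per_epoch = 100                          # stands in for len(train_dataloader)
intermediate_epoch, intermediate_step = 2, 40  # assumed resume point

for epoch in range(3):
    for step in range(steps_per_epoch):
        # Direct computation: independent of which branches ran before this step.
        total_train_steps = epoch * steps_per_epoch + step
        if epoch == intermediate_epoch and step < intermediate_step:
            continue  # skipping resumed steps no longer desynchronizes the counter
```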

QEfficient/transformers/models/llama4/modeling_llama4.py

Lines changed: 0 additions & 8 deletions
@@ -925,14 +925,6 @@ def get_specializations(
         )
         vision_size = num_features_per_tile * max_num_tiles

-        downsample_ratio = int(round(1.0 / (self.config.vision_config.pixel_shuffle_ratio**2)))
-        num_features_per_tile = int(
-            (img_size // self.config.vision_config.patch_size)
-            * (img_size // self.config.vision_config.patch_size)
-            // downsample_ratio
-        )
-        vision_size = num_features_per_tile * max_num_tiles
-
         vision = [
             {
                 "batch_size": batch_size,

QEfficient/transformers/models/modeling_auto.py

Lines changed: 12 additions & 16 deletions
@@ -322,8 +322,8 @@ def compile(
         ]

         return self._compile(
-            onnx_path,
-            compile_dir,
+            onnx_path=onnx_path,
+            compile_dir=compile_dir,
             compile_only=True,
             specializations=specializations,
             convert_to_fp16=True,
@@ -450,7 +450,7 @@ def __init__(self, model: nn.modules):
         self.model = model.get_qeff_vision_encoder()

     def export(self, inputs, output_names, dynamic_axes, export_dir=None):
-        return self._export(inputs, output_names, dynamic_axes, export_dir)
+        return self._export(inputs, output_names, dynamic_axes, export_dir=export_dir)

     def compile(
         self,
@@ -518,7 +518,7 @@ def __init__(self, model):
         self.model = model.get_qeff_language_decoder()

     def export(self, inputs, output_names, dynamic_axes, export_dir=None):
-        return self._export(inputs, output_names, dynamic_axes, export_dir)
+        return self._export(inputs, output_names, dynamic_axes, export_dir=export_dir)

     def compile(
         self,
@@ -631,10 +631,10 @@ def export(
             inputs["vision"],
             output_names["vision"],
             dynamic_axes["vision"],
-            export_dir,
+            export_dir=export_dir,
         )

-        self.lang_model.export(inputs["lang"], output_names["lang"], dynamic_axes["lang"], export_dir)
+        self.lang_model.export(inputs["lang"], output_names["lang"], dynamic_axes["lang"], export_dir=export_dir)
         return self.onnx_path

     def compile(
@@ -699,7 +699,7 @@ def compile(

         if not skip_vision:
             self.vision_model._compile(
-                compile_dir,
+                compile_dir=compile_dir,
                 compile_only=True,
                 specializations=specializations["vision"],
                 convert_to_fp16=True,
@@ -726,7 +726,7 @@ def compile(
                 custom_io_lang[output_name] = "float16" if "vision_embeds" in output_name else kv_cache_dtype

         self.lang_model._compile(
-            compile_dir,
+            compile_dir=compile_dir,
             compile_only=True,
             retained_state=True,
             specializations=specializations["lang"],
@@ -863,10 +863,6 @@ def kv_offload_generate(
         chunk_inputs = lang_inputs.copy()
         prefill_start = perf_counter()

-        # Prepare inputs for prefill
-        chunk_inputs = lang_inputs.copy()
-        prefill_start = perf_counter()
-
         # Run prefill
         chunk_inputs = lang_inputs.copy()
         for i in range(num_chunks):
@@ -1042,8 +1038,8 @@ def compile(
             custom_io[output_name] = "float16" if "pixel_values" in output_name else kv_cache_dtype

         self._compile(
-            onnx_path,
-            compile_dir,
+            onnx_path=onnx_path,
+            compile_dir=compile_dir,
             compile_only=True,
             retained_state=True,
             specializations=specializations,
@@ -2081,8 +2077,8 @@ def compile(
             custom_io[output_name] = kv_cache_dtype

         return self._compile(
-            onnx_path,
-            compile_dir,
+            onnx_path=onnx_path,
+            compile_dir=compile_dir,
             compile_only=True,
             retained_state=True,
             specializations=specializations,
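Apart from deleting a duplicated prefill-setup block in `kv_offload_generate`, every change in this file converts positional arguments to keyword arguments. The benefit: if the callee's parameter order ever changes, keyword calls keep their meaning while positional calls silently shift. A toy sketch with an assumed signature, not the actual `_compile` API:

```python
# Hypothetical signature for illustration.
def _compile(onnx_path=None, compile_dir=None, compile_only=False, **kwargs):
    return onnx_path, compile_dir

_compile("model.onnx", "build/")                        # depends on parameter order
_compile(onnx_path="model.onnx", compile_dir="build/")  # explicit, order-proof,
                                                        # and fails loudly on renames
```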

QEfficient/utils/test_utils.py

Lines changed: 21 additions & 0 deletions
@@ -150,3 +150,24 @@ def __call__(
         image_tokens = IMG_START_TOKEN + IMG_CONTEXT_TOKEN * self.num_image_token * num_patches + IMG_END_TOKEN
         query = query.replace("<image>", image_tokens, 1)
         return query
+
+
+class ModelConfig:
+    """
+    Contains the model types that are not default models, e.g. quantized models, external models, swiftkv models, etc.
+    """
+
+    QUANTIZED_MODELS = {
+        "neuralmagic/Qwen2-0.5B-Instruct-FP8",
+        "neuralmagic/Llama-3.2-3B-Instruct-FP8",
+        "TheBloke/Llama-2-7B-GPTQ",
+        "TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
+    }
+
+    EXTERNAL_MODELS = {
+        "hpcai-tech/grok-1",
+    }
+
+    SWIFTKV_MODELS = {
+        "Snowflake/Llama-3.1-SwiftKV-8B-Instruct",
+    }
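Since the class holds plain sets of model IDs, a plausible use (not shown in this commit) is a simple membership check to route a test to the right loading path:

```python
# Usage sketch; the branching logic is an assumption, only the sets come from this commit.
from QEfficient.utils.test_utils import ModelConfig

model_id = "TheBloke/Llama-2-7B-GPTQ"
if model_id in ModelConfig.QUANTIZED_MODELS:
    print(f"{model_id} needs the quantized-model loading path")
```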

examples/llama4_example.py

Lines changed: 5 additions & 7 deletions
@@ -7,7 +7,7 @@

 import torch
 import transformers
-from transformers import AutoConfig, AutoModelForImageTextToText, AutoProcessor, TextStreamer
+from transformers import AutoConfig, AutoProcessor, TextStreamer

 from QEfficient import QEFFAutoModelForImageTextToText

@@ -17,14 +17,12 @@
 config.text_config.num_hidden_layers = 4
 config.vision_config.num_hidden_layers = 2

-model = AutoModelForImageTextToText.from_pretrained(model_id, attn_implementation="eager", config=config)
-model.eval()
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+qeff_model = QEFFAutoModelForImageTextToText.from_pretrained(
+    model_id, attn_implementation="eager", kv_offload=True, config=config
+)
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
 processor = AutoProcessor.from_pretrained(model_id)

-### For running the model in single QPC approach use kv_offload=False. For Dual QPC approach use kv_offload=True ###
-qeff_model = QEFFAutoModelForImageTextToText(model, kv_offload=True)
-
 ### Use skip_vision=True to run only text; otherwise False ###
 skip_vision = True
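Per the removed comment, `kv_offload` selects between the two QPC layouts: `True` runs the dual-QPC approach (separate vision and language programs), `False` runs a single QPC. The flag now goes straight into `from_pretrained`; a sketch of the single-QPC variant, reusing the `model_id` and `config` from the example above:

```python
# Single-QPC variant (sketch); only kv_offload differs from the example above.
qeff_model = QEFFAutoModelForImageTextToText.from_pretrained(
    model_id, attn_implementation="eager", kv_offload=False, config=config
)
```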
examples/llama4_multi_image_example.py

Lines changed: 5 additions & 7 deletions
@@ -7,7 +7,7 @@

 import torch
 import transformers
-from transformers import AutoConfig, AutoModelForImageTextToText, AutoProcessor, TextStreamer
+from transformers import AutoConfig, AutoProcessor, TextStreamer

 from QEfficient import QEFFAutoModelForImageTextToText

@@ -17,14 +17,12 @@
 config.text_config.num_hidden_layers = 4
 config.vision_config.num_hidden_layers = 2

-model = AutoModelForImageTextToText.from_pretrained(model_id, attn_implementation="eager", config=config)
-model.eval()
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+qeff_model = QEFFAutoModelForImageTextToText.from_pretrained(
+    model_id, attn_implementation="eager", kv_offload=True, config=config
+)
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
 processor = AutoProcessor.from_pretrained(model_id)

-### For running the model in single QPC approach use kv_offload=False. For Dual QPC approach use kv_offload=True ###
-qeff_model = QEFFAutoModelForImageTextToText(model, kv_offload=True)
-
 ### For multi-image, the value of max_num_tiles should be the sum of the num_tiles values across all the images ###
 qeff_model.compile(
     prefill_seq_len=128,
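The multi-image comment above translates to a one-liner; the per-image tile counts here are assumed values for illustration:

```python
# Two images split into 4 and 5 tiles (assumed) -> compile with max_num_tiles = 9.
num_tiles_per_image = [4, 5]
max_num_tiles = sum(num_tiles_per_image)
```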

scripts/Jenkinsfile

Lines changed: 39 additions & 30 deletions
@@ -59,7 +59,7 @@ pipeline {
             mkdir -p $PWD/Non_qaic &&
             export TOKENIZERS_PARALLELISM=false &&
             export QEFF_HOME=$PWD/Non_qaic &&
-            pytest tests -m '(not cli) and (on_qaic) and (not multimodal) and (not qnn) and (not finetune)' --ignore tests/vllm -n 4 --junitxml=tests/tests_log2.xml &&
+            pytest tests -m '(not cli) and (on_qaic) and (not nightly) and (not multimodal) and (not qnn) and (not finetune)' --ignore tests/vllm -n 4 --junitxml=tests/tests_log2.xml &&
             junitparser merge tests/tests_log2.xml tests/tests_log.xml &&
             deactivate"
            '''
@@ -97,23 +97,13 @@ pipeline {
             mkdir -p $PWD/cli &&
             export TOKENIZERS_PARALLELISM=false &&
             export QEFF_HOME=$PWD/cli &&
-            pytest tests -m '(cli and not qnn) and (not finetune)' --ignore tests/vllm --junitxml=tests/tests_log3.xml &&
+            pytest tests -m '(cli and not qnn) and (not finetune)' --ignore tests/vllm -n 4 --junitxml=tests/tests_log3.xml &&
             junitparser merge tests/tests_log3.xml tests/tests_log.xml &&
             deactivate"
            '''
        }
      }
    }
-    stage('vLLM Tests') {
-      steps {
-        catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') {
-          build job: 'qefficient_vllm_upstream',
-          parameters: [string(name: 'NAME', value: "${BUILD_TAG}")],
-          propagate: true,
-          wait: true
-        }
-      }
-    }
    stage('QNN CLI Tests') {
      steps {
        timeout(time: 30, unit: 'MINUTES') {
@@ -126,7 +116,7 @@ pipeline {
             mkdir -p $PWD/Qnn_cli &&
             export TOKENIZERS_PARALLELISM=false &&
             export QEFF_HOME=$PWD/Qnn_cli &&
-            pytest tests -m '(cli and qnn) and (not finetune)' --ignore tests/vllm --junitxml=tests/tests_log4.xml &&
+            pytest tests -m '(cli and qnn) and (not finetune)' --ignore tests/vllm -n 4 --junitxml=tests/tests_log4.xml &&
             junitparser merge tests/tests_log4.xml tests/tests_log.xml &&
             deactivate"
            '''
@@ -145,7 +135,7 @@ pipeline {
             mkdir -p $PWD/Qnn_non_cli &&
             export TOKENIZERS_PARALLELISM=false &&
             export QEFF_HOME=$PWD/Qnn_non_cli &&
-            pytest tests -m '(not cli) and (qnn) and (on_qaic) and (not multimodal) and (not finetune)' --ignore tests/vllm --junitxml=tests/tests_log5.xml &&
+            pytest tests -m '(not cli) and (qnn) and (not nightly) and (on_qaic) and (not multimodal) and (not finetune)' --ignore tests/vllm -n 4 --junitxml=tests/tests_log5.xml &&
             junitparser merge tests/tests_log5.xml tests/tests_log.xml &&
             deactivate"
            '''
@@ -191,20 +181,39 @@ pipeline {
  }

  post {
-    always {
-      script {
-        try {
-          sh '''
-          sudo docker rm -f ${BUILD_TAG}
-          sudo chown -R ubuntu .
-          '''
-        } catch (error) {
-          echo "Failed to delete container ${BUILD_TAG}: ${error}"
-        }
-      }
-      junit testResults: 'tests/tests_log.xml'
-      echo 'Cleaning Workspace'
-      deleteDir()
-    }
-  }
+    success {
+      // Trigger downstream job only if this pipeline succeeds
+      build job: 'qefficient_vllm_upstream',
+        parameters: [
+          string(name: 'NAME', value: "${BUILD_TAG}"),
+          string(name: 'QEFF_WORKSPACE', value: "${env.WORKSPACE}")
+        ],
+        wait: false
+    }
+    always {
+      script {
+        try {
+          sh '''
+          sudo chown -R ubuntu .
+          '''
+        } catch (error) {
+          echo "Failed to change ownership: ${error}"
+        }
+      }
+      junit testResults: 'tests/tests_log.xml'
+    }
+    unsuccessful {
+      script {
+        try {
+          sh '''
+          sudo docker rm -f ${BUILD_TAG}
+          '''
+        } catch (error) {
+          echo "Failed to delete container ${BUILD_TAG}: ${error}"
+        }
+      }
+      echo 'Cleaning Workspace'
+      deleteDir()
+    }
+  }
}
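The new `(not nightly)` clauses in the pytest marker expressions assume a `nightly` marker that excludes long-running tests from the regular on-device stages (the vLLM stage now runs as a downstream job triggered on success instead of inline). How a test would opt into these markers, as a sketch; it assumes the markers are registered in the repo's pytest configuration:

```python
import pytest

@pytest.mark.nightly   # excluded from regular runs by '(not nightly)' in the Jenkinsfile
@pytest.mark.on_qaic   # selected by '(on_qaic)' stages
def test_large_model_end_to_end():
    ...
```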
