Skip to content
Merged
Show file tree
Hide file tree
Changes from 45 commits
Commits
Show all changes
67 commits
Select commit Hold shift + click to select a range
80c92da
add auto-round
yiliu30 Nov 3, 2025
75f7efd
Merge branch 'main' into up-ar
yiliu30 Nov 3, 2025
3266b79
add auto-round modifier
yiliu30 Nov 3, 2025
9c537cc
refine code
yiliu30 Nov 3, 2025
bebe0fa
disbale qac for auto-round
yiliu30 Nov 3, 2025
dfb0ff8
clean code
yiliu30 Nov 3, 2025
513972c
add compile after disable qac
yiliu30 Nov 3, 2025
2291cc4
add iters and clean code
yiliu30 Nov 3, 2025
4028853
clean code
yiliu30 Nov 3, 2025
97ff9e0
add example
yiliu30 Nov 3, 2025
cb7a5b4
refine docs
yiliu30 Nov 3, 2025
5a7500e
refine example
yiliu30 Nov 3, 2025
d02a355
add init
yiliu30 Nov 3, 2025
cea9d2f
clean code
yiliu30 Nov 3, 2025
22be9b7
format
yiliu30 Nov 3, 2025
6cdb402
refactor
yiliu30 Nov 3, 2025
e2814eb
add ut
yiliu30 Nov 3, 2025
3e4a9fc
test llama 3
yiliu30 Nov 3, 2025
aa34b65
clean code
yiliu30 Nov 4, 2025
afe2ff7
parse layer-wise config
yiliu30 Nov 4, 2025
8e9eccc
format
yiliu30 Nov 4, 2025
81f76af
add docstring
yiliu30 Nov 4, 2025
afa6150
add ar
yiliu30 Nov 4, 2025
97217e7
update example
yiliu30 Nov 4, 2025
3dcb434
align api
yiliu30 Nov 5, 2025
aef7707
format
yiliu30 Nov 5, 2025
97e1ca2
clean code
yiliu30 Nov 5, 2025
c75c272
fix typo
yiliu30 Nov 5, 2025
3d8a0c8
small iters for ut
yiliu30 Nov 5, 2025
6729a75
format
yiliu30 Nov 5, 2025
bb4dbe8
refine comment
yiliu30 Nov 5, 2025
2adf0e7
replace papaer link
yiliu30 Nov 5, 2025
dd9bde9
correct comments
yiliu30 Nov 5, 2025
4980229
Merge branch 'main' into autoround-support
yiliu30 Nov 5, 2025
7d97255
update comments
yiliu30 Nov 5, 2025
f298e82
refine code
yiliu30 Nov 5, 2025
73c3571
add more checks
yiliu30 Nov 5, 2025
eb16397
update example
yiliu30 Nov 6, 2025
9cb1f06
move auto-round to modifier
yiliu30 Nov 6, 2025
76e0d21
apply untie
yiliu30 Nov 6, 2025
1cbe919
correct docstring
yiliu30 Nov 6, 2025
9fa5efb
enable ci
yiliu30 Nov 6, 2025
7937d80
revert import AutoRoundModifier into modfifier directly
yiliu30 Nov 6, 2025
e58b2bd
update
yiliu30 Nov 6, 2025
bd70ea6
Merge branch 'main' into autoround-support
yiliu30 Nov 6, 2025
6b236f6
merge main
yiliu30 Nov 7, 2025
4c94187
clean
yiliu30 Nov 7, 2025
7ea8442
fix
yiliu30 Nov 7, 2025
f52c0c0
refactor
yiliu30 Nov 7, 2025
4a9c4aa
format
yiliu30 Nov 7, 2025
0567df6
Update src/llmcompressor/modifiers/autoround/base.py
yiliu30 Nov 7, 2025
650a19c
refine docs
yiliu30 Nov 7, 2025
58e09bf
Merge branch 'autoround-support' of https://github.com/yiliu30/llm-co…
yiliu30 Nov 7, 2025
5cd35a6
fix import
yiliu30 Nov 8, 2025
678b123
Update src/llmcompressor/modifiers/autoround/base.py
yiliu30 Nov 8, 2025
a8c63d3
add qinput
yiliu30 Nov 10, 2025
38634dc
Merge branch 'autoround-support' of https://github.com/yiliu30/llm-co…
yiliu30 Nov 10, 2025
fbc047a
clean cache
yiliu30 Nov 10, 2025
96b6490
align api
yiliu30 Nov 10, 2025
d00d41b
fix
yiliu30 Nov 10, 2025
d4a8fb0
fix
yiliu30 Nov 10, 2025
487fcd2
update
yiliu30 Nov 10, 2025
baeea3f
Merge branch 'main' into autoround-support
yiliu30 Nov 11, 2025
3adc879
add requires_gpu for ut
yiliu30 Nov 12, 2025
ac10f7b
Merge branch 'main' into autoround-support
yiliu30 Nov 12, 2025
decb14f
Merge branch 'autoround-support' of https://github.com/yiliu30/llm-co…
yiliu30 Nov 12, 2025
f9dabc4
Merge branch 'main' into autoround-support
yiliu30 Nov 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/test-check-transformers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ jobs:
if: (success() || failure()) && steps.install.outcome == 'success'
run: |
pytest -v tests/llmcompressor/transformers/gptq
- name: Running AutoRound Tests
if: (success() || failure()) && steps.install.outcome == 'success'
run: |
pytest -v tests/llmcompressor/transformers/autoround
- name: Running ONESHOT Tests
if: (success() || failure()) && steps.install.outcome == 'success'
run: |
Expand Down
56 changes: 56 additions & 0 deletions examples/autoround/llama3_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from auto_round.calib_dataset import get_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot
from llmcompressor.modifiers.autoround import AutoRoundModifier
from llmcompressor.utils import dispatch_for_generation

# Choose the checkpoint, then load the model and its matching tokenizer.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Calibration settings: number of samples and sequence length per sample.
NUM_CALIBRATION_SAMPLES = 128
MAX_SEQUENCE_LENGTH = 2048

# Build the calibration dataset with auto-round's own `get_dataset` helper so
# the data is aligned with what AutoRound expects.
ds = get_dataset(
    tokenizer=tokenizer,
    seqlen=MAX_SEQUENCE_LENGTH,
    nsamples=NUM_CALIBRATION_SAMPLES,
)

# Configure the quantization algorithm to run.
#   * quantize the weights to 4 bit with AutoRound with a group size 128
#   * leave `lm_head` unquantized
recipe = AutoRoundModifier(
    targets="Linear",
    scheme="W4A16",
    ignore=["lm_head"],
    iters=200,
)

# Run the one-shot calibration/quantization pass over the model.
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    # disable shuffling to get slightly better mmlu score
    shuffle_calibration_samples=False,
)

# Sanity check: generate a short continuation with the quantized model.
print("\n\n")
print("========== SAMPLE GENERATION ==============")
dispatch_for_generation(model)
sample = tokenizer("Hello my name is", return_tensors="pt")
# Move every tokenizer output onto the device the model now lives on.
sample = {name: tensor.to(model.device) for name, tensor in sample.items()}
output = model.generate(**sample, max_new_tokens=100)
print(tokenizer.decode(output[0]))
print("==========================================\n\n")

# Save the compressed model and the tokenizer side by side; the directory name
# is the last path component of the model id plus a scheme suffix.
SAVE_DIR = model_id.rstrip("/").rsplit("/", 1)[-1] + "-W4A16-G128-AutoRound"
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ def localversion_func(version: ScmVersion) -> str:
if BUILD_TYPE == "release"
else "compressed-tensors>=0.12.3a2"
),
# TODO: replace it with the release version
("auto_round @ git+https://github.com/intel/auto-round.git@llmc"),
],
extras_require={
"dev": [
Expand Down
3 changes: 3 additions & 0 deletions src/llmcompressor/modifiers/autoround/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ruff: noqa

from .base import *
Loading