Skip to content

Commit 0208fde

Browse files
committed
update
1 parent 8318270 commit 0208fde

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

examples/quantization_w4a4_fp4/llama3_mxfp4.py

Lines changed: 3 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -4,7 +4,7 @@
44
from llmcompressor.modifiers.quantization import QuantizationModifier
55
from llmcompressor.utils import dispatch_for_generation
66

7-
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
7+
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
88

99
# Load model.
1010
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
@@ -13,7 +13,7 @@
1313
# Configure the quantization algorithm and scheme.
1414
# In this case, we:
1515
# * quantize the weights to fp4 with a group size of 16 via PTQ
16-
recipe = QuantizationModifier(targets="Linear", scheme="MXFP4A16", ignore=["lm_head"])
16+
recipe = QuantizationModifier(targets="Linear", scheme="MXFP4", ignore=["lm_head"])
1717

1818
# Apply quantization.
1919
oneshot(model=model, recipe=recipe)
@@ -30,6 +30,6 @@
3030

3131

3232
# Save to disk in compressed-tensors format.
33-
SAVE_DIR = MODEL_ID.rstrip("/").split("/")[-1] + "-MXFP4A16"
33+
SAVE_DIR = MODEL_ID.rstrip("/").split("/")[-1] + "-MXFP4"
3434
model.save_pretrained(SAVE_DIR, save_compressed=True)
3535
tokenizer.save_pretrained(SAVE_DIR)

0 commit comments

Comments (0)