examples/quantization_w4a4_fp4 (1 file changed, +3 -3 lines changed)

@@ -4,7 +4,7 @@
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.utils import dispatch_for_generation
 
-MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
+MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 # Load model.
 model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
@@ -13,7 +13,7 @@
 # Configure the quantization algorithm and scheme.
 # In this case, we:
 # * quantize the weights to fp4 with per group 16 via ptq
-recipe = QuantizationModifier(targets="Linear", scheme="MXFP4A16", ignore=["lm_head"])
+recipe = QuantizationModifier(targets="Linear", scheme="MXFP4", ignore=["lm_head"])
 
 # Apply quantization.
 oneshot(model=model, recipe=recipe)
@@ -30,6 +30,6 @@
 
 
 # Save to disk in compressed-tensors format.
-SAVE_DIR = MODEL_ID.rstrip("/").split("/")[-1] + "-MXFP4A16"
+SAVE_DIR = MODEL_ID.rstrip("/").split("/")[-1] + "-MXFP4"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
 tokenizer.save_pretrained(SAVE_DIR)
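As a quick sanity check for this change, the saved directory can be reloaded through plain transformers. A minimal sketch, assuming the compressed-tensors package is installed so the MXFP4 checkpoint can be decompressed on load; the directory name is derived from the SAVE_DIR expression above, and the prompt and generation settings are illustrative, not part of the example:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory written by the example above:
# "TinyLlama/TinyLlama-1.1B-Chat-v1.0".rstrip("/").split("/")[-1] + "-MXFP4"
SAVE_DIR = "TinyLlama-1.1B-Chat-v1.0-MXFP4"

# Reload the compressed checkpoint; transformers picks up the
# compressed-tensors quantization config saved alongside the weights.
model = AutoModelForCausalLM.from_pretrained(SAVE_DIR, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(SAVE_DIR)

# Short generation to confirm the quantized model still produces text.
inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))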