Skip to content

Commit d35c2ce

Browse files
authored
Add support for only updating models and pushing to a different user ID (#2966)
Summary: This is used for updating the official pytorch checkpoints. After this PR, e.g. to update the Phi-4-mini-instruct FP8 checkpoint, run: `sh release.sh --model_id microsoft/Phi-4-mini-instruct --quants FP8 --push_to_hub --push_to_user_id pytorch`. Test Plan: Updated the INT4 and FP8 checkpoints in https://huggingface.co/pytorch with the updated scripts. Reviewers: Subscribers: Tasks: Tags:
1 parent d3efa39 commit d35c2ce

File tree

3 files changed

+42
-7
lines changed

3 files changed

+42
-7
lines changed

.github/scripts/torchao_model_releases/eval.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,5 +110,5 @@ done
110110

111111
# Run summarize_results.sh with MODEL_IDS if eval_type is "all"
112112
if [[ "$EVAL_TYPE" == "all" ]]; then
113-
sh summarize_results.sh --model_id "${MODEL_ID_ARRAY[@]}"
113+
sh summarize_results.sh --model_ids "${MODEL_ID_ARRAY[@]}"
114114
fi

.github/scripts/torchao_model_releases/quantize_and_upload.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# LICENSE file in the root directory of this source tree.
66

77
import argparse
8+
from typing import List
89

910
import torch
1011
from huggingface_hub import ModelCard, get_token, whoami
@@ -617,7 +618,14 @@ def _untie_weights_and_save_locally(model_id):
617618

618619

619620
def quantize_and_upload(
620-
model_id, quant, tasks, calibration_limit, max_seq_length, push_to_hub
621+
model_id: str,
622+
quant: str,
623+
tasks: List[str],
624+
calibration_limit: int,
625+
max_seq_length: int,
626+
push_to_hub: bool,
627+
push_to_user_id: str,
628+
update_model_card: bool,
621629
):
622630
_int8_int4_linear_config = Int8DynamicActivationIntxWeightConfig(
623631
weight_dtype=torch.int4,
@@ -712,7 +720,9 @@ def quantize_and_upload(
712720
username = _get_username()
713721

714722
MODEL_NAME = model_id.split("/")[-1]
715-
save_to = f"{username}/{MODEL_NAME}-{quant}"
723+
724+
save_to_user_id = username if push_to_user_id is None else push_to_user_id
725+
save_to = f"{save_to_user_id}/{MODEL_NAME}-{quant}"
716726
untied_model_path = 'f"{{MODEL_NAME}}-untied-weights"'
717727
is_mobile = quant == "INT8-INT4"
718728
quantized_model_id = save_to
@@ -758,7 +768,8 @@ def quantize_and_upload(
758768
if push_to_hub:
759769
quantized_model.push_to_hub(quantized_model_id, safe_serialization=False)
760770
tokenizer.push_to_hub(quantized_model_id)
761-
card.push_to_hub(quantized_model_id)
771+
if update_model_card:
772+
card.push_to_hub(quantized_model_id)
762773
else:
763774
quantized_model.save_pretrained(quantized_model_id, safe_serialization=False)
764775
tokenizer.save_pretrained(quantized_model_id)
@@ -827,6 +838,18 @@ def quantize_and_upload(
827838
default=False,
828839
help="Flag to indicate whether push to huggingface hub or not",
829840
)
841+
parser.add_argument(
842+
"--push_to_user_id",
843+
type=str,
844+
default=None,
845+
help="The user_id to use for pushing the quantized model, only used when --push_to_hub is set",
846+
)
847+
parser.add_argument(
848+
"--update_model_card",
849+
action="store_true",
850+
default=False,
851+
help="Flag to indicate whether push model card to huggingface hub or not",
852+
)
830853
args = parser.parse_args()
831854
quantize_and_upload(
832855
args.model_id,
@@ -835,4 +858,6 @@ def quantize_and_upload(
835858
args.calibration_limit,
836859
args.max_seq_length,
837860
args.push_to_hub,
861+
args.push_to_user_id,
862+
args.update_model_card,
838863
)

.github/scripts/torchao_model_releases/release.sh

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
# Default quantization options
1616
default_quants=("FP8" "INT4" "INT8-INT4")
1717
push_to_hub=""
18+
push_to_user_id=""
19+
update_model_card=""
1820
# Parse arguments
1921
while [[ $# -gt 0 ]]; do
2022
case "$1" in
@@ -34,6 +36,14 @@ while [[ $# -gt 0 ]]; do
3436
push_to_hub="--push_to_hub"
3537
shift
3638
;;
39+
--push_to_user_id)
40+
push_to_user_id=("--push_to_user_id $2")
41+
shift 2
42+
;;
43+
--update_model_card)
44+
update_model_card="--update_model_card"
45+
shift
46+
;;
3747
*)
3848
echo "Unknown option: $1"
3949
exit 1
@@ -43,14 +53,14 @@ done
4353
# Use default quants if none specified
4454
if [[ -z "$model_id" ]]; then
4555
echo "Error: --model_id is required"
46-
echo "Usage: $0 --model_id <model_id> [--quants <quant1> [quant2 ...]] [--push_to_hub]"
56+
echo "Usage: $0 --model_id <model_id> [--quants <quant1> [quant2 ...]] [--push_to_hub] [--push_to_user_id <push_to_user_id>] [--update_model_card]"
4757
exit 1
4858
fi
4959
if [[ ${#quants[@]} -eq 0 ]]; then
5060
quants=("${default_quants[@]}")
5161
fi
5262
# Run the python command for each quantization option
5363
for quant in "${quants[@]}"; do
54-
echo "Running: python quantize_and_upload.py --model_id $model_id --quant $quant $push_to_hub"
55-
python quantize_and_upload.py --model_id "$model_id" --quant "$quant" $push_to_hub
64+
echo "Running: python quantize_and_upload.py --model_id $model_id --quant $quant $push_to_hub $push_to_user_id $update_model_card"
65+
python quantize_and_upload.py --model_id "$model_id" --quant "$quant" $push_to_hub $push_to_user_id $update_model_card
5666
done

0 commit comments

Comments
 (0)