Add support for updating only the models and pushing to a different user ID (#2966)

jerryzh168 · web-flow · commit d35c2ce93efb · 2025-09-09T16:10:43.000-07:00
Summary: This is used for updating the official PyTorch checkpoints. After this PR, for example, the FP8 checkpoint for microsoft/Phi-4-mini-instruct can be updated with: sh release.sh --model_id microsoft/Phi-4-mini-instruct --quants FP8 --push_to_hub --push_to_user_id pytorch Test Plan: Updated the INT4 and FP8 checkpoints in https://huggingface.co/pytorch with the updated scripts Reviewers: Subscribers: Tasks: Tags:
diff --git a/.github/scripts/torchao_model_releases/eval.sh b/.github/scripts/torchao_model_releases/eval.sh
@@ -110,5 +110,5 @@ done
 
 # Run summarize_results.sh with MODEL_IDS if eval_type is "all"
 if [[ "$EVAL_TYPE" == "all" ]]; then
-  sh summarize_results.sh --model_id "${MODEL_ID_ARRAY[@]}"
+  sh summarize_results.sh --model_ids "${MODEL_ID_ARRAY[@]}"
 fi
diff --git a/.github/scripts/torchao_model_releases/quantize_and_upload.py b/.github/scripts/torchao_model_releases/quantize_and_upload.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
+from typing import List
 
 import torch
 from huggingface_hub import ModelCard, get_token, whoami
@@ -617,7 +618,14 @@ def _untie_weights_and_save_locally(model_id):
 
 
 def quantize_and_upload(
-    model_id, quant, tasks, calibration_limit, max_seq_length, push_to_hub
+    model_id: str,
+    quant: str,
+    tasks: List[str],
+    calibration_limit: int,
+    max_seq_length: int,
+    push_to_hub: bool,
+    push_to_user_id: str,
+    update_model_card: bool,
 ):
     _int8_int4_linear_config = Int8DynamicActivationIntxWeightConfig(
         weight_dtype=torch.int4,
@@ -712,7 +720,9 @@ def quantize_and_upload(
     username = _get_username()
 
     MODEL_NAME = model_id.split("/")[-1]
-    save_to = f"{username}/{MODEL_NAME}-{quant}"
+
+    save_to_user_id = username if push_to_user_id is None else push_to_user_id
+    save_to = f"{save_to_user_id}/{MODEL_NAME}-{quant}"
     untied_model_path = 'f"{{MODEL_NAME}}-untied-weights"'
     is_mobile = quant == "INT8-INT4"
     quantized_model_id = save_to
@@ -758,7 +768,8 @@ def quantize_and_upload(
     if push_to_hub:
         quantized_model.push_to_hub(quantized_model_id, safe_serialization=False)
         tokenizer.push_to_hub(quantized_model_id)
-        card.push_to_hub(quantized_model_id)
+        if update_model_card:
+            card.push_to_hub(quantized_model_id)
     else:
         quantized_model.save_pretrained(quantized_model_id, safe_serialization=False)
         tokenizer.save_pretrained(quantized_model_id)
@@ -827,6 +838,18 @@ def quantize_and_upload(
         default=False,
         help="Flag to indicate whether push to huggingface hub or not",
     )
+    parser.add_argument(
+        "--push_to_user_id",
+        type=str,
+        default=None,
+        help="The user_id to use for pushing the quantized model, only used when --push_to_hub is set",
+    )
+    parser.add_argument(
+        "--update_model_card",
+        action="store_true",
+        default=False,
+        help="Flag to indicate whether push model card to huggingface hub or not",
+    )
     args = parser.parse_args()
     quantize_and_upload(
         args.model_id,
@@ -835,4 +858,6 @@ def quantize_and_upload(
         args.calibration_limit,
         args.max_seq_length,
         args.push_to_hub,
+        args.push_to_user_id,
+        args.update_model_card,
     )
diff --git a/.github/scripts/torchao_model_releases/release.sh b/.github/scripts/torchao_model_releases/release.sh
@@ -15,6 +15,8 @@
 # Default quantization options
 default_quants=("FP8" "INT4" "INT8-INT4")
 push_to_hub=""
+push_to_user_id=""
+update_model_card=""
 # Parse arguments
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -34,6 +36,14 @@ while [[ $# -gt 0 ]]; do
       push_to_hub="--push_to_hub"
       shift
       ;;
+     --push_to_user_id)
+      push_to_user_id=("--push_to_user_id $2")
+      shift 2
+      ;;
+     --update_model_card)
+      update_model_card="--update_model_card"
+      shift
+      ;;
     *)
       echo "Unknown option: $1"
       exit 1
@@ -43,14 +53,14 @@ done
 # Use default quants if none specified
 if [[ -z "$model_id" ]]; then
   echo "Error: --model_id is required"
-  echo "Usage: $0 --model_id <model_id> [--quants <quant1> [quant2 ...]] [--push_to_hub]"
+  echo "Usage: $0 --model_id <model_id> [--quants <quant1> [quant2 ...]] [--push_to_hub] [--push_to_user_id <push_to_user_id>] [--update_model_card]"
   exit 1
 fi
 if [[ ${#quants[@]} -eq 0 ]]; then
   quants=("${default_quants[@]}")
 fi
 # Run the python command for each quantization option
 for quant in "${quants[@]}"; do
-  echo "Running: python quantize_and_upload.py --model_id $model_id --quant $quant $push_to_hub"
-  python quantize_and_upload.py --model_id "$model_id" --quant "$quant" $push_to_hub
+  echo "Running: python quantize_and_upload.py --model_id $model_id --quant $quant $push_to_hub $push_to_user_id $update_model_card"
+  python quantize_and_upload.py --model_id "$model_id" --quant "$quant" $push_to_hub $push_to_user_id $update_model_card
 done