Make the project complete. #1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Quantize Model

# Triggers:
#   * workflow_dispatch - manual run with the quantization parameters below.
#   * push / pull_request - runs when model files or this workflow file change;
#     no inputs exist for these events, so the jobs fall back to their defaults.
on:
  workflow_dispatch:
    inputs:
      model_name:
        description: 'Model name or path'
        required: true
        type: string
      quantization_method:
        description: 'Quantization method'
        required: true
        type: choice
        options:
          - auto
          - gguf
          - gptq
          - awq
        default: 'auto'
      bits:
        description: 'Quantization bits'
        required: true
        type: choice
        # Quoted so the choices stay strings rather than YAML integers.
        options:
          - '2'
          - '3'
          - '4'
          - '8'
        default: '4'
      output_format:
        description: 'Output format'
        required: true
        type: choice
        options:
          - auto
          - gguf
          - safetensors
          - pytorch
        default: 'auto'
      upload_to_hub:
        description: 'Upload to HuggingFace Hub'
        required: false
        type: boolean
        default: false
      hub_repo_id:
        description: 'HuggingFace Hub repository ID'
        required: false
        type: string
  push:
    paths:
      - 'models/**'
      - '.github/workflows/quantize-model.yml'
  pull_request:
    paths:
      - 'models/**'
      - '.github/workflows/quantize-model.yml'
# Workflow-wide tool versions, read by the Python and CUDA setup steps.
env:
  # Quoted so the version is a string, not a float.
  PYTHON_VERSION: '3.9'
  # Full major.minor.patch form, which is what the CUDA toolkit action's
  # `cuda` input is documented to take; a bare '11.8' is not a valid version.
  CUDA_VERSION: '11.8.0'
jobs:
  # Quantizes the selected model once per matrix entry (hosted CPU runner and
  # self-hosted GPU runner), then publishes logs, the quantized model, and
  # optionally a HuggingFace Hub upload and a GitHub release.
  quantize:
    # Take the runner from the matrix entry; a fixed `ubuntu-latest` would run
    # the GPU entry on a hosted runner and leave `matrix.runner` unused.
    runs-on: ${{ matrix.runner }}
    # The release step publishes assets with GITHUB_TOKEN, which needs write
    # access to repository contents.
    permissions:
      contents: write
    strategy:
      matrix:
        include:
          - gpu: false
            runner: ubuntu-latest
          - gpu: true
            runner: [self-hosted, gpu]
    # Inputs are exposed as environment variables instead of being expanded
    # into script text, so their content is never parsed as shell or Python.
    # The fallbacks apply to push / pull_request runs, which carry no inputs.
    env:
      MODEL_NAME: ${{ github.event.inputs.model_name || 'gpt2' }}
      METHOD: ${{ github.event.inputs.quantization_method || 'auto' }}
      BITS: ${{ github.event.inputs.bits || '4' }}
      OUTPUT_FORMAT: ${{ github.event.inputs.output_format || 'auto' }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Setup CUDA (GPU runners only)
        if: matrix.gpu
        # NOTE(review): the action reference was mangled in the copy this was
        # restored from; the repository name and version tag are a
        # reconstruction - confirm the intended tag.
        uses: Jimver/cuda-toolkit@v0.2.11
        with:
          cuda: ${{ env.CUDA_VERSION }}

      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r requirements.txt

      - name: Install GPU dependencies
        if: matrix.gpu
        run: |
          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

      - name: Set up model configuration
        run: |
          # Create both directories up front; the quantization step writes its
          # log file under ./logs.
          mkdir -p ./quantized_models ./logs
          # Hub-style model IDs ("org/model") contain '/', which is not allowed
          # in artifact names and would nest the output directory. Replace
          # every character outside [A-Za-z0-9._-] with '-'.
          MODEL_SLUG="$(printf '%s' "$MODEL_NAME" | tr -c 'A-Za-z0-9._-' '-')"
          echo "MODEL_SLUG=$MODEL_SLUG" >> "$GITHUB_ENV"
          echo "OUTPUT_DIR=./quantized_models/${MODEL_SLUG}-${METHOD}-${BITS}bit" >> "$GITHUB_ENV"

      - name: Run quantization
        run: |
          quantllm quantize \
            --model "$MODEL_NAME" \
            --method "$METHOD" \
            --bits "$BITS" \
            --output-format "$OUTPUT_FORMAT" \
            --output-dir "$OUTPUT_DIR" \
            --validate \
            --benchmark \
            --progress json \
            --log-file "./logs/quantization-${GITHUB_RUN_ID}.log" \
            --verbose

      - name: Upload quantization logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          # upload-artifact v4 requires unique artifact names within a run,
          # so each matrix entry adds its own cpu/gpu suffix.
          name: quantization-logs-${{ matrix.gpu && 'gpu' || 'cpu' }}-${{ github.run_id }}
          path: ./logs/
          retention-days: 30

      # Runs before the model upload so model_info.json is part of the
      # uploaded ./quantized_models/ artifact.
      - name: Generate model card
        run: |
          python - <<'PY'
          import json
          import os
          from datetime import datetime, timezone

          model_info = {
              'model_name': os.environ['MODEL_NAME'],
              'quantization_method': os.environ['METHOD'],
              'bits': int(os.environ['BITS']),
              'output_format': os.environ['OUTPUT_FORMAT'],
              'github_run_id': os.environ['GITHUB_RUN_ID'],
              'github_sha': os.environ['GITHUB_SHA'],
              'quantized_at': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
          }
          with open('./quantized_models/model_info.json', 'w') as f:
              json.dump(model_info, f, indent=2)
          PY

      - name: Upload quantized model
        uses: actions/upload-artifact@v4
        with:
          name: quantized-model-${{ env.MODEL_SLUG }}-${{ env.METHOD }}-${{ env.BITS }}bit-${{ matrix.gpu && 'gpu' || 'cpu' }}
          path: ./quantized_models/
          retention-days: 7

      - name: Upload to HuggingFace Hub
        if: github.event.inputs.upload_to_hub == 'true' && github.event.inputs.hub_repo_id != ''
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HUB_REPO_ID: ${{ github.event.inputs.hub_repo_id }}
        run: |
          pip install huggingface_hub
          python - <<'PY'
          import os
          from huggingface_hub import HfApi

          token = os.getenv('HF_TOKEN')
          if token:
              HfApi().upload_folder(
                  folder_path=os.environ['OUTPUT_DIR'],
                  repo_id=os.environ['HUB_REPO_ID'],
                  token=token,
              )
              print('Model uploaded to HuggingFace Hub successfully!')
          else:
              print('HF_TOKEN not found, skipping upload')
          PY

      - name: Create release (on tag)
        if: startsWith(github.ref, 'refs/tags/')
        uses: softprops/action-gh-release@v1
        with:
          files: |
            ./quantized_models/**/*
          generate_release_notes: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  # Prints a summary once `quantize` has finished; `always()` keeps this job
  # running when `quantize` fails so the failure branch can execute.
  notify:
    needs: quantize
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Notify on success
        if: needs.quantize.result == 'success'
        # Inputs are passed through the environment rather than expanded into
        # the script text.
        env:
          MODEL_NAME: ${{ github.event.inputs.model_name || 'gpt2' }}
          METHOD: ${{ github.event.inputs.quantization_method || 'auto' }}
          BITS: ${{ github.event.inputs.bits || '4' }}
        run: |
          echo "✅ Quantization completed successfully!"
          echo "Model: $MODEL_NAME"
          echo "Method: $METHOD"
          echo "Bits: $BITS"

      - name: Notify on failure
        if: needs.quantize.result == 'failure'
        run: |
          echo "❌ Quantization failed!"
          echo "Check the logs for more details."
          exit 1