# .github/workflows/quantize-model.yml
# Quantize a model (GGUF / GPTQ / AWQ), archive the result, and optionally
# publish it to the HuggingFace Hub.
name: Quantize Model

# Triggers:
#   - workflow_dispatch: manual run with full control over the parameters below
#   - push / pull_request: automatic validation when models or this workflow change
#     (dispatch inputs are empty then; steps fall back to defaults such as 'gpt2')
on:
  workflow_dispatch:
    inputs:
      model_name:
        description: 'Model name or path'
        required: true
        type: string
      quantization_method:
        description: 'Quantization method'
        required: true
        type: choice
        options:
          - auto
          - gguf
          - gptq
          - awq
        default: 'auto'
      bits:
        description: 'Quantization bits'
        required: true
        type: choice
        options:
          # quoted: bare 2/3/4/8 would be parsed as integers
          - '2'
          - '3'
          - '4'
          - '8'
        default: '4'
      output_format:
        description: 'Output format'
        required: true
        type: choice
        options:
          - auto
          - gguf
          - safetensors
          - pytorch
        default: 'auto'
      upload_to_hub:
        description: 'Upload to HuggingFace Hub'
        required: false
        type: boolean
        default: false
      hub_repo_id:
        description: 'HuggingFace Hub repository ID'
        required: false
        type: string
  push:
    paths:
      - 'models/**'
      - '.github/workflows/quantize-model.yml'
  pull_request:
    paths:
      - 'models/**'
      - '.github/workflows/quantize-model.yml'

env:
  # Quoted so 3.9 / 11.8 stay strings, not floats.
  PYTHON_VERSION: '3.9'
  CUDA_VERSION: '11.8'
jobs:
  quantize:
    strategy:
      matrix:
        include:
          # CPU-only leg on a GitHub-hosted runner.
          - gpu: false
            runner: ubuntu-latest
          # GPU leg on a self-hosted runner labelled "gpu".
          - gpu: true
            runner: [self-hosted, gpu]
    # BUG FIX: this was hard-coded to ubuntu-latest, so the GPU matrix leg
    # never actually ran on the self-hosted GPU runner.
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Setup CUDA (GPU runners only)
        if: matrix.gpu
        uses: Jimver/[email protected]
        with:
          cuda: ${{ env.CUDA_VERSION }}

      - name: Cache pip dependencies
        # v3 of actions/cache is deprecated; v4 is a drop-in replacement.
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r requirements.txt

      - name: Install GPU dependencies
        if: matrix.gpu
        run: |
          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

      - name: Set up model configuration
        # SECURITY: workflow_dispatch inputs are attacker-controllable text.
        # Pass them through `env:` instead of splicing ${{ }} into the script,
        # so they reach the shell as data, not as script source.
        env:
          INPUT_MODEL_NAME: ${{ github.event.inputs.model_name || 'gpt2' }}
          INPUT_METHOD: ${{ github.event.inputs.quantization_method || 'auto' }}
          INPUT_BITS: ${{ github.event.inputs.bits || '4' }}
          INPUT_OUTPUT_FORMAT: ${{ github.event.inputs.output_format || 'auto' }}
        run: |
          # BUG FIX: ./logs must exist before --log-file writes into it.
          mkdir -p ./quantized_models ./logs
          # Hub model names may contain '/' (e.g. "org/model"); artifact names
          # and single path components must not, so keep a sanitized copy.
          MODEL_SAFE="${INPUT_MODEL_NAME//\//_}"
          {
            echo "MODEL_NAME=$INPUT_MODEL_NAME"
            echo "MODEL_SAFE=$MODEL_SAFE"
            echo "METHOD=$INPUT_METHOD"
            echo "BITS=$INPUT_BITS"
            echo "OUTPUT_FORMAT=$INPUT_OUTPUT_FORMAT"
          } >> "$GITHUB_ENV"

      - name: Run quantization
        # Values come from $GITHUB_ENV (previous step), so plain shell
        # expansion is used rather than ${{ }} interpolation.
        run: |
          quantllm quantize \
            --model "$MODEL_NAME" \
            --method "$METHOD" \
            --bits "$BITS" \
            --output-format "$OUTPUT_FORMAT" \
            --output-dir "./quantized_models/${MODEL_SAFE}-${METHOD}-${BITS}bit" \
            --validate \
            --benchmark \
            --progress json \
            --log-file "./logs/quantization-${{ github.run_id }}.log" \
            --verbose

      - name: Upload quantization logs
        if: always()
        # v3 artifact uploads have been disabled by GitHub; v4 is required.
        uses: actions/upload-artifact@v4
        with:
          name: quantization-logs-${{ github.run_id }}
          path: ./logs/
          retention-days: 30

      - name: Upload quantized model
        uses: actions/upload-artifact@v4
        with:
          # MODEL_SAFE: artifact names must not contain '/'.
          name: quantized-model-${{ env.MODEL_SAFE }}-${{ env.METHOD }}-${{ env.BITS }}bit
          path: ./quantized_models/
          retention-days: 7

      - name: Generate model card
        # Read everything from the environment: no ${{ }} inside Python source
        # (injection-safe) and no shell $(date) substitution — the timestamp is
        # computed in Python instead.
        run: |
          python - <<'EOF'
          import json
          import os
          from datetime import datetime, timezone

          model_info = {
              'model_name': os.environ['MODEL_NAME'],
              'quantization_method': os.environ['METHOD'],
              'bits': int(os.environ['BITS']),
              'output_format': os.environ['OUTPUT_FORMAT'],
              'github_run_id': os.environ['GITHUB_RUN_ID'],
              'github_sha': os.environ['GITHUB_SHA'],
              'quantized_at': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
          }
          with open('./quantized_models/model_info.json', 'w') as f:
              json.dump(model_info, f, indent=2)
          EOF

      - name: Upload to HuggingFace Hub
        if: github.event.inputs.upload_to_hub == 'true' && github.event.inputs.hub_repo_id != ''
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Untrusted input passed as environment data, not Python source.
          HUB_REPO_ID: ${{ github.event.inputs.hub_repo_id }}
        run: |
          pip install huggingface_hub
          python - <<'EOF'
          import os
          from huggingface_hub import HfApi

          token = os.getenv('HF_TOKEN')
          if token:
              folder = './quantized_models/{}-{}-{}bit'.format(
                  os.environ['MODEL_SAFE'], os.environ['METHOD'], os.environ['BITS'])
              HfApi().upload_folder(
                  folder_path=folder,
                  repo_id=os.environ['HUB_REPO_ID'],
                  token=token,
              )
              print('Model uploaded to HuggingFace Hub successfully!')
          else:
              print('HF_TOKEN not found, skipping upload')
          EOF

      - name: Create release (on tag)
        if: startsWith(github.ref, 'refs/tags/')
        uses: softprops/action-gh-release@v1
        with:
          files: |
            ./quantized_models/**/*
          generate_release_notes: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  notify:
    needs: quantize
    runs-on: ubuntu-latest
    # Run regardless of the quantize job's outcome; each step filters on result.
    if: always()
    steps:
      - name: Notify on success
        if: needs.quantize.result == 'success'
        # Inputs echoed via env vars — same injection hygiene as above.
        env:
          MODEL_NAME: ${{ github.event.inputs.model_name || 'gpt2' }}
          METHOD: ${{ github.event.inputs.quantization_method || 'auto' }}
          BITS: ${{ github.event.inputs.bits || '4' }}
        run: |
          echo "✅ Quantization completed successfully!"
          echo "Model: $MODEL_NAME"
          echo "Method: $METHOD"
          echo "Bits: $BITS"

      - name: Notify on failure
        if: needs.quantize.result == 'failure'
        run: |
          echo "❌ Quantization failed!"
          echo "Check the logs for more details."
          exit 1