Batch Quantize Models #3

name: Batch Quantize Models

on:
  workflow_dispatch:
    inputs:
      config_file:
        description: 'Batch configuration file path'
        required: true
        type: string
        default: '.github/configs/batch_quantize.yaml'
      parallel_jobs:
        description: 'Number of parallel jobs'
        required: false
        type: number
        default: 2
      upload_to_hub:
        description: 'Upload results to HuggingFace Hub'
        required: false
        type: boolean
        default: false
  schedule:
    # Run weekly on Sunday at 2 AM UTC
    - cron: '0 2 * * 0'
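
# One way to kick off a batch run by hand is the GitHub CLI. This is only a
# sketch: the input names match the workflow_dispatch definitions above, but
# the config path and values shown are illustrative.
#
#   gh workflow run "Batch Quantize Models" \
#     -f config_file=.github/configs/batch_quantize.yaml \
#     -f parallel_jobs=3 \
#     -f upload_to_hub=true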

env:
  PYTHON_VERSION: '3.9'
  CUDA_VERSION: '11.8'
  # Scheduled runs have no workflow_dispatch inputs, so fall back to the default config path.
  CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}

jobs:
  prepare:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      config: ${{ steps.load-config.outputs.config }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyyaml
      - name: Load batch configuration
        id: load-config
        run: |
          python -c "
          import yaml
          import json
          with open('${{ env.CONFIG_FILE }}', 'r') as f:
              config = yaml.safe_load(f)
          print('config=' + json.dumps(config))
          " >> $GITHUB_OUTPUT
      - name: Generate job matrix
        id: set-matrix
        run: |
          python -c "
          import yaml
          import json
          with open('${{ env.CONFIG_FILE }}', 'r') as f:
              config = yaml.safe_load(f)
          models = config.get('models', [])
          matrix = {'include': []}
          for i, model in enumerate(models):
              matrix['include'].append({
                  'model_index': i,
                  'model_name': model['model'],
                  'output_dir': model['output_dir'],
                  'method': model.get('method', 'auto'),
                  'bits': model.get('bits', 4)
              })
          print('matrix=' + json.dumps(matrix))
          " >> $GITHUB_OUTPUT

  quantize:
    needs: prepare
    runs-on: ubuntu-latest
    strategy:
      matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}
      max-parallel: ${{ fromJson(github.event.inputs.parallel_jobs || '2') }}
      fail-fast: false
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r requirements.txt
      - name: Create output directory
        run: |
          mkdir -p "${{ matrix.output_dir }}"
          mkdir -p ./logs
      - name: Run quantization
        run: |
          quantllm quantize \
            --model "${{ matrix.model_name }}" \
            --method "${{ matrix.method }}" \
            --bits "${{ matrix.bits }}" \
            --output-dir "${{ matrix.output_dir }}" \
            --validate \
            --progress json \
            --log-file "./logs/quantization-${{ matrix.model_index }}-${{ github.run_id }}.log" \
            --verbose
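
      # For the first entry of the config sketched above, the matrix expansion
      # resolves to roughly the following invocation (model name, paths, and
      # run id are illustrative, not taken from a real run):
      #
      #   quantllm quantize \
      #     --model "facebook/opt-125m" \
      #     --method "auto" \
      #     --bits "4" \
      #     --output-dir "./quantized/opt-125m" \
      #     --validate \
      #     --progress json \
      #     --log-file "./logs/quantization-0-<run_id>.log" \
      #     --verbose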
      - name: Upload quantization logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: batch-logs-${{ matrix.model_index }}-${{ github.run_id }}
          path: ./logs/
          retention-days: 30
      - name: Upload quantized model
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: batch-model-${{ matrix.model_index }}-${{ github.run_id }}
          path: ${{ matrix.output_dir }}
          retention-days: 7

  collect-results:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts
      - name: Generate batch report
        run: |
          python -c "
          import json
          from pathlib import Path
          artifacts_dir = Path('./artifacts')
          report = {
              'batch_id': '${{ github.run_id }}',
              'timestamp': '$(date -u +%Y-%m-%dT%H:%M:%SZ)',
              'config_file': '${{ env.CONFIG_FILE }}',
              'results': []
          }
          # Collect results from each model artifact
          for artifact_dir in artifacts_dir.iterdir():
              if artifact_dir.name.startswith('batch-model-'):
                  model_index = artifact_dir.name.split('-')[2]
                  # Treat the presence of any JSON file in the artifact as a successful quantization
                  if any(artifact_dir.rglob('*.json')):
                      status = 'success'
                  else:
                      status = 'failed'
                  report['results'].append({
                      'model_index': int(model_index),
                      'status': status,
                      'artifact_name': artifact_dir.name
                  })
          # Save report
          with open('./batch_report.json', 'w') as f:
              json.dump(report, f, indent=2)
          # Print summary
          total = len(report['results'])
          successful = sum(1 for r in report['results'] if r['status'] == 'success')
          failed = total - successful
          print('Batch Quantization Summary:')
          print(f'Total models: {total}')
          print(f'Successful: {successful}')
          print(f'Failed: {failed}')
          print(f'Success rate: {successful / total * 100:.1f}%' if total > 0 else 'Success rate: N/A')
          "
      - name: Upload batch report
        uses: actions/upload-artifact@v4
        with:
          name: batch-report-${{ github.run_id }}
          path: ./batch_report.json
          retention-days: 90
      - name: Comment on PR (if applicable)
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(fs.readFileSync('./batch_report.json', 'utf8'));
            const total = report.results.length;
            const successful = report.results.filter(r => r.status === 'success').length;
            const failed = total - successful;
            const successRate = total > 0 ? (successful / total * 100).toFixed(1) : 'N/A';
            const comment = `## Batch Quantization Results
            📊 **Summary:**
            - Total models: ${total}
            - Successful: ${successful} ✅
            - Failed: ${failed} ❌
            - Success rate: ${successRate}%
            🔗 **Artifacts:** Check the workflow run for detailed logs and quantized models.
            `;
            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment
            });

  upload-to-hub:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: github.event.inputs.upload_to_hub == 'true' && success()
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Download all model artifacts
        # 'pattern' filtering requires actions/download-artifact v4
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts
          pattern: batch-model-*
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install HuggingFace Hub
        run: |
          pip install huggingface_hub
      - name: Upload models to Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          python -c "
          import os
          import sys
          from pathlib import Path
          from huggingface_hub import HfApi
          if not os.getenv('HF_TOKEN'):
              print('HF_TOKEN not found, skipping upload')
              sys.exit(0)
          api = HfApi()
          artifacts_dir = Path('./artifacts')
          for artifact_dir in artifacts_dir.iterdir():
              if artifact_dir.name.startswith('batch-model-'):
                  model_index = artifact_dir.name.split('-')[2]
                  # Build the target repository name from the model index and run id
                  repo_id = f'quantllm/batch-{model_index}-${{ github.run_id }}'
                  try:
                      # Make sure the target repo exists before pushing the folder
                      api.create_repo(repo_id=repo_id, token=os.getenv('HF_TOKEN'), exist_ok=True)
                      api.upload_folder(
                          folder_path=str(artifact_dir),
                          repo_id=repo_id,
                          token=os.getenv('HF_TOKEN')
                      )
                      print(f'Uploaded {artifact_dir.name} to {repo_id}')
                  except Exception as e:
                      print(f'Failed to upload {artifact_dir.name}: {e}')
          "