# Batch Quantize Models #3
name: Batch Quantize Models

# Triggers: manual dispatch (with inputs) and a weekly schedule.
# Scheduled runs carry no inputs, so every `github.event.inputs.*` value is
# empty on that path and consumers must supply their own fallback.
on:
  workflow_dispatch:
    inputs:
      # Path (relative to the repository root) of the YAML batch definition.
      config_file:
        description: 'Batch configuration file path'
        required: true
        type: string
        default: '.github/configs/batch_quantize.yaml'
      # Upper bound on concurrently running matrix jobs.
      parallel_jobs:
        description: 'Number of parallel jobs'
        required: false
        type: number
        default: 2
      # When true, the upload-to-hub job runs after a successful batch.
      upload_to_hub:
        description: 'Upload results to HuggingFace Hub'
        required: false
        type: boolean
        default: false
  schedule:
    # Run weekly on Sunday at 2 AM UTC
    - cron: '0 2 * * 0'
# Workflow-wide environment. Versions are quoted so they stay strings
# (an unquoted 3.10 would be read as the float 3.1).
env:
  # Interpreter version handed to actions/setup-python in each job.
  PYTHON_VERSION: '3.9'
  # NOTE(review): not referenced by any step visible in this workflow;
  # presumably read by the quantization tooling — confirm or remove.
  CUDA_VERSION: '11.8'
jobs:
  # ---------------------------------------------------------------------------
  # prepare: read the batch configuration and publish it (plus a job matrix
  # with one entry per model) as job outputs.
  # ---------------------------------------------------------------------------
  prepare:
    runs-on: ubuntu-latest
    env:
      # Scheduled runs have no inputs, so fall back to the dispatch default.
      # The value is passed through the environment instead of being pasted
      # into script source, which avoids expression/script injection.
      CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      config: ${{ steps.load-config.outputs.config }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyyaml

      - name: Load batch configuration
        id: load-config
        run: |
          # stdout of the script is appended to GITHUB_OUTPUT as `config=<json>`.
          python - <<'PY' >> "$GITHUB_OUTPUT"
          import json
          import os

          import yaml

          with open(os.environ['CONFIG_FILE'], 'r') as f:
              # An empty file loads as None; normalise to an empty mapping.
              config = yaml.safe_load(f) or {}

          print('config=' + json.dumps(config))
          PY

      - name: Generate job matrix
        id: set-matrix
        run: |
          # stdout of the script is appended to GITHUB_OUTPUT as `matrix=<json>`;
          # diagnostics therefore go to stderr (via sys.exit).
          python - <<'PY' >> "$GITHUB_OUTPUT"
          import json
          import os
          import sys

          import yaml

          config_path = os.environ['CONFIG_FILE']
          with open(config_path, 'r') as f:
              config = yaml.safe_load(f) or {}

          models = config.get('models') or []
          if not models:
              # An empty matrix makes the dependent job fail with an obscure
              # error; stop here with a clear message instead.
              sys.exit(f'No models defined in {config_path}; nothing to quantize')

          matrix = {'include': []}
          for i, model in enumerate(models):
              matrix['include'].append({
                  'model_index': i,
                  'model_name': model['model'],
                  'output_dir': model['output_dir'],
                  'method': model.get('method', 'auto'),
                  'bits': model.get('bits', 4)
              })

          print('matrix=' + json.dumps(matrix))
          PY

  # ---------------------------------------------------------------------------
  # quantize: one matrix job per model from the prepare job's matrix.
  # ---------------------------------------------------------------------------
  quantize:
    needs: prepare
    runs-on: ubuntu-latest
    strategy:
      matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}
      # Inputs are empty on scheduled runs; default to 2 parallel jobs.
      max-parallel: ${{ fromJson(github.event.inputs.parallel_jobs || '2') }}
      # Let the remaining models finish even if one fails.
      fail-fast: false
    env:
      # Matrix values come from the config file; expose them as environment
      # variables so shell steps never interpolate them into script text.
      MODEL_INDEX: ${{ matrix.model_index }}
      MODEL_NAME: ${{ matrix.model_name }}
      QUANT_METHOD: ${{ matrix.method }}
      QUANT_BITS: ${{ matrix.bits }}
      OUTPUT_DIR: ${{ matrix.output_dir }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r requirements.txt

      - name: Create output directory
        run: |
          mkdir -p "$OUTPUT_DIR"
          mkdir -p ./logs

      - name: Run quantization
        run: |
          quantllm quantize \
            --model "$MODEL_NAME" \
            --method "$QUANT_METHOD" \
            --bits "$QUANT_BITS" \
            --output-dir "$OUTPUT_DIR" \
            --validate \
            --progress json \
            --log-file "./logs/quantization-${MODEL_INDEX}-${GITHUB_RUN_ID}.log" \
            --verbose

      # Logs are uploaded even when quantization fails.
      # upload-artifact v3 is retired on GitHub.com; v4 requires unique
      # artifact names per run, which the model index already guarantees.
      - name: Upload quantization logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: batch-logs-${{ matrix.model_index }}-${{ github.run_id }}
          path: ./logs/
          retention-days: 30

      # Only successful runs publish a model artifact; downstream jobs treat
      # a missing `batch-model-<index>-*` artifact as a failed model.
      - name: Upload quantized model
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: batch-model-${{ matrix.model_index }}-${{ github.run_id }}
          path: ${{ matrix.output_dir }}
          retention-days: 7

  # ---------------------------------------------------------------------------
  # collect-results: build batch_report.json summarising every model in the
  # matrix. Runs even when some (or all) quantize jobs failed.
  # ---------------------------------------------------------------------------
  collect-results:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: always()
    env:
      CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}
      # Full list of models that were supposed to run; empty if prepare failed.
      EXPECTED_MATRIX: ${{ needs.prepare.outputs.matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Must match the major version used by upload-artifact in `quantize`;
      # v3 and v4 artifacts are not interchangeable.
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts

      - name: Generate batch report
        run: |
          python - <<'PY'
          import json
          import os
          from datetime import datetime, timezone
          from pathlib import Path

          artifacts_dir = Path('./artifacts')

          report = {
              'batch_id': os.environ['GITHUB_RUN_ID'],
              'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
              'config_file': os.environ.get('CONFIG_FILE', ''),
              'results': []
          }

          # Model artifacts that were actually produced, keyed by model index.
          # The directory may be absent when no job uploaded anything.
          found = {}
          if artifacts_dir.is_dir():
              for artifact_dir in artifacts_dir.iterdir():
                  if artifact_dir.name.startswith('batch-model-'):
                      # Name layout: batch-model-<index>-<run_id>
                      found[int(artifact_dir.name.split('-')[2])] = artifact_dir

          # Models that were expected to run. A model whose job failed uploads
          # no artifact, so it must be taken from the matrix to be reported.
          expected = json.loads(os.environ.get('EXPECTED_MATRIX') or '{}').get('include', [])
          expected_indices = {entry['model_index'] for entry in expected}

          for model_index in sorted(expected_indices | set(found)):
              artifact_dir = found.get(model_index)
              # A model counts as successful when its artifact contains at
              # least one JSON file.
              if artifact_dir is not None and any(artifact_dir.rglob('*.json')):
                  status = 'success'
              else:
                  status = 'failed'
              report['results'].append({
                  'model_index': model_index,
                  'status': status,
                  'artifact_name': artifact_dir.name if artifact_dir is not None else None
              })

          # Save report
          with open('./batch_report.json', 'w') as f:
              json.dump(report, f, indent=2)

          # Print summary
          total = len(report['results'])
          successful = sum(1 for r in report['results'] if r['status'] == 'success')
          failed = total - successful
          print(f'Batch Quantization Summary:')
          print(f'Total models: {total}')
          print(f'Successful: {successful}')
          print(f'Failed: {failed}')
          print(f'Success rate: {successful/total*100:.1f}%' if total > 0 else 'N/A')
          PY

      - name: Upload batch report
        uses: actions/upload-artifact@v4
        with:
          name: batch-report-${{ github.run_id }}
          path: ./batch_report.json
          retention-days: 90

      # NOTE(review): this workflow is only triggered by workflow_dispatch and
      # schedule, so this condition is never true as the triggers stand.
      - name: Comment on PR (if applicable)
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(fs.readFileSync('./batch_report.json', 'utf8'));
            const total = report.results.length;
            const successful = report.results.filter(r => r.status === 'success').length;
            const failed = total - successful;
            const successRate = total > 0 ? (successful / total * 100).toFixed(1) : 'N/A';
            const comment = `## Batch Quantization Results
            📊 **Summary:**
            - Total models: ${total}
            - Successful: ${successful} ✅
            - Failed: ${failed} ❌
            - Success rate: ${successRate}%
            🔗 **Artifacts:** Check the workflow run for detailed logs and quantized models.
            `;
            // Await the request so an API error fails the step instead of
            // surfacing as an unhandled rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment
            });

  # ---------------------------------------------------------------------------
  # upload-to-hub: push every model artifact to the HuggingFace Hub. Only runs
  # on manual dispatch with upload_to_hub=true and when all needed jobs passed.
  # ---------------------------------------------------------------------------
  upload-to-hub:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: github.event.inputs.upload_to_hub == 'true' && success()
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # `pattern` is only supported from download-artifact v4 onwards.
      - name: Download all model artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts
          pattern: 'batch-model-*'

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install HuggingFace Hub
        run: |
          pip install huggingface_hub

      - name: Upload models to Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          python - <<'PY'
          import os
          import sys
          from pathlib import Path

          from huggingface_hub import HfApi

          token = os.getenv('HF_TOKEN')
          if not token:
              # A missing token is treated as "nothing to do", not an error.
              print('HF_TOKEN not found, skipping upload')
              sys.exit(0)

          api = HfApi()
          run_id = os.environ['GITHUB_RUN_ID']
          artifacts_dir = Path('./artifacts')
          failures = []

          for artifact_dir in sorted(artifacts_dir.iterdir()):
              if not artifact_dir.name.startswith('batch-model-'):
                  continue
              # Name layout: batch-model-<index>-<run_id>
              model_index = artifact_dir.name.split('-')[2]
              # Create repository name
              repo_id = f'quantllm/batch-{model_index}-{run_id}'
              try:
                  # The repo id is unique per run, so it has to be created
                  # before a folder can be uploaded into it.
                  api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
                  api.upload_folder(
                      folder_path=str(artifact_dir),
                      repo_id=repo_id,
                      token=token
                  )
                  print(f'Uploaded {artifact_dir.name} to {repo_id}')
              except Exception as e:
                  # Keep going so one bad model does not block the others,
                  # but remember the failure for the final exit status.
                  print(f'Failed to upload {artifact_dir.name}: {e}')
                  failures.append(artifact_dir.name)

          if failures:
              sys.exit(f'{len(failures)} upload(s) failed: ' + ', '.join(failures))
          PY