Batch Quantize Models #3

name: Batch Quantize Models

on:
  workflow_dispatch:
    inputs:
      config_file:
        description: 'Batch configuration file path'
        required: true
        type: string
        default: '.github/configs/batch_quantize.yaml'
      parallel_jobs:
        description: 'Number of parallel jobs'
        required: false
        type: number
        default: 2
      upload_to_hub:
        description: 'Upload results to HuggingFace Hub'
        required: false
        type: boolean
        default: false
  schedule:
    # Run weekly on Sunday at 2 AM UTC
    - cron: '0 2 * * 0'
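
# One way to kick off a batch run by hand is the GitHub CLI. This is only a
# sketch: the input names match the workflow_dispatch definitions above, but
# the config path and values shown are illustrative.
#
#   gh workflow run "Batch Quantize Models" \
#     -f config_file=.github/configs/batch_quantize.yaml \
#     -f parallel_jobs=3 \
#     -f upload_to_hub=true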

env:
  PYTHON_VERSION: '3.9'
  CUDA_VERSION: '11.8'
  # Scheduled runs have no workflow_dispatch inputs, so fall back to the default config path.
  CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}

jobs:
  prepare:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      config: ${{ steps.load-config.outputs.config }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyyaml
      - name: Load batch configuration
        id: load-config
        run: |
          python -c "
          import yaml
          import json
          with open('${{ env.CONFIG_FILE }}', 'r') as f:
              config = yaml.safe_load(f)
          print('config=' + json.dumps(config))
          " >> $GITHUB_OUTPUT
      - name: Generate job matrix
        id: set-matrix
        run: |
          python -c "
          import yaml
          import json
          with open('${{ env.CONFIG_FILE }}', 'r') as f:
              config = yaml.safe_load(f)
          models = config.get('models', [])
          matrix = {'include': []}
          for i, model in enumerate(models):
              matrix['include'].append({
                  'model_index': i,
                  'model_name': model['model'],
                  'output_dir': model['output_dir'],
                  'method': model.get('method', 'auto'),
                  'bits': model.get('bits', 4)
              })
          print('matrix=' + json.dumps(matrix))
          " >> $GITHUB_OUTPUT

  quantize:
    needs: prepare
    runs-on: ubuntu-latest
    strategy:
      matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}
      max-parallel: ${{ fromJson(github.event.inputs.parallel_jobs || '2') }}
      fail-fast: false
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r requirements.txt
      - name: Create output directory
        run: |
          mkdir -p "${{ matrix.output_dir }}"
          mkdir -p ./logs
      - name: Run quantization
        run: |
          quantllm quantize \
            --model "${{ matrix.model_name }}" \
            --method "${{ matrix.method }}" \
            --bits "${{ matrix.bits }}" \
            --output-dir "${{ matrix.output_dir }}" \
            --validate \
            --progress json \
            --log-file "./logs/quantization-${{ matrix.model_index }}-${{ github.run_id }}.log" \
            --verbose
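
      # For the first entry of the config sketched above, the matrix expansion
      # resolves to roughly the following invocation (model name, paths, and
      # run id are illustrative, not taken from a real run):
      #
      #   quantllm quantize \
      #     --model "facebook/opt-125m" \
      #     --method "auto" \
      #     --bits "4" \
      #     --output-dir "./quantized/opt-125m" \
      #     --validate \
      #     --progress json \
      #     --log-file "./logs/quantization-0-<run_id>.log" \
      #     --verbose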
      - name: Upload quantization logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: batch-logs-${{ matrix.model_index }}-${{ github.run_id }}
          path: ./logs/
          retention-days: 30
      - name: Upload quantized model
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: batch-model-${{ matrix.model_index }}-${{ github.run_id }}
          path: ${{ matrix.output_dir }}
          retention-days: 7

  collect-results:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts
      - name: Generate batch report
        run: |
          python -c "
          import json
          from pathlib import Path
          artifacts_dir = Path('./artifacts')
          report = {
              'batch_id': '${{ github.run_id }}',
              'timestamp': '$(date -u +%Y-%m-%dT%H:%M:%SZ)',
              'config_file': '${{ env.CONFIG_FILE }}',
              'results': []
          }
          # Collect results from each model artifact
          for artifact_dir in artifacts_dir.iterdir():
              if artifact_dir.name.startswith('batch-model-'):
                  model_index = artifact_dir.name.split('-')[2]
                  # Treat the presence of any JSON file in the artifact as a successful quantization
                  if any(artifact_dir.rglob('*.json')):
                      status = 'success'
                  else:
                      status = 'failed'
                  report['results'].append({
                      'model_index': int(model_index),
                      'status': status,
                      'artifact_name': artifact_dir.name
                  })
          # Save report
          with open('./batch_report.json', 'w') as f:
              json.dump(report, f, indent=2)
          # Print summary
          total = len(report['results'])
          successful = sum(1 for r in report['results'] if r['status'] == 'success')
          failed = total - successful
          print('Batch Quantization Summary:')
          print(f'Total models: {total}')
          print(f'Successful: {successful}')
          print(f'Failed: {failed}')
          print(f'Success rate: {successful / total * 100:.1f}%' if total > 0 else 'Success rate: N/A')
          "
      - name: Upload batch report
        uses: actions/upload-artifact@v4
        with:
          name: batch-report-${{ github.run_id }}
          path: ./batch_report.json
          retention-days: 90
      - name: Comment on PR (if applicable)
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(fs.readFileSync('./batch_report.json', 'utf8'));
            const total = report.results.length;
            const successful = report.results.filter(r => r.status === 'success').length;
            const failed = total - successful;
            const successRate = total > 0 ? (successful / total * 100).toFixed(1) : 'N/A';
            const comment = `## Batch Quantization Results
            📊 **Summary:**
            - Total models: ${total}
            - Successful: ${successful} ✅
            - Failed: ${failed} ❌
            - Success rate: ${successRate}%
            🔗 **Artifacts:** Check the workflow run for detailed logs and quantized models.
            `;
            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment
            });

  upload-to-hub:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: github.event.inputs.upload_to_hub == 'true' && success()
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Download all model artifacts
        # 'pattern' filtering requires actions/download-artifact v4
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts
          pattern: batch-model-*
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install HuggingFace Hub
        run: |
          pip install huggingface_hub
      - name: Upload models to Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          python -c "
          import os
          import sys
          from pathlib import Path
          from huggingface_hub import HfApi
          if not os.getenv('HF_TOKEN'):
              print('HF_TOKEN not found, skipping upload')
              sys.exit(0)
          api = HfApi()
          artifacts_dir = Path('./artifacts')
          for artifact_dir in artifacts_dir.iterdir():
              if artifact_dir.name.startswith('batch-model-'):
                  model_index = artifact_dir.name.split('-')[2]
                  # Build the target repository name from the model index and run id
                  repo_id = f'quantllm/batch-{model_index}-${{ github.run_id }}'
                  try:
                      # Make sure the target repo exists before pushing the folder
                      api.create_repo(repo_id=repo_id, token=os.getenv('HF_TOKEN'), exist_ok=True)
                      api.upload_folder(
                          folder_path=str(artifact_dir),
                          repo_id=repo_id,
                          token=os.getenv('HF_TOKEN')
                      )
                      print(f'Uploaded {artifact_dir.name} to {repo_id}')
                  except Exception as e:
                      print(f'Failed to upload {artifact_dir.name}: {e}')
          "