Make the project complete. #1

Workflow file for this run

.github/workflows/quantize-model.yml at 40bb0c4

	# Quantizes a model with the `quantllm` CLI, either on demand or when model
	# files / this workflow file change.
	name: Quantize Model

	on:
	  # Manual run: every quantization parameter is chosen in the "Run workflow" form.
	  workflow_dispatch:
	    inputs:
	      model_name:
	        description: 'Model name or path'
	        required: true
	        type: string
	      quantization_method:
	        description: 'Quantization method'
	        required: true
	        type: choice
	        options:
	          - auto
	          - gguf
	          - gptq
	          - awq
	        default: 'auto'
	      bits:
	        description: 'Quantization bits'
	        required: true
	        type: choice
	        # Quoted so the choices stay strings instead of being typed as integers.
	        options:
	          - '2'
	          - '3'
	          - '4'
	          - '8'
	        default: '4'
	      output_format:
	        description: 'Output format'
	        required: true
	        type: choice
	        options:
	          - auto
	          - gguf
	          - safetensors
	          - pytorch
	        default: 'auto'
	      upload_to_hub:
	        description: 'Upload to HuggingFace Hub'
	        required: false
	        type: boolean
	        default: false
	      hub_repo_id:
	        description: 'HuggingFace Hub repository ID'
	        required: false
	        type: string

	  # Automatic runs: no inputs exist for these events, so the job falls back to
	  # its built-in defaults (gpt2 / auto / 4 bits / auto).
	  push:
	    paths:
	      - 'models/**'
	      - '.github/workflows/quantize-model.yml'

	  pull_request:
	    paths:
	      - 'models/**'
	      - '.github/workflows/quantize-model.yml'

	# Workflow-wide tool versions; quoted so they are never parsed as floats.
	env:
	  # Interpreter version handed to actions/setup-python.
	  PYTHON_VERSION: '3.9'
	  # Toolkit version handed to the CUDA setup step on GPU runners.
	  # NOTE(review): Jimver/cuda-toolkit appears to require a full
	  # major.minor.patch version ('11.8' is rejected) — confirm against its README.
	  CUDA_VERSION: '11.8.0'

	jobs:
	  # Quantizes the model once per matrix entry: CPU on a hosted runner and
	  # GPU on a self-hosted runner.
	  quantize:
	    # Take the runner from the matrix entry. A fixed `ubuntu-latest` here would
	    # send the GPU entry to a hosted CPU machine as well.
	    runs-on: ${{ matrix.runner }}

	    # The release step creates a GitHub release, which needs write access to
	    # repository contents.
	    permissions:
	      contents: write

	    strategy:
	      matrix:
	        include:
	          - gpu: false
	            runner: ubuntu-latest
	          - gpu: true
	            runner: [self-hosted, gpu]

	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      - name: Set up Python
	        uses: actions/setup-python@v4
	        with:
	          python-version: ${{ env.PYTHON_VERSION }}

	      - name: Setup CUDA (GPU runners only)
	        if: matrix.gpu
	        # NOTE(review): the pinned tag was lost to e-mail obfuscation in the
	        # extracted source; v0.2.11 is a best guess — confirm the intended version.
	        uses: Jimver/cuda-toolkit@v0.2.11
	        with:
	          cuda: ${{ env.CUDA_VERSION }}

	      - name: Cache pip dependencies
	        uses: actions/cache@v3
	        with:
	          path: ~/.cache/pip
	          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
	          restore-keys: |
	            ${{ runner.os }}-pip-

	      - name: Install dependencies
	        run: |
	          python -m pip install --upgrade pip
	          pip install -e .
	          pip install -r requirements.txt

	      - name: Install GPU dependencies
	        if: matrix.gpu
	        run: |
	          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

	      - name: Set up model configuration
	        # Inputs reach the script through the environment instead of being
	        # interpolated into it, so a crafted value cannot inject shell commands.
	        env:
	          REQ_MODEL_NAME: ${{ github.event.inputs.model_name || 'gpt2' }}
	          REQ_METHOD: ${{ github.event.inputs.quantization_method || 'auto' }}
	          REQ_BITS: ${{ github.event.inputs.bits || '4' }}
	          REQ_OUTPUT_FORMAT: ${{ github.event.inputs.output_format || 'auto' }}
	        run: |
	          # ./logs must exist before the quantization step writes its log file.
	          mkdir -p ./quantized_models ./logs
	          # Model ids such as "org/model" contain "/", which is not allowed in
	          # artifact names and would nest the output directory; replace every
	          # character outside [A-Za-z0-9._-] with "-".
	          MODEL_SLUG="$(printf '%s' "$REQ_MODEL_NAME" | tr -c 'A-Za-z0-9._-' '-')"
	          {
	            echo "MODEL_NAME=${REQ_MODEL_NAME}"
	            echo "MODEL_SLUG=${MODEL_SLUG}"
	            echo "METHOD=${REQ_METHOD}"
	            echo "BITS=${REQ_BITS}"
	            echo "OUTPUT_FORMAT=${REQ_OUTPUT_FORMAT}"
	            echo "OUTPUT_DIR=./quantized_models/${MODEL_SLUG}-${REQ_METHOD}-${REQ_BITS}bit"
	          } >> "$GITHUB_ENV"

	      - name: Run quantization
	        # Values exported to GITHUB_ENV above are plain environment variables here.
	        run: |
	          quantllm quantize \
	            --model "$MODEL_NAME" \
	            --method "$METHOD" \
	            --bits "$BITS" \
	            --output-format "$OUTPUT_FORMAT" \
	            --output-dir "$OUTPUT_DIR" \
	            --validate \
	            --benchmark \
	            --progress json \
	            --log-file "./logs/quantization-${GITHUB_RUN_ID}.log" \
	            --verbose

	      - name: Upload quantization logs
	        # always() keeps the logs available when quantization itself failed.
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          # upload-artifact v4 rejects duplicate names within a run, so each
	          # matrix entry gets its own suffix.
	          name: quantization-logs-${{ github.run_id }}-${{ matrix.gpu && 'gpu' || 'cpu' }}
	          path: ./logs/
	          retention-days: 30

	      - name: Generate model card
	        # Runs before the model upload so model_info.json is part of the artifact.
	        run: |
	          python - <<'PY'
	          import json
	          import os
	          from datetime import datetime, timezone

	          model_info = {
	              'model_name': os.environ['MODEL_NAME'],
	              'quantization_method': os.environ['METHOD'],
	              'bits': int(os.environ['BITS']),
	              'output_format': os.environ['OUTPUT_FORMAT'],
	              'github_run_id': os.environ['GITHUB_RUN_ID'],
	              'github_sha': os.environ['GITHUB_SHA'],
	              'quantized_at': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
	          }

	          with open('./quantized_models/model_info.json', 'w') as f:
	              json.dump(model_info, f, indent=2)
	          PY

	      - name: Upload quantized model
	        uses: actions/upload-artifact@v4
	        with:
	          name: quantized-model-${{ env.MODEL_SLUG }}-${{ env.METHOD }}-${{ env.BITS }}bit-${{ matrix.gpu && 'gpu' || 'cpu' }}
	          path: ./quantized_models/
	          retention-days: 7

	      - name: Upload to HuggingFace Hub
	        # workflow_dispatch inputs arrive as strings, hence the comparison with 'true'.
	        if: github.event.inputs.upload_to_hub == 'true' && github.event.inputs.hub_repo_id != ''
	        env:
	          HF_TOKEN: ${{ secrets.HF_TOKEN }}
	          HUB_REPO_ID: ${{ github.event.inputs.hub_repo_id }}
	        run: |
	          pip install huggingface_hub
	          python - <<'PY'
	          import os

	          from huggingface_hub import HfApi

	          token = os.getenv('HF_TOKEN')
	          if token:
	              HfApi().upload_folder(
	                  folder_path=os.environ['OUTPUT_DIR'],
	                  repo_id=os.environ['HUB_REPO_ID'],
	                  token=token,
	              )
	              print('Model uploaded to HuggingFace Hub successfully!')
	          else:
	              print('HF_TOKEN not found, skipping upload')
	          PY

	      - name: Create release (on tag)
	        if: startsWith(github.ref, 'refs/tags/')
	        uses: softprops/action-gh-release@v1
	        with:
	          # NOTE(review): the release action attaches files, not directories, so
	          # the glob targets the files under the output tree — confirm against
	          # the action's README.
	          files: |
	            ./quantized_models/**/*
	          generate_release_notes: true
	        env:
	          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

	  # Prints a one-line outcome summary after the quantize job has finished.
	  notify:
	    needs: quantize
	    runs-on: ubuntu-latest
	    # always() so this job still runs when quantize failed.
	    if: always()

	    steps:
	      - name: Notify on success
	        if: needs.quantize.result == 'success'
	        # Inputs are passed via env so they are echoed as data, never parsed as shell.
	        env:
	          MODEL_NAME: ${{ github.event.inputs.model_name || 'gpt2' }}
	          METHOD: ${{ github.event.inputs.quantization_method || 'auto' }}
	          BITS: ${{ github.event.inputs.bits || '4' }}
	        run: |
	          echo "✅ Quantization completed successfully!"
	          echo "Model: ${MODEL_NAME}"
	          echo "Method: ${METHOD}"
	          echo "Bits: ${BITS}"

	      - name: Notify on failure
	        if: needs.quantize.result == 'failure'
	        run: |
	          echo "❌ Quantization failed!"
	          echo "Check the logs for more details."
	          exit 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Make the project complete. #1

Workflow file

Make the project complete. #1

Uh oh!

Jobs

Run details

Workflow file for this run