# Batch-quantize every model listed in a YAML batch configuration file.
# Runs on manual dispatch (with the inputs below) and on a weekly schedule.
name: Batch Quantize Models

on:
  workflow_dispatch:
    inputs:
      # Path (relative to the repository root) of the batch configuration.
      config_file:
        description: 'Batch configuration file path'
        required: true
        type: string
        default: '.github/configs/batch_quantize.yaml'
      # Upper bound on concurrently running matrix jobs.
      parallel_jobs:
        description: 'Number of parallel jobs'
        required: false
        type: number
        default: 2
      # When true, the upload-to-hub job pushes the quantized models.
      upload_to_hub:
        description: 'Upload results to HuggingFace Hub'
        required: false
        type: boolean
        default: false

  # NOTE: scheduled runs carry no `inputs`; the defaults declared above are
  # only applied to manual dispatches, so `github.event.inputs.*` is empty
  # when this trigger fires.
  schedule:
    # Run weekly on Sunday at 2 AM UTC
    - cron: '0 2 * * 0'

# Quoted so the versions stay strings (an unquoted 3.10 would parse as 3.1).
env:
  PYTHON_VERSION: '3.9'
  CUDA_VERSION: '11.8'
29+
jobs:
  # Reads the batch configuration and converts its `models` list into the
  # matrix (`include` entries) that the quantize job fans out over.
  prepare:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      config: ${{ steps.load-config.outputs.config }}

    env:
      # Scheduled runs have no inputs, so fall back to the same path the
      # manual-dispatch input defaults to. The value is handed to the scripts
      # through the environment instead of being pasted into their source
      # text, so a crafted path cannot be executed as Python or shell code.
      CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyyaml

      # Publishes the whole parsed config as the single-line JSON output
      # `config`.
      - name: Load batch configuration
        id: load-config
        run: |
          python - >> "$GITHUB_OUTPUT" <<'PY'
          import json
          import os

          import yaml

          with open(os.environ['CONFIG_FILE'], 'r') as f:
              config = yaml.safe_load(f)

          # json.dumps emits one line, as the key=value output format requires.
          print('config=' + json.dumps(config))
          PY

      # Publishes the job matrix as the single-line JSON output `matrix`.
      # Each entry needs `model` and `output_dir`; `method` defaults to
      # 'auto' and `bits` to 4.
      - name: Generate job matrix
        id: set-matrix
        run: |
          python - >> "$GITHUB_OUTPUT" <<'PY'
          import json
          import os

          import yaml

          config_path = os.environ['CONFIG_FILE']
          with open(config_path, 'r') as f:
              config = yaml.safe_load(f) or {}

          models = config.get('models') or []
          if not models:
              # An empty matrix makes the dependent job fail with an opaque
              # error, so stop here with a message that names the cause.
              raise SystemExit(f'No models listed in {config_path}')

          matrix = {
              'include': [
                  {
                      'model_index': i,
                      'model_name': model['model'],
                      'output_dir': model['output_dir'],
                      'method': model.get('method', 'auto'),
                      'bits': model.get('bits', 4),
                  }
                  for i, model in enumerate(models)
              ]
          }

          print('matrix=' + json.dumps(matrix))
          PY
88+
# ---------------------------------------------------------------------------
# The jobs below exchange artifacts: `quantize` uploads them, while
# `collect-results` and `upload-to-hub` download them. Keep the
# upload-artifact / download-artifact major versions in lockstep; artifacts
# written by v4 cannot be read by v3 (and v3 is retired on github.com).
# ---------------------------------------------------------------------------
  # Quantizes one model per matrix entry produced by the prepare job.
  quantize:
    needs: prepare
    runs-on: ubuntu-latest

    strategy:
      matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}
      # Scheduled runs have no inputs, hence the '2' fallback.
      max-parallel: ${{ fromJson(github.event.inputs.parallel_jobs || '2') }}
      # Let the remaining models finish even if one of them fails.
      fail-fast: false

    env:
      # Matrix values come from the batch config file; pass them to the shell
      # as environment variables rather than interpolating them into the
      # script text, so quotes or `$()` in a value cannot alter the command.
      MODEL_INDEX: ${{ matrix.model_index }}
      MODEL_NAME: ${{ matrix.model_name }}
      QUANT_METHOD: ${{ matrix.method }}
      QUANT_BITS: ${{ matrix.bits }}
      OUTPUT_DIR: ${{ matrix.output_dir }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r requirements.txt

      - name: Create output directory
        run: |
          mkdir -p "$OUTPUT_DIR"
          mkdir -p ./logs

      - name: Run quantization
        run: |
          quantllm quantize \
            --model "$MODEL_NAME" \
            --method "$QUANT_METHOD" \
            --bits "$QUANT_BITS" \
            --output-dir "$OUTPUT_DIR" \
            --validate \
            --progress json \
            --log-file "./logs/quantization-${MODEL_INDEX}-${GITHUB_RUN_ID}.log" \
            --verbose

      # Logs are uploaded even when quantization failed.
      - name: Upload quantization logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: batch-logs-${{ matrix.model_index }}-${{ github.run_id }}
          path: ./logs/
          retention-days: 30

      # Only successful runs publish a model artifact; collect-results treats
      # a missing `batch-model-<index>-…` artifact as a failed model.
      - name: Upload quantized model
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: batch-model-${{ matrix.model_index }}-${{ github.run_id }}
          path: ${{ matrix.output_dir }}
          retention-days: 7

  # Builds batch_report.json summarising which models were quantized.
  # Runs even when some (or all) quantize jobs failed.
  collect-results:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: always()

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Without `name`, every artifact lands in ./artifacts/<artifact-name>/.
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts

      - name: Generate batch report
        env:
          CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}
          # Empty when the prepare job itself failed.
          MATRIX_JSON: ${{ needs.prepare.outputs.matrix }}
        run: |
          python - <<'PY'
          import json
          import os
          from datetime import datetime, timezone
          from pathlib import Path

          artifacts_dir = Path('./artifacts')

          # Model indices the matrix asked for. Failed quantize jobs upload no
          # model artifact, so this list is the only way to see them at all.
          matrix = json.loads(os.environ.get('MATRIX_JSON') or '{}') or {}
          expected = {int(entry['model_index']) for entry in matrix.get('include', [])}

          # Model artifacts that were actually uploaded, keyed by model index.
          # Artifact names look like batch-model-<index>-<run_id>.
          uploaded = {}
          if artifacts_dir.is_dir():
              for artifact_dir in artifacts_dir.iterdir():
                  if artifact_dir.name.startswith('batch-model-'):
                      uploaded[int(artifact_dir.name.split('-')[2])] = artifact_dir

          results = []
          for model_index in sorted(expected | set(uploaded)):
              artifact_dir = uploaded.get(model_index)
              # Success = the artifact exists and holds at least one JSON file.
              ok = artifact_dir is not None and any(artifact_dir.rglob('*.json'))
              results.append({
                  'model_index': model_index,
                  'status': 'success' if ok else 'failed',
                  'artifact_name': artifact_dir.name if artifact_dir is not None else None,
              })

          report = {
              'batch_id': os.environ['GITHUB_RUN_ID'],
              'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
              'config_file': os.environ.get('CONFIG_FILE', ''),
              'results': results,
          }

          # Save report
          with open('./batch_report.json', 'w') as f:
              json.dump(report, f, indent=2)

          # Print summary
          total = len(results)
          successful = sum(1 for r in results if r['status'] == 'success')
          failed = total - successful
          rate = f'{successful / total * 100:.1f}%' if total > 0 else 'N/A'

          print('Batch Quantization Summary:')
          print(f'Total models: {total}')
          print(f'Successful: {successful}')
          print(f'Failed: {failed}')
          print(f'Success rate: {rate}')
          PY

      - name: Upload batch report
        uses: actions/upload-artifact@v4
        with:
          name: batch-report-${{ github.run_id }}
          path: ./batch_report.json
          retention-days: 90

      # NOTE(review): this workflow has no `pull_request` trigger, so this
      # step never runs as configured; kept for when such a trigger is added.
      - name: Comment on PR (if applicable)
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(fs.readFileSync('./batch_report.json', 'utf8'));

            const total = report.results.length;
            const successful = report.results.filter(r => r.status === 'success').length;
            const failed = total - successful;
            // Attach the percent sign here so an empty batch shows "N/A", not "N/A%".
            const successRate = total > 0 ? `${(successful / total * 100).toFixed(1)}%` : 'N/A';

            // Joined line by line so YAML/JS indentation cannot leak into the Markdown.
            const comment = [
              '## Batch Quantization Results',
              '',
              '📊 **Summary:**',
              `- Total models: ${total}`,
              `- Successful: ${successful} ✅`,
              `- Failed: ${failed} ❌`,
              `- Success rate: ${successRate}`,
              '',
              '🔗 **Artifacts:** Check the workflow run for detailed logs and quantized models.',
              '',
            ].join('\n');

            // Awaited so an API failure fails the step instead of being lost.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment
            });

  # Pushes every quantized model to the HuggingFace Hub. Only runs when a
  # manual dispatch asked for it and all upstream jobs succeeded.
  upload-to-hub:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: github.event.inputs.upload_to_hub == 'true' && success()

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # `pattern` requires download-artifact v4; each match is extracted to
      # ./artifacts/<artifact-name>/.
      - name: Download all model artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts
          pattern: batch-model-*

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install HuggingFace Hub
        run: |
          pip install huggingface_hub

      - name: Upload models to Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          python - <<'PY'
          import os
          import sys
          from pathlib import Path

          from huggingface_hub import HfApi

          token = os.getenv('HF_TOKEN')
          if not token:
              print('HF_TOKEN not found, skipping upload')
              sys.exit(0)

          api = HfApi()
          artifacts_dir = Path('./artifacts')
          run_id = os.environ['GITHUB_RUN_ID']

          model_dirs = []
          if artifacts_dir.is_dir():
              model_dirs = sorted(
                  p for p in artifacts_dir.iterdir()
                  if p.name.startswith('batch-model-')
              )

          failures = 0
          for artifact_dir in model_dirs:
              # Artifact names look like batch-model-<index>-<run_id>.
              model_index = artifact_dir.name.split('-')[2]

              # Create repository name
              repo_id = f'quantllm/batch-{model_index}-{run_id}'

              try:
                  # upload_folder does not create the target repository, so
                  # make sure it exists before pushing files into it.
                  api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
                  api.upload_folder(
                      folder_path=str(artifact_dir),
                      repo_id=repo_id,
                      token=token,
                  )
                  print(f'Uploaded {artifact_dir.name} to {repo_id}')
              except Exception as e:
                  # Keep going so one bad model does not block the others,
                  # but remember the failure so the job cannot end up green.
                  failures += 1
                  print(f'::error::Failed to upload {artifact_dir.name}: {e}')

          if failures:
              sys.exit(1)
          PY