# Batch-quantizes every model listed in a YAML batch configuration file.
# Triggered manually (workflow_dispatch) or by the weekly schedule below.
name: Batch Quantize Models

on:
  workflow_dispatch:
    inputs:
      config_file:
        description: 'Batch configuration file path'
        required: true
        type: string
        default: '.github/configs/batch_quantize.yaml'
      parallel_jobs:
        description: 'Number of parallel jobs'
        required: false
        type: number
        default: 2
      upload_to_hub:
        description: 'Upload results to HuggingFace Hub'
        required: false
        type: boolean
        default: false

  # NOTE: scheduled runs carry no `inputs`; every `github.event.inputs.*`
  # expression evaluates to an empty string on this trigger.
  schedule:
    # Run weekly on Sunday at 2 AM UTC
    - cron: '0 2 * * 0'

env:
  # Quoted so YAML keeps them as strings rather than floats.
  PYTHON_VERSION: '3.9'
  CUDA_VERSION: '11.8'

jobs:
  # Reads the batch configuration file and publishes it, together with a
  # one-entry-per-model job matrix, as job outputs.
  prepare:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      config: ${{ steps.load-config.outputs.config }}

    env:
      # `inputs` is empty on scheduled runs, so fall back to the path that the
      # workflow_dispatch input declares as its default. The value reaches the
      # scripts through the environment instead of being pasted into their
      # source text, so a crafted path cannot be executed as code.
      CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyyaml

      - name: Load batch configuration
        id: load-config
        run: |
          python - <<'PY'
          import json
          import os

          import yaml

          with open(os.environ['CONFIG_FILE'], 'r') as f:
              config = yaml.safe_load(f)

          # json.dumps emits a single line, as the key=value output format
          # requires.
          with open(os.environ['GITHUB_OUTPUT'], 'a') as out:
              out.write('config=' + json.dumps(config) + '\n')
          PY

      - name: Generate job matrix
        id: set-matrix
        run: |
          python - <<'PY'
          import json
          import os
          import sys

          import yaml

          config_path = os.environ['CONFIG_FILE']
          with open(config_path, 'r') as f:
              # An empty file loads as None; treat it like an empty mapping.
              config = yaml.safe_load(f) or {}

          models = config.get('models') or []
          if not models:
              # An empty matrix would otherwise fail the dependent job with an
              # unhelpful strategy-evaluation error.
              print(f'::error::No models configured under "models" in {config_path}')
              sys.exit(1)

          matrix = {
              'include': [
                  {
                      'model_index': i,
                      'model_name': model['model'],
                      'output_dir': model['output_dir'],
                      'method': model.get('method', 'auto'),
                      'bits': model.get('bits', 4),
                  }
                  for i, model in enumerate(models)
              ]
          }

          with open(os.environ['GITHUB_OUTPUT'], 'a') as out:
              out.write('matrix=' + json.dumps(matrix) + '\n')
          PY

# ---- job: quantize ----
  # Quantizes one model per matrix entry emitted by the prepare job and
  # publishes the logs (always) and the quantized model (on success).
  quantize:
    needs: prepare
    runs-on: ubuntu-latest

    strategy:
      matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}
      # `inputs` is empty on scheduled runs, hence the '2' fallback.
      max-parallel: ${{ fromJson(github.event.inputs.parallel_jobs || '2') }}
      fail-fast: false

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r requirements.txt

      - name: Create output directory
        env:
          OUTPUT_DIR: ${{ matrix.output_dir }}
        run: |
          mkdir -p -- "$OUTPUT_DIR"
          mkdir -p ./logs

      - name: Run quantization
        # Matrix values come from the configuration file; handing them over as
        # environment variables keeps quotes or `$(...)` inside a value from
        # being interpreted by the shell.
        env:
          MODEL_NAME: ${{ matrix.model_name }}
          METHOD: ${{ matrix.method }}
          BITS: ${{ matrix.bits }}
          OUTPUT_DIR: ${{ matrix.output_dir }}
          LOG_FILE: ./logs/quantization-${{ matrix.model_index }}-${{ github.run_id }}.log
        run: |
          quantllm quantize \
            --model "$MODEL_NAME" \
            --method "$METHOD" \
            --bits "$BITS" \
            --output-dir "$OUTPUT_DIR" \
            --validate \
            --progress json \
            --log-file "$LOG_FILE" \
            --verbose

      - name: Upload quantization logs
        if: always()
        # v3 of the artifact actions has been retired on github.com.
        uses: actions/upload-artifact@v4
        with:
          name: batch-logs-${{ matrix.model_index }}-${{ github.run_id }}
          path: ./logs/
          retention-days: 30

      - name: Upload quantized model
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: batch-model-${{ matrix.model_index }}-${{ github.run_id }}
          path: ${{ matrix.output_dir }}
          retention-days: 7

# ---- job: collect-results ----
  # Summarises the batch into batch_report.json. Runs even when earlier jobs
  # failed so that failures are reported too.
  collect-results:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: always()

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Download all artifacts
        # v3 of the artifact actions has been retired on github.com.
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts

      - name: Generate batch report
        # Expression values are passed through the environment rather than
        # being pasted into the Python source.
        env:
          RUN_ID: ${{ github.run_id }}
          CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}
          MATRIX_JSON: ${{ needs.prepare.outputs.matrix }}
        run: |
          python - <<'PY'
          import json
          import os
          from datetime import datetime, timezone
          from pathlib import Path

          artifacts_dir = Path('./artifacts')
          run_id = os.environ['RUN_ID']

          report = {
              'batch_id': run_id,
              'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
              'config_file': os.environ.get('CONFIG_FILE', ''),
              'results': [],
          }


          def status_of(model_dir):
              """A model counts as successful when its artifact holds a JSON file."""
              if model_dir.is_dir() and any(model_dir.rglob('*.json')):
                  return 'success'
              return 'failed'


          # Model artifacts are uploaded only by successful jobs, so scanning
          # the download directory alone can never see a failed model. Walk
          # the planned matrix instead and look each artifact up by name.
          try:
              planned = json.loads(os.environ.get('MATRIX_JSON') or '{}').get('include', [])
          except (ValueError, AttributeError):
              planned = []

          if planned:
              for entry in planned:
                  index = int(entry['model_index'])
                  artifact_name = f'batch-model-{index}-{run_id}'
                  report['results'].append({
                      'model_index': index,
                      'status': status_of(artifacts_dir / artifact_name),
                      'artifact_name': artifact_name,
                  })
          elif artifacts_dir.is_dir():
              # No matrix available (e.g. the prepare job failed): fall back
              # to whatever model artifacts were downloaded.
              for artifact_dir in sorted(artifacts_dir.iterdir()):
                  if artifact_dir.name.startswith('batch-model-'):
                      # Artifact names look like batch-model-<index>-<run_id>.
                      model_index = artifact_dir.name.split('-')[2]
                      report['results'].append({
                          'model_index': int(model_index),
                          'status': status_of(artifact_dir),
                          'artifact_name': artifact_dir.name,
                      })

          # Save report
          with open('./batch_report.json', 'w') as f:
              json.dump(report, f, indent=2)

          # Print summary
          total = len(report['results'])
          successful = sum(1 for r in report['results'] if r['status'] == 'success')
          failed = total - successful
          rate = f'{successful / total * 100:.1f}%' if total > 0 else 'N/A'

          print('Batch Quantization Summary:')
          print(f'Total models: {total}')
          print(f'Successful: {successful}')
          print(f'Failed: {failed}')
          print(f'Success rate: {rate}')
          PY

      - name: Upload batch report
        uses: actions/upload-artifact@v4
        with:
          name: batch-report-${{ github.run_id }}
          path: ./batch_report.json
          retention-days: 90

      - name: Comment on PR (if applicable)
        # NOTE(review): only workflow_dispatch and schedule triggers are
        # visible in this file, so this condition looks unreachable - confirm.
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(fs.readFileSync('./batch_report.json', 'utf8'));

            const total = report.results.length;
            const successful = report.results.filter(r => r.status === 'success').length;
            const failed = total - successful;
            // The percent sign belongs to the number only; avoids "N/A%".
            const successRate = total > 0
              ? `${(successful / total * 100).toFixed(1)}%`
              : 'N/A';

            // Joined line by line so YAML indentation cannot leak into the
            // rendered Markdown.
            const comment = [
              '## Batch Quantization Results',
              '',
              '📊 **Summary:**',
              `- Total models: ${total}`,
              `- Successful: ${successful} ✅`,
              `- Failed: ${failed} ❌`,
              `- Success rate: ${successRate}`,
              '',
              '🔗 **Artifacts:** Check the workflow run for detailed logs and quantized models.',
              '',
            ].join('\n');

            // Awaited so that an API error fails the step instead of being
            // lost as an unhandled promise rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment
            });

# ---- job: upload-to-hub ----
  # Pushes every quantized model artifact to the HuggingFace Hub. Runs only
  # when requested through the `upload_to_hub` input and all earlier jobs
  # succeeded.
  upload-to-hub:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    if: github.event.inputs.upload_to_hub == 'true' && success()

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Download all model artifacts
        # `pattern` is only understood by v4 of this action; v3 has also been
        # retired on github.com.
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts
          pattern: 'batch-model-*'

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install HuggingFace Hub
        run: |
          pip install huggingface_hub

      - name: Upload models to Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          RUN_ID: ${{ github.run_id }}
        run: |
          python - <<'PY'
          import os
          import sys
          from pathlib import Path

          from huggingface_hub import HfApi

          token = os.getenv('HF_TOKEN')
          if not token:
              # Deliberately not an error: the secret may be unset.
              print('HF_TOKEN not found, skipping upload')
              sys.exit(0)

          api = HfApi()
          artifacts_dir = Path('./artifacts')
          run_id = os.environ['RUN_ID']

          model_dirs = []
          if artifacts_dir.is_dir():
              model_dirs = sorted(
                  p for p in artifacts_dir.iterdir()
                  if p.name.startswith('batch-model-')
              )

          failures = []
          for artifact_dir in model_dirs:
              # Artifact names look like batch-model-<index>-<run_id>.
              model_index = artifact_dir.name.split('-')[2]

              # Create repository name
              repo_id = f'quantllm/batch-{model_index}-{run_id}'

              try:
                  # upload_folder needs an existing repository, so create it
                  # first (a no-op when it already exists).
                  api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
                  api.upload_folder(
                      folder_path=str(artifact_dir),
                      repo_id=repo_id,
                      token=token,
                  )
                  print(f'Uploaded {artifact_dir.name} to {repo_id}')
              except Exception as e:
                  # Keep going so one bad model does not block the others.
                  print(f'Failed to upload {artifact_dir.name}: {e}')
                  failures.append(artifact_dir.name)

          if failures:
              # Report the failure instead of finishing green with nothing
              # uploaded.
              print(f'::error::{len(failures)} model upload(s) failed: ' + ', '.join(failures))
              sys.exit(1)
          PY
# (Page footer captured with the diff view: "0 commit comments" - not part of the workflow.)