Skip to content

Commit 40bb0c4

Browse files
Make the project complete.
1 parent 7dbbaf3 commit 40bb0c4

File tree

234 files changed

+69616
-1539
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

234 files changed

+69616
-1539
lines changed

.github/configs/batch_quantize.yaml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
---
# Batch quantization configuration.
# Declares the set of models that one batch run should quantize, together
# with run-wide settings and per-model overrides.

# Run-wide execution and logging settings.
global_settings:
  parallel: 2
  verbose: 1
  progress: 'json'
  log_file_prefix: 'batch_quantization'

# Baseline quantization settings; an entry under `models` may override any
# of these keys.
default_quantization:
  method: 'auto'
  bits: 4
  optimization_target: 'balanced'
  validate: true
  quality_threshold: 0.95
  benchmark: false

# Models to quantize. Each entry names a source model, where to write the
# result, and any method-specific options.
models:
  - model: 'gpt2'
    output_dir: './quantized_models/gpt2-q4'
    method: 'gguf'
    quant_type: 'Q4_K_M'

  - model: 'microsoft/DialoGPT-small'
    output_dir: './quantized_models/dialogpt-small-q4'
    method: 'gptq'
    bits: 4
    group_size: 128

  - model: 'facebook/opt-350m'
    output_dir: './quantized_models/opt-350m-q8'
    method: 'gguf'
    quant_type: 'Q8_0'

  - model: 'distilbert-base-uncased'
    output_dir: './quantized_models/distilbert-q4'
    method: 'auto'
    bits: 4
    optimization_target: 'size'

  - model: 'microsoft/CodeBERT-base'
    output_dir: './quantized_models/codebert-q4'
    method: 'gptq'
    bits: 4
    group_size: 64
    desc_act: true

# Optional: calibration settings.
calibration:
  strategy: 'representative'
  samples: 512
  # data: "path/to/calibration/dataset"  # Optional custom calibration data

# Optional: validation settings.
# NOTE(review): this threshold (0.90) differs from
# default_quantization.quality_threshold (0.95) - confirm which one the
# consumer applies.
validation:
  quality_threshold: 0.90
  benchmark_metrics:
    - 'perplexity'
    - 'size'
    - 'speed'

# Optional: output settings.
output:
  format: 'auto'
  generate_model_cards: true
  include_benchmarks: true

.github/workflows/batch-quantize.yml

Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
name: Batch Quantize Models

# Triggers: a weekly scheduled run plus a manual dispatch with tunable inputs.
on:
  schedule:
    # Every Sunday at 02:00 UTC.
    - cron: '0 2 * * 0'

  workflow_dispatch:
    inputs:
      # Path (relative to the repository root) of the batch configuration.
      config_file:
        type: string
        description: 'Batch configuration file path'
        required: true
        default: '.github/configs/batch_quantize.yaml'
      # Upper bound on concurrently running quantization jobs.
      parallel_jobs:
        type: number
        description: 'Number of parallel jobs'
        required: false
        default: 2
      # Opt-in switch for publishing the results.
      upload_to_hub:
        type: boolean
        description: 'Upload results to HuggingFace Hub'
        required: false
        default: false

# Workflow-wide environment. Versions are quoted so they stay strings.
env:
  PYTHON_VERSION: '3.9'
  CUDA_VERSION: '11.8'
29+
30+
jobs:
  # Reads the batch configuration and expands its model list into a matrix.
  prepare:
    runs-on: ubuntu-latest
    env:
      # Scheduled runs carry no dispatch inputs, so fall back to the default
      # path. Passing the value through the environment (rather than pasting
      # it into the script text) also prevents script injection.
      CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      config: ${{ steps.load-config.outputs.config }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyyaml

      # Emits the whole configuration as single-line JSON in output `config`.
      - name: Load batch configuration
        id: load-config
        run: |
          python - <<'PY' >> "$GITHUB_OUTPUT"
          import json
          import os

          import yaml

          with open(os.environ['CONFIG_FILE'], 'r') as f:
              config = yaml.safe_load(f)

          print('config=' + json.dumps(config))
          PY

      # Emits one matrix entry per configured model in output `matrix`.
      - name: Generate job matrix
        id: set-matrix
        run: |
          python - <<'PY' >> "$GITHUB_OUTPUT"
          import json
          import os
          import sys

          import yaml

          config_path = os.environ['CONFIG_FILE']
          with open(config_path, 'r') as f:
              config = yaml.safe_load(f) or {}

          models = config.get('models') or []
          if not models:
              # An empty matrix makes the dependent job fail with an opaque
              # error; stop here with an explicit message instead.
              sys.exit('No models listed in ' + config_path)

          # Per-model keys win; otherwise use the file-level defaults, and
          # finally the previous hard-coded fallbacks.
          defaults = config.get('default_quantization') or {}
          matrix = {'include': []}

          for i, model in enumerate(models):
              matrix['include'].append({
                  'model_index': i,
                  'model_name': model['model'],
                  'output_dir': model['output_dir'],
                  'method': model.get('method', defaults.get('method', 'auto')),
                  'bits': model.get('bits', defaults.get('bits', 4)),
              })

          print('matrix=' + json.dumps(matrix))
          PY

  # Quantizes one model per matrix entry.
  quantize:
    needs: prepare
    runs-on: ubuntu-latest

    strategy:
      matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}
      max-parallel: ${{ fromJson(github.event.inputs.parallel_jobs || '2') }}
      # Let the remaining models finish even when one of them fails.
      fail-fast: false

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r requirements.txt

      - name: Create output directory
        env:
          OUTPUT_DIR: ${{ matrix.output_dir }}
        run: |
          mkdir -p "$OUTPUT_DIR"
          mkdir -p ./logs

      # Matrix values come from the configuration file; they are handed to
      # the shell as environment variables so they are never parsed as code.
      - name: Run quantization
        env:
          MODEL_NAME: ${{ matrix.model_name }}
          QUANT_METHOD: ${{ matrix.method }}
          QUANT_BITS: ${{ matrix.bits }}
          OUTPUT_DIR: ${{ matrix.output_dir }}
          LOG_FILE: ./logs/quantization-${{ matrix.model_index }}-${{ github.run_id }}.log
        run: |
          quantllm quantize \
            --model "$MODEL_NAME" \
            --method "$QUANT_METHOD" \
            --bits "$QUANT_BITS" \
            --output-dir "$OUTPUT_DIR" \
            --validate \
            --progress json \
            --log-file "$LOG_FILE" \
            --verbose

      # upload-artifact v3 has been retired; v4 requires unique artifact
      # names per run, which the model index already guarantees.
      - name: Upload quantization logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: batch-logs-${{ matrix.model_index }}-${{ github.run_id }}
          path: ./logs/
          retention-days: 30

      - name: Upload quantized model
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: batch-model-${{ matrix.model_index }}-${{ github.run_id }}
          path: ${{ matrix.output_dir }}
          retention-days: 7

  # Summarises the outcome of every model in the batch.
  collect-results:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    # Run even when some quantize jobs failed so the report covers them.
    if: always()
    env:
      CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}
      # Empty when the prepare job itself failed.
      MATRIX_JSON: ${{ needs.prepare.outputs.matrix }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Artifacts uploaded with v4 can only be fetched with v4. Each artifact
      # is extracted into ./artifacts/<artifact name>/.
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts

      - name: Generate batch report
        run: |
          python - <<'PY'
          import json
          import os
          from datetime import datetime, timezone
          from pathlib import Path

          artifacts_dir = Path('./artifacts')
          run_id = os.environ['GITHUB_RUN_ID']
          matrix = json.loads(os.environ.get('MATRIX_JSON') or '{}')

          report = {
              'batch_id': run_id,
              'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
              'config_file': os.environ['CONFIG_FILE'],
              'results': [],
          }

          # Walk the models that were expected, not the artifacts that exist:
          # a model artifact is only uploaded when its job succeeded, so
          # iterating artifacts alone would never record a failure.
          for entry in matrix.get('include', []):
              artifact_name = f"batch-model-{entry['model_index']}-{run_id}"
              artifact_dir = artifacts_dir / artifact_name

              if artifact_dir.is_dir() and any(artifact_dir.rglob('*.json')):
                  status = 'success'
              else:
                  status = 'failed'

              report['results'].append({
                  'model_index': int(entry['model_index']),
                  'model_name': entry['model_name'],
                  'status': status,
                  'artifact_name': artifact_name,
              })

          # Save report
          with open('./batch_report.json', 'w') as f:
              json.dump(report, f, indent=2)

          # Print summary
          total = len(report['results'])
          successful = sum(1 for r in report['results'] if r['status'] == 'success')
          failed = total - successful
          rate = f'{successful / total * 100:.1f}%' if total > 0 else 'N/A'

          print('Batch Quantization Summary:')
          print(f'Total models: {total}')
          print(f'Successful: {successful}')
          print(f'Failed: {failed}')
          print(f'Success rate: {rate}')
          PY

      - name: Upload batch report
        uses: actions/upload-artifact@v4
        with:
          name: batch-report-${{ github.run_id }}
          path: ./batch_report.json
          retention-days: 90

      # NOTE(review): this workflow declares no pull_request trigger, so this
      # step is currently never executed; it only takes effect if one is added.
      - name: Comment on PR (if applicable)
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        with:
          script: |
            const fs = require('fs');
            const report = JSON.parse(fs.readFileSync('./batch_report.json', 'utf8'));

            const total = report.results.length;
            const successful = report.results.filter(r => r.status === 'success').length;
            const failed = total - successful;
            const successRate = total > 0 ? (successful / total * 100).toFixed(1) : 'N/A';

            const comment = `## Batch Quantization Results

            📊 **Summary:**
            - Total models: ${total}
            - Successful: ${successful} ✅
            - Failed: ${failed} ❌
            - Success rate: ${successRate}%

            🔗 **Artifacts:** Check the workflow run for detailed logs and quantized models.
            `;

            // Await the request so an API failure fails the step instead of
            // being dropped as an unhandled rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: comment
            });

  # Publishes every successfully quantized model to the HuggingFace Hub.
  upload-to-hub:
    needs: [prepare, quantize]
    runs-on: ubuntu-latest
    # Dispatch inputs arrive as strings, hence the comparison with 'true'.
    if: github.event.inputs.upload_to_hub == 'true' && success()

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # `pattern` is only understood by download-artifact v4 and later.
      - name: Download all model artifacts
        uses: actions/download-artifact@v4
        with:
          path: ./artifacts
          pattern: batch-model-*

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install HuggingFace Hub
        run: |
          pip install huggingface_hub

      - name: Upload models to Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          python - <<'PY'
          import os
          import sys
          from pathlib import Path

          from huggingface_hub import HfApi

          token = os.getenv('HF_TOKEN')
          if not token:
              print('HF_TOKEN not found, skipping upload')
              sys.exit(0)

          api = HfApi()
          artifacts_dir = Path('./artifacts')
          run_id = os.environ['GITHUB_RUN_ID']
          failures = 0

          for artifact_dir in sorted(artifacts_dir.iterdir()):
              if not artifact_dir.name.startswith('batch-model-'):
                  continue

              # Artifact names look like batch-model-<index>-<run id>.
              model_index = artifact_dir.name.split('-')[2]

              # Create repository name
              repo_id = f'quantllm/batch-{model_index}-{run_id}'

              try:
                  # The per-run repository does not exist yet; create it
                  # first, otherwise upload_folder is rejected.
                  api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
                  api.upload_folder(
                      folder_path=str(artifact_dir),
                      repo_id=repo_id,
                      token=token,
                  )
                  print(f'Uploaded {artifact_dir.name} to {repo_id}')
              except Exception as e:
                  # Keep going so one bad model does not block the others.
                  failures += 1
                  print(f'Failed to upload {artifact_dir.name}: {e}')

          if failures:
              # Surface partial failure instead of reporting a green job.
              sys.exit(f'{failures} model upload(s) failed')
          PY

0 commit comments

Comments
 (0)