codewithdark-git
diff --git a/‎.github/configs/batch_quantize.yaml
Lines changed: 65 additions & 0 deletions b/‎.github/configs/batch_quantize.yaml
Lines changed: 65 additions & 0 deletions
diff --git a/‎.github/workflows/batch-quantize.yml
Lines changed: 311 additions & 0 deletions b/‎.github/workflows/batch-quantize.yml
Lines changed: 311 additions & 0 deletions
@@ -0,0 +1,65 @@
+# Batch quantization configuration.
+# Declares every model that one batch run should quantize, plus the
+# settings shared between them.
+
+# Run-wide knobs.
+global_settings:
+  parallel: 2
+  verbose: 1
+  progress: 'json'
+  log_file_prefix: 'batch_quantization'
+
+# Fallback quantization settings; an entry under `models` may override
+# any of these keys.
+default_quantization:
+  method: 'auto'
+  bits: 4
+  optimization_target: 'balanced'
+  validate: true
+  quality_threshold: 0.95
+  benchmark: false
+
+# Models to quantize, one mapping per model.
+models:
+  - model: 'gpt2'
+    output_dir: './quantized_models/gpt2-q4'
+    method: 'gguf'
+    quant_type: 'Q4_K_M'
+
+  - model: 'microsoft/DialoGPT-small'
+    output_dir: './quantized_models/dialogpt-small-q4'
+    method: 'gptq'
+    bits: 4
+    group_size: 128
+
+  - model: 'facebook/opt-350m'
+    output_dir: './quantized_models/opt-350m-q8'
+    method: 'gguf'
+    quant_type: 'Q8_0'
+
+  - model: 'distilbert-base-uncased'
+    output_dir: './quantized_models/distilbert-q4'
+    method: 'auto'
+    bits: 4
+    optimization_target: 'size'
+
+  - model: 'microsoft/CodeBERT-base'
+    output_dir: './quantized_models/codebert-q4'
+    method: 'gptq'
+    bits: 4
+    group_size: 64
+    desc_act: true
+
+# Optional: calibration settings.
+calibration:
+  strategy: 'representative'
+  samples: 512
+  # data: 'path/to/calibration/dataset'  # Optional custom calibration data
+
+# Optional: validation settings.
+# NOTE(review): quality_threshold here (0.90) differs from
+# default_quantization.quality_threshold (0.95) -- confirm which one the
+# consumer honors.
+validation:
+  quality_threshold: 0.90
+  benchmark_metrics:
+    - 'perplexity'
+    - 'size'
+    - 'speed'
+
+# Optional: output settings.
+output:
+  format: 'auto'
+  generate_model_cards: true
+  include_benchmarks: true
@@ -0,0 +1,311 @@
+name: Batch Quantize Models
+
+# Two triggers: a manual dispatch that accepts the inputs below, and a
+# weekly cron run. Scheduled runs do not populate `github.event.inputs`.
+on:
+  workflow_dispatch:
+    inputs:
+      config_file:
+        type: string
+        required: true
+        default: '.github/configs/batch_quantize.yaml'
+        description: 'Batch configuration file path'
+      parallel_jobs:
+        type: number
+        required: false
+        default: 2
+        description: 'Number of parallel jobs'
+      upload_to_hub:
+        type: boolean
+        required: false
+        default: false
+        description: 'Upload results to HuggingFace Hub'
+
+  schedule:
+    # Every Sunday at 02:00 UTC.
+    - cron: '0 2 * * 0'
+
+# Workflow-level environment, visible to every job.
+# CUDA_VERSION is not referenced by any step in this workflow.
+env:
+  PYTHON_VERSION: '3.9'
+  CUDA_VERSION: '11.8'
+
+jobs:
+  # Parses the batch configuration and publishes two job outputs:
+  #   config - the whole parsed file as single-line JSON
+  #   matrix - a `strategy.matrix` object with one `include` entry per model
+  prepare:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+      config: ${{ steps.load-config.outputs.config }}
+
+    env:
+      # Scheduled runs carry no workflow_dispatch inputs, so fall back to the
+      # default path declared for the `config_file` input. The value is handed
+      # to Python through the environment rather than pasted into the script
+      # source, so a crafted path cannot inject code.
+      CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ env.PYTHON_VERSION }}
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pyyaml
+
+    - name: Load batch configuration
+      id: load-config
+      run: |
+        python -c "
+        import json
+        import os
+        import yaml
+
+        with open(os.environ['CONFIG_FILE'], 'r') as f:
+            # safe_load returns None for an empty file.
+            config = yaml.safe_load(f) or {}
+
+        print('config=' + json.dumps(config))
+        " >> "$GITHUB_OUTPUT"
+
+    - name: Generate job matrix
+      id: set-matrix
+      run: |
+        python -c "
+        import json
+        import os
+        import sys
+        import yaml
+
+        with open(os.environ['CONFIG_FILE'], 'r') as f:
+            config = yaml.safe_load(f) or {}
+
+        defaults = config.get('default_quantization') or {}
+        models = config.get('models') or []
+
+        # An empty matrix makes the dependent job fail with an opaque error,
+        # so stop here with a readable one (stderr; stdout is the output file).
+        if not models:
+            sys.exit('No models listed in ' + os.environ['CONFIG_FILE'])
+
+        matrix = {'include': []}
+
+        for i, model in enumerate(models):
+            matrix['include'].append({
+                'model_index': i,
+                'model_name': model['model'],
+                'output_dir': model['output_dir'],
+                # Per-model value, then default_quantization, then built-in.
+                'method': model.get('method', defaults.get('method', 'auto')),
+                'bits': model.get('bits', defaults.get('bits', 4)),
+            })
+
+        print('matrix=' + json.dumps(matrix))
+        " >> "$GITHUB_OUTPUT"
+
+  # Runs one quantization per matrix entry produced by `prepare`, then
+  # uploads the logs (always) and the quantized model (on success only).
+  quantize:
+    needs: prepare
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}
+      # Scheduled runs have no inputs; '2' mirrors the input's default.
+      max-parallel: ${{ fromJson(github.event.inputs.parallel_jobs || '2') }}
+      fail-fast: false
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ env.PYTHON_VERSION }}
+
+    - name: Cache pip dependencies
+      uses: actions/cache@v3
+      with:
+        path: ~/.cache/pip
+        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
+        restore-keys: |
+          ${{ runner.os }}-pip-
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -e .
+        pip install -r requirements.txt
+
+    # Matrix values come from the config file. They reach the shell through
+    # environment variables instead of being pasted into the script text, so
+    # quotes or shell metacharacters in a value cannot alter the command.
+    - name: Create output directory
+      env:
+        OUTPUT_DIR: ${{ matrix.output_dir }}
+      run: |
+        mkdir -p "$OUTPUT_DIR"
+        mkdir -p ./logs
+
+    - name: Run quantization
+      env:
+        MODEL_NAME: ${{ matrix.model_name }}
+        QUANT_METHOD: ${{ matrix.method }}
+        QUANT_BITS: ${{ matrix.bits }}
+        OUTPUT_DIR: ${{ matrix.output_dir }}
+        LOG_FILE: ./logs/quantization-${{ matrix.model_index }}-${{ github.run_id }}.log
+      run: |
+        quantllm quantize \
+          --model "$MODEL_NAME" \
+          --method "$QUANT_METHOD" \
+          --bits "$QUANT_BITS" \
+          --output-dir "$OUTPUT_DIR" \
+          --validate \
+          --progress json \
+          --log-file "$LOG_FILE" \
+          --verbose
+
+    # upload-artifact v3 is no longer accepted on github.com; v4 needs a
+    # unique artifact name per upload, which model_index + run_id provide.
+    - name: Upload quantization logs
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: batch-logs-${{ matrix.model_index }}-${{ github.run_id }}
+        path: ./logs/
+        retention-days: 30
+
+    - name: Upload quantized model
+      if: success()
+      uses: actions/upload-artifact@v4
+      with:
+        name: batch-model-${{ matrix.model_index }}-${{ github.run_id }}
+        path: ${{ matrix.output_dir }}
+        retention-days: 7
+
+  # Builds batch_report.json summarising which models were quantized.
+  # Runs even when quantize jobs fail so that failures are reported too.
+  collect-results:
+    needs: [prepare, quantize]
+    runs-on: ubuntu-latest
+    if: always()
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # No `name`/`pattern`: every artifact of the run is downloaded into its
+    # own sub-directory of ./artifacts, named after the artifact.
+    # download-artifact v3 is no longer accepted on github.com.
+    - name: Download all artifacts
+      uses: actions/download-artifact@v4
+      with:
+        path: ./artifacts
+
+    - name: Generate batch report
+      env:
+        # Scheduled runs have no inputs; fall back to the default path.
+        CONFIG_FILE: ${{ github.event.inputs.config_file || '.github/configs/batch_quantize.yaml' }}
+        # The matrix lists every model that was supposed to run. A failed
+        # model uploads no batch-model-* artifact, so counting artifacts alone
+        # would never report a failure.
+        MATRIX_JSON: ${{ needs.prepare.outputs.matrix }}
+      run: |
+        python -c "
+        import json
+        import os
+        from datetime import datetime, timezone
+        from pathlib import Path
+
+        artifacts_dir = Path('./artifacts')
+        report = {
+            'batch_id': '${{ github.run_id }}',
+            'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
+            'config_file': os.environ['CONFIG_FILE'],
+            'results': []
+        }
+
+        # Model artifacts that were actually uploaded, keyed by model index.
+        # Artifact names look like batch-model-<index>-<run_id>.
+        uploaded = {}
+        if artifacts_dir.is_dir():
+            for artifact_dir in artifacts_dir.iterdir():
+                if artifact_dir.name.startswith('batch-model-'):
+                    uploaded[int(artifact_dir.name.split('-')[2])] = artifact_dir
+
+        # Models that were expected to run (empty if prepare itself failed).
+        expected = json.loads(os.environ.get('MATRIX_JSON') or '{}').get('include', [])
+        names = {entry['model_index']: entry['model_name'] for entry in expected}
+
+        for model_index in sorted(set(names) | set(uploaded)):
+            artifact_dir = uploaded.get(model_index)
+            # Success means an artifact exists and contains at least one JSON file.
+            ok = artifact_dir is not None and any(artifact_dir.rglob('*.json'))
+            report['results'].append({
+                'model_index': model_index,
+                'model_name': names.get(model_index),
+                'status': 'success' if ok else 'failed',
+                'artifact_name': artifact_dir.name if artifact_dir is not None else None
+            })
+
+        # Save report
+        with open('./batch_report.json', 'w') as f:
+            json.dump(report, f, indent=2)
+
+        # Print summary
+        total = len(report['results'])
+        successful = sum(1 for r in report['results'] if r['status'] == 'success')
+        failed = total - successful
+        rate = f'{successful/total*100:.1f}%' if total > 0 else 'N/A'
+
+        print('Batch Quantization Summary:')
+        print(f'Total models: {total}')
+        print(f'Successful: {successful}')
+        print(f'Failed: {failed}')
+        print(f'Success rate: {rate}')
+        "
+
+    - name: Upload batch report
+      uses: actions/upload-artifact@v4
+      with:
+        name: batch-report-${{ github.run_id }}
+        path: ./batch_report.json
+        retention-days: 90
+
+    # NOTE: this workflow is only triggered by workflow_dispatch and schedule,
+    # so this step is skipped unless a pull_request trigger is added.
+    - name: Comment on PR (if applicable)
+      if: github.event_name == 'pull_request'
+      uses: actions/github-script@v6
+      with:
+        script: |
+          const fs = require('fs');
+          const report = JSON.parse(fs.readFileSync('./batch_report.json', 'utf8'));
+
+          const total = report.results.length;
+          const successful = report.results.filter(r => r.status === 'success').length;
+          const failed = total - successful;
+          // Carry the percent sign with the number so an empty batch reads
+          // "N/A" rather than "N/A%".
+          const successRate = total > 0 ? (successful / total * 100).toFixed(1) + '%' : 'N/A';
+
+          const comment = `## Batch Quantization Results
+
+          📊 **Summary:**
+          - Total models: ${total}
+          - Successful: ${successful} ✅
+          - Failed: ${failed} ❌
+          - Success rate: ${successRate}
+
+          🔗 **Artifacts:** Check the workflow run for detailed logs and quantized models.
+          `;
+
+          // Await the request so an API error fails the step.
+          await github.rest.issues.createComment({
+            issue_number: context.issue.number,
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            body: comment
+          });
+
+  # Pushes every quantized model artifact to the HuggingFace Hub.
+  # Only runs for manual dispatches with `upload_to_hub` enabled, and only
+  # when all quantize jobs succeeded.
+  upload-to-hub:
+    needs: [prepare, quantize]
+    runs-on: ubuntu-latest
+    if: github.event.inputs.upload_to_hub == 'true' && success()
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # `pattern` is a download-artifact v4 input (v3 does not have it, and v3
+    # is no longer accepted on github.com). Each matching artifact lands in
+    # its own sub-directory of ./artifacts.
+    - name: Download all model artifacts
+      uses: actions/download-artifact@v4
+      with:
+        path: ./artifacts
+        pattern: batch-model-*
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ env.PYTHON_VERSION }}
+
+    - name: Install HuggingFace Hub
+      run: |
+        pip install huggingface_hub
+
+    - name: Upload models to Hub
+      env:
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+      run: |
+        python -c "
+        import os
+        import sys
+        from pathlib import Path
+        from huggingface_hub import HfApi
+
+        token = os.getenv('HF_TOKEN')
+        if not token:
+            # Deliberate best-effort: a missing secret skips the upload
+            # without failing the job.
+            print('HF_TOKEN not found, skipping upload')
+            sys.exit(0)
+
+        api = HfApi()
+        artifacts_dir = Path('./artifacts')
+        failures = []
+
+        model_dirs = []
+        if artifacts_dir.is_dir():
+            model_dirs = sorted(
+                d for d in artifacts_dir.iterdir()
+                if d.name.startswith('batch-model-')
+            )
+
+        for artifact_dir in model_dirs:
+            # Artifact names look like batch-model-<index>-<run_id>.
+            model_index = artifact_dir.name.split('-')[2]
+
+            # Create repository name
+            repo_id = f'quantllm/batch-{model_index}-${{ github.run_id }}'
+
+            try:
+                # upload_folder needs an existing repository, so create it first.
+                api.create_repo(repo_id=repo_id, token=token, exist_ok=True)
+                api.upload_folder(
+                    folder_path=str(artifact_dir),
+                    repo_id=repo_id,
+                    token=token
+                )
+                print(f'Uploaded {artifact_dir.name} to {repo_id}')
+            except Exception as e:
+                # Keep going so one bad model does not block the others.
+                print(f'Failed to upload {artifact_dir.name}: {e}')
+                failures.append(artifact_dir.name)
+
+        # Fail the step if anything was not uploaded, so the job does not
+        # go green on an incomplete upload.
+        if failures:
+            sys.exit(f'{len(failures)} upload(s) failed: ' + ', '.join(failures))
+        "