
Commit ac03ad3

run without QuTLASS

1 parent d7f6c1a

File tree

inference_lib/src/fp_quant/module/linear.py
inference_lib/src/fp_quant/module/linear_fns.py

2 files changed: +14 -2 lines changed


inference_lib/src/fp_quant/module/linear.py

Lines changed: 6 additions & 0 deletions
@@ -6,6 +6,7 @@
 
 from ..utils import FPQuantConfig, FPQuantDtype
 from .linear_fns import (
+    HAS_QUTLASS,
     FPQuant4x16MasterFn,
     FPQuant4x16NoMasterFn,
     forward_quantize,

@@ -35,6 +36,11 @@ def __init__(
     ):
         super().__init__()
 
+        if not HAS_QUTLASS and not config.pseudoquantization:
+            raise ValueError(
+                "QuTLASS is not installed. Can only run with `pseudoquantization=True` in the quantization config. If you have a Blackwell GPU, you can install QuTLASS from https://github.com/IST-DASLab/QuTLASS"
+            )
+
         factory_kwargs = {"device": device, "dtype": dtype}
         self.in_features = in_features
         self.out_features = out_features
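
The guard added to __init__ fails fast: if QuTLASS is missing and the config does not request pseudo-quantization, the module refuses to construct rather than failing later inside a kernel call. Below is a minimal sketch of the same pattern; ExampleLinear and ExampleConfig are hypothetical stand-ins, and the real fp_quant class and FPQuantConfig fields may differ.

from dataclasses import dataclass

import torch
from torch import nn

try:
    import qutlass  # noqa: F401  # only importable when QuTLASS is installed
    HAS_QUTLASS = True
except ImportError:
    HAS_QUTLASS = False


@dataclass
class ExampleConfig:
    # Hypothetical stand-in for FPQuantConfig; only the flag the guard reads is shown.
    pseudoquantization: bool = False


class ExampleLinear(nn.Module):
    # Hypothetical stand-in for the fp_quant linear module.
    def __init__(self, in_features: int, out_features: int, config: ExampleConfig,
                 device=None, dtype=None):
        super().__init__()

        # Fail fast: without QuTLASS, only the pseudo-quantization path can run.
        if not HAS_QUTLASS and not config.pseudoquantization:
            raise ValueError(
                "QuTLASS is not installed. Can only run with "
                "`pseudoquantization=True` in the quantization config."
            )

        factory_kwargs = {"device": device, "dtype": dtype}
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(
            torch.empty(out_features, in_features, **factory_kwargs)
        )


# Usage: on a machine without QuTLASS this only constructs when
# pseudo-quantization is enabled in the config.
layer = ExampleLinear(128, 256, ExampleConfig(pseudoquantization=True))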

inference_lib/src/fp_quant/module/linear_fns.py

Lines changed: 8 additions & 2 deletions
@@ -1,11 +1,17 @@
 from typing import Optional
 
 import torch
-from qutlass import fusedQuantizeMx, matmul_ada_mxf4_bf16_tn, matmul_mxf4_bf16_tn
-from qutlass.utils import to_blocked
 from torch import nn
 from torch.autograd import Function
 
+try:
+    from qutlass import fusedQuantizeMx, matmul_ada_mxf4_bf16_tn, matmul_mxf4_bf16_tn
+    from qutlass.utils import to_blocked
+
+    HAS_QUTLASS = True
+except ImportError:
+    HAS_QUTLASS = False
+
 from ..utils import FPQuantDtype
 
 
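
The change in linear_fns.py wraps the QuTLASS imports in try/except so the module can still be imported on machines without the library, exporting a HAS_QUTLASS flag that callers (such as the guard above) can check. The following is a self-contained sketch of this optional-dependency pattern; the linear_forward dispatch function is illustrative only and not part of the fp_quant API.

import torch

# Optional dependency: QuTLASS provides the fused MXFP4 kernels. Importing it
# inside try/except lets this module be imported even when it is absent.
try:
    from qutlass import fusedQuantizeMx, matmul_mxf4_bf16_tn  # noqa: F401

    HAS_QUTLASS = True
except ImportError:
    HAS_QUTLASS = False


def linear_forward(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    # Illustrative dispatch only: the real kernel invocation lives in the
    # autograd Functions of linear_fns.py and is omitted here.
    if HAS_QUTLASS:
        raise NotImplementedError("QuTLASS fast path omitted in this sketch")
    # Stand-in for the pseudo-quantization fallback: a plain matmul.
    return x @ weight.t()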
