
Commit ac03ad3

run without QuTLASS

1 parent d7f6c1a

File tree

inference_lib/src/fp_quant/module/linear.py
inference_lib/src/fp_quant/module/linear_fns.py

2 files changed: +14 -2 lines changed


inference_lib/src/fp_quant/module/linear.py

Lines changed: 6 additions & 0 deletions
@@ -6,6 +6,7 @@
 
 from ..utils import FPQuantConfig, FPQuantDtype
 from .linear_fns import (
+    HAS_QUTLASS,
     FPQuant4x16MasterFn,
     FPQuant4x16NoMasterFn,
     forward_quantize,

@@ -35,6 +36,11 @@ def __init__(
     ):
         super().__init__()
 
+        if not HAS_QUTLASS and not config.pseudoquantization:
+            raise ValueError(
+                "QuTLASS is not installed. Can only run with `pseudoquantization=True` in the quantization config. If you have a Blackwell GPU, you can install QuTLASS from https://github.com/IST-DASLab/QuTLASS"
+            )
+
         factory_kwargs = {"device": device, "dtype": dtype}
         self.in_features = in_features
         self.out_features = out_features
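
The guard added to __init__ fails fast: if QuTLASS is missing and the config does not request pseudo-quantization, the module refuses to construct rather than failing later inside a kernel call. Below is a minimal sketch of the same pattern; ExampleLinear and ExampleConfig are hypothetical stand-ins, and the real fp_quant class and FPQuantConfig fields may differ.

from dataclasses import dataclass

import torch
from torch import nn

try:
    import qutlass  # noqa: F401  # only importable when QuTLASS is installed
    HAS_QUTLASS = True
except ImportError:
    HAS_QUTLASS = False


@dataclass
class ExampleConfig:
    # Hypothetical stand-in for FPQuantConfig; only the flag the guard reads is shown.
    pseudoquantization: bool = False


class ExampleLinear(nn.Module):
    # Hypothetical stand-in for the fp_quant linear module.
    def __init__(self, in_features: int, out_features: int, config: ExampleConfig,
                 device=None, dtype=None):
        super().__init__()

        # Fail fast: without QuTLASS, only the pseudo-quantization path can run.
        if not HAS_QUTLASS and not config.pseudoquantization:
            raise ValueError(
                "QuTLASS is not installed. Can only run with "
                "`pseudoquantization=True` in the quantization config."
            )

        factory_kwargs = {"device": device, "dtype": dtype}
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(
            torch.empty(out_features, in_features, **factory_kwargs)
        )


# Usage: on a machine without QuTLASS this only constructs when
# pseudo-quantization is enabled in the config.
layer = ExampleLinear(128, 256, ExampleConfig(pseudoquantization=True))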

inference_lib/src/fp_quant/module/linear_fns.py

Lines changed: 8 additions & 2 deletions
@@ -1,11 +1,17 @@
 from typing import Optional
 
 import torch
-from qutlass import fusedQuantizeMx, matmul_ada_mxf4_bf16_tn, matmul_mxf4_bf16_tn
-from qutlass.utils import to_blocked
 from torch import nn
 from torch.autograd import Function
 
+try:
+    from qutlass import fusedQuantizeMx, matmul_ada_mxf4_bf16_tn, matmul_mxf4_bf16_tn
+    from qutlass.utils import to_blocked
+
+    HAS_QUTLASS = True
+except ImportError:
+    HAS_QUTLASS = False
+
 from ..utils import FPQuantDtype
 
 
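
The change in linear_fns.py wraps the QuTLASS imports in try/except so the module can still be imported on machines without the library, exporting a HAS_QUTLASS flag that callers (such as the guard above) can check. The following is a self-contained sketch of this optional-dependency pattern; the linear_forward dispatch function is illustrative only and not part of the fp_quant API.

import torch

# Optional dependency: QuTLASS provides the fused MXFP4 kernels. Importing it
# inside try/except lets this module be imported even when it is absent.
try:
    from qutlass import fusedQuantizeMx, matmul_mxf4_bf16_tn  # noqa: F401

    HAS_QUTLASS = True
except ImportError:
    HAS_QUTLASS = False


def linear_forward(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    # Illustrative dispatch only: the real kernel invocation lives in the
    # autograd Functions of linear_fns.py and is omitted here.
    if HAS_QUTLASS:
        raise NotImplementedError("QuTLASS fast path omitted in this sketch")
    # Stand-in for the pseudo-quantization fallback: a plain matmul.
    return x @ weight.t()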
