We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2a50198 · commit bb496ca (Copy full SHA for bb496ca)
sgl-kernel/python/sgl_kernel/flash_attn.py
@@ -144,10 +144,6 @@ def flash_attn_with_kvcache(
144
logsumexp of each row of the matrix QK^T * scaling (e.g., log of the softmax
145
normalization factor).
146
"""
147
- if not is_fa3_supported():
148
- raise NotImplementedError(
149
- "flash_attn at sgl-kernel is only supported on sm90 and cu123 above"
150
- )
151
assert k_cache.stride(-1) == 1, "k_cache must have contiguous last dimension"
152
assert v_cache.stride(-1) == 1, "v_cache must have contiguous last dimension"
153
if softmax_scale is None:
0 commit comments