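address #114: cast inputs whose dtype is not in `allowed_dtypes` to float32 in the finite scalar quantization forward pass, and cast the quantized output back to the original dtype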

lucidrains · lucidrains · commit 002400875c97 · 2024-03-19T10:52:44.000-07:00
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'vector_quantize_pytorch',
   packages = find_packages(),
-  version = '1.14.2',
+  version = '1.14.4',
   license='MIT',
   description = 'Vector Quantization - Pytorch',
   long_description_content_type = 'text/markdown',
diff --git a/vector_quantize_pytorch/finite_scalar_quantization.py b/vector_quantize_pytorch/finite_scalar_quantization.py
@@ -3,7 +3,7 @@
 Code adapted from Jax version in Appendix A.1
 """
 
-from typing import List, Optional
+from typing import List, Tuple, Optional
 
 import torch
 import torch.nn as nn
@@ -46,7 +46,8 @@ def __init__(
         dim: Optional[int] = None,
         num_codebooks = 1,
         keep_num_codebooks_dim: Optional[bool] = None,
-        scale: Optional[float] = None
+        scale: Optional[float] = None,
+        allowed_dtypes: Tuple[torch.dtype, ...] = (torch.float32, torch.float64)
     ):
         super().__init__()
         _levels = torch.tensor(levels, dtype=int32)
@@ -80,6 +81,8 @@ def __init__(
         implicit_codebook = self.indices_to_codes(torch.arange(self.codebook_size), project_out = False)
         self.register_buffer("implicit_codebook", implicit_codebook, persistent = False)
 
+        self.allowed_dtypes = allowed_dtypes
+
     def bound(self, z: Tensor, eps: float = 1e-3) -> Tensor:
         """Bound `z`, an array of shape (..., d)."""
         half_l = (self._levels - 1) * (1 + eps) / 2
@@ -141,8 +144,14 @@ def forward(self, z: Tensor) -> Tensor:
         c - number of codebook dim
         """
 
+        orig_dtype = z.dtype
         is_img_or_video = z.ndim >= 4
 
+        # cast the input to float32 if its dtype is not one of the allowed dtypes
+
+        if z.dtype not in self.allowed_dtypes:
+            z = z.float()
+
         # standardize image or video into (batch, seq, dimension)
 
         if is_img_or_video:
@@ -173,4 +182,11 @@ def forward(self, z: Tensor) -> Tensor:
         if not self.keep_num_codebooks_dim:
             indices = rearrange(indices, '... 1 -> ...')
 
+        # cast back to original dtype
+
+        if out.dtype != orig_dtype:
+            out = out.type(orig_dtype)
+
+        # return quantized output and indices
+
         return out, indices