add channel_last option to specify whether the input has its feature dimension last (defaults to True)

lucidrains · lucidrains · commit 567dc9cf92d7 · 2021-10-27T14:45:47.000-07:00
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'vector_quantize_pytorch',
   packages = find_packages(),
-  version = '0.3.7',
+  version = '0.3.8',
   license='MIT',
   description = 'Vector Quantization - Pytorch',
   author = 'Phil Wang',
diff --git a/vector_quantize_pytorch/vector_quantize_pytorch.py b/vector_quantize_pytorch/vector_quantize_pytorch.py
@@ -242,7 +242,8 @@ def __init__(
         kmeans_init = False,
         kmeans_iters = 10,
         use_cosine_sim = False,
-        threshold_ema_dead_code = 0
+        threshold_ema_dead_code = 0,
+        channel_last = True
     ):
         super().__init__()
         n_embed = default(n_embed, codebook_size)
@@ -271,12 +272,18 @@ def __init__(
         )
 
         self.codebook_size = codebook_size
+        self.channel_last = channel_last
 
     @property
     def codebook(self):
         return self._codebook.codebook
 
     def forward(self, x):
+        need_transpose = not self.channel_last
+
+        if need_transpose:
+            x = rearrange(x, 'b n d -> b d n')
+
         x = self.project_in(x)
 
         quantize, embed_ind = self._codebook(x)
@@ -288,4 +295,8 @@ def forward(self, x):
             quantize = x + (quantize - x).detach()
 
         quantize = self.project_out(quantize)
+
+        if need_transpose:
+            quantize = rearrange(quantize, 'b n d -> b d n')
+
         return quantize, embed_ind, commit_loss