Commit 4fc8e0a

support variable length sequences for residual VQ and LFQ
1 parent b5da2a9 commit 4fc8e0a

File tree

  setup.py
  vector_quantize_pytorch/lookup_free_quantization.py
  vector_quantize_pytorch/residual_lfq.py
  vector_quantize_pytorch/residual_vq.py

4 files changed: +30 -7 lines

setup.py

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 setup(
   name = 'vector_quantize_pytorch',
   packages = find_packages(),
-  version = '1.11.8',
+  version = '1.12.0',
   license='MIT',
   description = 'Vector Quantization - Pytorch',
   long_description_content_type = 'text/markdown',

vector_quantize_pytorch/lookup_free_quantization.py

Lines changed: 15 additions & 3 deletions

@@ -161,7 +161,8 @@ def forward(
         self,
         x,
         inv_temperature = 100.,
-        return_loss_breakdown = False
+        return_loss_breakdown = False,
+        mask = None,
     ):
         """
         einstein notation
@@ -216,8 +217,14 @@ def forward(
         per_sample_entropy = entropy(prob).mean()

+        # account for mask
+
+        if exists(mask):
+            prob = prob[mask]
+
         # distribution over all available tokens in the batch
-        avg_prob = reduce(prob, 'b n c d -> c d', 'mean')
+
+        avg_prob = reduce(prob, '... c d -> c d', 'mean')
         codebook_entropy = entropy(avg_prob).mean()

         # 1. entropy will be nudged to be low for each code, to encourage the network to output confident predictions
@@ -231,7 +238,12 @@ def forward(
         # commit loss

         if self.training:
-            commit_loss = F.mse_loss(original_input, quantized.detach())
+            commit_loss = F.mse_loss(original_input, quantized.detach(), reduction = 'none')
+
+            if exists(mask):
+                commit_loss = commit_loss[mask]
+
+            commit_loss = commit_loss.mean()
         else:
             commit_loss = self.zero

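The pattern in these hunks is: compute the per-element losses without reduction, select only the positions where the boolean mask is True, then reduce, so padded positions contribute nothing to the entropy or commitment terms. Below is a minimal stand-alone sketch of that pattern; the tensor shapes and the lengths-to-mask construction are assumptions for illustration, not taken from this diff.

import torch
import torch.nn.functional as F

# assumed shapes: batch of 2 sequences padded to length 6, feature dim 4,
# one codebook ('c') with 8 codes ('d'), mirroring the 'b n c d' layout above
original_input = torch.randn(2, 6, 4)
quantized      = torch.randn(2, 6, 4)
prob           = torch.softmax(torch.randn(2, 6, 1, 8), dim = -1)

# boolean mask built from per-sequence lengths: True at valid positions
lengths = torch.tensor([6, 3])
mask = torch.arange(6)[None, :] < lengths[:, None]          # (2, 6)

# commit loss: per-element, keep only valid positions, then average
commit_loss = F.mse_loss(original_input, quantized.detach(), reduction = 'none')
commit_loss = commit_loss[mask].mean()

# codebook entropy term: boolean indexing flattens (batch, seq) down to the
# valid positions, which is why the reduce pattern changes to '... c d -> c d'
avg_prob = prob[mask].mean(dim = 0)                          # (1, 8), i.e. (c, d)
codebook_entropy = -(avg_prob * torch.log(avg_prob.clamp(min = 1e-5))).sum(dim = -1).mean()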
vector_quantize_pytorch/residual_lfq.py

Lines changed: 4 additions & 1 deletion

@@ -122,6 +122,7 @@ def get_output_from_indices(self, indices):
     def forward(
         self,
         x,
+        mask = None,
         return_all_codes = False,
         rand_quantize_dropout_fixed_seed = None
     ):
@@ -161,7 +162,7 @@ def forward(
                 all_losses.append(null_loss)
                 continue

-            quantized, indices, loss = layer(residual)
+            quantized, indices, loss = layer(residual, mask = mask)

             residual = residual - quantized.detach()
             quantized_out = quantized_out + quantized
@@ -236,6 +237,7 @@ def get_output_from_indices(self, indices):
     def forward(
         self,
         x,
+        mask = None,
         return_all_codes = False
     ):
         shape, split_dim = x.shape, self.split_dim
@@ -246,6 +248,7 @@ def forward(
         x = x.chunk(self.groups, dim = split_dim)

         forward_kwargs = dict(
+            mask = mask,
             return_all_codes = return_all_codes,
             rand_quantize_dropout_fixed_seed = random.randint(0, 1e7)
         )

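On the residual LFQ side the change is plumbing: the new mask keyword is accepted by ResidualLFQ.forward and GroupedResidualLFQ.forward and forwarded unchanged to every LFQ layer. A hedged usage sketch follows; the top-level import and the constructor arguments are assumptions based on the library's README conventions rather than something shown in this diff, and the mask is assumed to be a boolean (batch, seq) tensor marking valid positions.

import torch
from vector_quantize_pytorch import ResidualLFQ  # assumed top-level export

residual_lfq = ResidualLFQ(
    dim = 256,             # assumed constructor args, see the repository README
    codebook_size = 2 ** 8,
    num_quantizers = 4
)

x = torch.randn(2, 1024, 256)

# two sequences, the second padded from length 512 up to 1024
lengths = torch.tensor([1024, 512])
mask = torch.arange(1024)[None, :] < lengths[:, None]   # (2, 1024) bool

quantized, indices, losses = residual_lfq(x, mask = mask)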
vector_quantize_pytorch/residual_vq.py

Lines changed: 10 additions & 2 deletions

@@ -124,6 +124,7 @@ def get_output_from_indices(self, indices):
     def forward(
         self,
         x,
+        mask = None,
         indices = None,
         return_all_codes = False,
         sample_codebook_temp = None,
@@ -175,7 +176,12 @@ def forward(
             if return_loss:
                 layer_indices = indices[..., quantizer_index]

-            quantized, *rest = layer(residual, indices = layer_indices, sample_codebook_temp = sample_codebook_temp)
+            quantized, *rest = layer(
+                residual,
+                mask = mask,
+                indices = layer_indices,
+                sample_codebook_temp = sample_codebook_temp,
+            )

             residual = residual - quantized.detach()
             quantized_out = quantized_out + quantized
@@ -263,7 +269,8 @@ def forward(
         x,
         indices = None,
         return_all_codes = False,
-        sample_codebook_temp = None
+        sample_codebook_temp = None,
+        mask = None,
     ):
         shape, split_dim = x.shape, self.split_dim
         assert shape[split_dim] == self.dim
@@ -279,6 +286,7 @@ def forward(
         forward_kwargs = dict(
             return_all_codes = return_all_codes,
             sample_codebook_temp = sample_codebook_temp,
+            mask = mask,
             rand_quantize_dropout_fixed_seed = random.randint(0, 1e7)
         )

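The same plumbing applies to ResidualVQ and GroupedResidualVQ: the mask is threaded through forward_kwargs so each group, and each quantizer within it, receives it. A hedged sketch of calling ResidualVQ on a padded batch; the constructor arguments mirror the README and are assumptions, not something introduced by this commit.

import torch
from vector_quantize_pytorch import ResidualVQ

residual_vq = ResidualVQ(
    dim = 256,
    num_quantizers = 8,
    codebook_size = 1024
)

# pad two variable length sequences into one batch, describe the padding with a mask
seqs = [torch.randn(700, 256), torch.randn(1024, 256)]
x = torch.nn.utils.rnn.pad_sequence(seqs, batch_first = True)             # (2, 1024, 256)
mask = torch.arange(x.shape[1])[None, :] < torch.tensor([700, 1024])[:, None]

quantized, indices, commit_loss = residual_vq(x, mask = mask)             # outputs as before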