Skip to content

Commit 71b259e

Browse files
committed
add the structured quantized dropout from encodec paper
1 parent 9ecc12a commit 71b259e

File tree

3 files changed

+24
-2
lines changed

3 files changed

+24
-2
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,3 +320,13 @@ if __name__ == '__main__':
320320
title = {Autoregressive Image Generation using Residual Quantization}
321321
}
322322
```
323+
324+
```bibtex
325+
@article{Defossez2022HighFN,
326+
title = {High Fidelity Neural Audio Compression},
327+
title = {High Fidelity Neural Audio Compression},
328+
author = {Alexandre D{\'e}fossez and Jade Copet and Gabriel Synnaeve and Yossi Adi},
329+
year = {2022},
330+
volume = {abs/2210.13438}
331+
}
332+
```

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
setup(
44
name = 'vector_quantize_pytorch',
55
packages = find_packages(),
6-
version = '1.0.4',
6+
version = '1.0.6',
77
license='MIT',
88
description = 'Vector Quantization - Pytorch',
99
long_description_content_type = 'text/markdown',

vector_quantize_pytorch/residual_vq.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from math import ceil
12
from functools import partial
23
from random import randrange
34

@@ -8,6 +9,11 @@
89

910
from einops import rearrange, repeat, pack, unpack
1011

12+
def round_up_multiple(num, mult):
    """Round *num* up to the nearest multiple of *mult*.

    Uses exact integer ceiling division ``-(-num // mult)`` instead of
    ``ceil(num / mult)``: true division converts to float, which loses
    precision for integers larger than 2**53 and can silently return the
    wrong multiple. The integer form is exact for all int inputs and
    behaves identically for the small values used here.
    """
    return -(-num // mult) * mult
14+
15+
# main class
16+
1117
class ResidualVQ(nn.Module):
1218
""" Follows Algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf """
1319
def __init__(
@@ -18,6 +24,7 @@ def __init__(
1824
heads = 1,
1925
quantize_dropout = False,
2026
quantize_dropout_cutoff_index = 0,
27+
quantize_dropout_multiple_of = 1,
2128
accept_image_fmap = False,
2229
**kwargs
2330
):
@@ -32,7 +39,9 @@ def __init__(
3239
self.quantize_dropout = quantize_dropout
3340

3441
assert quantize_dropout_cutoff_index >= 0
42+
3543
self.quantize_dropout_cutoff_index = quantize_dropout_cutoff_index
44+
self.quantize_dropout_multiple_of = quantize_dropout_multiple_of # encodec paper proposes structured dropout, believe this was set to 4
3645

3746
if not shared_codebook:
3847
return
@@ -92,7 +101,7 @@ def forward(
92101
x,
93102
return_all_codes = False
94103
):
95-
num_quant, device = self.num_quantizers, x.device
104+
num_quant, quant_dropout_multiple_of, device = self.num_quantizers, self.quantize_dropout_multiple_of, x.device
96105
quantized_out = 0.
97106
residual = x
98107

@@ -104,6 +113,9 @@ def forward(
104113
if should_quantize_dropout:
105114
rand_quantize_dropout_index = randrange(self.quantize_dropout_cutoff_index, num_quant)
106115

116+
if quant_dropout_multiple_of != 1:
117+
rand_quantize_dropout_index = round_up_multiple(rand_quantize_dropout_index + 1, quant_dropout_multiple_of) - 1
118+
107119
null_indices_shape = (x.shape[0], *x.shape[-2:]) if self.accept_image_fmap else tuple(x.shape[:2])
108120

109121
for quantizer_index, layer in enumerate(self.layers):

0 commit comments

Comments
 (0)