
Commit 284a671

add residual vector quantization, from soundstream paper from google

1 parent 2c064ce

File tree

5 files changed: +104 -12 lines


README.md

Lines changed: 46 additions & 1 deletion
@@ -1,4 +1,4 @@
-## Vector Quantization, in Pytorch
+## Vector Quantization - Pytorch
 
 A vector quantization library originally transcribed from Deepmind's tensorflow implementation, made conveniently into a package. It uses exponential moving averages to update the dictionary.
 
@@ -26,3 +26,48 @@ vq = VectorQuantize(
 x = torch.randn(1, 1024, 256)
 quantized, indices, commit_loss = vq(x) # (1, 1024, 256), (1, 1024), (1)
 ```
+
+## Variants
+
+This <a href="https://arxiv.org/abs/2107.03312">paper</a> proposes to use multiple vector quantizers to recursively quantize the residuals of the waveform. You can use this with the `ResidualVQ` class and one extra initialization parameter.
+
+```python
+import torch
+from vector_quantize_pytorch import ResidualVQ
+
+residual_vq = ResidualVQ(
+    dim = 256,
+    num_quantizers = 8,      # specify number of quantizers
+    n_embed = 1024,          # codebook size
+)
+
+x = torch.randn(1, 1024, 256)
+quantized, indices, commit_loss = residual_vq(x)
+
+# (1, 1024, 256), (8, 1, 1024), (8, 1)
+# (batch, seq, dim), (quantizer, batch, seq), (quantizer, batch)
+```
+
+## Citations
+
+```bibtex
+@misc{oord2018neural,
+    title   = {Neural Discrete Representation Learning},
+    author  = {Aaron van den Oord and Oriol Vinyals and Koray Kavukcuoglu},
+    year    = {2018},
+    eprint  = {1711.00937},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.LG}
+}
+```
+
+```bibtex
+@misc{zeghidour2021soundstream,
+    title   = {SoundStream: An End-to-End Neural Audio Codec},
+    author  = {Neil Zeghidour and Alejandro Luebs and Ahmed Omran and Jan Skoglund and Marco Tagliasacchi},
+    year    = {2021},
+    eprint  = {2107.03312},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.SD}
+}
+```
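
The README above says the dictionary is updated with exponential moving averages rather than gradient descent. Below is a rough sketch of how such an EMA codebook update typically works, reusing the `laplace_smoothing` helper visible in the `vector_quantize_pytorch.py` diff further down. The function name and tensor layout here are illustrative assumptions, not necessarily this library's exact internals:

```python
import torch
import torch.nn.functional as F

def laplace_smoothing(x, n_categories, eps = 1e-5):
    # same helper as shown in vector_quantize_pytorch.py
    return (x + eps) / (x.sum() + n_categories * eps)

def ema_codebook_update(embed, embed_avg, cluster_size, flatten, embed_ind, decay = 0.8, eps = 1e-5):
    # embed, embed_avg: (dim, n_embed) - codebook and its running numerator
    # cluster_size: (n_embed,) - running count of assignments per code
    # flatten: (n, dim) encoder outputs; embed_ind: (n,) nearest-code indices
    n_embed = embed.shape[1]
    onehot = F.one_hot(embed_ind, n_embed).type(flatten.dtype)  # (n, n_embed)

    # EMA of how often each code was selected this step
    cluster_size.mul_(decay).add_(onehot.sum(0), alpha = 1 - decay)

    # EMA of the sum of vectors assigned to each code
    embed_sum = flatten.t() @ onehot                            # (dim, n_embed)
    embed_avg.mul_(decay).add_(embed_sum, alpha = 1 - decay)

    # laplace smoothing keeps rarely used codes from dividing by zero
    n = cluster_size.sum()
    smoothed_size = laplace_smoothing(cluster_size, n_embed, eps) * n
    embed.copy_(embed_avg / smoothed_size.unsqueeze(0))
```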

setup.py

Lines changed: 15 additions & 10 deletions
@@ -3,21 +3,26 @@
 setup(
   name = 'vector_quantize_pytorch',
   packages = find_packages(),
-  version = '0.1.0',
+  version = '0.2.0',
   license='MIT',
-  description = 'Simple Vector Quantization, in Pytorch',
+  description = 'Vector Quantization - Pytorch',
   author = 'Phil Wang',
   author_email = 'lucidrains@gmail.com',
   url = 'https://github.com/lucidrains/vector-quantizer-pytorch',
-  keywords = ['artificial intelligence', 'deep learning', 'pytorch'],
+  keywords = [
+    'artificial intelligence',
+    'deep learning',
+    'pytorch',
+    'quantization'
+  ],
   install_requires=[
-    'torch'
+    'torch'
   ],
   classifiers=[
-    'Development Status :: 4 - Beta',
-    'Intended Audience :: Developers',
-    'Topic :: Scientific/Engineering :: Artificial Intelligence',
-    'License :: OSI Approved :: MIT License',
-    'Programming Language :: Python :: 3.6',
+    'Development Status :: 4 - Beta',
+    'Intended Audience :: Developers',
+    'Topic :: Scientific/Engineering :: Artificial Intelligence',
+    'License :: OSI Approved :: MIT License',
+    'Programming Language :: Python :: 3.6',
   ],
-)
+)

vector_quantize_pytorch/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -1 +1,3 @@
 from vector_quantize_pytorch.vector_quantize_pytorch import VectorQuantize
+from vector_quantize_pytorch.residual_vq import ResidualVQ
+

vector_quantize_pytorch/residual_vq.py
Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+import torch
+from torch import nn
+from vector_quantize_pytorch.vector_quantize_pytorch import VectorQuantize
+
+class ResidualVQ(nn.Module):
+    """ Follows Algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf """
+    def __init__(
+        self,
+        *,
+        num_quantizers,
+        n_embed,
+        **kwargs
+    ):
+        super().__init__()
+        self.layers = nn.ModuleList([VectorQuantize(n_embed = n_embed, **kwargs) for _ in range(num_quantizers)])
+
+    def forward(self, x):
+        quantized_out = 0.
+        residual = x
+
+        all_losses = []
+        all_indices = []
+
+        for layer in self.layers:
+            quantized, indices, loss = layer(residual)
+            residual = residual - quantized
+            quantized_out = quantized_out + quantized
+
+            all_indices.append(indices)
+            all_losses.append(loss)
+
+        all_losses, all_indices = map(torch.stack, (all_losses, all_indices))
+        return quantized_out, all_indices, all_losses
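
A note on the loop above: after every stage, `quantized_out + residual` still equals the original `x`, so each quantizer only has to model what the previous stages left unexplained. Here is a minimal toy sketch of that same recursion, with round-to-grid standing in for `VectorQuantize` (purely illustrative, not the library's API):

```python
import torch

def toy_quantize(x, step):
    # stand-in quantizer: snap values to a grid of the given step size
    return torch.round(x / step) * step

x = torch.randn(4)
quantized_out = torch.zeros_like(x)
residual = x

# finer grids at each stage, mimicking successive quantizers;
# the running sum approximates x better and better
for step in (1.0, 0.5, 0.25):
    quantized = toy_quantize(residual, step)
    residual = residual - quantized
    quantized_out = quantized_out + quantized
    print(f'step {step}: max error {(x - quantized_out).abs().max().item():.4f}')
```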

vector_quantize_pytorch/vector_quantize_pytorch.py

Lines changed: 8 additions & 1 deletion
@@ -9,7 +9,14 @@ def laplace_smoothing(x, n_categories, eps=1e-5):
     return (x + eps) / (x.sum() + n_categories * eps)
 
 class VectorQuantize(nn.Module):
-    def __init__(self, dim, n_embed, decay=0.8, commitment=1., eps=1e-5):
+    def __init__(
+        self,
+        dim,
+        n_embed,
+        decay = 0.8,
+        commitment = 1.,
+        eps = 1e-5
+    ):
         super().__init__()
 
         self.dim = dim
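
For context on the `commitment` hyperparameter in the signature above: in EMA-based vector quantization it conventionally scales the commitment loss of van den Oord et al., applied together with a straight-through estimator. A hedged sketch of that standard pattern follows; the helper name is hypothetical, and this is the textbook formulation rather than necessarily a line-for-line match for this file:

```python
import torch
import torch.nn.functional as F

def quantize_with_commitment(x, quantize, commitment = 1.):
    # commitment loss pulls encoder outputs toward their chosen codes;
    # the codebook side gets no gradient here, since it is maintained
    # by the EMA update instead
    commit_loss = F.mse_loss(quantize.detach(), x) * commitment

    # straight-through estimator: the forward pass uses the quantized
    # values, the backward pass copies gradients to x unchanged
    quantize = x + (quantize - x).detach()
    return quantize, commit_loss
```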
