@@ -61,7 +61,8 @@ def __init__(
         straight_through_activation = nn.Identity(),
         num_codebooks = 1,
         keep_num_codebooks_dim = None,
-        codebook_scale = 1.             # for residual LFQ, codebook scaled down by 2x at each layer
+        codebook_scale = 1.,            # for residual LFQ, codebook scaled down by 2x at each layer
+        frac_per_sample_entropy = 1.    # make less than 1. to only use a random fraction of the probs for per sample entropy
     ):
         super().__init__()
@@ -95,6 +96,9 @@ def __init__(
 
         # entropy aux loss related weights
 
+        assert 0 < frac_per_sample_entropy <= 1.
+        self.frac_per_sample_entropy = frac_per_sample_entropy
+
         self.diversity_gamma = diversity_gamma
         self.entropy_loss_weight = entropy_loss_weight
 
@@ -219,8 +223,22 @@ def forward(
 
         if exists(mask):
             prob = prob[mask]
+        else:
+            prob = rearrange(prob, 'b n ... -> (b n) ...')
+
+        # whether to only use a fraction of probs, for reducing memory
+
+        if self.frac_per_sample_entropy < 1.:
+            num_tokens = prob.shape[0]
+            num_sampled_tokens = int(num_tokens * self.frac_per_sample_entropy)
+            rand_mask = torch.randn(num_tokens).argsort(dim = -1) < num_sampled_tokens
+            per_sample_probs = prob[rand_mask]
+        else:
+            per_sample_probs = prob
+
+        # calculate per sample entropy
 
-        per_sample_entropy = entropy(prob).mean()
+        per_sample_entropy = entropy(per_sample_probs).mean()
 
         # distribution over all available tokens in the batch
 
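For context on what the new flag buys: the per sample entropy term normally runs over every token's probability distribution, so on long sequences it dominates memory. With frac_per_sample_entropy < 1., only a random subset of rows is kept, selected by an argsort-based boolean mask. Below is a minimal standalone sketch of just that subsampling step, assuming a toy entropy helper and made-up tensor shapes; it is not the module itself.

import torch
import torch.nn.functional as F

def entropy(prob, eps = 1e-20):
    # toy stand-in for the repo's entropy helper: -sum(p * log p) over the last dim
    return -(prob * prob.clamp(min = eps).log()).sum(dim = -1)

# pretend probabilities over a codebook, already flattened to ((batch * seq), codebook_size)
prob = F.softmax(torch.randn(8 * 32, 16), dim = -1)

frac_per_sample_entropy = 0.25  # assumed setting; only this fraction of rows is used

if frac_per_sample_entropy < 1.:
    num_tokens = prob.shape[0]
    num_sampled_tokens = int(num_tokens * frac_per_sample_entropy)

    # argsort of gaussian noise is a random permutation of ranks, so this boolean mask
    # picks exactly num_sampled_tokens rows uniformly at random
    rand_mask = torch.randn(num_tokens).argsort(dim = -1) < num_sampled_tokens
    per_sample_probs = prob[rand_mask]
else:
    per_sample_probs = prob

per_sample_entropy = entropy(per_sample_probs).mean()

The argsort-of-noise trick selects the subset without materializing an index permutation of the probabilities, which keeps the memory saving intact.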