
Commit db5dd6e

add measure number one for dead codebooks
1 parent d28d851 commit db5dd6e

3 files changed: +47 −4 lines changed

README.md

Lines changed: 33 additions & 0 deletions
@@ -68,6 +68,28 @@ x = torch.randn(1, 1024, 256)
 quantized, indices, commit_loss = residual_vq(x)
 ```
 
+## Increasing codebook usage
+
+This repository will contain a few techniques from various papers to combat "dead" codebook entries, which is a common problem when using vector quantizers.
+
+### Lower codebook dimension
+
+The <a href="https://openreview.net/forum?id=pfNyExj7z2">Improved VQGAN paper</a> proposes to keep the codebook in a lower dimension. The encoder values are projected down before quantization and projected back up to the higher dimension on output. You can set this with the `codebook_dim` hyperparameter.
+
+```python
+import torch
+from vector_quantize_pytorch import VectorQuantize
+
+vq = VectorQuantize(
+    dim = 256,
+    codebook_size = 256,
+    codebook_dim = 16      # paper proposes setting this to 32 or as low as 8 to increase codebook usage
+)
+
+x = torch.randn(1, 1024, 256)
+quantized, indices, commit_loss = vq(x)
+```
+
 ## Citations
 
 ```bibtex
@@ -91,3 +113,14 @@ quantized, indices, commit_loss = residual_vq(x)
     primaryClass = {cs.SD}
 }
 ```
+
+```bibtex
+@inproceedings{anonymous2022vectorquantized,
+    title     = {Vector-quantized Image Modeling with Improved {VQGAN}},
+    author    = {Anonymous},
+    booktitle = {Submitted to The Tenth International Conference on Learning Representations},
+    year      = {2022},
+    url       = {https://openreview.net/forum?id=pfNyExj7z2},
+    note      = {under review}
+}
+```
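The README section added above describes "dead" codebook entries. As a rough illustration, the indices returned by the quantizer can be inspected to see how many codes are actually being selected; the snippet below is a hypothetical diagnostic built on the README's documented return values, not part of the library:

```python
import torch
from vector_quantize_pytorch import VectorQuantize

vq = VectorQuantize(dim = 256, codebook_size = 256, codebook_dim = 16)

x = torch.randn(1, 1024, 256)
quantized, indices, commit_loss = vq(x)

# count how many of the 256 codes were selected at least once for this batch;
# codes that are never (or almost never) hit are the "dead" entries
used = torch.unique(indices).numel()
print(f'codebook usage: {used / 256:.2%}')
```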

setup.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 setup(
   name = 'vector_quantize_pytorch',
   packages = find_packages(),
-  version = '0.3.0',
+  version = '0.3.1',
   license='MIT',
   description = 'Vector Quantization - Pytorch',
   author = 'Phil Wang',

vector_quantize_pytorch/vector_quantize_pytorch.py

Lines changed: 13 additions & 3 deletions
@@ -52,19 +52,26 @@ def __init__(
         eps = 1e-5,
         n_embed = None,
         kmeans_init = False,
-        kmeans_iters = 10
+        kmeans_iters = 10,
+        codebook_dim = None
     ):
         super().__init__()
         n_embed = default(n_embed, codebook_size)
 
         self.dim = dim
         self.n_embed = n_embed
+
+        codebook_dim = default(codebook_dim, dim)
+        requires_projection = codebook_dim != dim
+        self.project_in = nn.Linear(dim, codebook_dim) if requires_projection else nn.Identity()
+        self.project_out = nn.Linear(codebook_dim, dim) if requires_projection else nn.Identity()
+
         self.decay = decay
         self.eps = eps
         self.commitment = commitment
 
         init_fn = torch.randn if not kmeans_init else torch.zeros
-        embed = init_fn(dim, n_embed)
+        embed = init_fn(codebook_dim, n_embed)
 
         self.kmeans_iters = kmeans_iters
         self.register_buffer('initted', torch.Tensor([not kmeans_init]))
@@ -83,11 +90,13 @@ def init_embed_(self, data):
         self.initted.data.copy_(torch.Tensor([True]))
 
     def forward(self, input):
+        input = self.project_in(input)
+
         if not self.initted:
             self.init_embed_(input)
 
         dtype = input.dtype
-        flatten = input.reshape(-1, self.dim)
+        flatten = rearrange(input, '... d -> (...) d')
         dist = (
             flatten.pow(2).sum(1, keepdim=True)
             - 2 * flatten @ self.embed
@@ -112,4 +121,5 @@ def forward(self, input):
         commit_loss = F.mse_loss(quantize.detach(), input) * self.commitment
         quantize = input + (quantize - input).detach()
 
+        quantize = self.project_out(quantize)
         return quantize, embed_ind, commit_loss
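The changes above wire an optional low-dimensional codebook into the quantizer via `project_in` / `project_out`. As a rough standalone sketch of that idea, with a hypothetical class name and a simplified distance computation rather than the library's implementation:

```python
import torch
from torch import nn

class LowDimCodebook(nn.Module):
    # illustrative only: project features down to the codebook dimension,
    # snap to the nearest code, then project back up to the model dimension
    def __init__(self, dim, codebook_size, codebook_dim):
        super().__init__()
        self.project_in = nn.Linear(dim, codebook_dim)
        self.project_out = nn.Linear(codebook_dim, dim)
        self.embed = nn.Parameter(torch.randn(codebook_size, codebook_dim))

    def forward(self, x):
        x = self.project_in(x)                            # (batch, seq, codebook_dim)
        dist = (x.pow(2).sum(-1, keepdim = True)          # squared distance to every code
                - 2 * x @ self.embed.t()
                + self.embed.pow(2).sum(-1))
        indices = dist.argmin(dim = -1)                   # nearest codebook entry per vector
        quantized = self.embed[indices]
        quantized = x + (quantized - x).detach()          # straight-through estimator
        return self.project_out(quantized), indices

x = torch.randn(1, 1024, 256)
quantized, indices = LowDimCodebook(dim = 256, codebook_size = 256, codebook_dim = 16)(x)
```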
