
Commit 38f9abf

nipung90 authored and facebook-github-bot committed
Allow uneven row-wise sharding based on the number of buckets for ZCH
Summary: This diff enables the use of the num_buckets ParameterConstraint in the planner. The presence of this constraint indicates that ZCH bucketing should be used as part of row-wise sharding plans.

## Without num_buckets present

The current row-wise sharding strategy is used.

## With num_buckets present

* When devices have the same amount of memory available: the buckets are divided evenly across hosts, and one additional bucket is given to as many hosts as needed to absorb the remainder. For example (test case 2): hash_size = 100, num_devices = 4, num_buckets = 10. Each bucket has 10 rows and the buckets are distributed as [3, 3, 2, 2], so the rows are distributed as [30, 30, 20, 20].
* When devices have uneven amounts of memory: the buckets are distributed in proportion to each device's share of the total memory across all devices, and any buckets left over when they do not split exactly along the memory ratios are placed on the last device. For example: hash_size = 45, num_buckets = 9, bucket_size = 5. With a memory ratio of 2:1:1, the buckets are distributed as [4, 2, 3], so the rows are distributed as [20, 10, 15].

Differential Revision: D79659949
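For illustration, the bucket split described in the summary can be reproduced with a short sketch. This is a hedged, self-contained example using a hypothetical helper name (split_buckets); it is not the planner's actual implementation, only the arithmetic the summary describes, and it reproduces both worked examples:

from typing import List, Optional


def split_buckets(
    num_buckets: int,
    bucket_size: int,
    num_devices: Optional[int] = None,
    memory_sizes: Optional[List[int]] = None,
) -> List[int]:
    """Rows per device for a ZCH-bucketed row-wise shard (illustrative only)."""
    if memory_sizes is None:
        # Even memory: spread buckets evenly; the first `extra` devices take one more.
        assert num_devices is not None
        base, extra = divmod(num_buckets, num_devices)
        buckets = [base + (1 if d < extra else 0) for d in range(num_devices)]
    else:
        # Uneven memory: buckets proportional to each device's share of total memory;
        # buckets that do not fit the ratios exactly land on the last device.
        total = sum(memory_sizes)
        buckets = [num_buckets * m // total for m in memory_sizes]
        buckets[-1] += num_buckets - sum(buckets)
    return [b * bucket_size for b in buckets]


# Examples from the summary:
print(split_buckets(num_buckets=10, bucket_size=10, num_devices=4))         # [30, 30, 20, 20]
print(split_buckets(num_buckets=9, bucket_size=5, memory_sizes=[2, 1, 1]))  # [20, 10, 15]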
1 parent 60ef897 commit 38f9abf

File tree

4 files changed: +392 -21 lines changed


torchrec/distributed/planner/enumerators.py

Lines changed: 33 additions & 1 deletion
@@ -38,6 +38,10 @@
     ShardingType,
 )
 from torchrec.modules.embedding_configs import DataType
+from torchrec.modules.embedding_modules import (
+    EmbeddingBagCollection,
+    EmbeddingCollection,
+)
 from torchrec.modules.embedding_tower import EmbeddingTower, EmbeddingTowerCollection
 
 
@@ -178,7 +182,7 @@ def enumerate(
             # skip for other device groups
             if device_group and device_group != self._compute_device:
                 continue
-
+            num_buckets = self._get_num_buckets(name, child_module)
             sharding_options_per_table: List[ShardingOption] = []
 
             for sharding_type in self._filter_sharding_types(
@@ -200,6 +204,7 @@ def enumerate(
                             sharding_type=sharding_type,
                             col_wise_shard_dim=col_wise_shard_dim,
                             device_memory_sizes=self._device_memory_sizes,
+                            num_buckets=num_buckets,
                         )
                     except ZeroDivisionError as e:
                         # Re-raise with additional context about the table and module
@@ -264,6 +269,33 @@ def enumerate(
         self._last_stored_search_space = copy.deepcopy(sharding_options)
         return sharding_options
 
+    def _get_num_buckets(self, parameter: str, module: nn.Module) -> Optional[int]:
+        """
+        Get the number of buckets for each embedding table.
+
+        Args:
+            parameter (str): name of the embedding table.
+            module (nn.Module): module to be sharded.
+
+        Returns:
+            Optional[int]: number of buckets for the table, or None if the module is not an EmbeddingBagCollection/EmbeddingCollection or the table is not found.
+        """
+        # If module is not an EmbeddingBagCollection or EmbeddingCollection, return None
+        if isinstance(module, EmbeddingBagCollection):
+            embedding_configs = module.embedding_bag_configs()
+        elif isinstance(module, EmbeddingCollection):
+            embedding_configs = module.embedding_configs()
+        else:
+            return None
+
+        # Find the embedding config for the table with the same name as parameter input
+        for config in embedding_configs:
+            if config.name == parameter and config.use_virtual_table:
+                return config.total_num_buckets
+
+        # If table with matching name not found, return None
+        return None
+
     @property
     def last_stored_search_space(self) -> Optional[List[ShardingOption]]:
         # NOTE: This is the last search space stored by enumerate(...), do not use
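For context, the lookup above only returns a value for tables that are configured as virtual (ZCH) tables. A minimal sketch, assuming the same config fields used by the test setup further below (total_num_buckets and use_virtual_table on EmbeddingBagConfig); this is not part of the diff, only an illustration:

from torchrec.modules.embedding_configs import EmbeddingBagConfig
from torchrec.modules.embedding_modules import EmbeddingBagCollection

# A table that opts into ZCH bucketing; without use_virtual_table=True the
# enumerator's _get_num_buckets(...) would return None for it.
bucketed_table = EmbeddingBagConfig(
    num_embeddings=100,
    embedding_dim=20,
    name="table_0",
    feature_names=["feature_0"],
    total_num_buckets=10,
    use_virtual_table=True,
)
ebc = EmbeddingBagCollection(tables=[bucketed_table])
# Expected: _get_num_buckets("table_0", ebc) -> 10; any other table name, or a module
# that is neither an EmbeddingBagCollection nor an EmbeddingCollection -> None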

torchrec/distributed/planner/tests/test_enumerators.py

Lines changed: 155 additions & 1 deletion
@@ -18,7 +18,10 @@
     EmbeddingTowerSharder,
 )
 from torchrec.distributed.embedding_types import EmbeddingComputeKernel
-from torchrec.distributed.embeddingbag import EmbeddingBagCollectionSharder
+from torchrec.distributed.embeddingbag import (
+    EmbeddingBagCollection,
+    EmbeddingBagCollectionSharder,
+)
 from torchrec.distributed.mc_embeddingbag import (
     ManagedCollisionEmbeddingBagCollectionSharder,
 )
@@ -45,13 +48,27 @@
     [[17, 80], [17, 80], [17, 80], [17, 80], [17, 80], [17, 80], [17, 80], [11, 80]],
 ]
 
+EXPECTED_RW_SHARD_SIZES_WITH_BUCKETS = [
+    [[20, 20], [20, 20], [10, 20], [10, 20], [10, 20], [10, 20], [10, 20], [10, 20]],
+    [[22, 40], [22, 40], [11, 40], [11, 40], [11, 40], [11, 40], [11, 40], [11, 40]],
+    [[24, 60], [24, 60], [12, 60], [12, 60], [12, 60], [12, 60], [12, 60], [12, 60]],
+    [[26, 80], [26, 80], [13, 80], [13, 80], [13, 80], [13, 80], [13, 80], [13, 80]],
+]
+
 EXPECTED_RW_SHARD_OFFSETS = [
     [[0, 0], [13, 0], [26, 0], [39, 0], [52, 0], [65, 0], [78, 0], [91, 0]],
     [[0, 0], [14, 0], [28, 0], [42, 0], [56, 0], [70, 0], [84, 0], [98, 0]],
     [[0, 0], [15, 0], [30, 0], [45, 0], [60, 0], [75, 0], [90, 0], [105, 0]],
     [[0, 0], [17, 0], [34, 0], [51, 0], [68, 0], [85, 0], [102, 0], [119, 0]],
 ]
 
+EXPECTED_RW_SHARD_OFFSETS_WITH_BUCKETS = [
+    [[0, 0], [20, 0], [40, 0], [50, 0], [60, 0], [70, 0], [80, 0], [90, 0]],
+    [[0, 0], [22, 0], [44, 0], [55, 0], [66, 0], [77, 0], [88, 0], [99, 0]],
+    [[0, 0], [24, 0], [48, 0], [60, 0], [72, 0], [84, 0], [96, 0], [108, 0]],
+    [[0, 0], [26, 0], [52, 0], [65, 0], [78, 0], [91, 0], [104, 0], [117, 0]],
+]
+
 
 def get_expected_cache_aux_size(rows: int) -> int:
     # 0.2 is the hardcoded cache load factor assumed in this test
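The *_WITH_BUCKETS expectations follow directly from the even-memory bucket rule: table_0 has 100 rows split into 10 buckets of 10 rows, spread over world_size=8 ranks. A small sketch of that arithmetic (illustrative only, not the planner's code) that reproduces the first row of the constants above:

# table_0: num_embeddings=100, total_num_buckets=10, embedding_dim=20, 8 ranks
hash_size, num_buckets, world_size, dim = 100, 10, 8, 20
bucket_size = hash_size // num_buckets             # 10 rows per bucket
base, extra = divmod(num_buckets, world_size)      # 1 bucket per rank, 2 left over
rows = [(base + (1 if rank < extra else 0)) * bucket_size for rank in range(world_size)]

sizes = [[r, dim] for r in rows]
offsets, acc = [], 0
for r in rows:
    offsets.append([acc, 0])
    acc += r

print(sizes)    # [[20, 20], [20, 20], [10, 20], ...] == EXPECTED_RW_SHARD_SIZES_WITH_BUCKETS[0]
print(offsets)  # [[0, 0], [20, 0], [40, 0], [50, 0], ...] == EXPECTED_RW_SHARD_OFFSETS_WITH_BUCKETS[0]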
@@ -101,6 +118,48 @@ def get_expected_cache_aux_size(rows: int) -> int:
     ],
 ]
 
+EXPECTED_RW_SHARD_STORAGE_WITH_BUCKETS = [
+    [
+        Storage(hbm=167488, ddr=0),
+        Storage(hbm=167488, ddr=0),
+        Storage(hbm=166688, ddr=0),
+        Storage(hbm=166688, ddr=0),
+        Storage(hbm=166688, ddr=0),
+        Storage(hbm=166688, ddr=0),
+        Storage(hbm=166688, ddr=0),
+        Storage(hbm=166688, ddr=0),
+    ],
+    [
+        Storage(hbm=1004992, ddr=0),
+        Storage(hbm=1004992, ddr=0),
+        Storage(hbm=1003232, ddr=0),
+        Storage(hbm=1003232, ddr=0),
+        Storage(hbm=1003232, ddr=0),
+        Storage(hbm=1003232, ddr=0),
+        Storage(hbm=1003232, ddr=0),
+        Storage(hbm=1003232, ddr=0),
+    ],
+    [
+        Storage(hbm=1009280, ddr=0),
+        Storage(hbm=1009280, ddr=0),
+        Storage(hbm=1006400, ddr=0),
+        Storage(hbm=1006400, ddr=0),
+        Storage(hbm=1006400, ddr=0),
+        Storage(hbm=1006400, ddr=0),
+        Storage(hbm=1006400, ddr=0),
+        Storage(hbm=1006400, ddr=0),
+    ],
+    [
+        Storage(hbm=2656384, ddr=0),
+        Storage(hbm=2656384, ddr=0),
+        Storage(hbm=2652224, ddr=0),
+        Storage(hbm=2652224, ddr=0),
+        Storage(hbm=2652224, ddr=0),
+        Storage(hbm=2652224, ddr=0),
+        Storage(hbm=2652224, ddr=0),
+        Storage(hbm=2652224, ddr=0),
+    ],
+]
 
 EXPECTED_UVM_CACHING_RW_SHARD_STORAGE = [
     [
@@ -145,6 +204,48 @@
     ],
 ]
 
+EXPECTED_UVM_CACHING_RW_SHARD_STORAGE_WITH_BUCKETS = [
+    [
+        Storage(hbm=166352, ddr=1600),
+        Storage(hbm=166352, ddr=1600),
+        Storage(hbm=166120, ddr=800),
+        Storage(hbm=166120, ddr=800),
+        Storage(hbm=166120, ddr=800),
+        Storage(hbm=166120, ddr=800),
+        Storage(hbm=166120, ddr=800),
+        Storage(hbm=166120, ddr=800),
+    ],
+    [
+        Storage(hbm=1002335, ddr=3520),
+        Storage(hbm=1002335, ddr=3520),
+        Storage(hbm=1001904, ddr=1760),
+        Storage(hbm=1001904, ddr=1760),
+        Storage(hbm=1001904, ddr=1760),
+        Storage(hbm=1001904, ddr=1760),
+        Storage(hbm=1001904, ddr=1760),
+        Storage(hbm=1001904, ddr=1760),
+    ],
+    [
+        Storage(hbm=1004845, ddr=5760),
+        Storage(hbm=1004845, ddr=5760),
+        Storage(hbm=1004183, ddr=2880),
+        Storage(hbm=1004183, ddr=2880),
+        Storage(hbm=1004183, ddr=2880),
+        Storage(hbm=1004183, ddr=2880),
+        Storage(hbm=1004183, ddr=2880),
+        Storage(hbm=1004183, ddr=2880),
+    ],
+    [
+        Storage(hbm=2649916, ddr=8320),
+        Storage(hbm=2649916, ddr=8320),
+        Storage(hbm=2648990, ddr=4160),
+        Storage(hbm=2648990, ddr=4160),
+        Storage(hbm=2648990, ddr=4160),
+        Storage(hbm=2648990, ddr=4160),
+        Storage(hbm=2648990, ddr=4160),
+        Storage(hbm=2648990, ddr=4160),
+    ],
+]
 
 EXPECTED_TWRW_SHARD_SIZES = [
     [[25, 20], [25, 20], [25, 20], [25, 20]],
@@ -367,6 +468,17 @@ def setUp(self) -> None:
             )
             for i in range(self.num_tables)
         ]
+        tables_with_buckets = [
+            EmbeddingBagConfig(
+                num_embeddings=100 + i * 10,
+                embedding_dim=20 + i * 20,
+                name="table_" + str(i),
+                feature_names=["feature_" + str(i)],
+                total_num_buckets=10,
+                use_virtual_table=True,
+            )
+            for i in range(self.num_tables)
+        ]
         weighted_tables = [
             EmbeddingBagConfig(
                 num_embeddings=(i + 1) * 10,
@@ -377,6 +489,9 @@ def setUp(self) -> None:
            for i in range(4)
        ]
        self.model = TestSparseNN(tables=tables, weighted_tables=[])
+        self.model_with_buckets = EmbeddingBagCollection(
+            tables=tables_with_buckets,
+        )
        self.enumerator = EmbeddingEnumerator(
            topology=Topology(
                world_size=self.world_size,
@@ -514,6 +629,25 @@ def test_rw_sharding(self) -> None:
                 EXPECTED_RW_SHARD_STORAGE[i],
             )
 
+    def test_rw_sharding_with_buckets(self) -> None:
+        sharding_options = self.enumerator.enumerate(
+            self.model_with_buckets, [cast(ModuleSharder[torch.nn.Module], RWSharder())]
+        )
+        for i, sharding_option in enumerate(sharding_options):
+            self.assertEqual(sharding_option.sharding_type, ShardingType.ROW_WISE.value)
+            self.assertEqual(
+                [shard.size for shard in sharding_option.shards],
+                EXPECTED_RW_SHARD_SIZES_WITH_BUCKETS[i],
+            )
+            self.assertEqual(
+                [shard.offset for shard in sharding_option.shards],
+                EXPECTED_RW_SHARD_OFFSETS_WITH_BUCKETS[i],
+            )
+            self.assertEqual(
+                [shard.storage for shard in sharding_option.shards],
+                EXPECTED_RW_SHARD_STORAGE_WITH_BUCKETS[i],
+            )
+
     def test_uvm_caching_rw_sharding(self) -> None:
         sharding_options = self.enumerator.enumerate(
             self.model,
@@ -535,6 +669,26 @@ def test_uvm_caching_rw_sharding(self) -> None:
                 EXPECTED_UVM_CACHING_RW_SHARD_STORAGE[i],
             )
 
+    def test_uvm_caching_rw_sharding_with_buckets(self) -> None:
+        sharding_options = self.enumerator.enumerate(
+            self.model_with_buckets,
+            [cast(ModuleSharder[torch.nn.Module], UVMCachingRWSharder())],
+        )
+        for i, sharding_option in enumerate(sharding_options):
+            self.assertEqual(sharding_option.sharding_type, ShardingType.ROW_WISE.value)
+            self.assertEqual(
+                [shard.size for shard in sharding_option.shards],
+                EXPECTED_RW_SHARD_SIZES_WITH_BUCKETS[i],
+            )
+            self.assertEqual(
+                [shard.offset for shard in sharding_option.shards],
+                EXPECTED_RW_SHARD_OFFSETS_WITH_BUCKETS[i],
+            )
+            self.assertEqual(
+                [shard.storage for shard in sharding_option.shards],
+                EXPECTED_UVM_CACHING_RW_SHARD_STORAGE_WITH_BUCKETS[i],
+            )
+
     def test_twrw_sharding(self) -> None:
         sharding_options = self.enumerator.enumerate(
             self.model, [cast(ModuleSharder[torch.nn.Module], TWRWSharder())]
