pytorch
diff --git a/‎fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_common.py
Lines changed: 52 additions & 5 deletions b/‎fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_common.py
Lines changed: 52 additions & 5 deletions
diff --git a/‎fbgemm_gpu/fbgemm_gpu/tbe/ssd/training.py
Lines changed: 46 additions & 5 deletions b/‎fbgemm_gpu/fbgemm_gpu/tbe/ssd/training.py
Lines changed: 46 additions & 5 deletions
diff --git a/‎fbgemm_gpu/src/dram_kv_embedding_cache/dram_kv_embedding_cache.h
Lines changed: 105 additions & 0 deletions b/‎fbgemm_gpu/src/dram_kv_embedding_cache/dram_kv_embedding_cache.h
Lines changed: 105 additions & 0 deletions
diff --git a/‎fbgemm_gpu/src/dram_kv_embedding_cache/dram_kv_embedding_cache_wrapper.h
Lines changed: 12 additions & 2 deletions b/‎fbgemm_gpu/src/dram_kv_embedding_cache/dram_kv_embedding_cache_wrapper.h
Lines changed: 12 additions & 2 deletions
@@ -65,7 +65,7 @@ class EvictionPolicy(NamedTuple):
         0  # disabled, 0: disabled, 1: iteration, 2: mem_util, 3: manual
     )
     eviction_strategy: int = (
-        0  # 0: timestamp, 1: counter (feature score), 2: counter (feature score) + timestamp, 3: feature l2 norm
+        0  # 0: timestamp, 1: counter, 2: counter + timestamp, 3: feature l2 norm, 4: timestamp threshold, 5: feature score
     )
     eviction_step_intervals: Optional[int] = (
         None  # trigger_step_interval if trigger mode is iteration
@@ -74,17 +74,32 @@ class EvictionPolicy(NamedTuple):
         None  # eviction trigger condition if trigger mode is mem_util
     )
     counter_thresholds: Optional[List[int]] = (
-        None  # count_thresholds for each table if eviction strategy is feature score
+        None  # count_thresholds for each table if eviction strategy is counter
     )
     ttls_in_mins: Optional[List[int]] = (
         None  # ttls_in_mins for each table if eviction strategy is timestamp
     )
     counter_decay_rates: Optional[List[float]] = (
-        None  # count_decay_rates for each table if eviction strategy is feature score
+        None  # count_decay_rates for each table if eviction strategy is counter
+    )
+    feature_score_counter_decay_rates: Optional[List[float]] = (
+        None  # feature_score_counter_decay_rates for each table if eviction strategy is feature score
+    )
+    max_training_id_num_per_table: Optional[List[int]] = (
+        None  # max_training_id_num_per_table for each table
+    )
+    target_eviction_percent_per_table: Optional[List[float]] = (
+        None  # target_eviction_percent_per_table for each table
     )
     l2_weight_thresholds: Optional[List[float]] = (
         None  # l2_weight_thresholds for each table if eviction strategy is feature l2 norm
     )
+    threshold_calculation_bucket_stride: Optional[float] = (
+        0.2  # threshold_calculation_bucket_stride if eviction strategy is feature score
+    )
+    threshold_calculation_bucket_num: Optional[int] = (
+        1000000  # 1M, threshold_calculation_bucket_num if eviction strategy is feature score
+    )
     interval_for_insufficient_eviction_s: int = (
         # wait at least # seconds before trigger next round of eviction, if last finished eviction is insufficient
         # insufficient means we didn't evict enough rows, so we want to wait longer time to
@@ -95,6 +110,9 @@ class EvictionPolicy(NamedTuple):
         # wait at least # seconds before trigger next round of eviction, if last finished eviction is sufficient
         60
     )
+    interval_for_feature_statistics_decay_s: int = (
+        24 * 3600  # 1 day, interval for feature statistics decay
+    )
     meta_header_lens: Optional[List[int]] = None  # metaheader length for each table
 
     def validate(self) -> None:
@@ -105,8 +123,8 @@ def validate(self) -> None:
         if self.eviction_trigger_mode == 0:
             return
 
-        assert self.eviction_strategy in [0, 1, 2, 3], (
-            "eviction_strategy must be 0, 1, 2, or 3, "
+        assert self.eviction_strategy in [0, 1, 2, 3, 4, 5], (
+            "eviction_strategy must be 0, 1, 2, 3, 4 or 5, "
             f"actual {self.eviction_strategy}"
         )
         if self.eviction_trigger_mode == 1:
@@ -161,6 +179,35 @@ def validate(self) -> None:
                 "counter_thresholds and ttls_in_mins must have the same length, "
                 f"actual {self.counter_thresholds} vs {self.ttls_in_mins}"
             )
+        elif self.eviction_strategy == 5:  # feature score: per-table lists required
+            assert self.feature_score_counter_decay_rates is not None, (
+                "feature_score_counter_decay_rates must be set if eviction_strategy is 5, "
+                f"actual {self.feature_score_counter_decay_rates}"
+            )
+            assert self.max_training_id_num_per_table is not None, (
+                "max_training_id_num_per_table must be set if eviction_strategy is 5, "
+                f"actual {self.max_training_id_num_per_table}"
+            )
+            assert self.target_eviction_percent_per_table is not None, (
+                "target_eviction_percent_per_table must be set if eviction_strategy is 5, "
+                f"actual {self.target_eviction_percent_per_table}"
+            )
+            assert self.threshold_calculation_bucket_stride is not None, (
+                "threshold_calculation_bucket_stride must be set if eviction_strategy is 5, "
+                f"actual {self.threshold_calculation_bucket_stride}"
+            )
+            assert self.threshold_calculation_bucket_num is not None, (
+                "threshold_calculation_bucket_num must be set if eviction_strategy is 5, "
+                f"actual {self.threshold_calculation_bucket_num}"
+            )
+            assert (  # the three per-table lists must line up entry for entry
+                len(self.feature_score_counter_decay_rates)
+                == len(self.max_training_id_num_per_table)
+                == len(self.target_eviction_percent_per_table)
+            ), (
+                "feature_score_counter_decay_rates, max_training_id_num_per_table and target_eviction_percent_per_table must have the same length, "
+                f"actual {self.feature_score_counter_decay_rates} vs {self.max_training_id_num_per_table} vs {self.target_eviction_percent_per_table}"
+            )
 
 
 class KVZCHParams(NamedTuple):
 
@@ -677,18 +677,25 @@ def __init__(
                     if self.kv_zch_params.eviction_policy.eviction_mem_threshold_gb
                     else self.l2_cache_size
                 )
+                # Please refer to https://fburl.com/gdoc/nuupjwqq for the following eviction parameters.
                 eviction_config = torch.classes.fbgemm.FeatureEvictConfig(
                     self.kv_zch_params.eviction_policy.eviction_trigger_mode,  # eviction is disabled, 0: disabled, 1: iteration, 2: mem_util, 3: manual
-                    self.kv_zch_params.eviction_policy.eviction_strategy,  # evict_trigger_strategy: 0: timestamp, 1: counter (feature score), 2: counter (feature score) + timestamp, 3: feature l2 norm
+                    self.kv_zch_params.eviction_policy.eviction_strategy,  # evict_trigger_strategy: 0: timestamp, 1: counter, 2: counter + timestamp, 3: feature l2 norm, 4: timestamp threshold, 5: feature score
                     self.kv_zch_params.eviction_policy.eviction_step_intervals,  # trigger_step_interval if trigger mode is iteration
                     eviction_mem_threshold_gb,  # mem_util_threshold_in_GB if trigger mode is mem_util
                     self.kv_zch_params.eviction_policy.ttls_in_mins,  # ttls_in_mins for each table if eviction strategy is timestamp
-                    self.kv_zch_params.eviction_policy.counter_thresholds,  # counter_thresholds for each table if eviction strategy is feature score
-                    self.kv_zch_params.eviction_policy.counter_decay_rates,  # counter_decay_rates for each table if eviction strategy is feature score
+                    self.kv_zch_params.eviction_policy.counter_thresholds,  # counter_thresholds for each table if eviction strategy is counter
+                    self.kv_zch_params.eviction_policy.counter_decay_rates,  # counter_decay_rates for each table if eviction strategy is counter
+                    self.kv_zch_params.eviction_policy.feature_score_counter_decay_rates,  # feature_score_counter_decay_rates for each table if eviction strategy is feature score
+                    self.kv_zch_params.eviction_policy.max_training_id_num_per_table,  # max_training_id_num for each table
+                    self.kv_zch_params.eviction_policy.target_eviction_percent_per_table,  # target_eviction_percent for each table
                     self.kv_zch_params.eviction_policy.l2_weight_thresholds,  # l2_weight_thresholds for each table if eviction strategy is feature l2 norm
                     table_dims.tolist() if table_dims is not None else None,
+                    self.kv_zch_params.eviction_policy.threshold_calculation_bucket_stride,  # threshold_calculation_bucket_stride if eviction strategy is feature score
+                    self.kv_zch_params.eviction_policy.threshold_calculation_bucket_num,  # threshold_calculation_bucket_num if eviction strategy is feature score
                     self.kv_zch_params.eviction_policy.interval_for_insufficient_eviction_s,
                     self.kv_zch_params.eviction_policy.interval_for_sufficient_eviction_s,
+                    self.kv_zch_params.eviction_policy.interval_for_feature_statistics_decay_s,
                 )
             self._ssd_db = torch.classes.fbgemm.DramKVEmbeddingCacheWrapper(
                 self.cache_row_dim,
@@ -1020,6 +1027,9 @@ def __init__(
             self.stats_reporter.register_stats(
                 "eviction.feature_table.exec_duration_ms"
             )
+            self.stats_reporter.register_stats(
+                "eviction.feature_table.dry_run_exec_duration_ms"
+            )
             self.stats_reporter.register_stats(
                 "eviction.feature_table.exec_div_full_duration_rate"
             )
@@ -1607,6 +1617,7 @@ def prefetch(
         self,
         indices: Tensor,
         offsets: Tensor,
+        weights: Optional[Tensor] = None,  # TODO: update callers to pass weights
         forward_stream: Optional[torch.cuda.Stream] = None,
         batch_size_per_feature_per_rank: Optional[List[List[int]]] = None,
     ) -> None:
@@ -1632,6 +1643,7 @@ def prefetch(
         self._prefetch(
             indices,
             offsets,
+            weights,
             vbe_metadata,
             forward_stream,
         )
@@ -1640,6 +1652,7 @@ def _prefetch(  # noqa C901
         self,
         indices: Tensor,
         offsets: Tensor,
+        weights: Optional[Tensor] = None,
         vbe_metadata: Optional[invokers.lookup_args.VBEMetadata] = None,
         forward_stream: Optional[torch.cuda.Stream] = None,
     ) -> None:
@@ -1667,6 +1680,12 @@ def _prefetch(  # noqa C901
 
             self.timestep += 1
             self.timesteps_prefetched.append(self.timestep)
+            if self.backend_type == BackendType.DRAM and weights is not None:
+                # DRAM backend supports feature score eviction, if there is weights available
+                # in the prefetch call, we will set metadata for feature score eviction asynchronously
+                cloned_linear_cache_indices = linear_cache_indices.clone()
+            else:
+                cloned_linear_cache_indices = None
 
             # Lookup and virtually insert indices into L1. After this operator,
             # we know:
@@ -2024,6 +2043,16 @@ def _prefetch(  # noqa C901
                     is_bwd=False,
                 )
 
+            if self.backend_type == BackendType.DRAM and weights is not None:
+                # Write feature score metadata to DRAM
+                self.record_function_via_dummy_profile(
+                    "## ssd_write_feature_score_metadata ##",
+                    self.ssd_db.set_feature_score_metadata_cuda,
+                    cloned_linear_cache_indices.cpu(),
+                    torch.tensor([weights.shape[0]], device="cpu", dtype=torch.long),
+                    weights.cpu().view(torch.float32).view(-1, 2),
+                )
+
             # Generate row addresses (pointing to either L1 or the current
             # iteration's scratch pad)
             with record_function("## ssd_generate_row_addrs ##"):
@@ -2166,6 +2195,7 @@ def forward(
         self,
         indices: Tensor,
         offsets: Tensor,
+        weights: Optional[Tensor] = None,
         per_sample_weights: Optional[Tensor] = None,
         feature_requires_grad: Optional[Tensor] = None,
         batch_size_per_feature_per_rank: Optional[List[List[int]]] = None,
@@ -2187,7 +2217,7 @@ def forward(
                 context=self.step,
                 stream=self.ssd_eviction_stream,
             ):
-                self._prefetch(indices, offsets, vbe_metadata)
+                self._prefetch(indices, offsets, weights, vbe_metadata)
 
         assert len(self.ssd_prefetch_data) > 0
 
@@ -3792,8 +3822,13 @@ def _report_eviction_stats(self) -> None:
         processed_counts = torch.zeros(T, dtype=torch.int64)
         full_duration_ms = torch.tensor(0, dtype=torch.int64)
         exec_duration_ms = torch.tensor(0, dtype=torch.int64)
+        dry_run_exec_duration_ms = torch.tensor(0, dtype=torch.int64)
         self.ssd_db.get_feature_evict_metric(
-            evicted_counts, processed_counts, full_duration_ms, exec_duration_ms
+            evicted_counts,
+            processed_counts,
+            full_duration_ms,
+            exec_duration_ms,
+            dry_run_exec_duration_ms,
         )
 
         stats_reporter.report_data_amount(
@@ -3845,6 +3880,12 @@ def _report_eviction_stats(self) -> None:
             duration_ms=exec_duration_ms.item(),
             time_unit="ms",
         )
+        stats_reporter.report_duration(
+            iteration_step=self.step,
+            event_name="eviction.feature_table.dry_run_exec_duration_ms",
+            duration_ms=dry_run_exec_duration_ms.item(),
+            time_unit="ms",
+        )
         if full_duration_ms.item() != 0:
             stats_reporter.report_data_amount(
                 iteration_step=self.step,
 
@@ -633,6 +633,111 @@ class DramKVEmbeddingCache : public kv_db::EmbeddingKVDB {
             });
   }
 
+  /// Update feature score metadata (engage rates) in the kvstore. Returns an
+  /// empty result right away unless feature-score-based eviction is active.
+  /// engege_rates must be a contiguous float [N, 2] tensor; column 0 is read.
+  folly::SemiFuture<std::vector<folly::Unit>>
+  set_kv_zch_eviction_metadata_async(
+      at::Tensor indices,
+      at::Tensor count,
+      at::Tensor engege_rates) override {
+    if (!feature_evict_ || !feature_evict_config_.has_value() ||
+        feature_evict_config_.value()->trigger_mode_ ==
+            EvictTriggerMode::DISABLED) {
+      // feature eviction is disabled
+      return folly::makeSemiFuture(std::vector<folly::Unit>());
+    }
+
+    CHECK_EQ(engege_rates.scalar_type(), at::ScalarType::Float);
+    auto* feature_score_evict =
+        dynamic_cast<FeatureScoreBasedEvict<weight_type>*>(
+            feature_evict_.get());
+    if (feature_score_evict == nullptr) {
+      // Not a feature score based eviction
+      return folly::makeSemiFuture(std::vector<folly::Unit>());
+    }
+
+    // Eviction stays paused while shards are updated; it is resumed by the
+    // ensure() continuation at the end, on success and on failure alike.
+    pause_ongoing_eviction();
+    std::vector<folly::Future<int64_t>> futures;
+    auto shardid_to_indexes = shard_input(indices, count);
+    for (auto& shard_entry : shardid_to_indexes) {
+      const auto shard_id = shard_entry.first;
+      // The shard's index list is moved into the task; the map is not reused.
+      auto f =
+          folly::via(executor_.get())
+              .thenValue([this,
+                          shard_id,
+                          indexes = std::move(shard_entry.second),
+                          indices,
+                          engege_rates,
+                          feature_score_evict](folly::Unit) {
+                int64_t updated_id_count = 0;
+                // The dispatch lambda runs synchronously inside this task,
+                // so capturing by reference is safe and avoids copies.
+                FBGEMM_DISPATCH_INTEGRAL_TYPES(
+                    indices.scalar_type(),
+                    "dram_set_kv_feature_score_metadata",
+                    [&] {
+                      using index_t = scalar_t;
+                      constexpr int64_t kStride = 2;
+                      CHECK(indices.is_contiguous());
+                      CHECK(engege_rates.is_contiguous());
+                      CHECK_EQ(indices.size(0), engege_rates.size(0));
+                      // Rows are read at id_index * kStride, so anything
+                      // but an [N, kStride] layout would be misread or
+                      // read out of bounds.
+                      CHECK_EQ(engege_rates.dim(), 2);
+                      CHECK_EQ(engege_rates.size(1), kStride);
+                      const auto* indices_data_ptr =
+                          indices.data_ptr<index_t>();
+                      const auto* engage_rate_ptr =
+                          engege_rates.data_ptr<float>();
+
+                      auto wlmap = kv_store_.by(shard_id).wlock();
+                      auto* pool = kv_store_.pool_by(shard_id);
+                      for (const auto& id_index : indexes) {
+                        const auto id =
+                            static_cast<int64_t>(indices_data_ptr[id_index]);
+                        const float engage_rate =
+                            engage_rate_ptr[id_index * kStride];
+                        // use mempool
+                        weight_type* block = nullptr;
+                        auto it = wlmap->find(id);
+                        if (it != wlmap->end()) {
+                          block = it->second;
+                        } else {
+                          // Key doesn't exist, allocate new block and
+                          // insert.
+                          block = pool->template allocate_t<weight_type>();
+                          FixedBlockPool::set_key(block, id);
+                          wlmap->insert({id, block});
+                        }
+                        feature_score_evict->update_feature_score_statistics(
+                            block, engage_rate);
+                        updated_id_count++;
+                      }
+                    });
+                return updated_id_count;
+              });
+      futures.push_back(std::move(f));
+    }
+    return folly::collect(std::move(futures))
+        .via(executor_.get())
+        .thenValue([](const std::vector<int64_t>& results) {
+          int64_t total_updated_ids = 0; // 64-bit, like the per-shard counts
+          for (const auto& result : results) {
+            total_updated_ids += result;
+          }
+          LOG(INFO)
+              << "[DRAM KV][Feature Score Eviction]Total updated IDs across all shards: "
+              << total_updated_ids;
+          return std::vector<folly::Unit>(results.size());
+        })
+        .ensure([this] { resume_ongoing_eviction(); });
+  }
+
   /// Get embeddings from kvstore.
   ///
   /// @param indices The 1D embedding index tensor, should skip on negative
 
@@ -76,7 +76,7 @@ class DramKVEmbeddingCacheWrapper : public torch::jit::CustomClassHolder {
       at::Tensor count,
       int64_t timestep,
       bool is_bwd) {
-    return impl_->set_cuda(indices, weights, count, timestep);
+    return impl_->set_cuda(indices, weights, count, timestep, is_bwd);
   }
 
   void get_cuda(at::Tensor indices, at::Tensor weights, at::Tensor count) {
@@ -147,7 +147,8 @@ class DramKVEmbeddingCacheWrapper : public torch::jit::CustomClassHolder {
       at::Tensor evicted_counts,
       at::Tensor processed_counts,
       at::Tensor full_duration_ms,
-      at::Tensor exec_duration_ms) {
+      at::Tensor exec_duration_ms,
+      at::Tensor dry_run_exec_duration_ms) {
     auto metrics = impl_->get_feature_evict_metric();
     if (metrics.has_value()) {
       evicted_counts.copy_(
@@ -158,6 +159,8 @@ class DramKVEmbeddingCacheWrapper : public torch::jit::CustomClassHolder {
           metrics.value().full_duration_ms); // full duration (Long)
       exec_duration_ms.copy_(
           metrics.value().exec_duration_ms); // exec duration (Long)
+      dry_run_exec_duration_ms.copy_(
+          metrics.value().dry_run_exec_duration_ms); // dry run exec duration
     }
   }
 
@@ -169,6 +172,13 @@ class DramKVEmbeddingCacheWrapper : public torch::jit::CustomClassHolder {
     impl_->set_backend_return_whole_row(backend_return_whole_row);
   }
 
+  void set_feature_score_metadata_cuda( // pass-through to impl_, returns nothing
+      at::Tensor indices,
+      at::Tensor count,
+      at::Tensor engage_show_count) {
+    impl_->set_feature_score_metadata_cuda(indices, count, engage_show_count);
+  }
+
  private:
   // friend class EmbeddingRocksDBWrapper;
   friend class ssd::KVTensorWrapper;