
Commit f90e6c1

【Allocator】Return free blocks info (PaddlePaddle#76499)
* return free blocks info
* fix dcu
1 parent ae814e1 commit f90e6c1

File tree: 12 files changed (+141, -35 lines)

paddle/fluid/pybind/pybind.cc
Lines changed: 6 additions & 2 deletions

@@ -141,6 +141,7 @@ limitations under the License. */
 #include "paddle/phi/core/compat/convert_utils.h"
 #include "paddle/phi/core/lod_utils.h"
 #include "paddle/phi/core/memory/allocation/mmap_allocator.h"
+#include "paddle/phi/core/memory/mem_utils.h"
 #include "paddle/phi/core/platform/cpu_helper.h"
 #include "paddle/phi/core/platform/device/device_wrapper.h"
 #include "paddle/phi/core/platform/device_context.h"

@@ -3623,7 +3624,6 @@ All parameter, weight, gradient are variables in Paddle.
     }
     platform::EmptyCache();
   });
-  m.def("vmm_compact", [] { platform::VmmCompact(); });
   m.def(
       "get_device_properties",
       [](int id) -> const gpuDeviceProp & {

@@ -3673,8 +3673,12 @@ All parameter, weight, gradient are variables in Paddle.
 #endif
 #if defined(PADDLE_WITH_CUDA)
   m.def("vmm_max_free_size", [] {
-    memory::VmmMaxFreeSize(phi::GPUPlace(platform::GetCurrentDeviceId()), 1);
+    return memory::VmmMaxFreeSize(phi::GPUPlace(platform::GetCurrentDeviceId()),
+                                  1);
   });
+  m.def("vmm_compact", [] { return paddle::memory::VmmCompact(); });
+  m.def("vmm_free_block_info",
+        [] { return paddle::memory::FreeBlockInfoOfVmmAllocator(); });
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
   m.def(
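
The three bindings above just return the lambdas' C++ results and let pybind11 convert them into Python objects. Below is a minimal, self-contained sketch (the module name vmm_demo, the sample data, and the helper FakeFreeBlockInfo are made up for this example, not part of the commit) of how a function with the same return type as FreeBlockInfoOfVmmAllocator surfaces in Python as a list of lists of (size, address) tuples once <pybind11/stl.h> is included:

// Standalone sketch: nested std containers returned from an m.def lambda are
// converted to nested Python lists/tuples by pybind11's STL casters.
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>  // enables std::vector / std::pair conversion

namespace py = pybind11;

// Stand-in for paddle::memory::FreeBlockInfoOfVmmAllocator(): one inner vector
// per allocator, each entry a (block_size, block_address) pair.
std::vector<std::vector<std::pair<size_t, uintptr_t>>> FakeFreeBlockInfo() {
  return {{{4096, 0x1000}, {8192, 0x3000}}};
}

PYBIND11_MODULE(vmm_demo, m) {
  // In Python: vmm_demo.vmm_free_block_info() -> [[(4096, 4096), (8192, 12288)]]
  m.def("vmm_free_block_info", [] { return FakeFreeBlockInfo(); });
}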

paddle/phi/api/lib/api_gen_utils.cc
Lines changed: 1 addition & 0 deletions

@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/common/flags.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/core/memory/malloc.h"
+#include "paddle/phi/core/memory/mem_utils.h"
 #include "paddle/phi/core/memory/stats.h"
 #include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/strided_copy_kernel.h"

paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
Lines changed: 5 additions & 0 deletions

@@ -47,6 +47,11 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator {
   std::shared_ptr<Allocator> &GetUnderLyingAllocator() {
     return underlying_allocator_;
   }
+  std::map<std::pair<size_t, void *>, std::list<Block>::iterator>
+      &GetFreeBlocks() {
+    return free_blocks_;
+  }
+
   std::pair<size_t, size_t> SumLargestFreeBlockSizes(int32_t n) const;
   void Accept(AllocatorVisitor *visitor) override { visitor->Visit(this); }
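
GetFreeBlocks() hands out the allocator's internal free-block index: a std::map keyed by (block size, block address) whose values point back into the block list. The standalone sketch below (simplified Block struct and hand-built data; the rationale for the key order is an assumption, not stated in the commit) shows what that ordering buys: iterating the map walks free blocks in ascending size, and a lower_bound on the size turns a best-fit style lookup into a single ordered-map search.

// Illustrative only: mirrors the shape of free_blocks_, not Paddle's real Block.
#include <cstddef>
#include <cstdio>
#include <list>
#include <map>
#include <utility>

struct Block {
  void *ptr;
  size_t size;
  bool is_free;
};

int main() {
  std::list<Block> blocks = {{reinterpret_cast<void *>(0x1000), 4096, true},
                             {reinterpret_cast<void *>(0x3000), 8192, true}};

  // Same shape as the map returned by GetFreeBlocks().
  std::map<std::pair<size_t, void *>, std::list<Block>::iterator> free_blocks;
  for (auto it = blocks.begin(); it != blocks.end(); ++it) {
    if (it->is_free) {
      free_blocks.emplace(std::make_pair(it->size, it->ptr), it);
    }
  }

  // Best-fit style query for a 5000-byte request: the first free block whose
  // size is >= 5000, found with one lower_bound over the (size, ptr) keys.
  auto fit = free_blocks.lower_bound({5000, nullptr});
  if (fit != free_blocks.end()) {
    std::printf("best fit: size=%zu addr=%p\n", fit->first.first,
                fit->first.second);
  }
  return 0;
}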

paddle/phi/core/memory/malloc.cc
Lines changed: 0 additions & 18 deletions

@@ -81,24 +81,6 @@ gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation) {
 
 #endif
 
-#if defined(PADDLE_WITH_CUDA)
-std::pair<size_t, size_t> VmmMaxFreeSize(const phi::GPUPlace& place,
-                                         int32_t n) {
-  FreeMemoryMetricsVisitor free_memory_metrics_visitor(n);
-  allocation::AllocatorFacade::Instance().Accept(place,
-                                                 &free_memory_metrics_visitor);
-  return std::make_pair(free_memory_metrics_visitor.GetLargeSize(),
-                        free_memory_metrics_visitor.GetSumSize());
-}
-
-bool TryAllocBatch(const phi::GPUPlace& place,
-                   const std::vector<size_t>& sizes) {
-  TryAllocVisitor try_alloc_visitor(sizes);
-  allocation::AllocatorFacade::Instance().Accept(place, &try_alloc_visitor);
-  return try_alloc_visitor.IsTryAllocSuccess();
-}
-#endif
-
 #ifdef PADDLE_WITH_XPU
 bool RecordStream(std::shared_ptr<Allocation> allocation, XPUStream stream) {
   return allocation::AllocatorFacade::Instance().RecordStream(allocation,

paddle/phi/core/memory/malloc.h
Lines changed: 0 additions & 10 deletions

@@ -69,16 +69,6 @@ void EraseStream(std::shared_ptr<Allocation> allocation, gpuStream_t stream);
 PADDLE_API gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation);
 #endif
 
-#if defined(PADDLE_WITH_CUDA)
-// return a pair of <largest_free_block_size, sum_of_n_largest_free_block_size>
-PADDLE_API extern std::pair<size_t, size_t> VmmMaxFreeSize(
-    const phi::GPUPlace& place, int32_t n);
-// Try using Allocator to simulate an allocation, simulating a request for
-// vector<size>.
-PADDLE_API extern bool TryAllocBatch(const phi::GPUPlace& place,
-                                     const std::vector<size_t>& sizes);
-#endif
-
 #ifdef PADDLE_WITH_XPU
 bool RecordStream(std::shared_ptr<Allocation> allocation, XPUStream stream);
 #endif

paddle/phi/core/memory/mem_utils.cc
Lines changed: 35 additions & 0 deletions

@@ -13,7 +13,11 @@
 // limitations under the License.
 
 #include "paddle/phi/core/memory/mem_utils.h"
+
 #include <algorithm>
+#include "paddle/phi/core/memory/allocation/allocator_facade.h"
+#include "paddle/phi/core/memory/malloc.h"
+#include "paddle/phi/core/platform/device/gpu/gpu_info.h"
 
 #ifdef PADDLE_WITH_CUDA
 #include <cuda.h>

@@ -85,5 +89,36 @@ size_t TotalMemoryCompactor::Compact(std::list<Block>& blocks,
 #endif
 }
 
+#if defined(PADDLE_WITH_CUDA)
+std::pair<size_t, size_t> VmmMaxFreeSize(const phi::GPUPlace& place,
+                                         int32_t n) {
+  FreeMemoryMetricsVisitor free_memory_metrics_visitor(n);
+  allocation::AllocatorFacade::Instance().Accept(place,
+                                                 &free_memory_metrics_visitor);
+  return std::make_pair(free_memory_metrics_visitor.GetLargeSize(),
+                        free_memory_metrics_visitor.GetSumSize());
+}
+
+bool TryAllocBatch(const phi::GPUPlace& place,
+                   const std::vector<size_t>& sizes) {
+  TryAllocVisitor try_alloc_visitor(sizes);
+  allocation::AllocatorFacade::Instance().Accept(place, &try_alloc_visitor);
+  return try_alloc_visitor.IsTryAllocSuccess();
+}
+
+size_t VmmCompact() {
+  return memory::Compact(phi::GPUPlace(paddle::platform::GetCurrentDeviceId()));
+}
+
+std::vector<std::vector<std::pair<size_t, uintptr_t>>>
+FreeBlockInfoOfVmmAllocator() {
+  VMMFreeBlocksInfoVisitor free_blocks_info_visitor;
+  allocation::AllocatorFacade::Instance().Accept(
+      phi::GPUPlace(paddle::platform::GetCurrentDeviceId()),
+      &free_blocks_info_visitor);
+  return free_blocks_info_visitor.GetFreeBlocksInfo();
+}
+#endif
+
 } // namespace memory
 } // namespace paddle
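
FreeBlockInfoOfVmmAllocator() returns one inner vector per allocator that reported free blocks, each inner entry being a (size in bytes, address as uintptr_t) pair, which is exactly the structure VMMFreeBlocksInfoVisitor accumulates. A small sketch of how a caller might summarize that shape (the data below is made up; real values come from the visitor):

// Standalone example that consumes the same nested structure the API returns.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Outer vector: one entry per allocator with free blocks.
  // Inner vector: (block_size_in_bytes, block_address) pairs.
  std::vector<std::vector<std::pair<size_t, uintptr_t>>> info = {
      {{1 << 20, 0x1000}, {4 << 20, 0x200000}},  // allocator 0
      {{2 << 20, 0x900000}},                     // allocator 1
  };

  for (size_t i = 0; i < info.size(); ++i) {
    size_t total = 0, largest = 0;
    for (const auto &block : info[i]) {
      total += block.first;
      largest = std::max(largest, block.first);
    }
    std::printf("allocator %zu: %zu blocks, %zu bytes free, largest %zu\n",
                i, info[i].size(), total, largest);
  }
  return 0;
}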

paddle/phi/core/memory/mem_utils.h
Lines changed: 19 additions & 0 deletions

@@ -80,5 +80,24 @@ class TotalMemoryCompactor final : public MemoryCompactionStrategy {
                  void* start_ptr,
                  void* end_ptr) override;
 };
+
+#if defined(PADDLE_WITH_CUDA)
+// return a pair of <largest_free_block_size, sum_of_n_largest_free_block_size>
+PADDLE_API extern std::pair<size_t, size_t> VmmMaxFreeSize(
+    const phi::GPUPlace& place, int32_t n);
+
+// Try using Allocator to simulate an allocation, simulating a request for
+// vector<size>.
+PADDLE_API extern bool TryAllocBatch(const phi::GPUPlace& place,
+                                     const std::vector<size_t>& sizes);
+
+// Compact memory of free blocks held by the VmmAllocator.
+PADDLE_API extern size_t VmmCompact(void);
+
+// Get VMM allocator free block info.
+PADDLE_API extern std::vector<std::vector<std::pair<size_t, uintptr_t>>>
+FreeBlockInfoOfVmmAllocator();
+#endif
+
 } // namespace memory
 } // namespace paddle

paddle/phi/core/memory/mem_visitor.cc
Lines changed: 13 additions & 0 deletions

@@ -72,6 +72,19 @@ void TryAllocVisitor::Visit(
   VLOG(1) << "Visit VirtualMemoryAutoGrowthBestFitAllocator try_alloc_result:"
           << is_try_alloc_success_;
 }
+
+void VMMFreeBlocksInfoVisitor::Visit(
+    VirtualMemoryAutoGrowthBestFitAllocator* allocator) {
+  std::vector<std::pair<size_t, uintptr_t>> keys;
+  for (const auto& item : allocator->GetFreeBlocks()) {
+    size_t size = item.first.first;
+    uintptr_t addr = reinterpret_cast<uintptr_t>(item.first.second);
+    keys.emplace_back(size, addr);
+  }
+  if (!keys.empty()) {
+    free_blocks_info_.push_back(keys);
+  }
+}
 #endif
 } // namespace memory
 } // namespace paddle
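
One detail worth noting in Visit(): block addresses are exported as uintptr_t rather than as raw void* values, so the result is a plain integer payload that copies cleanly across the C++/Python boundary. A tiny sketch of the cast used there; converting back through reinterpret_cast yields a pointer equal to the original:

#include <cstdint>
#include <cstdio>

int main() {
  int object = 42;
  void *ptr = &object;

  // Same cast as in VMMFreeBlocksInfoVisitor::Visit: pointer -> integer.
  uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);

  // Converting back gives the original pointer value.
  void *round_trip = reinterpret_cast<void *>(addr);
  std::printf("round trip preserved the pointer: %d\n", ptr == round_trip);
  return 0;
}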

paddle/phi/core/memory/mem_visitor.h
Lines changed: 56 additions & 0 deletions

@@ -174,6 +174,62 @@ class TryAllocVisitor : public AllocatorVisitor {
   const std::vector<size_t>& sizes_;
   bool is_try_alloc_success_ = false;
 };
+
+/**
+ * @brief Visitor class to retrieve free block information from a VMM allocator.
+ *
+ * Inherits from AllocatorVisitor, implementing the Visitor Pattern.
+ * The purpose of this class is to access a specific memory allocator's
+ * internal state (the list of free memory blocks) and extract key information
+ * (size and address) for external analysis or debugging.
+ */
+class VMMFreeBlocksInfoVisitor : public AllocatorVisitor {
+ public:
+  /**
+   * @brief Default Constructor.
+   */
+  VMMFreeBlocksInfoVisitor() {}
+
+  /**
+   * @brief Retrieves the collected information about the free memory blocks.
+   *
+   * The structure is a nested vector:
+   * Outer Vector: Represents different categories or lists within the
+   * allocator. Inner Vector: Contains pairs of (size, address) for the free
+   * blocks in that category. uintptr_t is used to safely store the memory
+   * address (void*) as an integer.
+   *
+   * @return A nested vector structure containing the size and integer address
+   * of all free blocks.
+   */
+  std::vector<std::vector<std::pair<size_t, uintptr_t>>> GetFreeBlocksInfo()
+      const {
+    return free_blocks_info_;
+  }
+
+  /**
+   * @brief Visits the VirtualMemoryAutoGrowthBestFitAllocator.
+   *
+   * This is the core implementation of the Visitor Pattern. When called,
+   * it accesses the `allocator` object's internal structure that holds the
+   * free block list(s) and populates the `free_blocks_info_` member variable
+   * with the necessary data.
+   *
+   * @param allocator Pointer to the memory allocator object whose free blocks
+   * information is to be extracted.
+   */
+  void Visit(VirtualMemoryAutoGrowthBestFitAllocator* allocator) override;
+
+ private:
+  /**
+   * @brief Stores the extracted free block information.
+   *
+   * This member is populated during the Visit() call. It is structured to
+   * hold lists of (size, address) pairs, where the outer vector typically
+   * distinguishes between different free lists (e.g., small, large blocks).
+   */
+  std::vector<std::vector<std::pair<size_t, uintptr_t>>> free_blocks_info_;
+};
 #endif
 
 } // namespace memory
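
The new class plugs into the same Accept/Visit double dispatch already used by FreeMemoryMetricsVisitor and TryAllocVisitor: a read-only query object is handed to an allocator, and the allocator calls back into the visitor with itself. A stripped-down sketch of that pattern (toy Allocator and Visitor types, not Paddle's class hierarchy) for readers unfamiliar with it:

// Minimal visitor-pattern sketch mirroring AllocatorVisitor / Accept / Visit.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

class ToyVmmAllocator;  // forward declaration needed by the visitor interface

class ToyAllocatorVisitor {  // analogue of AllocatorVisitor
 public:
  virtual ~ToyAllocatorVisitor() = default;
  virtual void Visit(ToyVmmAllocator *allocator) = 0;
};

class ToyVmmAllocator {  // analogue of the VMM allocator
 public:
  explicit ToyVmmAllocator(std::vector<size_t> free_sizes)
      : free_sizes_(std::move(free_sizes)) {}

  // Double dispatch: the allocator hands itself back to the visitor.
  void Accept(ToyAllocatorVisitor *visitor) { visitor->Visit(this); }

  const std::vector<size_t> &free_sizes() const { return free_sizes_; }

 private:
  std::vector<size_t> free_sizes_;  // stand-in for the free_blocks_ map
};

class ToyFreeBlocksVisitor : public ToyAllocatorVisitor {
 public:
  void Visit(ToyVmmAllocator *allocator) override {
    collected_.push_back(allocator->free_sizes());  // one entry per allocator
  }
  size_t visited() const { return collected_.size(); }

 private:
  std::vector<std::vector<size_t>> collected_;
};

int main() {
  ToyVmmAllocator a({4096, 8192});
  ToyVmmAllocator b({1 << 20});

  // In Paddle, AllocatorFacade::Instance().Accept(place, &visitor) plays the
  // role of this walk over the allocators for the given place.
  ToyFreeBlocksVisitor visitor;
  a.Accept(&visitor);
  b.Accept(&visitor);

  std::printf("visited %zu allocators\n", visitor.visited());
  return 0;
}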

paddle/phi/core/platform/device/gpu/gpu_info.cc
Lines changed: 0 additions & 2 deletions

@@ -615,8 +615,6 @@ void EmptyCache() {
   }
 }
 
-void VmmCompact() { memory::Compact(phi::GPUPlace(GetCurrentDeviceId())); }
-
 bool IsGPUManagedMemorySupported(int dev_id) {
   return phi::backends::gpu::IsGPUManagedMemorySupported(dev_id);
 }
