
Commit f90e6c1

【Allocator】Return free blocks info (PaddlePaddle#76499)
* return free blocks info
* fix dcu
1 parent ae814e1 commit f90e6c1

File tree: 12 files changed (+141, -35 lines)

paddle/fluid/pybind/pybind.cc
Lines changed: 6 additions & 2 deletions

@@ -141,6 +141,7 @@ limitations under the License. */
 #include "paddle/phi/core/compat/convert_utils.h"
 #include "paddle/phi/core/lod_utils.h"
 #include "paddle/phi/core/memory/allocation/mmap_allocator.h"
+#include "paddle/phi/core/memory/mem_utils.h"
 #include "paddle/phi/core/platform/cpu_helper.h"
 #include "paddle/phi/core/platform/device/device_wrapper.h"
 #include "paddle/phi/core/platform/device_context.h"

@@ -3623,7 +3624,6 @@ All parameter, weight, gradient are variables in Paddle.
     }
     platform::EmptyCache();
   });
-  m.def("vmm_compact", [] { platform::VmmCompact(); });
   m.def(
       "get_device_properties",
       [](int id) -> const gpuDeviceProp & {

@@ -3673,8 +3673,12 @@ All parameter, weight, gradient are variables in Paddle.
 #endif
 #if defined(PADDLE_WITH_CUDA)
   m.def("vmm_max_free_size", [] {
-    memory::VmmMaxFreeSize(phi::GPUPlace(platform::GetCurrentDeviceId()), 1);
+    return memory::VmmMaxFreeSize(phi::GPUPlace(platform::GetCurrentDeviceId()),
+                                  1);
   });
+  m.def("vmm_compact", [] { return paddle::memory::VmmCompact(); });
+  m.def("vmm_free_block_info",
+        [] { return paddle::memory::FreeBlockInfoOfVmmAllocator(); });
 #endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
   m.def(
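
The three bindings above just return the lambdas' C++ results and let pybind11 convert them into Python objects. Below is a minimal, self-contained sketch (the module name vmm_demo, the sample data, and the helper FakeFreeBlockInfo are made up for this example, not part of the commit) of how a function with the same return type as FreeBlockInfoOfVmmAllocator surfaces in Python as a list of lists of (size, address) tuples once <pybind11/stl.h> is included:

// Standalone sketch: nested std containers returned from an m.def lambda are
// converted to nested Python lists/tuples by pybind11's STL casters.
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>  // enables std::vector / std::pair conversion

namespace py = pybind11;

// Stand-in for paddle::memory::FreeBlockInfoOfVmmAllocator(): one inner vector
// per allocator, each entry a (block_size, block_address) pair.
std::vector<std::vector<std::pair<size_t, uintptr_t>>> FakeFreeBlockInfo() {
  return {{{4096, 0x1000}, {8192, 0x3000}}};
}

PYBIND11_MODULE(vmm_demo, m) {
  // In Python: vmm_demo.vmm_free_block_info() -> [[(4096, 4096), (8192, 12288)]]
  m.def("vmm_free_block_info", [] { return FakeFreeBlockInfo(); });
}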

paddle/phi/api/lib/api_gen_utils.cc
Lines changed: 1 addition & 0 deletions

@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/common/flags.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/core/memory/malloc.h"
+#include "paddle/phi/core/memory/mem_utils.h"
 #include "paddle/phi/core/memory/stats.h"
 #include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/strided_copy_kernel.h"

paddle/phi/core/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h
Lines changed: 5 additions & 0 deletions

@@ -47,6 +47,11 @@ class VirtualMemoryAutoGrowthBestFitAllocator : public Allocator {
   std::shared_ptr<Allocator> &GetUnderLyingAllocator() {
     return underlying_allocator_;
   }
+  std::map<std::pair<size_t, void *>, std::list<Block>::iterator>
+      &GetFreeBlocks() {
+    return free_blocks_;
+  }
+
   std::pair<size_t, size_t> SumLargestFreeBlockSizes(int32_t n) const;
   void Accept(AllocatorVisitor *visitor) override { visitor->Visit(this); }
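
GetFreeBlocks() hands out the allocator's internal free-block index: a std::map keyed by (block size, block address) whose values point back into the block list. The standalone sketch below (simplified Block struct and hand-built data; the rationale for the key order is an assumption, not stated in the commit) shows what that ordering buys: iterating the map walks free blocks in ascending size, and a lower_bound on the size turns a best-fit style lookup into a single ordered-map search.

// Illustrative only: mirrors the shape of free_blocks_, not Paddle's real Block.
#include <cstddef>
#include <cstdio>
#include <list>
#include <map>
#include <utility>

struct Block {
  void *ptr;
  size_t size;
  bool is_free;
};

int main() {
  std::list<Block> blocks = {{reinterpret_cast<void *>(0x1000), 4096, true},
                             {reinterpret_cast<void *>(0x3000), 8192, true}};

  // Same shape as the map returned by GetFreeBlocks().
  std::map<std::pair<size_t, void *>, std::list<Block>::iterator> free_blocks;
  for (auto it = blocks.begin(); it != blocks.end(); ++it) {
    if (it->is_free) {
      free_blocks.emplace(std::make_pair(it->size, it->ptr), it);
    }
  }

  // Best-fit style query for a 5000-byte request: the first free block whose
  // size is >= 5000, found with one lower_bound over the (size, ptr) keys.
  auto fit = free_blocks.lower_bound({5000, nullptr});
  if (fit != free_blocks.end()) {
    std::printf("best fit: size=%zu addr=%p\n", fit->first.first,
                fit->first.second);
  }
  return 0;
}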

paddle/phi/core/memory/malloc.cc
Lines changed: 0 additions & 18 deletions

@@ -81,24 +81,6 @@ gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation) {
 
 #endif
 
-#if defined(PADDLE_WITH_CUDA)
-std::pair<size_t, size_t> VmmMaxFreeSize(const phi::GPUPlace& place,
-                                         int32_t n) {
-  FreeMemoryMetricsVisitor free_memory_metrics_visitor(n);
-  allocation::AllocatorFacade::Instance().Accept(place,
-                                                 &free_memory_metrics_visitor);
-  return std::make_pair(free_memory_metrics_visitor.GetLargeSize(),
-                        free_memory_metrics_visitor.GetSumSize());
-}
-
-bool TryAllocBatch(const phi::GPUPlace& place,
-                   const std::vector<size_t>& sizes) {
-  TryAllocVisitor try_alloc_visitor(sizes);
-  allocation::AllocatorFacade::Instance().Accept(place, &try_alloc_visitor);
-  return try_alloc_visitor.IsTryAllocSuccess();
-}
-#endif
-
 #ifdef PADDLE_WITH_XPU
 bool RecordStream(std::shared_ptr<Allocation> allocation, XPUStream stream) {
   return allocation::AllocatorFacade::Instance().RecordStream(allocation,

paddle/phi/core/memory/malloc.h
Lines changed: 0 additions & 10 deletions

@@ -69,16 +69,6 @@ void EraseStream(std::shared_ptr<Allocation> allocation, gpuStream_t stream);
 PADDLE_API gpuStream_t GetStream(const std::shared_ptr<Allocation>& allocation);
 #endif
 
-#if defined(PADDLE_WITH_CUDA)
-// return a pair of <largest_free_block_size, sum_of_n_largest_free_block_size>
-PADDLE_API extern std::pair<size_t, size_t> VmmMaxFreeSize(
-    const phi::GPUPlace& place, int32_t n);
-// Try using Allocator to simulate an allocation, simulating a request for
-// vector<size>.
-PADDLE_API extern bool TryAllocBatch(const phi::GPUPlace& place,
-                                     const std::vector<size_t>& sizes);
-#endif
-
 #ifdef PADDLE_WITH_XPU
 bool RecordStream(std::shared_ptr<Allocation> allocation, XPUStream stream);
 #endif

paddle/phi/core/memory/mem_utils.cc
Lines changed: 35 additions & 0 deletions

@@ -13,7 +13,11 @@
 // limitations under the License.
 
 #include "paddle/phi/core/memory/mem_utils.h"
+
 #include <algorithm>
+#include "paddle/phi/core/memory/allocation/allocator_facade.h"
+#include "paddle/phi/core/memory/malloc.h"
+#include "paddle/phi/core/platform/device/gpu/gpu_info.h"
 
 #ifdef PADDLE_WITH_CUDA
 #include <cuda.h>

@@ -85,5 +89,36 @@ size_t TotalMemoryCompactor::Compact(std::list<Block>& blocks,
 #endif
 }
 
+#if defined(PADDLE_WITH_CUDA)
+std::pair<size_t, size_t> VmmMaxFreeSize(const phi::GPUPlace& place,
+                                         int32_t n) {
+  FreeMemoryMetricsVisitor free_memory_metrics_visitor(n);
+  allocation::AllocatorFacade::Instance().Accept(place,
+                                                 &free_memory_metrics_visitor);
+  return std::make_pair(free_memory_metrics_visitor.GetLargeSize(),
+                        free_memory_metrics_visitor.GetSumSize());
+}
+
+bool TryAllocBatch(const phi::GPUPlace& place,
+                   const std::vector<size_t>& sizes) {
+  TryAllocVisitor try_alloc_visitor(sizes);
+  allocation::AllocatorFacade::Instance().Accept(place, &try_alloc_visitor);
+  return try_alloc_visitor.IsTryAllocSuccess();
+}
+
+size_t VmmCompact() {
+  return memory::Compact(phi::GPUPlace(paddle::platform::GetCurrentDeviceId()));
+}
+
+std::vector<std::vector<std::pair<size_t, uintptr_t>>>
+FreeBlockInfoOfVmmAllocator() {
+  VMMFreeBlocksInfoVisitor free_blocks_info_visitor;
+  allocation::AllocatorFacade::Instance().Accept(
+      phi::GPUPlace(paddle::platform::GetCurrentDeviceId()),
+      &free_blocks_info_visitor);
+  return free_blocks_info_visitor.GetFreeBlocksInfo();
+}
+#endif
+
 } // namespace memory
 } // namespace paddle
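
FreeBlockInfoOfVmmAllocator() returns one inner vector per allocator that reported free blocks, each inner entry being a (size in bytes, address as uintptr_t) pair, which is exactly the structure VMMFreeBlocksInfoVisitor accumulates. A small sketch of how a caller might summarize that shape (the data below is made up; real values come from the visitor):

// Standalone example that consumes the same nested structure the API returns.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Outer vector: one entry per allocator with free blocks.
  // Inner vector: (block_size_in_bytes, block_address) pairs.
  std::vector<std::vector<std::pair<size_t, uintptr_t>>> info = {
      {{1 << 20, 0x1000}, {4 << 20, 0x200000}},  // allocator 0
      {{2 << 20, 0x900000}},                     // allocator 1
  };

  for (size_t i = 0; i < info.size(); ++i) {
    size_t total = 0, largest = 0;
    for (const auto &block : info[i]) {
      total += block.first;
      largest = std::max(largest, block.first);
    }
    std::printf("allocator %zu: %zu blocks, %zu bytes free, largest %zu\n",
                i, info[i].size(), total, largest);
  }
  return 0;
}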

paddle/phi/core/memory/mem_utils.h
Lines changed: 19 additions & 0 deletions

@@ -80,5 +80,24 @@ class TotalMemoryCompactor final : public MemoryCompactionStrategy {
                  void* start_ptr,
                  void* end_ptr) override;
 };
+
+#if defined(PADDLE_WITH_CUDA)
+// return a pair of <largest_free_block_size, sum_of_n_largest_free_block_size>
+PADDLE_API extern std::pair<size_t, size_t> VmmMaxFreeSize(
+    const phi::GPUPlace& place, int32_t n);
+
+// Try using Allocator to simulate an allocation, simulating a request for
+// vector<size>.
+PADDLE_API extern bool TryAllocBatch(const phi::GPUPlace& place,
+                                     const std::vector<size_t>& sizes);
+
+// Compact memory of free blocks held by the VmmAllocator.
+PADDLE_API extern size_t VmmCompact(void);
+
+// Get VMM allocator free block info.
+PADDLE_API extern std::vector<std::vector<std::pair<size_t, uintptr_t>>>
+FreeBlockInfoOfVmmAllocator();
+#endif
+
 } // namespace memory
 } // namespace paddle

paddle/phi/core/memory/mem_visitor.cc
Lines changed: 13 additions & 0 deletions

@@ -72,6 +72,19 @@ void TryAllocVisitor::Visit(
   VLOG(1) << "Visit VirtualMemoryAutoGrowthBestFitAllocator try_alloc_result:"
           << is_try_alloc_success_;
 }
+
+void VMMFreeBlocksInfoVisitor::Visit(
+    VirtualMemoryAutoGrowthBestFitAllocator* allocator) {
+  std::vector<std::pair<size_t, uintptr_t>> keys;
+  for (const auto& item : allocator->GetFreeBlocks()) {
+    size_t size = item.first.first;
+    uintptr_t addr = reinterpret_cast<uintptr_t>(item.first.second);
+    keys.emplace_back(size, addr);
+  }
+  if (!keys.empty()) {
+    free_blocks_info_.push_back(keys);
+  }
+}
 #endif
 } // namespace memory
 } // namespace paddle
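
One detail worth noting in Visit(): block addresses are exported as uintptr_t rather than as raw void* values, so the result is a plain integer payload that copies cleanly across the C++/Python boundary. A tiny sketch of the cast used there; converting back through reinterpret_cast yields a pointer equal to the original:

#include <cstdint>
#include <cstdio>

int main() {
  int object = 42;
  void *ptr = &object;

  // Same cast as in VMMFreeBlocksInfoVisitor::Visit: pointer -> integer.
  uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);

  // Converting back gives the original pointer value.
  void *round_trip = reinterpret_cast<void *>(addr);
  std::printf("round trip preserved the pointer: %d\n", ptr == round_trip);
  return 0;
}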

paddle/phi/core/memory/mem_visitor.h
Lines changed: 56 additions & 0 deletions

@@ -174,6 +174,62 @@ class TryAllocVisitor : public AllocatorVisitor {
   const std::vector<size_t>& sizes_;
   bool is_try_alloc_success_ = false;
 };
+
+/**
+ * @brief Visitor class to retrieve free block information from a VMM allocator.
+ *
+ * Inherits from AllocatorVisitor, implementing the Visitor Pattern.
+ * The purpose of this class is to access a specific memory allocator's
+ * internal state (the list of free memory blocks) and extract key information
+ * (size and address) for external analysis or debugging.
+ */
+class VMMFreeBlocksInfoVisitor : public AllocatorVisitor {
+ public:
+  /**
+   * @brief Default Constructor.
+   */
+  VMMFreeBlocksInfoVisitor() {}
+
+  /**
+   * @brief Retrieves the collected information about the free memory blocks.
+   *
+   * The structure is a nested vector:
+   * Outer Vector: Represents different categories or lists within the
+   * allocator. Inner Vector: Contains pairs of (size, address) for the free
+   * blocks in that category. uintptr_t is used to safely store the memory
+   * address (void*) as an integer.
+   *
+   * @return A nested vector structure containing the size and integer address
+   * of all free blocks.
+   */
+  std::vector<std::vector<std::pair<size_t, uintptr_t>>> GetFreeBlocksInfo()
+      const {
+    return free_blocks_info_;
+  }
+
+  /**
+   * @brief Visits the VirtualMemoryAutoGrowthBestFitAllocator.
+   *
+   * This is the core implementation of the Visitor Pattern. When called,
+   * it accesses the `allocator` object's internal structure that holds the
+   * free block list(s) and populates the `free_blocks_info_` member variable
+   * with the necessary data.
+   *
+   * @param allocator Pointer to the memory allocator object whose free blocks
+   * information is to be extracted.
+   */
+  void Visit(VirtualMemoryAutoGrowthBestFitAllocator* allocator) override;
+
+ private:
+  /**
+   * @brief Stores the extracted free block information.
+   *
+   * This member is populated during the Visit() call. It is structured to
+   * hold lists of (size, address) pairs, where the outer vector typically
+   * distinguishes between different free lists (e.g., small, large blocks).
+   */
+  std::vector<std::vector<std::pair<size_t, uintptr_t>>> free_blocks_info_;
+};
 #endif
 
 } // namespace memory
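
The new class plugs into the same Accept/Visit double dispatch already used by FreeMemoryMetricsVisitor and TryAllocVisitor: a read-only query object is handed to an allocator, and the allocator calls back into the visitor with itself. A stripped-down sketch of that pattern (toy Allocator and Visitor types, not Paddle's class hierarchy) for readers unfamiliar with it:

// Minimal visitor-pattern sketch mirroring AllocatorVisitor / Accept / Visit.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

class ToyVmmAllocator;  // forward declaration needed by the visitor interface

class ToyAllocatorVisitor {  // analogue of AllocatorVisitor
 public:
  virtual ~ToyAllocatorVisitor() = default;
  virtual void Visit(ToyVmmAllocator *allocator) = 0;
};

class ToyVmmAllocator {  // analogue of the VMM allocator
 public:
  explicit ToyVmmAllocator(std::vector<size_t> free_sizes)
      : free_sizes_(std::move(free_sizes)) {}

  // Double dispatch: the allocator hands itself back to the visitor.
  void Accept(ToyAllocatorVisitor *visitor) { visitor->Visit(this); }

  const std::vector<size_t> &free_sizes() const { return free_sizes_; }

 private:
  std::vector<size_t> free_sizes_;  // stand-in for the free_blocks_ map
};

class ToyFreeBlocksVisitor : public ToyAllocatorVisitor {
 public:
  void Visit(ToyVmmAllocator *allocator) override {
    collected_.push_back(allocator->free_sizes());  // one entry per allocator
  }
  size_t visited() const { return collected_.size(); }

 private:
  std::vector<std::vector<size_t>> collected_;
};

int main() {
  ToyVmmAllocator a({4096, 8192});
  ToyVmmAllocator b({1 << 20});

  // In Paddle, AllocatorFacade::Instance().Accept(place, &visitor) plays the
  // role of this walk over the allocators for the given place.
  ToyFreeBlocksVisitor visitor;
  a.Accept(&visitor);
  b.Accept(&visitor);

  std::printf("visited %zu allocators\n", visitor.visited());
  return 0;
}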

paddle/phi/core/platform/device/gpu/gpu_info.cc
Lines changed: 0 additions & 2 deletions

@@ -615,8 +615,6 @@ void EmptyCache() {
   }
 }
 
-void VmmCompact() { memory::Compact(phi::GPUPlace(GetCurrentDeviceId())); }
-
 bool IsGPUManagedMemorySupported(int dev_id) {
   return phi::backends::gpu::IsGPUManagedMemorySupported(dev_id);
 }
