Skip to content

Commit b590be6

Browse files
committed
[ASan][Compiler-rt] Support graceful hsa-runtime shutdown.
Summary: - Track runtime shutdown via AmdgpuMemFuncs::GetAmdgpuRuntimeShutdown() and gate further pointer_info calls. - Add interception of the 'hsa_init' API call. - Add registration of hsa-runtime-associated system events via 'hsa_amd_register_system_event_handler'. - The HSA event registered is 'HSA_AMD_SYSTEM_SHUTDOWN_EVENT'.
1 parent 09a4d41 commit b590be6

File tree

6 files changed

+125
-51
lines changed

6 files changed

+125
-51
lines changed

compiler-rt/lib/asan/asan_allocator.cpp

Lines changed: 42 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,6 +1385,7 @@ int __asan_update_allocation_context(void* addr) {
13851385
}
13861386

13871387
#if SANITIZER_AMDGPU
1388+
DECLARE_REAL(hsa_status_t, hsa_init);
13881389
DECLARE_REAL(hsa_status_t, hsa_amd_agents_allow_access, uint32_t num_agents,
13891390
const hsa_agent_t *agents, const uint32_t *flags, const void *ptr)
13901391
DECLARE_REAL(hsa_status_t, hsa_amd_memory_pool_allocate,
@@ -1400,43 +1401,44 @@ DECLARE_REAL(hsa_status_t, hsa_amd_ipc_memory_detach, void *mapped_ptr)
14001401
DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_reserve_align, void** ptr,
14011402
size_t size, uint64_t address, uint64_t alignment, uint64_t flags)
14021403
DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size);
1404+
DECLARE_REAL(hsa_status_t, hsa_amd_register_system_event_handler,
1405+
hsa_amd_system_event_callback_t, void*)
14031406

14041407
namespace __asan {
1405-
14061408
// Always align to page boundary to match current ROCr behavior
14071409
static const size_t kPageSize_ = 4096;
14081410

14091411
hsa_status_t asan_hsa_amd_memory_pool_allocate(
1410-
hsa_amd_memory_pool_t memory_pool, size_t size, uint32_t flags, void **ptr,
1411-
BufferedStackTrace *stack) {
1412+
hsa_amd_memory_pool_t memory_pool, size_t size, uint32_t flags, void** ptr,
1413+
BufferedStackTrace* stack) {
14121414
AmdgpuAllocationInfo aa_info;
14131415
aa_info.alloc_func =
1414-
reinterpret_cast<void *>(asan_hsa_amd_memory_pool_allocate);
1416+
reinterpret_cast<void*>(asan_hsa_amd_memory_pool_allocate);
14151417
aa_info.memory_pool = memory_pool;
14161418
aa_info.size = size;
14171419
aa_info.flags = flags;
14181420
aa_info.ptr = nullptr;
1419-
SetErrnoOnNull(*ptr = instance.Allocate(size, kPageSize_, stack,
1420-
FROM_MALLOC, false, &aa_info));
1421+
SetErrnoOnNull(*ptr = instance.Allocate(size, kPageSize_, stack, FROM_MALLOC,
1422+
false, &aa_info));
14211423
return aa_info.status;
14221424
}
14231425

1424-
hsa_status_t asan_hsa_amd_memory_pool_free(
1425-
void *ptr,
1426-
BufferedStackTrace *stack) {
1427-
void *p = get_allocator().GetBlockBegin(ptr);
1426+
hsa_status_t asan_hsa_amd_memory_pool_free(void* ptr,
1427+
BufferedStackTrace* stack) {
1428+
void* p = get_allocator().GetBlockBegin(ptr);
14281429
if (p) {
14291430
instance.Deallocate(ptr, 0, 0, stack, FROM_MALLOC);
14301431
return HSA_STATUS_SUCCESS;
14311432
}
14321433
return REAL(hsa_amd_memory_pool_free)(ptr);
14331434
}
14341435

1435-
hsa_status_t asan_hsa_amd_agents_allow_access(
1436-
uint32_t num_agents, const hsa_agent_t *agents, const uint32_t *flags,
1437-
const void *ptr,
1438-
BufferedStackTrace *stack) {
1439-
void *p = get_allocator().GetBlockBegin(ptr);
1436+
hsa_status_t asan_hsa_amd_agents_allow_access(uint32_t num_agents,
1437+
const hsa_agent_t* agents,
1438+
const uint32_t* flags,
1439+
const void* ptr,
1440+
BufferedStackTrace* stack) {
1441+
void* p = get_allocator().GetBlockBegin(ptr);
14401442
return REAL(hsa_amd_agents_allow_access)(num_agents, agents, flags,
14411443
p ? p : ptr);
14421444
}
@@ -1446,44 +1448,45 @@ hsa_status_t asan_hsa_amd_agents_allow_access(
14461448
// is always one kPageSize_
14471449
// IPC calls use static_assert to make sure kMetadataSize = 0
14481450
//
1449-
#if SANITIZER_CAN_USE_ALLOCATOR64
1451+
# if SANITIZER_CAN_USE_ALLOCATOR64
14501452
static struct AP64<LocalAddressSpaceView> AP_;
1451-
#else
1453+
# else
14521454
static struct AP32<LocalAddressSpaceView> AP_;
1453-
#endif
1455+
# endif
14541456

1455-
hsa_status_t asan_hsa_amd_ipc_memory_create(void *ptr, size_t len,
1456-
hsa_amd_ipc_memory_t * handle) {
1457-
void *ptr_;
1457+
hsa_status_t asan_hsa_amd_ipc_memory_create(void* ptr, size_t len,
1458+
hsa_amd_ipc_memory_t* handle) {
1459+
void* ptr_;
14581460
size_t len_ = get_allocator().GetActuallyAllocatedSize(ptr);
14591461
if (len_) {
14601462
static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
1461-
ptr_ = reinterpret_cast<void *>(reinterpret_cast<uptr>(ptr) - kPageSize_);
1463+
ptr_ = reinterpret_cast<void*>(reinterpret_cast<uptr>(ptr) - kPageSize_);
14621464
} else {
14631465
ptr_ = ptr;
14641466
len_ = len;
14651467
}
14661468
return REAL(hsa_amd_ipc_memory_create)(ptr_, len_, handle);
14671469
}
14681470

1469-
hsa_status_t asan_hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t *handle,
1470-
size_t len, uint32_t num_agents, const hsa_agent_t *mapping_agents,
1471-
void **mapped_ptr) {
1471+
hsa_status_t asan_hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t* handle,
1472+
size_t len, uint32_t num_agents,
1473+
const hsa_agent_t* mapping_agents,
1474+
void** mapped_ptr) {
14721475
static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
14731476
size_t len_ = len + kPageSize_;
14741477
hsa_status_t status = REAL(hsa_amd_ipc_memory_attach)(
1475-
handle, len_, num_agents, mapping_agents, mapped_ptr);
1478+
handle, len_, num_agents, mapping_agents, mapped_ptr);
14761479
if (status == HSA_STATUS_SUCCESS && mapped_ptr) {
1477-
*mapped_ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(*mapped_ptr) +
1478-
kPageSize_);
1480+
*mapped_ptr = reinterpret_cast<void*>(reinterpret_cast<uptr>(*mapped_ptr) +
1481+
kPageSize_);
14791482
}
14801483
return status;
14811484
}
14821485

1483-
hsa_status_t asan_hsa_amd_ipc_memory_detach(void *mapped_ptr) {
1486+
hsa_status_t asan_hsa_amd_ipc_memory_detach(void* mapped_ptr) {
14841487
static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
1485-
void *mapped_ptr_ =
1486-
reinterpret_cast<void *>(reinterpret_cast<uptr>(mapped_ptr) - kPageSize_);
1488+
void* mapped_ptr_ =
1489+
reinterpret_cast<void*>(reinterpret_cast<uptr>(mapped_ptr) - kPageSize_);
14871490
return REAL(hsa_amd_ipc_memory_detach)(mapped_ptr_);
14881491
}
14891492

@@ -1540,5 +1543,13 @@ hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
15401543
}
15411544
return REAL(hsa_amd_vmem_address_free)(ptr, size);
15421545
}
1546+
1547+
hsa_status_t asan_hsa_init() {
1548+
hsa_status_t status = REAL(hsa_init)();
1549+
if (status == HSA_STATUS_SUCCESS)
1550+
__sanitizer::AmdgpuMemFuncs::RegisterSystemEventHandlers();
1551+
return status;
1552+
}
1553+
15431554
} // namespace __asan
15441555
#endif

compiler-rt/lib/asan/asan_allocator.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ hsa_status_t asan_hsa_amd_vmem_address_reserve_align(void** ptr, size_t size,
341341
BufferedStackTrace* stack);
342342
hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
343343
BufferedStackTrace* stack);
344+
hsa_status_t asan_hsa_init();
344345
} // namespace __asan
345346
#endif
346347

compiler-rt/lib/asan/asan_interceptors.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,13 @@ INTERCEPTOR(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size) {
948948
return asan_hsa_amd_vmem_address_free(ptr, size, &stack);
949949
}
950950

951+
INTERCEPTOR(hsa_status_t, hsa_init) {
952+
AsanInitFromRtl();
953+
return asan_hsa_init();
954+
}
955+
951956
void InitializeAmdgpuInterceptors() {
957+
ASAN_INTERCEPT_FUNC(hsa_init);
952958
ASAN_INTERCEPT_FUNC(hsa_memory_copy);
953959
ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_allocate);
954960
ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_free);
@@ -965,7 +971,7 @@ void InitializeAmdgpuInterceptors() {
965971
}
966972

967973
void ENSURE_HSA_INITED() {
968-
if (!REAL(hsa_memory_copy))
974+
if (!REAL(hsa_init))
969975
InitializeAmdgpuInterceptors();
970976
}
971977
#endif

compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,44 @@
1111
//===----------------------------------------------------------------------===//
1212
#if SANITIZER_AMDGPU
1313
# include <dlfcn.h> // For dlsym
14+
1415
# include "sanitizer_allocator.h"
16+
# include "sanitizer_atomic.h"
1517

1618
namespace __sanitizer {
17-
struct HsaMemoryFunctions {
19+
struct HsaFunctions {
20+
// ---------------- Memory Functions ----------------
1821
hsa_status_t (*memory_pool_allocate)(hsa_amd_memory_pool_t memory_pool,
19-
size_t size, uint32_t flags, void **ptr);
20-
hsa_status_t (*memory_pool_free)(void *ptr);
21-
hsa_status_t (*pointer_info)(void *ptr, hsa_amd_pointer_info_t *info,
22-
void *(*alloc)(size_t),
23-
uint32_t *num_agents_accessible,
24-
hsa_agent_t **accessible);
22+
size_t size, uint32_t flags, void** ptr);
23+
hsa_status_t (*memory_pool_free)(void* ptr);
24+
hsa_status_t (*pointer_info)(void* ptr, hsa_amd_pointer_info_t* info,
25+
void* (*alloc)(size_t),
26+
uint32_t* num_agents_accessible,
27+
hsa_agent_t** accessible);
2528
hsa_status_t (*vmem_address_reserve_align)(void** ptr, size_t size,
2629
uint64_t address,
2730
uint64_t alignment,
2831
uint64_t flags);
2932
hsa_status_t (*vmem_address_free)(void* ptr, size_t size);
33+
34+
// ---------------- Event Functions ----------------
35+
hsa_status_t (*register_system_event_handler)(
36+
hsa_amd_system_event_callback_t callback, void* data);
3037
};
3138

32-
static HsaMemoryFunctions hsa_amd;
39+
static HsaFunctions hsa_amd;
3340

3441
// Always align to page boundary to match current ROCr behavior
3542
static const size_t kPageSize_ = 4096;
3643

44+
static atomic_uint8_t amdgpu_runtime_shutdown{0};
45+
static atomic_uint8_t amdgpu_event_registered{0};
46+
47+
bool AmdgpuMemFuncs::GetAmdgpuRuntimeShutdown() {
48+
return static_cast<bool>(
49+
atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire));
50+
}
51+
3752
bool AmdgpuMemFuncs::Init() {
3853
hsa_amd.memory_pool_allocate =
3954
(decltype(hsa_amd.memory_pool_allocate))dlsym(
@@ -47,15 +62,20 @@ bool AmdgpuMemFuncs::Init() {
4762
RTLD_NEXT, "hsa_amd_vmem_address_reserve_align");
4863
hsa_amd.vmem_address_free = (decltype(hsa_amd.vmem_address_free))dlsym(
4964
RTLD_NEXT, "hsa_amd_vmem_address_free");
65+
hsa_amd.register_system_event_handler =
66+
(decltype(hsa_amd.register_system_event_handler))dlsym(
67+
RTLD_NEXT, "hsa_amd_register_system_event_handler");
5068
if (!hsa_amd.memory_pool_allocate || !hsa_amd.memory_pool_free ||
5169
!hsa_amd.pointer_info || !hsa_amd.vmem_address_reserve_align ||
52-
!hsa_amd.vmem_address_free)
70+
!hsa_amd.vmem_address_free || !hsa_amd.register_system_event_handler)
5371
return false;
5472
return true;
5573
}
5674

5775
void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
5876
DeviceAllocationInfo *da_info) {
77+
if (atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire))
78+
return nullptr;
5979
AmdgpuAllocationInfo *aa_info =
6080
reinterpret_cast<AmdgpuAllocationInfo *>(da_info);
6181
if (!aa_info->memory_pool.handle) {
@@ -73,6 +93,8 @@ void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
7393
}
7494

7595
void AmdgpuMemFuncs::Deallocate(void *p) {
96+
if (atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire))
97+
return;
7698
DevicePointerInfo DevPtrInfo;
7799
if (AmdgpuMemFuncs::GetPointerInfo(reinterpret_cast<uptr>(p), &DevPtrInfo)) {
78100
if (DevPtrInfo.type == HSA_EXT_POINTER_TYPE_HSA) {
@@ -103,6 +125,30 @@ bool AmdgpuMemFuncs::GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info) {
103125
return true;
104126
}
105127

128+
void AmdgpuMemFuncs::RegisterSystemEventHandlers() {
129+
// Register the shutdown system event handler only once.
130+
if (atomic_load(&amdgpu_event_registered, memory_order_acquire) == 0) {
131+
// Callback to just detect runtime shutdown
132+
hsa_amd_system_event_callback_t callback = [](const hsa_amd_event_t* event,
133+
void* data) {
134+
if (!event)
135+
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
136+
if (event->event_type == HSA_AMD_SYSTEM_SHUTDOWN_EVENT) {
137+
uint8_t shutdown = 0;
138+
if (atomic_compare_exchange_strong(&amdgpu_runtime_shutdown, &shutdown,
139+
1, memory_order_acq_rel)) {
140+
// Evict all allocations (add purge logic here).
141+
}
142+
}
143+
return HSA_STATUS_SUCCESS;
144+
};
145+
hsa_status_t status =
146+
hsa_amd.register_system_event_handler(callback, nullptr);
147+
if (status == HSA_STATUS_SUCCESS)
148+
atomic_store(&amdgpu_event_registered, 1, memory_order_release);
149+
}
150+
}
151+
106152
uptr AmdgpuMemFuncs::GetPageSize() { return kPageSize_; }
107153
} // namespace __sanitizer
108154
#endif // SANITIZER_AMDGPU

compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ class AmdgpuMemFuncs {
2222
static void Deallocate(void *p);
2323
static bool GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info);
2424
static uptr GetPageSize();
25+
static void RegisterSystemEventHandlers();
26+
static bool GetAmdgpuRuntimeShutdown();
2527
};
2628

2729
struct AmdgpuAllocationInfo : public DeviceAllocationInfo {

compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ class DeviceAllocatorT {
122122
CHECK_EQ(chunks_[idx], p_);
123123
CHECK_LT(idx, n_chunks_);
124124
h = GetHeader(chunks_[idx], &header);
125-
CHECK(!dev_runtime_unloaded_);
125+
if (dev_runtime_unloaded_)
126+
return;
126127
chunks_[idx] = chunks_[--n_chunks_];
127128
chunks_sorted_ = false;
128129
stats.n_frees++;
@@ -140,7 +141,8 @@ class DeviceAllocatorT {
140141
uptr res = 0;
141142
for (uptr i = 0; i < n_chunks_; i++) {
142143
Header *h = GetHeader(chunks_[i], &header);
143-
CHECK(!dev_runtime_unloaded_);
144+
if (dev_runtime_unloaded_)
145+
return 0;
144146
res += RoundUpMapSize(h->map_size);
145147
}
146148
return res;
@@ -188,7 +190,6 @@ class DeviceAllocatorT {
188190
CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
189191
CHECK_LE(nearest_chunk, p);
190192
if (h->map_beg + h->map_size <= p) {
191-
CHECK(!dev_runtime_unloaded_);
192193
return nullptr;
193194
}
194195
}
@@ -306,14 +307,21 @@ class DeviceAllocatorT {
306307
}
307308

308309
Header* GetHeader(uptr chunk, Header* h) const {
309-
if (dev_runtime_unloaded_ || !DeviceMemFuncs::GetPointerInfo(chunk, h)) {
310-
// Device allocator has dependency on device runtime. If device runtime
311-
// is unloaded, GetPointerInfo() will fail. For such case, we can still
312-
// return a valid value for map_beg, map_size will be limited to one page
313-
h->map_beg = chunk;
314-
h->map_size = page_size_;
315-
dev_runtime_unloaded_ = true;
310+
// Device allocator has dependency on device runtime. If device runtime
311+
// is unloaded, GetPointerInfo() will fail. For such case, we can still
312+
// return a valid value for map_beg, map_size will be limited to one page
313+
if (!dev_runtime_unloaded_) {
314+
if (DeviceMemFuncs::GetPointerInfo(chunk, h))
315+
return h;
316+
// If GetPointerInfo() fails, we don't assume the runtime is unloaded yet.
317+
// We just return a conservative single-page header. Here mark/check the
318+
// runtime shutdown state
319+
dev_runtime_unloaded_ = DeviceMemFuncs::GetAmdgpuRuntimeShutdown();
316320
}
321+
// If we reach here, device runtime is unloaded.
322+
// Fallback: conservative single-page header
323+
h->map_beg = chunk;
324+
h->map_size = page_size_;
317325
return h;
318326
}
319327

0 commit comments

Comments
 (0)