Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion compiler-rt/lib/asan/asan_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,7 @@ int __asan_update_allocation_context(void* addr) {
}

#if SANITIZER_AMDGPU
DECLARE_REAL(hsa_status_t, hsa_init);
DECLARE_REAL(hsa_status_t, hsa_amd_agents_allow_access, uint32_t num_agents,
const hsa_agent_t *agents, const uint32_t *flags, const void *ptr)
DECLARE_REAL(hsa_status_t, hsa_amd_memory_pool_allocate,
Expand All @@ -1400,9 +1401,10 @@ DECLARE_REAL(hsa_status_t, hsa_amd_ipc_memory_detach, void *mapped_ptr)
DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_reserve_align, void** ptr,
size_t size, uint64_t address, uint64_t alignment, uint64_t flags)
DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size);
DECLARE_REAL(hsa_status_t, hsa_amd_register_system_event_handler,
hsa_amd_system_event_callback_t, void*)

namespace __asan {

// Always align to page boundary to match current ROCr behavior
static const size_t kPageSize_ = 4096;

Expand Down Expand Up @@ -1540,5 +1542,13 @@ hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
}
return REAL(hsa_amd_vmem_address_free)(ptr, size);
}

// ASan-side wrapper for hsa_init(): brings the HSA runtime up via the real
// entry point and, on success, hooks the AMDGPU system shutdown event so the
// device allocator can stop calling into the runtime after it goes away.
hsa_status_t asan_hsa_init() {
  const hsa_status_t init_status = REAL(hsa_init)();
  if (init_status != HSA_STATUS_SUCCESS)
    return init_status;
  __sanitizer::AmdgpuMemFuncs::RegisterSystemEventHandlers();
  return init_status;
}

} // namespace __asan
#endif
1 change: 1 addition & 0 deletions compiler-rt/lib/asan/asan_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ hsa_status_t asan_hsa_amd_vmem_address_reserve_align(void** ptr, size_t size,
BufferedStackTrace* stack);
hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
BufferedStackTrace* stack);
hsa_status_t asan_hsa_init();
} // namespace __asan
#endif

Expand Down
9 changes: 8 additions & 1 deletion compiler-rt/lib/asan/asan_interceptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,14 @@ INTERCEPTOR(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size) {
return asan_hsa_amd_vmem_address_free(ptr, size, &stack);
}

// Interceptor for hsa_init(). Ensures ASan runtime initialization and that
// the AMDGPU interceptor table is populated (ENSURE_HSA_INITED resolves the
// REAL() pointers) before delegating to asan_hsa_init(), which calls the real
// hsa_init and registers the shutdown event handler on success.
INTERCEPTOR(hsa_status_t, hsa_init) {
  AsanInitFromRtl();
  ENSURE_HSA_INITED();
  return asan_hsa_init();
}

void InitializeAmdgpuInterceptors() {
ASAN_INTERCEPT_FUNC(hsa_init);
ASAN_INTERCEPT_FUNC(hsa_memory_copy);
ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_allocate);
ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_free);
Expand All @@ -965,7 +972,7 @@ void InitializeAmdgpuInterceptors() {
}

// Lazily set up the AMDGPU interceptors. REAL(hsa_init) is only populated by
// InitializeAmdgpuInterceptors(), so a null pointer means the interceptor
// table has not been installed yet.
void ENSURE_HSA_INITED() {
  if (REAL(hsa_init) == nullptr)
    InitializeAmdgpuInterceptors();
}
#endif
Expand Down
63 changes: 59 additions & 4 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
#if SANITIZER_AMDGPU
# include <dlfcn.h> // For dlsym
# include "sanitizer_allocator.h"
# include "sanitizer_atomic.h"

namespace __sanitizer {
struct HsaMemoryFunctions {
struct HsaFunctions {
// ---------------- Memory Functions ----------------
hsa_status_t (*memory_pool_allocate)(hsa_amd_memory_pool_t memory_pool,
size_t size, uint32_t flags, void **ptr);
hsa_status_t (*memory_pool_free)(void *ptr);
Expand All @@ -26,14 +28,36 @@ struct HsaMemoryFunctions {
uint64_t address,
uint64_t alignment,
uint64_t flags);
hsa_status_t (*vmem_address_free)(void* ptr, size_t size);
hsa_status_t (*vmem_address_free)(void *ptr, size_t size);

// ----------------Event Functions ----------------
hsa_status_t (*register_system_event_handler)(
hsa_amd_system_event_callback_t callback, void *data);
};

static HsaMemoryFunctions hsa_amd;
static HsaFunctions hsa_amd;

// Always align to page boundary to match current ROCr behavior
static const size_t kPageSize_ = 4096;

static atomic_uint8_t amdgpu_runtime_shutdown{0};
static atomic_uint8_t amdgpu_event_registered{0};

// Returns true once the AMDGPU runtime shutdown event has been observed
// (flag is published by NotifyAmdgpuRuntimeShutdown with release semantics).
bool AmdgpuMemFuncs::IsAmdgpuRuntimeShutdown() {
  return atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire) != 0;
}

// Notify AMDGPU runtime shutdown to allocator
void AmdgpuMemFuncs::NotifyAmdgpuRuntimeShutdown() {
uint8_t shutdown = 0;
if (atomic_compare_exchange_strong(&amdgpu_runtime_shutdown, &shutdown, 1,
memory_order_acq_rel)) {
VReport(1, " Amdgpu Allocator: AMDGPU runtime shutdown detected\n");
}
}

bool AmdgpuMemFuncs::Init() {
hsa_amd.memory_pool_allocate =
(decltype(hsa_amd.memory_pool_allocate))dlsym(
Expand All @@ -47,15 +71,21 @@ bool AmdgpuMemFuncs::Init() {
RTLD_NEXT, "hsa_amd_vmem_address_reserve_align");
hsa_amd.vmem_address_free = (decltype(hsa_amd.vmem_address_free))dlsym(
RTLD_NEXT, "hsa_amd_vmem_address_free");
hsa_amd.register_system_event_handler =
(decltype(hsa_amd.register_system_event_handler))dlsym(
RTLD_NEXT, "hsa_amd_register_system_event_handler");
if (!hsa_amd.memory_pool_allocate || !hsa_amd.memory_pool_free ||
!hsa_amd.pointer_info || !hsa_amd.vmem_address_reserve_align ||
!hsa_amd.vmem_address_free)
!hsa_amd.vmem_address_free || !hsa_amd.register_system_event_handler)
return false;
return true;
}

void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
DeviceAllocationInfo *da_info) {
// Do not allocate if AMDGPU runtime is shutdown
if (IsAmdgpuRuntimeShutdown())
return nullptr;
AmdgpuAllocationInfo *aa_info =
reinterpret_cast<AmdgpuAllocationInfo *>(da_info);
if (!aa_info->memory_pool.handle) {
Expand All @@ -73,6 +103,9 @@ void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
}

void AmdgpuMemFuncs::Deallocate(void *p) {
// Deallocate does nothing after AMDGPU runtime shutdown
if (IsAmdgpuRuntimeShutdown())
return;
DevicePointerInfo DevPtrInfo;
if (AmdgpuMemFuncs::GetPointerInfo(reinterpret_cast<uptr>(p), &DevPtrInfo)) {
if (DevPtrInfo.type == HSA_EXT_POINTER_TYPE_HSA) {
Expand Down Expand Up @@ -102,6 +135,28 @@ bool AmdgpuMemFuncs::GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info) {

return true;
}
// Register shutdown system event handler only once
// TODO: Register multiple event handlers if needed in future
void AmdgpuMemFuncs::RegisterSystemEventHandlers() {
// Check if already registered
if (atomic_load(&amdgpu_event_registered, memory_order_acquire) == 0) {
// Callback to just detect runtime shutdown
hsa_amd_system_event_callback_t callback = [](const hsa_amd_event_t* event,
void* data) {
if (!event)
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
if (event->event_type == HSA_AMD_SYSTEM_SHUTDOWN_EVENT)
AmdgpuMemFuncs::NotifyAmdgpuRuntimeShutdown();
return HSA_STATUS_SUCCESS;
};
// Register the callback
hsa_status_t status =
hsa_amd.register_system_event_handler(callback, nullptr);
// Mark as registered if successful
if (status == HSA_STATUS_SUCCESS)
atomic_store(&amdgpu_event_registered, 1, memory_order_release);
}
}

uptr AmdgpuMemFuncs::GetPageSize() { return kPageSize_; }
} // namespace __sanitizer
Expand Down
5 changes: 5 additions & 0 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ class AmdgpuMemFuncs {
static void Deallocate(void *p);
static bool GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info);
static uptr GetPageSize();
static void RegisterSystemEventHandlers();
static bool IsAmdgpuRuntimeShutdown();

private:
static void NotifyAmdgpuRuntimeShutdown();
};

struct AmdgpuAllocationInfo : public DeviceAllocationInfo {
Expand Down
28 changes: 18 additions & 10 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ class DeviceAllocatorT {
CHECK_EQ(chunks_[idx], p_);
CHECK_LT(idx, n_chunks_);
h = GetHeader(chunks_[idx], &header);
CHECK(!dev_runtime_unloaded_);
if (dev_runtime_unloaded_)
return;
chunks_[idx] = chunks_[--n_chunks_];
chunks_sorted_ = false;
stats.n_frees++;
Expand All @@ -140,7 +141,8 @@ class DeviceAllocatorT {
uptr res = 0;
for (uptr i = 0; i < n_chunks_; i++) {
Header *h = GetHeader(chunks_[i], &header);
CHECK(!dev_runtime_unloaded_);
if (dev_runtime_unloaded_)
return 0;
res += RoundUpMapSize(h->map_size);
}
return res;
Expand Down Expand Up @@ -188,7 +190,6 @@ class DeviceAllocatorT {
CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
CHECK_LE(nearest_chunk, p);
if (h->map_beg + h->map_size <= p) {
CHECK(!dev_runtime_unloaded_);
return nullptr;
}
}
Expand Down Expand Up @@ -306,14 +307,21 @@ class DeviceAllocatorT {
}

// Resolve the mapping header for `chunk` into `h` and return it.
// While the device runtime is believed alive, the header comes from
// DeviceMemFuncs::GetPointerInfo(). If that lookup fails — or the runtime
// has already been marked unloaded — we fall back to a conservative header:
// map_beg is the chunk itself and map_size is limited to a single page.
// NOTE(review): this const method assigns dev_runtime_unloaded_ — the member
// is presumably declared mutable in the (not shown) class body; confirm.
Header* GetHeader(uptr chunk, Header* h) const {
  // Device allocator has dependency on device runtime. If device runtime
  // is unloaded, GetPointerInfo() will fail. For such case, we can still
  // return a valid value for map_beg, map_size will be limited to one page
  if (!dev_runtime_unloaded_) {
    if (DeviceMemFuncs::GetPointerInfo(chunk, h))
      return h;
    // GetPointerInfo() failed, but that alone does not prove the runtime is
    // gone; ask the mem-funcs layer whether a shutdown event was observed
    // and latch the answer so later calls skip the runtime entirely.
    dev_runtime_unloaded_ = DeviceMemFuncs::IsAmdgpuRuntimeShutdown();
  }
  // Reached when the runtime is unloaded OR a lookup failed before shutdown
  // was flagged: return the conservative single-page header.
  h->map_beg = chunk;
  h->map_size = page_size_;
  return h;
}

Expand Down