diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 06c827c41eacc..7130f3e408074 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -1385,6 +1385,7 @@ int __asan_update_allocation_context(void* addr) {
 }
 
 #if SANITIZER_AMDGPU
+DECLARE_REAL(hsa_status_t, hsa_init);
 DECLARE_REAL(hsa_status_t, hsa_amd_agents_allow_access, uint32_t num_agents,
   const hsa_agent_t *agents, const uint32_t *flags, const void *ptr)
 DECLARE_REAL(hsa_status_t, hsa_amd_memory_pool_allocate,
@@ -1400,9 +1401,10 @@ DECLARE_REAL(hsa_status_t, hsa_amd_ipc_memory_detach, void *mapped_ptr)
 DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_reserve_align, void** ptr,
              size_t size, uint64_t address, uint64_t alignment, uint64_t flags)
 DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size);
+DECLARE_REAL(hsa_status_t, hsa_amd_register_system_event_handler,
+             hsa_amd_system_event_callback_t, void*)
 
 namespace __asan {
-
 // Always align to page boundary to match current ROCr behavior
 static const size_t kPageSize_ = 4096;
 
@@ -1540,5 +1542,13 @@ hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
   }
   return REAL(hsa_amd_vmem_address_free)(ptr, size);
 }
+
+hsa_status_t asan_hsa_init() {
+  hsa_status_t status = REAL(hsa_init)();
+  if (status == HSA_STATUS_SUCCESS)
+    __sanitizer::AmdgpuMemFuncs::RegisterSystemEventHandlers();
+  return status;
+}
+
 }  // namespace __asan
 #endif
diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h
index ced10f62b7a58..73e21b95d0f59 100644
--- a/compiler-rt/lib/asan/asan_allocator.h
+++ b/compiler-rt/lib/asan/asan_allocator.h
@@ -341,6 +341,7 @@ hsa_status_t asan_hsa_amd_vmem_address_reserve_align(void** ptr, size_t size,
                                                      BufferedStackTrace* stack);
 hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
                                             BufferedStackTrace* stack);
+hsa_status_t asan_hsa_init();
 }  // namespace __asan
 #endif
 
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index 0951a77b1b93e..cfee0fa51fb89 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -948,7 +948,14 @@ INTERCEPTOR(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size) {
   return asan_hsa_amd_vmem_address_free(ptr, size, &stack);
 }
 
+INTERCEPTOR(hsa_status_t, hsa_init) {
+  AsanInitFromRtl();
+  ENSURE_HSA_INITED();
+  return asan_hsa_init();
+}
+
 void InitializeAmdgpuInterceptors() {
+  ASAN_INTERCEPT_FUNC(hsa_init);
   ASAN_INTERCEPT_FUNC(hsa_memory_copy);
   ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_allocate);
   ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_free);
@@ -965,7 +972,7 @@ void InitializeAmdgpuInterceptors() {
 }
 
 void ENSURE_HSA_INITED() {
-  if (!REAL(hsa_memory_copy))
+  if (!REAL(hsa_init))
     InitializeAmdgpuInterceptors();
 }
 #endif
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
index cf10cb773e746..8dcf90798db01 100755
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
@@ -12,9 +12,11 @@
 #if SANITIZER_AMDGPU
 #  include <dlfcn.h>  // For dlsym
 #  include "sanitizer_allocator.h"
+#  include "sanitizer_atomic.h"
 
 namespace __sanitizer {
-struct HsaMemoryFunctions {
+struct HsaFunctions {
+  // ---------------- Memory Functions ----------------
   hsa_status_t (*memory_pool_allocate)(hsa_amd_memory_pool_t memory_pool,
                                        size_t size, uint32_t flags, void **ptr);
   hsa_status_t (*memory_pool_free)(void *ptr);
@@ -26,14 +28,36 @@ struct HsaMemoryFunctions {
                                              uint64_t address,
                                              uint64_t alignment,
                                              uint64_t flags);
-  hsa_status_t (*vmem_address_free)(void* ptr, size_t size);
+  hsa_status_t (*vmem_address_free)(void *ptr, size_t size);
+
+  // ---------------- Event Functions ----------------
+  hsa_status_t (*register_system_event_handler)(
+      hsa_amd_system_event_callback_t callback, void *data);
 };
 
-static HsaMemoryFunctions hsa_amd;
+static HsaFunctions hsa_amd;
 
 // Always align to page boundary to match current ROCr behavior
 static const size_t kPageSize_ = 4096;
 
+static atomic_uint8_t amdgpu_runtime_shutdown{0};
+static atomic_uint8_t amdgpu_event_registered{0};
+
+// Returns true once the AMDGPU runtime has reported its shutdown event
+bool AmdgpuMemFuncs::IsAmdgpuRuntimeShutdown() {
+  return static_cast<bool>(
+      atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire));
+}
+
+// Notify AMDGPU runtime shutdown to allocator
+void AmdgpuMemFuncs::NotifyAmdgpuRuntimeShutdown() {
+  uint8_t shutdown = 0;
+  if (atomic_compare_exchange_strong(&amdgpu_runtime_shutdown, &shutdown, 1,
+                                     memory_order_acq_rel)) {
+    VReport(1, " Amdgpu Allocator: AMDGPU runtime shutdown detected\n");
+  }
+}
+
 bool AmdgpuMemFuncs::Init() {
   hsa_amd.memory_pool_allocate =
       (decltype(hsa_amd.memory_pool_allocate))dlsym(
@@ -47,15 +71,21 @@ bool AmdgpuMemFuncs::Init() {
       RTLD_NEXT, "hsa_amd_vmem_address_reserve_align");
   hsa_amd.vmem_address_free = (decltype(hsa_amd.vmem_address_free))dlsym(
       RTLD_NEXT, "hsa_amd_vmem_address_free");
+  hsa_amd.register_system_event_handler =
+      (decltype(hsa_amd.register_system_event_handler))dlsym(
+          RTLD_NEXT, "hsa_amd_register_system_event_handler");
   if (!hsa_amd.memory_pool_allocate || !hsa_amd.memory_pool_free ||
       !hsa_amd.pointer_info || !hsa_amd.vmem_address_reserve_align ||
-      !hsa_amd.vmem_address_free)
+      !hsa_amd.vmem_address_free || !hsa_amd.register_system_event_handler)
     return false;
   return true;
 }
 
 void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
                                DeviceAllocationInfo *da_info) {
+  // Do not allocate if AMDGPU runtime is shutdown
+  if (IsAmdgpuRuntimeShutdown())
+    return nullptr;
   AmdgpuAllocationInfo *aa_info =
       reinterpret_cast<AmdgpuAllocationInfo *>(da_info);
   if (!aa_info->memory_pool.handle) {
@@ -73,6 +103,9 @@ void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
 }
 
 void AmdgpuMemFuncs::Deallocate(void *p) {
+  // Deallocate does nothing after AMDGPU runtime shutdown
+  if (IsAmdgpuRuntimeShutdown())
+    return;
   DevicePointerInfo DevPtrInfo;
   if (AmdgpuMemFuncs::GetPointerInfo(reinterpret_cast<uptr>(p), &DevPtrInfo)) {
     if (DevPtrInfo.type == HSA_EXT_POINTER_TYPE_HSA) {
@@ -102,6 +135,34 @@ bool AmdgpuMemFuncs::GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info) {
   return true;
 }
 
+// Registers the system event handler used to detect AMDGPU runtime shutdown.
+// Registration happens at most once, even if hsa_init() is called repeatedly
+// or from several threads.
+// TODO: Register multiple event handlers if needed in future
+void AmdgpuMemFuncs::RegisterSystemEventHandlers() {
+  // Init() may not have run yet, or may have failed to resolve the symbol.
+  if (!hsa_amd.register_system_event_handler)
+    return;
+  // Claim the registration slot; only the caller that flips 0 -> 1 proceeds.
+  uint8_t registered = 0;
+  if (!atomic_compare_exchange_strong(&amdgpu_event_registered, &registered, 1,
+                                      memory_order_acq_rel))
+    return;
+  // Callback to just detect runtime shutdown
+  hsa_amd_system_event_callback_t callback = [](const hsa_amd_event_t* event,
+                                                void* /*data*/) {
+    if (!event)
+      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+    if (event->event_type == HSA_AMD_SYSTEM_SHUTDOWN_EVENT)
+      AmdgpuMemFuncs::NotifyAmdgpuRuntimeShutdown();
+    return HSA_STATUS_SUCCESS;
+  };
+  // Release the slot on failure so that a later hsa_init() can retry.
+  if (hsa_amd.register_system_event_handler(callback, nullptr) !=
+      HSA_STATUS_SUCCESS)
+    atomic_store(&amdgpu_event_registered, 0, memory_order_release);
+}
+
 uptr AmdgpuMemFuncs::GetPageSize() { return kPageSize_; }
 }  // namespace __sanitizer
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
index 84b62964e5145..b6f4a75076122 100755
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
@@ -22,6 +22,11 @@ class AmdgpuMemFuncs {
   static void Deallocate(void *p);
   static bool GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info);
   static uptr GetPageSize();
+  static void RegisterSystemEventHandlers();
+  static bool IsAmdgpuRuntimeShutdown();
+
+ private:
+  static void NotifyAmdgpuRuntimeShutdown();
 };
 
 struct AmdgpuAllocationInfo : public DeviceAllocationInfo {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
index f76800da79ac3..0718ab13d2325 100755
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
@@ -122,7 +122,8 @@ class DeviceAllocatorT {
       CHECK_EQ(chunks_[idx], p_);
       CHECK_LT(idx, n_chunks_);
       h = GetHeader(chunks_[idx], &header);
-      CHECK(!dev_runtime_unloaded_);
+      if (dev_runtime_unloaded_)
+        return;
       chunks_[idx] = chunks_[--n_chunks_];
       chunks_sorted_ = false;
       stats.n_frees++;
@@ -140,7 +141,8 @@ class DeviceAllocatorT {
     uptr res = 0;
     for (uptr i = 0; i < n_chunks_; i++) {
       Header *h = GetHeader(chunks_[i], &header);
-      CHECK(!dev_runtime_unloaded_);
+      if (dev_runtime_unloaded_)
+        return 0;
       res += RoundUpMapSize(h->map_size);
     }
     return res;
@@ -188,7 +190,6 @@ class DeviceAllocatorT {
       CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
       CHECK_LE(nearest_chunk, p);
       if (h->map_beg + h->map_size <= p) {
-        CHECK(!dev_runtime_unloaded_);
         return nullptr;
       }
     }
@@ -306,14 +307,21 @@ class DeviceAllocatorT {
   }
 
   Header* GetHeader(uptr chunk, Header* h) const {
-    if (dev_runtime_unloaded_ || !DeviceMemFuncs::GetPointerInfo(chunk, h)) {
-      // Device allocator has dependency on device runtime. If device runtime
-      // is unloaded, GetPointerInfo() will fail. For such case, we can still
-      // return a valid value for map_beg, map_size will be limited to one page
-      h->map_beg = chunk;
-      h->map_size = page_size_;
-      dev_runtime_unloaded_ = true;
+    // Device allocator has dependency on device runtime. If device runtime
+    // is unloaded, GetPointerInfo() will fail. For such case, we can still
+    // return a valid value for map_beg, map_size will be limited to one page
+    if (!dev_runtime_unloaded_) {
+      if (DeviceMemFuncs::GetPointerInfo(chunk, h))
+        return h;
+      // A failed GetPointerInfo() alone does not prove the runtime is gone;
+      // latch the unloaded state only if the runtime has reported shutdown.
+      // Either way, fall through to the conservative header below.
+      dev_runtime_unloaded_ = DeviceMemFuncs::IsAmdgpuRuntimeShutdown();
     }
+    // Reached when the runtime is unloaded or the pointer lookup failed.
+    // Fallback: conservative single-page header
+    h->map_beg = chunk;
+    h->map_size = page_size_;
     return h;
   }
 