Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion compiler-rt/lib/asan/asan_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,7 @@ int __asan_update_allocation_context(void* addr) {
}

#if SANITIZER_AMDGPU
DECLARE_REAL(hsa_status_t, hsa_init);
DECLARE_REAL(hsa_status_t, hsa_amd_agents_allow_access, uint32_t num_agents,
const hsa_agent_t *agents, const uint32_t *flags, const void *ptr)
DECLARE_REAL(hsa_status_t, hsa_amd_memory_pool_allocate,
Expand All @@ -1400,9 +1401,10 @@ DECLARE_REAL(hsa_status_t, hsa_amd_ipc_memory_detach, void *mapped_ptr)
DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_reserve_align, void** ptr,
size_t size, uint64_t address, uint64_t alignment, uint64_t flags)
DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size);
DECLARE_REAL(hsa_status_t, hsa_amd_register_system_event_handler,
hsa_amd_system_event_callback_t, void*)

namespace __asan {

// Always align to page boundary to match current ROCr behavior
static const size_t kPageSize_ = 4096;

Expand Down Expand Up @@ -1540,5 +1542,13 @@ hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
}
return REAL(hsa_amd_vmem_address_free)(ptr, size);
}

// ASan-side wrapper for hsa_init(): brings the HSA runtime up via the real
// entry point and, on success, hooks the AMDGPU system shutdown event so the
// device allocator can stop calling into the runtime after it goes away.
hsa_status_t asan_hsa_init() {
  const hsa_status_t init_status = REAL(hsa_init)();
  if (init_status != HSA_STATUS_SUCCESS)
    return init_status;
  __sanitizer::AmdgpuMemFuncs::RegisterSystemEventHandlers();
  return init_status;
}

} // namespace __asan
#endif
1 change: 1 addition & 0 deletions compiler-rt/lib/asan/asan_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ hsa_status_t asan_hsa_amd_vmem_address_reserve_align(void** ptr, size_t size,
BufferedStackTrace* stack);
hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
BufferedStackTrace* stack);
hsa_status_t asan_hsa_init();
} // namespace __asan
#endif

Expand Down
9 changes: 8 additions & 1 deletion compiler-rt/lib/asan/asan_interceptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,14 @@ INTERCEPTOR(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size) {
return asan_hsa_amd_vmem_address_free(ptr, size, &stack);
}

// Interceptor for hsa_init(). Ensures ASan runtime initialization and that
// the AMDGPU interceptor table is populated (ENSURE_HSA_INITED resolves the
// REAL() pointers) before delegating to asan_hsa_init(), which calls the real
// hsa_init and registers the shutdown event handler on success.
INTERCEPTOR(hsa_status_t, hsa_init) {
  AsanInitFromRtl();
  ENSURE_HSA_INITED();
  return asan_hsa_init();
}

void InitializeAmdgpuInterceptors() {
ASAN_INTERCEPT_FUNC(hsa_init);
ASAN_INTERCEPT_FUNC(hsa_memory_copy);
ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_allocate);
ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_free);
Expand All @@ -965,7 +972,7 @@ void InitializeAmdgpuInterceptors() {
}

// Lazily set up the AMDGPU interceptors. REAL(hsa_init) is only populated by
// InitializeAmdgpuInterceptors(), so a null pointer means the interceptor
// table has not been installed yet.
void ENSURE_HSA_INITED() {
  if (REAL(hsa_init) == nullptr)
    InitializeAmdgpuInterceptors();
}
#endif
Expand Down
63 changes: 59 additions & 4 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
#if SANITIZER_AMDGPU
# include <dlfcn.h> // For dlsym
# include "sanitizer_allocator.h"
# include "sanitizer_atomic.h"

namespace __sanitizer {
struct HsaMemoryFunctions {
struct HsaFunctions {
// ---------------- Memory Functions ----------------
hsa_status_t (*memory_pool_allocate)(hsa_amd_memory_pool_t memory_pool,
size_t size, uint32_t flags, void **ptr);
hsa_status_t (*memory_pool_free)(void *ptr);
Expand All @@ -26,14 +28,36 @@ struct HsaMemoryFunctions {
uint64_t address,
uint64_t alignment,
uint64_t flags);
hsa_status_t (*vmem_address_free)(void* ptr, size_t size);
hsa_status_t (*vmem_address_free)(void *ptr, size_t size);

// ----------------Event Functions ----------------
hsa_status_t (*register_system_event_handler)(
hsa_amd_system_event_callback_t callback, void *data);
};

static HsaMemoryFunctions hsa_amd;
static HsaFunctions hsa_amd;

// Always align to page boundary to match current ROCr behavior
static const size_t kPageSize_ = 4096;

static atomic_uint8_t amdgpu_runtime_shutdown{0};
static atomic_uint8_t amdgpu_event_registered{0};

// Returns true once the AMDGPU runtime shutdown event has been observed
// (flag is published by NotifyAmdgpuRuntimeShutdown with release semantics).
bool AmdgpuMemFuncs::IsAmdgpuRuntimeShutdown() {
  return atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire) != 0;
}

// Notify AMDGPU runtime shutdown to allocator
void AmdgpuMemFuncs::NotifyAmdgpuRuntimeShutdown() {
uint8_t shutdown = 0;
if (atomic_compare_exchange_strong(&amdgpu_runtime_shutdown, &shutdown, 1,
memory_order_acq_rel)) {
VReport(1, " Amdgpu Allocator: AMDGPU runtime shutdown detected\n");
}
}

bool AmdgpuMemFuncs::Init() {
hsa_amd.memory_pool_allocate =
(decltype(hsa_amd.memory_pool_allocate))dlsym(
Expand All @@ -47,15 +71,21 @@ bool AmdgpuMemFuncs::Init() {
RTLD_NEXT, "hsa_amd_vmem_address_reserve_align");
hsa_amd.vmem_address_free = (decltype(hsa_amd.vmem_address_free))dlsym(
RTLD_NEXT, "hsa_amd_vmem_address_free");
hsa_amd.register_system_event_handler =
(decltype(hsa_amd.register_system_event_handler))dlsym(
RTLD_NEXT, "hsa_amd_register_system_event_handler");
if (!hsa_amd.memory_pool_allocate || !hsa_amd.memory_pool_free ||
!hsa_amd.pointer_info || !hsa_amd.vmem_address_reserve_align ||
!hsa_amd.vmem_address_free)
!hsa_amd.vmem_address_free || !hsa_amd.register_system_event_handler)
return false;
return true;
}

void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
DeviceAllocationInfo *da_info) {
// Do not allocate if AMDGPU runtime is shutdown
if (IsAmdgpuRuntimeShutdown())
return nullptr;
AmdgpuAllocationInfo *aa_info =
reinterpret_cast<AmdgpuAllocationInfo *>(da_info);
if (!aa_info->memory_pool.handle) {
Expand All @@ -73,6 +103,9 @@ void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
}

void AmdgpuMemFuncs::Deallocate(void *p) {
// Deallocate does nothing after AMDGPU runtime shutdown
if (IsAmdgpuRuntimeShutdown())
return;
DevicePointerInfo DevPtrInfo;
if (AmdgpuMemFuncs::GetPointerInfo(reinterpret_cast<uptr>(p), &DevPtrInfo)) {
if (DevPtrInfo.type == HSA_EXT_POINTER_TYPE_HSA) {
Expand Down Expand Up @@ -102,6 +135,28 @@ bool AmdgpuMemFuncs::GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info) {

return true;
}
// Register shutdown system event handler only once
// TODO: Register multiple event handlers if needed in future
void AmdgpuMemFuncs::RegisterSystemEventHandlers() {
// Check if already registered
if (atomic_load(&amdgpu_event_registered, memory_order_acquire) == 0) {
// Callback to just detect runtime shutdown
hsa_amd_system_event_callback_t callback = [](const hsa_amd_event_t* event,
void* data) {
if (!event)
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
if (event->event_type == HSA_AMD_SYSTEM_SHUTDOWN_EVENT)
AmdgpuMemFuncs::NotifyAmdgpuRuntimeShutdown();
return HSA_STATUS_SUCCESS;
};
// Register the callback
hsa_status_t status =
hsa_amd.register_system_event_handler(callback, nullptr);
// Mark as registered if successful
if (status == HSA_STATUS_SUCCESS)
atomic_store(&amdgpu_event_registered, 1, memory_order_release);
}
}

uptr AmdgpuMemFuncs::GetPageSize() { return kPageSize_; }
} // namespace __sanitizer
Expand Down
5 changes: 5 additions & 0 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ class AmdgpuMemFuncs {
static void Deallocate(void *p);
static bool GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info);
static uptr GetPageSize();
static void RegisterSystemEventHandlers();
static bool IsAmdgpuRuntimeShutdown();

private:
static void NotifyAmdgpuRuntimeShutdown();
};

struct AmdgpuAllocationInfo : public DeviceAllocationInfo {
Expand Down
28 changes: 18 additions & 10 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ class DeviceAllocatorT {
CHECK_EQ(chunks_[idx], p_);
CHECK_LT(idx, n_chunks_);
h = GetHeader(chunks_[idx], &header);
CHECK(!dev_runtime_unloaded_);
if (dev_runtime_unloaded_)
return;
chunks_[idx] = chunks_[--n_chunks_];
chunks_sorted_ = false;
stats.n_frees++;
Expand All @@ -140,7 +141,8 @@ class DeviceAllocatorT {
uptr res = 0;
for (uptr i = 0; i < n_chunks_; i++) {
Header *h = GetHeader(chunks_[i], &header);
CHECK(!dev_runtime_unloaded_);
if (dev_runtime_unloaded_)
return 0;
res += RoundUpMapSize(h->map_size);
}
return res;
Expand Down Expand Up @@ -188,7 +190,6 @@ class DeviceAllocatorT {
CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
CHECK_LE(nearest_chunk, p);
if (h->map_beg + h->map_size <= p) {
CHECK(!dev_runtime_unloaded_);
return nullptr;
}
}
Expand Down Expand Up @@ -306,14 +307,21 @@ class DeviceAllocatorT {
}

// Resolve the mapping header for `chunk` into `h` and return it.
// While the device runtime is believed alive, the header comes from
// DeviceMemFuncs::GetPointerInfo(). If that lookup fails — or the runtime
// has already been marked unloaded — we fall back to a conservative header:
// map_beg is the chunk itself and map_size is limited to a single page.
// NOTE(review): this const method assigns dev_runtime_unloaded_ — the member
// is presumably declared mutable in the (not shown) class body; confirm.
Header* GetHeader(uptr chunk, Header* h) const {
  // Device allocator has dependency on device runtime. If device runtime
  // is unloaded, GetPointerInfo() will fail. For such case, we can still
  // return a valid value for map_beg, map_size will be limited to one page
  if (!dev_runtime_unloaded_) {
    if (DeviceMemFuncs::GetPointerInfo(chunk, h))
      return h;
    // GetPointerInfo() failed, but that alone does not prove the runtime is
    // gone; ask the mem-funcs layer whether a shutdown event was observed
    // and latch the answer so later calls skip the runtime entirely.
    dev_runtime_unloaded_ = DeviceMemFuncs::IsAmdgpuRuntimeShutdown();
  }
  // Reached when the runtime is unloaded OR a lookup failed before shutdown
  // was flagged: return the conservative single-page header.
  h->map_beg = chunk;
  h->map_size = page_size_;
  return h;
}

Expand Down