diff --git a/src/allocator/allocator.c b/src/allocator/allocator.c
index ba264ba..79ae06b 100755
--- a/src/allocator/allocator.c
+++ b/src/allocator/allocator.c
@@ -272,3 +272,79 @@ int allocate_async_raw(CUdeviceptr *dptr, size_t size, CUstream hStream){
     pthread_mutex_unlock(&mutex);
     return tmp;
 }
+
+/*
+ * Serialized entry point used by the cuMemCreate hook: runs
+ * add_chunk_virtual_memory() with the allocator mutex held and returns its
+ * result unchanged.
+ */
+int allocate_virtual_memory_management(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags){
+    int tmp;
+    pthread_mutex_lock(&mutex);
+    tmp = add_chunk_virtual_memory(handle,size,prop,flags);
+    pthread_mutex_unlock(&mutex);
+    return tmp;
+}
+
+/*
+ * Creates an allocation via CUDA_OVERRIDE_CALL(cuMemCreate) and, on success,
+ * records `size` bytes against the calling process on the current context's
+ * device. Takes no lock itself; allocate_virtual_memory_management() holds
+ * the allocator mutex around this call.
+ *
+ * Returns CUDA_SUCCESS on success, CUDA_ERROR_OUT_OF_MEMORY when oom_check()
+ * rejects the request, or the CUresult of the driver call that failed.
+ */
+int add_chunk_virtual_memory(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags){
+    CUresult res;
+    CUdevice dev;
+
+    /* The device id feeds both the limit check and the accounting below, so
+     * bail out instead of using an uninitialized `dev` when it is unavailable. */
+    res = cuCtxGetDevice(&dev);
+    if (res!=CUDA_SUCCESS){
+        LOG_ERROR("cuCtxGetDevice failed res=%d",res);
+        return res;
+    }
+    if (oom_check(dev,size))
+        return CUDA_ERROR_OUT_OF_MEMORY;
+    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemCreate,handle,size,prop,flags);
+    if (res!=CUDA_SUCCESS){
+        LOG_ERROR("cuMemCreate failed res=%d",res);
+        return res;
+    }
+    /* NOTE(review): charged to the current context's device rather than
+     * prop->location -- confirm callers always allocate on the current device. */
+    add_gpu_device_memory_usage(getpid(),dev,size,2);
+    return CUDA_SUCCESS;
+}
+
+/*
+ * Unmaps [ptr, ptr+size) via CUDA_OVERRIDE_CALL(cuMemUnmap) under the
+ * allocator mutex and, only if the unmap succeeded, subtracts `size` bytes
+ * from the calling process's usage on the current context's device.
+ *
+ * Returns the CUresult of cuMemUnmap so the hook can report failures.
+ * NOTE(review): usage is added at cuMemCreate but removed at cuMemUnmap;
+ * confirm the sizes pair up (cuMemRelease is what frees the allocation).
+ */
+int remove_virtual_memory_management(CUdeviceptr ptr, size_t size){
+    CUresult res;
+    CUresult dev_res;
+    CUdevice dev;
+
+    pthread_mutex_lock(&mutex);
+    res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemUnmap,ptr,size);
+    if (res!=CUDA_SUCCESS){
+        /* Nothing was unmapped, so the usage counters must stay as they are. */
+        LOG_ERROR("cuMemUnmap failed res=%d",res);
+    } else {
+        dev_res = cuCtxGetDevice(&dev);
+        if (dev_res==CUDA_SUCCESS)
+            rm_gpu_device_memory_usage(getpid(),dev,size,2);
+        else
+            LOG_ERROR("cuCtxGetDevice failed res=%d",dev_res);
+    }
+    pthread_mutex_unlock(&mutex);
+    return res;
+}
diff --git a/src/allocator/allocator.h b/src/allocator/allocator.h
index 2064ee3..4508271 100755
--- a/src/allocator/allocator.h
+++ b/src/allocator/allocator.h
@@ -161,3 +161,7 @@ int free_raw_async(CUdeviceptr dptr, CUstream hStream);
 
 // Checks memory type
 int check_memory_type(CUdeviceptr address);
+// Virtual memory management
+int allocate_virtual_memory_management(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags);
+int add_chunk_virtual_memory(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags);
+int remove_virtual_memory_management(CUdeviceptr ptr, size_t size);
diff --git a/src/cuda/hook.c b/src/cuda/hook.c
index 96fc33e..642978b 100644
--- a/src/cuda/hook.c
+++ b/src/cuda/hook.c
@@ -168,6 +168,7 @@ cuda_entry_t cuda_library_entry[] = {
     {.name = "cuSignalExternalSemaphoresAsync"},
     {.name = "cuWaitExternalSemaphoresAsync"},
     {.name = "cuDestroyExternalSemaphore"},
+    {.name = "cuMemUnmap"},
     /* Graph part */
     {.name = "cuGraphCreate"},
     {.name = "cuGraphAddKernelNode_v2"},
diff --git a/src/cuda/memory.c b/src/cuda/memory.c
index 8dd87ef..df8587a 100755
--- a/src/cuda/memory.c
+++ b/src/cuda/memory.c
@@ -573,8 +573,14 @@ CUresult cuMemAddressReserve ( CUdeviceptr* ptr, size_t size, size_t alignment,
 }
 
 CUresult cuMemCreate ( CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags ) {
-    LOG_INFO("cuMemCreate:");
-    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry,cuMemCreate,handle,size,prop,flags);
+    LOG_INFO("into cuMemCreate:");
+    ENSURE_RUNNING();
+    /* Route through the allocator so the request is limit-checked and accounted. */
+    CUresult res = allocate_virtual_memory_management(handle,size,prop,flags);
+    if (res!=CUDA_SUCCESS)
+        return res;
+    /* size is a size_t, so it is printed with %zu. */
+    LOG_INFO("cuMemCreate success with bytesize=%zu", size);
     return res;
 }
 
@@ -781,3 +787,15 @@ CUresult cuDestroyExternalSemaphore(CUexternalSemaphore extSem) {
     LOG_DEBUG("cuDestroyExternalSemaphore");
     return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDestroyExternalSemaphore,extSem);
 }
+
+/*
+ * Hook for cuMemUnmap: delegates to remove_virtual_memory_management() and
+ * returns whatever status it reports.
+ */
+CUresult cuMemUnmap( CUdeviceptr ptr, size_t size ) {
+    LOG_INFO("into cuMemUnmap:");
+    ENSURE_RUNNING();
+    CUresult res = remove_virtual_memory_management(ptr,size);
+    LOG_DEBUG("cuMemUnmap: dptr=%p size=%zu res=%d",(void *)ptr,size,res);
+    return res;
+}
diff --git a/src/include/libcuda_hook.h b/src/include/libcuda_hook.h
index 2d1592f..85a081a 100644
--- a/src/include/libcuda_hook.h
+++ b/src/include/libcuda_hook.h
@@ -200,6 +200,7 @@ typedef enum {
     CUDA_OVERRIDE_ENUM(cuSignalExternalSemaphoresAsync),
     CUDA_OVERRIDE_ENUM(cuWaitExternalSemaphoresAsync),
     CUDA_OVERRIDE_ENUM(cuDestroyExternalSemaphore),
+    CUDA_OVERRIDE_ENUM(cuMemUnmap),
     /* cuda graph part */
     CUDA_OVERRIDE_ENUM(cuGraphCreate),
     CUDA_OVERRIDE_ENUM(cuGraphAddKernelNode_v2),
diff --git a/src/libvgpu.c b/src/libvgpu.c
index cf9e652..d84609f 100644
--- a/src/libvgpu.c
+++ b/src/libvgpu.c
@@ -288,6 +288,7 @@ void* __dlsym_hook_section(void* handle, const char* symbol) {
     DLSYM_HOOK_FUNC(cuSignalExternalSemaphoresAsync);
     DLSYM_HOOK_FUNC(cuWaitExternalSemaphoresAsync);
     DLSYM_HOOK_FUNC(cuDestroyExternalSemaphore);
+    DLSYM_HOOK_FUNC(cuMemUnmap);
     /* cuda Graph */
     DLSYM_HOOK_FUNC(cuGraphCreate);
     DLSYM_HOOK_FUNC(cuGraphAddKernelNode_v2);