Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/allocator/allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,39 @@ int allocate_async_raw(CUdeviceptr *dptr, size_t size, CUstream hStream){
pthread_mutex_unlock(&mutex);
return tmp;
}

/*
 * Locked entry point for cuMemCreate-style allocations.
 *
 * Holds `mutex` for the duration of add_chunk_virtual_memory() and returns
 * whatever that call returns, unchanged. All four arguments are forwarded
 * as received.
 */
int allocate_virtual_memory_management(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags){
    pthread_mutex_lock(&mutex);
    const int status = add_chunk_virtual_memory(handle, size, prop, flags);
    pthread_mutex_unlock(&mutex);

    return status;
}

/*
 * Creates an allocation through the real cuMemCreate and records its size
 * against the calling process.
 *
 * handle, size, prop and flags are forwarded unchanged to cuMemCreate.
 *
 * Returns:
 *   CUDA_SUCCESS                on success;
 *   CUDA_ERROR_OUT_OF_MEMORY    when oom_check() rejects the request;
 *   the failing call's CUresult when cuCtxGetDevice or cuMemCreate fails.
 *
 * This function does not take `mutex` itself; within this file it is invoked
 * by allocate_virtual_memory_management() with the lock already held.
 *
 * NOTE(review): the device is taken from the current context, while
 * cuMemCreate places the allocation according to prop->location — confirm
 * that the two always agree for this library's callers.
 */
int add_chunk_virtual_memory(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags){
    CUdevice dev;
    CUresult res = cuCtxGetDevice(&dev);
    if (res != CUDA_SUCCESS) {
        /* Bail out here: otherwise `dev` would be read uninitialized below. */
        LOG_ERROR("cuCtxGetDevice failed res=%d", res);
        return res;
    }

    if (oom_check(dev, size))
        return CUDA_ERROR_OUT_OF_MEMORY;

    res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemCreate, handle, size, prop, flags);
    if (res != CUDA_SUCCESS) {
        LOG_ERROR("cuMemCreate failed res=%d", res);
        return res;
    }

    /* `dev` is still valid: nothing above changes the current context.
     * The trailing 2 is the usage-type argument, passed exactly as before. */
    add_gpu_device_memory_usage(getpid(), dev, size, 2);
    return CUDA_SUCCESS;
}

/*
 * Forwards an unmap request to the real cuMemUnmap while holding `mutex`,
 * and on success removes `size` bytes from the calling process's usage
 * record for the current device.
 *
 * ptr, size: passed unchanged to cuMemUnmap.
 *
 * Returns the CUresult of cuMemUnmap, so a driver failure is reported to the
 * caller instead of being masked as success. When the unmap fails, the usage
 * record is left untouched.
 *
 * NOTE(review): usage is added in add_chunk_virtual_memory() at cuMemCreate
 * time but removed here at cuMemUnmap time; `size` of a create and of an
 * unmap need not match — confirm this pairing is intended.
 */
int remove_virtual_memory_management(CUdeviceptr ptr, size_t size){
    pthread_mutex_lock(&mutex);

    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemUnmap, ptr, size);
    if (res != CUDA_SUCCESS) {
        LOG_ERROR("cuMemUnmap failed res=%d", res);
    } else {
        CUdevice dev;
        CUresult dev_res = cuCtxGetDevice(&dev);
        if (dev_res == CUDA_SUCCESS) {
            rm_gpu_device_memory_usage(getpid(), dev, size, 2);
        } else {
            /* The unmap itself succeeded, so still report success; only the
             * bookkeeping is skipped because `dev` is not valid. */
            LOG_ERROR("cuCtxGetDevice failed res=%d", dev_res);
        }
    }

    pthread_mutex_unlock(&mutex);
    return res;
}
4 changes: 4 additions & 0 deletions src/allocator/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,7 @@ int free_raw_async(CUdeviceptr dptr, CUstream hStream);
// Checks memory type
int check_memory_type(CUdeviceptr address);

// Virtual memory management (cuMemCreate / cuMemUnmap accounting)

// Locks the allocator mutex, calls add_chunk_virtual_memory() with the same
// arguments, unlocks, and returns that call's result.
int allocate_virtual_memory_management(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags);
// Runs oom_check() for the current context's device, forwards to the real
// cuMemCreate, and on success calls add_gpu_device_memory_usage() for the
// calling process. Does not lock; see allocate_virtual_memory_management().
int add_chunk_virtual_memory(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags);
// Under the allocator mutex, forwards to the real cuMemUnmap and calls
// rm_gpu_device_memory_usage() for the calling process with `size`.
int remove_virtual_memory_management(CUdeviceptr ptr, size_t size);
1 change: 1 addition & 0 deletions src/cuda/hook.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ cuda_entry_t cuda_library_entry[] = {
{.name = "cuSignalExternalSemaphoresAsync"},
{.name = "cuWaitExternalSemaphoresAsync"},
{.name = "cuDestroyExternalSemaphore"},
{.name = "cuMemUnmap"},
/* Graph part */
{.name = "cuGraphCreate"},
{.name = "cuGraphAddKernelNode_v2"},
Expand Down
16 changes: 14 additions & 2 deletions src/cuda/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,12 @@ CUresult cuMemAddressReserve ( CUdeviceptr* ptr, size_t size, size_t alignment,
}

/*
 * Hooked cuMemCreate.
 *
 * Routes the request through allocate_virtual_memory_management(), which
 * performs the limit check, the real driver call and the usage accounting,
 * instead of calling the driver directly from here. The driver is therefore
 * invoked exactly once per request.
 *
 * Returns the result of allocate_virtual_memory_management() as a CUresult.
 * ENSURE_RUNNING() is invoked first; its behavior is defined elsewhere.
 */
CUresult cuMemCreate ( CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags ) {
    LOG_INFO("into cuMemCreate:");
    ENSURE_RUNNING();
    CUresult res = allocate_virtual_memory_management(handle,size,prop,flags);
    if (res == CUDA_SUCCESS) {
        /* %zu is the portable conversion for size_t. */
        LOG_INFO("cuMemCreate success with bytesize=%zu", size);
    }
    return res;
}

Expand Down Expand Up @@ -781,3 +785,11 @@ CUresult cuDestroyExternalSemaphore(CUexternalSemaphore extSem) {
LOG_DEBUG("cuDestroyExternalSemaphore");
return CUDA_OVERRIDE_CALL(cuda_library_entry,cuDestroyExternalSemaphore,extSem);
}

/*
 * Hooked cuMemUnmap.
 *
 * Delegates to remove_virtual_memory_management(), which calls the real
 * driver entry and updates the usage accounting, then logs the outcome and
 * returns that function's result as a CUresult.
 * ENSURE_RUNNING() is invoked first; its behavior is defined elsewhere.
 */
CUresult cuMemUnmap( CUdeviceptr ptr, size_t size ) {
    LOG_INFO("into cuMemUnmap:");
    ENSURE_RUNNING();
    CUresult res = remove_virtual_memory_management(ptr,size);
    /* %zu is the portable conversion for size_t (was %ld). */
    LOG_DEBUG("cuMemUnmap: dptr=%p size=%zu res=%d",(void *)ptr,size,res);
    return res;
}
1 change: 1 addition & 0 deletions src/include/libcuda_hook.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ typedef enum {
CUDA_OVERRIDE_ENUM(cuSignalExternalSemaphoresAsync),
CUDA_OVERRIDE_ENUM(cuWaitExternalSemaphoresAsync),
CUDA_OVERRIDE_ENUM(cuDestroyExternalSemaphore),
CUDA_OVERRIDE_ENUM(cuMemUnmap),
/* cuda graph part */
CUDA_OVERRIDE_ENUM(cuGraphCreate),
CUDA_OVERRIDE_ENUM(cuGraphAddKernelNode_v2),
Expand Down
1 change: 1 addition & 0 deletions src/libvgpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ void* __dlsym_hook_section(void* handle, const char* symbol) {
DLSYM_HOOK_FUNC(cuSignalExternalSemaphoresAsync);
DLSYM_HOOK_FUNC(cuWaitExternalSemaphoresAsync);
DLSYM_HOOK_FUNC(cuDestroyExternalSemaphore);
DLSYM_HOOK_FUNC(cuMemUnmap);
/* cuda Graph */
DLSYM_HOOK_FUNC(cuGraphCreate);
DLSYM_HOOK_FUNC(cuGraphAddKernelNode_v2);
Expand Down