diff --git a/kernel-open/common/inc/nv-nanos.h b/kernel-open/common/inc/nv-nanos.h
index 3cc14cac57..35e46877c3 100644
--- a/kernel-open/common/inc/nv-nanos.h
+++ b/kernel-open/common/inc/nv-nanos.h
@@ -481,6 +481,18 @@ typedef struct nvidia_event
 #define BUILD_BUG_ON(expr)                  build_assert(!(expr))
 #define BUILD_BUG_ON_NOT_POWER_OF_2(expr)   build_assert(((expr) & ((expr) - 1)) == 0)
 
+// To implement realloc for vmalloc-based allocations we need to track the size
+// of the original allocation. We can do that by allocating a header along with
+// the allocation itself. Since vmalloc is only used for relatively large
+// allocations, this overhead is very small.
+//
+// We don't need this for kmalloc since we can use ksize().
+typedef struct
+{
+    size_t alloc_size;  // size saved at allocation time; vfree() hands it to NV_KFREE
+    uint8_t ptr[0];     // zero-length member marking where the memory after the header begins
+} uvm_vmalloc_hdr_t;
+
 #define ZERO_SIZE_PTR       pointer_from_u64(16)
 #define ZERO_OR_NULL_PTR(p) (u64_from_pointer(p) <= u64_from_pointer(ZERO_SIZE_PTR))
 
@@ -507,8 +519,13 @@ typedef struct nvidia_event
 #define vmalloc(size)       kmalloc(size, 0)
 #define vzalloc(size)       kzalloc(size, 0)
 #define ksize(p)            objcache_from_object(u64_from_pointer(p), PAGESIZE_2M)->pagesize
-#define is_vmalloc_addr(p)  false
-#define vfree               kfree
+#define is_vmalloc_addr(p)  (objcache_from_object(u64_from_pointer(p), PAGESIZE_2M) == INVALID_ADDRESS) // true when the objcache lookup for p yields INVALID_ADDRESS; presumably "not an objcache allocation" - confirm
+#define vfree(p) do { /* treats p as the address of the ptr member; p is evaluated twice, so no side effects */ \
+    uvm_vmalloc_hdr_t *vfree_hdr_; /* distinctive name so an argument that mentions `hdr` is not captured */ \
+    vfree_hdr_ = container_of((p), uvm_vmalloc_hdr_t, ptr); /* step back from ptr to the enclosing header */ \
+    NV_KFREE((p), vfree_hdr_->alloc_size); /* NOTE(review): frees p, not the header - confirm this is intended */ \
+} while (0)
+
 
 static inline void *kmalloc(unsigned long size, int flags)
 {
diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.c b/kernel-open/nvidia-uvm/uvm_kvmalloc.c
index c531bff64c..5b51772786 100644
--- a/kernel-open/nvidia-uvm/uvm_kvmalloc.c
+++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.c
@@ -27,18 +27,6 @@
 #include "uvm_kvmalloc.h"
 #include "uvm_rb_tree.h"
 
-// To implement realloc for vmalloc-based allocations we need to track the size
-// of the original allocation. We can do that by allocating a header along with
-// the allocation itself. Since vmalloc is only used for relatively large
-// allocations, this overhead is very small.
-//
-// We don't need this for kmalloc since we can use ksize().
-typedef struct
-{
-    size_t alloc_size;
-    uint8_t ptr[0];
-} uvm_vmalloc_hdr_t;
-
 typedef struct
 {
     const char *file;
@@ -257,7 +245,6 @@ static void *alloc_internal(size_t size, bool zero_memory)
     // Make sure that (sizeof(hdr) + size) is what it should be
     BUILD_BUG_ON(sizeof(uvm_vmalloc_hdr_t) != offsetof(uvm_vmalloc_hdr_t *, ptr));
 
-    assert(size <= (1 << 16));
     if (size <= UVM_KMALLOC_THRESHOLD) {
         if (zero_memory)
             return kzalloc(size, NV_UVM_GFP_FLAGS);
diff --git a/kernel-open/nvidia-uvm/uvm_kvmalloc.h b/kernel-open/nvidia-uvm/uvm_kvmalloc.h
index 92f3fd55b8..95c906c41a 100644
--- a/kernel-open/nvidia-uvm/uvm_kvmalloc.h
+++ b/kernel-open/nvidia-uvm/uvm_kvmalloc.h
@@ -27,6 +27,11 @@
 #include "uvm_nanos.h"
 #include "uvm_test_ioctl.h"
 
+#ifndef _CONFIG_H_
+#include <config.h>
+#define _CONFIG_H_
+#endif
+
 // kmalloc is faster than vmalloc because it doesn't have to remap kernel
 // virtual memory, but for that same reason it requires physically-contiguous
 // memory. It also supports a native krealloc function which is missing in
@@ -41,7 +46,7 @@
 //
 // This is in the header so callers can use it to inform their allocation sizes
 // if they wish.
-#define UVM_KMALLOC_THRESHOLD infinity
+#define UVM_KMALLOC_THRESHOLD (1 << MAX_MCACHE_ORDER) // alloc_internal() tests size <= this before taking the kzalloc (non-vmalloc) branch
 
 NV_STATUS uvm_kvmalloc_init(void);
 void uvm_kvmalloc_exit(void);