@@ -246,11 +246,21 @@ def determine_available_memory(self) -> int:
246246 available_kv_cache_memory = self .requested_memory \
247247 - profile_result .non_kv_cache_memory
248248
249+ unrequested_memory = self .init_snapshot .free_memory \
250+ - self .requested_memory
249251 logger .debug (
250- "Initial free memory: %.2f GiB, free memory: %.2f GiB, "
251- "requested GPU memory: %.2f GiB" ,
252- GiB (self .init_snapshot .free_memory ), GiB (free_gpu_memory ),
253- GiB (self .requested_memory ))
252+ "Initial free memory: %.2f GiB; "
253+ "Requested memory: %.2f (util), %.2f GiB" ,
254+ GiB (self .init_snapshot .free_memory ),
255+ self .cache_config .gpu_memory_utilization ,
256+ GiB (self .requested_memory ),
257+ )
258+ logger .debug (
259+ "Free memory after profiling: %.2f GiB (total), "
260+ "%.2f GiB (within requested)" ,
261+ GiB (free_gpu_memory ),
262+ GiB (free_gpu_memory - unrequested_memory ),
263+ )
254264 logger .debug (profile_result )
255265 logger .info ("Available KV cache memory: %.2f GiB" ,
256266 GiB (available_kv_cache_memory ))
0 commit comments