Commit 6b3feac
Parent: 8a83e02

Fix test case

Signed-off-by: Hui Gao <huig@nvidia.com>

File tree

2 files changed: +193 −450 lines


tensorrt_llm/_torch/pyexecutor/_util.py

Lines changed: 1 addition, 1 deletion
@@ -119,7 +119,7 @@ def _cal_max_memory(self, peak_memory, total_gpu_memory, fraction) -> int:
         """
         kv_size_per_token = self._get_kv_size_per_token()

-        available_kv_mem = total_gpu_memory * fraction - peak_memory
+        available_kv_mem = (total_gpu_memory - peak_memory) * fraction
         logger.info(
             f"Peak memory during memory usage profiling (torch + non-torch): {peak_memory / (GB):.2f} GiB, "
             f"available KV cache memory when calculating max tokens: {available_kv_mem / (GB):.2f} GiB, "
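The one-line change alters where the memory fraction is applied: before the fix, the fraction was taken of total GPU memory and the profiled peak was subtracted afterward; after the fix, the peak is subtracted first and the fraction is applied only to what remains. A minimal standalone sketch of both formulas (the helper names and the 80 GiB / 20 GiB / 0.9 numbers here are illustrative stand-ins, not values from the TensorRT-LLM code):

```python
# Hedged sketch of the formula change in this commit; not the actual
# TensorRT-LLM implementation. All concrete numbers are made up.
GB = 1 << 30  # bytes per GiB

def available_kv_mem_old(total_gpu_memory, peak_memory, fraction):
    # Before the fix: fraction of *total* memory, then subtract peak usage.
    return total_gpu_memory * fraction - peak_memory

def available_kv_mem_new(total_gpu_memory, peak_memory, fraction):
    # After the fix: subtract peak usage first, then take the fraction of
    # what is actually left for the KV cache.
    return (total_gpu_memory - peak_memory) * fraction

total = 80 * GB    # hypothetical 80 GiB GPU
peak = 20 * GB     # hypothetical peak memory from profiling
fraction = 0.9     # hypothetical KV-cache memory fraction

old = available_kv_mem_old(total, peak, fraction)  # 0.9*80 - 20 = 52 GiB
new = available_kv_mem_new(total, peak, fraction)  # 0.9*(80 - 20) = 54 GiB
print(f"old: {old / GB:.2f} GiB, new: {new / GB:.2f} GiB")
```

With these numbers the new formula yields slightly more KV-cache memory, but when the profiled peak exceeds `fraction * total` the old formula could even go negative while the new one scales down gracefully, which is consistent with the commit's intent of budgeting the fraction against genuinely free memory.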
