
Commit 3ca850d

alexnorell and claude committed
Add comprehensive logging for TensorRT compilation process
This update adds detailed logging throughout the TensorRT compilation pipeline to provide better visibility into the compilation process, which can take several minutes to complete.

Changes:
- Added timing tracking for total compilation time
- Log runtime environment details (GPU, CUDA, TensorRT versions)
- Added progress indicators for each compilation phase
- Log ONNX parsing start and completion
- Display TensorRT engine configuration details (precision, input size, batch settings, compatibility flags)
- Show platform capability checks for FP16/INT8 support
- Log the engine building phase with a clear "this may take several minutes" message
- Report engine build time and final engine size
- Added visual separators for better log readability

The logs now provide users with:
- Real-time feedback on compilation progress
- Time estimates for long-running operations
- The system configuration being used
- A clear indication when compilation is skipped (engine exists)

Co-Authored-By: Claude <[email protected]>
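As context for reviewers, here is a minimal, self-contained sketch of the time.time()-based phase logging pattern this commit applies throughout the pipeline (the run_phase helper and the phase name are illustrative, not part of the commit):

import logging
import time

logging.basicConfig(level=logging.INFO)
LOGGER = logging.getLogger(__name__)

def run_phase(name, fn):
    # Announce the phase, run it, and report elapsed wall-clock time,
    # mirroring the compilation_start_time / build_time tracking below.
    LOGGER.info("Starting phase: %s", name)
    start = time.time()
    result = fn()
    elapsed = time.time() - start
    LOGGER.info("%s finished in %.2f seconds (%.2f minutes)", name, elapsed, elapsed / 60)
    return result

run_phase("engine build (stand-in)", lambda: time.sleep(0.1))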
1 parent d7ab4dd commit 3ca850d

File tree

2 files changed: +103, -40 lines

inference_experimental/development/compilation/core.py

Lines changed: 59 additions & 39 deletions
@@ -1,6 +1,7 @@
 import json
 import os.path
 import shutil
+import time
 from typing import List, Literal, Optional, Tuple, Union
 
 import onnxruntime
@@ -167,48 +168,56 @@ def compile_model_to_trt(
     trt_version_compatible: bool = False,
     same_compute_compatibility: bool = False,
 ) -> None:
-    print(f"Compiling model in {model_dir}")
+    LOGGER.info("Starting TRT compilation for model in: {}".format(model_dir))
+    compilation_start_time = time.time()
+
+    LOGGER.info("Capturing runtime environment information...")
     runtime_xray = x_ray_runtime_environment()
     xray_path = os.path.join(model_dir, "env-x-ray.json")
-    dump_json(
-        path=xray_path,
-        contents={
-            "gpu_available": runtime_xray.gpu_available,
-            "gpu_devices": runtime_xray.gpu_devices,
-            "gpu_devices_cc": [str(e) for e in runtime_xray.gpu_devices_cc],
-            "driver_version": (
-                str(runtime_xray.driver_version)
-                if runtime_xray.driver_version
-                else None
-            ),
-            "cuda_version": (
-                str(runtime_xray.cuda_version) if runtime_xray.cuda_version else None
-            ),
-            "trt_version": (
-                str(runtime_xray.trt_version) if runtime_xray.trt_version else None
-            ),
-            "jetson_type": runtime_xray.jetson_type,
-            "l4t_version": (
-                str(runtime_xray.l4t_version) if runtime_xray.l4t_version else None
-            ),
-            "os_version": runtime_xray.os_version,
-            "torch_available": runtime_xray.torch_available,
-            "onnxruntime_version": (
-                str(runtime_xray.onnxruntime_version)
-                if runtime_xray.onnxruntime_version
-                else None
-            ),
-            "available_onnx_execution_providers": (
-                list(runtime_xray.available_onnx_execution_providers)
-                if runtime_xray.available_onnx_execution_providers
-                else None
-            ),
-            "hf_transformers_available": runtime_xray.hf_transformers_available,
-            "ultralytics_available": runtime_xray.ultralytics_available,
-            "trt_python_package_available": runtime_xray.trt_python_package_available,
-        },
-    )
+    env_contents = {
+        "gpu_available": runtime_xray.gpu_available,
+        "gpu_devices": runtime_xray.gpu_devices,
+        "gpu_devices_cc": [str(e) for e in runtime_xray.gpu_devices_cc],
+        "driver_version": (
+            str(runtime_xray.driver_version)
+            if runtime_xray.driver_version
+            else None
+        ),
+        "cuda_version": (
+            str(runtime_xray.cuda_version) if runtime_xray.cuda_version else None
+        ),
+        "trt_version": (
+            str(runtime_xray.trt_version) if runtime_xray.trt_version else None
+        ),
+        "jetson_type": runtime_xray.jetson_type,
+        "l4t_version": (
+            str(runtime_xray.l4t_version) if runtime_xray.l4t_version else None
+        ),
+        "os_version": runtime_xray.os_version,
+        "torch_available": runtime_xray.torch_available,
+        "onnxruntime_version": (
+            str(runtime_xray.onnxruntime_version)
+            if runtime_xray.onnxruntime_version
+            else None
+        ),
+        "available_onnx_execution_providers": (
+            list(runtime_xray.available_onnx_execution_providers)
+            if runtime_xray.available_onnx_execution_providers
+            else None
+        ),
+        "hf_transformers_available": runtime_xray.hf_transformers_available,
+        "ultralytics_available": runtime_xray.ultralytics_available,
+        "trt_python_package_available": runtime_xray.trt_python_package_available,
+    }
+    dump_json(path=xray_path, contents=env_contents)
+    LOGGER.info("GPU Available: {}".format(runtime_xray.gpu_available))
+    if runtime_xray.gpu_available and runtime_xray.gpu_devices:
+        LOGGER.info("GPU Devices: {}".format(runtime_xray.gpu_devices))
+    LOGGER.info("CUDA Version: {}".format(runtime_xray.cuda_version))
+    LOGGER.info("TensorRT Version: {}".format(runtime_xray.trt_version))
+
     onnx_path = os.path.join(model_dir, WEIGHTS_FILE_NAME)
+    LOGGER.info("Loading ONNX model from: {}".format(onnx_path))
     session = onnxruntime.InferenceSession(onnx_path)
     if model_input_size is not None:
         if isinstance(model_input_size, int):
@@ -234,10 +243,14 @@ def compile_model_to_trt(
         model_dir, f"engine-{precision}{engine_name_postfix}.plan"
     )
     if os.path.exists(engine_path):
+        LOGGER.info("TRT engine already exists at: {}".format(engine_path))
+        LOGGER.info("Skipping compilation")
         return None
+
     trt_config_path = os.path.join(
         model_dir, f"trt-config-{precision}{engine_name_postfix}.json"
     )
+    LOGGER.info("Saving TRT configuration to: {}".format(trt_config_path))
     dump_json(
         path=trt_config_path,
         contents={
@@ -250,6 +263,8 @@ def compile_model_to_trt(
             "precision": precision,
         },
     )
+
+    LOGGER.info("Initializing TensorRT Engine Builder (workspace: {} GB)".format(workspace_size_gb))
     engine_builder = EngineBuilder(workspace=workspace_size_gb)
     engine_builder.create_network(onnx_path=onnx_path)
     engine_builder.create_engine(
@@ -262,6 +277,11 @@ def compile_model_to_trt(
         same_compute_compatibility=same_compute_compatibility,
     )
 
+    total_compilation_time = time.time() - compilation_start_time
+    LOGGER.info("Total compilation time: {:.2f} seconds ({:.2f} minutes)".format(
+        total_compilation_time, total_compilation_time / 60
+    ))
+
 
 def dump_json(path: str, contents: dict) -> None:
     with open(path, "w") as f:
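One note on visibility: these LOGGER.info calls only surface if the host application enables INFO-level logging. A minimal sketch of one way to do that, assuming LOGGER is a standard logging.Logger as in engine_builder.py:

import logging

# Surface INFO-level compilation progress messages on stderr.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)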

inference_experimental/development/compilation/engine_builder.py

Lines changed: 44 additions & 1 deletion
@@ -1,5 +1,6 @@
 import logging
 import os
+import time
 from typing import Literal, Optional, Tuple
 
 import tensorrt as trt
@@ -31,16 +32,19 @@ def create_network(self, onnx_path: str) -> None:
         Parse the ONNX graph and create the corresponding TensorRT network definition.
         :param onnx_path: The path to the ONNX graph to load.
         """
+        LOGGER.info("Starting ONNX parsing from: {}".format(onnx_path))
         self.network = self.builder.create_network(0)
         self.parser = trt.OnnxParser(self.network, self.trt_logger)
 
         onnx_path = os.path.realpath(onnx_path)
         with open(onnx_path, "rb") as f:
+            LOGGER.info("Parsing ONNX model graph...")
             if not self.parser.parse(f.read()):
                 LOGGER.error("Failed to load ONNX file: {}".format(onnx_path))
                 for error in range(self.parser.num_errors):
                     LOGGER.error(self.parser.get_error(error))
                 raise RuntimeError("Could not parse ONNX file")
+        LOGGER.info("ONNX parsing completed successfully")
 
         inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
         outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]
@@ -72,35 +76,74 @@ def create_engine(
         engine_path = os.path.abspath(engine_path)
         engine_dir = os.path.dirname(engine_path)
         os.makedirs(engine_dir, exist_ok=True)
-        LOGGER.info("Building {} Engine in {}".format(precision, engine_path))
+
+        LOGGER.info("=" * 60)
+        LOGGER.info("Starting TensorRT Engine Compilation")
+        LOGGER.info("=" * 60)
+        LOGGER.info("Output path: {}".format(engine_path))
+        LOGGER.info("Precision: {}".format(precision.upper()))
+        LOGGER.info("Input size: {}x{}".format(input_size[0], input_size[1]))
+        if dynamic_batch_sizes:
+            LOGGER.info("Dynamic batch sizes: min={}, opt={}, max={}".format(
+                dynamic_batch_sizes[0], dynamic_batch_sizes[1], dynamic_batch_sizes[2]
+            ))
+        else:
+            LOGGER.info("Using static batch size")
+        LOGGER.info("TRT version compatible: {}".format(trt_version_compatible))
+        LOGGER.info("Same compute compatibility: {}".format(same_compute_compatibility))
+
         inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
         if len(inputs) != 1:
             raise ValueError("Detected network with multiple inputs")
+
+        LOGGER.info("Configuring builder flags...")
         if precision in ["fp16", "int8"]:
             if not self.builder.platform_has_fast_fp16:
                 LOGGER.warning("FP16 is not supported natively on this platform/device")
+            else:
+                LOGGER.info("FP16 is supported on this platform")
             self.config.set_flag(trt.BuilderFlag.FP16)
         if precision in ["int8"]:
             if not self.builder.platform_has_fast_int8:
                 LOGGER.warning("INT8 is not supported natively on this platform/device")
+            else:
+                LOGGER.info("INT8 is supported on this platform")
             self.config.set_flag(trt.BuilderFlag.INT8)
         if trt_version_compatible:
+            LOGGER.info("Enabling TRT version compatibility flag")
             self.config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)
         if same_compute_compatibility:
+            LOGGER.info("Enabling same compute capability compatibility")
             self.config.hardware_compatibility_level = (
                 trt.HardwareCompatibilityLevel.SAME_COMPUTE_CAPABILITY
             )
+
+        LOGGER.info("Creating optimization profile...")
         profile = self.builder.create_optimization_profile()
         if dynamic_batch_sizes:
             bs_min, bs_opt, bs_max = dynamic_batch_sizes
             h, w = input_size
             profile.set_shape(
                 input_name, (bs_min, 3, h, w), (bs_opt, 3, h, w), (bs_max, 3, h, w)
             )
+            LOGGER.info("Optimization profile configured with dynamic batch sizes")
         self.config.add_optimization_profile(profile)
+
+        LOGGER.info("Building TensorRT engine - this may take several minutes...")
+        start_time = time.time()
         engine_bytes = self.builder.build_serialized_network(self.network, self.config)
+        build_time = time.time() - start_time
+
         if engine_bytes is None:
             raise ValueError("Failed to create image")
+
+        LOGGER.info("TensorRT engine built successfully in {:.2f} seconds".format(build_time))
+        LOGGER.info("Engine size: {:.2f} MB".format(len(engine_bytes) / (1024 * 1024)))
+
         with open(engine_path, "wb") as f:
             LOGGER.info("Serializing engine to file: {:}".format(engine_path))
             f.write(engine_bytes)
+
+        LOGGER.info("=" * 60)
+        LOGGER.info("TensorRT Compilation Complete")
+        LOGGER.info("=" * 60)
