
Commit 3ca850d

alexnorell and claude committed
Add comprehensive logging for TensorRT compilation process
This update adds detailed logging throughout the TensorRT compilation pipeline to provide better visibility into the compilation process, which can take several minutes to complete.

Changes:
- Added timing tracking for total compilation time
- Log runtime environment details (GPU, CUDA, TensorRT versions)
- Added progress indicators for each compilation phase
- Log ONNX parsing start and completion
- Display TensorRT engine configuration details (precision, input size, batch settings, compatibility flags)
- Show platform capability checks for FP16/INT8 support
- Log the engine building phase with a clear "this may take several minutes" message
- Report engine build time and final engine size
- Added visual separators for better log readability

The logs now provide users with:
- Real-time feedback on compilation progress
- Time estimates for long-running operations
- The system configuration being used
- A clear indication when compilation is skipped (engine exists)

Co-Authored-By: Claude <[email protected]>
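As context for reviewers, here is a minimal, self-contained sketch of the time.time()-based phase logging pattern this commit applies throughout the pipeline (the run_phase helper and the phase name are illustrative, not part of the commit):

import logging
import time

logging.basicConfig(level=logging.INFO)
LOGGER = logging.getLogger(__name__)

def run_phase(name, fn):
    # Announce the phase, run it, and report elapsed wall-clock time,
    # mirroring the compilation_start_time / build_time tracking below.
    LOGGER.info("Starting phase: %s", name)
    start = time.time()
    result = fn()
    elapsed = time.time() - start
    LOGGER.info("%s finished in %.2f seconds (%.2f minutes)", name, elapsed, elapsed / 60)
    return result

run_phase("engine build (stand-in)", lambda: time.sleep(0.1))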
1 parent d7ab4dd commit 3ca850d

File tree

2 files changed: +103, -40 lines

inference_experimental/development/compilation/core.py

Lines changed: 59 additions & 39 deletions
@@ -1,6 +1,7 @@
 import json
 import os.path
 import shutil
+import time
 from typing import List, Literal, Optional, Tuple, Union
 
 import onnxruntime
@@ -167,48 +168,56 @@ def compile_model_to_trt(
     trt_version_compatible: bool = False,
     same_compute_compatibility: bool = False,
 ) -> None:
-    print(f"Compiling model in {model_dir}")
+    LOGGER.info("Starting TRT compilation for model in: {}".format(model_dir))
+    compilation_start_time = time.time()
+
+    LOGGER.info("Capturing runtime environment information...")
     runtime_xray = x_ray_runtime_environment()
     xray_path = os.path.join(model_dir, "env-x-ray.json")
-    dump_json(
-        path=xray_path,
-        contents={
-            "gpu_available": runtime_xray.gpu_available,
-            "gpu_devices": runtime_xray.gpu_devices,
-            "gpu_devices_cc": [str(e) for e in runtime_xray.gpu_devices_cc],
-            "driver_version": (
-                str(runtime_xray.driver_version)
-                if runtime_xray.driver_version
-                else None
-            ),
-            "cuda_version": (
-                str(runtime_xray.cuda_version) if runtime_xray.cuda_version else None
-            ),
-            "trt_version": (
-                str(runtime_xray.trt_version) if runtime_xray.trt_version else None
-            ),
-            "jetson_type": runtime_xray.jetson_type,
-            "l4t_version": (
-                str(runtime_xray.l4t_version) if runtime_xray.l4t_version else None
-            ),
-            "os_version": runtime_xray.os_version,
-            "torch_available": runtime_xray.torch_available,
-            "onnxruntime_version": (
-                str(runtime_xray.onnxruntime_version)
-                if runtime_xray.onnxruntime_version
-                else None
-            ),
-            "available_onnx_execution_providers": (
-                list(runtime_xray.available_onnx_execution_providers)
-                if runtime_xray.available_onnx_execution_providers
-                else None
-            ),
-            "hf_transformers_available": runtime_xray.hf_transformers_available,
-            "ultralytics_available": runtime_xray.ultralytics_available,
-            "trt_python_package_available": runtime_xray.trt_python_package_available,
-        },
-    )
+    env_contents = {
+        "gpu_available": runtime_xray.gpu_available,
+        "gpu_devices": runtime_xray.gpu_devices,
+        "gpu_devices_cc": [str(e) for e in runtime_xray.gpu_devices_cc],
+        "driver_version": (
+            str(runtime_xray.driver_version)
+            if runtime_xray.driver_version
+            else None
+        ),
+        "cuda_version": (
+            str(runtime_xray.cuda_version) if runtime_xray.cuda_version else None
+        ),
+        "trt_version": (
+            str(runtime_xray.trt_version) if runtime_xray.trt_version else None
+        ),
+        "jetson_type": runtime_xray.jetson_type,
+        "l4t_version": (
+            str(runtime_xray.l4t_version) if runtime_xray.l4t_version else None
+        ),
+        "os_version": runtime_xray.os_version,
+        "torch_available": runtime_xray.torch_available,
+        "onnxruntime_version": (
+            str(runtime_xray.onnxruntime_version)
+            if runtime_xray.onnxruntime_version
+            else None
+        ),
+        "available_onnx_execution_providers": (
+            list(runtime_xray.available_onnx_execution_providers)
+            if runtime_xray.available_onnx_execution_providers
+            else None
+        ),
+        "hf_transformers_available": runtime_xray.hf_transformers_available,
+        "ultralytics_available": runtime_xray.ultralytics_available,
+        "trt_python_package_available": runtime_xray.trt_python_package_available,
+    }
+    dump_json(path=xray_path, contents=env_contents)
+    LOGGER.info("GPU Available: {}".format(runtime_xray.gpu_available))
+    if runtime_xray.gpu_available and runtime_xray.gpu_devices:
+        LOGGER.info("GPU Devices: {}".format(runtime_xray.gpu_devices))
+    LOGGER.info("CUDA Version: {}".format(runtime_xray.cuda_version))
+    LOGGER.info("TensorRT Version: {}".format(runtime_xray.trt_version))
+
     onnx_path = os.path.join(model_dir, WEIGHTS_FILE_NAME)
+    LOGGER.info("Loading ONNX model from: {}".format(onnx_path))
     session = onnxruntime.InferenceSession(onnx_path)
     if model_input_size is not None:
         if isinstance(model_input_size, int):
@@ -234,10 +243,14 @@ def compile_model_to_trt(
         model_dir, f"engine-{precision}{engine_name_postfix}.plan"
     )
     if os.path.exists(engine_path):
+        LOGGER.info("TRT engine already exists at: {}".format(engine_path))
+        LOGGER.info("Skipping compilation")
         return None
+
     trt_config_path = os.path.join(
         model_dir, f"trt-config-{precision}{engine_name_postfix}.json"
     )
+    LOGGER.info("Saving TRT configuration to: {}".format(trt_config_path))
     dump_json(
         path=trt_config_path,
         contents={
@@ -250,6 +263,8 @@ def compile_model_to_trt(
             "precision": precision,
         },
     )
+
+    LOGGER.info("Initializing TensorRT Engine Builder (workspace: {} GB)".format(workspace_size_gb))
     engine_builder = EngineBuilder(workspace=workspace_size_gb)
     engine_builder.create_network(onnx_path=onnx_path)
     engine_builder.create_engine(
@@ -262,6 +277,11 @@ def compile_model_to_trt(
         same_compute_compatibility=same_compute_compatibility,
     )
 
+    total_compilation_time = time.time() - compilation_start_time
+    LOGGER.info("Total compilation time: {:.2f} seconds ({:.2f} minutes)".format(
+        total_compilation_time, total_compilation_time / 60
+    ))
+
 
 def dump_json(path: str, contents: dict) -> None:
     with open(path, "w") as f:
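One note on visibility: these LOGGER.info calls only surface if the host application enables INFO-level logging. A minimal sketch of one way to do that, assuming LOGGER is a standard logging.Logger as in engine_builder.py:

import logging

# Surface INFO-level compilation progress messages on stderr.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)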

inference_experimental/development/compilation/engine_builder.py

Lines changed: 44 additions & 1 deletion
@@ -1,5 +1,6 @@
 import logging
 import os
+import time
 from typing import Literal, Optional, Tuple
 
 import tensorrt as trt
@@ -31,16 +32,19 @@ def create_network(self, onnx_path: str) -> None:
         Parse the ONNX graph and create the corresponding TensorRT network definition.
         :param onnx_path: The path to the ONNX graph to load.
         """
+        LOGGER.info("Starting ONNX parsing from: {}".format(onnx_path))
         self.network = self.builder.create_network(0)
         self.parser = trt.OnnxParser(self.network, self.trt_logger)
 
         onnx_path = os.path.realpath(onnx_path)
         with open(onnx_path, "rb") as f:
+            LOGGER.info("Parsing ONNX model graph...")
             if not self.parser.parse(f.read()):
                 LOGGER.error("Failed to load ONNX file: {}".format(onnx_path))
                 for error in range(self.parser.num_errors):
                     LOGGER.error(self.parser.get_error(error))
                 raise RuntimeError("Could not parse ONNX file")
+        LOGGER.info("ONNX parsing completed successfully")
 
         inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
         outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]
@@ -72,35 +76,74 @@ def create_engine(
         engine_path = os.path.abspath(engine_path)
         engine_dir = os.path.dirname(engine_path)
         os.makedirs(engine_dir, exist_ok=True)
-        LOGGER.info("Building {} Engine in {}".format(precision, engine_path))
+
+        LOGGER.info("=" * 60)
+        LOGGER.info("Starting TensorRT Engine Compilation")
+        LOGGER.info("=" * 60)
+        LOGGER.info("Output path: {}".format(engine_path))
+        LOGGER.info("Precision: {}".format(precision.upper()))
+        LOGGER.info("Input size: {}x{}".format(input_size[0], input_size[1]))
+        if dynamic_batch_sizes:
+            LOGGER.info("Dynamic batch sizes: min={}, opt={}, max={}".format(
+                dynamic_batch_sizes[0], dynamic_batch_sizes[1], dynamic_batch_sizes[2]
+            ))
+        else:
+            LOGGER.info("Using static batch size")
+        LOGGER.info("TRT version compatible: {}".format(trt_version_compatible))
+        LOGGER.info("Same compute compatibility: {}".format(same_compute_compatibility))
+
         inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
         if len(inputs) != 1:
             raise ValueError("Detected network with multiple inputs")
+
+        LOGGER.info("Configuring builder flags...")
         if precision in ["fp16", "int8"]:
             if not self.builder.platform_has_fast_fp16:
                 LOGGER.warning("FP16 is not supported natively on this platform/device")
+            else:
+                LOGGER.info("FP16 is supported on this platform")
             self.config.set_flag(trt.BuilderFlag.FP16)
         if precision in ["int8"]:
             if not self.builder.platform_has_fast_int8:
                 LOGGER.warning("INT8 is not supported natively on this platform/device")
+            else:
+                LOGGER.info("INT8 is supported on this platform")
             self.config.set_flag(trt.BuilderFlag.INT8)
         if trt_version_compatible:
+            LOGGER.info("Enabling TRT version compatibility flag")
             self.config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)
         if same_compute_compatibility:
+            LOGGER.info("Enabling same compute capability compatibility")
             self.config.hardware_compatibility_level = (
                 trt.HardwareCompatibilityLevel.SAME_COMPUTE_CAPABILITY
             )
+
+        LOGGER.info("Creating optimization profile...")
         profile = self.builder.create_optimization_profile()
         if dynamic_batch_sizes:
             bs_min, bs_opt, bs_max = dynamic_batch_sizes
             h, w = input_size
             profile.set_shape(
                 input_name, (bs_min, 3, h, w), (bs_opt, 3, h, w), (bs_max, 3, h, w)
             )
+            LOGGER.info("Optimization profile configured with dynamic batch sizes")
         self.config.add_optimization_profile(profile)
+
+        LOGGER.info("Building TensorRT engine - this may take several minutes...")
+        start_time = time.time()
         engine_bytes = self.builder.build_serialized_network(self.network, self.config)
+        build_time = time.time() - start_time
+
         if engine_bytes is None:
             raise ValueError("Failed to create image")
+
+        LOGGER.info("TensorRT engine built successfully in {:.2f} seconds".format(build_time))
+        LOGGER.info("Engine size: {:.2f} MB".format(len(engine_bytes) / (1024 * 1024)))
+
         with open(engine_path, "wb") as f:
             LOGGER.info("Serializing engine to file: {:}".format(engine_path))
             f.write(engine_bytes)
+
+        LOGGER.info("=" * 60)
+        LOGGER.info("TensorRT Compilation Complete")
+        LOGGER.info("=" * 60)
