
Commit 39e24f5

GdoongMathew and Borda authored
Torch-TensorRT Integration with LightningModule (#20808)
* feat: add `to_tensorrt` in the `LightningModule`.
* refactor: fix `to_tensorrt` impl
* test: add test_torch_tensorrt.py
* add dependency in test requirement
* limit the torch-tensorrt condition again
* update tensorrt version
* update tensorrt source
* update test.txt
* ci: add extra-index
* ci: use find-links instead. works on my computer...
* fix: fix bug in torch-tensorrt 2.8.0
* add find links in ci test.
* chlog

Co-authored-by: Jirka Borovec <[email protected]>
Co-authored-by: Jirka B <[email protected]>
1 parent 3ed9d4e commit 39e24f5

File tree

.github/workflows/ci-tests-pytorch.yml
requirements/pytorch/test.txt
src/lightning/pytorch/CHANGELOG.md
src/lightning/pytorch/core/module.py
src/lightning/pytorch/utilities/testing/_runif.py
test_torch_tensorrt.py (new)

6 files changed: +281 −5 lines changed
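
For orientation, a minimal usage sketch of the new API this commit introduces; this is illustrative only, assuming `torch-tensorrt` is installed and a CUDA device is available, and it borrows the `BoringModel` demo class that the new tests also use:

    import torch
    from lightning.pytorch.demos.boring_classes import BoringModel

    model = BoringModel()
    # `to_tensorrt` falls back to this attribute when no input_sample is passed
    model.example_input_array = torch.randn(4, 32)

    # compiles with the default IR and writes an ExportedProgram to disk
    trt_module = model.to_tensorrt(file_path="model.trt")

As the `file_path=None` default in the new method indicates, the returned module can also be used directly for inference without saving.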

.github/workflows/ci-tests-pytorch.yml

Lines changed: 1 addition & 1 deletion

@@ -139,7 +139,7 @@ jobs:
       pip install ".[${EXTRA_PREFIX}extra,${EXTRA_PREFIX}test,${EXTRA_PREFIX}strategies]" \
         -U --upgrade-strategy=eager --prefer-binary \
         -r requirements/_integrations/accelerators.txt \
-        --extra-index-url="${TORCH_URL}" --find-links="${PYPI_CACHE_DIR}"
+        --extra-index-url="${TORCH_URL}" --find-links="${PYPI_CACHE_DIR}" --find-links="https://download.pytorch.org/whl/torch-tensorrt"
       pip list
   - name: Drop LAI from extensions
     if: ${{ matrix.pkg-name != 'lightning' }}
requirements/pytorch/test.txt

Lines changed: 3 additions & 0 deletions

@@ -18,3 +18,6 @@ fastapi # for `ServableModuleValidator` # not setting version as re-defined in
 uvicorn # for `ServableModuleValidator` # not setting version as re-defined in App

 tensorboard >=2.9.1, <2.21.0  # for `TensorBoardLogger`
+
+--find-links https://download.pytorch.org/whl/torch-tensorrt
+torch-tensorrt; platform_system == "Linux" and python_version >= "3.12"
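
The trailing environment marker restricts the new dependency to Linux with Python 3.12+. As an illustrative aside (not part of this diff), such PEP 508 markers can be evaluated with the `packaging` library:

    from packaging.markers import Marker

    # evaluates the marker against the current interpreter and platform
    marker = Marker('platform_system == "Linux" and python_version >= "3.12"')
    print(marker.evaluate())  # True only on Linux with Python >= 3.12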

src/lightning/pytorch/CHANGELOG.md

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Added

-
+- Added Torch-TensorRT integration with `LightningModule` ([#20808](https://github.com/Lightning-AI/pytorch-lightning/pull/20808))


 ### Changed

src/lightning/pytorch/core/module.py

Lines changed: 115 additions & 2 deletions

@@ -13,11 +13,12 @@
 # limitations under the License.
 """The LightningModule - an nn.Module with many additional features."""

+import copy
 import logging
 import numbers
 import weakref
 from collections.abc import Generator, Mapping, Sequence
-from contextlib import contextmanager
+from contextlib import contextmanager, nullcontext
 from io import BytesIO
 from pathlib import Path
 from typing import (
@@ -47,7 +48,7 @@
 from lightning.fabric.utilities.apply_func import convert_to_tensors
 from lightning.fabric.utilities.cloud_io import get_filesystem
 from lightning.fabric.utilities.device_dtype_mixin import _DeviceDtypeModuleMixin
-from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_5
+from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_2, _TORCH_GREATER_EQUAL_2_5
 from lightning.fabric.utilities.types import _MAP_LOCATION_TYPE, _PATH
 from lightning.fabric.wrappers import _FabricOptimizer
 from lightning.pytorch.callbacks.callback import Callback
@@ -75,6 +76,7 @@

 _ONNX_AVAILABLE = RequirementCache("onnx")
 _ONNXSCRIPT_AVAILABLE = RequirementCache("onnxscript")
+_TORCH_TRT_AVAILABLE = RequirementCache("torch_tensorrt")

 if TYPE_CHECKING:
     from torch.distributed.device_mesh import DeviceMesh
@@ -1570,6 +1572,117 @@ def forward(self, x):

         return torchscript_module

+    @torch.no_grad()
+    def to_tensorrt(
+        self,
+        file_path: Optional[Union[str, Path, BytesIO]] = None,
+        input_sample: Optional[Any] = None,
+        ir: Literal["default", "dynamo", "ts"] = "default",
+        output_format: Literal["exported_program", "torchscript"] = "exported_program",
+        retrace: bool = False,
+        default_device: Union[str, torch.device] = "cuda",
+        **compile_kwargs: Any,
+    ) -> Union[ScriptModule, torch.fx.GraphModule]:
+        """Export the model to a ScriptModule or GraphModule using the TensorRT compile backend.
+
+        Args:
+            file_path: Path where to save the TensorRT model. Default: None (no file saved).
+            input_sample: inputs to be used during `torch_tensorrt.compile`.
+                Default: None (uses :attr:`example_input_array`).
+            ir: The IR mode to use for TensorRT compilation. Default: "default".
+            output_format: The format of the output model. Default: "exported_program".
+            retrace: Whether to retrace the model. Default: False.
+            default_device: The device to move the model to when it is not already on a CUDA device.
+                Default: "cuda".
+            **compile_kwargs: Additional arguments that will be passed to the TensorRT compile function.
+
+        Example::
+
+            class SimpleModel(LightningModule):
+                def __init__(self):
+                    super().__init__()
+                    self.l1 = torch.nn.Linear(in_features=64, out_features=4)
+
+                def forward(self, x):
+                    return torch.relu(self.l1(x.view(x.size(0), -1)))
+
+            model = SimpleModel()
+            input_sample = torch.randn(1, 64)
+            exported_program = model.to_tensorrt(
+                file_path="export.ep",
+                input_sample=input_sample,
+            )
+
+        """
+        if not _TORCH_GREATER_EQUAL_2_2:
+            raise MisconfigurationException(
+                f"TensorRT export requires PyTorch 2.2 or higher. Current version is {torch.__version__}."
+            )
+
+        if not _TORCH_TRT_AVAILABLE:
+            raise ModuleNotFoundError(
+                f"`{type(self).__name__}.to_tensorrt` requires `torch_tensorrt` to be installed. "
+            )
+
+        mode = self.training
+        device = self.device
+        if self.device.type != "cuda":
+            default_device = torch.device(default_device) if isinstance(default_device, str) else default_device
+
+            if not torch.cuda.is_available() or default_device.type != "cuda":
+                raise MisconfigurationException(
+                    f"TensorRT only supports CUDA devices. The current device is {self.device}."
+                    " Please set the `default_device` argument to a CUDA device."
+                )
+
+            self.to(default_device)
+
+        if input_sample is None:
+            if self.example_input_array is None:
+                raise ValueError(
+                    "Could not export to TensorRT since neither `input_sample` nor"
+                    " `model.example_input_array` attribute is set."
+                )
+            input_sample = self.example_input_array
+
+        import torch_tensorrt
+
+        input_sample = copy.deepcopy((input_sample,) if isinstance(input_sample, torch.Tensor) else input_sample)
+        input_sample = self._on_before_batch_transfer(input_sample)
+        input_sample = self._apply_batch_transfer_handler(input_sample)
+
+        with _jit_is_scripting() if ir == "ts" else nullcontext():
+            trt_obj = torch_tensorrt.compile(
+                module=self.eval(),
+                ir=ir,
+                inputs=input_sample,
+                **compile_kwargs,
+            )
+        self.train(mode)
+        self.to(device)
+
+        if file_path is not None:
+            if ir == "ts":
+                if output_format != "torchscript":
+                    raise ValueError(
+                        "TensorRT with IR mode 'ts' only supports output format 'torchscript'."
+                        f" The current output format is {output_format}."
+                    )
+                assert isinstance(trt_obj, (torch.jit.ScriptModule, torch.jit.ScriptFunction)), (
+                    f"Expected TensorRT object to be a ScriptModule, but got {type(trt_obj)}."
+                )
+                # Because of https://github.com/pytorch/TensorRT/issues/3775,
+                # the ScriptModule needs to be saved via `torch.jit.save` directly
+                torch.jit.save(trt_obj, file_path)
+            else:
+                torch_tensorrt.save(
+                    trt_obj,
+                    file_path,
+                    inputs=input_sample,
+                    output_format=output_format,
+                    retrace=retrace,
+                )
+        return trt_obj
+
     @_restricted_classmethod
     def load_from_checkpoint(
         cls,
src/lightning/pytorch/utilities/testing/_runif.py

Lines changed: 6 additions & 1 deletion

@@ -17,7 +17,7 @@

 from lightning.fabric.utilities.testing import _runif_reasons as _fabric_run_if
 from lightning.pytorch.accelerators.cpu import _PSUTIL_AVAILABLE
-from lightning.pytorch.core.module import _ONNX_AVAILABLE, _ONNXSCRIPT_AVAILABLE
+from lightning.pytorch.core.module import _ONNX_AVAILABLE, _ONNXSCRIPT_AVAILABLE, _TORCH_TRT_AVAILABLE
 from lightning.pytorch.utilities.imports import _OMEGACONF_AVAILABLE, _RICH_AVAILABLE

 _SKLEARN_AVAILABLE = RequirementCache("scikit-learn")
@@ -43,6 +43,7 @@ def _runif_reasons(
     onnx: bool = False,
     linux_only: bool = False,
     onnxscript: bool = False,
+    tensorrt: bool = False,
 ) -> tuple[list[str], dict[str, bool]]:
     """Construct reasons for pytest skipif.
@@ -66,6 +67,7 @@ def _runif_reasons(
         sklearn: Require that scikit-learn is installed.
         onnx: Require that onnx is installed.
         onnxscript: Require that onnxscript is installed.
+        tensorrt: Require that torch-tensorrt is installed.

     """
@@ -102,4 +104,7 @@ def _runif_reasons(
     if onnxscript and not _ONNXSCRIPT_AVAILABLE:
         reasons.append("onnxscript")

+    if tensorrt and not _TORCH_TRT_AVAILABLE:
+        reasons.append("torch-tensorrt")
+
     return reasons, kwargs
test_torch_tensorrt.py (new file)

Lines changed: 155 additions & 0 deletions

@@ -0,0 +1,155 @@
+import os
+import re
+from io import BytesIO
+from pathlib import Path
+
+import pytest
+import torch
+
+import tests_pytorch.helpers.pipelines as pipes
+from lightning.pytorch.core.module import _TORCH_TRT_AVAILABLE
+from lightning.pytorch.demos.boring_classes import BoringModel
+from lightning.pytorch.utilities.exceptions import MisconfigurationException
+from tests_pytorch.helpers.runif import RunIf
+
+
+@RunIf(max_torch="2.2.0")
+def test_torch_minimum_version():
+    model = BoringModel()
+    with pytest.raises(
+        MisconfigurationException,
+        match=re.escape(f"TensorRT export requires PyTorch 2.2 or higher. Current version is {torch.__version__}."),
+    ):
+        model.to_tensorrt("model.trt")
+
+
+@pytest.mark.skipif(_TORCH_TRT_AVAILABLE, reason="Run this test only if tensorrt is not available.")
+@RunIf(min_torch="2.2.0")
+def test_missing_tensorrt_package():
+    model = BoringModel()
+    with pytest.raises(
+        ModuleNotFoundError,
+        match=re.escape(f"`{type(model).__name__}.to_tensorrt` requires `torch_tensorrt` to be installed. "),
+    ):
+        model.to_tensorrt("model.trt")
+
+
+@RunIf(tensorrt=True, min_torch="2.2.0")
+def test_tensorrt_with_wrong_default_device(tmp_path):
+    model = BoringModel()
+    input_sample = torch.randn((1, 32))
+    file_path = os.path.join(tmp_path, "model.trt")
+    with pytest.raises(MisconfigurationException):
+        model.to_tensorrt(file_path, input_sample, default_device="cpu")
+
+
+@RunIf(tensorrt=True, min_cuda_gpus=1, min_torch="2.2.0")
+def test_tensorrt_saves_with_input_sample(tmp_path):
+    model = BoringModel()
+    ori_device = model.device
+    input_sample = torch.randn((1, 32))
+
+    file_path = os.path.join(tmp_path, "model.trt")
+    model.to_tensorrt(file_path, input_sample)
+
+    assert os.path.isfile(file_path)
+    assert os.path.getsize(file_path) > 4e2
+    assert model.device == ori_device
+
+    file_path = Path(tmp_path) / "model.trt"
+    model.to_tensorrt(file_path, input_sample)
+    assert os.path.isfile(file_path)
+    assert os.path.getsize(file_path) > 4e2
+    assert model.device == ori_device
+
+    file_path = BytesIO()
+    model.to_tensorrt(file_path, input_sample)
+    assert len(file_path.getvalue()) > 4e2
+
+
+@RunIf(tensorrt=True, min_cuda_gpus=1, min_torch="2.2.0")
+def test_tensorrt_error_if_no_input(tmp_path):
+    model = BoringModel()
+    model.example_input_array = None
+    file_path = os.path.join(tmp_path, "model.trt")
+
+    with pytest.raises(
+        ValueError,
+        match=r"Could not export to TensorRT since neither `input_sample` nor "
+        r"`model.example_input_array` attribute is set.",
+    ):
+        model.to_tensorrt(file_path)
+
+
+@RunIf(tensorrt=True, min_cuda_gpus=2, min_torch="2.2.0")
+def test_tensorrt_saves_on_multi_gpu(tmp_path):
+    trainer_options = {
+        "default_root_dir": tmp_path,
+        "max_epochs": 1,
+        "limit_train_batches": 10,
+        "limit_val_batches": 10,
+        "accelerator": "gpu",
+        "devices": [0, 1],
+        "strategy": "ddp_spawn",
+        "enable_progress_bar": False,
+    }
+
+    model = BoringModel()
+    model.example_input_array = torch.randn((4, 32))
+
+    pipes.run_model_test(trainer_options, model, min_acc=0.08)
+
+    file_path = os.path.join(tmp_path, "model.trt")
+    model.to_tensorrt(file_path)
+
+    assert os.path.exists(file_path)
+
+
+@pytest.mark.parametrize(
+    ("ir", "export_type"),
+    [
+        ("default", torch.fx.GraphModule),
+        ("dynamo", torch.fx.GraphModule),
+        ("ts", torch.jit.ScriptModule),
+    ],
+)
+@RunIf(tensorrt=True, min_cuda_gpus=1, min_torch="2.2.0")
+def test_tensorrt_save_ir_type(ir, export_type):
+    model = BoringModel()
+    model.example_input_array = torch.randn((4, 32))
+
+    ret = model.to_tensorrt(ir=ir)
+    assert isinstance(ret, export_type)
+
+
+@pytest.mark.parametrize(
+    "output_format",
+    ["exported_program", "torchscript"],
+)
+@pytest.mark.parametrize(
+    "ir",
+    ["default", "dynamo", "ts"],
+)
+@RunIf(tensorrt=True, min_cuda_gpus=1, min_torch="2.2.0")
+def test_tensorrt_export_reload(output_format, ir, tmp_path):
+    if ir == "ts" and output_format == "exported_program":
+        pytest.skip("TorchScript cannot be exported as exported_program")
+
+    import torch_tensorrt
+
+    model = BoringModel()
+    model.cuda().eval()
+    model.example_input_array = torch.ones((4, 32))
+
+    file_path = os.path.join(tmp_path, "model.trt")
+    model.to_tensorrt(file_path, output_format=output_format, ir=ir)
+
+    loaded_model = torch_tensorrt.load(file_path)
+    if output_format == "exported_program":
+        loaded_model = loaded_model.module()
+
+    with torch.no_grad(), torch.inference_mode():
+        model_output = model(model.example_input_array.to("cuda"))
+        jit_output = loaded_model(model.example_input_array.to("cuda"))
+
+    assert torch.allclose(model_output, jit_output, rtol=1e-03, atol=1e-06)