Skip to content

Commit 95db34d

Browse files
committed
Fixed the comments
1 parent 5683644 commit 95db34d

File tree

8 files changed

+42
-77
lines changed

8 files changed

+42
-77
lines changed

core/runtime/TRTEngineProfiler.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ namespace runtime {
1212

1313
enum TraceFormat { kPERFETTO, kTREX };
1414

15-
// Forward declare the function
16-
1715
struct TRTEngineProfiler : public nvinfer1::IProfiler {
1816
struct Record {
1917
float time{0};

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ def cross_compile_for_windows(
6666
Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]]
6767
] = _defaults.ENABLED_PRECISIONS,
6868
engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY,
69-
debug: bool = False,
7069
num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS,
7170
workspace_size: int = _defaults.WORKSPACE_SIZE,
7271
dla_sram_size: int = _defaults.DLA_SRAM_SIZE,
@@ -140,7 +139,6 @@ def cross_compile_for_windows(
140139
assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False
141140
sparse_weights (bool): Enable sparsity for convolution and fully connected layers.
142141
enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels
143-
debug (bool): Enable debuggable engine
144142
capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
145143
num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
146144
workspace_size (int): Maximum size of workspace given to TensorRT
@@ -187,9 +185,9 @@ def cross_compile_for_windows(
187185
f"Cross compile for windows is only supported on x86-64 Linux architecture, current platform: {platform.system()=}, {platform.architecture()[0]=}"
188186
)
189187

190-
if debug:
188+
if kwargs.get("debug", False):
191189
warnings.warn(
192-
"`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` to configure debugging options.",
190+
"`debug` is deprecated. Please use with torch_tensorrt.dynamo.Debugger(...) to wrap your compilation call to enable debugging functionality.",
193191
DeprecationWarning,
194192
stacklevel=2,
195193
)
@@ -404,7 +402,6 @@ def compile(
404402
Set[Union[torch.dtype, dtype]], Tuple[Union[torch.dtype, dtype]]
405403
] = _defaults.ENABLED_PRECISIONS,
406404
engine_capability: EngineCapability = _defaults.ENGINE_CAPABILITY,
407-
debug: bool = False,
408405
num_avg_timing_iters: int = _defaults.NUM_AVG_TIMING_ITERS,
409406
workspace_size: int = _defaults.WORKSPACE_SIZE,
410407
dla_sram_size: int = _defaults.DLA_SRAM_SIZE,
@@ -480,7 +477,6 @@ def compile(
480477
assume_dynamic_shape_support (bool): Setting this to true enables the converters work for both dynamic and static shapes. Default: False
481478
sparse_weights (bool): Enable sparsity for convolution and fully connected layers.
482479
enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels
483-
debug (bool): Enable debuggable engine
484480
capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
485481
num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
486482
workspace_size (int): Maximum size of workspace given to TensorRT
@@ -523,9 +519,9 @@ def compile(
523519
torch.fx.GraphModule: Compiled FX Module, when run it will execute via TensorRT
524520
"""
525521

526-
if debug:
522+
if kwargs.get("debug", False):
527523
warnings.warn(
528-
"`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` for debugging functionality",
524+
"`debug` is deprecated. Please use with torch_tensorrt.dynamo.Debugger(...) to wrap your compilation call to enable debugging functionality",
529525
DeprecationWarning,
530526
stacklevel=2,
531527
)
@@ -946,7 +942,8 @@ def contains_metadata(gm: torch.fx.GraphModule) -> bool:
946942
trt_module.enable_profiling()
947943
else:
948944
path = os.path.join(
949-
_debugger_settings.logging_dir, "engine_visualization"
945+
_debugger_settings.logging_dir,
946+
"engine_visualization_profile",
950947
)
951948
os.makedirs(path, exist_ok=True)
952949
trt_module.enable_profiling(
@@ -990,7 +987,6 @@ def convert_exported_program_to_serialized_trt_engine(
990987
enabled_precisions: (
991988
Set[torch.dtype | dtype] | Tuple[torch.dtype | dtype]
992989
) = _defaults.ENABLED_PRECISIONS,
993-
debug: bool = False,
994990
assume_dynamic_shape_support: bool = _defaults.ASSUME_DYNAMIC_SHAPE_SUPPORT,
995991
workspace_size: int = _defaults.WORKSPACE_SIZE,
996992
min_block_size: int = _defaults.MIN_BLOCK_SIZE,
@@ -1052,7 +1048,6 @@ def convert_exported_program_to_serialized_trt_engine(
10521048
torch.randn((1, 3, 224, 244)) # Use an example tensor and let torch_tensorrt infer settings
10531049
]
10541050
enabled_precisions (Optional[Set[torch.dtype | _enums.dtype]]): The set of datatypes that TensorRT can use
1055-
debug (bool): Whether to print out verbose debugging information
10561051
workspace_size (int): Workspace TRT is allowed to use for the module (0 is default)
10571052
min_block_size (int): Minimum number of operators per TRT-Engine Block
10581053
torch_executed_ops (Set[str]): Set of operations to run in Torch, regardless of converter coverage
@@ -1092,9 +1087,9 @@ def convert_exported_program_to_serialized_trt_engine(
10921087
Returns:
10931088
bytes: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs
10941089
"""
1095-
if debug:
1090+
if kwargs.get("debug", False):
10961091
warnings.warn(
1097-
"`debug` is deprecated. Please use `torch_tensorrt.dynamo.Debugger` to configure debugging options.",
1092+
"`debug` is deprecated. Please use with torch_tensorrt.dynamo.Debugger(...) to wrap your compilation call to enable debugging functionality.",
10981093
DeprecationWarning,
10991094
stacklevel=2,
11001095
)
@@ -1181,7 +1176,6 @@ def convert_exported_program_to_serialized_trt_engine(
11811176
compilation_options = {
11821177
"assume_dynamic_shape_support": assume_dynamic_shape_support,
11831178
"enabled_precisions": enabled_precisions,
1184-
"debug": debug,
11851179
"workspace_size": workspace_size,
11861180
"min_block_size": min_block_size,
11871181
"torch_executed_ops": torch_executed_ops,

py/torch_tensorrt/dynamo/_defaults.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
L2_LIMIT_FOR_TILING = -1
5050
USE_DISTRIBUTED_MODE_TRACE = False
5151
OFFLOAD_MODULE_TO_CPU = False
52+
DEBUG_LOGGING_DIR = os.path.join(tempfile.gettempdir(), "torch_tensorrt/debug_logs")
5253

5354

5455
def default_device() -> Device:

py/torch_tensorrt/dynamo/debug/_Debugger.py

Lines changed: 17 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from unittest import mock
99

1010
import torch
11+
from torch_tensorrt.dynamo._defaults import DEBUG_LOGGING_DIR
1112
from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig
1213
from torch_tensorrt.dynamo.debug._supports_debugger import (
1314
_DEBUG_ENABLED_CLS,
@@ -32,7 +33,7 @@ def __init__(
3233
save_engine_profile: bool = False,
3334
profile_format: str = "perfetto",
3435
engine_builder_monitor: bool = True,
35-
logging_dir: str = tempfile.gettempdir(),
36+
logging_dir: str = DEBUG_LOGGING_DIR,
3637
save_layer_info: bool = False,
3738
):
3839
"""Initialize a debugger for TensorRT conversion.
@@ -92,7 +93,7 @@ def __init__(
9293
def __enter__(self) -> None:
9394
self.original_lvl = _LOGGER.getEffectiveLevel()
9495
self.rt_level = torch.ops.tensorrt.get_logging_level()
95-
dictConfig(self.get_customized_logging_config())
96+
dictConfig(self.get_logging_config(self.log_level))
9697

9798
if self.capture_fx_graph_before or self.capture_fx_graph_after:
9899
self.old_pre_passes, self.old_post_passes = (
@@ -141,7 +142,7 @@ def __enter__(self) -> None:
141142

142143
def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None:
143144

144-
dictConfig(self.get_default_logging_config())
145+
dictConfig(self.get_logging_config(None))
145146
torch.ops.tensorrt.set_logging_level(self.rt_level)
146147
if self.capture_fx_graph_before or self.capture_fx_graph_after:
147148
ATEN_PRE_LOWERING_PASSES.passes, ATEN_POST_LOWERING_PASSES.passes = (
@@ -155,46 +156,9 @@ def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None:
155156

156157
self._context_stack.close()
157158

158-
def get_customized_logging_config(self) -> dict[str, Any]:
159-
config = {
160-
"version": 1,
161-
"disable_existing_loggers": False,
162-
"formatters": {
163-
"brief": {
164-
"format": "%(asctime)s - %(levelname)s - %(message)s",
165-
"datefmt": "%H:%M:%S",
166-
},
167-
"standard": {
168-
"format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
169-
"datefmt": "%Y-%m-%d %H:%M:%S",
170-
},
171-
},
172-
"handlers": {
173-
"file": {
174-
"level": self.log_level,
175-
"class": "logging.FileHandler",
176-
"filename": f"{self.cfg.logging_dir}/torch_tensorrt_logging.log",
177-
"formatter": "standard",
178-
},
179-
"console": {
180-
"level": self.log_level,
181-
"class": "logging.StreamHandler",
182-
"formatter": "brief",
183-
},
184-
},
185-
"loggers": {
186-
"": { # root logger
187-
"handlers": ["file", "console"],
188-
"level": self.log_level,
189-
"propagate": True,
190-
},
191-
},
192-
"force": True,
193-
}
194-
return config
195-
196-
def get_default_logging_config(self) -> dict[str, Any]:
197-
config = {
159+
def get_logging_config(self, log_level: Optional[int] = None) -> dict[str, Any]:
160+
level = log_level if log_level is not None else self.original_lvl
161+
config: dict[str, Any] = {
198162
"version": 1,
199163
"disable_existing_loggers": False,
200164
"formatters": {
@@ -209,18 +173,26 @@ def get_default_logging_config(self) -> dict[str, Any]:
209173
},
210174
"handlers": {
211175
"console": {
212-
"level": self.original_lvl,
176+
"level": level,
213177
"class": "logging.StreamHandler",
214178
"formatter": "brief",
215179
},
216180
},
217181
"loggers": {
218182
"": { # root logger
219183
"handlers": ["console"],
220-
"level": self.original_lvl,
184+
"level": level,
221185
"propagate": True,
222186
},
223187
},
224188
"force": True,
225189
}
190+
if log_level is not None:
191+
config["handlers"]["file"] = {
192+
"level": level,
193+
"class": "logging.FileHandler",
194+
"filename": f"{self.cfg.logging_dir}/torch_tensorrt_logging.log",
195+
"formatter": "standard",
196+
}
197+
config["loggers"][""]["handlers"].append("file")
226198
return config
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
import tempfile
21
from dataclasses import dataclass
32

3+
from torch_tensorrt.dynamo._defaults import DEBUG_LOGGING_DIR
4+
45

56
@dataclass
67
class DebuggerConfig:
78
log_level: str = "debug"
89
save_engine_profile: bool = False
910
engine_builder_monitor: bool = True
10-
logging_dir: str = tempfile.gettempdir()
11+
logging_dir: str = DEBUG_LOGGING_DIR
1112
profile_format: str = "perfetto"
1213
save_layer_info: bool = False

py/torch_tensorrt/dynamo/lowering/passes/pass_manager.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import os
2-
import tempfile
32
from typing import Any, Callable, List, Optional
43

54
import torch
65
from torch.fx import passes
76
from torch.fx.passes.pass_manager import PassManager
7+
from torch_tensorrt.dynamo._defaults import DEBUG_LOGGING_DIR
88
from torch_tensorrt.dynamo._settings import CompilationSettings
99

1010

@@ -70,7 +70,7 @@ def remove_pass_with_index(self, index: int) -> None:
7070
del self.passes[index]
7171

7272
def insert_debug_pass_before(
73-
self, passes: List[str], output_path_prefix: str = tempfile.gettempdir()
73+
self, passes: List[str], output_path_prefix: str = DEBUG_LOGGING_DIR
7474
) -> None:
7575
"""Insert debug passes in the PassManager pass sequence prior to the execution of a particular pass.
7676
@@ -96,7 +96,7 @@ def insert_debug_pass_before(
9696
self._validated = False
9797

9898
def insert_debug_pass_after(
99-
self, passes: List[str], output_path_prefix: str = tempfile.gettempdir()
99+
self, passes: List[str], output_path_prefix: str = DEBUG_LOGGING_DIR
100100
) -> None:
101101
"""Insert debug passes in the PassManager pass sequence after the execution of a particular pass.
102102
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
## Introduction
2-
We use the TRT Engine Explorer (TREX) to visualize the engien graph structure. TREX is a diagnostic and profiling tool for TensorRT engine files. It allows you to inspect, benchmark, and debug TensorRT engines with ease.
2+
We use the TRT Engine Explorer (TREX) to visualize the engine graph structure. TREX is a diagnostic and profiling tool for TensorRT engine files. It allows you to inspect, benchmark, and debug TensorRT engines with ease.
33

44
## Installation
55
```bash
66
pip install git+https://github.com/NVIDIA/TensorRT.git#subdirectory=tools/experimental/trt-engine-explorer
77
sudo apt --yes install graphviz
88
```
99

10+
## Usage
11+
Example usage can be found in `draw_engine_graph_example.py`. We use `torch_tensorrt.dynamo.Debugger` to first output the engine profile info that is required by TREX. Note that TREX profiling is only produced when the compilation setting `use_python_runtime=False` is used. Once the profile is saved to a folder, we call `draw_engine` on the same directory where the profile files were saved, which is the subdirectory `engine_visualization_profile`.

tools/debug/engine_visualization/draw_engine_graph_example.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,29 @@
55
import torch
66
import torch_tensorrt as torch_tensorrt
77
import torchvision.models as models
8+
from torch_tensorrt.dynamo._defaults import DEBUG_LOGGING_DIR
89

910
inputs = [torch.rand((1, 3, 224, 224)).to("cuda")]
1011
model = models.resnet18(pretrained=False).eval().to("cuda")
1112
exp_program = torch.export.export(model, tuple(inputs))
12-
enabled_precisions = {torch.float}
13-
workspace_size = 20 << 30
14-
# min_block_size = 0
15-
use_python_runtime = False
16-
torch_executed_ops = {}
17-
logging_dir = "/home/profile"
13+
1814
with torch_tensorrt.dynamo.Debugger(
1915
"graphs",
20-
logging_dir=logging_dir,
16+
logging_dir=DEBUG_LOGGING_DIR,
2117
capture_fx_graph_after=["constant_fold"],
2218
save_engine_profile=True,
19+
profile_format="trex",
2320
):
2421
trt_gm = torch_tensorrt.dynamo.compile(
2522
exp_program,
2623
inputs=inputs,
27-
enabled_precisions=enabled_precisions,
24+
enabled_precisions={torch.float},
2825
truncate_double=True,
2926
use_python_runtime=False,
27+
min_block_size=1,
3028
)
3129
trt_output = trt_gm(*inputs)
3230

3331
from draw_engine_graph import draw_engine
3432

35-
draw_engine(os.path.join(logging_dir, "engine_visualization"))
36-
print()
33+
draw_engine(os.path.join(DEBUG_LOGGING_DIR, "engine_visualization_profile"))

0 commit comments

Comments
 (0)