From 608cafafba176604a506ecc3421fc8e09413db06 Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Tue, 4 Nov 2025 12:03:02 +0100 Subject: [PATCH 1/5] =?UTF-8?q?=F0=9F=90=9B=20Resolve=20negative=20duratio?= =?UTF-8?q?n=20times=20issue.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to rounding timestamps, there were cases where the duration became negative for very small intervals. This commit fixes that by ensuring that any negative durations are set to zero after rounding. It also enables the Frequent CUDA Kernel Patterns analysis which failed so far. Signed-off-by: Benedykt Bela --- hta/common/trace_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hta/common/trace_parser.py b/hta/common/trace_parser.py index 780531b..470e6b2 100644 --- a/hta/common/trace_parser.py +++ b/hta/common/trace_parser.py @@ -377,6 +377,8 @@ def round_down_time_stamps(df: pd.DataFrame) -> None: df["ts"] = df[~df["ts"].isnull()]["ts"].apply(lambda x: math.ceil(x)) df["end"] = df[~df["end"].isnull()]["end"].apply(lambda x: math.floor(x)) df["dur"] = df["end"] - df["ts"] + # Fix negative durations that can occur due to rounding very small time intervals. + df.loc[df["dur"] < 0, "dur"] = 0 # @profile From 15588cee2f5ba4861cd9aab7099cb6214795eb5e Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Wed, 5 Nov 2025 16:31:17 +0200 Subject: [PATCH 2/5] Add unittest for function round_down_time_stamps. 
Signed-off-by: Benedykt Bela --- tests/test_trace_parse.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_trace_parse.py b/tests/test_trace_parse.py index e64a268..f2c925f 100644 --- a/tests/test_trace_parse.py +++ b/tests/test_trace_parse.py @@ -20,6 +20,7 @@ parse_metadata_ijson, parse_trace_dataframe, ParserBackend, + round_down_time_stamps, set_default_trace_parsing_backend, ) from hta.common.trace_symbol_table import TraceSymbolTable @@ -623,6 +624,26 @@ def test_fix_mtia_memory_kernels(self) -> None: # Validate results pd.testing.assert_frame_equal(fixed_df, expected_df) + def test_round_down_time_stamps(self) -> None: + """Test that round_down_time_stamps never produces negative durations.""" + + # Test case 1: Very small durations that could become negative after rounding. + test_data = { + "ts": [100.3, 200.7, 300.1, 400.9], + "dur": [0.3, 0.2, 0.8, 0.1], + } + df = pd.DataFrame(test_data) + df["ts"] = df["ts"].astype("float64") + df["dur"] = df["dur"].astype("float64") + + round_down_time_stamps(df) + + # Assert no negative durations. + self.assertTrue( + (df["dur"] >= 0).all(), + "Found negative duration times which should not occur after rounding down timestamps!", + ) + if __name__ == "__main__": # pragma: no cover unittest.main() From 0935ba350e84f3d7e2e43338dc4065780d78cbf6 Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Wed, 19 Nov 2025 11:33:56 +0200 Subject: [PATCH 3/5] Resolve issues with negative values. 
Signed-off-by: Benedykt Bela --- hta/analyzers/breakdown_analysis.py | 2 ++ hta/analyzers/trace_counters.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/hta/analyzers/breakdown_analysis.py b/hta/analyzers/breakdown_analysis.py index 1b5691b..7f943a5 100644 --- a/hta/analyzers/breakdown_analysis.py +++ b/hta/analyzers/breakdown_analysis.py @@ -772,6 +772,8 @@ def _analyze_idle_time_for_stream( gpu_kernels_s["idle_interval"] = ( gpu_kernels_s["ts"] - gpu_kernels_s["prev_end_ts"] ) + # Handle negative idle intervals that can occur due to rounding errors. + gpu_kernels_s.loc[gpu_kernels_s["idle_interval"] < 0, "idle_interval"] = 0 # Default idle time category gpu_kernels_s["idle_category"] = IdleTimeType.OTHER.value diff --git a/hta/analyzers/trace_counters.py b/hta/analyzers/trace_counters.py index babf1a9..2e7879a 100644 --- a/hta/analyzers/trace_counters.py +++ b/hta/analyzers/trace_counters.py @@ -314,6 +314,8 @@ def _get_memory_bw_time_series_for_rank( result_df_list = [] for _, membw_df in membw_time_series.groupby("name"): membw_df.memory_bw_gbps = membw_df.memory_bw_gbps.cumsum() + # Fix floating-point precision errors that can result in tiny negative values. + membw_df.loc[membw_df.memory_bw_gbps < 0, "memory_bw_gbps"] = 0 result_df_list.append(membw_df) if len(result_df_list) == 0: From 363096023e731270a08fa2c6a9830f3cc6e54f30 Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Wed, 19 Nov 2025 11:34:29 +0200 Subject: [PATCH 4/5] Add mechanism to round idle time ratios so they sum up to 1.0. 
Signed-off-by: Benedykt Bela --- hta/analyzers/breakdown_analysis.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hta/analyzers/breakdown_analysis.py b/hta/analyzers/breakdown_analysis.py index 7f943a5..c6e95f8 100644 --- a/hta/analyzers/breakdown_analysis.py +++ b/hta/analyzers/breakdown_analysis.py @@ -743,6 +743,21 @@ def idle_time_per_rank(trace_df: pd.DataFrame) -> Tuple[int, int, int, int]: ] ] + def _round_preserving_sum(group: pd.DataFrame) -> pd.DataFrame: + """Round idle time ratios while preserving the constraint + that they sum to 1.0 (100%) per stream.""" + + ratios = group["idle_time_ratio"].round(2) + ratio_sum = ratios.sum() + + if ratio_sum != 1.0 and 0 < ratio_sum: + max_idx = ratios.idxmax() + ratios.loc[max_idx] = ratios.loc[max_idx] + (1.0 - ratio_sum) + + group["idle_time_ratio"] = ratios + + return group + @classmethod def _analyze_idle_time_for_stream( cls, @@ -931,6 +946,8 @@ def get_idle_time_breakdown( mapper=idle_category_name_map, axis=0, inplace=True ) + grouped_result_df = result_df.groupby("stream", group_keys=False) + result_df = grouped_result_df.apply(cls._round_preserving_sum) result_df = result_df[ ["rank", "stream", "idle_category", "idle_time", "idle_time_ratio"] ].round(2) From ed7ed1eee38352019213995d160a53a075c370a2 Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Wed, 19 Nov 2025 12:23:20 +0200 Subject: [PATCH 5/5] Change approach to round very small float numbers to zero. Initial approach was to set all negative values to 0.0. The more precise approach is to round to zero both positive and negative values that are smaller than a defined accuracy which is currently set to 1e-9. 
Signed-off-by: Benedykt Bela --- hta/analyzers/trace_counters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hta/analyzers/trace_counters.py b/hta/analyzers/trace_counters.py index 2e7879a..9ab8299 100644 --- a/hta/analyzers/trace_counters.py +++ b/hta/analyzers/trace_counters.py @@ -314,8 +314,8 @@ def _get_memory_bw_time_series_for_rank( result_df_list = [] for _, membw_df in membw_time_series.groupby("name"): membw_df.memory_bw_gbps = membw_df.memory_bw_gbps.cumsum() - # Fix floating-point precision errors that can result in tiny negative values. - membw_df.loc[membw_df.memory_bw_gbps < 0, "memory_bw_gbps"] = 0 + # Fix floating-point precision errors that can result in very tiny values. + membw_df.loc[abs(membw_df.memory_bw_gbps) < 1e-9, "memory_bw_gbps"] = 0 result_df_list.append(membw_df) if len(result_df_list) == 0: