From 608cafafba176604a506ecc3421fc8e09413db06 Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Tue, 4 Nov 2025 12:03:02 +0100 Subject: [PATCH 1/5] =?UTF-8?q?=F0=9F=90=9B=20Resolve=20negative=20duratio?= =?UTF-8?q?n=20times=20issue.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to rounding timestamps, there were cases where the duration became negative for very small intervals. This commit fixes that by ensuring that any negative durations are set to zero after rounding. It also enables the Frequent CUDA Kernel Patterns analysis which failed so far. Signed-off-by: Benedykt Bela --- hta/common/trace_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hta/common/trace_parser.py b/hta/common/trace_parser.py index 780531b..470e6b2 100644 --- a/hta/common/trace_parser.py +++ b/hta/common/trace_parser.py @@ -377,6 +377,8 @@ def round_down_time_stamps(df: pd.DataFrame) -> None: df["ts"] = df[~df["ts"].isnull()]["ts"].apply(lambda x: math.ceil(x)) df["end"] = df[~df["end"].isnull()]["end"].apply(lambda x: math.floor(x)) df["dur"] = df["end"] - df["ts"] + # Fix negative durations that can occur due to rounding very small time intervals. + df.loc[df["dur"] < 0, "dur"] = 0 # @profile From 15588cee2f5ba4861cd9aab7099cb6214795eb5e Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Wed, 5 Nov 2025 16:31:17 +0200 Subject: [PATCH 2/5] Add unittest for function round_down_time_stamps. 
Signed-off-by: Benedykt Bela --- tests/test_trace_parse.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_trace_parse.py b/tests/test_trace_parse.py index e64a268..f2c925f 100644 --- a/tests/test_trace_parse.py +++ b/tests/test_trace_parse.py @@ -20,6 +20,7 @@ parse_metadata_ijson, parse_trace_dataframe, ParserBackend, + round_down_time_stamps, set_default_trace_parsing_backend, ) from hta.common.trace_symbol_table import TraceSymbolTable @@ -623,6 +624,26 @@ def test_fix_mtia_memory_kernels(self) -> None: # Validate results pd.testing.assert_frame_equal(fixed_df, expected_df) + def test_round_down_time_stamps(self) -> None: + """Test that round_down_time_stamps never produces negative durations.""" + + # Test case 1: Very small durations that could become negative after rounding. + test_data = { + "ts": [100.3, 200.7, 300.1, 400.9], + "dur": [0.3, 0.2, 0.8, 0.1], + } + df = pd.DataFrame(test_data) + df["ts"] = df["ts"].astype("float64") + df["dur"] = df["dur"].astype("float64") + + round_down_time_stamps(df) + + # Assert no negative durations. + self.assertTrue( + (df["dur"] >= 0).all(), + "Found negative duration times which should not occur after rounding down timestamps!", + ) + if __name__ == "__main__": # pragma: no cover unittest.main() From 0935ba350e84f3d7e2e43338dc4065780d78cbf6 Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Wed, 19 Nov 2025 11:33:56 +0200 Subject: [PATCH 3/5] Resolve issues with negative values. 
Signed-off-by: Benedykt Bela --- hta/analyzers/breakdown_analysis.py | 2 ++ hta/analyzers/trace_counters.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/hta/analyzers/breakdown_analysis.py b/hta/analyzers/breakdown_analysis.py index 1b5691b..7f943a5 100644 --- a/hta/analyzers/breakdown_analysis.py +++ b/hta/analyzers/breakdown_analysis.py @@ -772,6 +772,8 @@ def _analyze_idle_time_for_stream( gpu_kernels_s["idle_interval"] = ( gpu_kernels_s["ts"] - gpu_kernels_s["prev_end_ts"] ) + # Handle negative idle intervals that can occur due to rounding errors. + gpu_kernels_s.loc[gpu_kernels_s["idle_interval"] < 0, "idle_interval"] = 0 # Default idle time category gpu_kernels_s["idle_category"] = IdleTimeType.OTHER.value diff --git a/hta/analyzers/trace_counters.py b/hta/analyzers/trace_counters.py index babf1a9..2e7879a 100644 --- a/hta/analyzers/trace_counters.py +++ b/hta/analyzers/trace_counters.py @@ -314,6 +314,8 @@ def _get_memory_bw_time_series_for_rank( result_df_list = [] for _, membw_df in membw_time_series.groupby("name"): membw_df.memory_bw_gbps = membw_df.memory_bw_gbps.cumsum() + # Fix floating-point precision errors that can result in tiny negative values. + membw_df.loc[membw_df.memory_bw_gbps < 0, "memory_bw_gbps"] = 0 result_df_list.append(membw_df) if len(result_df_list) == 0: From 363096023e731270a08fa2c6a9830f3cc6e54f30 Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Wed, 19 Nov 2025 11:34:29 +0200 Subject: [PATCH 4/5] Add mechanism to round idle time ratios so they sum up to 1.0. 
Signed-off-by: Benedykt Bela --- hta/analyzers/breakdown_analysis.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hta/analyzers/breakdown_analysis.py b/hta/analyzers/breakdown_analysis.py index 7f943a5..c6e95f8 100644 --- a/hta/analyzers/breakdown_analysis.py +++ b/hta/analyzers/breakdown_analysis.py @@ -743,6 +743,21 @@ def idle_time_per_rank(trace_df: pd.DataFrame) -> Tuple[int, int, int, int]: ] ] + def _round_preserving_sum(group: pd.DataFrame) -> pd.DataFrame: + """Round idle time ratios while preserving the constraint + that they sum to 1.0 (100%) per stream.""" + + ratios = group["idle_time_ratio"].round(2) + ratio_sum = ratios.sum() + + if ratio_sum != 1.0 and 0 < ratio_sum: + max_idx = ratios.idxmax() + ratios.loc[max_idx] = ratios.loc[max_idx] + (1.0 - ratio_sum) + + group["idle_time_ratio"] = ratios + + return group + @classmethod def _analyze_idle_time_for_stream( cls, @@ -931,6 +946,8 @@ def get_idle_time_breakdown( mapper=idle_category_name_map, axis=0, inplace=True ) + grouped_result_df = result_df.groupby("stream", group_keys=False) + result_df = grouped_result_df.apply(cls._round_preserving_sum) result_df = result_df[ ["rank", "stream", "idle_category", "idle_time", "idle_time_ratio"] ].round(2) From ed7ed1eee38352019213995d160a53a075c370a2 Mon Sep 17 00:00:00 2001 From: Benedykt Bela Date: Wed, 19 Nov 2025 12:23:20 +0200 Subject: [PATCH 5/5] Change approach to round very small float numbers to zero. Initial approach was to set all negative values to 0.0. The more precise approach is to round to zero both positive and negative values that are smaller than a defined accuracy which is currently set to 1e-9. 
Signed-off-by: Benedykt Bela --- hta/analyzers/trace_counters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hta/analyzers/trace_counters.py b/hta/analyzers/trace_counters.py index 2e7879a..9ab8299 100644 --- a/hta/analyzers/trace_counters.py +++ b/hta/analyzers/trace_counters.py @@ -314,8 +314,8 @@ def _get_memory_bw_time_series_for_rank( result_df_list = [] for _, membw_df in membw_time_series.groupby("name"): membw_df.memory_bw_gbps = membw_df.memory_bw_gbps.cumsum() - # Fix floating-point precision errors that can result in tiny negative values. - membw_df.loc[membw_df.memory_bw_gbps < 0, "memory_bw_gbps"] = 0 + # Fix floating-point precision errors that can result in very tiny values. + membw_df.loc[abs(membw_df.memory_bw_gbps) < 1e-9, "memory_bw_gbps"] = 0 result_df_list.append(membw_df) if len(result_df_list) == 0: