Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions primus/backends/megatron/training/mlflow_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def generate_tracelens_report(
- "all" (default): Both XLSX and CSV files
- "xlsx": Single multi-tab Excel file with detailed analysis
- "csv": Multiple CSV files (kernels, memory, communication, etc.)
- "html": Interactive HTML report
- "html": Interactive HTML report (not yet supported, falls back to xlsx+csv)

Returns:
List of paths to generated report files
Expand Down Expand Up @@ -376,12 +376,27 @@ def generate_tracelens_report(
generated_files.extend(csv_files)

if output_format == "html":
warning_rank_0("[TraceLens] HTML format not yet supported, using xlsx+csv")
# Fall through to xlsx
warning_rank_0("[TraceLens] HTML format not yet supported, generating xlsx+csv instead")
# Generate both XLSX and CSV formats as fallback
# XLSX: Single file with multiple tabs
xlsx_path = os.path.join(output_dir, f"{report_name}_analysis.xlsx")
dfs = generate_perf_report_pytorch(trace_file, output_xlsx_path=xlsx_path)
xlsx_dfs = generate_perf_report_pytorch(trace_file, output_xlsx_path=xlsx_path)
if os.path.exists(xlsx_path):
log_rank_0(
f"[TraceLens] Generated XLSX report with {len(xlsx_dfs)} tabs: {os.path.basename(xlsx_path)}"
)
generated_files.append(xlsx_path)

# CSV: Multiple files in a subdirectory
csv_subdir = os.path.join(output_dir, report_name)
os.makedirs(csv_subdir, exist_ok=True)
generate_perf_report_pytorch(trace_file, output_csvs_dir=csv_subdir)
Copy link

Copilot AI Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent variable naming: The return value from generate_perf_report_pytorch is stored in 'xlsx_dfs' for consistency (line 383), but the CSV generation call on line 393 ignores the return value. For consistency with line 370 (the "all"/"csv" branch), this should be stored in a variable like 'csv_dfs' even if not immediately used, to maintain code consistency.

Suggested change
generate_perf_report_pytorch(trace_file, output_csvs_dir=csv_subdir)
csv_dfs = generate_perf_report_pytorch(trace_file, output_csvs_dir=csv_subdir)

Copilot uses AI. Check for mistakes.

# Collect all generated CSV files
csv_files = glob.glob(os.path.join(csv_subdir, "*.csv"))
if csv_files:
log_rank_0(f"[TraceLens] Generated {len(csv_files)} CSV files for {report_name}")
generated_files.extend(csv_files)
Comment on lines 378 to +399
Copy link

Copilot AI Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code duplication: The HTML format handler duplicates the logic from the "all" branch (lines 356-376). Consider refactoring to avoid duplication - for example, treating output_format == "html" the same as "all" by changing the condition on line 356 to 'if output_format in ("all", "xlsx", "html")' and line 366 to 'if output_format in ("all", "csv", "html")', then only keeping the warning message unique to the HTML case.

Copilot uses AI. Check for mistakes.

if generated_files:
return generated_files
Expand Down Expand Up @@ -524,6 +539,7 @@ def generate_tracelens_reports(
- "all" (default): Both XLSX and CSV files
- "xlsx": Multi-tab Excel with detailed analysis
- "csv": Multiple CSV files per rank (kernels, memory, comm, etc.)
- "html": Interactive HTML report (not yet supported, falls back to xlsx+csv)

Returns:
List of paths to all generated report files
Expand Down Expand Up @@ -583,7 +599,7 @@ def upload_tracelens_reports_to_mlflow(
exp_root_path: Root path of the experiment (for saving reports)
ranks: List of ranks to analyze (None = all ranks, [0] = rank 0 only)
max_reports: Maximum number of reports to generate
output_format: Report format - "all" (default, xlsx+csv), "xlsx", or "csv"
output_format: Report format - "all" (default, xlsx+csv), "xlsx", "csv", or "html" (falls back to xlsx+csv)
artifact_path: MLflow artifact subdirectory for reports

Returns:
Expand Down Expand Up @@ -672,7 +688,7 @@ def upload_artifacts_to_mlflow(
tracelens_ranks: List of ranks to generate TraceLens reports for
(None = all ranks, [0] = rank 0 only)
tracelens_max_reports: Maximum number of TraceLens reports to generate
tracelens_output_format: Report format - "all" (default, xlsx+csv), "xlsx", or "csv"
tracelens_output_format: Report format - "all" (default, xlsx+csv), "xlsx", "csv", or "html" (falls back to xlsx+csv)

Returns:
Dictionary with counts of uploaded files:
Expand Down
Loading