diff --git a/src/cli.rs b/src/cli.rs index 3a4b243..6c645d6 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -6,6 +6,24 @@ use std::path::PathBuf; use tlparse::{parse_path, ParseConfig}; +// Main output filename used by both single rank and multi-rank processing +const MAIN_OUTPUT_FILENAME: &str = "index.html"; + +// Helper function to setup output directory (handles overwrite logic) +fn setup_output_directory(out_path: &PathBuf, overwrite: bool) -> anyhow::Result<()> { + if out_path.exists() { + if !overwrite { + bail!( + "Directory {} already exists, use -o OUTDIR to write to another location or pass --overwrite to overwrite the old contents", + out_path.display() + ); + } + fs::remove_dir_all(&out_path)?; + } + fs::create_dir(&out_path)?; + Ok(()) +} + #[derive(Parser)] #[command(author, version, about, long_about = None)] #[command(propagate_version = true)] @@ -47,12 +65,20 @@ pub struct Cli { /// For inductor provenance tracking highlighter #[arg(short, long)] inductor_provenance: bool, + /// Parse all ranks and generate a single unified HTML page + #[arg(long)] + all_ranks_html: bool, } fn main() -> anyhow::Result<()> { let cli = Cli::parse(); + + if cli.all_ranks_html { + return handle_all_ranks(cli); + } + let path = if cli.latest { - let input_path = cli.path; + let input_path = &cli.path; // Path should be a directory if !input_path.is_dir() { bail!( @@ -61,7 +87,7 @@ fn main() -> anyhow::Result<()> { ); } - let last_modified_file = std::fs::read_dir(&input_path) + let last_modified_file = std::fs::read_dir(input_path) .with_context(|| format!("Couldn't access directory {}", input_path.display()))? .flatten() .filter(|f| f.metadata().unwrap().is_file()) @@ -72,45 +98,212 @@ fn main() -> anyhow::Result<()> { }; last_modified_file.path() } else { - cli.path + cli.path.clone() }; - let out_path = cli.out; + let out_path = cli.out.clone(); + setup_output_directory(&out_path, cli.overwrite)?; - if out_path.exists() { - if !cli.overwrite { - bail!( - "Directory {} already exists, use -o OUTDIR to write to another location or pass --overwrite to overwrite the old contents", - out_path.display() - ); - } - fs::remove_dir_all(&out_path)?; + // Use handle_one_rank for single rank processing (don't create directory since it already exists) + handle_one_rank(&path, &out_path, &cli, false)?; + + if !cli.no_browser { + opener::open(out_path.join("index.html"))?; + } + Ok(()) +} + +// Helper function to handle parsing and writing output for a single rank +// Returns the relative path to the main output file within the rank directory +fn handle_one_rank( + rank_path: &PathBuf, + rank_out_dir: &PathBuf, + cli: &Cli, + create_output_dir: bool, +) -> anyhow::Result { + if create_output_dir { + fs::create_dir(rank_out_dir)?; } - fs::create_dir(&out_path)?; let config = ParseConfig { strict: cli.strict, strict_compile_id: cli.strict_compile_id, custom_parsers: Vec::new(), - custom_header_html: cli.custom_header_html, + custom_header_html: cli.custom_header_html.clone(), verbose: cli.verbose, plain_text: cli.plain_text, export: cli.export, inductor_provenance: cli.inductor_provenance, + all_ranks: false, }; - let output = parse_path(&path, config)?; + let output = parse_path(rank_path, config)?; + + let mut main_output_path = None; - for (filename, path) in output { - let out_file = out_path.join(filename); + // Write output files to rank subdirectory + for (filename, content) in output { + let out_file = rank_out_dir.join(&filename); if let Some(dir) = out_file.parent() { fs::create_dir_all(dir)?; } - fs::write(out_file, path)?; + fs::write(out_file, content)?; + + // Track the main output file (typically index.html) + if filename.file_name().and_then(|name| name.to_str()) == Some(MAIN_OUTPUT_FILENAME) { + main_output_path = Some(filename); + } } + Ok(main_output_path.unwrap_or_else(|| PathBuf::from(MAIN_OUTPUT_FILENAME))) +} + +// handle_all_ranks function with placeholder landing page +fn handle_all_ranks(cli: Cli) -> anyhow::Result<()> { + let input_path = &cli.path; + + if !input_path.is_dir() { + bail!( + "Input path {} must be a directory when using --all-ranks-html", + input_path.display() + ); + } + + let out_path = &cli.out; + setup_output_directory(out_path, cli.overwrite)?; + + // Find all rank log files in the directory + let rank_files: Vec<_> = std::fs::read_dir(input_path) + .with_context(|| format!("Couldn't access directory {}", input_path.display()))? + .flatten() + .filter(|entry| { + let path = entry.path(); + if !path.is_file() { + return false; + } + + let Some(filename) = path.file_name().and_then(|name| name.to_str()) else { + return false; + }; + + // Only support PyTorch TORCH_TRACE files: dedicated_log_torch_trace_rank_0_hash.log + if !filename.starts_with("dedicated_log_torch_trace_rank_") + || !filename.ends_with(".log") + { + return false; + } + + // Extract rank number from the pattern + let after_prefix = &filename[31..]; // Remove "dedicated_log_torch_trace_rank_" + if let Some(underscore_pos) = after_prefix.find('_') { + let rank_part = &after_prefix[..underscore_pos]; + return !rank_part.is_empty() && rank_part.chars().all(|c| c.is_ascii_digit()); + } + + false + }) + .collect(); + + if rank_files.is_empty() { + bail!( + "No rank log files found in directory {}", + input_path.display() + ); + } + + let mut rank_links = Vec::new(); + + // Process each rank file + for rank_file in rank_files { + let rank_path = rank_file.path(); + let rank_name = rank_path + .file_stem() + .and_then(|name| name.to_str()) + .unwrap_or("unknown"); + + // Extract rank number from PyTorch TORCH_TRACE filename + let rank_num = + if let Some(after_prefix) = rank_name.strip_prefix("dedicated_log_torch_trace_rank_") { + if let Some(underscore_pos) = after_prefix.find('_') { + let rank_part = &after_prefix[..underscore_pos]; + if rank_part.is_empty() || !rank_part.chars().all(|c| c.is_ascii_digit()) { + bail!( + "Could not extract rank number from TORCH_TRACE filename: {}", + rank_name + ); + } + rank_part.to_string() + } else { + bail!("Invalid TORCH_TRACE filename format: {}", rank_name); + } + } else { + bail!( + "Filename does not match PyTorch TORCH_TRACE pattern: {}", + rank_name + ); + }; + + println!( + "Processing rank {} from file: {}", + rank_num, + rank_path.display() + ); + + let rank_out_dir = out_path.join(format!("rank_{rank_num}")); + let main_output_path = handle_one_rank(&rank_path, &rank_out_dir, &cli, true)?; + + // Add link to this rank's page using the actual output path + let rank_link = format!("rank_{rank_num}/{}", main_output_path.display()); + rank_links.push((rank_num.clone(), rank_link)); + } + + // Sort rank links by rank number + rank_links.sort_by(|a, b| { + let a_num: i32 = + a.0.parse() + .expect(&format!("Failed to parse rank number from '{}'", a.0)); + let b_num: i32 = + b.0.parse() + .expect(&format!("Failed to parse rank number from '{}'", b.0)); + a_num.cmp(&b_num) + }); + + // Generate landing page HTML using template system + use tinytemplate::TinyTemplate; + use tlparse::{MultiRankContext, RankInfo, CSS, JAVASCRIPT, TEMPLATE_MULTI_RANK_INDEX}; + + let mut tt = TinyTemplate::new(); + tt.add_formatter("format_unescaped", tinytemplate::format_unescaped); + tt.add_template("multi_rank_index.html", TEMPLATE_MULTI_RANK_INDEX)?; + + let ranks: Vec = rank_links + .iter() + .map(|(rank_num, link)| RankInfo { + number: rank_num.clone(), + link: link.clone(), + }) + .collect(); + + let context = MultiRankContext { + css: CSS, + javascript: JAVASCRIPT, + custom_header_html: cli.custom_header_html, + rank_count: rank_links.len(), + ranks, + }; + + let landing_html = tt.render("multi_rank_index.html", &context)?; + + fs::write(out_path.join("index.html"), landing_html)?; + + println!( + "Generated multi-rank report with {} ranks", + rank_links.len() + ); + if !cli.no_browser { opener::open(out_path.join("index.html"))?; } + Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index ff7110e..6155656 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,8 @@ pub mod parsers; mod templates; mod types; +pub use crate::templates::{CSS, JAVASCRIPT, TEMPLATE_MULTI_RANK_INDEX}; + #[derive(Debug)] enum ParserResult { NoPayload, @@ -37,6 +39,22 @@ pub struct ParseConfig { pub plain_text: bool, pub export: bool, pub inductor_provenance: bool, + pub all_ranks: bool, +} + +#[derive(serde::Serialize)] +pub struct RankInfo { + pub number: String, + pub link: String, +} + +#[derive(serde::Serialize)] +pub struct MultiRankContext { + pub css: &'static str, + pub javascript: &'static str, + pub custom_header_html: String, + pub rank_count: usize, + pub ranks: Vec, } impl Default for ParseConfig { @@ -50,6 +68,7 @@ impl Default for ParseConfig { plain_text: false, export: false, inductor_provenance: false, + all_ranks: false, } } } diff --git a/src/templates.rs b/src/templates.rs index 4f31e0c..32fdd1f 100644 --- a/src/templates.rs +++ b/src/templates.rs @@ -157,7 +157,7 @@ and avoid inlining the function in the first place.

When compiled autograd is enabled, the compile id will include a prefix signifier [!a/x/y], -where a is the compiled autograd id. For instance, [!0/-/-] refers +where a is the compiled autograd id. For instance, [!0/-/-] refers to the first graph captured by compiled autograd. It is then traced by torch.compile as [!0/x/y_z].

@@ -486,7 +486,7 @@ you may address them. {{ for failure in failures }} - + @@ -529,3 +529,34 @@ pub static TEMPLATE_SYMBOLIC_GUARD_INFO: &str = r#" pub static PROVENANCE_CSS: &str = include_str!("provenance.css"); pub static PROVENANCE_JS: &str = include_str!("provenance.js"); pub static TEMPLATE_PROVENANCE_TRACKING: &str = include_str!("provenance.html"); + +pub static TEMPLATE_MULTI_RANK_INDEX: &str = r#" + + + + + + +
+{custom_header_html | format_unescaped} +

Multi-Rank TLParse Report

+

+This report contains compilation information from {rank_count} distributed training rank(s). +Each rank ran independently and generated its own compilation artifacts. Click on any rank below +to view its detailed compilation report, including stack traces, IR dumps, and performance metrics. +

+

+Ranks processed: +

+ +
+ +"#;
Failure Type Reason Additional Info
{failure.failure_type | format_unescaped} {failure.reason | format_unescaped} {failure.additional_info | format_unescaped}