diff --git a/mdbook-spec/Cargo.lock b/mdbook-spec/Cargo.lock index d77a440..971edaf 100644 --- a/mdbook-spec/Cargo.lock +++ b/mdbook-spec/Cargo.lock @@ -388,6 +388,7 @@ version = "0.0.0" dependencies = [ "anyhow", "mdbook", + "once_cell", "pathdiff", "regex", "semver", diff --git a/mdbook-spec/Cargo.toml b/mdbook-spec/Cargo.toml index bbfd903..50a4b3d 100644 --- a/mdbook-spec/Cargo.toml +++ b/mdbook-spec/Cargo.toml @@ -8,6 +8,7 @@ license = "MIT OR Apache-2.0" [dependencies] anyhow = "1.0.79" mdbook = { version = "0.4.36", default-features = false } +once_cell = "1.19.0" pathdiff = "0.2.1" regex = "1.10.3" semver = "1.0.21" diff --git a/mdbook-spec/src/main.rs b/mdbook-spec/src/main.rs index 7e617f0..255da02 100644 --- a/mdbook-spec/src/main.rs +++ b/mdbook-spec/src/main.rs @@ -2,14 +2,24 @@ use mdbook::book::{Book, Chapter}; use mdbook::errors::Error; use mdbook::preprocess::{CmdPreprocessor, Preprocessor, PreprocessorContext}; use mdbook::BookItem; +use once_cell::sync::Lazy; use regex::{Captures, Regex}; use semver::{Version, VersionReq}; use std::collections::BTreeMap; -use std::fmt::Write as _; -use std::fs; -use std::io::{self, Write as _}; +use std::io; use std::path::PathBuf; -use std::process::{self, Command}; +use std::process; + +mod std_links; + +/// The Regex for rules like `r[foo]`. +static RULE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?m)^r\[([^]]+)]$").unwrap()); + +/// The Regex for the syntax for blockquotes that have a specific CSS class, +/// like `> [!WARNING]`. +static ADMONITION_RE: Lazy<Regex> = Lazy::new(|| { + Regex::new(r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>
(?: *> .*\n)+)").unwrap() +}); fn main() { let mut args = std::env::args().skip(1); @@ -56,41 +66,15 @@ fn handle_preprocessing(pre: &dyn Preprocessor) -> Result<(), Error> { } struct Spec { + /// Whether or not warnings should be errors (set by SPEC_DENY_WARNINGS + /// environment variable). deny_warnings: bool, - rule_re: Regex, - admonition_re: Regex, - std_link_re: Regex, - std_link_extract_re: Regex, } impl Spec { pub fn new() -> Spec { - // This is roughly a rustdoc intra-doc link definition. - let std_link = r"(?: [a-z]+@ )? - (?: std|core|alloc|proc_macro|test ) - (?: ::[A-Za-z_!:<>{}()\[\]]+ )?"; Spec { deny_warnings: std::env::var("SPEC_DENY_WARNINGS").as_deref() == Ok("1"), - rule_re: Regex::new(r"(?m)^r\[([^]]+)]$").unwrap(), - admonition_re: Regex::new( - r"(?m)^ *> \[!(?<admon>[^]]+)\]\n(?<blockquote>
(?: *> .*\n)+)", - ) - .unwrap(), - std_link_re: Regex::new(&format!( - r"(?x) - (?: - ( \[`[^`]+`\] ) \( ({std_link}) \) - ) - | (?: - ( \[`{std_link}`\] ) - ) - " - )) - .unwrap(), - std_link_extract_re: Regex::new( - r#"
<li><a [^>]*href="(https://doc.rust-lang.org/[^"]+)""#, - ) - .unwrap(), } } @@ -103,7 +87,7 @@ impl Spec { ) -> String { let source_path = chapter.source_path.clone().unwrap_or_default(); let path = chapter.path.clone().unwrap_or_default(); - self.rule_re + RULE_RE .replace_all(&chapter.content, |caps: &Captures| { let rule_id = &caps[1]; if let Some((old, _)) = @@ -165,7 +149,7 @@ impl Spec { /// be a CSS class is valid. The actual styling needs to be added in a CSS /// file. fn admonitions(&self, chapter: &Chapter) -> String { - self.admonition_re + ADMONITION_RE .replace_all(&chapter.content, |caps: &Captures| { let lower = caps["admon"].to_lowercase(); format!( @@ -175,122 +159,6 @@ impl Spec { }) .to_string() } - - /// Converts links to the standard library to the online documentation in - /// a fashion similar to rustdoc intra-doc links. - fn std_links(&self, chapter: &Chapter) -> String { - // This is very hacky, but should work well enough. - // - // Collect all standard library links. - // - // links are tuples of ("[`std::foo`]", None) for links without dest, - // or ("[`foo`]", "std::foo") with a dest. - let mut links: Vec<_> = self - .std_link_re - .captures_iter(&chapter.content) - .map(|cap| { - if let Some(no_dest) = cap.get(3) { - (no_dest.as_str(), None) - } else { - ( - cap.get(1).unwrap().as_str(), - Some(cap.get(2).unwrap().as_str()), - ) - } - }) - .collect(); - if links.is_empty() { - return chapter.content.clone(); - } - links.sort(); - links.dedup(); - - // Write a Rust source file to use with rustdoc to generate intra-doc links. - let tmp = tempfile::TempDir::with_prefix("mdbook-spec-").unwrap(); - let src_path = tmp.path().join("a.rs"); - // Allow redundant since there could some in-scope things that are - // technically not necessary, but we don't care about (like - // [`Option`](std::option::Option)).
- let mut src = format!( - "#![deny(rustdoc::broken_intra_doc_links)]\n\ - #![allow(rustdoc::redundant_explicit_links)]\n" - ); - for (link, dest) in &links { - write!(src, "//! - {link}").unwrap(); - if let Some(dest) = dest { - write!(src, "({})", dest).unwrap(); - } - src.push('\n'); - } - writeln!( - src, - "extern crate alloc;\n\ - extern crate proc_macro;\n\ - extern crate test;\n" - ) - .unwrap(); - fs::write(&src_path, &src).unwrap(); - let output = Command::new("rustdoc") - .arg("--edition=2021") - .arg(&src_path) - .current_dir(tmp.path()) - .output() - .expect("rustdoc installed"); - if !output.status.success() { - eprintln!( - "error: failed to extract std links ({:?}) in chapter {} ({:?})\n", - output.status, - chapter.name, - chapter.source_path.as_ref().unwrap() - ); - io::stderr().write_all(&output.stderr).unwrap(); - process::exit(1); - } - - // Extract the links from the generated html. - let generated = - fs::read_to_string(tmp.path().join("doc/a/index.html")).expect("index.html generated"); - let urls: Vec<_> = self - .std_link_extract_re - .captures_iter(&generated) - .map(|cap| cap.get(1).unwrap().as_str()) - .collect(); - if urls.len() != links.len() { - eprintln!( - "error: expected rustdoc to generate {} links, but found {} in chapter {} ({:?})", - links.len(), - urls.len(), - chapter.name, - chapter.source_path.as_ref().unwrap() - ); - process::exit(1); - } - - // Replace any disambiguated links with just the disambiguation. - let mut output = self - .std_link_re - .replace_all(&chapter.content, |caps: &Captures| { - if let Some(dest) = caps.get(2) { - // Replace destination parenthesis with a link definition (square brackets). - format!("{}[{}]", &caps[1], dest.as_str()) - } else { - caps[0].to_string() - } - }) - .to_string(); - - // Append the link definitions to the bottom of the chapter. 
- write!(output, "\n").unwrap(); - for ((link, dest), url) in links.iter().zip(urls) { - if let Some(dest) = dest { - write!(output, "[{dest}]: {url}\n").unwrap(); - } else { - write!(output, "{link}: {url}\n").unwrap(); - } - } - - output - } } impl Preprocessor for Spec { @@ -300,27 +168,28 @@ impl Preprocessor for Spec { fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> { let mut found_rules = BTreeMap::new(); - for section in &mut book.sections { - let BookItem::Chapter(ch) = section else { - continue; + book.for_each_mut(|item| { + let BookItem::Chapter(ch) = item else { + return; }; if ch.is_draft_chapter() { - continue; + return; } ch.content = self.rule_definitions(&ch, &mut found_rules); ch.content = self.admonitions(&ch); - ch.content = self.std_links(&ch); - } - for section in &mut book.sections { - let BookItem::Chapter(ch) = section else { - continue; + ch.content = std_links::std_links(&ch); + }); + // This is a separate pass because it relies on the modifications of + // the previous passes. + book.for_each_mut(|item| { + let BookItem::Chapter(ch) = item else { + return; }; if ch.is_draft_chapter() { - continue; + return; } ch.content = self.auto_link_references(&ch, &found_rules); - } - + }); Ok(book) } } diff --git a/mdbook-spec/src/std_links.rs b/mdbook-spec/src/std_links.rs new file mode 100644 index 0000000..9b7c6cb --- /dev/null +++ b/mdbook-spec/src/std_links.rs @@ -0,0 +1,214 @@ +use mdbook::book::Chapter; +use once_cell::sync::Lazy; +use regex::{Captures, Regex}; +use std::collections::HashSet; +use std::fmt::Write as _; +use std::fs; +use std::io::{self, Write as _}; +use std::process::{self, Command}; +use tempfile::TempDir; + +/// A markdown link (without the brackets) that might possibly be a link to +/// the standard library using rustdoc's intra-doc notation. +const STD_LINK: &str = r"(?: [a-z]+@ )?
+ (?: std|core|alloc|proc_macro|test ) + (?: ::[A-Za-z0-9_!:<>{}()\[\]]+ )?"; + +/// The Regex for a markdown link that might be a link to the standard library. +static STD_LINK_RE: Lazy<Regex> = Lazy::new(|| { + Regex::new(&format!( + r"(?x) + (?: + ( \[`[^`]+`\] ) \( ({STD_LINK}) \) + ) + | (?: + ( \[`{STD_LINK}`\] ) + ) + " + )) + .unwrap() +}); + +/// The Regex used to extract the std links from the HTML generated by rustdoc. +static STD_LINK_EXTRACT_RE: Lazy<Regex> = + Lazy::new(|| Regex::new(r#"
<li><a [^>]*href="(https://doc.rust-lang.org/[^"]+)""#).unwrap()); + +/// The Regex for a markdown link definition. +static LINK_DEF_RE: Lazy<Regex> = Lazy::new(|| { + // This is a pretty lousy regex for a link definition. It doesn't + // handle things like blockquotes, code blocks, etc. Using a + // markdown parser isn't really feasible here, it would be nice to + // improve this. + Regex::new(r#"(?m)^(?