From e87c19ad1d1c7c87b655483c6cc9b6f30c263260 Mon Sep 17 00:00:00 2001 From: Nik Revenco Date: Sun, 22 Jun 2025 23:59:12 +0100 Subject: [PATCH] feat: Add `injection.parent-layer` property --- bindings/src/query.rs | 44 ++++++++ bindings/src/query/predicate.rs | 43 +++++-- fixtures/highlighter/rust_doc_comment.rs | 23 ++++ highlighter/src/injections_query.rs | 137 ++++++++++++++++++++++- test-grammars/markdown/injections.scm | 9 ++ 5 files changed, 240 insertions(+), 16 deletions(-) diff --git a/bindings/src/query.rs b/bindings/src/query.rs index c408c9b..5fd51e3 100644 --- a/bindings/src/query.rs +++ b/bindings/src/query.rs @@ -17,6 +17,30 @@ pub enum UserPredicate<'a> { key: &'a str, val: Option<&'a str>, }, + /// A custom `#any-of? <value> [...<values>]` predicate where + /// `<value>` is any string and `[...<values>]` is a list of values for + /// which the predicate succeeds if `<value>` is in the list. + /// + /// # Example + /// + /// Field values in the following example: + /// - `negated`: `false` + /// - `value`: `"injection.parent-layer"` + /// - `values`: `["gleam", "zig"]` + /// + /// ```scheme + /// (#any-of? injection.parent-layer "gleam" "zig") + /// ``` + IsAnyOf { + /// - If `false`, will be `any-of?`. Will match *if* `values` includes `value` + /// - If `true`, will be `not-any-of?`. Will match *unless* `values` includes `value` + negated: bool, + /// What we are trying to find. E.g. in `#any-of? hello-world` this will be + /// `"hello-world"`. We will try to find this value in `values` + value: &'a str, + /// List of valid (or invalid, if `negated`) values for `value` + values: Vec<&'a str>, + }, SetProperty { key: &'a str, val: Option<&'a str>, @@ -27,6 +51,26 @@ pub enum UserPredicate<'a> { impl Display for UserPredicate<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { + UserPredicate::IsAnyOf { + negated, + value, + ref values, + } => { + let values_len = values.len(); + write!( + f, + "(#{not}any-of? 
{value} {values})", + not = if negated { "not-" } else { "" }, + values = values + .iter() + .enumerate() + .fold(String::new(), |s, (i, value)| { + let comma = if i + 1 == values_len { "" } else { ", " }; + + format!("{s}\"{value}\"{comma}") + }), + ) + } UserPredicate::IsPropertySet { negate, key, val } => { let predicate = if negate { "is-not?" } else { "is?" }; let spacer = if val.is_some() { " " } else { "" }; diff --git a/bindings/src/query/predicate.rs b/bindings/src/query/predicate.rs index 3688883..4dcdd6d 100644 --- a/bindings/src/query/predicate.rs +++ b/bindings/src/query/predicate.rs @@ -217,17 +217,39 @@ impl Query { "any-of?" | "not-any-of?" => { predicate.check_min_arg_count(1)?; - let capture = predicate.capture_arg(0)?; let negated = predicate.name() == "not-any-of?"; - let values: Result<_, InvalidPredicateError> = (1..predicate.num_args()) - .map(|i| predicate.query_str_arg(i)) - .collect(); - self.text_predicates.push(TextPredicate { - capture, - kind: TextPredicateKind::AnyString(values?), - negated, - match_all: false, - }); + let args = 1..predicate.num_args(); + + match predicate.capture_arg(0) { + Ok(capture) => { + let args = args.map(|i| predicate.query_str_arg(i)); + let values: Result<_, InvalidPredicateError> = args.collect(); + + self.text_predicates.push(TextPredicate { + capture, + kind: TextPredicateKind::AnyString(values?), + negated, + match_all: false, + }); + } + Err(missing_capture_err) => { + let Ok(value) = predicate.str_arg(0) else { + return Err(missing_capture_err); + }; + let values = args + .map(|i| predicate.str_arg(i)) + .collect::, _>>()?; + + custom_predicate( + pattern, + UserPredicate::IsAnyOf { + negated, + value, + values, + }, + )? + } + } } // is and is-not are better handled as custom predicates since interpreting is context dependent @@ -369,6 +391,7 @@ impl InvalidPredicateError { UserPredicate::SetProperty { key, .. } => Self::UnknownProperty { property: key.into(), }, + UserPredicate::IsAnyOf { value, .. 
} => Self::UnknownPredicate { name: value.into() }, UserPredicate::Other(predicate) => Self::UnknownPredicate { name: predicate.name().into(), }, diff --git a/fixtures/highlighter/rust_doc_comment.rs b/fixtures/highlighter/rust_doc_comment.rs index edf8504..77a346b 100644 --- a/fixtures/highlighter/rust_doc_comment.rs +++ b/fixtures/highlighter/rust_doc_comment.rs @@ -5,6 +5,29 @@ // │ ││╰─ comment markup.bold punctuation.bracket // │ │╰─ comment // │ ╰─ comment comment +// ╰─ comment + /// +// ┡┛╿╰─ comment +// │ ╰─ comment comment +// ╰─ comment + /// ``` +// ┡┛╿┡━━┛╰─ comment markup.raw.block +// │ │╰─ comment markup.raw.block punctuation.bracket +// │ ╰─ comment comment +// ╰─ comment + /// fn foo() +// ┡┛╿╿┡┛╿┡━┛┡┛╰─ comment markup.raw.block +// │ │││ ││ ╰─ comment markup.raw.block punctuation.bracket +// │ │││ │╰─ comment markup.raw.block function +// │ │││ ╰─ comment markup.raw.block +// │ ││╰─ comment markup.raw.block keyword.function +// │ │╰─ comment markup.raw.block +// │ ╰─ comment comment +// ╰─ comment + /// ``` +// ┡┛╿┡━━┛╰─ comment markup.raw.block +// │ │╰─ comment markup.raw.block punctuation.bracket +// │ ╰─ comment comment // ╰─ comment /// **foo // ┡┛╿╿┡┛┗━┹─ comment markup.bold diff --git a/highlighter/src/injections_query.rs b/highlighter/src/injections_query.rs index 10e4057..fde4646 100644 --- a/highlighter/src/injections_query.rs +++ b/highlighter/src/injections_query.rs @@ -89,6 +89,9 @@ pub struct InjectionsQuery { injection_language_capture: Option, injection_filename_capture: Option, injection_shebang_capture: Option, + /// 1. The list of languages to compare the parent layer's language against + /// 2. Whether it is negated: `#any-of?` or `#not-any-of?` + injection_parent_layer_langs_predicate: Option<(Vec, bool)>, // Note that the injections query is concatenated with the locals query. pub(crate) local_query: Query, // TODO: Use a Vec instead? 
@@ -108,6 +111,8 @@ impl InjectionsQuery { query_source.push_str(injection_query_text); query_source.push_str(local_query_text); + let mut injection_parent_layer_langs_predicate = None; + let mut injection_properties: HashMap = HashMap::new(); let mut not_scope_inherits = HashSet::new(); let injection_query = Query::new(grammar, injection_query_text, |pattern, predicate| { @@ -122,6 +127,16 @@ impl InjectionsQuery { .or_default() .include_children = IncludedChildren::Unnamed } + // Allow filtering for specific languages in + // `#set! injection.language injection.parent-layer` + UserPredicate::IsAnyOf { + negated, + value: INJECTION_PARENT_LAYER, + values, + } => { + injection_parent_layer_langs_predicate = + Some((values.into_iter().map(ToOwned::to_owned).collect(), negated)); + } UserPredicate::SetProperty { key: "injection.include-children", val: None, @@ -167,6 +182,7 @@ impl InjectionsQuery { local_query.disable_capture("local.reference"); Ok(InjectionsQuery { + injection_parent_layer_langs_predicate, injection_properties, injection_content_capture: injection_query.get_capture("injection.content"), injection_language_capture: injection_query.get_capture("injection.language"), @@ -195,6 +211,7 @@ impl InjectionsQuery { fn process_match<'a, 'tree>( &self, + injection_parent_language: Language, query_match: &QueryMatch<'a, 'tree>, node_idx: MatchedNodeIdx, source: RopeSlice<'a>, @@ -242,11 +259,41 @@ impl InjectionsQuery { last_content_node = i as u32; } } - let marker = marker.or(properties - .and_then(|p| p.language.as_deref()) - .map(InjectionLanguageMarker::Name))?; - let language = loader.language_for_marker(marker)?; + let language = marker + .and_then(|m| loader.language_for_marker(m)) + .or_else(|| { + properties + .and_then(|p| p.language.as_deref()) + .and_then(|name| { + let matches_predicate = || { + self.injection_parent_layer_langs_predicate + .as_ref() + .is_none_or(|(predicate, is_negated)| { + predicate.iter().any(|capture| { + let Some(marker) = 
loader.language_for_marker( + InjectionLanguageMarker::Name(capture), + ) else { + return false; + }; + + if *is_negated { + marker != injection_parent_language + } else { + marker == injection_parent_language + } + }) + }) + }; + + if name == INJECTION_PARENT_LAYER && matches_predicate() { + Some(injection_parent_language) + } else { + loader.language_for_marker(InjectionLanguageMarker::Name(name)) + } + }) + })?; + let scope = if properties.is_some_and(|p| p.combined) { Some(InjectionScope::Pattern { pattern: query_match.pattern(), @@ -286,6 +333,7 @@ impl InjectionsQuery { /// This case should be handled by the calling function fn execute<'a>( &'a self, + injection_parent_language: Language, node: &Node<'a>, source: RopeSlice<'a>, loader: &'a impl LanguageLoader, @@ -298,7 +346,14 @@ impl InjectionsQuery { if query_match.matched_node(node_idx).capture != injection_content_capture { continue; } - let Some(mat) = self.process_match(&query_match, node_idx, source, loader) else { + + let Some(mat) = self.process_match( + injection_parent_language, + &query_match, + node_idx, + source, + loader, + ) else { query_match.remove(); continue; }; @@ -384,7 +439,18 @@ impl Syntax { let mut injections: Vec = Vec::with_capacity(layer_data.injections.len()); let mut old_injections = take(&mut layer_data.injections).into_iter().peekable(); - let injection_query = injections_query.execute(&parse_tree.root_node(), source, loader); + // The language to inject if `(#set! 
injection.language injection.parent-layer)` is set + let injection_parent_language = layer_data.parent.map_or_else( + || self.layer(self.root).language, + |layer| self.layer(layer).language, + ); + + let injection_query = injections_query.execute( + injection_parent_language, + &parse_tree.root_node(), + source, + loader, + ); let mut combined_injections: HashMap = HashMap::with_capacity(32); for mat in injection_query { @@ -713,3 +779,62 @@ fn ranges_intersect(a: &Range, b: &Range) -> bool { // Adapted from a.start == b.start || (a.end > b.start && b.end > a.start) } + +/// When the language is injected, this value will be set to the +/// language of the parent layer. +/// +/// This is useful e.g. when injecting markdown into documentation +/// comments for a language such as Rust, and we want the default +/// code block without any info string to be the same as the parent layer. +/// +/// In the next two examples, the language injected into the inner +/// code block in the documentation comments will be the same as the parent +/// layer +/// +/// ````gleam +/// /// This code block will have the "gleam" language when +/// /// no info string is supplied: +/// /// +/// /// ``` +/// /// let foo: Int = example() +/// /// ``` +/// fn example() -> Int { todo } +/// ```` +/// +/// ````rust +/// /// This code block will have the "rust" language when +/// /// no info string is supplied: +/// /// +/// /// ``` +/// /// let foo: i32 = example(); +/// /// ``` +/// fn example() -> i32 { todo!() } +/// ```` +/// +/// In the above example, we have two layers: +/// +/// ```text +/// <-- rust --> +/// <-- markdown --> +/// ``` +/// +/// In the `markdown` layer, by default there will be no injection for a +/// code block with no `(info_string)` node. +/// +/// By using `injection.parent-layer`, when markdown is injected into a +/// language the code block's default value will be the parent layer. 
+/// +/// # Example +/// +/// The following injection will have the effect described above for the +/// specified languages `gleam` and `rust`. All other languages are treated +/// normally. +/// +/// ```scheme +/// (fenced_code_block +/// (code_fence_content) @injection.content +/// (#set! injection.include-unnamed-children) +/// (#set! injection.language injection.parent-layer) +/// (#any-of? injection.parent-layer "gleam" "rust")) +/// ``` +const INJECTION_PARENT_LAYER: &str = "injection.parent-layer"; diff --git a/test-grammars/markdown/injections.scm b/test-grammars/markdown/injections.scm index 7ed09c6..5da732e 100644 --- a/test-grammars/markdown/injections.scm +++ b/test-grammars/markdown/injections.scm @@ -4,6 +4,15 @@ (code_fence_content) @injection.shebang @injection.content (#set! injection.include-unnamed-children)) +(fenced_code_block + (fenced_code_block_delimiter) + (block_continuation) + (code_fence_content) @injection.content + (fenced_code_block_delimiter) + (#set! injection.language injection.parent-layer) + (#set! injection.include-unnamed-children) + (#any-of? injection.parent-layer "rust")) + (fenced_code_block (info_string (language) @injection.language)