diff --git a/peg-macros/analysis.rs b/peg-macros/analysis.rs index 9f54ae4..3146b39 100644 --- a/peg-macros/analysis.rs +++ b/peg-macros/analysis.rs @@ -259,7 +259,7 @@ impl<'a> LoopNullabilityVisitor<'a> { ref sep, } => { let inner_nullable = self.walk_expr(inner); - let sep_nullable = sep.as_ref().map_or(true, |sep| self.walk_expr(sep)); + let sep_nullable = sep.as_ref().is_none_or(|sep| self.walk_expr(sep)); // The entire purpose of this analysis: report errors if the loop body is nullable if inner_nullable && sep_nullable && !bound.has_upper_bound() { diff --git a/peg-macros/ast.rs b/peg-macros/ast.rs index e8230f1..aad9a76 100644 --- a/peg-macros/ast.rs +++ b/peg-macros/ast.rs @@ -49,6 +49,16 @@ pub struct Rule { pub no_eof: bool, } +impl Rule { + pub fn cacheable(&self) -> bool { + self.ty_params.is_none() + && self + .params + .iter() + .all(|param| matches!(param.ty, RuleParamTy::Rust(..))) + } +} + #[derive(Debug)] pub struct RuleParam { pub name: Ident, diff --git a/peg-macros/translate.rs b/peg-macros/translate.rs index 24343a8..b2ecb25 100644 --- a/peg-macros/translate.rs +++ b/peg-macros/translate.rs @@ -84,10 +84,10 @@ pub(crate) fn compile_grammar(grammar: &Grammar) -> TokenStream { continue; } - if rule.cache.is_some() && !(rule.params.is_empty() && rule.ty_params.is_none()) { + if rule.cache.is_some() && !rule.cacheable() { items.push(report_error( - rule.name.span(), - "rules with generics or parameters cannot use #[cache] or #[cache_left_rec]".to_string(), + rule.name.span(), + "rules with generic types or `rule<_>` types cannot use #[cache] or #[cache_left_rec]".to_string(), )); continue; } @@ -158,11 +158,29 @@ fn make_parse_state(grammar: &Grammar) -> TokenStream { let mut cache_fields_def: Vec = Vec::new(); let mut cache_fields: Vec = Vec::new(); for rule in grammar.iter_rules() { - if rule.cache.is_some() && rule.params.is_empty() && rule.ty_params.is_none() { + if rule.cache.is_some() && rule.cacheable() { let name = format_ident!("{}_cache", rule.name); let ret_ty = rule.ret_type.clone().unwrap_or_else(|| quote!(())); + + // This could be written more simply as `(usize, #(, #param_ty)*))`, + // but this generates unnecessary brackets when `rule.params` is + // empty, and new releases of clippy have a bad habit of suddenly + // triggering on code generated by proc-macros when `quote_spanned!` + // is used because it thinks that the generated code was handwritten + let key = if rule.params.is_empty() { + quote_spanned!(span=> usize) + } else { + let param_ty = rule.params.iter().map(|param| { + let RuleParamTy::Rust(ty) = ¶m.ty else { + unreachable!() + }; + quote_spanned!(span=> <::peg::chomp_ref!(#ty) as ::std::borrow::ToOwned>::Owned) + }); + quote_spanned!(span=> (usize, #(#param_ty),*)) + }; + cache_fields_def.push( - quote_spanned! { span => #name: ::std::collections::HashMap> }, + quote_spanned!(span=> #name: ::std::collections::HashMap<#key, ::peg::RuleResult<#ret_ty>>), ); cache_fields.push(name); } @@ -274,15 +292,25 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream { quote!() }; + let param = rule.params.iter().map(|param| { + let name = ¶m.name; + quote_spanned!(span=> #name.to_owned()) + }); + let key = if rule.params.is_empty() { + quote_spanned!(span=> __pos) + } else { + quote_spanned!(span=> (__pos, #(#param),*)) + }; + match cache_type { Cache::Simple => quote_spanned! { span => - if let Some(entry) = __state.#cache_field.get(&__pos) { + if let Some(entry) = __state.#cache_field.get(&#key) { #cache_trace return entry.clone(); } let __rule_result = #wrapped_body; - __state.#cache_field.insert(__pos, __rule_result.clone()); + __state.#cache_field.insert(#key, __rule_result.clone()); __rule_result }, Cache::Recursive => @@ -290,12 +318,12 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream { // { quote_spanned! { span => - if let Some(entry) = __state.#cache_field.get(&__pos) { + if let Some(entry) = __state.#cache_field.get(&#key) { #cache_trace return entry.clone(); } - __state.#cache_field.insert(__pos, ::peg::RuleResult::Failed); + __state.#cache_field.insert(#key, ::peg::RuleResult::Failed); let mut __last_result = ::peg::RuleResult::Failed; loop { let __current_result = { #wrapped_body }; @@ -305,7 +333,7 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream { match __last_result { ::peg::RuleResult::Matched(__last_endpos, _) if __current_endpos <= __last_endpos => break, _ => { - __state.#cache_field.insert(__pos, __current_result.clone()); + __state.#cache_field.insert(#key, __current_result.clone()); __last_result = __current_result; }, } diff --git a/peg-runtime/lib.rs b/peg-runtime/lib.rs index 96a120c..da3bfa2 100644 --- a/peg-runtime/lib.rs +++ b/peg-runtime/lib.rs @@ -71,3 +71,19 @@ pub fn call_custom_closure( ) -> RuleResult { f(input, pos) } + +// this is used to convert references to ownable types for cache keys, as a +// cleaner alternative to filtering the token tree +#[doc(hidden)] +#[macro_export] +macro_rules! chomp_ref { + (& $lt:lifetime $ty:ty) => { + $ty + }; + (& $ty:ty) => { + $ty + }; + ($ty:ty) => { + $ty + }; +} diff --git a/src/lib.rs b/src/lib.rs index 25272ea..3e2e7ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -312,10 +312,9 @@ //! //! ## Caching and left recursion //! -//! A `rule` without parameters can be prefixed with `#[cache]` if it is likely -//! to be checked repeatedly in the same position. This memoizes the rule result -//! as a function of input position, in the style of a [packrat -//! parser][wp-peg-packrat]. +//! A `rule` can be prefixed with `#[cache]` if it is likely to be checked +//! repeatedly in the same position. This memoizes the rule result as a function +//! of input position, in the style of a [packrat parser][wp-peg-packrat]. //! //! [wp-peg-packrat]: https://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars //! @@ -334,6 +333,11 @@ //! The `precedence!{}` syntax is another way to handle nested operators and avoid //! repeatedly matching an expression rule. //! +//! Currently, rules with arguments can only be cached if all argument types are +//! `ToOwned + Hash + Eq`. Rules with generic types or `rule<_>` arguments +//! cannot be cached. References are converted to values when the cache is +//! checked and when they are inserted to the cache. +//! //! ## Tracing //! //! If you pass the `peg/trace` feature to Cargo when building your project, a diff --git a/tests/compile-fail/cache_with_args.rs b/tests/compile-fail/cache_with_args.rs index ac78f52..9a4c01e 100644 --- a/tests/compile-fail/cache_with_args.rs +++ b/tests/compile-fail/cache_with_args.rs @@ -1,11 +1,11 @@ extern crate peg; -peg::parser!(grammar foo() for str { +peg::parser!(grammar foo() for str { #[cache] - rule foo(x: u32) = "foo" //~ ERROR + rule ltarg<'a>() -> &'a str = { "" } //~ ERROR #[cache] - rule ltarg<'a>() -> &'a str = { "" } //~ ERROR + rule rulearg(r: rule<()>) -> &'a str = { "" } //~ ERROR }); -fn main() {} \ No newline at end of file +fn main() {} diff --git a/tests/compile-fail/cache_with_args.stderr b/tests/compile-fail/cache_with_args.stderr index 1caa07e..269b869 100644 --- a/tests/compile-fail/cache_with_args.stderr +++ b/tests/compile-fail/cache_with_args.stderr @@ -1,11 +1,11 @@ -error: rules with generics or parameters cannot use #[cache] or #[cache_left_rec] - --> $DIR/cache_with_args.rs:5:10 +error: rules with generic types or `rule<_>` types cannot use #[cache] or #[cache_left_rec] + --> tests/compile-fail/cache_with_args.rs:5:10 | -5 | rule foo(x: u32) = "foo" //~ ERROR - | ^^^ +5 | rule ltarg<'a>() -> &'a str = { "" } //~ ERROR + | ^^^^^ -error: rules with generics or parameters cannot use #[cache] or #[cache_left_rec] - --> $DIR/cache_with_args.rs:8:10 +error: rules with generic types or `rule<_>` types cannot use #[cache] or #[cache_left_rec] + --> tests/compile-fail/cache_with_args.rs:8:10 | -8 | rule ltarg<'a>() -> &'a str = { "" } //~ ERROR - | ^^^^^ +8 | rule rulearg(r: rule<()>) -> &'a str = { "" } //~ ERROR + | ^^^^^^^ diff --git a/tests/pass/cache_with_args.rs b/tests/pass/cache_with_args.rs new file mode 100644 index 0000000..6845121 --- /dev/null +++ b/tests/pass/cache_with_args.rs @@ -0,0 +1,75 @@ +peg::parser!(grammar foo() for str { + pub rule main() + = yepnope(true) + yepnope(false) + / yepnope(true) + yepnope(true) + yepnope(false) + + #[cache] + rule yepnope(yep: bool) + = &assert(yep, "yep") "yep" + / !assert(yep, "yep") "nope" + + pub rule main_ref() + = yepnope_ref(&true) + yepnope_ref(&false) + / yepnope_ref(&true) + yepnope_ref(&true) + yepnope_ref(&false) + + #[cache] + rule yepnope_ref(yep: &bool) + = &assert(*yep, "yep") "yep" + / !assert(*yep, "yep") "nope" + + pub rule main_ref_lifetime() + = yepnope_ref(&true) + yepnope_ref(&false) + / yepnope_ref(&true) + yepnope_ref(&true) + yepnope_ref(&false) + + #[cache] + rule yepnope_ref_lifetime(yep: &'input bool) + = &assert(*yep, "yep") "yep" + / !assert(*yep, "yep") "nope" + + pub rule main_ref_to_owned() + = yepnope_ref_to_owned("yep") + yepnope_ref_to_owned("nope") + / yepnope_ref_to_owned("yep") + yepnope_ref_to_owned("yep") + yepnope_ref_to_owned("nope") + + #[cache] + rule yepnope_ref_to_owned(yep: &str) + = &assert(yep == "yep", "yep") "yep" + / !assert(yep == "yep", "yep") "nope" + + rule assert(v: bool, msg: &'static str) + = {? if v { Ok(()) } else { Err(msg) } } +}); + +#[test] +fn main() { + foo::main("yepnope").unwrap(); + foo::main("nopeyep").unwrap_err(); + foo::main("yepyepnope").unwrap(); + foo::main("nopeyepnope").unwrap_err(); + + foo::main_ref("yepnope").unwrap(); + foo::main_ref("nopeyep").unwrap_err(); + foo::main_ref("yepyepnope").unwrap(); + foo::main_ref("nopeyepnope").unwrap_err(); + + foo::main_ref_lifetime("yepnope").unwrap(); + foo::main_ref_lifetime("nopeyep").unwrap_err(); + foo::main_ref_lifetime("yepyepnope").unwrap(); + foo::main_ref_lifetime("nopeyepnope").unwrap_err(); + + foo::main_ref_to_owned("yepnope").unwrap(); + foo::main_ref_to_owned("nopeyep").unwrap_err(); + foo::main_ref_to_owned("yepyepnope").unwrap(); + foo::main_ref_to_owned("nopeyepnope").unwrap_err(); +} diff --git a/tests/pass/mod.rs b/tests/pass/mod.rs index 66f082b..8ec537f 100644 --- a/tests/pass/mod.rs +++ b/tests/pass/mod.rs @@ -7,6 +7,7 @@ mod arithmetic_with_left_recursion; mod assembly_ast_dyn_type_param_bounds; mod borrow_from_input; mod bytes; +mod cache_with_args; mod conditional_block; mod crate_import; mod custom_expr; diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 461c920..0c4bc15 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -64,7 +64,7 @@ fn generate_grammar() -> Result> { let output = Command::new(cargo) .current_dir(project_root()) - .args(&[ + .args([ "run", "-p", "peg-macros",