Skip to content

Commit 68a9108

Browse files
committed
Allow combining #[cache] with arguments
This is a naïve first attempt at enabling caching with arguments, supporting both values and ownable references. References are less efficient because it is extremely difficult to satisfy the constraint `K: Borrow<Q>` for `HashMap` with a tuple. Most recommendations seem to be to either do some contortions with trait objects, or to use the hashbrown crate and implement `hashbrown::Equivalent`, but I have not been able to successfully think through how to do this in a way which works with arbitrary tuples of foreign types. Still, a half-measure which can be improved later is hopefully better than nothing.
1 parent 42ed329 commit 68a9108

File tree

8 files changed

+160
-26
lines changed

8 files changed

+160
-26
lines changed

peg-macros/ast.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,16 @@ pub struct Rule {
4949
pub no_eof: bool,
5050
}
5151

52+
impl Rule {
53+
pub fn cacheable(&self) -> bool {
54+
self.ty_params.is_none()
55+
&& self
56+
.params
57+
.iter()
58+
.all(|param| matches!(param.ty, RuleParamTy::Rust(..)))
59+
}
60+
}
61+
5262
#[derive(Debug)]
5363
pub struct RuleParam {
5464
pub name: Ident,

peg-macros/translate.rs

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,10 @@ pub(crate) fn compile_grammar(grammar: &Grammar) -> TokenStream {
8484
continue;
8585
}
8686

87-
if rule.cache.is_some() && !(rule.params.is_empty() && rule.ty_params.is_none()) {
87+
if rule.cache.is_some() && !rule.cacheable() {
8888
items.push(report_error(
89-
rule.name.span(),
90-
"rules with generics or parameters cannot use #[cache] or #[cache_left_rec]".to_string(),
89+
rule.name.span(),
90+
"rules with generic types or `rule<_>` types cannot use #[cache] or #[cache_left_rec]".to_string(),
9191
));
9292
continue;
9393
}
@@ -158,11 +158,29 @@ fn make_parse_state(grammar: &Grammar) -> TokenStream {
158158
let mut cache_fields_def: Vec<TokenStream> = Vec::new();
159159
let mut cache_fields: Vec<Ident> = Vec::new();
160160
for rule in grammar.iter_rules() {
161-
if rule.cache.is_some() && rule.params.is_empty() && rule.ty_params.is_none() {
161+
if rule.cache.is_some() && rule.cacheable() {
162162
let name = format_ident!("{}_cache", rule.name);
163163
let ret_ty = rule.ret_type.clone().unwrap_or_else(|| quote!(()));
164+
165+
// This could be written more simply as `(usize #(, #param_ty)*)`,
166+
// but this generates unnecessary brackets when `rule.params` is
167+
// empty, and new releases of clippy have a bad habit of suddenly
168+
// triggering on code generated by proc-macros when `quote_spanned!`
169+
// is used because it thinks that the generated code was handwritten
170+
let key = if rule.params.is_empty() {
171+
quote_spanned!(span=> usize)
172+
} else {
173+
let param_ty = rule.params.iter().map(|param| {
174+
let RuleParamTy::Rust(ty) = &param.ty else {
175+
unreachable!()
176+
};
177+
quote_spanned!(span=> <::peg::chomp_ref!(#ty) as ::std::borrow::ToOwned>::Owned)
178+
});
179+
quote_spanned!(span=> (usize, #(#param_ty),*))
180+
};
181+
164182
cache_fields_def.push(
165-
quote_spanned! { span => #name: ::std::collections::HashMap<usize, ::peg::RuleResult<#ret_ty>> },
183+
quote_spanned!(span=> #name: ::std::collections::HashMap<#key, ::peg::RuleResult<#ret_ty>>),
166184
);
167185
cache_fields.push(name);
168186
}
@@ -274,28 +292,38 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream {
274292
quote!()
275293
};
276294

295+
let param = rule.params.iter().map(|param| {
296+
let name = &param.name;
297+
quote_spanned!(span=> #name.to_owned())
298+
});
299+
let key = if rule.params.is_empty() {
300+
quote_spanned!(span=> __pos)
301+
} else {
302+
quote_spanned!(span=> (__pos, #(#param),*))
303+
};
304+
277305
match cache_type {
278306
Cache::Simple => quote_spanned! { span =>
279-
if let Some(entry) = __state.#cache_field.get(&__pos) {
307+
if let Some(entry) = __state.#cache_field.get(&#key) {
280308
#cache_trace
281309
return entry.clone();
282310
}
283311

284312
let __rule_result = #wrapped_body;
285-
__state.#cache_field.insert(__pos, __rule_result.clone());
313+
__state.#cache_field.insert(#key, __rule_result.clone());
286314
__rule_result
287315
},
288316
Cache::Recursive =>
289317
// `#[cache_left_rec]` support for recursive rules using the technique described here:
290318
// <https://medium.com/@gvanrossum_83706/left-recursive-peg-grammars-65dab3c580e1>
291319
{
292320
quote_spanned! { span =>
293-
if let Some(entry) = __state.#cache_field.get(&__pos) {
321+
if let Some(entry) = __state.#cache_field.get(&#key) {
294322
#cache_trace
295323
return entry.clone();
296324
}
297325

298-
__state.#cache_field.insert(__pos, ::peg::RuleResult::Failed);
326+
__state.#cache_field.insert(#key, ::peg::RuleResult::Failed);
299327
let mut __last_result = ::peg::RuleResult::Failed;
300328
loop {
301329
let __current_result = { #wrapped_body };
@@ -305,7 +333,7 @@ fn compile_rule(context: &Context, rule: &Rule) -> TokenStream {
305333
match __last_result {
306334
::peg::RuleResult::Matched(__last_endpos, _) if __current_endpos <= __last_endpos => break,
307335
_ => {
308-
__state.#cache_field.insert(__pos, __current_result.clone());
336+
__state.#cache_field.insert(#key, __current_result.clone());
309337
__last_result = __current_result;
310338
},
311339
}

peg-runtime/lib.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,19 @@ pub fn call_custom_closure<I, T>(
7171
) -> RuleResult<T> {
7272
f(input, pos)
7373
}
74+
75+
// this is used to convert references to ownable types for cache keys, as a
76+
// cleaner alternative to filtering the token tree
77+
#[doc(hidden)]
78+
#[macro_export]
79+
macro_rules! chomp_ref {
80+
(& $lt:lifetime $ty:ty) => {
81+
$ty
82+
};
83+
(& $ty:ty) => {
84+
$ty
85+
};
86+
($ty:ty) => {
87+
$ty
88+
};
89+
}

src/lib.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -312,10 +312,9 @@
312312
//!
313313
//! ## Caching and left recursion
314314
//!
315-
//! A `rule` without parameters can be prefixed with `#[cache]` if it is likely
316-
//! to be checked repeatedly in the same position. This memoizes the rule result
317-
//! as a function of input position, in the style of a [packrat
318-
//! parser][wp-peg-packrat].
315+
//! A `rule` can be prefixed with `#[cache]` if it is likely to be checked
316+
//! repeatedly in the same position. This memoizes the rule result as a function
317+
//! of input position, in the style of a [packrat parser][wp-peg-packrat].
319318
//!
320319
//! [wp-peg-packrat]: https://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars
321320
//!
@@ -334,6 +333,11 @@
334333
//! The `precedence!{}` syntax is another way to handle nested operators and avoid
335334
//! repeatedly matching an expression rule.
336335
//!
336+
//! Currently, rules with arguments can only be cached if all argument types are
337+
//! `ToOwned + Hash + Eq`. Rules with generic types or `rule<_>` arguments
338+
//! cannot be cached. References are converted to values when the cache is
339+
//! checked and when they are inserted into the cache.
340+
//!
337341
//! ## Tracing
338342
//!
339343
//! If you pass the `peg/trace` feature to Cargo when building your project, a
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
extern crate peg;
22

3-
peg::parser!(grammar foo() for str {
3+
peg::parser!(grammar foo() for str {
44
#[cache]
5-
rule foo(x: u32) = "foo" //~ ERROR
5+
rule ltarg<'a>() -> &'a str = { "" } //~ ERROR
66

77
#[cache]
8-
rule ltarg<'a>() -> &'a str = { "" } //~ ERROR
8+
rule rulearg(r: rule<()>) -> &'a str = { "" } //~ ERROR
99
});
1010

11-
fn main() {}
11+
fn main() {}
Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
error: rules with generics or parameters cannot use #[cache] or #[cache_left_rec]
2-
--> $DIR/cache_with_args.rs:5:10
1+
error: rules with generic types or `rule<_>` types cannot use #[cache] or #[cache_left_rec]
2+
--> tests/compile-fail/cache_with_args.rs:5:10
33
|
4-
5 | rule foo(x: u32) = "foo" //~ ERROR
5-
| ^^^
4+
5 | rule ltarg<'a>() -> &'a str = { "" } //~ ERROR
5+
| ^^^^^
66

7-
error: rules with generics or parameters cannot use #[cache] or #[cache_left_rec]
8-
--> $DIR/cache_with_args.rs:8:10
7+
error: rules with generic types or `rule<_>` types cannot use #[cache] or #[cache_left_rec]
8+
--> tests/compile-fail/cache_with_args.rs:8:10
99
|
10-
8 | rule ltarg<'a>() -> &'a str = { "" } //~ ERROR
11-
| ^^^^^
10+
8 | rule rulearg(r: rule<()>) -> &'a str = { "" } //~ ERROR
11+
| ^^^^^^^

tests/pass/cache_with_args.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
peg::parser!(grammar foo() for str {
2+
pub rule main()
3+
= yepnope(true)
4+
yepnope(false)
5+
/ yepnope(true)
6+
yepnope(true)
7+
yepnope(false)
8+
9+
#[cache]
10+
rule yepnope(yep: bool)
11+
= &assert(yep, "yep") "yep"
12+
/ !assert(yep, "yep") "nope"
13+
14+
pub rule main_ref()
15+
= yepnope_ref(&true)
16+
yepnope_ref(&false)
17+
/ yepnope_ref(&true)
18+
yepnope_ref(&true)
19+
yepnope_ref(&false)
20+
21+
#[cache]
22+
rule yepnope_ref(yep: &bool)
23+
= &assert(*yep, "yep") "yep"
24+
/ !assert(*yep, "yep") "nope"
25+
26+
pub rule main_ref_lifetime()
27+
= yepnope_ref(&true)
28+
yepnope_ref(&false)
29+
/ yepnope_ref(&true)
30+
yepnope_ref(&true)
31+
yepnope_ref(&false)
32+
33+
#[cache]
34+
rule yepnope_ref_lifetime(yep: &'input bool)
35+
= &assert(*yep, "yep") "yep"
36+
/ !assert(*yep, "yep") "nope"
37+
38+
pub rule main_ref_to_owned()
39+
= yepnope_ref_to_owned("yep")
40+
yepnope_ref_to_owned("nope")
41+
/ yepnope_ref_to_owned("yep")
42+
yepnope_ref_to_owned("yep")
43+
yepnope_ref_to_owned("nope")
44+
45+
#[cache]
46+
rule yepnope_ref_to_owned(yep: &str)
47+
= &assert(yep == "yep", "yep") "yep"
48+
/ !assert(yep == "yep", "yep") "nope"
49+
50+
rule assert(v: bool, msg: &'static str)
51+
= {? if v { Ok(()) } else { Err(msg) } }
52+
});
53+
54+
#[test]
55+
fn main() {
56+
foo::main("yepnope").unwrap();
57+
foo::main("nopeyep").unwrap_err();
58+
foo::main("yepyepnope").unwrap();
59+
foo::main("nopeyepnope").unwrap_err();
60+
61+
foo::main_ref("yepnope").unwrap();
62+
foo::main_ref("nopeyep").unwrap_err();
63+
foo::main_ref("yepyepnope").unwrap();
64+
foo::main_ref("nopeyepnope").unwrap_err();
65+
66+
foo::main_ref_lifetime("yepnope").unwrap();
67+
foo::main_ref_lifetime("nopeyep").unwrap_err();
68+
foo::main_ref_lifetime("yepyepnope").unwrap();
69+
foo::main_ref_lifetime("nopeyepnope").unwrap_err();
70+
71+
foo::main_ref_to_owned("yepnope").unwrap();
72+
foo::main_ref_to_owned("nopeyep").unwrap_err();
73+
foo::main_ref_to_owned("yepyepnope").unwrap();
74+
foo::main_ref_to_owned("nopeyepnope").unwrap_err();
75+
}

tests/pass/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ mod arithmetic_with_left_recursion;
77
mod assembly_ast_dyn_type_param_bounds;
88
mod borrow_from_input;
99
mod bytes;
10+
mod cache_with_args;
1011
mod conditional_block;
1112
mod crate_import;
1213
mod custom_expr;

0 commit comments

Comments
 (0)