Skip to content

Commit

Permalink
Merge #334
Browse files Browse the repository at this point in the history
334: PEEK[start..end] r=dragostis a=flying-sheep

fixes #329

Co-authored-by: Philipp A <[email protected]>
Co-authored-by: Dragoș Tiselice <[email protected]>
  • Loading branch information
3 people committed Nov 19, 2018
2 parents 79bfdd4 + 7bf4ed6 commit 8e58e02
Show file tree
Hide file tree
Showing 15 changed files with 336 additions and 35 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
target
Cargo.lock
.idea
.vscode
27 changes: 24 additions & 3 deletions derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@
//! * `POP` - pops a string from the stack and matches it
//! * `POP_ALL` - pops the entire state of the stack and matches it
//! * `PEEK` - peeks a string from the stack and matches it
//! * `PEEK[a..b]` - peeks part of the stack and matches it
//! * `PEEK_ALL` - peeks the entire state of the stack and matches it
//! * `DROP` - drops the top of the stack (fails to match if the stack is empty)
//!
Expand Down Expand Up @@ -222,8 +223,8 @@
//! `PUSH(e)` simply pushes the captured string of the expression `e` down a stack. This stack can
//! then later be used to match grammar based on its content with `POP` and `PEEK`.
//!
//! `PEEK` always matches the string at the top of stack. So, if the stack contains `["a", "b"]`,
//! then this grammar:
//! `PEEK` always matches the string at the top of stack. So, if the stack contains `["b", "a"]`
//! (`"a"` being on top), this grammar:
//!
//! ```ignore
//! a = { PEEK }
Expand All @@ -236,13 +237,33 @@
//! ```
//!
//! `POP` works the same way with the exception that it pops the string off of the stack if the
//! the match worked. With the stack from above, if `POP` matches `"a"`, the stack will be mutated
//! match worked. With the stack from above, if `POP` matches `"a"`, the stack will be mutated
//! to `["b"]`.
//!
//! `DROP` makes it possible to remove the string at the top of the stack
//! without matching it. If the stack is nonempty, `DROP` drops the top of the
//! stack. If the stack is empty, then `DROP` fails to match.
//!
//! ### Advanced peeking
//!
//! `PEEK[start..end]` and `PEEK_ALL` allow peeking deeper into the stack. The syntax works exactly
//! like Rust’s exclusive slice syntax. Additionally, negative indices can be used to indicate an
//! offset from the top. If the end lies before or at the start, the expression matches (as does
//! a `PEEK_ALL` on an empty stack). With the stack `["c", "b", "a"]` (`"a"` on top):
//!
//! ```ignore
//! fill = PUSH("c") ~ PUSH("b") ~ PUSH("a")
//! v = { PEEK_ALL } = { "a" ~ "b" ~ "c" } // top to bottom
//! w = { PEEK[..] } = { "c" ~ "b" ~ "a" } // bottom to top
//! x = { PEEK[1..2] } = { PEEK[1..-1] } = { "b" }
//! y = { PEEK[..-2] } = { PEEK[0..1] } = { "c" }
//! z = { PEEK[1..] } = { PEEK[-2..3] } = { "b" ~ "a" }
//! n = { PEEK[2..-2] } = { PEEK[2..1] } = { "" }
//! ```
//!
//! For historical reasons, `PEEK_ALL` matches from top to bottom, while `PEEK[start..end]` matches
//! from bottom to top. There is currently no syntax to match a slice of the stack top to bottom.
//!
//! ## `Rule`
//!
//! All rules defined or used in the grammar populate a generated `enum` called `Rule`. This
Expand Down
1 change: 1 addition & 0 deletions derive/tests/grammar.pest
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ soi_at_start = { SOI ~ string }
repeat_mutate_stack = { (PUSH('a'..'c') ~ ",")* ~ POP ~ POP ~ POP }
peek_ = { PUSH(range) ~ PUSH(range) ~ PEEK ~ PEEK }
peek_all = { PUSH(range) ~ PUSH(range) ~ PEEK_ALL }
peek_slice_23 = { PUSH(range) ~ PUSH(range) ~ PUSH(range) ~ PUSH(range) ~ PUSH(range) ~ PEEK[1..-2] }
pop_ = { PUSH(range) ~ PUSH(range) ~ POP ~ POP }
pop_all = { PUSH(range) ~ PUSH(range) ~ POP_ALL }
pop_fail = { PUSH(range) ~ !POP ~ range ~ POP }
Expand Down
18 changes: 18 additions & 0 deletions derive/tests/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,24 @@ fn peek_all() {
};
}

// Exercises the `peek_slice_23` grammar rule, which pushes five `range` matches and then
// applies `PEEK[1..-2]`.
#[test]
fn peek_slice_23() {
parses_to! {
parser: GrammarParser,
// The first five characters are the pushed values; the trailing "12" (offsets 5..7) is
// what the peeked slice has to match.
input: "0123412",
rule: Rule::peek_slice_23,
tokens: [
// The rule spans the whole input (0..7), but only the five pushed `range` pairs show up
// as children: the last two characters are consumed without producing a token.
peek_slice_23(0, 7, [
range(0, 1),
range(1, 2),
range(2, 3),
range(3, 4),
range(4, 5),
])
]
};
}

#[test]
fn pop() {
parses_to! {
Expand Down
24 changes: 24 additions & 0 deletions generator/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::path::Path;

use proc_macro2::{Span, TokenStream};
use syn::{self, Generics, Ident};
use quote::{ToTokens, TokenStreamExt};

use pest_meta::ast::*;
use pest_meta::optimizer::*;
Expand Down Expand Up @@ -351,6 +352,12 @@ fn generate_expr(expr: OptimizedExpr) -> TokenStream {
let ident = Ident::new(&ident, Span::call_site());
quote! { self::#ident(state) }
}
OptimizedExpr::PeekSlice(start, end_) => {
let end = QuoteOption(end_);
quote! {
state.stack_match_peek_slice(#start, #end, ::pest::MatchDir::BottomToTop)
}
}
OptimizedExpr::PosPred(expr) => {
let expr = generate_expr(*expr);

Expand Down Expand Up @@ -491,6 +498,12 @@ fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream {
let ident = Ident::new(&ident, Span::call_site());
quote! { self::#ident(state) }
}
OptimizedExpr::PeekSlice(start, end_) => {
let end = QuoteOption(end_);
quote! {
state.stack_match_peek_slice(#start, #end, ::pest::MatchDir::BottomToTop)
}
}
OptimizedExpr::PosPred(expr) => {
let expr = generate_expr_atomic(*expr);

Expand Down Expand Up @@ -593,6 +606,17 @@ fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream {
}
}

/// Newtype around `Option<T>` so that an optional value can be interpolated in `quote!`.
///
/// `Some(t)` is rendered as `::std::option::Option::Some(#t)` and `None` as
/// `::std::option::Option::None`, using fully qualified paths in the generated code.
struct QuoteOption<T>(Option<T>);

impl<T: ToTokens> ToTokens for QuoteOption<T> {
    /// Appends the tokens spelling out the wrapped option to `tokens`.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        let rendered = if let Some(ref inner) = self.0 {
            quote! { ::std::option::Option::Some(#inner) }
        } else {
            quote! { ::std::option::Option::None }
        };
        tokens.append_all(rendered);
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 2 additions & 0 deletions generator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream {
Rule::non_atomic_modifier => "`!`".to_owned(),
Rule::opening_brace => "`{`".to_owned(),
Rule::closing_brace => "`}`".to_owned(),
Rule::opening_brack => "`[`".to_owned(),
Rule::closing_brack => "`]`".to_owned(),
Rule::opening_paren => "`(`".to_owned(),
Rule::positive_predicate_operator => "`&`".to_owned(),
Rule::negative_predicate_operator => "`!`".to_owned(),
Expand Down
2 changes: 2 additions & 0 deletions meta/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ pub enum Expr {
Range(String, String),
/// Matches the rule with the given name, e.g. `a`
Ident(String),
/// Matches a custom part of the stack, e.g. `PEEK[..]`
PeekSlice(i32, Option<i32>),
/// Positive lookahead; matches expression without making progress, e.g. `&e`
PosPred(Box<Expr>),
/// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
Expand Down
6 changes: 5 additions & 1 deletion meta/src/grammar.pest
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ opening_brace = { "{" }
closing_brace = { "}" }
opening_paren = { "(" }
closing_paren = { ")" }
opening_brack = { "[" }
closing_brack = { "]" }

modifier = _{
silent_modifier |
Expand All @@ -35,7 +37,7 @@ non_atomic_modifier = { "!" }
expression = { term ~ (infix_operator ~ term)* }
term = { prefix_operator* ~ node ~ postfix_operator* }
node = _{ opening_paren ~ expression ~ closing_paren | terminal }
terminal = _{ _push | identifier | string | insensitive_string | range }
terminal = _{ _push | peek_slice | identifier | string | insensitive_string | range }

prefix_operator = _{ positive_predicate_operator | negative_predicate_operator }
infix_operator = _{ sequence_operator | choice_operator }
Expand Down Expand Up @@ -63,10 +65,12 @@ repeat_max = { opening_brace ~ comma ~ number ~ closing_brace }
repeat_min_max = { opening_brace ~ number ~ comma ~ number ~ closing_brace }

number = @{ '0'..'9'+ }
integer = @{ number | "-" ~ "0"* ~ '1'..'9' ~ number? }

comma = { "," }

_push = { "PUSH" ~ opening_paren ~ expression ~ closing_paren }
peek_slice = { "PEEK" ~ opening_brack ~ integer? ~ range_operator ~ integer? ~ closing_brack }

identifier = @{ !"PUSH" ~ ("_" | alpha) ~ ("_" | alpha_num)* }
alpha = _{ 'a'..'z' | 'A'..'Z' }
Expand Down
6 changes: 5 additions & 1 deletion meta/src/optimizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
Expr::Insens(string) => OptimizedExpr::Insens(string),
Expr::Range(start, end) => OptimizedExpr::Range(start, end),
Expr::Ident(ident) => OptimizedExpr::Ident(ident),
Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
Expr::Seq(lhs, rhs) => {
Expand All @@ -54,7 +55,9 @@ fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
Expr::Skip(strings) => OptimizedExpr::Skip(strings),
Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
_ => unreachable!("No valid transformation to OptimizedRule")
Expr::RepOnce(_) | Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => {
unreachable!("No valid transformation to OptimizedRule")
}
}
}

Expand Down Expand Up @@ -82,6 +85,7 @@ pub enum OptimizedExpr {
Insens(String),
Range(String, String),
Ident(String),
PeekSlice(i32, Option<i32>),
PosPred(Box<OptimizedExpr>),
NegPred(Box<OptimizedExpr>),
Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
Expand Down
120 changes: 120 additions & 0 deletions meta/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ pub enum ParserExpr<'i> {
Insens(String),
Range(String, String),
Ident(String),
PeekSlice(i32, Option<i32>),
PosPred(Box<ParserNode<'i>>),
NegPred(Box<ParserNode<'i>>),
Seq(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
Expand Down Expand Up @@ -139,6 +140,7 @@ fn convert_node(node: ParserNode) -> Expr {
ParserExpr::Insens(string) => Expr::Insens(string),
ParserExpr::Range(start, end) => Expr::Range(start, end),
ParserExpr::Ident(ident) => Expr::Ident(ident),
ParserExpr::PeekSlice(start, end) => Expr::PeekSlice(start, end),
ParserExpr::PosPred(node) => Expr::PosPred(Box::new(convert_node(*node))),
ParserExpr::NegPred(node) => Expr::NegPred(Box::new(convert_node(*node))),
ParserExpr::Seq(node1, node2) => Expr::Seq(
Expand Down Expand Up @@ -271,6 +273,32 @@ fn consume_expr<'i>(
span: start.span(&end)
}
}
Rule::peek_slice => {
let mut pairs = pair.clone().into_inner();
pairs.next().unwrap(); // opening_brack
let pair_start = pairs.next().unwrap(); // .. or integer
let start: i32 = match pair_start.as_rule() {
Rule::range_operator => 0,
Rule::integer => {
pairs.next().unwrap(); // ..
pair_start.as_str().parse().unwrap()
},
_ => unreachable!()
};
let pair_end = pairs.next().unwrap(); // integer or }
let end: Option<i32> = match pair_end.as_rule() {
Rule::closing_brack => None,
Rule::integer => {
pairs.next().unwrap(); // }
Some(pair_end.as_str().parse().unwrap())
},
_ => unreachable!()
};
ParserNode {
expr: ParserExpr::PeekSlice(start, end),
span: pair.into_span()
}
},
Rule::identifier => ParserNode {
expr: ParserExpr::Ident(pair.as_str().to_owned()),
span: pair.clone().into_span()
Expand Down Expand Up @@ -800,6 +828,74 @@ mod tests {
};
}

// `PEEK[..]` with both bounds omitted: the only inner tokens are the two brackets and the
// range operator.
#[test]
fn peek_slice_all() {
parses_to! {
parser: PestParser,
input: "PEEK[..]",
rule: Rule::peek_slice,
tokens: [
// The `PEEK` keyword itself (offsets 0..4) produces no token of its own.
peek_slice(0, 8, [
opening_brack(4, 5),
range_operator(5, 7),
closing_brack(7, 8)
])
]
};
}

// `PEEK[1..]` with only a start bound: an `integer` token precedes the range operator and
// nothing sits between the operator and the closing bracket.
#[test]
fn peek_slice_start() {
parses_to! {
parser: PestParser,
input: "PEEK[1..]",
rule: Rule::peek_slice,
tokens: [
peek_slice(0, 9, [
opening_brack(4, 5),
// Start index `1`.
integer(5, 6),
range_operator(6, 8),
closing_brack(8, 9)
])
]
};
}

// `PEEK[ ..-1]` with only an end bound, which is negative. The input also has a space
// after `[`, which is expected to be skipped without producing a token.
#[test]
fn peek_slice_end() {
parses_to! {
parser: PestParser,
input: "PEEK[ ..-1]",
rule: Rule::peek_slice,
tokens: [
peek_slice(0, 11, [
opening_brack(4, 5),
// Offset 5 is the space, so the range operator starts at 6.
range_operator(6, 8),
// The sign is part of the `integer` token: `-1` spans 8..10.
integer(8, 10),
closing_brack(10, 11)
])
]
};
}

// `PEEK[-5..10]` with both bounds: a negative start and a multi-digit end each parse as a
// single `integer` token on either side of the range operator.
#[test]
fn peek_slice_start_end() {
parses_to! {
parser: PestParser,
input: "PEEK[-5..10]",
rule: Rule::peek_slice,
tokens: [
peek_slice(0, 12, [
opening_brack(4, 5),
// Start bound `-5`.
integer(5, 7),
range_operator(7, 9),
// End bound `10`.
integer(9, 11),
closing_brack(11, 12)
])
]
};
}

#[test]
fn identifier() {
parses_to! {
Expand Down Expand Up @@ -1047,6 +1143,7 @@ mod tests {
Rule::positive_predicate_operator,
Rule::negative_predicate_operator,
Rule::_push,
Rule::peek_slice,
Rule::identifier,
Rule::insensitive_string,
Rule::quote,
Expand Down Expand Up @@ -1168,6 +1265,29 @@ mod tests {
]
);
}

/// Checks the AST built for `PEEK[..]` slices: a negative start written with a leading zero
/// (`-04`) becomes `-4` with no end, and an omitted start becomes `0`.
#[test]
fn ast_peek_slice() {
    let grammar = "rule = _{ PEEK[-04..] ~ PEEK[..3] }";

    let parsed = PestParser::parse(Rule::grammar_rules, grammar).unwrap();
    let rules = consume_rules_with_spans(parsed).unwrap();
    let actual: Vec<_> = rules.into_iter().map(convert_rule).collect();

    // The two slices are joined by `~`, so they end up as the two sides of one `Seq`.
    let open_ended = Expr::PeekSlice(-4, None);
    let from_bottom = Expr::PeekSlice(0, Some(3));
    let expected = vec![AstRule {
        name: "rule".to_owned(),
        ty: RuleType::Silent,
        expr: Expr::Seq(Box::new(open_ended), Box::new(from_bottom)),
    }];

    assert_eq!(actual, expected);
}

#[test]
#[should_panic(expected = "grammar error
Expand Down
2 changes: 1 addition & 1 deletion pest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
extern crate ucd_trie;

pub use parser::Parser;
pub use parser_state::{state, Atomicity, Lookahead, ParseResult, ParserState};
pub use parser_state::{state, Atomicity, Lookahead, ParseResult, ParserState, MatchDir};
pub use position::Position;
pub use span::Span;
use std::fmt::Debug;
Expand Down
Loading

0 comments on commit 8e58e02

Please sign in to comment.