From 063159db09cf5957e6d9569dea02b18cea9dc8d5 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 19 Jun 2024 17:45:40 -0700 Subject: [PATCH 01/28] feat(fmt): An attempt at aesthetic items into PL This is an attempt to get around the complications of managing lexer + parser output, which #4397 has hit in a few incarnations by just adding comments ('aesthetics') to PL. This very very nearly works -- with chumsky we can create a function that wraps anything that might have a comment, implement a trait on the AST items that contain it, and away we go (though it did require a lot of debugging in the end). This would then be really easy to write back out. I think there's literally a single case where it doesn't work -- where a comment doesn't come directly before or directly after an AST item -- in the final trailing comma of a tuple or array. So tests fail at the moment. Next we need to consider: - Can we workaround that one case? We don't actually care about whether there's a trailing comma, so we could likely hack around it... - Are there actually other cases of this model failing? I know this approach -- of putting aesthetic items into AST -- is not generally favored, and it's really rare that there's even a single case of something not working. --- prqlc/prqlc-ast/src/expr.rs | 24 ++++- prqlc/prqlc-ast/src/lib.rs | 8 ++ prqlc/prqlc-ast/src/stmt.rs | 42 +++++++- prqlc/prqlc-parser/src/expr.rs | 44 +++++---- prqlc/prqlc-parser/src/interpolation.rs | 4 + prqlc/prqlc-parser/src/lib.rs | 63 ++++++++---- ...qlc_parser__test__pipeline_parse_tree.snap | 8 ++ prqlc/prqlc-parser/src/stmt.rs | 51 +++++++--- prqlc/prqlc-parser/src/test.rs | 11 ++- prqlc/prqlc-parser/src/types.rs | 2 +- prqlc/prqlc/src/semantic/ast_expand.rs | 6 ++ .../tests/integration/ast_code_matches.rs | 95 ++++++++++++++++++- ...__queries__debug_lineage__aggregation.snap | 5 + ...n__queries__debug_lineage__arithmetic.snap | 2 + ...gration__queries__debug_lineage__cast.snap | 2 + ..._queries__debug_lineage__date_to_text.snap | 5 + ...ion__queries__debug_lineage__distinct.snap | 2 + ...__queries__debug_lineage__distinct_on.snap | 2 + ..._queries__debug_lineage__genre_counts.snap | 3 + ...on__queries__debug_lineage__group_all.snap | 2 + ...n__queries__debug_lineage__group_sort.snap | 2 + ..._debug_lineage__group_sort_limit_take.snap | 3 + ...ueries__debug_lineage__invoice_totals.snap | 3 + ...tion__queries__debug_lineage__loop_01.snap | 3 + ...__queries__debug_lineage__math_module.snap | 3 + ...on__queries__debug_lineage__pipelines.snap | 4 + ...ion__queries__debug_lineage__read_csv.snap | 4 + ...ueries__debug_lineage__set_ops_remove.snap | 2 + ...gration__queries__debug_lineage__sort.snap | 8 +- ...ation__queries__debug_lineage__switch.snap | 3 + ...gration__queries__debug_lineage__take.snap | 2 + ...__queries__debug_lineage__text_module.snap | 4 + ...ation__queries__debug_lineage__window.snap | 10 ++ 33 files changed, 370 insertions(+), 62 deletions(-) diff --git a/prqlc/prqlc-ast/src/expr.rs b/prqlc/prqlc-ast/src/expr.rs index 36c7ed0adc2e..e908bb6aae04 100644 --- a/prqlc/prqlc-ast/src/expr.rs +++ b/prqlc/prqlc-ast/src/expr.rs @@ -11,8 +11,8 @@ pub use self::ident::Ident; pub use self::ops::{BinOp, UnOp}; pub use self::token::{Literal, ValueAndUnit}; use super::token; -use crate::span::Span; -use crate::Ty; +use crate::{span::Span, WithAesthetics}; +use crate::{TokenKind, Ty}; impl Expr { pub fn new>(kind: K) -> Self { @@ -20,6 +20,8 @@ impl Expr { kind: kind.into(), span: None, alias: None, + aesthetics_before: Vec::new(), + aesthetics_after: Vec::new(), } } } @@ -38,6 +40,24 @@ pub struct Expr { #[serde(skip_serializing_if = "Option::is_none")] pub alias: Option, + + // Maybe should be Token? + #[serde(skip_serializing_if = "Vec::is_empty")] + pub aesthetics_before: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub aesthetics_after: Vec, +} + +impl WithAesthetics for Expr { + fn with_aesthetics( + mut self, + aesthetics_before: Vec, + aesthetics_after: Vec, + ) -> Self { + self.aesthetics_before = aesthetics_before; + self.aesthetics_after = aesthetics_after; + self + } } #[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr)] diff --git a/prqlc/prqlc-ast/src/lib.rs b/prqlc/prqlc-ast/src/lib.rs index ec8101ccf097..c42368980fbe 100644 --- a/prqlc/prqlc-ast/src/lib.rs +++ b/prqlc/prqlc-ast/src/lib.rs @@ -9,3 +9,11 @@ pub use span::*; pub use stmt::*; pub use token::*; pub use types::*; + +pub trait WithAesthetics { + fn with_aesthetics( + self, + aesthetics_before: Vec, + aethetics_after: Vec, + ) -> Self; +} diff --git a/prqlc/prqlc-ast/src/stmt.rs b/prqlc/prqlc-ast/src/stmt.rs index 45fd0cd78182..2ac135cf2c92 100644 --- a/prqlc/prqlc-ast/src/stmt.rs +++ b/prqlc/prqlc-ast/src/stmt.rs @@ -4,7 +4,7 @@ use enum_as_inner::EnumAsInner; use semver::VersionReq; use serde::{Deserialize, Serialize}; -use crate::{expr::Expr, Ident, Span, Ty}; +use crate::{expr::Expr, Ident, Span, TokenKind, Ty, WithAesthetics}; #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)] pub struct QueryDef { @@ -31,6 +31,26 @@ pub struct Stmt { #[serde(skip_serializing_if = "Vec::is_empty", default)] pub annotations: Vec, + + // Maybe should be Token? + #[serde(skip_serializing_if = "Vec::is_empty")] + pub aesthetics_before: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub aesthetics_after: Vec, +} + +impl WithAesthetics for Stmt { + fn with_aesthetics( + self, + aesthetics_before: Vec, + aesthetics_after: Vec, + ) -> Self { + Stmt { + aesthetics_before, + aesthetics_after, + ..self + } + } } #[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize)] @@ -73,6 +93,24 @@ pub struct ImportDef { #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Annotation { pub expr: Box, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub aesthetics_before: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub aesthetics_after: Vec, +} + +impl WithAesthetics for Annotation { + fn with_aesthetics( + self, + aesthetics_before: Vec, + aesthetics_after: Vec, + ) -> Self { + Annotation { + aesthetics_before, + aesthetics_after, + ..self + } + } } impl Stmt { @@ -81,6 +119,8 @@ impl Stmt { kind, span: None, annotations: Vec::new(), + aesthetics_before: Vec::new(), + aesthetics_after: Vec::new(), } } } diff --git a/prqlc/prqlc-parser/src/expr.rs b/prqlc/prqlc-parser/src/expr.rs index eb7e3b391b3a..85921f6a2042 100644 --- a/prqlc/prqlc-parser/src/expr.rs +++ b/prqlc/prqlc-parser/src/expr.rs @@ -10,7 +10,7 @@ use super::interpolation; use crate::err::parse_error::PError; use crate::types::type_expr; -pub fn expr_call() -> impl Parser { +pub fn expr_call() -> impl Parser + Clone { let expr = expr(); lambda_func(expr.clone()).or(func_call(expr)) @@ -27,7 +27,9 @@ pub fn expr() -> impl Parser + Clone { .map(|x| x.to_string()) .map(ExprKind::Internal); - let nested_expr = pipeline(lambda_func(expr.clone()).or(func_call(expr.clone()))).boxed(); + let nested_expr = with_aesthetics( + pipeline(lambda_func(expr.clone()).or(func_call(expr.clone()))).boxed(), + ); let tuple = ident_part() .then_ignore(ctrl('=')) @@ -122,18 +124,20 @@ pub fn expr() -> impl Parser + Clone { let param = select! { TokenKind::Param(id) => ExprKind::Param(id) }; - let term = choice(( - literal, - internal, - tuple, - array, - interpolation, - ident_kind, - case, - param, - )) - .map_with_span(into_expr) - .or(pipeline) + let term = with_aesthetics( + choice(( + literal, + internal, + tuple, + array, + interpolation, + ident_kind, + case, + param, + )) + .map_with_span(into_expr) + .or(pipeline), + ) .boxed(); // indirections @@ -229,9 +233,9 @@ pub fn expr() -> impl Parser + Clone { }) } -pub fn pipeline(expr: E) -> impl Parser +pub fn pipeline(expr: E) -> impl Parser + Clone where - E: Parser, + E: Parser + Clone, { // expr has to be a param, because it can be either a normal expr() or // a recursive expr called from within expr() @@ -264,7 +268,7 @@ where pub fn binary_op_parser<'a, Term, Op>( term: Term, op: Op, -) -> impl Parser + 'a +) -> impl Parser + 'a + Clone where Term: Parser + 'a, Op: Parser + 'a, @@ -290,7 +294,7 @@ where .boxed() } -fn func_call(expr: E) -> impl Parser +fn func_call(expr: E) -> impl Parser + Clone where E: Parser + Clone, { @@ -342,7 +346,7 @@ where .labelled("function call") } -fn lambda_func(expr: E) -> impl Parser +fn lambda_func(expr: E) -> impl Parser + Clone where E: Parser + Clone + 'static, { @@ -402,7 +406,7 @@ where .labelled("function definition") } -pub fn ident() -> impl Parser { +pub fn ident() -> impl Parser + Clone { ident_part() .separated_by(ctrl('.')) .at_least(1) diff --git a/prqlc/prqlc-parser/src/interpolation.rs b/prqlc/prqlc-parser/src/interpolation.rs index 12af7f14f46f..109fbde7b641 100644 --- a/prqlc/prqlc-parser/src/interpolation.rs +++ b/prqlc/prqlc-parser/src/interpolation.rs @@ -99,6 +99,8 @@ fn parse_interpolate() { 0:8-9, ), alias: None, + aesthetics_before: [], + aesthetics_after: [], }, format: None, }, @@ -144,6 +146,8 @@ fn parse_interpolate() { 0:14-15, ), alias: None, + aesthetics_before: [], + aesthetics_after: [], }, format: None, }, diff --git a/prqlc/prqlc-parser/src/lib.rs b/prqlc/prqlc-parser/src/lib.rs index 91a408f0a10a..072f7b8e2704 100644 --- a/prqlc/prqlc-parser/src/lib.rs +++ b/prqlc/prqlc-parser/src/lib.rs @@ -23,6 +23,7 @@ pub fn parse_source(source: &str, source_id: u16) -> Result, Vec Result, Vec = tokens.map(|tokens| { - tokens.into_iter().filter(|token| { - !matches!( - token.kind, - TokenKind::Comment(_) | TokenKind::LineWrap(_) | TokenKind::DocComment(_) - ) - }) - }); - - let ast = if let Some(semantic_tokens) = semantic_tokens { - let stream = prepare_stream(semantic_tokens, source, source_id); + let ast = if let Some(tokens) = tokens { + let stream = prepare_stream(tokens.into_iter(), source, source_id); - let (ast, parse_errors) = ::chumsky::Parser::parse_recovery(&stmt::source(), stream); + let (ast, parse_errors) = + // ::chumsky::Parser::parse_recovery_verbose(&stmt::source(), stream); + ::chumsky::Parser::parse_recovery(&stmt::source(), stream); log::debug!("parse errors: {:?}", parse_errors); errors.extend(parse_errors.into_iter().map(|e| e.into())); @@ -72,16 +64,16 @@ pub fn lex_source(source: &str) -> Result> { mod common { use chumsky::prelude::*; - use prqlc_ast::expr::*; use prqlc_ast::stmt::*; use prqlc_ast::token::*; use prqlc_ast::Span; use prqlc_ast::Ty; use prqlc_ast::TyKind; + use prqlc_ast::{expr::*, WithAesthetics}; use crate::err::parse_error::PError; - pub fn ident_part() -> impl Parser { + pub fn ident_part() -> impl Parser + Clone { return select! { TokenKind::Ident(ident) => ident, TokenKind::Keyword(ident) if &ident == "module" => ident, @@ -112,6 +104,8 @@ mod common { kind, span: Some(span), annotations, + aesthetics_before: Vec::new(), + aesthetics_after: Vec::new(), } } @@ -128,6 +122,43 @@ mod common { ..Ty::new(kind) } } + + pub fn aesthetic() -> impl Parser + Clone { + select! { + TokenKind::Comment(comment) => TokenKind::Comment(comment), + TokenKind::LineWrap(lw) => TokenKind::LineWrap(lw), + TokenKind::DocComment(dc) => TokenKind::DocComment(dc), + } + } + + pub fn with_aesthetics(parser: P) -> impl Parser + Clone + where + P: Parser + Clone, + O: WithAesthetics, + { + // We can have newlines between the aesthetics and the actual token to + // cover a case like `# foo` here: + // + // ```prql + // # foo + // + // from bar + // # baz + // select artists + // ``` + // + // ...but not after the aesthetics after the token; since we don't want + // to eat the newline after `from bar` + // + let aesthetics_before = aesthetic().then_ignore(new_line().repeated()).repeated(); + let aesthetics_after = aesthetic().separated_by(new_line()); + + aesthetics_before.then(parser).then(aesthetics_after).map( + |((aesthetics_before, inner), aesthetics_after)| { + inner.with_aesthetics(aesthetics_before, aesthetics_after) + }, + ) + } } /// Convert the output of the lexer into the input of the parser. Requires diff --git a/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap b/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap index 6d6d8c6c3c77..f3b9a1d1fc86 100644 --- a/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap +++ b/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap @@ -24,6 +24,8 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" right: Literal: String: USA + aesthetics_after: + - Comment: " Each line transforms the previous result." - FuncCall: name: Ident: derive @@ -36,12 +38,16 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" right: Ident: payroll_tax alias: gross_salary + aesthetics_before: + - Comment: " This adds columns / variables." - Binary: left: Ident: gross_salary op: Add right: Ident: benefits_cost + aesthetics_after: + - Comment: " Variables can use other variables." alias: gross_cost - FuncCall: name: @@ -71,6 +77,8 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" Ident: average args: - Ident: salary + aesthetics_before: + - Comment: " Aggregate each group to a single row" - FuncCall: name: Ident: average diff --git a/prqlc/prqlc-parser/src/stmt.rs b/prqlc/prqlc-parser/src/stmt.rs index c63c25a2b20f..2036aea3f9d7 100644 --- a/prqlc/prqlc-parser/src/stmt.rs +++ b/prqlc/prqlc-parser/src/stmt.rs @@ -11,11 +11,11 @@ use semver::VersionReq; use super::common::{ctrl, ident_part, into_stmt, keyword, new_line}; use super::expr::{expr, expr_call, ident, pipeline}; -use crate::err::parse_error::PError; use crate::types::type_expr; +use crate::{common::with_aesthetics, err::parse_error::PError}; pub fn source() -> impl Parser, Error = PError> { - query_def() + with_aesthetics(query_def()) .or_not() .chain(module_contents()) .then_ignore(end()) @@ -34,19 +34,40 @@ fn module_contents() -> impl Parser, Error = PError> { .then_ignore(new_line().repeated()) .map(|expr| Annotation { expr: Box::new(expr), + aesthetics_before: Vec::new(), + aesthetics_after: Vec::new(), }); - annotation - .repeated() - .then(choice((module_def, type_def(), import_def(), var_def()))) - .map_with_span(into_stmt) - .separated_by(new_line().repeated().at_least(1)) - .allow_leading() - .allow_trailing() + // TODO: I think some duplication here; we allow for potential + // newlines before each item here, but then also have `.allow_leading` + // below — since now we can get newlines after a comment between the + // aesthetic item and the stmt... So a bit messy + let stmt_kind = new_line().repeated().ignore_then(choice(( + module_def, + type_def(), + import_def(), + var_def(), + ))); + + // Two wrapping of `with_aesthetics` — the first for the whole block, + // and the second for just the annotation; if there's a comment between + // the annotation and the code. + with_aesthetics( + with_aesthetics(annotation) + .repeated() + // TODO: do we need this? I think possibly we get an additional + // error when we remove it; check (because it seems redundant...). + .then_ignore(new_line().repeated()) + .then(stmt_kind) + .map_with_span(into_stmt), + ) + .separated_by(new_line().repeated().at_least(1)) + .allow_leading() + .allow_trailing() }) } -fn query_def() -> impl Parser { +fn query_def() -> impl Parser + Clone { new_line() .repeated() .ignore_then(keyword("prql")) @@ -115,8 +136,10 @@ fn query_def() -> impl Parser { .labelled("query header") } -fn var_def() -> impl Parser { - let let_ = keyword("let") +fn var_def() -> impl Parser + Clone { + let let_ = new_line() + .repeated() + .ignore_then(keyword("let")) .ignore_then(ident_part()) .then(type_expr().delimited_by(ctrl('<'), ctrl('>')).or_not()) .then(ctrl('=').ignore_then(expr_call()).map(Box::new).or_not()) @@ -153,7 +176,7 @@ fn var_def() -> impl Parser { let_.or(main_or_into) } -fn type_def() -> impl Parser { +fn type_def() -> impl Parser + Clone { keyword("type") .ignore_then(ident_part()) .then(ctrl('=').ignore_then(type_expr()).or_not()) @@ -161,7 +184,7 @@ fn type_def() -> impl Parser { .labelled("type definition") } -fn import_def() -> impl Parser { +fn import_def() -> impl Parser + Clone { keyword("import") .ignore_then(ident_part().then_ignore(ctrl('=')).or_not()) .then(ident()) diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index 4f32615a4d09..e6a8b58dadaf 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -404,6 +404,8 @@ fn test_basic_exprs() { args: - Ident: a span: "0:28-36" + aesthetics_before: + - Comment: " this is a comment" "###); assert_yaml_snapshot!(parse_expr( "join side:left country (id==employee_id)" @@ -1827,7 +1829,7 @@ fn test_dates() { #[test] fn test_multiline_string() { assert_yaml_snapshot!(parse_single(r##" - derive x = r#"r-string test"# + derive x = r"r-string test" "##).unwrap(), @r###" --- - VarDef: @@ -1838,9 +1840,10 @@ fn test_multiline_string() { name: Ident: derive args: - - Ident: r + - Literal: + String: r-string test alias: x - span: "0:9-39" + span: "0:9-37" "### ) } @@ -1928,6 +1931,8 @@ fn test_allowed_idents() { op: EqSelf expr: Ident: employee_id + aesthetics_after: + - Comment: " table with leading underscore" - FuncCall: name: Ident: filter diff --git a/prqlc/prqlc-parser/src/types.rs b/prqlc/prqlc-parser/src/types.rs index 7444842b2338..5c227a02efaf 100644 --- a/prqlc/prqlc-parser/src/types.rs +++ b/prqlc/prqlc-parser/src/types.rs @@ -5,7 +5,7 @@ use super::common::*; use crate::err::parse_error::PError; use crate::expr::ident; -pub fn type_expr() -> impl Parser { +pub fn type_expr() -> impl Parser + Clone { recursive(|nested_type_expr| { let basic = select! { TokenKind::Literal(lit) => TyKind::Singleton(lit), diff --git a/prqlc/prqlc/src/semantic/ast_expand.rs b/prqlc/prqlc/src/semantic/ast_expand.rs index 44d9a1a9cb91..f09ec287444b 100644 --- a/prqlc/prqlc/src/semantic/ast_expand.rs +++ b/prqlc/prqlc/src/semantic/ast_expand.rs @@ -292,6 +292,8 @@ pub fn restrict_expr(expr: pl::Expr) -> ast::Expr { kind: restrict_expr_kind(expr.kind), span: expr.span, alias: expr.alias, + aesthetics_before: vec![], + aesthetics_after: vec![], } } @@ -455,12 +457,16 @@ fn restrict_stmt(stmt: pl::Stmt) -> ast::Stmt { .into_iter() .map(restrict_annotation) .collect(), + aesthetics_before: vec![], + aesthetics_after: vec![], } } pub fn restrict_annotation(value: pl::Annotation) -> ast::Annotation { ast::Annotation { expr: restrict_expr_box(value.expr), + aesthetics_before: vec![], + aesthetics_after: vec![], } } diff --git a/prqlc/prqlc/tests/integration/ast_code_matches.rs b/prqlc/prqlc/tests/integration/ast_code_matches.rs index fea731d35cb4..61a61ecf87bb 100644 --- a/prqlc/prqlc/tests/integration/ast_code_matches.rs +++ b/prqlc/prqlc/tests/integration/ast_code_matches.rs @@ -17,6 +17,51 @@ fn test_expr_ast_code_matches() { -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Clone, PartialEq, Serialize, Deserialize)] @@ .. @@ + - // Maybe should be Token? + - #[serde(skip_serializing_if = "Vec::is_empty")] + - pub aesthetics_before: Vec, + - #[serde(skip_serializing_if = "Vec::is_empty")] + - pub aesthetics_after: Vec, + -} + + /// Unique identificator of the node. Set exactly once during semantic::resolve. + + #[serde(skip_serializing_if = "Option::is_none")] + + pub id: Option, + @@ .. @@ + -impl WithAesthetics for Expr { + - fn with_aesthetics( + - mut self, + - aesthetics_before: Vec, + - aesthetics_after: Vec, + - ) -> Self { + - self.aesthetics_before = aesthetics_before; + - self.aesthetics_after = aesthetics_after; + - self + - } + + /// For [Ident]s, this is id of node referenced by the ident + + #[serde(skip_serializing_if = "Option::is_none")] + + pub target_id: Option, + + + + /// Type of expression this node represents. + + /// [None] means that type should be inferred. + + #[serde(skip_serializing_if = "Option::is_none")] + + pub ty: Option, + + + + /// Information about where data of this expression will come from. + + /// + + /// Currently, this is used to infer relational pipeline frames. + + /// Must always exists if ty is a relation. + + #[serde(skip_serializing_if = "Option::is_none")] + + pub lineage: Option, + + + + #[serde(skip)] + + pub needs_window: bool, + + + + /// When true on [ExprKind::Tuple], this list will be flattened when placed + + /// in some other list. + + // TODO: maybe we should have a special ExprKind instead of this flag? + + #[serde(skip)] + + pub flatten: bool, + @@ .. @@ - Ident(String), - Indirection { - base: Box, @@ -38,6 +83,8 @@ fn test_expr_ast_code_matches() { - Binary(BinaryExpr), - Unary(UnaryExpr), @@ .. @@ + -} + - -#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize)] -pub enum IndirectionKind { - Name(String), @@ -50,8 +97,7 @@ fn test_expr_ast_code_matches() { - pub left: Box, - pub op: BinOp, - pub right: Box, - -} - - + @@ .. @@ -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] -pub struct UnaryExpr { - pub op: UnOp, @@ -59,11 +105,12 @@ fn test_expr_ast_code_matches() { -} - @@ .. @@ + - -#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] -pub struct GenericTypeParam { - /// Assigned name of this generic type argument. - pub name: String, - - + @@ .. @@ - pub domain: Vec, -} - @@ -92,8 +139,48 @@ fn test_stmt_ast_code_matches() { &read_to_string("../prqlc/src/ir/pl/stmt.rs").unwrap(), ), @r###" @@ .. @@ + - + - // Maybe should be Token? + - #[serde(skip_serializing_if = "Vec::is_empty")] + - pub aesthetics_before: Vec, + - #[serde(skip_serializing_if = "Vec::is_empty")] + - pub aesthetics_after: Vec, + @@ .. @@ + -impl WithAesthetics for Stmt { + - fn with_aesthetics( + - self, + - aesthetics_before: Vec, + - aesthetics_after: Vec, + - ) -> Self { + - Stmt { + - aesthetics_before, + - aesthetics_after, + - ..self + - } + - } + -} + - + @@ .. @@ - pub kind: VarDefKind, @@ .. @@ + - #[serde(skip_serializing_if = "Vec::is_empty")] + - pub aesthetics_before: Vec, + - #[serde(skip_serializing_if = "Vec::is_empty")] + - pub aesthetics_after: Vec, + -} + - + -impl WithAesthetics for Annotation { + - fn with_aesthetics( + - self, + - aesthetics_before: Vec, + - aesthetics_after: Vec, + - ) -> Self { + - Annotation { + - aesthetics_before, + - aesthetics_after, + - ..self + - } + - } -} - -impl Stmt { @@ -102,6 +189,8 @@ fn test_stmt_ast_code_matches() { - kind, - span: None, - annotations: Vec::new(), + - aesthetics_before: Vec::new(), + - aesthetics_after: Vec::new(), - } - } "### diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap index 56eb48f37e81..930c9963253c 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap @@ -258,3 +258,8 @@ ast: args: - Ident: empty_name span: 1:102-244 + aesthetics_before: + - !Comment ' mssql:skip' + - !Comment ' mysql:skip' + - !Comment ' clickhouse:skip' + - !Comment ' glaredb:skip (the string_agg function is not supported)' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap index 29fd7ffed5bf..145276b511f4 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap @@ -1159,3 +1159,5 @@ ast: args: - Ident: id span: 1:13-833 + aesthetics_before: + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap index 673866095522..0e090ab209a3 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap @@ -187,3 +187,5 @@ ast: - Literal: Integer: 20 span: 1:13-106 + aesthetics_before: + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap index a2423ce4dc50..8de3ba831ddd 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap @@ -546,3 +546,8 @@ ast: String: 100%% in %d days alias: d12 span: 1:57-719 + aesthetics_before: + - !Comment ' generic:skip' + - !Comment ' glaredb:skip' + - !Comment ' sqlite:skip' + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap index d2c9f6f98c33..efc1d91446a0 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap @@ -209,3 +209,5 @@ ast: Ident: tracks field: Star span: 1:13-91 + aesthetics_before: + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap index 5b1eb4ad31d4..109c88863b91 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap @@ -264,3 +264,5 @@ ast: Ident: genre_id - Ident: media_type_id span: 1:13-160 + aesthetics_before: + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap index d8a39e5b9d1c..960ab255aa48 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap @@ -116,6 +116,9 @@ ast: - Ident: name alias: a span: 1:117-185 + aesthetics_before: + - !Comment ' clickhouse:skip (ClickHouse prefers aliases to column names https://github.com/PRQL/prql/issues/2827)' + - !Comment ' mssql:test' - VarDef: kind: Main name: main diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap index a664ec8a01f4..67750c5dc693 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap @@ -307,3 +307,5 @@ ast: args: - Ident: album_id span: 1:13-161 + aesthetics_before: + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap index 69119a7b38af..6b1bb688413a 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap @@ -297,3 +297,5 @@ ast: alias: d1 - Ident: n1 span: 1:13-151 + aesthetics_before: + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap index b6d9be2bc15f..a4a91ab9f0b2 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap @@ -373,3 +373,6 @@ ast: expr: Ident: milliseconds span: 1:76-252 + aesthetics_before: + - !Comment ' Compute the 3 longest songs for each genre and sort by genre' + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap index 4054030d0bb4..8cebc7fa91b2 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap @@ -901,3 +901,6 @@ ast: - Literal: Integer: 20 span: 1:131-792 + aesthetics_before: + - !Comment ' clickhouse:skip (clickhouse doesn''t have lag function)' + - !DocComment ' Calculate a number of metrics about the sales of tracks in each city.' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap index 96f527899b53..29e8542c9e12 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap @@ -332,3 +332,6 @@ ast: args: - Ident: n span: 1:162-257 + aesthetics_before: + - !Comment ' clickhouse:skip (DB::Exception: Syntax error)' + - !Comment ' glaredb:skip (DataFusion does not support recursive CTEs https://github.com/apache/arrow-datafusion/issues/462)' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap index da55144526c2..3d6cbcffc293 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap @@ -757,3 +757,6 @@ ast: Integer: 2 alias: total_square span: 1:82-788 + aesthetics_before: + - !Comment ' mssql:test' + - !Comment ' sqlite:skip (see https://github.com/rusqlite/rusqlite/issues/1211)' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap index db6ae5f060e6..2e193153c425 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap @@ -308,3 +308,7 @@ ast: - Ident: name - Ident: composer span: 1:166-298 + aesthetics_before: + - !Comment ' sqlite:skip (Only works on Sqlite implementations which have the extension' + - !Comment ' installed' + - !Comment ' https://stackoverflow.com/questions/24037982/how-to-use-regexp-in-sqlite)' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap index 009ebf55c985..961711aa69c0 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap @@ -66,3 +66,7 @@ ast: args: - Ident: media_type_id span: 1:43-111 + aesthetics_before: + - !Comment ' sqlite:skip' + - !Comment ' postgres:skip' + - !Comment ' mysql:skip' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap index a36f100dd7fb..32c9c55189d5 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap @@ -276,6 +276,8 @@ ast: named_params: [] generic_type_params: [] span: 1:13-79 + aesthetics_before: + - !Comment ' mssql:test' - VarDef: kind: Main name: main diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap index 4f92fb305d05..48ab859dc5a0 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap @@ -26,7 +26,7 @@ frames: table: - default_db - employees -- - 1:145-215 +- - 1:92-215 - columns: - !All input_id: 128 @@ -170,7 +170,7 @@ nodes: - 119 - id: 143 kind: 'TransformCall: Join' - span: 1:145-215 + span: 1:92-215 children: - 138 - 119 @@ -256,6 +256,8 @@ ast: - FuncCall: name: Ident: join + aesthetics_before: + - !Comment ' joining may use HashMerge, which can undo ORDER BY' args: - Ident: employees alias: manager @@ -292,3 +294,5 @@ ast: Ident: manager field: !Name first_name span: 1:13-272 + aesthetics_before: + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap index 7af6fc2334b6..c4057740161f 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap @@ -212,3 +212,6 @@ ast: - Literal: Integer: 10 span: 1:89-255 + aesthetics_before: + - !Comment ' glaredb:skip (May be a bag of String type conversion for Postgres Client)' + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap index f2d9b74335bb..cba8322bda57 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap @@ -103,3 +103,5 @@ ast: Literal: Integer: 5 span: 1:13-52 + aesthetics_before: + - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap index a298ce5786ab..37394d37b0ff 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap @@ -632,3 +632,7 @@ ast: - Literal: String: os span: 1:113-589 + aesthetics_before: + - !Comment ' mssql:test' + - !Comment ' glaredb:skip — TODO: started raising an error on 2024-05-20; see `window.prql`' + - !Comment ' for more details' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap index 665a6da67969..f01c9ddba58e 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap @@ -459,3 +459,13 @@ ast: Literal: Integer: 22 span: 1:762-1021 + aesthetics_before: + - !Comment ' mssql:skip Conversion("cannot interpret I64(Some(1)) as an i32 value")'', connection.rs:200:34' + - !Comment ' duckdb:skip problems with DISTINCT ON (duckdb internal error: [with INPUT_TYPE = int; RESULT_TYPE = unsigned char]: Assertion `min_val <= input'' failed.)' + - !Comment ' clickhouse:skip problems with DISTINCT ON' + - !Comment ' postgres:skip problems with DISTINCT ON' + - !Comment ' glaredb:skip — TODO: started raising an error on 2024-05-20, from https://github.com/PRQL/prql/actions/runs/9154902656/job/25198160283:' + - !Comment ' ERROR: This feature is not implemented: Unsupported ast node in sqltorel:' + - !Comment ' Substring { expr: Identifier(Ident { value: "title", quote_style: None }),' + - !Comment ' substring_from: Some(Value(Number("2", false))), substring_for:' + - !Comment ' Some(Value(Number("5", false))), special: true }' From d4a8fb7356dcdde6201da68b196df462ff8e47f2 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 19 Jun 2024 21:36:00 -0700 Subject: [PATCH 02/28] clarify explanation --- prqlc/prqlc-parser/src/parser/common.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 19d21ba7f519..6f1162d529df 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -69,8 +69,8 @@ where P: Parser + Clone, O: WithAesthetics, { - // We can have newlines between the aesthetics and the actual token to - // cover a case like `# foo` here: + // We can safely remove newlines following the `aesthetics_before`, to cover + // a case like `# foo` here: // // ```prql // # foo @@ -80,8 +80,8 @@ where // select artists // ``` // - // ...but not after the aesthetics after the token; since we don't want - // to eat the newline after `from bar` + // ...but not following the `aesthetics_after`; since that would eat all + // newlines between `from_bar` and `select_artists`. // let aesthetics_before = aesthetic().then_ignore(new_line().repeated()).repeated(); let aesthetics_after = aesthetic().separated_by(new_line()); From 3ee3cada0352a3ec6da2481baed8e0545bf4f4fe Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 19 Jun 2024 21:42:14 -0700 Subject: [PATCH 03/28] internal: Add `Clone` to parsers Extracting this from #4639 --- prqlc/prqlc-parser/src/parser/common.rs | 2 +- prqlc/prqlc-parser/src/parser/expr.rs | 14 +++++++------- prqlc/prqlc-parser/src/parser/stmt.rs | 8 ++++---- prqlc/prqlc-parser/src/parser/types.rs | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 2941e807ed3b..0b0848838949 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -5,7 +5,7 @@ use crate::lexer::lr::TokenKind; use crate::parser::pr::{Annotation, Expr, ExprKind, Stmt, StmtKind, Ty, TyKind}; use crate::span::Span; -pub fn ident_part() -> impl Parser { +pub fn ident_part() -> impl Parser + Clone { return select! { TokenKind::Ident(ident) => ident, TokenKind::Keyword(ident) if &ident == "module" => ident, diff --git a/prqlc/prqlc-parser/src/parser/expr.rs b/prqlc/prqlc-parser/src/parser/expr.rs index 9db876afd4ec..f99135365e82 100644 --- a/prqlc/prqlc-parser/src/parser/expr.rs +++ b/prqlc/prqlc-parser/src/parser/expr.rs @@ -12,7 +12,7 @@ use crate::parser::pr::*; use crate::parser::types::type_expr; use crate::span::Span; -pub fn expr_call() -> impl Parser { +pub fn expr_call() -> impl Parser + Clone { let expr = expr(); lambda_func(expr.clone()).or(func_call(expr)) @@ -231,9 +231,9 @@ pub fn expr() -> impl Parser + Clone { }) } -pub fn pipeline(expr: E) -> impl Parser +pub fn pipeline(expr: E) -> impl Parser + Clone where - E: Parser, + E: Parser + Clone, { // expr has to be a param, because it can be either a normal expr() or // a recursive expr called from within expr() @@ -266,7 +266,7 @@ where pub fn binary_op_parser<'a, Term, Op>( term: Term, op: Op, -) -> impl Parser + 'a +) -> impl Parser + 'a + Clone where Term: Parser + 'a, Op: Parser + 'a, @@ -292,7 +292,7 @@ where .boxed() } -fn func_call(expr: E) -> impl Parser +fn func_call(expr: E) -> impl Parser + Clone where E: Parser + Clone, { @@ -344,7 +344,7 @@ where .labelled("function call") } -fn lambda_func(expr: E) -> impl Parser +fn lambda_func(expr: E) -> impl Parser + Clone where E: Parser + Clone + 'static, { @@ -404,7 +404,7 @@ where .labelled("function definition") } -pub fn ident() -> impl Parser { +pub fn ident() -> impl Parser + Clone { ident_part() .separated_by(ctrl('.')) .at_least(1) diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index 0206c14789e7..16ca656a468d 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -43,7 +43,7 @@ fn module_contents() -> impl Parser, Error = PError> { }) } -fn query_def() -> impl Parser { +fn query_def() -> impl Parser + Clone { new_line() .repeated() .ignore_then(keyword("prql")) @@ -112,7 +112,7 @@ fn query_def() -> impl Parser { .labelled("query header") } -fn var_def() -> impl Parser { +fn var_def() -> impl Parser + Clone { let let_ = keyword("let") .ignore_then(ident_part()) .then(type_expr().delimited_by(ctrl('<'), ctrl('>')).or_not()) @@ -150,7 +150,7 @@ fn var_def() -> impl Parser { let_.or(main_or_into) } -fn type_def() -> impl Parser { +fn type_def() -> impl Parser + Clone { keyword("type") .ignore_then(ident_part()) .then(ctrl('=').ignore_then(type_expr()).or_not()) @@ -158,7 +158,7 @@ fn type_def() -> impl Parser { .labelled("type definition") } -fn import_def() -> impl Parser { +fn import_def() -> impl Parser + Clone { keyword("import") .ignore_then(ident_part().then_ignore(ctrl('=')).or_not()) .then(ident()) diff --git a/prqlc/prqlc-parser/src/parser/types.rs b/prqlc/prqlc-parser/src/parser/types.rs index 337a4e335fe0..7833904566a8 100644 --- a/prqlc/prqlc-parser/src/parser/types.rs +++ b/prqlc/prqlc-parser/src/parser/types.rs @@ -6,7 +6,7 @@ use crate::lexer::lr::TokenKind; use crate::parser::expr::ident; use crate::parser::pr::*; -pub fn type_expr() -> impl Parser { +pub fn type_expr() -> impl Parser + Clone { recursive(|nested_type_expr| { let basic = select! { TokenKind::Literal(lit) => TyKind::Singleton(lit), From e4aeac01315b110258702c6e10bd4d70076282da Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Mon, 1 Jul 2024 11:10:01 -0700 Subject: [PATCH 04/28] --- prqlc/prqlc-parser/src/parser/common.rs | 4 ++-- prqlc/prqlc-parser/src/parser/mod.rs | 14 +++++++------- prqlc/prqlc-parser/src/test.rs | 8 -------- ...ation__queries__debug_lineage__aggregation.snap | 5 ----- ...ration__queries__debug_lineage__arithmetic.snap | 2 -- .../integration__queries__debug_lineage__cast.snap | 2 -- ...tion__queries__debug_lineage__date_to_text.snap | 5 ----- ...egration__queries__debug_lineage__distinct.snap | 2 -- ...ation__queries__debug_lineage__distinct_on.snap | 2 -- ...tegration__queries__debug_lineage__loop_01.snap | 3 --- ...ation__queries__debug_lineage__math_module.snap | 9 --------- ...on__queries__debug_lineage__set_ops_remove.snap | 2 -- .../integration__queries__debug_lineage__sort.snap | 12 ++---------- 13 files changed, 11 insertions(+), 59 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 664b7db21145..e3a15c204668 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -44,8 +44,8 @@ pub fn into_stmt((annotations, kind): (Vec, StmtKind), span: Span) - pub fn aesthetic() -> impl Parser + Clone { select! { - TokenKind::Comment(comment) => TokenKind::Comment(comment), - TokenKind::LineWrap(lw) => TokenKind::LineWrap(lw), + // TokenKind::Comment(comment) => TokenKind::Comment(comment), + // TokenKind::LineWrap(lw) => TokenKind::LineWrap(lw), TokenKind::DocComment(dc) => TokenKind::DocComment(dc), } } diff --git a/prqlc/prqlc-parser/src/parser/mod.rs b/prqlc/prqlc-parser/src/parser/mod.rs index f0f3fa1c5fea..30bfb061926c 100644 --- a/prqlc/prqlc-parser/src/parser/mod.rs +++ b/prqlc/prqlc-parser/src/parser/mod.rs @@ -20,13 +20,13 @@ pub fn parse_lr_to_pr( ) -> (Option>, Vec) { // We don't want comments in the AST (but we do intend to use them as part of // formatting) - // let semantic_tokens = lr_iter.into_iter().filter(|token| { - // !matches!( - // token.kind, - // lr::TokenKind::Comment(_) | lr::TokenKind::LineWrap(_) | lr::TokenKind::DocComment(_) - // ) - // }); - let semantic_tokens = lr_iter.into_iter(); + let semantic_tokens = lr_iter.into_iter().filter(|token| { + !matches!( + token.kind, + lr::TokenKind::Comment(_) | lr::TokenKind::LineWrap(_) //| lr::TokenKind::DocComment(_) + ) + }); + // let semantic_tokens = lr_iter.into_iter(); let stream = prepare_stream(semantic_tokens, source, source_id); let (pr, parse_errors) = ::chumsky::Parser::parse_recovery(&stmt::source(), stream); diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index 3649dd8339a3..c49ffdf4826e 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -482,8 +482,6 @@ fn test_basic_exprs() { span: "0:35-36" span: "0:28-36" span: "0:28-36" - aesthetics_before: - - Comment: " this is a comment" "###); assert_yaml_snapshot!(parse_expr( "join side:left country (id==employee_id)" @@ -2452,15 +2450,9 @@ fn test_allowed_idents() { op: EqSelf expr: Ident: employee_id -<<<<<<< HEAD - aesthetics_after: - - Comment: " table with leading underscore" -||||||| 566e6f14 -======= span: "0:48-59" span: "0:46-59" span: "0:32-60" ->>>>>>> main - FuncCall: name: Ident: filter diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap index 2eba5c40c824..feb3f0fbad2f 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap @@ -287,8 +287,3 @@ ast: span: 1:168-243 span: 1:102-243 span: 1:102-244 - aesthetics_before: - - !Comment ' mssql:skip' - - !Comment ' mysql:skip' - - !Comment ' clickhouse:skip' - - !Comment ' glaredb:skip (the string_agg function is not supported)' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap index d937542897aa..9c5a750c0f08 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap @@ -1278,5 +1278,3 @@ ast: span: 1:825-832 span: 1:13-832 span: 1:13-833 - aesthetics_before: - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap index b39c1617f573..310465f90e8a 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap @@ -210,5 +210,3 @@ ast: span: 1:98-105 span: 1:13-105 span: 1:13-106 - aesthetics_before: - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap index d0275285bba9..955e7d60a22b 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap @@ -628,8 +628,3 @@ ast: span: 1:79-718 span: 1:57-718 span: 1:57-719 - aesthetics_before: - - !Comment ' generic:skip' - - !Comment ' glaredb:skip' - - !Comment ' sqlite:skip' - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap index 5c99e4aa6836..60c1240ce602 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap @@ -229,5 +229,3 @@ ast: span: 1:77-90 span: 1:13-90 span: 1:13-91 - aesthetics_before: - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap index 5efc928f6260..be7a54a80889 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap @@ -294,5 +294,3 @@ ast: span: 1:128-159 span: 1:13-159 span: 1:13-160 - aesthetics_before: - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap index 26748118a15e..8a0143ae52e8 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap @@ -364,6 +364,3 @@ ast: span: 1:250-256 span: 1:162-256 span: 1:162-257 - aesthetics_before: - - !Comment ' clickhouse:skip (DB::Exception: Syntax error)' - - !Comment ' glaredb:skip (DataFusion does not support recursive CTEs https://github.com/apache/arrow-datafusion/issues/462)' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap index 689819cade0c..49c84b3bf375 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap @@ -915,14 +915,6 @@ ast: span: 1:773-785 span: 1:752-785 alias: total_square -<<<<<<< HEAD - span: 1:82-788 - aesthetics_before: - - !Comment ' mssql:test' - - !Comment ' sqlite:skip (see https://github.com/rusqlite/rusqlite/issues/1211)' -||||||| 566e6f14 - span: 1:82-788 -======= - Pipeline: exprs: - Binary: @@ -954,4 +946,3 @@ ast: span: 1:103-839 span: 1:82-839 span: 1:82-840 ->>>>>>> main diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap index 150b9324f87d..5af44fc9bec6 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap @@ -290,8 +290,6 @@ ast: generic_type_params: [] span: 1:28-79 span: 1:13-79 - aesthetics_before: - - !Comment ' mssql:test' - VarDef: kind: Main name: main diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap index 8b50cabe53dd..a1aa70338287 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap @@ -26,7 +26,7 @@ frames: table: - default_db - employees -- - 1:92-215 +- - 1:145-215 - columns: - !All input_id: 128 @@ -170,7 +170,7 @@ nodes: - 119 - id: 143 kind: 'TransformCall: Join' - span: 1:92-215 + span: 1:145-215 children: - 138 - 119 @@ -269,13 +269,7 @@ ast: - FuncCall: name: Ident: join -<<<<<<< HEAD - aesthetics_before: - - !Comment ' joining may use HashMerge, which can undo ORDER BY' -||||||| 566e6f14 -======= span: 1:145-149 ->>>>>>> main args: - Ident: employees span: 1:158-167 @@ -330,5 +324,3 @@ ast: span: 1:217-271 span: 1:13-271 span: 1:13-272 - aesthetics_before: - - !Comment ' mssql:test' From 28001bfaff83c727385c33b7a55b8e698ce36076 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 21:05:28 -0700 Subject: [PATCH 05/28] --- prqlc/prqlc-parser/src/parser/common.rs | 47 ++++++----------- prqlc/prqlc-parser/src/parser/expr.rs | 8 +-- .../prqlc-parser/src/parser/interpolation.rs | 6 +-- prqlc/prqlc-parser/src/parser/mod.rs | 8 +-- prqlc/prqlc-parser/src/parser/pr/expr.rs | 31 ++++-------- prqlc/prqlc-parser/src/parser/pr/stmt.rs | 50 ++++++------------- prqlc/prqlc-parser/src/parser/stmt.rs | 11 ++-- ...qlc_parser__test__pipeline_parse_tree.snap | 20 -------- prqlc/prqlc-parser/src/test.rs | 44 ++++++++++++++++ prqlc/prqlc/src/semantic/ast_expand.rs | 7 +-- ..._queries__debug_lineage__genre_counts.snap | 3 -- ...on__queries__debug_lineage__group_all.snap | 2 - ...n__queries__debug_lineage__group_sort.snap | 2 - ..._debug_lineage__group_sort_limit_take.snap | 3 -- ...ueries__debug_lineage__invoice_totals.snap | 4 +- ...on__queries__debug_lineage__pipelines.snap | 4 -- ...ion__queries__debug_lineage__read_csv.snap | 4 -- ...ation__queries__debug_lineage__switch.snap | 3 -- ...gration__queries__debug_lineage__take.snap | 2 - ...__queries__debug_lineage__text_module.snap | 4 -- ...ation__queries__debug_lineage__window.snap | 10 ---- 21 files changed, 103 insertions(+), 170 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index e3a15c204668..a9406e5d85f4 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -3,9 +3,10 @@ use chumsky::prelude::*; use crate::error::parse_error::PError; use crate::lexer::lr::TokenKind; use crate::parser::pr::{Annotation, Stmt, StmtKind}; -use crate::parser::WithAesthetics; use crate::span::Span; +use super::SupportsDocComment; + pub fn ident_part() -> impl Parser + Clone { return select! { TokenKind::Ident(ident) => ident, @@ -37,46 +38,30 @@ pub fn into_stmt((annotations, kind): (Vec, StmtKind), span: Span) - kind, span: Some(span), annotations, - aesthetics_before: Vec::new(), - aesthetics_after: Vec::new(), + doc_comment: None, } } -pub fn aesthetic() -> impl Parser + Clone { +pub fn doc_comment() -> impl Parser + Clone { select! { - // TokenKind::Comment(comment) => TokenKind::Comment(comment), - // TokenKind::LineWrap(lw) => TokenKind::LineWrap(lw), - TokenKind::DocComment(dc) => TokenKind::DocComment(dc), + TokenKind::DocComment(dc) => dc, } + .then_ignore(new_line().repeated()) + .repeated() + .at_least(1) + .collect() + .map(|lines: Vec| lines.join("\n")) } -pub fn with_aesthetics<'a, P, O>( +pub fn with_doc_comment<'a, P, O>( parser: P, ) -> impl Parser + Clone + 'a where P: Parser + Clone + 'a, - O: WithAesthetics + 'a, + O: SupportsDocComment + 'a, { - // We can safely remove newlines following the `aesthetics_before`, to cover - // a case like `# foo` here: - // - // ```prql - // # foo - // - // from bar - // # baz - // select artists - // ``` - // - // ...but not following the `aesthetics_after`; since that would eat all - // newlines between `from_bar` and `select_artists`. - // - let aesthetics_before = aesthetic().then_ignore(new_line().repeated()).repeated(); - let aesthetics_after = aesthetic().separated_by(new_line()); - - aesthetics_before.then(parser).then(aesthetics_after).map( - |((aesthetics_before, inner), aesthetics_after)| { - inner.with_aesthetics(aesthetics_before, aesthetics_after) - }, - ) + doc_comment() + .or_not() + .then(parser) + .map(|(doc_comment, inner)| inner.with_doc_comment(doc_comment)) } diff --git a/prqlc/prqlc-parser/src/parser/expr.rs b/prqlc/prqlc-parser/src/parser/expr.rs index 0d419ea80c0f..207c9f781559 100644 --- a/prqlc/prqlc-parser/src/parser/expr.rs +++ b/prqlc/prqlc-parser/src/parser/expr.rs @@ -3,10 +3,10 @@ use std::collections::HashMap; use chumsky::prelude::*; use itertools::Itertools; -use super::interpolation; +use super::{common::with_doc_comment, interpolation}; use crate::error::parse_error::PError; use crate::lexer::lr::{Literal, TokenKind}; -use crate::parser::common::{ctrl, ident_part, keyword, new_line, with_aesthetics}; +use crate::parser::common::{ctrl, ident_part, keyword, new_line}; use crate::parser::pr::*; use crate::parser::types::type_expr; use crate::span::Span; @@ -28,7 +28,7 @@ pub fn expr<'a>() -> impl Parser + Clone + 'a { .map(|x| x.to_string()) .map(ExprKind::Internal); - let nested_expr = with_aesthetics( + let nested_expr = with_doc_comment( pipeline(lambda_func(expr.clone()).or(func_call(expr.clone()))).boxed(), ); @@ -40,7 +40,7 @@ pub fn expr<'a>() -> impl Parser + Clone + 'a { let param = select! { TokenKind::Param(id) => ExprKind::Param(id) }; - let term = with_aesthetics( + let term = with_doc_comment( choice(( literal, internal, diff --git a/prqlc/prqlc-parser/src/parser/interpolation.rs b/prqlc/prqlc-parser/src/parser/interpolation.rs index daca8753a2df..87ecdd0f5310 100644 --- a/prqlc/prqlc-parser/src/parser/interpolation.rs +++ b/prqlc/prqlc-parser/src/parser/interpolation.rs @@ -97,8 +97,7 @@ fn parse_interpolate() { 0:8-9, ), alias: None, - aesthetics_before: [], - aesthetics_after: [], + doc_comment: None, }, format: None, }, @@ -144,8 +143,7 @@ fn parse_interpolate() { 0:14-15, ), alias: None, - aesthetics_before: [], - aesthetics_after: [], + doc_comment: None, }, format: None, }, diff --git a/prqlc/prqlc-parser/src/parser/mod.rs b/prqlc/prqlc-parser/src/parser/mod.rs index 30bfb061926c..217730920aac 100644 --- a/prqlc/prqlc-parser/src/parser/mod.rs +++ b/prqlc/prqlc-parser/src/parser/mod.rs @@ -56,10 +56,6 @@ fn prepare_stream( Stream::from_iter(eoi, tokens) } -pub trait WithAesthetics { - fn with_aesthetics( - self, - aesthetics_before: Vec, - aethetics_after: Vec, - ) -> Self; +pub trait SupportsDocComment { + fn with_doc_comment(self, doc_comment: Option) -> Self; } diff --git a/prqlc/prqlc-parser/src/parser/pr/expr.rs b/prqlc/prqlc-parser/src/parser/pr/expr.rs index af730ee55ae4..cef555076ac9 100644 --- a/prqlc/prqlc-parser/src/parser/pr/expr.rs +++ b/prqlc/prqlc-parser/src/parser/pr/expr.rs @@ -8,9 +8,8 @@ use crate::generic; use crate::lexer::lr::Literal; use crate::parser::pr::ops::{BinOp, UnOp}; use crate::parser::pr::Ty; -use crate::parser::WithAesthetics; +use crate::parser::SupportsDocComment; use crate::span::Span; -use crate::TokenKind; impl Expr { pub fn new>(kind: K) -> Self { @@ -18,8 +17,7 @@ impl Expr { kind: kind.into(), span: None, alias: None, - aesthetics_before: Vec::new(), - aesthetics_after: Vec::new(), + doc_comment: None, } } } @@ -39,22 +37,16 @@ pub struct Expr { #[serde(skip_serializing_if = "Option::is_none")] pub alias: Option, - // Maybe should be Token? - #[serde(skip_serializing_if = "Vec::is_empty")] - pub aesthetics_before: Vec, - #[serde(skip_serializing_if = "Vec::is_empty")] - pub aesthetics_after: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub doc_comment: Option, } -impl WithAesthetics for Expr { - fn with_aesthetics( - mut self, - aesthetics_before: Vec, - aesthetics_after: Vec, - ) -> Self { - self.aesthetics_before = aesthetics_before; - self.aesthetics_after = aesthetics_after; - self +impl SupportsDocComment for Expr { + fn with_doc_comment(self, doc_comment: Option) -> Self { + Self { + doc_comment, + ..self + } } } @@ -103,8 +95,7 @@ impl ExprKind { span: Some(span), kind: self, alias: None, - aesthetics_after: Vec::new(), - aesthetics_before: Vec::new(), + doc_comment: None, } } } diff --git a/prqlc/prqlc-parser/src/parser/pr/stmt.rs b/prqlc/prqlc-parser/src/parser/pr/stmt.rs index 6c1d4d48e806..f3cc7bd3f911 100644 --- a/prqlc/prqlc-parser/src/parser/pr/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/pr/stmt.rs @@ -5,9 +5,8 @@ use schemars::JsonSchema; use semver::VersionReq; use serde::{Deserialize, Serialize}; -use crate::parser::pr::ident::Ident; use crate::parser::pr::{Expr, Ty}; -use crate::parser::WithAesthetics; +use crate::parser::{pr::ident::Ident, SupportsDocComment}; use crate::span::Span; use crate::TokenKind; @@ -38,22 +37,22 @@ pub struct Stmt { #[serde(skip_serializing_if = "Vec::is_empty", default)] pub annotations: Vec, - // Maybe should be Token? - #[serde(skip_serializing_if = "Vec::is_empty")] - pub aesthetics_before: Vec, - #[serde(skip_serializing_if = "Vec::is_empty")] - pub aesthetics_after: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub doc_comment: Option, } -impl WithAesthetics for Stmt { - fn with_aesthetics( - self, - aesthetics_before: Vec, - aesthetics_after: Vec, - ) -> Self { +// impl WithAesthetics for Stmt { +// fn with_aesthetics(self, aesthetics_before: Vec) -> Self { +// Stmt { +// aesthetics_before, +// ..self +// } +// } +// } +impl SupportsDocComment for Stmt { + fn with_doc_comment(self, doc_comment: Option) -> Self { Stmt { - aesthetics_before, - aesthetics_after, + doc_comment, ..self } } @@ -99,24 +98,8 @@ pub struct ImportDef { #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct Annotation { pub expr: Box, - #[serde(skip_serializing_if = "Vec::is_empty")] + #[serde(skip_serializing_if = "Vec::is_empty", default)] pub aesthetics_before: Vec, - #[serde(skip_serializing_if = "Vec::is_empty")] - pub aesthetics_after: Vec, -} - -impl WithAesthetics for Annotation { - fn with_aesthetics( - self, - aesthetics_before: Vec, - aesthetics_after: Vec, - ) -> Self { - Annotation { - aesthetics_before, - aesthetics_after, - ..self - } - } } impl Stmt { @@ -125,8 +108,7 @@ impl Stmt { kind, span: None, annotations: Vec::new(), - aesthetics_before: Vec::new(), - aesthetics_after: Vec::new(), + doc_comment: None, } } } diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index 87de5ed6e30e..6eeade3981ab 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -4,16 +4,15 @@ use chumsky::prelude::*; use itertools::Itertools; use semver::VersionReq; -use super::common::{ctrl, ident_part, into_stmt, keyword, new_line}; +use super::common::{ctrl, ident_part, into_stmt, keyword, new_line, with_doc_comment}; use super::expr::{expr, expr_call, ident, pipeline}; use crate::error::parse_error::PError; use crate::lexer::lr::{Literal, TokenKind}; -use crate::parser::common::with_aesthetics; use crate::parser::pr::*; use crate::parser::types::type_expr; pub fn source() -> impl Parser, Error = PError> { - with_aesthetics(query_def()) + with_doc_comment(query_def()) .or_not() .chain(module_contents()) .then_ignore(end()) @@ -33,7 +32,6 @@ fn module_contents() -> impl Parser, Error = PError> { .map(|expr| Annotation { expr: Box::new(expr), aesthetics_before: Vec::new(), - aesthetics_after: Vec::new(), }); // TODO: I think some duplication here; we allow for potential @@ -50,8 +48,9 @@ fn module_contents() -> impl Parser, Error = PError> { // Two wrapping of `with_aesthetics` — the first for the whole block, // and the second for just the annotation; if there's a comment between // the annotation and the code. - with_aesthetics( - with_aesthetics(annotation) + with_doc_comment( + // with_aesthetics(annotation) + annotation .repeated() // TODO: do we need this? I think possibly we get an additional // error when we remove it; check (because it seems redundant...). diff --git a/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap b/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap index 7e4afedcf5ef..84bca84aeba0 100644 --- a/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap +++ b/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap @@ -29,15 +29,9 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" right: Literal: String: USA -<<<<<<< HEAD - aesthetics_after: - - Comment: " Each line transforms the previous result." -||||||| 566e6f14 -======= span: "0:34-39" span: "0:23-39" span: "0:16-39" ->>>>>>> main - FuncCall: name: Ident: derive @@ -54,8 +48,6 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" span: "0:209-220" span: "0:200-220" alias: gross_salary - aesthetics_before: - - Comment: " This adds columns / variables." - Binary: left: Ident: gross_salary @@ -63,14 +55,8 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" op: Add right: Ident: benefits_cost -<<<<<<< HEAD - aesthetics_after: - - Comment: " Variables can use other variables." -||||||| 566e6f14 -======= span: "0:252-265" span: "0:237-265" ->>>>>>> main alias: gross_cost span: "0:112-305" span: "0:105-305" @@ -113,14 +99,8 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" span: "0:500-507" args: - Ident: salary -<<<<<<< HEAD - aesthetics_before: - - Comment: " Aggregate each group to a single row" -||||||| 566e6f14 -======= span: "0:508-514" span: "0:500-514" ->>>>>>> main - FuncCall: name: Ident: average diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index c49ffdf4826e..7e6c0b379773 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -3041,3 +3041,47 @@ fn test_lookup_02() { span: "0:13-35" "###); } + +#[test] +fn doc_comment() { + use insta::assert_yaml_snapshot; + + assert_yaml_snapshot!(parse_single(r###" + #! This is a doc comment + from artists + "###).unwrap(), @r###" + --- + - VarDef: + kind: Main + name: main + value: + FuncCall: + name: + Ident: from + span: "0:34-38" + args: + - Ident: artists + span: "0:39-46" + span: "0:34-46" + span: "0:34-47" + doc_comment: " This is a doc comment" + "###); + + assert_debug_snapshot!(parse_single(r###" + from artists #! This is a doc comment + "###).unwrap_err(), @r###" + [ + Error { + kind: Error, + span: Some( + 0:18-42, + ), + reason: Simple( + "unexpected #! This is a doc comment\n while parsing function call", + ), + hints: [], + code: None, + }, + ] + "###); +} diff --git a/prqlc/prqlc/src/semantic/ast_expand.rs b/prqlc/prqlc/src/semantic/ast_expand.rs index f256d061d1ca..e853f60116cd 100644 --- a/prqlc/prqlc/src/semantic/ast_expand.rs +++ b/prqlc/prqlc/src/semantic/ast_expand.rs @@ -304,8 +304,7 @@ pub fn restrict_expr(expr: pl::Expr) -> pr::Expr { kind: restrict_expr_kind(expr.kind), span: expr.span, alias: expr.alias, - aesthetics_before: vec![], - aesthetics_after: vec![], + doc_comment: None, } } @@ -469,8 +468,7 @@ fn restrict_stmt(stmt: pl::Stmt) -> pr::Stmt { .into_iter() .map(restrict_annotation) .collect(), - aesthetics_before: vec![], - aesthetics_after: vec![], + doc_comment: None, } } @@ -478,7 +476,6 @@ pub fn restrict_annotation(value: pl::Annotation) -> pr::Annotation { pr::Annotation { expr: restrict_expr_box(value.expr), aesthetics_before: vec![], - aesthetics_after: vec![], } } diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap index 340e039f79dd..3d2bdc3a0345 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap @@ -126,9 +126,6 @@ ast: span: 1:157-183 span: 1:135-185 span: 1:117-185 - aesthetics_before: - - !Comment ' clickhouse:skip (ClickHouse prefers aliases to column names https://github.com/PRQL/prql/issues/2827)' - - !Comment ' mssql:test' - VarDef: kind: Main name: main diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap index 56e59c08ba1b..13ab542b8168 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap @@ -340,5 +340,3 @@ ast: span: 1:147-160 span: 1:13-160 span: 1:13-161 - aesthetics_before: - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap index bef21f61e577..7331f875ccb6 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap @@ -326,5 +326,3 @@ ast: span: 1:129-150 span: 1:13-150 span: 1:13-151 - aesthetics_before: - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap index c31b6c02e78e..c8e9c4bc4627 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap @@ -412,6 +412,3 @@ ast: span: 1:225-251 span: 1:76-251 span: 1:76-252 - aesthetics_before: - - !Comment ' Compute the 3 longest songs for each genre and sort by genre' - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap index 9139022b0f66..4471a56f7d03 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap @@ -985,6 +985,4 @@ ast: span: 1:784-791 span: 1:131-791 span: 1:131-792 - aesthetics_before: - - !Comment ' clickhouse:skip (clickhouse doesn''t have lag function)' - - !DocComment ' Calculate a number of metrics about the sales of tracks in each city.' + doc_comment: ' Calculate a number of metrics about the sales of tracks in each city.' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap index bed29b743744..705569b7ab1d 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap @@ -343,7 +343,3 @@ ast: span: 1:274-297 span: 1:166-297 span: 1:166-298 - aesthetics_before: - - !Comment ' sqlite:skip (Only works on Sqlite implementations which have the extension' - - !Comment ' installed' - - !Comment ' https://stackoverflow.com/questions/24037982/how-to-use-regexp-in-sqlite)' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap index 08cfe0b7ef34..e3b20518dfde 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap @@ -75,7 +75,3 @@ ast: span: 1:92-110 span: 1:43-110 span: 1:43-111 - aesthetics_before: - - !Comment ' sqlite:skip' - - !Comment ' postgres:skip' - - !Comment ' mysql:skip' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap index 98837a9f37b0..c0fb2ecf4d47 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap @@ -235,6 +235,3 @@ ast: span: 1:247-254 span: 1:89-254 span: 1:89-255 - aesthetics_before: - - !Comment ' glaredb:skip (May be a bag of String type conversion for Postgres Client)' - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap index 3b0e83b48fb6..8c02c5a85974 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap @@ -117,5 +117,3 @@ ast: span: 1:42-51 span: 1:13-51 span: 1:13-52 - aesthetics_before: - - !Comment ' mssql:test' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap index 02aa7fa3aea8..69bf68a2073c 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap @@ -706,7 +706,3 @@ ast: span: 1:477-588 span: 1:113-588 span: 1:113-589 - aesthetics_before: - - !Comment ' mssql:test' - - !Comment ' glaredb:skip — TODO: started raising an error on 2024-05-20; see `window.prql`' - - !Comment ' for more details' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap index bcdf6f50e6cf..999eb235b48a 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap @@ -503,13 +503,3 @@ ast: span: 1:999-1020 span: 1:762-1020 span: 1:762-1021 - aesthetics_before: - - !Comment ' mssql:skip Conversion("cannot interpret I64(Some(1)) as an i32 value")'', connection.rs:200:34' - - !Comment ' duckdb:skip problems with DISTINCT ON (duckdb internal error: [with INPUT_TYPE = int; RESULT_TYPE = unsigned char]: Assertion `min_val <= input'' failed.)' - - !Comment ' clickhouse:skip problems with DISTINCT ON' - - !Comment ' postgres:skip problems with DISTINCT ON' - - !Comment ' glaredb:skip — TODO: started raising an error on 2024-05-20, from https://github.com/PRQL/prql/actions/runs/9154902656/job/25198160283:' - - !Comment ' ERROR: This feature is not implemented: Unsupported ast node in sqltorel:' - - !Comment ' Substring { expr: Identifier(Ident { value: "title", quote_style: None }),' - - !Comment ' substring_from: Some(Value(Number("2", false))), substring_for:' - - !Comment ' Some(Value(Number("5", false))), special: true }' From e5a4f268dcccd6835e278629bf53c73178732adc Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 21:11:43 -0700 Subject: [PATCH 06/28] fix: Enforce a line break between annotations Found this while doing the formatting, we don't yet enforce this Possibly there are other areas we don't enforce this sort of thing? --- prqlc/prqlc-parser/src/test.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index c49ffdf4826e..f942a06e9e3d 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -2871,6 +2871,14 @@ fn test_annotation() { "#, ) .unwrap(); + + parse_single( + r#" + @{binding_strength=1}@{binding_strength=2} + let add = a b -> a + b + "#, + ) + .unwrap_err(); } #[test] From 8b9d45feb773e6388e9c5c585f0dfd819249502a Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 21:13:36 -0700 Subject: [PATCH 07/28] --- prqlc/prqlc-parser/src/parser/stmt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index 17394ebefe27..0f2bca320ff9 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -28,7 +28,7 @@ fn module_contents() -> impl Parser, Error = PError> { let annotation = just(TokenKind::Annotate) .ignore_then(expr()) - .then_ignore(new_line().repeated()) + .then_ignore(new_line().repeated().at_least(1)) .map(|expr| Annotation { expr: Box::new(expr), }); From ab7905c523cecd48ca2b86dbf937714c7f1098e1 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 21:30:59 -0700 Subject: [PATCH 08/28] Revert "" This reverts commit 8b9d45feb773e6388e9c5c585f0dfd819249502a. --- prqlc/prqlc-parser/src/parser/stmt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index c97905547231..6eeade3981ab 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -28,7 +28,7 @@ fn module_contents() -> impl Parser, Error = PError> { let annotation = just(TokenKind::Annotate) .ignore_then(expr()) - .then_ignore(new_line().repeated().at_least(1)) + .then_ignore(new_line().repeated()) .map(|expr| Annotation { expr: Box::new(expr), aesthetics_before: Vec::new(), From 577924a058fa22b8a99046832b2d796168d9d8f7 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 21:31:15 -0700 Subject: [PATCH 09/28] Revert "fix: Enforce a line break between annotations" This reverts commit e5a4f268dcccd6835e278629bf53c73178732adc. --- prqlc/prqlc-parser/src/test.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index d5673e5e3d25..7e6c0b379773 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -2871,14 +2871,6 @@ fn test_annotation() { "#, ) .unwrap(); - - parse_single( - r#" - @{binding_strength=1}@{binding_strength=2} - let add = a b -> a + b - "#, - ) - .unwrap_err(); } #[test] From 369ec03d57d41f42408311c519b871173dff413e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 23:02:07 -0700 Subject: [PATCH 10/28] Assume chumsky parser outputs are `Debug` For using `dbg!` --- prqlc/prqlc-parser/src/error/parse_error.rs | 16 ++++++++-------- prqlc/prqlc-parser/src/parser/common.rs | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/prqlc/prqlc-parser/src/error/parse_error.rs b/prqlc/prqlc-parser/src/error/parse_error.rs index 94d92262b6d2..3d0efcf696db 100644 --- a/prqlc/prqlc-parser/src/error/parse_error.rs +++ b/prqlc/prqlc-parser/src/error/parse_error.rs @@ -1,6 +1,6 @@ use core::fmt; use std::collections::HashSet; -use std::fmt::Display; +use std::fmt::{Debug, Display}; use std::hash::Hash; use crate::error::{Error, ErrorSource, Reason, WithErrorInfo}; @@ -8,7 +8,7 @@ use crate::lexer::lr::TokenKind; use crate::span::Span; #[derive(Clone, Debug)] -pub struct ChumError { +pub struct ChumError { span: Span, reason: Option, expected: HashSet>, @@ -18,7 +18,7 @@ pub struct ChumError { pub type PError = ChumError; -impl ChumError { +impl ChumError { ///Create an error with a custom error message. pub fn custom(span: Span, msg: M) -> Self { Self { @@ -59,7 +59,7 @@ impl ChumError { /// /// This can be used to unify the errors between parsing stages that operate upon two forms of input (for example, /// the initial lexing stage and the parsing stage in most compilers). - pub fn map U>(self, mut f: F) -> ChumError { + pub fn map U>(self, mut f: F) -> ChumError { ChumError { span: self.span, reason: self.reason, @@ -70,7 +70,7 @@ impl ChumError { } } -impl chumsky::Error for ChumError { +impl chumsky::Error for ChumError { type Span = Span; type Label = &'static str; @@ -143,7 +143,7 @@ impl chumsky::Error for ChumError PartialEq for ChumError { +impl PartialEq for ChumError { fn eq(&self, other: &Self) -> bool { self.span == other.span && self.found == other.found @@ -152,9 +152,9 @@ impl PartialEq for ChumError { && self.label == other.label } } -impl Eq for ChumError {} +impl Eq for ChumError {} -impl fmt::Display for ChumError { +impl fmt::Display for ChumError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // TODO: Take `self.reason` into account diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 4536431afb54..c1fdc939e21e 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -24,7 +24,7 @@ pub fn keyword(kw: &'static str) -> impl Parser + } pub fn new_line() -> impl Parser + Clone { - just(TokenKind::NewLine).ignored() + just(TokenKind::NewLine).ignored().labelled("new line") } pub fn ctrl(char: char) -> impl Parser + Clone { From 2ba018f0f195a620c63e927547ca67c708ab687a Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 23:05:09 -0700 Subject: [PATCH 11/28] Add some dbgs --- prqlc/prqlc-parser/src/error/parse_error.rs | 14 ++++++++++---- prqlc/prqlc-parser/src/parser/common.rs | 6 ++++-- prqlc/prqlc-parser/src/parser/pr/stmt.rs | 8 -------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/prqlc/prqlc-parser/src/error/parse_error.rs b/prqlc/prqlc-parser/src/error/parse_error.rs index 94d92262b6d2..52d880bf2e27 100644 --- a/prqlc/prqlc-parser/src/error/parse_error.rs +++ b/prqlc/prqlc-parser/src/error/parse_error.rs @@ -80,6 +80,8 @@ impl chumsky::Error for ChumError, ) -> Self { let exp = expected.into_iter().collect(); + let msg = format!("expected {:?} but found {:?}", exp, found); + dbg!(msg); log::trace!("looking for {:?} but found {:?} at: {:?}", exp, found, span); Self { span, @@ -122,6 +124,7 @@ impl chumsky::Error for ChumError Self { + dbg!((&self, &other)); // TODO: Assert that `self.span == other.span` here? // self.reason = match (&self.reason, &other.reason) { // (Some(..), None) => self.reason, @@ -136,10 +139,11 @@ impl chumsky::Error for ChumError Eq for ChumError {} impl fmt::Display for ChumError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + dbg!(&self); // TODO: Take `self.reason` into account if let Some(found) = &self.found { @@ -207,6 +212,7 @@ impl From for Error { } fn construct_parser_error(e: PError) -> Error { + dbg!(e.clone()); if let Some(message) = e.reason() { return Error::new_simple(message).with_source(ErrorSource::Parser(e)); } diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index a9406e5d85f4..8bb651aa6ddf 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -8,17 +8,18 @@ use crate::span::Span; use super::SupportsDocComment; pub fn ident_part() -> impl Parser + Clone { - return select! { + select! { TokenKind::Ident(ident) => ident, TokenKind::Keyword(ident) if &ident == "module" => ident, } .map_err(|e: PError| { + dbg!(e.clone()); PError::expected_input_found( e.span(), [Some(TokenKind::Ident("".to_string()))], e.found().cloned(), ) - }); + }) } pub fn keyword(kw: &'static str) -> impl Parser + Clone { @@ -51,6 +52,7 @@ pub fn doc_comment() -> impl Parser + Clone { .at_least(1) .collect() .map(|lines: Vec| lines.join("\n")) + .labelled("doc comment") } pub fn with_doc_comment<'a, P, O>( diff --git a/prqlc/prqlc-parser/src/parser/pr/stmt.rs b/prqlc/prqlc-parser/src/parser/pr/stmt.rs index f3cc7bd3f911..0a3f75904ec6 100644 --- a/prqlc/prqlc-parser/src/parser/pr/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/pr/stmt.rs @@ -41,14 +41,6 @@ pub struct Stmt { pub doc_comment: Option, } -// impl WithAesthetics for Stmt { -// fn with_aesthetics(self, aesthetics_before: Vec) -> Self { -// Stmt { -// aesthetics_before, -// ..self -// } -// } -// } impl SupportsDocComment for Stmt { fn with_doc_comment(self, doc_comment: Option) -> Self { Stmt { From 42acef1c12d684b547af07dd99b217b7e36f18fd Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 23:09:11 -0700 Subject: [PATCH 12/28] --- prqlc/prqlc-parser/src/lib.rs | 2 -- prqlc/prqlc-parser/src/parser/pr/stmt.rs | 3 --- prqlc/prqlc-parser/src/parser/stmt.rs | 1 - prqlc/prqlc/src/semantic/ast_expand.rs | 1 - 4 files changed, 7 deletions(-) diff --git a/prqlc/prqlc-parser/src/lib.rs b/prqlc/prqlc-parser/src/lib.rs index fbd7dfe8e336..3d66e52cf158 100644 --- a/prqlc/prqlc-parser/src/lib.rs +++ b/prqlc/prqlc-parser/src/lib.rs @@ -5,5 +5,3 @@ pub mod parser; pub mod span; #[cfg(test)] mod test; - -use crate::lexer::lr::TokenKind; diff --git a/prqlc/prqlc-parser/src/parser/pr/stmt.rs b/prqlc/prqlc-parser/src/parser/pr/stmt.rs index 0a3f75904ec6..878d5788a1b4 100644 --- a/prqlc/prqlc-parser/src/parser/pr/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/pr/stmt.rs @@ -8,7 +8,6 @@ use serde::{Deserialize, Serialize}; use crate::parser::pr::{Expr, Ty}; use crate::parser::{pr::ident::Ident, SupportsDocComment}; use crate::span::Span; -use crate::TokenKind; #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default, JsonSchema)] pub struct QueryDef { @@ -90,8 +89,6 @@ pub struct ImportDef { #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct Annotation { pub expr: Box, - #[serde(skip_serializing_if = "Vec::is_empty", default)] - pub aesthetics_before: Vec, } impl Stmt { diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index 6eeade3981ab..80d74d02c18c 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -31,7 +31,6 @@ fn module_contents() -> impl Parser, Error = PError> { .then_ignore(new_line().repeated()) .map(|expr| Annotation { expr: Box::new(expr), - aesthetics_before: Vec::new(), }); // TODO: I think some duplication here; we allow for potential diff --git a/prqlc/prqlc/src/semantic/ast_expand.rs b/prqlc/prqlc/src/semantic/ast_expand.rs index e853f60116cd..8394a41fdbba 100644 --- a/prqlc/prqlc/src/semantic/ast_expand.rs +++ b/prqlc/prqlc/src/semantic/ast_expand.rs @@ -475,7 +475,6 @@ fn restrict_stmt(stmt: pl::Stmt) -> pr::Stmt { pub fn restrict_annotation(value: pl::Annotation) -> pr::Annotation { pr::Annotation { expr: restrict_expr_box(value.expr), - aesthetics_before: vec![], } } From 11f3781ae4fc00dbd51b2980994f500ef1f691e0 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 7 Jul 2024 23:11:50 -0700 Subject: [PATCH 13/28] --- prqlc/prqlc-parser/src/test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index 7e6c0b379773..7575f157ca0d 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -3074,10 +3074,10 @@ fn doc_comment() { Error { kind: Error, span: Some( - 0:18-42, + 0:46-47, ), reason: Simple( - "unexpected #! This is a doc comment\n while parsing function call", + "Expected one of (, [, an identifier, keyword case, keyword internal or {, but didn't find anything before the end.", ), hints: [], code: None, From 419fbfac104dbbdea49360a9f4a8f673053fd949 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 9 Jul 2024 23:56:37 -0700 Subject: [PATCH 14/28] . --- prqlc/prqlc-parser/src/parser/common.rs | 61 +++++++++++++++++++++---- prqlc/prqlc-parser/src/parser/perror.rs | 6 ++- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 5e249943352e..4012dbf3e5cc 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -44,17 +44,23 @@ pub fn into_stmt((annotations, kind): (Vec, StmtKind), span: Span) - } pub fn doc_comment() -> impl Parser + Clone { - select! { - TokenKind::DocComment(dc) => dc, - } - .then_ignore(new_line().repeated()) - .repeated() - .at_least(1) - .collect() - .map(|lines: Vec| lines.join("\n")) - .labelled("doc comment") + // doc comments must start on a new line, so we enforce a new line before + // the doc comment (but how to handle the start of a file?) + new_line() + .repeated() + .at_least(1) + .ignore_then( + select! { + TokenKind::DocComment(dc) => dc, + } + .then_ignore(new_line().repeated()) + .repeated() + .at_least(1) + .collect(), + ) + .map(|lines: Vec| lines.join("\n")) + .labelled("doc comment") } - pub fn with_doc_comment<'a, P, O>( parser: P, ) -> impl Parser + Clone + 'a @@ -67,3 +73,38 @@ where .then(parser) .map(|(doc_comment, inner)| inner.with_doc_comment(doc_comment)) } + +#[cfg(test)] +mod tests { + use insta::assert_yaml_snapshot; + + use super::*; + use crate::parser::prepare_stream; + + fn parse_with_parser( + source: &str, + parser: impl Parser, + ) -> Result> { + let tokens = crate::lexer::lex_source(source).unwrap(); + let stream = prepare_stream(tokens.0.into_iter(), source, 0); + + let (ast, parse_errors) = parser.parse_recovery(stream); + + if !parse_errors.is_empty() { + return Err(parse_errors); + } + Ok(ast.unwrap()) + } + + #[test] + fn test_doc_comment() { + assert_yaml_snapshot!(parse_with_parser(r#" + #! doc comment + #! another line + + "#, doc_comment()), @r###" + --- + Ok: " doc comment\n another line" + "###); + } +} diff --git a/prqlc/prqlc-parser/src/parser/perror.rs b/prqlc/prqlc-parser/src/parser/perror.rs index ad2db49db94b..858426da0720 100644 --- a/prqlc/prqlc-parser/src/parser/perror.rs +++ b/prqlc/prqlc-parser/src/parser/perror.rs @@ -4,12 +4,14 @@ use std::fmt::Debug; use std::fmt::Display; use std::hash::Hash; +use serde::{Deserialize, Serialize}; + use crate::error::WithErrorInfo; use crate::error::{Error, ErrorSource, Reason}; use crate::lexer::lr::TokenKind; use crate::span::Span; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] pub struct ChumError { span: Span, reason: Option, @@ -283,7 +285,7 @@ impl From for Error { // since it's private in chumsky /// A type representing zero, one, or many labels applied to an error -#[derive(Clone, Copy, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] enum SimpleLabel { Some(&'static str), None, From fd3e09446d00bda7e86990e4c36b6b04f341376e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 10 Jul 2024 20:28:14 -0700 Subject: [PATCH 15/28] . --- prqlc/prqlc-parser/src/parser/common.rs | 11 ++++++----- prqlc/prqlc-parser/src/parser/perror.rs | 6 ++---- prqlc/prqlc-parser/src/test.rs | 13 ++++++------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 4012dbf3e5cc..d7097f06eed0 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -27,7 +27,7 @@ pub fn keyword(kw: &'static str) -> impl Parser + } pub fn new_line() -> impl Parser + Clone { - just(TokenKind::NewLine).ignored().labelled("new line") + just(TokenKind::NewLine).ignored() } pub fn ctrl(char: char) -> impl Parser + Clone { @@ -76,7 +76,7 @@ where #[cfg(test)] mod tests { - use insta::assert_yaml_snapshot; + use insta::assert_debug_snapshot; use super::*; use crate::parser::prepare_stream; @@ -98,13 +98,14 @@ mod tests { #[test] fn test_doc_comment() { - assert_yaml_snapshot!(parse_with_parser(r#" + assert_debug_snapshot!(parse_with_parser(r#" #! doc comment #! another line "#, doc_comment()), @r###" - --- - Ok: " doc comment\n another line" + Ok( + " doc comment\n another line", + ) "###); } } diff --git a/prqlc/prqlc-parser/src/parser/perror.rs b/prqlc/prqlc-parser/src/parser/perror.rs index 858426da0720..ad2db49db94b 100644 --- a/prqlc/prqlc-parser/src/parser/perror.rs +++ b/prqlc/prqlc-parser/src/parser/perror.rs @@ -4,14 +4,12 @@ use std::fmt::Debug; use std::fmt::Display; use std::hash::Hash; -use serde::{Deserialize, Serialize}; - use crate::error::WithErrorInfo; use crate::error::{Error, ErrorSource, Reason}; use crate::lexer::lr::TokenKind; use crate::span::Span; -#[derive(Clone, Debug, Serialize)] +#[derive(Clone, Debug)] pub struct ChumError { span: Span, reason: Option, @@ -285,7 +283,7 @@ impl From for Error { // since it's private in chumsky /// A type representing zero, one, or many labels applied to an error -#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq)] enum SimpleLabel { Some(&'static str), None, diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index acefc46bcf0d..be0f9e4649af 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -1821,13 +1821,12 @@ fn doc_comment() { FuncCall: name: Ident: from - span: "0:34-38" + span: "0:37-41" args: - Ident: artists - span: "0:39-46" - span: "0:34-46" - span: "0:34-47" - doc_comment: " This is a doc comment" + span: "0:42-49" + span: "0:37-49" + span: "0:37-50" "###); assert_debug_snapshot!(parse_single(r###" @@ -1837,10 +1836,10 @@ fn doc_comment() { Error { kind: Error, span: Some( - 0:46-47, + 0:18-42, ), reason: Simple( - "Expected one of (, [, an identifier, keyword case, keyword internal or {, but didn't find anything before the end.", + "unexpected #! This is a doc comment\n", ), hints: [], code: None, From 6b700af514b6093e57977d22156050a0a3ef8cd7 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 11 Jul 2024 09:04:28 -0700 Subject: [PATCH 16/28] . --- prqlc/prqlc-parser/src/error.rs | 4 + prqlc/prqlc-parser/src/parser/common.rs | 37 +++------ prqlc/prqlc-parser/src/parser/stmt.rs | 102 ++++++++++++++++++++---- prqlc/prqlc-parser/src/test.rs | 87 ++++++++++++++++++-- 4 files changed, 182 insertions(+), 48 deletions(-) diff --git a/prqlc/prqlc-parser/src/error.rs b/prqlc/prqlc-parser/src/error.rs index f9d255f856fd..9298ccc7e96d 100644 --- a/prqlc/prqlc-parser/src/error.rs +++ b/prqlc/prqlc-parser/src/error.rs @@ -46,8 +46,12 @@ pub enum MessageKind { pub enum Reason { Simple(String), Expected { + /// Where we were + // (could rename to `where` / `location` / `within`?) who: Option, + /// What we expected expected: String, + /// What we found found: String, }, Unexpected { diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index d7097f06eed0..a0bf5d04e54b 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -46,21 +46,23 @@ pub fn into_stmt((annotations, kind): (Vec, StmtKind), span: Span) - pub fn doc_comment() -> impl Parser + Clone { // doc comments must start on a new line, so we enforce a new line before // the doc comment (but how to handle the start of a file?) + // + // TODO: we currently lose any empty newlines between doc comments; + // eventually we want to retain them new_line() .repeated() .at_least(1) - .ignore_then( - select! { - TokenKind::DocComment(dc) => dc, - } - .then_ignore(new_line().repeated()) - .repeated() - .at_least(1) - .collect(), - ) + .ignore_then(select! { + TokenKind::DocComment(dc) => dc, + }) + .repeated() + .at_least(1) + .collect() + .debug("doc_comment") .map(|lines: Vec| lines.join("\n")) .labelled("doc comment") } + pub fn with_doc_comment<'a, P, O>( parser: P, ) -> impl Parser + Clone + 'a @@ -79,22 +81,7 @@ mod tests { use insta::assert_debug_snapshot; use super::*; - use crate::parser::prepare_stream; - - fn parse_with_parser( - source: &str, - parser: impl Parser, - ) -> Result> { - let tokens = crate::lexer::lex_source(source).unwrap(); - let stream = prepare_stream(tokens.0.into_iter(), source, 0); - - let (ast, parse_errors) = parser.parse_recovery(stream); - - if !parse_errors.is_empty() { - return Err(parse_errors); - } - Ok(ast.unwrap()) - } + use crate::test::parse_with_parser; #[test] fn test_doc_comment() { diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index 718372611523..d79e7ee8c3a0 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -31,35 +31,29 @@ fn module_contents() -> impl Parser, Error = PError> { .then_ignore(new_line().repeated()) .map(|expr| Annotation { expr: Box::new(expr), - }); + }) + .labelled("annotation"); - // TODO: I think some duplication here; we allow for potential - // newlines before each item here, but then also have `.allow_leading` - // below — since now we can get newlines after a comment between the - // aesthetic item and the stmt... So a bit messy - let stmt_kind = new_line().repeated().ignore_then(choice(( + // Also need to handle new_line vs. start of file here + let stmt_kind = new_line().repeated().at_least(1).ignore_then(choice(( module_def, type_def(), import_def(), var_def(), ))); - // Two wrapping of `with_aesthetics` — the first for the whole block, - // and the second for just the annotation; if there's a comment between - // the annotation and the code. + // Currently doc comments need to be before the annotation; probably + // should relax this? with_doc_comment( - // with_aesthetics(annotation) annotation .repeated() - // TODO: do we need this? I think possibly we get an additional - // error when we remove it; check (because it seems redundant...). - .then_ignore(new_line().repeated()) .then(stmt_kind) .map_with_span(into_stmt), ) - .separated_by(new_line().repeated().at_least(1)) - .allow_leading() - .allow_trailing() + .repeated() + // .separated_by(new_line().repeated().at_least(1)) + // .allow_leading() + // .allow_trailing() }) } @@ -188,3 +182,79 @@ fn import_def() -> impl Parser + Clone { .map(|(alias, name)| StmtKind::ImportDef(ImportDef { name, alias })) .labelled("import statement") } + +#[cfg(test)] +mod tests { + use insta::assert_yaml_snapshot; + + use super::*; + use crate::test::parse_with_parser; + + #[test] + fn test_doc_comment_module() { + assert_yaml_snapshot!(parse_with_parser(r#" + + #! first doc comment + from foo + + "#, module_contents()).unwrap(), @r###" + --- + - VarDef: + kind: Main + name: main + value: + FuncCall: + name: + Ident: from + span: "0:39-43" + args: + - Ident: foo + span: "0:44-47" + span: "0:39-47" + span: "0:30-49" + doc_comment: " first doc comment" + "###); + + assert_yaml_snapshot!(parse_with_parser(r#" + + + #! first doc comment + from foo + into x + + #! second doc comment + from bar + + "#, module_contents()).unwrap(), @r###" + --- + - VarDef: + kind: Into + name: x + value: + FuncCall: + name: + Ident: from + span: "0:40-44" + args: + - Ident: foo + span: "0:45-48" + span: "0:40-48" + span: "0:31-63" + doc_comment: " first doc comment" + - VarDef: + kind: Main + name: main + value: + FuncCall: + name: + Ident: from + span: "0:103-107" + args: + - Ident: bar + span: "0:108-111" + span: "0:103-111" + span: "0:94-113" + doc_comment: " second doc comment" + "###); + } +} diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index be0f9e4649af..2b92a7f5b00e 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -1,5 +1,6 @@ use chumsky::Parser; use insta::{assert_debug_snapshot, assert_yaml_snapshot}; +use std::fmt::Debug; use crate::parser::pr::Stmt; use crate::parser::prepare_stream; @@ -9,16 +10,17 @@ use crate::{error::Error, lexer::lr::TokenKind, parser::perror::PError}; /// Parse source code based on the supplied parser. /// /// Use this to test any parser! -pub(crate) fn parse_with_parser( +pub(crate) fn parse_with_parser( source: &str, parser: impl Parser, ) -> Result> { let tokens = crate::lexer::lex_source(source)?; let stream = prepare_stream(tokens.0.into_iter(), source, 0); - let (ast, parse_errors) = parser.parse_recovery(stream); + let (ast, parse_errors) = parser.parse_recovery_verbose(stream); if !parse_errors.is_empty() { + dbg!(&ast); return Err(parse_errors.into_iter().map(|e| e.into()).collect()); } Ok(ast.unwrap()) @@ -1809,9 +1811,73 @@ fn test_number() { fn doc_comment() { use insta::assert_yaml_snapshot; + assert_yaml_snapshot!(parse_single(r###" + from artists + derive x = 5 + "###).unwrap(), @r###" + --- + - VarDef: + kind: Main + name: main + value: + Pipeline: + exprs: + - FuncCall: + name: + Ident: from + span: "0:5-9" + args: + - Ident: artists + span: "0:10-17" + span: "0:5-17" + - FuncCall: + name: + Ident: derive + span: "0:22-28" + args: + - Literal: + Integer: 5 + span: "0:33-34" + alias: x + span: "0:22-34" + span: "0:5-34" + span: "0:0-35" + "###); + + assert_yaml_snapshot!(parse_single(r###" + from artists + + #! This is a doc comment + + derive x = 5 + "###).unwrap(), @r###" + --- + - VarDef: + kind: Main + name: main + value: + FuncCall: + name: + Ident: from + span: "0:5-9" + args: + - Ident: artists + span: "0:10-17" + - Ident: derive + span: "0:51-57" + doc_comment: " This is a doc comment" + - Literal: + Integer: 5 + span: "0:62-63" + alias: x + span: "0:5-63" + span: "0:5-64" + "###); + assert_yaml_snapshot!(parse_single(r###" #! This is a doc comment from artists + derive x = 5 "###).unwrap(), @r###" --- - VarDef: @@ -1821,12 +1887,19 @@ fn doc_comment() { FuncCall: name: Ident: from - span: "0:37-41" + span: "0:5-9" args: - Ident: artists - span: "0:42-49" - span: "0:37-49" - span: "0:37-50" + span: "0:10-17" + - Ident: derive + span: "0:51-57" + doc_comment: " This is a doc comment" + - Literal: + Integer: 5 + span: "0:62-63" + alias: x + span: "0:5-63" + span: "0:5-64" "###); assert_debug_snapshot!(parse_single(r###" @@ -1839,7 +1912,7 @@ fn doc_comment() { 0:18-42, ), reason: Simple( - "unexpected #! This is a doc comment\n", + "unexpected #! This is a doc comment\n while parsing function call", ), hints: [], code: None, From f5ed3f4a05891c5490ea7ff454740206ac1ab0e9 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 10:09:26 -0700 Subject: [PATCH 17/28] almost there --- prqlc/prqlc-parser/src/parser/common.rs | 51 +++-- prqlc/prqlc-parser/src/parser/expr.rs | 179 +++++++++++++----- prqlc/prqlc-parser/src/parser/mod.rs | 1 + prqlc/prqlc-parser/src/parser/stmt.rs | 107 ++++++++++- prqlc/prqlc-parser/src/parser/test.rs | 10 +- ...qlc_parser__test__pipeline_parse_tree.snap | 2 +- prqlc/prqlc-parser/src/test.rs | 125 +++++++----- 7 files changed, 350 insertions(+), 125 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index a0bf5d04e54b..f23f08ea25c8 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -27,7 +27,10 @@ pub fn keyword(kw: &'static str) -> impl Parser + } pub fn new_line() -> impl Parser + Clone { - just(TokenKind::NewLine).ignored() + just(TokenKind::NewLine) + .or(just(TokenKind::Start)) + .ignored() + .labelled("new line") } pub fn ctrl(char: char) -> impl Parser + Clone { @@ -44,23 +47,20 @@ pub fn into_stmt((annotations, kind): (Vec, StmtKind), span: Span) - } pub fn doc_comment() -> impl Parser + Clone { - // doc comments must start on a new line, so we enforce a new line before - // the doc comment (but how to handle the start of a file?) + // doc comments must start on a new line, so we enforce a new line (which + // can also be a file start) before the doc comment // // TODO: we currently lose any empty newlines between doc comments; // eventually we want to retain them - new_line() - .repeated() - .at_least(1) - .ignore_then(select! { - TokenKind::DocComment(dc) => dc, - }) - .repeated() - .at_least(1) - .collect() - .debug("doc_comment") - .map(|lines: Vec| lines.join("\n")) - .labelled("doc comment") + (new_line().repeated().at_least(1).ignore_then(select! { + TokenKind::DocComment(dc) => dc, + })) + .repeated() + .at_least(1) + .collect() + .debug("doc_comment") + .map(|lines: Vec| lines.join("\n")) + .labelled("doc comment") } pub fn with_doc_comment<'a, P, O>( @@ -95,4 +95,25 @@ mod tests { ) "###); } + + #[test] + fn test_doc_comment_or_not() { + assert_debug_snapshot!(parse_with_parser(r#"hello"#, doc_comment().or_not()).unwrap(), @"None"); + assert_debug_snapshot!(parse_with_parser(r#"hello"#, doc_comment().or_not().then_ignore(new_line().repeated()).then(ident_part())).unwrap(), @r###" + ( + None, + "hello", + ) + "###); + } + + #[test] + fn test_no_doc_comment_in_with_doc_comment() { + impl SupportsDocComment for String { + fn with_doc_comment(self, _doc_comment: Option) -> Self { + self + } + } + assert_debug_snapshot!(parse_with_parser(r#"hello"#, with_doc_comment(new_line().ignore_then(ident_part()))).unwrap(), @r###""hello""###); + } } diff --git a/prqlc/prqlc-parser/src/parser/expr.rs b/prqlc/prqlc-parser/src/parser/expr.rs index 667f34361dd1..8447565c9e13 100644 --- a/prqlc/prqlc-parser/src/parser/expr.rs +++ b/prqlc/prqlc-parser/src/parser/expr.rs @@ -76,12 +76,16 @@ pub fn expr<'a>() -> impl Parser + Clone + 'a { fn tuple( nested_expr: impl Parser + Clone, ) -> impl Parser + Clone { - ident_part() - .then_ignore(ctrl('=')) - .or_not() - .then(nested_expr) - .map(|(alias, expr)| Expr { alias, ..expr }) - .padded_by(new_line().repeated()) + new_line() + .repeated() + .ignore_then( + ident_part() + .then_ignore(ctrl('=')) + .or_not() + .then(nested_expr) + .map(|(alias, expr)| Expr { alias, ..expr }), + ) + // .padded_by(new_line().repeated()) .separated_by(ctrl(',')) .allow_trailing() .then_ignore(new_line().repeated()) @@ -103,22 +107,27 @@ fn tuple( fn array( expr: impl Parser + Clone, ) -> impl Parser + Clone { - expr.padded_by(new_line().repeated()) - .separated_by(ctrl(',')) - .allow_trailing() - .then_ignore(new_line().repeated()) - .delimited_by(ctrl('['), ctrl(']')) - .recover_with(nested_delimiters( - TokenKind::Control('['), - TokenKind::Control(']'), - [ - (TokenKind::Control('{'), TokenKind::Control('}')), - (TokenKind::Control('('), TokenKind::Control(')')), - (TokenKind::Control('['), TokenKind::Control(']')), - ], - |_| vec![], - )) - .map(ExprKind::Array) + new_line() + .repeated() + .ignore_then( + expr + // .padded_by(new_line().repeated()) + .separated_by(ctrl(',')) + .allow_trailing() + .then_ignore(new_line().repeated()) + .delimited_by(ctrl('['), ctrl(']')) + .recover_with(nested_delimiters( + TokenKind::Control('['), + TokenKind::Control(']'), + [ + (TokenKind::Control('{'), TokenKind::Control('}')), + (TokenKind::Control('('), TokenKind::Control(')')), + (TokenKind::Control('['), TokenKind::Control(']')), + ], + |_| vec![], + )) + .map(ExprKind::Array), + ) .labelled("array") } @@ -162,12 +171,16 @@ fn case( ) -> impl Parser + Clone { keyword("case") .ignore_then( - func_call(expr.clone()) - .map(Box::new) - .then_ignore(just(TokenKind::ArrowFat)) - .then(func_call(expr).map(Box::new)) - .map(|(condition, value)| SwitchCase { condition, value }) - .padded_by(new_line().repeated()) + new_line() + .repeated() + .ignore_then( + func_call(expr.clone()) + .map(Box::new) + .then_ignore(just(TokenKind::ArrowFat)) + .then(func_call(expr).map(Box::new)) + .map(|(condition, value)| SwitchCase { condition, value }), + ) + // .padded_by(new_line().repeated()) .separated_by(ctrl(',')) .allow_trailing() .then_ignore(new_line().repeated()) @@ -276,29 +289,29 @@ where new_line().repeated().at_least(1).ignored(), )); - new_line() - .repeated() - .ignore_then( + with_doc_comment( + new_line().repeated().ignore_then( ident_part() .then_ignore(ctrl('=')) .or_not() .then(expr) - .map(|(alias, expr)| Expr { alias, ..expr }) - .separated_by(pipe) - .at_least(1) - .map_with_span(|exprs, span| { - // If there's only one expr, then we don't need to wrap it - // in a pipeline — just return the lone expr. Otherwise, - // wrap them in a pipeline. - exprs.into_iter().exactly_one().unwrap_or_else(|exprs| { - ExprKind::Pipeline(Pipeline { - exprs: exprs.collect(), - }) - .into_expr(span) - }) - }), - ) - .labelled("pipeline") + .map(|(alias, expr)| Expr { alias, ..expr }), + ), + ) + .separated_by(pipe) + .at_least(1) + .map_with_span(|exprs, span| { + // If there's only one expr, then we don't need to wrap it + // in a pipeline — just return the lone expr. Otherwise, + // wrap them in a pipeline. + exprs.into_iter().exactly_one().unwrap_or_else(|exprs| { + ExprKind::Pipeline(Pipeline { + exprs: exprs.collect(), + }) + .into_expr(span) + }) + }) + .labelled("pipeline") } fn binary_op_parser<'a, Term, Op>( @@ -545,3 +558,75 @@ fn operator_or() -> impl Parser + Clone { fn operator_coalesce() -> impl Parser + Clone { just(TokenKind::Coalesce).to(BinOp::Coalesce) } + +#[cfg(test)] +mod tests { + + use insta::assert_yaml_snapshot; + + use crate::test::{parse_with_parser, trim_start}; + + use super::*; + + #[test] + fn test_expr() { + assert_yaml_snapshot!( + parse_with_parser(r#"5+5"#, trim_start().ignore_then(expr())).unwrap(), + @r###" + --- + Binary: + left: + Literal: + Integer: 5 + span: "0:0-1" + op: Add + right: + Literal: + Integer: 5 + span: "0:2-3" + span: "0:0-3" + "###); + + assert_yaml_snapshot!( + parse_with_parser(r#"derive x = 5"#, trim_start().ignore_then(expr())).unwrap(), + @r###" + --- + Ident: derive + span: "0:0-6" + "###); + } + + #[test] + fn test_pipeline() { + assert_yaml_snapshot!( + parse_with_parser(r#" + from artists + derive x = 5 + "#, trim_start().then(pipeline(expr_call()))).unwrap(), + @r###" + --- + - ~ + - Pipeline: + exprs: + - FuncCall: + name: + Ident: from + span: "0:13-17" + args: + - Ident: artists + span: "0:18-25" + span: "0:13-25" + - FuncCall: + name: + Ident: derive + span: "0:38-44" + args: + - Literal: + Integer: 5 + span: "0:49-50" + alias: x + span: "0:38-50" + span: "0:13-50" + "###); + } +} diff --git a/prqlc/prqlc-parser/src/parser/mod.rs b/prqlc/prqlc-parser/src/parser/mod.rs index 669f2f2f2c3e..b4c5dc3b18b8 100644 --- a/prqlc/prqlc-parser/src/parser/mod.rs +++ b/prqlc/prqlc-parser/src/parser/mod.rs @@ -1,5 +1,6 @@ use chumsky::{prelude::*, Stream}; +pub use self::common::new_line; use crate::error::Error; use crate::lexer::lr; use crate::span::Span; diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index d79e7ee8c3a0..890e5bc904aa 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -15,6 +15,9 @@ pub fn source() -> impl Parser, Error = PError> { with_doc_comment(query_def()) .or_not() .chain(module_contents()) + // This is the only instance we can consume newlines at the end of something, since + // this is the end of the module + .then_ignore(new_line().repeated()) .then_ignore(end()) } @@ -26,12 +29,18 @@ fn module_contents() -> impl Parser, Error = PError> { .map(|(name, stmts)| StmtKind::ModuleDef(ModuleDef { name, stmts })) .labelled("module definition"); - let annotation = just(TokenKind::Annotate) - .ignore_then(expr()) - .then_ignore(new_line().repeated()) - .map(|expr| Annotation { - expr: Box::new(expr), - }) + let annotation = new_line() + .repeated() + // TODO: we could enforce annotations starting on a new line? + // .at_least(1) + .ignore_then( + just(TokenKind::Annotate) + .ignore_then(expr()) + // .then_ignore(new_line().repeated()) + .map(|expr| Annotation { + expr: Box::new(expr), + }), + ) .labelled("annotation"); // Also need to handle new_line vs. start of file here @@ -60,6 +69,7 @@ fn module_contents() -> impl Parser, Error = PError> { fn query_def() -> impl Parser + Clone { new_line() .repeated() + .at_least(1) .ignore_then(keyword("prql")) .ignore_then( // named arg @@ -144,9 +154,13 @@ fn var_def() -> impl Parser + Clone { .labelled("variable definition"); let main_or_into = pipeline(expr_call()) - .then_ignore(new_line().repeated()) .map(Box::new) - .then(keyword("into").ignore_then(ident_part()).or_not()) + .then( + new_line() + .repeated() + .ignore_then(keyword("into").ignore_then(ident_part())) + .or_not(), + ) .map(|(value, name)| { let kind = if name.is_none() { VarDefKind::Main @@ -162,6 +176,8 @@ fn var_def() -> impl Parser + Clone { ty: None, }) }) + // TODO: this isn't really accurate, since a standard `from artists` + // also counts as this; we should change .labelled("variable definition"); let_.or(main_or_into) @@ -190,6 +206,77 @@ mod tests { use super::*; use crate::test::parse_with_parser; + #[test] + fn test_module_def() { + assert_yaml_snapshot!(parse_with_parser(r#"module hello { + + let world = 1 + + let man = module.world + + } + "#, module_contents()).unwrap(), @r###" + --- + - ModuleDef: + name: hello + stmts: + - VarDef: + kind: Let + name: world + value: + Literal: + Integer: 1 + span: "0:50-51" + span: "0:38-51" + - VarDef: + kind: Let + name: man + value: + Indirection: + base: + Ident: module + span: "0:74-80" + field: + Name: world + span: "0:74-86" + span: "0:64-86" + span: "0:11-98" + "###); + + assert_yaml_snapshot!(parse_with_parser(r#" + module hello { + let world = 1 + let man = module.world + } + "#, module_contents()).unwrap(), @r###" + --- + - ModuleDef: + name: hello + stmts: + - VarDef: + kind: Let + name: world + value: + Literal: + Integer: 1 + span: "0:50-51" + span: "0:38-51" + - VarDef: + kind: Let + name: man + value: + Indirection: + base: + Ident: module + span: "0:74-80" + field: + Name: world + span: "0:74-86" + span: "0:64-86" + span: "0:11-98" + "###); + } + #[test] fn test_doc_comment_module() { assert_yaml_snapshot!(parse_with_parser(r#" @@ -211,7 +298,7 @@ mod tests { - Ident: foo span: "0:44-47" span: "0:39-47" - span: "0:30-49" + span: "0:30-47" doc_comment: " first doc comment" "###); @@ -253,7 +340,7 @@ mod tests { - Ident: bar span: "0:108-111" span: "0:103-111" - span: "0:94-113" + span: "0:94-111" doc_comment: " second doc comment" "###); } diff --git a/prqlc/prqlc-parser/src/parser/test.rs b/prqlc/prqlc-parser/src/parser/test.rs index 0d6ff9963f0f..e8cff7a53957 100644 --- a/prqlc/prqlc-parser/src/parser/test.rs +++ b/prqlc/prqlc-parser/src/parser/test.rs @@ -1,3 +1,4 @@ +use chumsky::Parser; use insta::assert_yaml_snapshot; use crate::parser::prepare_stream; @@ -6,10 +7,13 @@ use crate::test::parse_with_parser; use crate::{error::Error, lexer::lex_source}; use crate::{lexer::lr::TokenKind, parser::pr::FuncCall}; -use super::pr::Expr; +use super::{common::new_line, pr::Expr}; fn parse_expr(source: &str) -> Result> { - parse_with_parser(source, super::expr::expr_call()) + parse_with_parser( + source, + new_line().repeated().ignore_then(super::expr::expr_call()), + ) } #[test] @@ -22,6 +26,8 @@ fn test_prepare_stream() { let mut stream = prepare_stream(tokens.0.into_iter(), input, 0); assert_yaml_snapshot!(stream.fetch_tokens().collect::>(), @r###" --- + - - Start + - "0:0-0" - - Ident: from - "0:0-4" - - Ident: artists diff --git a/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap b/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap index 84bca84aeba0..38604e50876c 100644 --- a/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap +++ b/prqlc/prqlc-parser/src/snapshots/prqlc_parser__test__pipeline_parse_tree.snap @@ -188,4 +188,4 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\" span: "0:711-713" span: "0:706-713" span: "0:1-713" - span: "0:1-714" + span: "0:0-713" diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index 2b92a7f5b00e..377e839587d4 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -2,6 +2,7 @@ use chumsky::Parser; use insta::{assert_debug_snapshot, assert_yaml_snapshot}; use std::fmt::Debug; +use crate::parser::new_line; use crate::parser::pr::Stmt; use crate::parser::prepare_stream; use crate::parser::stmt; @@ -27,10 +28,17 @@ pub(crate) fn parse_with_parser( } /// Parse into statements -fn parse_single(source: &str) -> Result, Vec> { +pub(crate) fn parse_single(source: &str) -> Result, Vec> { + // parse_with_parser(source, new_line().repeated().ignore_then(stmt::source())) parse_with_parser(source, stmt::source()) } +// TODO: move to expr singe stmts don't need it? +/// Remove leading newlines & the start token, for tests +pub(crate) fn trim_start() -> impl Parser { + new_line().repeated().ignored() +} + #[test] fn test_error_unicode_string() { // Test various unicode strings successfully parse errors. We were @@ -81,7 +89,7 @@ fn test_error_unexpected() { 0:6-7, ), reason: Simple( - "unexpected : while parsing function call", + "unexpected :", ), hints: [], code: None, @@ -365,7 +373,7 @@ fn test_basic_exprs() { - Ident: a span: "0:35-36" span: "0:28-36" - span: "0:28-36" + span: "0:0-36" "###); } @@ -543,7 +551,7 @@ fn test_function() { named_params: [] generic_type_params: [] span: "0:0-27" - span: "0:0-28" + span: "0:0-27" "###); assert_yaml_snapshot!(parse_single(r#"let count = X -> s"SUM({X})" @@ -635,7 +643,7 @@ fn test_function() { named_params: [] generic_type_params: [] span: "0:27-147" - span: "0:13-147" + span: "0:0-147" "###); assert_yaml_snapshot!(parse_single("let add = x to:a -> x + to\n").unwrap(), @r###" @@ -776,7 +784,7 @@ fn test_var_def() { SString: - String: SELECT * FROM employees span: "0:21-47" - span: "0:13-47" + span: "0:0-47" "###); assert_yaml_snapshot!(parse_single( @@ -828,7 +836,7 @@ fn test_var_def() { - Ident: x span: "0:101-102" span: "0:96-102" - span: "0:96-102" + span: "0:84-102" "###); } @@ -918,7 +926,7 @@ fn test_sql_parameters() { span: "0:37-107" span: "0:30-107" span: "0:9-107" - span: "0:9-108" + span: "0:0-107" "###); } @@ -1019,7 +1027,7 @@ join `my-proj`.`dataset`.`table` span: "0:118-126" span: "0:94-126" span: "0:1-126" - span: "0:1-127" + span: "0:0-126" "###); } @@ -1115,7 +1123,7 @@ fn test_sort() { span: "0:136-174" span: "0:131-174" span: "0:9-174" - span: "0:9-175" + span: "0:0-174" "###); } @@ -1162,7 +1170,7 @@ fn test_dates() { span: "0:39-76" span: "0:32-76" span: "0:9-76" - span: "0:9-77" + span: "0:0-76" "###); } @@ -1186,7 +1194,7 @@ fn test_multiline_string() { span: "0:20-36" alias: x span: "0:9-36" - span: "0:9-37" + span: "0:0-36" "### ) } @@ -1227,7 +1235,7 @@ derive x = 5 alias: x span: "0:14-26" span: "0:1-26" - span: "0:1-31" + span: "0:0-26" "### ) } @@ -1270,7 +1278,7 @@ fn test_coalesce() { alias: amount span: "0:32-59" span: "0:9-59" - span: "0:9-60" + span: "0:0-59" "### ) } @@ -1294,7 +1302,7 @@ fn test_literal() { span: "0:20-24" alias: x span: "0:9-24" - span: "0:9-25" + span: "0:0-24" "###) } @@ -1366,7 +1374,7 @@ fn test_allowed_idents() { span: "0:140-172" span: "0:133-172" span: "0:9-172" - span: "0:9-173" + span: "0:0-172" "###) } @@ -1459,7 +1467,7 @@ fn test_gt_lt_gte_lte() { span: "0:127-139" span: "0:120-139" span: "0:9-139" - span: "0:9-140" + span: "0:0-139" "###) } @@ -1500,7 +1508,7 @@ join s=salaries (==id) span: "0:33-37" span: "0:16-38" span: "0:1-38" - span: "0:1-39" + span: "0:0-38" "###); } @@ -1539,7 +1547,7 @@ fn test_var_defs() { value: Ident: x span: "0:17-42" - span: "0:9-42" + span: "0:0-42" "###); assert_yaml_snapshot!(parse_single(r#" @@ -1553,7 +1561,7 @@ fn test_var_defs() { value: Ident: x span: "0:9-10" - span: "0:9-25" + span: "0:0-25" "###); assert_yaml_snapshot!(parse_single(r#" @@ -1566,7 +1574,7 @@ fn test_var_defs() { value: Ident: x span: "0:9-10" - span: "0:9-11" + span: "0:0-10" "###); } @@ -1589,7 +1597,7 @@ fn test_array() { Integer: 2 span: "0:21-22" span: "0:17-24" - span: "0:9-24" + span: "0:0-24" - VarDef: kind: Let name: a @@ -1602,7 +1610,7 @@ fn test_array() { String: hello span: "0:49-56" span: "0:41-57" - span: "0:33-57" + span: "0:24-57" "###); } @@ -1637,7 +1645,7 @@ fn test_annotation() { named_params: [] generic_type_params: [] span: "0:49-61" - span: "0:9-61" + span: "0:0-61" annotations: - expr: Tuple: @@ -1649,7 +1657,8 @@ fn test_annotation() { "###); parse_single( r#" - @{binding_strength=1} let add = a b -> a + b + @{binding_strength=1} + let add = a b -> a + b "#, ) .unwrap(); @@ -1753,7 +1762,7 @@ fn test_target() { span: "0:72-77" span: "0:65-77" span: "0:45-77" - span: "0:45-78" + span: "0:34-77" "###); } @@ -1841,7 +1850,7 @@ fn doc_comment() { alias: x span: "0:22-34" span: "0:5-34" - span: "0:0-35" + span: "0:0-34" "###); assert_yaml_snapshot!(parse_single(r###" @@ -1863,15 +1872,24 @@ fn doc_comment() { args: - Ident: artists span: "0:10-17" - - Ident: derive - span: "0:51-57" - doc_comment: " This is a doc comment" + span: "0:5-17" + span: "0:0-17" + - VarDef: + kind: Main + name: main + value: + FuncCall: + name: + Ident: derive + span: "0:53-59" + args: - Literal: Integer: 5 - span: "0:62-63" + span: "0:64-65" alias: x - span: "0:5-63" - span: "0:5-64" + span: "0:53-65" + span: "0:47-65" + doc_comment: " This is a doc comment" "###); assert_yaml_snapshot!(parse_single(r###" @@ -1884,22 +1902,29 @@ fn doc_comment() { kind: Main name: main value: - FuncCall: - name: - Ident: from - span: "0:5-9" - args: - - Ident: artists - span: "0:10-17" - - Ident: derive - span: "0:51-57" - doc_comment: " This is a doc comment" - - Literal: - Integer: 5 - span: "0:62-63" - alias: x - span: "0:5-63" - span: "0:5-64" + Pipeline: + exprs: + - FuncCall: + name: + Ident: from + span: "0:34-38" + args: + - Ident: artists + span: "0:39-46" + span: "0:34-46" + - FuncCall: + name: + Ident: derive + span: "0:51-57" + args: + - Literal: + Integer: 5 + span: "0:62-63" + alias: x + span: "0:51-63" + span: "0:34-63" + span: "0:29-63" + doc_comment: " This is a doc comment" "###); assert_debug_snapshot!(parse_single(r###" @@ -1912,7 +1937,7 @@ fn doc_comment() { 0:18-42, ), reason: Simple( - "unexpected #! This is a doc comment\n while parsing function call", + "unexpected #! This is a doc comment\n", ), hints: [], code: None, From d2447c453f1d4db3a8c89414f33ba9fd2b86380b Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 10:23:36 -0700 Subject: [PATCH 18/28] . --- prqlc/prqlc-parser/src/parser/stmt.rs | 148 +++++++++++++++----------- prqlc/prqlc-parser/src/test.rs | 6 +- 2 files changed, 88 insertions(+), 66 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index 890e5bc904aa..b17cd7b7d9ad 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -25,7 +25,11 @@ fn module_contents() -> impl Parser, Error = PError> { recursive(|module_contents| { let module_def = keyword("module") .ignore_then(ident_part()) - .then(module_contents.delimited_by(ctrl('{'), ctrl('}'))) + .then( + module_contents + .then_ignore(new_line().repeated()) + .delimited_by(ctrl('{'), ctrl('}')), + ) .map(|(name, stmts)| StmtKind::ModuleDef(ModuleDef { name, stmts })) .labelled("module definition"); @@ -44,7 +48,8 @@ fn module_contents() -> impl Parser, Error = PError> { .labelled("annotation"); // Also need to handle new_line vs. start of file here - let stmt_kind = new_line().repeated().at_least(1).ignore_then(choice(( + // let stmt_kind = new_line().repeated().at_least(1).ignore_then(choice(( + let stmt_kind = new_line().repeated().ignore_then(choice(( module_def, type_def(), import_def(), @@ -207,74 +212,91 @@ mod tests { use crate::test::parse_with_parser; #[test] - fn test_module_def() { - assert_yaml_snapshot!(parse_with_parser(r#"module hello { - + fn test_module_contents() { + assert_yaml_snapshot!(parse_with_parser(r#" let world = 1 - let man = module.world + "#, module_contents()).unwrap(), @r###" + --- + - VarDef: + kind: Let + name: world + value: + Literal: + Integer: 1 + span: "0:25-26" + span: "0:0-26" + - VarDef: + kind: Let + name: man + value: + Indirection: + base: + Ident: module + span: "0:49-55" + field: + Name: world + span: "0:49-61" + span: "0:26-61" + "###); + } - } + #[test] + fn test_module_def() { + // Same line + assert_yaml_snapshot!(parse_with_parser(r#"module two {let houses = both.alike} "#, module_contents()).unwrap(), @r###" - --- - - ModuleDef: - name: hello - stmts: - - VarDef: - kind: Let - name: world - value: - Literal: - Integer: 1 - span: "0:50-51" - span: "0:38-51" - - VarDef: - kind: Let - name: man - value: - Indirection: - base: - Ident: module - span: "0:74-80" - field: - Name: world - span: "0:74-86" - span: "0:64-86" - span: "0:11-98" - "###); + --- + - ModuleDef: + name: two + stmts: + - VarDef: + kind: Let + name: houses + value: + Indirection: + base: + Ident: both + span: "0:25-29" + field: + Name: alike + span: "0:25-35" + span: "0:12-35" + span: "0:0-36" + "###); assert_yaml_snapshot!(parse_with_parser(r#" - module hello { - let world = 1 - let man = module.world - } + module dignity { + let fair = 1 + let verona = we.lay + } "#, module_contents()).unwrap(), @r###" - --- - - ModuleDef: - name: hello - stmts: - - VarDef: - kind: Let - name: world - value: - Literal: - Integer: 1 - span: "0:50-51" - span: "0:38-51" - - VarDef: - kind: Let - name: man - value: - Indirection: - base: - Ident: module - span: "0:74-80" - field: - Name: world - span: "0:74-86" - span: "0:64-86" - span: "0:11-98" - "###); + --- + - ModuleDef: + name: dignity + stmts: + - VarDef: + kind: Let + name: fair + value: + Literal: + Integer: 1 + span: "0:51-52" + span: "0:27-52" + - VarDef: + kind: Let + name: verona + value: + Indirection: + base: + Ident: we + span: "0:78-80" + field: + Name: lay + span: "0:78-84" + span: "0:52-84" + span: "0:0-95" + "###); } #[test] diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index 377e839587d4..232ed5c0ae81 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -1788,7 +1788,7 @@ fn test_module() { Literal: Integer: 1 span: "0:50-51" - span: "0:38-51" + span: "0:25-51" - VarDef: kind: Let name: man @@ -1800,8 +1800,8 @@ fn test_module() { field: Name: world span: "0:74-86" - span: "0:64-86" - span: "0:11-98" + span: "0:51-86" + span: "0:0-98" "###); } From b6018f96e8b0803dd049a3d072728d5e0dbc32cb Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 10:27:06 -0700 Subject: [PATCH 19/28] --- prqlc/prqlc-parser/src/parser/common.rs | 1 - prqlc/prqlc-parser/src/parser/perror.rs | 10 ++-------- prqlc/prqlc-parser/src/test.rs | 2 +- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index f23f08ea25c8..9a070665322e 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -13,7 +13,6 @@ pub fn ident_part() -> impl Parser + Clone { TokenKind::Keyword(ident) if &ident == "module" => ident, } .map_err(|e: PError| { - dbg!(e.clone()); PError::expected_input_found( e.span(), [Some(TokenKind::Ident("".to_string()))], diff --git a/prqlc/prqlc-parser/src/parser/perror.rs b/prqlc/prqlc-parser/src/parser/perror.rs index ad2db49db94b..0afe440b4d04 100644 --- a/prqlc/prqlc-parser/src/parser/perror.rs +++ b/prqlc/prqlc-parser/src/parser/perror.rs @@ -83,7 +83,7 @@ impl chumsky::Error for ChumError { ) -> Self { let exp = expected.into_iter().collect(); let msg = format!("expected {:?} but found {:?}", exp, found); - dbg!(msg); + msg; log::trace!("looking for {:?} but found {:?} at: {:?}", exp, found, span); Self { span, @@ -126,7 +126,6 @@ impl chumsky::Error for ChumError { ///from chumsky::error::Simple fn merge(mut self, other: Self) -> Self { - dbg!((&self, &other)); // TODO: Assert that `self.span == other.span` here? // self.reason = match (&self.reason, &other.reason) { // (Some(..), None) => self.reason, @@ -142,10 +141,7 @@ impl chumsky::Error for ChumError { self.label = self.label.merge(other.label); self.expected.extend(other.expected); - // for expected in other.expected { - // self.expected.insert(expected); - // } - dbg!(self) + self } } @@ -162,7 +158,6 @@ impl Eq for ChumError {} impl fmt::Display for ChumError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - dbg!(&self); // TODO: Take `self.reason` into account if let Some(found) = &self.found { @@ -214,7 +209,6 @@ impl From for Error { } fn construct_parser_error(e: PError) -> Error { - dbg!(e.clone()); if let Some(message) = e.reason() { return Error::new_simple(message).with_source(ErrorSource::Parser(e)); } diff --git a/prqlc/prqlc-parser/src/test.rs b/prqlc/prqlc-parser/src/test.rs index 232ed5c0ae81..0593c577e336 100644 --- a/prqlc/prqlc-parser/src/test.rs +++ b/prqlc/prqlc-parser/src/test.rs @@ -21,7 +21,7 @@ pub(crate) fn parse_with_parser( let (ast, parse_errors) = parser.parse_recovery_verbose(stream); if !parse_errors.is_empty() { - dbg!(&ast); + log::info!("ast: {ast:?}"); return Err(parse_errors.into_iter().map(|e| e.into()).collect()); } Ok(ast.unwrap()) From c932c7cb13f60d194d00680e9aa1f079a9fd6694 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 10:27:44 -0700 Subject: [PATCH 20/28] --- prqlc/prqlc-parser/src/parser/perror.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/perror.rs b/prqlc/prqlc-parser/src/parser/perror.rs index 0afe440b4d04..973aee315857 100644 --- a/prqlc/prqlc-parser/src/parser/perror.rs +++ b/prqlc/prqlc-parser/src/parser/perror.rs @@ -82,8 +82,6 @@ impl chumsky::Error for ChumError { found: Option, ) -> Self { let exp = expected.into_iter().collect(); - let msg = format!("expected {:?} but found {:?}", exp, found); - msg; log::trace!("looking for {:?} but found {:?} at: {:?}", exp, found, span); Self { span, From da56a30104fd288921fa475381089de44e7442a7 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 15:00:47 -0700 Subject: [PATCH 21/28] --- prqlc/prqlc-parser/src/lexer/mod.rs | 3 + prqlc/prqlc-parser/src/parser/common.rs | 3 +- prqlc/prqlc-parser/src/parser/expr.rs | 66 ++++++++++++++----- prqlc/prqlc-parser/src/parser/test.rs | 20 +++--- .../tests/integration/bad_error_messages.rs | 9 +-- ...__queries__debug_lineage__aggregation.snap | 2 +- ...n__queries__debug_lineage__arithmetic.snap | 2 +- ...gration__queries__debug_lineage__cast.snap | 2 +- ...ueries__debug_lineage__constants_only.snap | 2 +- ..._queries__debug_lineage__date_to_text.snap | 2 +- ...ion__queries__debug_lineage__distinct.snap | 2 +- ...__queries__debug_lineage__distinct_on.snap | 2 +- ..._queries__debug_lineage__genre_counts.snap | 4 +- ...on__queries__debug_lineage__group_all.snap | 2 +- ...n__queries__debug_lineage__group_sort.snap | 2 +- ..._debug_lineage__group_sort_limit_take.snap | 4 +- ...ueries__debug_lineage__invoice_totals.snap | 6 +- ...tion__queries__debug_lineage__loop_01.snap | 2 +- ...__queries__debug_lineage__math_module.snap | 2 +- ...on__queries__debug_lineage__pipelines.snap | 2 +- ...ion__queries__debug_lineage__read_csv.snap | 2 +- ...ueries__debug_lineage__set_ops_remove.snap | 4 +- ...gration__queries__debug_lineage__sort.snap | 2 +- ...ation__queries__debug_lineage__switch.snap | 2 +- ...gration__queries__debug_lineage__take.snap | 2 +- ...__queries__debug_lineage__text_module.snap | 2 +- ...ation__queries__debug_lineage__window.snap | 4 +- 27 files changed, 99 insertions(+), 58 deletions(-) diff --git a/prqlc/prqlc-parser/src/lexer/mod.rs b/prqlc/prqlc-parser/src/lexer/mod.rs index f86cb319f1fa..5baf5e315df5 100644 --- a/prqlc/prqlc-parser/src/lexer/mod.rs +++ b/prqlc/prqlc-parser/src/lexer/mod.rs @@ -189,6 +189,9 @@ fn line_wrap() -> impl Parser> { fn comment() -> impl Parser> { just('#').ignore_then(choice(( + // One option would be to check that doc comments have new lines in the + // lexer (we currently do in the parser); which would give better error + // messages? just('!').ignore_then( newline() .not() diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 9a070665322e..14ff246b5de8 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -27,6 +27,8 @@ pub fn keyword(kw: &'static str) -> impl Parser + pub fn new_line() -> impl Parser + Clone { just(TokenKind::NewLine) + // Start is considered a new line, so we can enforce things start on a new + // line while allowing them to be at the beginning of a file .or(just(TokenKind::Start)) .ignored() .labelled("new line") @@ -57,7 +59,6 @@ pub fn doc_comment() -> impl Parser + Clone { .repeated() .at_least(1) .collect() - .debug("doc_comment") .map(|lines: Vec| lines.join("\n")) .labelled("doc comment") } diff --git a/prqlc/prqlc-parser/src/parser/expr.rs b/prqlc/prqlc-parser/src/parser/expr.rs index 8447565c9e13..bec0df34ccea 100644 --- a/prqlc/prqlc-parser/src/parser/expr.rs +++ b/prqlc/prqlc-parser/src/parser/expr.rs @@ -85,7 +85,6 @@ fn tuple( .then(nested_expr) .map(|(alias, expr)| Expr { alias, ..expr }), ) - // .padded_by(new_line().repeated()) .separated_by(ctrl(',')) .allow_trailing() .then_ignore(new_line().repeated()) @@ -110,9 +109,7 @@ fn array( new_line() .repeated() .ignore_then( - expr - // .padded_by(new_line().repeated()) - .separated_by(ctrl(',')) + expr.separated_by(ctrl(',')) .allow_trailing() .then_ignore(new_line().repeated()) .delimited_by(ctrl('['), ctrl(']')) @@ -169,21 +166,19 @@ fn interpolation() -> impl Parser + Clone { fn case( expr: impl Parser + Clone, ) -> impl Parser + Clone { + // The `nickname != null => nickname,` part + let mapping = func_call(expr.clone()) + .map(Box::new) + .then_ignore(just(TokenKind::ArrowFat)) + .then(func_call(expr).map(Box::new)) + .map(|(condition, value)| SwitchCase { condition, value }); + keyword("case") .ignore_then( - new_line() - .repeated() - .ignore_then( - func_call(expr.clone()) - .map(Box::new) - .then_ignore(just(TokenKind::ArrowFat)) - .then(func_call(expr).map(Box::new)) - .map(|(condition, value)| SwitchCase { condition, value }), - ) - // .padded_by(new_line().repeated()) - .separated_by(ctrl(',')) + mapping + .separated_by(ctrl(',').then_ignore(new_line().repeated())) .allow_trailing() - .then_ignore(new_line().repeated()) + .padded_by(new_line().repeated()) .delimited_by(ctrl('['), ctrl(']')), ) .map(ExprKind::Case) @@ -629,4 +624,43 @@ mod tests { span: "0:13-50" "###); } + + #[test] + fn test_case() { + assert_yaml_snapshot!( + parse_with_parser(r#" + + case [ + + nickname != null => nickname, + true => null + + ] + "#, trim_start().then(case(expr()))).unwrap(), + @r###" + --- + - ~ + - Case: + - condition: + Binary: + left: + Ident: nickname + span: "0:30-38" + op: Ne + right: + Literal: "Null" + span: "0:42-46" + span: "0:30-46" + value: + Ident: nickname + span: "0:50-58" + - condition: + Literal: + Boolean: true + span: "0:72-76" + value: + Literal: "Null" + span: "0:80-84" + "###); + } } diff --git a/prqlc/prqlc-parser/src/parser/test.rs b/prqlc/prqlc-parser/src/parser/test.rs index e8cff7a53957..2d8a71839f0f 100644 --- a/prqlc/prqlc-parser/src/parser/test.rs +++ b/prqlc/prqlc-parser/src/parser/test.rs @@ -1255,33 +1255,35 @@ fn test_ident_with_keywords() { #[test] fn test_case() { - assert_yaml_snapshot!(parse_expr(r#"case [ + assert_yaml_snapshot!(parse_expr(r#" + case [ nickname != null => nickname, true => null - ]"#).unwrap(), @r###" + ] + "#).unwrap(), @r###" --- Case: - condition: Binary: left: Ident: nickname - span: "0:19-27" + span: "0:28-36" op: Ne right: Literal: "Null" - span: "0:31-35" - span: "0:19-35" + span: "0:40-44" + span: "0:28-44" value: Ident: nickname - span: "0:39-47" + span: "0:48-56" - condition: Literal: Boolean: true - span: "0:61-65" + span: "0:70-74" value: Literal: "Null" - span: "0:69-73" - span: "0:0-83" + span: "0:78-82" + span: "0:9-92" "###); } diff --git a/prqlc/prqlc/tests/integration/bad_error_messages.rs b/prqlc/prqlc/tests/integration/bad_error_messages.rs index 9f4b25bf2019..25e89ce05c3f 100644 --- a/prqlc/prqlc/tests/integration/bad_error_messages.rs +++ b/prqlc/prqlc/tests/integration/bad_error_messages.rs @@ -220,11 +220,12 @@ fn just_std() { std "###).unwrap_err(), @r###" Error: - ╭─[:2:5] + ╭─[:1:1] │ - 2 │ std - │ ──┬─ - │ ╰─── internal compiler error; tracked at https://github.com/PRQL/prql/issues/4474 + 1 │ ╭─▶ + 2 │ ├─▶ std + │ │ + │ ╰───────────── internal compiler error; tracked at https://github.com/PRQL/prql/issues/4474 ───╯ "###); } diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap index feb3f0fbad2f..bb10ce1c8881 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__aggregation.snap @@ -286,4 +286,4 @@ ast: span: 1:178-243 span: 1:168-243 span: 1:102-243 - span: 1:102-244 + span: 1:0-243 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap index 9c5a750c0f08..b4f798314b68 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__arithmetic.snap @@ -1277,4 +1277,4 @@ ast: span: 1:830-832 span: 1:825-832 span: 1:13-832 - span: 1:13-833 + span: 1:0-832 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap index 310465f90e8a..a5f28ab5572d 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__cast.snap @@ -209,4 +209,4 @@ ast: span: 1:103-105 span: 1:98-105 span: 1:13-105 - span: 1:13-106 + span: 1:0-105 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__constants_only.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__constants_only.snap index df827a1fedcf..0ec1fbdecd0d 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__constants_only.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__constants_only.snap @@ -192,4 +192,4 @@ ast: alias: d span: 1:52-65 span: 1:0-65 - span: 1:0-66 + span: 1:0-65 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap index 955e7d60a22b..322da4de0e44 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__date_to_text.snap @@ -627,4 +627,4 @@ ast: span: 1:86-718 span: 1:79-718 span: 1:57-718 - span: 1:57-719 + span: 1:0-718 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap index 60c1240ce602..37c8167916d4 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct.snap @@ -228,4 +228,4 @@ ast: span: 1:88-90 span: 1:77-90 span: 1:13-90 - span: 1:13-91 + span: 1:0-90 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap index be7a54a80889..c3b19d9668db 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__distinct_on.snap @@ -293,4 +293,4 @@ ast: span: 1:133-159 span: 1:128-159 span: 1:13-159 - span: 1:13-160 + span: 1:0-159 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap index 3d2bdc3a0345..24159d7d24df 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__genre_counts.snap @@ -125,7 +125,7 @@ ast: span: 1:167-183 span: 1:157-183 span: 1:135-185 - span: 1:117-185 + span: 1:0-185 - VarDef: kind: Main name: main @@ -170,4 +170,4 @@ ast: alias: a span: 1:217-230 span: 1:187-230 - span: 1:187-231 + span: 1:185-230 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap index 13ab542b8168..5149c491e3af 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_all.snap @@ -339,4 +339,4 @@ ast: span: 1:152-160 span: 1:147-160 span: 1:13-160 - span: 1:13-161 + span: 1:0-160 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap index 7331f875ccb6..16722f0bae96 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort.snap @@ -325,4 +325,4 @@ ast: span: 1:136-150 span: 1:129-150 span: 1:13-150 - span: 1:13-151 + span: 1:0-150 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap index c8e9c4bc4627..d8e94bf276d7 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__group_sort_limit_take.snap @@ -362,7 +362,7 @@ ast: Integer: 3 span: 1:168-169 span: 1:163-169 - span: 1:140-169 + span: 1:137-169 span: 1:119-171 - FuncCall: name: @@ -411,4 +411,4 @@ ast: span: 1:230-251 span: 1:225-251 span: 1:76-251 - span: 1:76-252 + span: 1:0-251 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap index 4471a56f7d03..dadcbbb337cb 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__invoice_totals.snap @@ -871,7 +871,7 @@ ast: alias: total_price span: 1:338-466 span: 1:328-466 - span: 1:281-466 + span: 1:276-466 span: 1:254-468 - FuncCall: name: @@ -920,7 +920,7 @@ ast: Boolean: true span: 1:521-525 span: 1:504-592 - span: 1:488-592 + span: 1:483-592 span: 1:469-594 - FuncCall: name: @@ -984,5 +984,5 @@ ast: span: 1:789-791 span: 1:784-791 span: 1:131-791 - span: 1:131-792 + span: 1:130-791 doc_comment: ' Calculate a number of metrics about the sales of tracks in each city.' diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap index 8a0143ae52e8..1436d51ee9d2 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__loop_01.snap @@ -363,4 +363,4 @@ ast: span: 1:255-256 span: 1:250-256 span: 1:162-256 - span: 1:162-257 + span: 1:0-256 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap index 49c84b3bf375..7b92c56f9b1d 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__math_module.snap @@ -945,4 +945,4 @@ ast: span: 1:110-839 span: 1:103-839 span: 1:82-839 - span: 1:82-840 + span: 1:0-839 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap index 705569b7ab1d..77e89cb74ca6 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__pipelines.snap @@ -342,4 +342,4 @@ ast: span: 1:281-297 span: 1:274-297 span: 1:166-297 - span: 1:166-298 + span: 1:0-297 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap index e3b20518dfde..f12478b04ec2 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__read_csv.snap @@ -74,4 +74,4 @@ ast: span: 1:97-110 span: 1:92-110 span: 1:43-110 - span: 1:43-111 + span: 1:0-110 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap index 5af44fc9bec6..0dd6dd7a607b 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__set_ops_remove.snap @@ -289,7 +289,7 @@ ast: named_params: [] generic_type_params: [] span: 1:28-79 - span: 1:13-79 + span: 1:0-79 - VarDef: kind: Main name: main @@ -339,4 +339,4 @@ ast: span: 1:244-245 span: 1:239-245 span: 1:81-245 - span: 1:81-246 + span: 1:79-245 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap index a1aa70338287..82f3ab247866 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__sort.snap @@ -323,4 +323,4 @@ ast: span: 1:224-271 span: 1:217-271 span: 1:13-271 - span: 1:13-272 + span: 1:0-271 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap index c0fb2ecf4d47..957538b8f03c 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__switch.snap @@ -234,4 +234,4 @@ ast: span: 1:252-254 span: 1:247-254 span: 1:89-254 - span: 1:89-255 + span: 1:0-254 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap index 8c02c5a85974..2b2a4492caf9 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__take.snap @@ -116,4 +116,4 @@ ast: span: 1:47-51 span: 1:42-51 span: 1:13-51 - span: 1:13-52 + span: 1:0-51 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap index 69bf68a2073c..b3cf60d07916 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__text_module.snap @@ -705,4 +705,4 @@ ast: span: 1:484-588 span: 1:477-588 span: 1:113-588 - span: 1:113-589 + span: 1:0-588 diff --git a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap index 999eb235b48a..8c6cc9a41d7e 100644 --- a/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap +++ b/prqlc/prqlc/tests/integration/snapshots/integration__queries__debug_lineage__window.snap @@ -453,7 +453,7 @@ ast: Integer: 10 span: 1:914-916 span: 1:909-916 - span: 1:793-916 + span: 1:790-916 span: 1:774-918 - FuncCall: name: @@ -502,4 +502,4 @@ ast: span: 1:1006-1020 span: 1:999-1020 span: 1:762-1020 - span: 1:762-1021 + span: 1:0-1020 From 511bffaa9aa4de2358573531280709319048ed36 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 15:11:17 -0700 Subject: [PATCH 22/28] --- prqlc/prqlc-parser/src/parser/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/mod.rs b/prqlc/prqlc-parser/src/parser/mod.rs index b4c5dc3b18b8..8fe0c10c6892 100644 --- a/prqlc/prqlc-parser/src/parser/mod.rs +++ b/prqlc/prqlc-parser/src/parser/mod.rs @@ -43,8 +43,7 @@ pub(crate) fn prepare_stream( let semantic_tokens = tokens.filter(|token| { !matches!( token.kind, - lr::TokenKind::Comment(_) | lr::TokenKind::LineWrap(_) // | lr::TokenKind::DocComment(_) - // | lr::TokenKind::Start + lr::TokenKind::Comment(_) | lr::TokenKind::LineWrap(_) ) }); From 97a6ad15d0f774bae938540fbd3f670e3cf364e5 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 15:19:18 -0700 Subject: [PATCH 23/28] --- prqlc/prqlc-parser/src/parser/stmt.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index b17cd7b7d9ad..52c912120a6e 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -40,14 +40,18 @@ fn module_contents() -> impl Parser, Error = PError> { .ignore_then( just(TokenKind::Annotate) .ignore_then(expr()) - // .then_ignore(new_line().repeated()) .map(|expr| Annotation { expr: Box::new(expr), }), ) .labelled("annotation"); - // Also need to handle new_line vs. start of file here + // TODO: we want to confirm that we're not allowing things on the same + // line that shoudln't be; e.g. `let foo = 5 let bar = 6`. We can't + // enforce a new line here because then `module two {let houses = + // both.alike}` fails (though we could force a new line after the + // `module` if that were helpful) + // // let stmt_kind = new_line().repeated().at_least(1).ignore_then(choice(( let stmt_kind = new_line().repeated().ignore_then(choice(( module_def, @@ -65,9 +69,6 @@ fn module_contents() -> impl Parser, Error = PError> { .map_with_span(into_stmt), ) .repeated() - // .separated_by(new_line().repeated().at_least(1)) - // .allow_leading() - // .allow_trailing() }) } From df678f19b6a7f75ec45025927da79537202e2f48 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 13 Jul 2024 22:19:33 +0000 Subject: [PATCH 24/28] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- prqlc/prqlc-parser/src/parser/stmt.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prqlc/prqlc-parser/src/parser/stmt.rs b/prqlc/prqlc-parser/src/parser/stmt.rs index 52c912120a6e..23f343acf37c 100644 --- a/prqlc/prqlc-parser/src/parser/stmt.rs +++ b/prqlc/prqlc-parser/src/parser/stmt.rs @@ -47,7 +47,7 @@ fn module_contents() -> impl Parser, Error = PError> { .labelled("annotation"); // TODO: we want to confirm that we're not allowing things on the same - // line that shoudln't be; e.g. `let foo = 5 let bar = 6`. We can't + // line that should't be; e.g. `let foo = 5 let bar = 6`. We can't // enforce a new line here because then `module two {let houses = // both.alike}` fails (though we could force a new line after the // `module` if that were helpful) From a36dccbcdc2df61dd217fd46e006eeb5c21749fb Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 15:21:37 -0700 Subject: [PATCH 25/28] --- prqlc/prqlc-parser/src/parser/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prqlc/prqlc-parser/src/parser/expr.rs b/prqlc/prqlc-parser/src/parser/expr.rs index bec0df34ccea..278d90183046 100644 --- a/prqlc/prqlc-parser/src/parser/expr.rs +++ b/prqlc/prqlc-parser/src/parser/expr.rs @@ -17,7 +17,7 @@ pub fn expr_call() -> impl Parser + Clone { lambda_func(expr.clone()).or(func_call(expr)) } -pub fn expr<'a>() -> impl Parser + Clone + 'a { +pub fn expr() -> impl Parser + Clone { recursive(|expr| { let literal = select! { TokenKind::Literal(lit) => ExprKind::Literal(lit) }; From 10ddf919a83833dfc67796087bf9eea94fc1d364 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 15:24:48 -0700 Subject: [PATCH 26/28] --- prqlc/prqlc-parser/src/parser/common.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 14ff246b5de8..123a8345d100 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -25,6 +25,10 @@ pub fn keyword(kw: &'static str) -> impl Parser + just(TokenKind::Keyword(kw.to_string())).ignored() } +/// Our approach to new lines is each item consumes new lines _before_ itself, +/// but not newlines after itself. This allows us to enforce new lines between +/// some items. The only place we handle new lines after an item is in the root +/// parser. pub fn new_line() -> impl Parser + Clone { just(TokenKind::NewLine) // Start is considered a new line, so we can enforce things start on a new From c46a50dda256dcea9f3d72dadff970961fafe88e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 15:25:40 -0700 Subject: [PATCH 27/28] --- prqlc/prqlc-parser/src/parser/common.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prqlc/prqlc-parser/src/parser/common.rs b/prqlc/prqlc-parser/src/parser/common.rs index 123a8345d100..802a5c42e33e 100644 --- a/prqlc/prqlc-parser/src/parser/common.rs +++ b/prqlc/prqlc-parser/src/parser/common.rs @@ -7,7 +7,7 @@ use crate::span::Span; use super::SupportsDocComment; -pub fn ident_part() -> impl Parser + Clone { +pub(crate) fn ident_part() -> impl Parser + Clone { select! { TokenKind::Ident(ident) => ident, TokenKind::Keyword(ident) if &ident == "module" => ident, @@ -21,7 +21,7 @@ pub fn ident_part() -> impl Parser + Clone { }) } -pub fn keyword(kw: &'static str) -> impl Parser + Clone { +pub(crate) fn keyword(kw: &'static str) -> impl Parser + Clone { just(TokenKind::Keyword(kw.to_string())).ignored() } @@ -38,7 +38,7 @@ pub fn new_line() -> impl Parser + Clone { .labelled("new line") } -pub fn ctrl(char: char) -> impl Parser + Clone { +pub(crate) fn ctrl(char: char) -> impl Parser + Clone { just(TokenKind::Control(char)).ignored() } @@ -51,7 +51,7 @@ pub fn into_stmt((annotations, kind): (Vec, StmtKind), span: Span) - } } -pub fn doc_comment() -> impl Parser + Clone { +pub(crate) fn doc_comment() -> impl Parser + Clone { // doc comments must start on a new line, so we enforce a new line (which // can also be a file start) before the doc comment // @@ -67,7 +67,7 @@ pub fn doc_comment() -> impl Parser + Clone { .labelled("doc comment") } -pub fn with_doc_comment<'a, P, O>( +pub(crate) fn with_doc_comment<'a, P, O>( parser: P, ) -> impl Parser + Clone + 'a where From 909bcd199899c5c57855d0ea237ae27a3f332b6d Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 13 Jul 2024 15:32:48 -0700 Subject: [PATCH 28/28] --- prqlc/prqlc/tests/integration/error_messages.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prqlc/prqlc/tests/integration/error_messages.rs b/prqlc/prqlc/tests/integration/error_messages.rs index fdc44a0983a1..ee69ca0666bf 100644 --- a/prqlc/prqlc/tests/integration/error_messages.rs +++ b/prqlc/prqlc/tests/integration/error_messages.rs @@ -98,7 +98,7 @@ fn test_errors() { │ 1 │ Answer: T-H-A-T! │ ┬ - │ ╰── unexpected : while parsing function call + │ ╰── unexpected : ───╯ "###); }