From 2b69abfeab51734ce85a8cdaaf8927132bc1e7be Mon Sep 17 00:00:00 2001 From: psteinroe Date: Wed, 16 Jul 2025 09:07:10 +0200 Subject: [PATCH] feat: add support for named params --- crates/pgt_lexer/src/lexer.rs | 12 ++++ crates/pgt_lexer/src/lib.rs | 30 ++++++++ crates/pgt_lexer_codegen/src/syntax_kind.rs | 2 +- crates/pgt_tokenizer/src/lib.rs | 69 ++++++++++++++++++- .../pgt_tokenizer__tests__named_param_at.snap | 23 +++++++ ...__tests__named_param_colon_identifier.snap | 23 +++++++ ...kenizer__tests__named_param_colon_raw.snap | 23 +++++++ ...izer__tests__named_param_colon_string.snap | 23 +++++++ crates/pgt_tokenizer/src/token.rs | 30 ++++++++ 9 files changed, 231 insertions(+), 4 deletions(-) create mode 100644 crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_at.snap create mode 100644 crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_identifier.snap create mode 100644 crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_raw.snap create mode 100644 crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_string.snap diff --git a/crates/pgt_lexer/src/lexer.rs b/crates/pgt_lexer/src/lexer.rs index db4b4ae2..ad6db297 100644 --- a/crates/pgt_lexer/src/lexer.rs +++ b/crates/pgt_lexer/src/lexer.rs @@ -132,6 +132,18 @@ impl<'a> Lexer<'a> { pgt_tokenizer::TokenKind::Eof => SyntaxKind::EOF, pgt_tokenizer::TokenKind::Backtick => SyntaxKind::BACKTICK, pgt_tokenizer::TokenKind::PositionalParam => SyntaxKind::POSITIONAL_PARAM, + pgt_tokenizer::TokenKind::NamedParam { kind } => { + match kind { + pgt_tokenizer::NamedParamKind::ColonIdentifier { terminated: false } => { + err = "Missing trailing \" to terminate the named parameter"; + } + pgt_tokenizer::NamedParamKind::ColonString { terminated: false } => { + err = "Missing trailing ' to terminate the named parameter"; + } + _ => {} + }; + SyntaxKind::NAMED_PARAM + } pgt_tokenizer::TokenKind::QuotedIdent { terminated } => { if
!terminated { err = "Missing trailing \" to terminate the quoted identifier" diff --git a/crates/pgt_lexer/src/lib.rs b/crates/pgt_lexer/src/lib.rs index 2d8779a7..45fa6c6b 100644 --- a/crates/pgt_lexer/src/lib.rs +++ b/crates/pgt_lexer/src/lib.rs @@ -50,6 +50,36 @@ mod tests { assert!(!errors[0].message.to_string().is_empty()); } + #[test] + fn test_lexing_string_params_with_errors() { + let input = "SELECT :'unterminated string"; + let lexed = lex(input); + + // Should have tokens + assert!(!lexed.is_empty()); + + // Should have an error for the unterminated `:'...` string parameter + let errors = lexed.errors(); + assert!(!errors.is_empty()); + // Check the error message exists + assert!(!errors[0].message.to_string().is_empty()); + } + + #[test] + fn test_lexing_identifier_params_with_errors() { + let input = "SELECT :\"unterminated string"; + let lexed = lex(input); + + // Should have tokens + assert!(!lexed.is_empty()); + + // Should have an error for the unterminated `:"...` identifier parameter + let errors = lexed.errors(); + assert!(!errors.is_empty()); + // Check the error message exists + assert!(!errors[0].message.to_string().is_empty()); + } + #[test] fn test_token_ranges() { let input = "SELECT id"; diff --git a/crates/pgt_lexer_codegen/src/syntax_kind.rs b/crates/pgt_lexer_codegen/src/syntax_kind.rs index 07b7a419..c671e451 100644 --- a/crates/pgt_lexer_codegen/src/syntax_kind.rs +++ b/crates/pgt_lexer_codegen/src/syntax_kind.rs @@ -43,7 +43,7 @@ const PUNCT: &[(&str, &str)] = &[ ("`", "BACKTICK"), ]; -const EXTRA: &[&str] = &["POSITIONAL_PARAM", "ERROR", "COMMENT", "EOF"]; +const EXTRA: &[&str] = &["POSITIONAL_PARAM", "NAMED_PARAM", "ERROR", "COMMENT", "EOF"]; const LITERALS: &[&str] = &[ "BIT_STRING", diff --git a/crates/pgt_tokenizer/src/lib.rs b/crates/pgt_tokenizer/src/lib.rs index 787adcaa..80b66363 100644 --- a/crates/pgt_tokenizer/src/lib.rs +++ b/crates/pgt_tokenizer/src/lib.rs @@ -1,7 +1,7 @@ mod cursor; mod token; use cursor::{Cursor, EOF_CHAR}; -pub use token::{Base, LiteralKind, 
Token, TokenKind}; +pub use token::{Base, LiteralKind, NamedParamKind, Token, TokenKind}; // via: https://github.com/postgres/postgres/blob/db0c96cc18aec417101e37e59fcc53d4bf647915/src/backend/parser/scan.l#L346 // ident_start [A-Za-z\200-\377_] @@ -132,6 +132,46 @@ impl Cursor<'_> { } _ => TokenKind::Dot, }, + '@' => { + if is_ident_start(self.first()) { + // Named parameter with @ prefix. NOTE(review): `@` is also a PostgreSQL prefix operator, so `@x` written without a space lexes as a param here - confirm this is acceptable. + self.eat_while(is_ident_cont); + TokenKind::NamedParam { + kind: NamedParamKind::AtPrefix, + } + } else { + TokenKind::At + } + } + ':' => { + // Named parameters in psql with different substitution styles. + // + // https://www.postgresql.org/docs/current/app-psql.html#APP-PSQL-INTERPOLATION + match self.first() { + '\'' => { + // Named parameter with colon prefix and single quotes. + self.bump(); + let terminated = self.single_quoted_string(); + let kind = NamedParamKind::ColonString { terminated }; + TokenKind::NamedParam { kind } + } + '"' => { + // Named parameter with colon prefix and double quotes. + self.bump(); + let terminated = self.double_quoted_string(); + let kind = NamedParamKind::ColonIdentifier { terminated }; + TokenKind::NamedParam { kind } + } + c if is_ident_start(c) => { + // Named parameter with colon prefix. NOTE(review): the second `:` of a `::` cast followed directly by a type name (e.g. `1::text`) appears to land in this arm and lex `:text` as a named param - confirm and handle `::` if so. + self.eat_while(is_ident_cont); + TokenKind::NamedParam { + kind: NamedParamKind::ColonRaw, + } + } + _ => TokenKind::Colon, + } + } // One-symbol tokens. ';' => TokenKind::Semi, '\\' => TokenKind::Backslash, @@ -140,11 +180,9 @@ impl Cursor<'_> { ')' => TokenKind::CloseParen, '[' => TokenKind::OpenBracket, ']' => TokenKind::CloseBracket, - '@' => TokenKind::At, '#' => TokenKind::Pound, '~' => TokenKind::Tilde, '?' 
=> TokenKind::Question, - ':' => TokenKind::Colon, '$' => { // Dollar quoted strings if is_ident_start(self.first()) || self.first() == '$' { @@ -613,6 +651,31 @@ mod tests { } tokens } + + #[test] + fn named_param_at() { + let result = lex("select 1 from c where id = @id;"); + assert_debug_snapshot!(result); + } + + #[test] + fn named_param_colon_raw() { + let result = lex("select 1 from c where id = :id;"); + assert_debug_snapshot!(result); + } + + #[test] + fn named_param_colon_string() { + let result = lex("select 1 from c where id = :'id';"); + assert_debug_snapshot!(result); + } + + #[test] + fn named_param_colon_identifier() { + let result = lex("select 1 from c where id = :\"id\";"); + assert_debug_snapshot!(result); + } + #[test] fn lex_statement() { let result = lex("select 1;"); diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_at.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_at.snap new file mode 100644 index 00000000..30bbe87f --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_at.snap @@ -0,0 +1,23 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: result +snapshot_kind: text +--- +[ + "select" @ Ident, + " " @ Space, + "1" @ Literal { kind: Int { base: Decimal, empty_int: false } }, + " " @ Space, + "from" @ Ident, + " " @ Space, + "c" @ Ident, + " " @ Space, + "where" @ Ident, + " " @ Space, + "id" @ Ident, + " " @ Space, + "=" @ Eq, + " " @ Space, + "@id" @ NamedParam { kind: AtPrefix }, + ";" @ Semi, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_identifier.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_identifier.snap new file mode 100644 index 00000000..6986ab0e --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_identifier.snap @@ -0,0 +1,23 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: result 
+snapshot_kind: text +--- +[ + "select" @ Ident, + " " @ Space, + "1" @ Literal { kind: Int { base: Decimal, empty_int: false } }, + " " @ Space, + "from" @ Ident, + " " @ Space, + "c" @ Ident, + " " @ Space, + "where" @ Ident, + " " @ Space, + "id" @ Ident, + " " @ Space, + "=" @ Eq, + " " @ Space, + ":\"id\"" @ NamedParam { kind: ColonIdentifier { terminated: true } }, + ";" @ Semi, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_raw.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_raw.snap new file mode 100644 index 00000000..f6db199d --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_raw.snap @@ -0,0 +1,23 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: result +snapshot_kind: text +--- +[ + "select" @ Ident, + " " @ Space, + "1" @ Literal { kind: Int { base: Decimal, empty_int: false } }, + " " @ Space, + "from" @ Ident, + " " @ Space, + "c" @ Ident, + " " @ Space, + "where" @ Ident, + " " @ Space, + "id" @ Ident, + " " @ Space, + "=" @ Eq, + " " @ Space, + ":id" @ NamedParam { kind: ColonRaw }, + ";" @ Semi, +] diff --git a/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_string.snap b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_string.snap new file mode 100644 index 00000000..d9150083 --- /dev/null +++ b/crates/pgt_tokenizer/src/snapshots/pgt_tokenizer__tests__named_param_colon_string.snap @@ -0,0 +1,23 @@ +--- +source: crates/pgt_tokenizer/src/lib.rs +expression: result +snapshot_kind: text +--- +[ + "select" @ Ident, + " " @ Space, + "1" @ Literal { kind: Int { base: Decimal, empty_int: false } }, + " " @ Space, + "from" @ Ident, + " " @ Space, + "c" @ Ident, + " " @ Space, + "where" @ Ident, + " " @ Space, + "id" @ Ident, + " " @ Space, + "=" @ Eq, + " " @ Space, + ":'id'" @ NamedParam { kind: ColonString { terminated: true } }, + ";" @ Semi, +] diff --git 
a/crates/pgt_tokenizer/src/token.rs b/crates/pgt_tokenizer/src/token.rs index 50a7d12a..e3dbaee2 100644 --- a/crates/pgt_tokenizer/src/token.rs +++ b/crates/pgt_tokenizer/src/token.rs @@ -94,6 +94,12 @@ pub enum TokenKind { /// /// see: PositionalParam, + /// Named parameter, e.g., `@name`, `:name`, `:'name'` or `:"name"`; see [`NamedParamKind`] + /// + /// Used by psql variable interpolation and by some query builders, like sqlc. + NamedParam { + kind: NamedParamKind, + }, /// Quoted Identifier, e.g., `"update"` in `update "my_table" set "a" = 5;` /// /// These are case-sensitive, unlike [`TokenKind::Ident`] @@ -104,6 +110,30 @@ pub enum TokenKind { }, } +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum NamedParamKind { + /// e.g. `@name` + /// + /// Used in: + /// - sqlc: https://docs.sqlc.dev/en/latest/howto/named_parameters.html + AtPrefix, + + /// e.g. `:name` (raw substitution) + /// + /// Used in: psql + ColonRaw, + + /// e.g. `:'name'` (quoted string substitution); `terminated` is false when the closing `'` is missing + /// + /// Used in: psql + ColonString { terminated: bool }, + + /// e.g. `:"name"` (quoted identifier substitution); `terminated` is false when the closing `"` is missing + /// + /// Used in: psql + ColonIdentifier { terminated: bool }, +} + /// Parsed token. /// It doesn't contain information about data that has been parsed, /// only the type of the token and its size.