diff --git a/rust/experimental/query_engine/Cargo.toml b/rust/experimental/query_engine/Cargo.toml index 57801d4462..d1cfb03908 100644 --- a/rust/experimental/query_engine/Cargo.toml +++ b/rust/experimental/query_engine/Cargo.toml @@ -3,6 +3,7 @@ members = [ "expressions", "parser-abstractions", "kql-parser", + "kql-parser/src/macros", "ottl-parser", "engine-recordset", "engine-recordset-otlp-bridge", @@ -26,7 +27,11 @@ hex = "0.4.3" opentelemetry-proto = "0.31.0" pest = "2.8" pest_derive = "2.8" +pest_meta = "2.8" +proc-macro-crate = "3.4.0" +proc-macro2 = "1.0" prost = "0.14" +quote = "1.0" regex = "1.11.1" serde_json = "1.0.140" sha2 = "0.10.8" diff --git a/rust/experimental/query_engine/kql-parser/Cargo.toml b/rust/experimental/query_engine/kql-parser/Cargo.toml index 4d396067fc..2e33a4cb99 100644 --- a/rust/experimental/query_engine/kql-parser/Cargo.toml +++ b/rust/experimental/query_engine/kql-parser/Cargo.toml @@ -12,7 +12,10 @@ rust-version.workspace = true chrono = { workspace = true } pest = { workspace = true } pest_derive = { workspace = true } +pest_meta = { workspace = true } +proc-macro2 = { workspace = true } regex = { workspace = true } data_engine_expressions = { path = "../expressions" } +data_engine_kql_parser_macros = { path = "src/macros" } data_engine_parser_abstractions = { path = "../parser-abstractions" } \ No newline at end of file diff --git a/rust/experimental/query_engine/kql-parser/src/base.pest b/rust/experimental/query_engine/kql-parser/src/base.pest new file mode 100644 index 0000000000..5eec867e1a --- /dev/null +++ b/rust/experimental/query_engine/kql-parser/src/base.pest @@ -0,0 +1,329 @@ +// Base KQL Grammar is heavily influenced by the following sources: +// https://github.com/microsoft/Kusto-Query-Language/blob/master/grammar/KqlTokens.g4 +// https://github.com/microsoft/Kusto-Query-Language/blob/master/grammar/Kql.g4 + +// Parsers that support KQL expressions can use this grammar and provide an additional grammar +// with 
queries, tabular expressions, etc. that the language supports, as well as any special +// language features. + +// These two special rules, when defined, are implicitly allowed at: +// - at every sequence (split by ~) +// - between every repetition (+ or *) +// Atomics (marked with @) are excluded +// See https://pest.rs/book/grammars/syntax.html#implicit-whitespace +WHITESPACE = _{ " " | NEWLINE | "\t" } +COMMENT = _{ ("/*" ~ (!"*/" ~ ANY)* ~ "*/") | ("//" ~ (!"\n" ~ ANY)*) } + +// Math Tokens +minus_token = { "-" } +plus_token = { "+" } +multiply_token = { "*" } +divide_token = { "/" } +modulo_token = { "%" } +positive_infinity_token = { "+inf" } +negative_infinity_token = { "-inf" } + +// Logical Tokens +invalid_equals_token = @{ "=" ~ !("="|"~") } +equals_token = @{ "==" } +equals_insensitive_token = @{ "=~" } +not_equals_token = @{ "!=" } +not_equals_insensitive_token = @{ "!~" } +greater_than_token = @{ ">" ~ !"=" } +greater_than_or_equal_to_token = @{ ">=" } +less_than_token = @{ "<" ~ !"=" } +less_than_or_equal_to_token = @{ "<=" } +and_token = @{ "and" } +or_token = @{ "or" } + +// Comparison Tokens +contains_token = @{ "contains" } +contains_cs_token = @{ "contains_cs" } +has_token = @{ "has" } +has_cs_token = @{ "has_cs" } +in_token = @{ "in" } +in_insensitive_token = @{ "in~" } +not_contains_token = @{ "!contains" } +not_contains_cs_token = @{ "!contains_cs" } +not_has_token = @{ "!has" } +not_has_cs_token = @{ "!has_cs" } +not_in_token = @{ "!in" } +not_in_insensitive_token = @{ "!in~" } +matches_regex_token = @{ "matches regex" } + +// Misc Tokens +statement_end_token = { &";" } + +// Literals +true_literal = @{ "true" | "True" | "TRUE" } +false_literal = @{ "false" | "False" | "FALSE" } +integer_literal = @{ + "-"? 
+ ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) +} +double_quote_string_char = _{ + !("\"" | "\\") ~ ANY + | "\\" ~ ("\"" | "\\" | "n" | "r" | "t") + | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4}) +} +single_quote_string_char = _{ + !("'" | "\\") ~ ANY + | "\\" ~ ("'" | "\\" | "n" | "r" | "t") + | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4}) +} +exponent_literal = { ^"e" ~ (plus_token | minus_token)? ~ integer_literal } +double_literal = @{ + (integer_literal ~ "." ~ integer_literal ~ exponent_literal?) + | (integer_literal ~ exponent_literal) +} +string_literal = @{ + ("\"" ~ double_quote_string_char* ~ "\"") + | ("'" ~ single_quote_string_char* ~ "'") +} +type_literal = { + "bool" + | "datetime" + | "decimal" + | "double" + | "dynamic" + | "guid" + | "int" + | "long" + | "real" + | "regex" + | "string" + | "timespan" +} +null_literal = { type_literal ~ "(" ~ "null" ~ ")" } +identifier_literal = @{ ("_" | ASCII_ALPHA) ~ ("_" | ASCII_ALPHANUMERIC)* } +identifier_or_pattern_literal = ${ + (("_" | ASCII_ALPHA | "*") ~ ("_" | ASCII_ALPHANUMERIC | "*")* ~ !("["|".")) + | ("[" ~ string_literal ~ "]") +} +datetime_literal = { (ASCII_ALPHANUMERIC|"-"|"+"|":"|"/"|"."|",")+ } +time_literal = { + "-"? ~ (ASCII_DIGIT+ ~ ".")? ~ ASCII_DIGIT+ ~ ":" ~ ASCII_DIGIT+ ~ ":" ~ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? +} +time_units_literal = @{ + "day" ~ "s"? + | "hour" ~ "s"? + | "microsecond" ~ "s"? + | "millisecond" ~ "s"? + | "minute" ~ "s"? + | "second" ~ "s"? + | "tick" ~ "s"? + | "ms" + | "d" + | "h" + | "m" + | "s" +} + +// Expressions +accessor_index = _{ "[" ~ (integer_literal | string_literal | (minus_token? ~ scalar_expression)) ~ "]" } +accessor = _{ (identifier_literal | ("[" ~ string_literal ~ "]")) ~ accessor_index? } +accessor_expression = { accessor ~ (("." 
~ accessor)|accessor_index)* } + +real_expression = { "real(" ~ (positive_infinity_token|negative_infinity_token|double_literal|integer_literal) ~ ")" } +datetime_expression = { "datetime(" ~ (string_literal|datetime_literal) ~ ")" } +time_expression = { + (double_literal|integer_literal) ~ time_units_literal + | "timespan(" ~ (time_literal|string_literal) ~ ")" + | "timespan(" ~ (double_literal|integer_literal) ~ time_units_literal? ~ ")" +} +regex_expression = { "regex(" ~ string_literal ~ ("," ~ string_literal)? ~ ")" } +dynamic_array_expression = { "[" ~ (dynamic_inner_expression ~ ("," ~ dynamic_inner_expression)*)? ~ "]" } +dynamic_map_item_expression = { string_literal ~ ":" ~ dynamic_inner_expression } +dynamic_map_expression = { "{" ~ (dynamic_map_item_expression ~ ("," ~ dynamic_map_item_expression)*)? ~ "}" } +dynamic_inner_expression = _{ dynamic_array_expression|dynamic_map_expression|type_unary_expressions } +dynamic_expression = { "dynamic" ~ "(" ~ dynamic_inner_expression ~ ")" } +type_unary_expressions = { + null_literal + | real_expression + | datetime_expression + | time_expression + | regex_expression + | dynamic_expression + | true_literal + | false_literal + | double_literal + | integer_literal + | string_literal +} + +get_type_expression = { "gettype" ~ "(" ~ scalar_expression ~ ")" } +typeof_expression = { "typeof" ~ "(" ~ type_literal ~ ")" } + +conditional_expression = { ("iff"|"iif") ~ "(" ~ logical_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" } +case_expression = { "case" ~ "(" ~ logical_expression ~ "," ~ scalar_expression ~ ("," ~ logical_expression ~ "," ~ scalar_expression)* ~ "," ~ scalar_expression ~ ")" } +coalesce_expression = { "coalesce" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")" } +conditional_unary_expressions = { + conditional_expression + | case_expression + | coalesce_expression +} + +tostring_expression = { "tostring" ~ "(" ~ scalar_expression ~ ")" } 
+toint_expression = { "toint" ~ "(" ~ scalar_expression ~ ")" } +tobool_expression = { "tobool" ~ "(" ~ scalar_expression ~ ")" } +tofloat_expression = { "tofloat" ~ "(" ~ scalar_expression ~ ")" } +tolong_expression = { "tolong" ~ "(" ~ scalar_expression ~ ")" } +toreal_expression = { "toreal" ~ "(" ~ scalar_expression ~ ")" } +todouble_expression = { "todouble" ~ "(" ~ scalar_expression ~ ")" } +todatetime_expression = { "todatetime" ~ "(" ~ scalar_expression ~ ")" } +totimespan_expression = { "totimespan" ~ "(" ~ scalar_expression ~ ")" } +conversion_unary_expressions = { + tostring_expression + | toint_expression + | tobool_expression + | tofloat_expression + | tolong_expression + | toreal_expression + | todouble_expression + | todatetime_expression + | totimespan_expression +} + +strlen_expression = { "strlen" ~ "(" ~ scalar_expression ~ ")" } +replace_string_expression = { "replace_string" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" } +substring_expression = { "substring" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)? ~ ")" } +strcat_expression = { "strcat" ~ scalar_list_expression } +strcat_delim_expression = { "strcat_delim" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")" } +extract_expression = { "extract" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" } +string_unary_expressions = { + strlen_expression + | replace_string_expression + | substring_expression + | strcat_expression + | strcat_delim_expression + | extract_expression +} + +parse_json_expression = { "parse_json" ~ "(" ~ scalar_expression ~ ")" } +parse_regex_expression = { "parse_regex" ~ "(" ~ scalar_expression ~ ("," ~ scalar_expression)? 
~ ")" } +parse_unary_expressions = { + parse_json_expression + | parse_regex_expression +} + +array_concat_expression = { "array_concat" ~ scalar_list_expression } +array_unary_expressions = { + array_concat_expression +} + +negate_expression = { "-" ~ scalar_unary_expression } +bin_expression = { "bin" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ")" } +math_unary_expressions = { + negate_expression + | bin_expression +} + +now_expression = { "now" ~ "(" ~ scalar_expression? ~ ")" } +temporal_unary_expressions = { + now_expression +} + +not_expression = { "not" ~ "(" ~ logical_expression ~ ")" } +logical_unary_expressions = { + not_expression +} + +extract_json_expression = { "extract_json" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ typeof_expression)? ~ ")" } + +invoke_function_argument_expression = { + (identifier_literal ~ "=" ~ scalar_expression) + | scalar_expression +} +invoke_function_expression = { identifier_literal ~ "(" ~ (invoke_function_argument_expression ~ ("," ~ invoke_function_argument_expression)*)? ~ ")" } + +/* Note: Order is important here. Once Pest has matched something it won't go +backwards. For example if integer_literal is defined before time_expression "1h" +would be parsed as integer_literal(1) and the remaining "h" would be fed into +the next rule.
*/ +scalar_unary_expression = { + type_unary_expressions + | get_type_expression + | conditional_unary_expressions + | conversion_unary_expressions + | string_unary_expressions + | array_unary_expressions + | math_unary_expressions + | temporal_unary_expressions + | logical_unary_expressions + | parse_unary_expressions + | extract_json_expression + | invoke_function_expression + | accessor_expression + | "(" ~ scalar_expression ~ ")" +} + +scalar_arithmetic_binary_expression = _{ + (multiply_token|divide_token|modulo_token) ~ scalar_unary_expression + | (plus_token|minus_token) ~ scalar_unary_expression +} +scalar_logical_binary_expression = _{ + (equals_token|equals_insensitive_token|not_equals_token|not_equals_insensitive_token|greater_than_token|greater_than_or_equal_to_token|less_than_token|less_than_or_equal_to_token|invalid_equals_token) ~ scalar_unary_expression + | matches_regex_token ~ scalar_unary_expression + | (not_contains_cs_token|not_contains_token|not_has_cs_token|not_has_token|contains_cs_token|contains_token|has_cs_token|has_token) ~ scalar_unary_expression + | (not_in_insensitive_token|not_in_token|in_insensitive_token|in_token) ~ scalar_list_expression + | (and_token|or_token) ~ scalar_unary_expression +} + +scalar_expression = { + scalar_unary_expression ~ (scalar_arithmetic_binary_expression|scalar_logical_binary_expression)* +} + +scalar_list_expression = { + "(" ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")" +} + +logical_expression = { + scalar_expression +} + +assignment_expression = { accessor_expression ~ "=" ~ scalar_expression } + +extend_expression = { "extend" ~ assignment_expression ~ ("," ~ assignment_expression)* } +project_expression = { "project" ~ (assignment_expression|accessor_expression) ~ ("," ~ (assignment_expression|accessor_expression))* } +project_keep_expression = { "project-keep" ~ (identifier_or_pattern_literal|accessor_expression) ~ ("," ~ (identifier_or_pattern_literal|accessor_expression))* } 
+project_away_expression = { "project-away" ~ (identifier_or_pattern_literal|accessor_expression) ~ ("," ~ (identifier_or_pattern_literal|accessor_expression))* } +project_rename_expression = { "project-rename" ~ assignment_expression ~ ("," ~ assignment_expression)* } +where_expression = { "where" ~ logical_expression } + +average_aggregate_expression = { + "avg" ~ "(" ~ scalar_expression ~ ")" +} +count_aggregate_expression = { + "count" ~ "(" ~ ")" +} +maximum_aggregate_expression = { + "max" ~ "(" ~ scalar_expression ~ ")" +} +minimum_aggregate_expression = { + "min" ~ "(" ~ scalar_expression ~ ")" +} +sum_aggregate_expression = { + "sum" ~ "(" ~ scalar_expression ~ ")" +} + +aggregate_expressions = _{ + average_aggregate_expression + | count_aggregate_expression + | maximum_aggregate_expression + | minimum_aggregate_expression + | sum_aggregate_expression +} + +aggregate_expression = { + identifier_literal ~ "=" ~ aggregate_expressions + | aggregate_expressions +} + +group_by_expression = { + identifier_literal ~ "=" ~ scalar_expression + | scalar_expression +} + +variable_definition_expression = { "let" ~ identifier_literal ~ "=" ~ scalar_expression ~ statement_end_token } + + diff --git a/rust/experimental/query_engine/kql-parser/src/base_parser.rs b/rust/experimental/query_engine/kql-parser/src/base_parser.rs new file mode 100644 index 0000000000..75ffc7614b --- /dev/null +++ b/rust/experimental/query_engine/kql-parser/src/base_parser.rs @@ -0,0 +1,56 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Base for parsers that support KQL-like syntax based on the grammar defined in base.pest +//! +//! Multiple query languages may share a common base grammar for shared constructs such as +//! scalar expressions, but differ in their overall query structure or supported tabular expressions. +//! Typically, parsers for specific KQL-like languages will use the grammar defined in base.pest +//! 
along with their own grammar file to define the full language syntax. +//! +//! In order to have common utilities for parsing the expressions in base.pest, we define a +//! BasePestParser here that only includes the base.pest grammar. Many of the parsing utilities +//! for handling scalar expressions will be generic over derived `Rule` types, that can be converted +//! to the base rules derived for [`BasePestParser`]. +//! +//! This module also defines the [`TryAsBaseRule`] trait for converting derived parser `Rule` to the +//! base parser `Rule`. It's not necessary to implement this trait manually, as it can be derived: +//! ```ignore +//! use data_engine_kql_parser_macros::BaseRuleCompatible; +//! +//! #[derive(pest_derive::Parser, BaseRuleCompatible)] +//! #[grammar = "path/to/derived_language.pest"] +//! struct DerivedLanguagePestParser {} +//! ``` + +use data_engine_kql_parser_macros::ScalarExprPrattParser; +use data_engine_parser_abstractions::ParserError; +use pest::{RuleType, iterators::Pair}; + +use crate::ScalarExprRules; + +#[derive(pest_derive::Parser, ScalarExprPrattParser)] +#[grammar = "base.pest"] +pub struct BasePestParser; + +/// Trait for converting derived parser Rule types to the base parser Rule type. +/// This is used to allow parsing utilities to work with different parser Rule types +/// that share a common base grammar. +/// +/// It's not necessary to implement this trait manually, as it can be derived for any +/// parsers that use the base.pest grammar via the +/// [`BaseRuleCompatible`](data_engine_kql_parser_macros::BaseRuleCompatible) macro.
+pub trait TryAsBaseRule { + fn try_as_base_rule(&self) -> Result; +} + +impl TryAsBaseRule for Pair<'_, T> +where + Rule: TryFrom, + T: RuleType + ScalarExprRules + 'static, + E: Into, +{ + fn try_as_base_rule(&self) -> Result { + Rule::try_from(self.as_rule()).map_err(|e| e.into()) + } +} diff --git a/rust/experimental/query_engine/kql-parser/src/kql.pest b/rust/experimental/query_engine/kql-parser/src/kql.pest index c2bc4cedbb..8c0c0eec95 100644 --- a/rust/experimental/query_engine/kql-parser/src/kql.pest +++ b/rust/experimental/query_engine/kql-parser/src/kql.pest @@ -1,324 +1,6 @@ -// KQL Grammar is heavily influenced by the following sources: -// https://github.com/microsoft/Kusto-Query-Language/blob/master/grammar/KqlTokens.g4 -// https://github.com/microsoft/Kusto-Query-Language/blob/master/grammar/Kql.g4 - -// These two special rules, when defined, are implicitly allowed at: -// - at every sequence (split by ~) -// - between every repetition (+ or *) -// Atomics (marked with @) are excluded -// See https://pest.rs/book/grammars/syntax.html#implicit-whitespace -WHITESPACE = _{ " " | NEWLINE | "\t" } -COMMENT = _{ ("/*" ~ (!"*/" ~ ANY)* ~ "*/") | ("//" ~ (!"\n" ~ ANY)*) } - -// Math Tokens -minus_token = { "-" } -plus_token = { "+" } -multiply_token = { "*" } -divide_token = { "/" } -modulo_token = { "%" } -positive_infinity_token = { "+inf" } -negative_infinity_token = { "-inf" } - -// Logical Tokens -invalid_equals_token = @{ "=" ~ !("="|"~") } -equals_token = @{ "==" } -equals_insensitive_token = @{ "=~" } -not_equals_token = @{ "!=" } -not_equals_insensitive_token = @{ "!~" } -greater_than_token = @{ ">" ~ !"=" } -greater_than_or_equal_to_token = @{ ">=" } -less_than_token = @{ "<" ~ !"=" } -less_than_or_equal_to_token = @{ "<=" } -and_token = @{ "and" } -or_token = @{ "or" } - -// Comparison Tokens -contains_token = @{ "contains" } -contains_cs_token = @{ "contains_cs" } -has_token = @{ "has" } -has_cs_token = @{ "has_cs" } -in_token = @{ "in" } 
-in_insensitive_token = @{ "in~" } -not_contains_token = @{ "!contains" } -not_contains_cs_token = @{ "!contains_cs" } -not_has_token = @{ "!has" } -not_has_cs_token = @{ "!has_cs" } -not_in_token = @{ "!in" } -not_in_insensitive_token = @{ "!in~" } -matches_regex_token = @{ "matches regex" } - -// Misc Tokens -statement_end_token = { &";" } - -// Literals -true_literal = @{ "true" | "True" | "TRUE" } -false_literal = @{ "false" | "False" | "FALSE" } -integer_literal = @{ - "-"? - ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) -} -double_quote_string_char = _{ - !("\"" | "\\") ~ ANY - | "\\" ~ ("\"" | "\\" | "n" | "r" | "t") - | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4}) -} -single_quote_string_char = _{ - !("'" | "\\") ~ ANY - | "\\" ~ ("'" | "\\" | "n" | "r" | "t") - | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4}) -} -exponent_literal = { ^"e" ~ (plus_token | minus_token)? ~ integer_literal } -double_literal = @{ - (integer_literal ~ "." ~ integer_literal ~ exponent_literal?) - | (integer_literal ~ exponent_literal) -} -string_literal = @{ - ("\"" ~ double_quote_string_char* ~ "\"") - | ("'" ~ single_quote_string_char* ~ "'") -} -type_literal = { - "bool" - | "datetime" - | "decimal" - | "double" - | "dynamic" - | "guid" - | "int" - | "long" - | "real" - | "regex" - | "string" - | "timespan" -} -null_literal = { type_literal ~ "(" ~ "null" ~ ")" } -identifier_literal = @{ ("_" | ASCII_ALPHA) ~ ("_" | ASCII_ALPHANUMERIC)* } -identifier_or_pattern_literal = ${ - (("_" | ASCII_ALPHA | "*") ~ ("_" | ASCII_ALPHANUMERIC | "*")* ~ !("["|".")) - | ("[" ~ string_literal ~ "]") -} -datetime_literal = { (ASCII_ALPHANUMERIC|"-"|"+"|":"|"/"|"."|",")+ } -time_literal = { - "-"? ~ (ASCII_DIGIT+ ~ ".")? ~ ASCII_DIGIT+ ~ ":" ~ ASCII_DIGIT+ ~ ":" ~ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? -} -time_units_literal = @{ - "day" ~ "s"? - | "hour" ~ "s"? - | "microsecond" ~ "s"? - | "millisecond" ~ "s"? - | "minute" ~ "s"? - | "second" ~ "s"? - | "tick" ~ "s"? 
- | "ms" - | "d" - | "h" - | "m" - | "s" -} - -// Expressions -accessor_index = _{ "[" ~ (integer_literal | string_literal | (minus_token? ~ scalar_expression)) ~ "]" } -accessor = _{ (identifier_literal | ("[" ~ string_literal ~ "]")) ~ accessor_index? } -accessor_expression = { accessor ~ (("." ~ accessor)|accessor_index)* } - -real_expression = { "real(" ~ (positive_infinity_token|negative_infinity_token|double_literal|integer_literal) ~ ")" } -datetime_expression = { "datetime(" ~ (string_literal|datetime_literal) ~ ")" } -time_expression = { - (double_literal|integer_literal) ~ time_units_literal - | "timespan(" ~ (time_literal|string_literal) ~ ")" - | "timespan(" ~ (double_literal|integer_literal) ~ time_units_literal? ~ ")" -} -regex_expression = { "regex(" ~ string_literal ~ ("," ~ string_literal)? ~ ")" } -dynamic_array_expression = { "[" ~ (dynamic_inner_expression ~ ("," ~ dynamic_inner_expression)*)? ~ "]" } -dynamic_map_item_expression = { string_literal ~ ":" ~ dynamic_inner_expression } -dynamic_map_expression = { "{" ~ (dynamic_map_item_expression ~ ("," ~ dynamic_map_item_expression)*)? 
~ "}" } -dynamic_inner_expression = _{ dynamic_array_expression|dynamic_map_expression|type_unary_expressions } -dynamic_expression = { "dynamic" ~ "(" ~ dynamic_inner_expression ~ ")" } -type_unary_expressions = { - null_literal - | real_expression - | datetime_expression - | time_expression - | regex_expression - | dynamic_expression - | true_literal - | false_literal - | double_literal - | integer_literal - | string_literal -} - -get_type_expression = { "gettype" ~ "(" ~ scalar_expression ~ ")" } -typeof_expression = { "typeof" ~ "(" ~ type_literal ~ ")" } - -conditional_expression = { ("iff"|"iif") ~ "(" ~ logical_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" } -case_expression = { "case" ~ "(" ~ logical_expression ~ "," ~ scalar_expression ~ ("," ~ logical_expression ~ "," ~ scalar_expression)* ~ "," ~ scalar_expression ~ ")" } -coalesce_expression = { "coalesce" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")" } -conditional_unary_expressions = { - conditional_expression - | case_expression - | coalesce_expression -} - -tostring_expression = { "tostring" ~ "(" ~ scalar_expression ~ ")" } -toint_expression = { "toint" ~ "(" ~ scalar_expression ~ ")" } -tobool_expression = { "tobool" ~ "(" ~ scalar_expression ~ ")" } -tofloat_expression = { "tofloat" ~ "(" ~ scalar_expression ~ ")" } -tolong_expression = { "tolong" ~ "(" ~ scalar_expression ~ ")" } -toreal_expression = { "toreal" ~ "(" ~ scalar_expression ~ ")" } -todouble_expression = { "todouble" ~ "(" ~ scalar_expression ~ ")" } -todatetime_expression = { "todatetime" ~ "(" ~ scalar_expression ~ ")" } -totimespan_expression = { "totimespan" ~ "(" ~ scalar_expression ~ ")" } -conversion_unary_expressions = { - tostring_expression - | toint_expression - | tobool_expression - | tofloat_expression - | tolong_expression - | toreal_expression - | todouble_expression - | todatetime_expression - | totimespan_expression -} - -strlen_expression = { "strlen" 
~ "(" ~ scalar_expression ~ ")" } -replace_string_expression = { "replace_string" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" } -substring_expression = { "substring" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)? ~ ")" } -strcat_expression = { "strcat" ~ scalar_list_expression } -strcat_delim_expression = { "strcat_delim" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")" } -extract_expression = { "extract" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" } -string_unary_expressions = { - strlen_expression - | replace_string_expression - | substring_expression - | strcat_expression - | strcat_delim_expression - | extract_expression -} - -parse_json_expression = { "parse_json" ~ "(" ~ scalar_expression ~ ")" } -parse_regex_expression = { "parse_regex" ~ "(" ~ scalar_expression ~ ("," ~ scalar_expression)? ~ ")" } -parse_unary_expressions = { - parse_json_expression - | parse_regex_expression -} - -array_concat_expression = { "array_concat" ~ scalar_list_expression } -array_unary_expressions = { - array_concat_expression -} - -negate_expression = { "-" ~ scalar_unary_expression } -bin_expression = { "bin" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ")" } -math_unary_expressions = { - negate_expression - | bin_expression -} - -now_expression = { "now" ~ "(" ~ scalar_expression? ~ ")" } -temporal_unary_expressions = { - now_expression -} - -not_expression = { "not" ~ "(" ~ logical_expression ~ ")" } -logical_unary_expressions = { - not_expression -} - -extract_json_expression = { "extract_json" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ typeof_expression)? 
~ ")" } - -invoke_function_argument_expression = { - (identifier_literal ~ "=" ~ scalar_expression) - | scalar_expression -} -invoke_function_expression = { identifier_literal ~ "(" ~ (invoke_function_argument_expression ~ ("," ~ invoke_function_argument_expression)*)? ~ ")" } - -/* Note: Order is imporant here. Once Pest has matched something it won't go -backwards. For example if integer_literal is defined before time_expression "1h" -would be parsed as integer_literal(1) and the remaining "h" would be fed into -the next rule. */ -scalar_unary_expression = { - type_unary_expressions - | get_type_expression - | conditional_unary_expressions - | conversion_unary_expressions - | string_unary_expressions - | array_unary_expressions - | math_unary_expressions - | temporal_unary_expressions - | logical_unary_expressions - | parse_unary_expressions - | extract_json_expression - | invoke_function_expression - | accessor_expression - | "(" ~ scalar_expression ~ ")" -} - -scalar_arithmetic_binary_expression = _{ - (multiply_token|divide_token|modulo_token) ~ scalar_unary_expression - | (plus_token|minus_token) ~ scalar_unary_expression -} -scalar_logical_binary_expression = _{ - (equals_token|equals_insensitive_token|not_equals_token|not_equals_insensitive_token|greater_than_token|greater_than_or_equal_to_token|less_than_token|less_than_or_equal_to_token|invalid_equals_token) ~ scalar_unary_expression - | matches_regex_token ~ scalar_unary_expression - | (not_contains_cs_token|not_contains_token|not_has_cs_token|not_has_token|contains_cs_token|contains_token|has_cs_token|has_token) ~ scalar_unary_expression - | (not_in_insensitive_token|not_in_token|in_insensitive_token|in_token) ~ scalar_list_expression - | (and_token|or_token) ~ scalar_unary_expression -} - -scalar_expression = { - scalar_unary_expression ~ (scalar_arithmetic_binary_expression|scalar_logical_binary_expression)* -} - -scalar_list_expression = { - "(" ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")" 
-} - -logical_expression = { - scalar_expression -} - -assignment_expression = { accessor_expression ~ "=" ~ scalar_expression } - -extend_expression = { "extend" ~ assignment_expression ~ ("," ~ assignment_expression)* } -project_expression = { "project" ~ (assignment_expression|accessor_expression) ~ ("," ~ (assignment_expression|accessor_expression))* } -project_keep_expression = { "project-keep" ~ (identifier_or_pattern_literal|accessor_expression) ~ ("," ~ (identifier_or_pattern_literal|accessor_expression))* } -project_away_expression = { "project-away" ~ (identifier_or_pattern_literal|accessor_expression) ~ ("," ~ (identifier_or_pattern_literal|accessor_expression))* } -project_rename_expression = { "project-rename" ~ assignment_expression ~ ("," ~ assignment_expression)* } -where_expression = { "where" ~ logical_expression } - -average_aggregate_expression = { - "avg" ~ "(" ~ scalar_expression ~ ")" -} -count_aggregate_expression = { - "count" ~ "(" ~ ")" -} -maximum_aggregate_expression = { - "max" ~ "(" ~ scalar_expression ~ ")" -} -minimum_aggregate_expression = { - "min" ~ "(" ~ scalar_expression ~ ")" -} -sum_aggregate_expression = { - "sum" ~ "(" ~ scalar_expression ~ ")" -} - -aggregate_expressions = _{ - average_aggregate_expression - | count_aggregate_expression - | maximum_aggregate_expression - | minimum_aggregate_expression - | sum_aggregate_expression -} - -aggregate_expression = { - identifier_literal ~ "=" ~ aggregate_expressions - | aggregate_expressions -} - -group_by_expression = { - identifier_literal ~ "=" ~ scalar_expression - | scalar_expression -} +// KQL Grammar. This inherits rules from the base grammar and defines +// the queries, tabular expressions, and other language features unique +// to this KQL parser implementation. 
summarize_expression = { "summarize" @@ -341,8 +23,6 @@ tabular_expression = { identifier_literal ~ ("|" ~ tabular_expressions)* } -variable_definition_expression = { "let" ~ identifier_literal ~ "=" ~ scalar_expression ~ statement_end_token } - user_defined_function_parameter_definition_expression = { identifier_literal ~ ":" diff --git a/rust/experimental/query_engine/kql-parser/src/kql_parser.rs b/rust/experimental/query_engine/kql-parser/src/kql_parser.rs index d6eadd5cb9..f9766433a3 100644 --- a/rust/experimental/query_engine/kql-parser/src/kql_parser.rs +++ b/rust/experimental/query_engine/kql-parser/src/kql_parser.rs @@ -1,12 +1,16 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 +use data_engine_expressions::QueryLocation; +use data_engine_kql_parser_macros::BaseRuleCompatible; use data_engine_parser_abstractions::*; +use pest::error::Error; use pest_derive::Parser; use crate::query_expression::parse_query; -#[derive(Parser)] +#[derive(Parser, BaseRuleCompatible)] +#[grammar = "base.pest"] #[grammar = "kql.pest"] pub(crate) struct KqlPestParser; @@ -35,6 +39,33 @@ pub(crate) fn map_kql_errors(error: ParserError) -> ParserError { } } +pub fn map_parse_error(query: &str, pest_error: Error) -> ParserError { + let (start, end) = match pest_error.location { + pest::error::InputLocation::Pos(p) => (0, p), + pest::error::InputLocation::Span(s) => s, + }; + + let (line, column) = match pest_error.line_col { + pest::error::LineColLocation::Pos(p) => p, + pest::error::LineColLocation::Span(l, _) => l, + }; + + let content = if line > 0 && column > 0 { + &query + .lines() + .nth(line - 1) + .expect("Query line did not exist")[column - 1..] 
+ } else { + &query[start..end] + }; + + ParserError::SyntaxNotSupported( + QueryLocation::new(start, end, line, column) + .expect("QueryLocation could not be constructed"), + format!("Syntax '{content}' supplied in query is not supported"), + ) +} + #[cfg(test)] mod tests { use super::*; diff --git a/rust/experimental/query_engine/kql-parser/src/lib.rs b/rust/experimental/query_engine/kql-parser/src/lib.rs index 79297c8c27..6ddc520ba9 100644 --- a/rust/experimental/query_engine/kql-parser/src/lib.rs +++ b/rust/experimental/query_engine/kql-parser/src/lib.rs @@ -17,7 +17,10 @@ pub(crate) mod scalar_primitive_expressions; pub(crate) mod scalar_string_function_expressions; pub(crate) mod scalar_temporal_function_expressions; pub(crate) mod shared_expressions; -pub(crate) mod tabular_expressions; +pub mod tabular_expressions; + +pub mod base_parser; +pub use scalar_expression::ScalarExprRules; pub use kql_parser::*; diff --git a/rust/experimental/query_engine/kql-parser/src/logical_expressions.rs b/rust/experimental/query_engine/kql-parser/src/logical_expressions.rs index 199b9fe50f..d86845a94f 100644 --- a/rust/experimental/query_engine/kql-parser/src/logical_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/logical_expressions.rs @@ -5,12 +5,19 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; use pest::iterators::Pair; -use crate::{Rule, scalar_expression::parse_scalar_expression}; +use crate::{ + base_parser::TryAsBaseRule, + scalar_expression::{ScalarExprRules, parse_scalar_expression}, +}; -pub(crate) fn parse_logical_expression( - logical_expression_rule: Pair, +pub(crate) fn parse_logical_expression<'a, R>( + logical_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + Pair<'a, R>: TryAsBaseRule, + R: ScalarExprRules + 'static, +{ let scalar = parse_scalar_expression(logical_expression_rule, scope)?; to_logical_expression(scalar, scope) @@ -42,13 +49,13 @@ mod tests { use 
pest::Parser; use regex::Regex; - use crate::KqlPestParser; + use crate::base_parser::{BasePestParser, Rule}; use super::*; #[test] fn test_pest_parse_comparison_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::logical_expression, &[ "1 == 1", @@ -97,7 +104,7 @@ mod tests { )), ); - let mut result = KqlPestParser::parse(Rule::logical_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::logical_expression, input).unwrap(); let mut expression = parse_logical_expression(result.next().unwrap(), &state).unwrap(); @@ -111,7 +118,7 @@ mod tests { let run_test_failure = |input: &str, expected: &str| { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::logical_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::logical_expression, input).unwrap(); let error = parse_logical_expression(result.next().unwrap(), &state).unwrap_err(); @@ -331,7 +338,7 @@ mod tests { #[test] fn test_pest_parse_logical_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::logical_expression, &[ "true", @@ -359,7 +366,7 @@ mod tests { state.push_variable_name("variable"); - let mut result = KqlPestParser::parse(Rule::logical_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::logical_expression, input).unwrap(); let expression = parse_logical_expression(result.next().unwrap(), &state).unwrap(); @@ -369,7 +376,7 @@ mod tests { let run_test_failure = |input: &str, expected_id: &str, expected_msg: &str| { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::logical_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::logical_expression, input).unwrap(); let error = parse_logical_expression(result.next().unwrap(), &state).unwrap_err(); @@ -555,7 +562,7 @@ mod tests { state.push_variable_name("variable"); state.push_variable_name("var1"); - let mut 
result = KqlPestParser::parse(Rule::logical_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::logical_expression, input).unwrap(); let expression = parse_logical_expression(result.next().unwrap(), &state).unwrap(); diff --git a/rust/experimental/query_engine/kql-parser/src/macros/Cargo.toml b/rust/experimental/query_engine/kql-parser/src/macros/Cargo.toml new file mode 100644 index 0000000000..2b502a4f52 --- /dev/null +++ b/rust/experimental/query_engine/kql-parser/src/macros/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "data_engine_kql_parser_macros" +version.workspace = true +authors.workspace = true +edition.workspace = true +repository.workspace = true +license.workspace = true +publish.workspace = true +rust-version.workspace = true + +[lib] +proc-macro = true + +[dependencies] +pest_meta = { workspace = true } +proc-macro2 = { workspace = true } +proc-macro-crate = { workspace = true } +quote = { workspace = true } + +[dev-dependencies] +data_engine_kql_parser = { path = "../../" } +data_engine_parser_abstractions = { path = "../../../parser-abstractions" } +pest = { workspace = true } +pest_derive = { workspace = true } diff --git a/rust/experimental/query_engine/kql-parser/src/macros/src/lib.rs b/rust/experimental/query_engine/kql-parser/src/macros/src/lib.rs new file mode 100644 index 0000000000..e28d800fc2 --- /dev/null +++ b/rust/experimental/query_engine/kql-parser/src/macros/src/lib.rs @@ -0,0 +1,141 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! This module contains macros for parser implementations that use the base KQL pest grammar. +//! These macros help with converting between derived parser `Rule` types and the base parser +//! `Rule` type, as well as providing a Pratt parser implementation for scalar expressions. 
+
+use proc_macro_crate::{FoundCrate, crate_name};
+use quote::{format_ident, quote};
+
+const BASE_PEST_SOURCE: &str = include_str!("../../base.pest");
+
+extern crate proc_macro;
+
+/// This macro derives the implementation of `TryFrom<`Rule`>` for `kql_parser::base_rule::Rule`
+/// for `Rule` enum derived from the pest_derive::Parser macro. This allows converting between
+/// the derived parser `Rule` type and the base parser `Rule` type, which allows the rules for
+/// the derived parser to be used in the parsing utilities in the kql-parser crate.
+#[proc_macro_derive(BaseRuleCompatible)]
+pub fn derive_base_rule_compatible(_input: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    // parse the base.pest grammar to get the list of rules
+    let pairs =
+        pest_meta::parser::parse(pest_meta::parser::Rule::grammar_rules, BASE_PEST_SOURCE).unwrap();
+    let ast = pest_meta::parser::consume_rules(pairs).unwrap();
+    let rules = pest_meta::optimizer::optimize(ast);
+
+    let rule_conversions = rules.iter().map(|rule| {
+        let rule_name = format_ident!("r#{}", rule.name);
+        quote! {
+            Rule::#rule_name => Self::#rule_name
+        }
+    });
+
+    // derive `TryFrom` the derived rule for the base rule.
+    let base_rule_crate_base = kql_parser_crate_name();
+    let rule_conversion = quote! {
+        impl TryFrom for #base_rule_crate_base::base_parser::Rule {
+            type Error = data_engine_parser_abstractions::ParserError;
+
+            fn try_from(value: Rule) -> Result {
+                Ok(match value {
+                    #(#rule_conversions),*,
+                    _ => return Err(data_engine_parser_abstractions::ParserError::RuleConversionError(
+                        format!("could not convert {value:?} to base_parser::Rule")
+                    ))
+                })
+            }
+        }
+    };
+
+    // derive the pratt parser for the scalar expressions
+    let scalar_expr_pratt_parser_impl = generate_scalar_expr_pratt_parser();
+
+    quote! 
{
+        #rule_conversion
+        #scalar_expr_pratt_parser_impl
+    }
+    .into()
+}
+
+/// Determine the crate path for data_engine_kql_parser, either "crate" if the macro is being
+/// executed in the kql parser crate, otherwise reference the crate by name.
+fn kql_parser_crate_name() -> proc_macro2::TokenStream {
+    match crate_name("data_engine_kql_parser").expect("data_engine_kql_parser is present") {
+        FoundCrate::Itself => quote! { crate },
+        FoundCrate::Name(name) => {
+            let crate_name = format_ident!("r#{}", name);
+            quote! { #crate_name }
+        }
+    }
+}
+
+#[proc_macro_derive(ScalarExprPrattParser)]
+pub fn derive_scalar_expr_pratt_parser(_input: proc_macro::TokenStream) -> proc_macro::TokenStream {
+    generate_scalar_expr_pratt_parser().into()
+}
+
+/// Parsing scalar expressions uses a pratt parser, which takes Pair as input. Because pest
+/// doesn't expose a way to generically convert between different parser Pair types, we define
+/// the pratt parser for each derived parser Rule type. This macro generates the pratt parser
+/// and a trait implementation for the derived parser Rule type to access it.
+fn generate_scalar_expr_pratt_parser() -> proc_macro2::TokenStream {
+    let base_rule_crate_base = kql_parser_crate_name();
+
+    quote! 
{ + static PRATT_PARSER: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + use pest::pratt_parser::{Assoc::*, Op, PrattParser}; + use Rule::*; + + // Lowest precedence first + PrattParser::new() + // or + .op(Op::infix(or_token, Left)) + // and + .op(Op::infix(and_token, Left)) + // == != + .op(Op::infix(equals_token, Left) + | Op::infix(equals_insensitive_token, Left) + | Op::infix(not_equals_token, Left) + | Op::infix(not_equals_insensitive_token, Left) + | Op::infix(invalid_equals_token, Left)) + // <= >= < > + .op(Op::infix(less_than_or_equal_to_token, Left) + | Op::infix(greater_than_or_equal_to_token, Left) + | Op::infix(less_than_token, Left) + | Op::infix(greater_than_token, Left)) + // contains & has + .op(Op::infix(not_contains_cs_token, Left) + | Op::infix(not_contains_token, Left) + | Op::infix(not_has_cs_token, Left) + | Op::infix(not_has_token, Left) + | Op::infix(contains_cs_token, Left) + | Op::infix(contains_token, Left) + | Op::infix(has_cs_token, Left) + | Op::infix(has_token, Left)) + // in + .op(Op::infix(not_in_insensitive_token, Left) + | Op::infix(not_in_token, Left) + | Op::infix(in_insensitive_token, Left) + | Op::infix(in_token, Left)) + // matches + .op(Op::infix(matches_regex_token, Left)) + // + - + .op(Op::infix(plus_token, Left) | Op::infix(minus_token, Left)) + // * / % + .op(Op::infix(multiply_token, Left) + | Op::infix(divide_token, Left) + | Op::infix(modulo_token, Left)) + + // ^ ** (right-associative) + //.op(Op::infix(power, Right)) + }); + + + impl #base_rule_crate_base::ScalarExprRules for Rule { + fn pratt_parser() -> &'static pest::pratt_parser::PrattParser { + &PRATT_PARSER + } + } + } +} diff --git a/rust/experimental/query_engine/kql-parser/src/macros/tests/test_base_rule_compat_derive.rs b/rust/experimental/query_engine/kql-parser/src/macros/tests/test_base_rule_compat_derive.rs new file mode 100644 index 0000000000..72e17e7d8b --- /dev/null +++ 
b/rust/experimental/query_engine/kql-parser/src/macros/tests/test_base_rule_compat_derive.rs @@ -0,0 +1,39 @@ +use data_engine_kql_parser_macros::BaseRuleCompatible; + +#[derive(pest_derive::Parser, BaseRuleCompatible)] +#[grammar = "../../base.pest"] +#[grammar_inline = "macro_test_rule = { + \"test_rule\" +}"] +struct TestParser; + +#[cfg(test)] +mod test { + use super::*; + use data_engine_kql_parser::{ScalarExprRules, base_parser::TryAsBaseRule}; + use pest::Parser; + + #[test] + fn test_rule_convert() { + // check that we generated the TryFrom impl for base rules + let rule = TestParser::parse(Rule::logical_expression, "1 == 1") + .expect("should parse test_rule") + .next() + .expect("one rule"); + assert!(rule.try_as_base_rule().is_ok()); + + // check that custom rule can be converted to base rule + let rule = TestParser::parse(Rule::macro_test_rule, "test_rule") + .expect("should parse test_rule") + .next() + .expect("one rule"); + + assert!(rule.try_as_base_rule().is_err()); + } + + #[test] + fn test_derives_pratt_parser() { + // check that we can parse a scalar expression using the pratt parser + _ = Rule::pratt_parser(); + } +} diff --git a/rust/experimental/query_engine/kql-parser/src/query_expression.rs b/rust/experimental/query_engine/kql-parser/src/query_expression.rs index de9fed11e4..7788b135fc 100644 --- a/rust/experimental/query_engine/kql-parser/src/query_expression.rs +++ b/rust/experimental/query_engine/kql-parser/src/query_expression.rs @@ -8,8 +8,8 @@ use data_engine_parser_abstractions::*; use pest::Parser; use crate::{ - KqlPestParser, Rule, scalar_expression::parse_scalar_expression, shared_expressions::*, - tabular_expressions::parse_tabular_expression, + KqlPestParser, Rule, map_parse_error, scalar_expression::parse_scalar_expression, + shared_expressions::*, tabular_expressions::parse_tabular_expression, }; pub(crate) fn parse_query( @@ -22,38 +22,8 @@ pub(crate) fn parse_query( let parse_result = KqlPestParser::parse(Rule::query, 
query); - if parse_result.is_err() { - let pest_error = if let Err(error) = parse_result { - error - } else { - unreachable!() - }; - - let (start, end) = match pest_error.location { - pest::error::InputLocation::Pos(p) => (0, p), - pest::error::InputLocation::Span(s) => s, - }; - - let (line, column) = match pest_error.line_col { - pest::error::LineColLocation::Pos(p) => p, - pest::error::LineColLocation::Span(l, _) => l, - }; - - let content = if line > 0 && column > 0 { - &query - .lines() - .nth(line - 1) - .expect("Query line did not exist")[column - 1..] - } else { - &query[start..end] - }; - - errors.push(ParserError::SyntaxNotSupported( - QueryLocation::new(start, end, line, column) - .expect("QueryLocation could not be constructed"), - format!("Syntax '{content}' supplied in query is not supported"), - )); - + if let Err(pest_error) = parse_result { + errors.push(map_parse_error(query, pest_error)); return Err(errors); } diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_array_function_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_array_function_expressions.rs index 988cbb4338..7fbbc62cf3 100644 --- a/rust/experimental/query_engine/kql-parser/src/scalar_array_function_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_array_function_expressions.rs @@ -3,26 +3,37 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; -use crate::{Rule, scalar_expression::parse_scalar_expression}; +use crate::{ + base_parser::{Rule, TryAsBaseRule}, + scalar_expression::{ScalarExprRules, parse_scalar_expression}, +}; -pub(crate) fn parse_array_unary_expressions( - array_unary_expressions_rule: Pair, +pub(crate) fn parse_array_unary_expressions<'a, R>( + array_unary_expressions_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let rule 
= array_unary_expressions_rule.into_inner().next().unwrap(); - match rule.as_rule() { + match rule.try_as_base_rule()? { Rule::array_concat_expression => parse_array_concat_expression(rule, scope), _ => panic!("Unexpected rule in array_unary_expressions: {rule}"), } } -fn parse_array_concat_expression( - array_concat_expression_rule: Pair, +fn parse_array_concat_expression<'a, R>( + array_concat_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&array_concat_expression_rule); let array_concat_rules = array_concat_expression_rule @@ -63,7 +74,7 @@ fn parse_array_concat_expression( mod tests { use pest::Parser; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use super::*; @@ -76,7 +87,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -94,7 +105,7 @@ mod tests { let run_test_failure = |input: &str, expected_id: &str, expected_msg: &str| { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::logical_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::logical_expression, input).unwrap(); let error = parse_scalar_expression(result.next().unwrap(), &state).unwrap_err(); diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_conditional_function_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_conditional_function_expressions.rs index b674cac23f..21e5aff06d 100644 --- a/rust/experimental/query_engine/kql-parser/src/scalar_conditional_function_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_conditional_function_expressions.rs @@ -3,22 +3,28 @@ 
use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; use crate::{ - Rule, logical_expressions::parse_logical_expression, scalar_expression::parse_scalar_expression, + base_parser::{Rule, TryAsBaseRule}, + logical_expressions::parse_logical_expression, + scalar_expression::{ScalarExprRules, parse_scalar_expression}, }; -pub(crate) fn parse_conditional_unary_expressions( - conditional_unary_expressions_rule: Pair, +pub(crate) fn parse_conditional_unary_expressions<'a, R>( + conditional_unary_expressions_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let rule = conditional_unary_expressions_rule .into_inner() .next() .unwrap(); - match rule.as_rule() { + match rule.try_as_base_rule()? { Rule::conditional_expression => parse_conditional_expression(rule, scope), Rule::case_expression => parse_case_expression(rule, scope), Rule::coalesce_expression => parse_coalesce_expression(rule, scope), @@ -26,10 +32,14 @@ pub(crate) fn parse_conditional_unary_expressions( } } -fn parse_conditional_expression( - conditional_expression_rule: Pair, +fn parse_conditional_expression<'a, R>( + conditional_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&conditional_expression_rule); let mut conditional_rules = conditional_expression_rule.into_inner(); @@ -50,10 +60,14 @@ fn parse_conditional_expression( )) } -fn parse_case_expression( - case_expression_rule: Pair, +fn parse_case_expression<'a, R>( + case_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&case_expression_rule); let case_rules = 
case_expression_rule.into_inner(); @@ -105,10 +119,14 @@ fn parse_case_expression( ))) } -fn parse_coalesce_expression( - coalesce_expression_rule: Pair, +fn parse_coalesce_expression<'a, R>( + coalesce_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&coalesce_expression_rule); let mut coalesce_rules = coalesce_expression_rule.into_inner(); @@ -139,7 +157,7 @@ fn parse_coalesce_expression( mod tests { use pest::Parser; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use super::*; @@ -148,7 +166,7 @@ mod tests { let run_test_success = |input: &str, expected: ScalarExpression| { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::conditional_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::conditional_expression, input).unwrap(); let expression = parse_conditional_expression(result.next().unwrap(), &state).unwrap(); @@ -208,7 +226,7 @@ mod tests { ); state.push_variable_name("key"); - let mut result = KqlPestParser::parse(Rule::case_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::case_expression, input).unwrap(); let expression = parse_case_expression(result.next().unwrap(), &state).unwrap(); @@ -357,7 +375,7 @@ mod tests { let run_test_success = |input: &str, expected: ScalarExpression| { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_conversion_function_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_conversion_function_expressions.rs index 25a6c2db04..185740eaa5 100644 --- 
a/rust/experimental/query_engine/kql-parser/src/scalar_conversion_function_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_conversion_function_expressions.rs @@ -3,20 +3,27 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; -use crate::{Rule, scalar_expression::parse_scalar_expression}; +use crate::{ + base_parser::{Rule, TryAsBaseRule}, + scalar_expression::{ScalarExprRules, parse_scalar_expression}, +}; -pub(crate) fn parse_conversion_unary_expressions( - conversion_unary_expressions_rule: Pair, +pub(crate) fn parse_conversion_unary_expressions<'a, R>( + conversion_unary_expressions_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let rule = conversion_unary_expressions_rule .into_inner() .next() .unwrap(); - match rule.as_rule() { + match rule.try_as_base_rule()? { Rule::tostring_expression => parse_tostring_expression(rule, scope), Rule::toint_expression => parse_toint_expression(rule, scope), Rule::tobool_expression => parse_tobool_expression(rule, scope), @@ -30,10 +37,14 @@ pub(crate) fn parse_conversion_unary_expressions( } } -fn parse_tostring_expression( - tostring_rule: Pair, +fn parse_tostring_expression<'a, R>( + tostring_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&tostring_rule); let mut inner = tostring_rule.into_inner(); @@ -47,10 +58,14 @@ fn parse_tostring_expression( ))) } -fn parse_toint_expression( - toint_rule: Pair, +fn parse_toint_expression<'a, R>( + toint_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&toint_rule); let mut inner = toint_rule.into_inner(); 
@@ -64,10 +79,14 @@ fn parse_toint_expression( ))) } -fn parse_tobool_expression( - tobool_rule: Pair, +fn parse_tobool_expression<'a, R>( + tobool_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&tobool_rule); let mut inner = tobool_rule.into_inner(); @@ -79,10 +98,14 @@ fn parse_tobool_expression( ))) } -fn parse_tofloat_expression( - tofloat_rule: Pair, +fn parse_tofloat_expression<'a, R>( + tofloat_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&tofloat_rule); let mut inner = tofloat_rule.into_inner(); @@ -94,10 +117,14 @@ fn parse_tofloat_expression( ))) } -fn parse_tolong_expression( - tolong_rule: Pair, +fn parse_tolong_expression<'a, R>( + tolong_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&tolong_rule); let mut inner = tolong_rule.into_inner(); @@ -109,10 +136,14 @@ fn parse_tolong_expression( ))) } -fn parse_toreal_expression( - toreal_rule: Pair, +fn parse_toreal_expression<'a, R>( + toreal_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&toreal_rule); let mut inner = toreal_rule.into_inner(); @@ -124,10 +155,14 @@ fn parse_toreal_expression( ))) } -fn parse_todouble_expression( - todouble_rule: Pair, +fn parse_todouble_expression<'a, R>( + todouble_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&todouble_rule); let mut inner = todouble_rule.into_inner(); @@ -139,10 +174,14 @@ fn 
parse_todouble_expression( ))) } -fn parse_todatetime_expression( - todatetime_rule: Pair, +fn parse_todatetime_expression<'a, R>( + todatetime_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&todatetime_rule); let mut inner = todatetime_rule.into_inner(); @@ -157,10 +196,14 @@ fn parse_todatetime_expression( )) } -fn parse_totimespan_expression( - totimespan_rule: Pair, +fn parse_totimespan_expression<'a, R>( + totimespan_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&totimespan_rule); let mut inner = totimespan_rule.into_inner(); @@ -178,12 +221,12 @@ fn parse_totimespan_expression( #[cfg(test)] mod tests { use super::*; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use pest::Parser; #[test] fn test_pest_parse_tostring_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::tostring_expression, &[ "tostring(123)", @@ -234,7 +277,7 @@ mod tests { fn test_tostring_query_location() { let input = "tostring(42)"; let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::tostring_expression, input).unwrap(); + let mut parsed = BasePestParser::parse(Rule::tostring_expression, input).unwrap(); let result = parse_tostring_expression(parsed.next().unwrap(), &state).unwrap(); @@ -264,7 +307,7 @@ mod tests { for (input, description) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::tostring_expression, input) + let mut parsed = BasePestParser::parse(Rule::tostring_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_tostring_expression(parsed.next().unwrap(), &state) @@ -291,7 +334,7 @@ mod tests { #[test] fn 
test_pest_parse_toint_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::toint_expression, &[ "toint(123)", @@ -338,7 +381,7 @@ mod tests { for (input, description) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::toint_expression, input) + let mut parsed = BasePestParser::parse(Rule::toint_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_toint_expression(parsed.next().unwrap(), &state) @@ -355,7 +398,7 @@ mod tests { #[test] fn test_pest_parse_tobool_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::tobool_expression, &[ "tobool(123)", @@ -401,7 +444,7 @@ mod tests { for (input, description) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::tobool_expression, input) + let mut parsed = BasePestParser::parse(Rule::tobool_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_tobool_expression(parsed.next().unwrap(), &state) @@ -418,7 +461,7 @@ mod tests { #[test] fn test_pest_parse_tofloat_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::tofloat_expression, &[ "tofloat(123)", @@ -465,7 +508,7 @@ mod tests { for (input, description) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::tofloat_expression, input) + let mut parsed = BasePestParser::parse(Rule::tofloat_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_tofloat_expression(parsed.next().unwrap(), &state) @@ -482,7 +525,7 @@ mod tests { #[test] fn test_pest_parse_tolong_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::tolong_expression, &[ "tolong(123)", @@ -529,7 +572,7 @@ mod tests { for (input, description) in test_cases 
{ let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::tolong_expression, input) + let mut parsed = BasePestParser::parse(Rule::tolong_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_tolong_expression(parsed.next().unwrap(), &state) @@ -546,7 +589,7 @@ mod tests { #[test] fn test_pest_parse_toreal_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::toreal_expression, &[ "toreal(123)", @@ -593,7 +636,7 @@ mod tests { for (input, description) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::toreal_expression, input) + let mut parsed = BasePestParser::parse(Rule::toreal_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_toreal_expression(parsed.next().unwrap(), &state) @@ -610,7 +653,7 @@ mod tests { #[test] fn test_pest_parse_todouble_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::todouble_expression, &[ "todouble(123)", @@ -657,7 +700,7 @@ mod tests { for (input, description) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::todouble_expression, input) + let mut parsed = BasePestParser::parse(Rule::todouble_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_todouble_expression(parsed.next().unwrap(), &state) @@ -691,7 +734,7 @@ mod tests { for (input, value) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::scalar_expression, input) + let mut parsed = BasePestParser::parse(Rule::scalar_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_scalar_expression(parsed.next().unwrap(), &state) @@ -725,7 +768,7 @@ mod tests { for (input, value) in test_cases { let state = ParserState::new(input); - let mut parsed = 
KqlPestParser::parse(Rule::scalar_expression, input) + let mut parsed = BasePestParser::parse(Rule::scalar_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_scalar_expression(parsed.next().unwrap(), &state) diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_expression.rs b/rust/experimental/query_engine/kql-parser/src/scalar_expression.rs index 0f45a327c6..3bb62e31e7 100644 --- a/rust/experimental/query_engine/kql-parser/src/scalar_expression.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_expression.rs @@ -1,77 +1,49 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -use std::{ - collections::{BTreeMap, HashMap}, - sync::LazyLock, -}; +use std::collections::{BTreeMap, HashMap}; use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::{iterators::Pair, pratt_parser::*}; +use pest::{RuleType, iterators::Pair, pratt_parser::*}; use crate::{ - Rule, logical_expressions::to_logical_expression, scalar_array_function_expressions::*, - scalar_conditional_function_expressions::*, scalar_conversion_function_expressions::*, - scalar_logical_function_expressions::*, scalar_mathematical_function_expressions::*, - scalar_parse_function_expressions::*, scalar_primitive_expressions::*, - scalar_string_function_expressions::*, scalar_temporal_function_expressions::*, + base_parser::{Rule, TryAsBaseRule}, + logical_expressions::to_logical_expression, + scalar_array_function_expressions::*, + scalar_conditional_function_expressions::*, + scalar_conversion_function_expressions::*, + scalar_logical_function_expressions::*, + scalar_mathematical_function_expressions::*, + scalar_parse_function_expressions::*, + scalar_primitive_expressions::*, + scalar_string_function_expressions::*, + scalar_temporal_function_expressions::*, }; -static PRATT_PARSER: LazyLock> = LazyLock::new(|| { - use Assoc::*; - use Rule::*; - - // Lowest precedence first - 
PrattParser::new() - // or - .op(Op::infix(or_token, Left)) - // and - .op(Op::infix(and_token, Left)) - // == != - .op(Op::infix(equals_token, Left) - | Op::infix(equals_insensitive_token, Left) - | Op::infix(not_equals_token, Left) - | Op::infix(not_equals_insensitive_token, Left) - | Op::infix(invalid_equals_token, Left)) - // <= >= < > - .op(Op::infix(less_than_or_equal_to_token, Left) - | Op::infix(greater_than_or_equal_to_token, Left) - | Op::infix(less_than_token, Left) - | Op::infix(greater_than_token, Left)) - // contains & has - .op(Op::infix(not_contains_cs_token, Left) - | Op::infix(not_contains_token, Left) - | Op::infix(not_has_cs_token, Left) - | Op::infix(not_has_token, Left) - | Op::infix(contains_cs_token, Left) - | Op::infix(contains_token, Left) - | Op::infix(has_cs_token, Left) - | Op::infix(has_token, Left)) - // in - .op(Op::infix(not_in_insensitive_token, Left) - | Op::infix(not_in_token, Left) - | Op::infix(in_insensitive_token, Left) - | Op::infix(in_token, Left)) - // matches - .op(Op::infix(matches_regex_token, Left)) - // + - - .op(Op::infix(plus_token, Left) | Op::infix(minus_token, Left)) - // * / % - .op(Op::infix(multiply_token, Left) - | Op::infix(divide_token, Left) - | Op::infix(modulo_token, Left)) - - // ^ ** (right-associative) - //.op(Op::infix(power, Right)) -}); - -pub(crate) fn parse_scalar_expression( - scalar_expression_rule: Pair, +/// Trait for parser Rule types that can be used to parse scalar expressions. +pub trait ScalarExprRules: RuleType + 'static { + /// Returns the PrattParser configured for parsing scalar expressions. + /// + /// The scalar expression uses a pratt parser to handle operator precedence, but because + /// pest's pratt parser takes the `Pair`, and not just the `Rule`, and pest doesn't provide + /// a way to have a common `Pair` type across different parser `Rule` types, we need a way to + /// access the pratt parser for each derived parser Rule type. 
+ /// + /// This can be derived for the rule type using the macros in the ./src/macros crate. + fn pratt_parser() -> &'static PrattParser; +} + +pub(crate) fn parse_scalar_expression<'a, R>( + scalar_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { - PRATT_PARSER - .map_primary(|primary| match primary.as_rule() { +) -> Result +where + Pair<'a, R>: TryAsBaseRule, + R: ScalarExprRules, +{ + R::pratt_parser() + .map_primary(|primary| match primary.try_as_base_rule()? { Rule::scalar_unary_expression => parse_scalar_unary_expression(primary, scope), Rule::scalar_expression => parse_scalar_expression(primary, scope), Rule::scalar_list_expression => { @@ -96,7 +68,7 @@ pub(crate) fn parse_scalar_expression( let lhs = lhs?; let rhs = rhs?; - Ok(match op.as_rule() { + Ok(match op.try_as_base_rule()? { Rule::equals_token => ScalarExpression::Logical( LogicalExpression::EqualTo(EqualToLogicalExpression::new( location, lhs, rhs, false, @@ -289,13 +261,17 @@ pub(crate) fn parse_scalar_expression( .parse(scalar_expression_rule.into_inner()) } -pub(crate) fn parse_scalar_unary_expression( - scalar_unary_expression_rule: Pair, +pub(crate) fn parse_scalar_unary_expression<'a, R>( + scalar_unary_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: ScalarExprRules + 'static, + Pair<'a, R>: TryAsBaseRule, +{ let rule = scalar_unary_expression_rule.into_inner().next().unwrap(); - Ok(match rule.as_rule() { + Ok(match rule.try_as_base_rule()? { Rule::type_unary_expressions => { ScalarExpression::Static(parse_type_unary_expressions(rule)?) } @@ -445,7 +421,7 @@ pub(crate) fn parse_scalar_unary_expression( let mut rules = argument_rule.into_inner(); while let Some(argument_rule) = rules.next() { - match argument_rule.as_rule() { + match argument_rule.try_as_base_rule()? 
{ Rule::identifier_literal => { found_named = true; @@ -758,13 +734,13 @@ fn parse_identifier_from_accessor( mod tests { use pest::Parser; - use crate::{KqlPestParser, date_utils::create_utc}; + use crate::{base_parser::BasePestParser, date_utils::create_utc}; use super::*; #[test] fn test_pest_parse_scalar_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::scalar_expression, &[ "1", @@ -803,7 +779,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -1333,7 +1309,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -1367,7 +1343,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut scalar = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -1415,7 +1391,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let actual = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -1616,7 +1592,7 @@ mod tests { HashMap::new(), ); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let actual = parse_scalar_expression(result.next().unwrap(), 
&state).unwrap(); @@ -1675,7 +1651,7 @@ mod tests { ]), ); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let error = parse_scalar_expression(result.next().unwrap(), &state).unwrap_err(); diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_logical_function_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_logical_function_expressions.rs index 0b3ab9dd06..b83bd57418 100644 --- a/rust/experimental/query_engine/kql-parser/src/scalar_logical_function_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_logical_function_expressions.rs @@ -3,26 +3,38 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; -use crate::{Rule, logical_expressions::parse_logical_expression}; +use crate::{ + base_parser::{Rule, TryAsBaseRule}, + logical_expressions::parse_logical_expression, + scalar_expression::ScalarExprRules, +}; -pub(crate) fn parse_logical_unary_expressions( - logical_unary_expressions_rule: Pair, +pub(crate) fn parse_logical_unary_expressions<'a, R>( + logical_unary_expressions_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let rule = logical_unary_expressions_rule.into_inner().next().unwrap(); - match rule.as_rule() { + match rule.try_as_base_rule()? 
{ Rule::not_expression => parse_not_expression(rule, scope), _ => panic!("Unexpected rule in logical_unary_expressions: {rule}"), } } -fn parse_not_expression( - not_expression_rule: Pair, +fn parse_not_expression<'a, R>( + not_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(¬_expression_rule); let mut not_rules = not_expression_rule.into_inner(); @@ -38,12 +50,12 @@ fn parse_not_expression( #[cfg(test)] mod tests { use super::*; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use pest::Parser; #[test] fn test_pest_parse_not_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::not_expression, &[ "not(true)", @@ -70,7 +82,7 @@ mod tests { ParserOptions::new().with_attached_data_names(&["resource"]), ); - let mut result = KqlPestParser::parse(Rule::not_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::not_expression, input).unwrap(); let result = parse_not_expression(result.next().unwrap(), &state).unwrap(); diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_mathematical_function_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_mathematical_function_expressions.rs index 008d8a2013..944c6626f5 100644 --- a/rust/experimental/query_engine/kql-parser/src/scalar_mathematical_function_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_mathematical_function_expressions.rs @@ -3,27 +3,38 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; -use crate::{Rule, scalar_expression::*}; +use crate::{ + base_parser::{Rule, TryAsBaseRule}, + scalar_expression::*, +}; -pub(crate) fn parse_math_unary_expressions( - math_unary_expressions_rule: Pair, +pub(crate) fn parse_math_unary_expressions<'a, 
R>( + math_unary_expressions_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let rule = math_unary_expressions_rule.into_inner().next().unwrap(); - match rule.as_rule() { + match rule.try_as_base_rule()? { Rule::negate_expression => parse_negate_expression(rule, scope), Rule::bin_expression => parse_bin_expression(rule, scope), _ => panic!("Unexpected rule in math_unary_expressions: {rule}"), } } -fn parse_negate_expression( - negate_expression_rule: Pair, +fn parse_negate_expression<'a, R>( + negate_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&negate_expression_rule); let mut inner = negate_expression_rule.into_inner(); @@ -37,10 +48,14 @@ fn parse_negate_expression( ))) } -fn parse_bin_expression( - bin_expression_rule: Pair, +fn parse_bin_expression<'a, R>( + bin_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&bin_expression_rule); let mut inner = bin_expression_rule.into_inner(); @@ -58,7 +73,7 @@ mod tests { use chrono::TimeDelta; use pest::Parser; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use super::*; @@ -91,7 +106,7 @@ mod tests { for (input, value) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::scalar_expression, input) + let mut parsed = BasePestParser::parse(Rule::scalar_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_scalar_expression(parsed.next().unwrap(), &state) @@ -131,7 +146,7 @@ mod tests { for (input, left, right) in test_cases { let state = ParserState::new(input); - let mut parsed = KqlPestParser::parse(Rule::scalar_expression, 
input) + let mut parsed = BasePestParser::parse(Rule::scalar_expression, input) .unwrap_or_else(|_| panic!("Failed to parse: {input}")); let result = parse_scalar_expression(parsed.next().unwrap(), &state) @@ -153,7 +168,7 @@ mod tests { println!("Testing: {input}"); let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); assert_eq!(expected, expression); @@ -407,7 +422,7 @@ mod tests { #[test] fn test_pest_parse_arithmetic_expression() { // Add arithmetic expressions to the pest parser tests - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::scalar_expression, &[ // Basic arithmetic @@ -450,7 +465,7 @@ mod tests { println!("Testing: {input}"); let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); let pipeline: PipelineExpression = Default::default(); @@ -493,7 +508,7 @@ mod tests { println!("Testing: {input}"); let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); let pipeline: PipelineExpression = Default::default(); diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_parse_function_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_parse_function_expressions.rs index cd843dff32..5abfa769d8 100644 --- a/rust/experimental/query_engine/kql-parser/src/scalar_parse_function_expressions.rs +++ 
b/rust/experimental/query_engine/kql-parser/src/scalar_parse_function_expressions.rs @@ -3,27 +3,38 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; -use crate::{Rule, scalar_expression::parse_scalar_expression}; +use crate::{ + base_parser::{Rule, TryAsBaseRule}, + scalar_expression::{ScalarExprRules, parse_scalar_expression}, +}; -pub(crate) fn parse_parse_unary_expressions( - parse_unary_expressions_rule: Pair, +pub(crate) fn parse_parse_unary_expressions<'a, R>( + parse_unary_expressions_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let rule = parse_unary_expressions_rule.into_inner().next().unwrap(); - match rule.as_rule() { + match rule.try_as_base_rule()? { Rule::parse_json_expression => parse_parse_json_expression(rule, scope), Rule::parse_regex_expression => parse_parse_regex_expression(rule, scope), _ => panic!("Unexpected rule in parse_unary_expressions: {rule}"), } } -fn parse_parse_json_expression( - parse_json_expression_rule: Pair, +fn parse_parse_json_expression<'a, R>( + parse_json_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&parse_json_expression_rule); let mut parse_json_rules = parse_json_expression_rule.into_inner(); @@ -47,10 +58,14 @@ fn parse_parse_json_expression( ))) } -fn parse_parse_regex_expression( - parse_regex_expression_rule: Pair, +fn parse_parse_regex_expression<'a, R>( + parse_regex_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&parse_regex_expression_rule); let mut inner_rules = parse_regex_expression_rule.into_inner(); @@ -98,7 +113,7 @@ mod 
tests { use pest::Parser; use regex::Regex; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use super::*; @@ -109,7 +124,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -125,7 +140,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut expression = match parse_scalar_expression(result.next().unwrap(), &state) { Ok(e) => e, @@ -210,7 +225,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -226,7 +241,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let error = match parse_scalar_expression(result.next().unwrap(), &state) { Err(e) => e, diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_primitive_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_primitive_expressions.rs index 58db360135..6ae5beb0f2 100644 --- a/rust/experimental/query_engine/kql-parser/src/scalar_primitive_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_primitive_expressions.rs @@ -6,16 +6,22 @@ use std::collections::HashMap; use chrono::TimeDelta; use data_engine_expressions::*; use data_engine_parser_abstractions::*; +use pest::RuleType; use pest::iterators::Pair; -use 
crate::{scalar_expression::parse_scalar_expression, *}; - -pub(crate) fn parse_type_unary_expressions( - type_unary_expressions_rule: Pair, -) -> Result { +use crate::base_parser::{Rule, TryAsBaseRule}; +use crate::map_kql_errors; +use crate::scalar_expression::{ScalarExprRules, parse_scalar_expression}; + +pub(crate) fn parse_type_unary_expressions<'a, R>( + type_unary_expressions_rule: Pair<'a, R>, +) -> Result +where + R: RuleType, + Pair<'a, R>: TryAsBaseRule, +{ let rule = type_unary_expressions_rule.into_inner().next().unwrap(); - - Ok(match rule.as_rule() { + Ok(match rule.try_as_base_rule()? { Rule::null_literal => parse_standard_null_literal(rule), Rule::real_expression => parse_real_expression(rule)?, Rule::datetime_expression => parse_datetime_expression(rule)?, @@ -34,7 +40,10 @@ pub(crate) fn parse_type_unary_expressions( /// when parsed from pest: /// * `'some \\' string'` -> `some ' string` /// * `\"some \\\" string\"` -> `some \" string` -pub(crate) fn parse_string_literal(string_literal_rule: Pair) -> StaticScalarExpression { +pub(crate) fn parse_string_literal(string_literal_rule: Pair) -> StaticScalarExpression +where + R: RuleType, +{ let query_location = to_query_location(&string_literal_rule); let raw_string = string_literal_rule.as_str(); @@ -84,14 +93,18 @@ pub(crate) fn parse_string_literal(string_literal_rule: Pair) -> StaticSca StaticScalarExpression::String(StringScalarExpression::new(query_location, s.as_str())) } -fn parse_datetime_expression( - datetime_expression_rule: Pair, -) -> Result { +fn parse_datetime_expression<'a, R>( + datetime_expression_rule: Pair<'a, R>, +) -> Result +where + R: RuleType, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&datetime_expression_rule); let datetime_rule = datetime_expression_rule.into_inner().next().unwrap(); - let original_value: String = match datetime_rule.as_rule() { + let original_value: String = match datetime_rule.try_as_base_rule()? 
{ Rule::string_literal => match parse_string_literal(datetime_rule) { StaticScalarExpression::String(v) => v.get_value().into(), _ => panic!("Unexpected type returned from parse_string_literal"), @@ -116,16 +129,20 @@ fn parse_datetime_expression( } } -fn parse_timespan_expression( - time_expression_rule: Pair, -) -> Result { +fn parse_timespan_expression<'a, R>( + time_expression_rule: Pair<'a, R>, +) -> Result +where + R: RuleType, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&time_expression_rule); let mut inner_rules = time_expression_rule.into_inner(); let first_rule = inner_rules.next().unwrap(); - let string_value = match first_rule.as_rule() { + let string_value = match first_rule.try_as_base_rule()? { Rule::string_literal => match parse_string_literal(first_rule) { StaticScalarExpression::String(v) => v, _ => panic!("Unexpected type returned from parse_string_literal"), @@ -190,7 +207,10 @@ fn parse_timespan_expression( )), }; - fn get_multiplier(rule: Option>) -> i64 { + fn get_multiplier(rule: Option>) -> i64 + where + R: RuleType, + { match rule.map(|r| r.as_str()) { Some("millisecond") | Some("milliseconds") | Some("ms") => { TimeDelta::milliseconds(1).num_nanoseconds().unwrap() @@ -229,9 +249,13 @@ fn parse_timespan_expression( } } -fn parse_regex_expression( - regex_expression_rule: Pair, -) -> Result { +fn parse_regex_expression<'a, R>( + regex_expression_rule: Pair<'a, R>, +) -> Result +where + R: RuleType, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(®ex_expression_rule); let mut inner_rules = regex_expression_rule.into_inner(); @@ -253,17 +277,25 @@ fn parse_regex_expression( ))) } -fn parse_dynamic_expression( - dynamic_expression_rule: Pair, -) -> Result { +fn parse_dynamic_expression<'a, R>( + dynamic_expression_rule: Pair<'a, R>, +) -> Result +where + R: RuleType, + Pair<'a, R>: TryAsBaseRule, +{ return parse_dynamic_inner_expression(dynamic_expression_rule.into_inner().next().unwrap()); - 
fn parse_dynamic_inner_expression( - dynamic_inner_expression_rule: Pair, - ) -> Result { + fn parse_dynamic_inner_expression<'a, R>( + dynamic_inner_expression_rule: Pair<'a, R>, + ) -> Result + where + R: RuleType, + Pair<'a, R>: TryAsBaseRule, + { let query_location = to_query_location(&dynamic_inner_expression_rule); - match dynamic_inner_expression_rule.as_rule() { + match dynamic_inner_expression_rule.try_as_base_rule()? { Rule::type_unary_expressions => { Ok(parse_type_unary_expressions(dynamic_inner_expression_rule)?) } @@ -313,14 +345,18 @@ fn parse_dynamic_expression( } } -fn parse_real_expression( - real_expression_rule: Pair, -) -> Result { +fn parse_real_expression<'a, R>( + real_expression_rule: Pair<'a, R>, +) -> Result +where + R: RuleType, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&real_expression_rule); let real_rule = real_expression_rule.into_inner().next().unwrap(); - match real_rule.as_rule() { + match real_rule.try_as_base_rule()? { Rule::positive_infinity_token => Ok(StaticScalarExpression::Double( DoubleScalarExpression::new(query_location, f64::INFINITY), )), @@ -357,11 +393,15 @@ fn parse_real_expression( /// on [`ParserState`]. 
/// /// `unknown` -> `Source(MapKey("attributes"), MapKey("unknown"))` -pub(crate) fn parse_accessor_expression( - accessor_expression_rule: Pair, +pub(crate) fn parse_accessor_expression<'a, R>( + accessor_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, allow_root_scalar: bool, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&accessor_expression_rule); let mut accessor_rules = accessor_expression_rule.into_inner(); @@ -372,7 +412,7 @@ pub(crate) fn parse_accessor_expression( let mut root_full_identifier_literal_depth = 0; let mut root_full_identifier_literal = String::new(); - let root_accessor_identity = match root_accessor_identity_rule.as_rule() { + let root_accessor_identity = match root_accessor_identity_rule.try_as_base_rule()? { Rule::string_literal => match parse_string_literal(root_accessor_identity_rule) { StaticScalarExpression::String(v) => { root_full_identifier_literal_complete = true; @@ -403,7 +443,7 @@ pub(crate) fn parse_accessor_expression( let pair = accessor.unwrap(); let pair_value = pair.as_str(); - let add_to_root_literal = match pair.as_rule() { + let add_to_root_literal = match pair.try_as_base_rule()? { Rule::integer_literal => { match parse_standard_integer_literal(pair)? 
{ StaticScalarExpression::Integer(v) => { @@ -740,13 +780,13 @@ mod tests { use chrono::{DateTime, Datelike, FixedOffset, Utc}; use pest::Parser; - use crate::{KqlPestParser, date_utils::*}; + use crate::{base_parser::BasePestParser, date_utils::*}; use super::*; #[test] fn test_pest_parse_string_literal_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::string_literal, &[ "\"hello\"", @@ -763,7 +803,7 @@ mod tests { #[test] fn test_parse_string_literal() { let run_test = |input: &str, expected: &str| { - let mut result = KqlPestParser::parse(Rule::string_literal, input).unwrap(); + let mut result = BasePestParser::parse(Rule::string_literal, input).unwrap(); let actual = parse_string_literal(result.next().unwrap()); @@ -783,7 +823,7 @@ mod tests { #[test] fn test_pest_parse_bool_literal_rule() { - parse_test_helpers::test_parse_bool_literal::( + parse_test_helpers::test_parse_bool_literal::( Rule::true_literal, Rule::false_literal, &[ @@ -799,7 +839,7 @@ mod tests { #[test] fn test_pest_parse_double_literal_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::double_literal, &[ "1.0", "-1.0", "1.0e1", "-1.0e1", "1e1", "-1e1", "1e+1", "1e-1", @@ -811,7 +851,7 @@ mod tests { #[test] fn test_parse_double_literal() { let run_test = |input: &str, expected: f64| { - let mut result = KqlPestParser::parse(Rule::double_literal, input).unwrap(); + let mut result = BasePestParser::parse(Rule::double_literal, input).unwrap(); let f = parse_standard_double_literal(result.next().unwrap(), None); @@ -837,7 +877,7 @@ mod tests { #[test] fn test_pest_parse_integer_literal_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::integer_literal, &["123", "-123"], &[".53", "abc"], @@ -847,7 +887,7 @@ mod tests { #[test] fn test_parse_integer_literal() { let run_test = |input: &str, expected: i64| { - let mut result = KqlPestParser::parse(Rule::integer_literal, 
input).unwrap(); + let mut result = BasePestParser::parse(Rule::integer_literal, input).unwrap(); let i = parse_standard_integer_literal(result.next().unwrap()); @@ -866,7 +906,7 @@ mod tests { #[test] fn test_parse_invalid_integer_literal() { let input = format!("{}", i64::MAX as i128 + 1); - let result = KqlPestParser::parse(Rule::integer_literal, input.as_str()); + let result = BasePestParser::parse(Rule::integer_literal, input.as_str()); assert!(result.is_ok()); @@ -883,7 +923,7 @@ mod tests { #[test] fn test_pest_parse_datetime_literal_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::datetime_literal, &[ "12/31/2025", @@ -898,7 +938,7 @@ mod tests { ); // ISO 8601 - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::datetime_literal, &[ "2014-05-25T08:20:03.123456Z", @@ -908,19 +948,19 @@ mod tests { &[], ); // RFC 822 - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::datetime_literal, &["Sat, 8 Nov 14 15:05:02 GMT", "8 Nov 14 15:05 GMT"], &[], ); // RFC 850 - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::datetime_literal, &["Saturday, 08-Nov-14 15:05:02 GMT", "08-Nov-14 15:05:02 GMT"], &[], ); // Sortable - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::datetime_literal, &["2014-11-08 15:05:25 GMT", "2014-11-08T15:05:25 GMT"], &[], @@ -930,7 +970,7 @@ mod tests { #[test] fn test_parse_datetime_expression() { let run_test_success = |input: &str, expected: DateTime| { - let mut result = KqlPestParser::parse(Rule::datetime_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::datetime_expression, input).unwrap(); let d = parse_datetime_expression(result.next().unwrap()); @@ -943,7 +983,7 @@ mod tests { }; let run_test_failure = |input: &str| { - let mut result = KqlPestParser::parse(Rule::datetime_expression, input).unwrap(); + let mut result = 
BasePestParser::parse(Rule::datetime_expression, input).unwrap(); let d = parse_datetime_expression(result.next().unwrap()); @@ -1118,7 +1158,7 @@ mod tests { #[test] fn test_parse_timespan_expression() { let run_test_success = |input: &str, expected: TimeDelta| { - let mut result = KqlPestParser::parse(Rule::time_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::time_expression, input).unwrap(); let d = parse_timespan_expression(result.next().unwrap()); @@ -1131,7 +1171,7 @@ mod tests { }; let run_test_failure = |input: &str| { - let mut result = KqlPestParser::parse(Rule::time_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::time_expression, input).unwrap(); let d = parse_timespan_expression(result.next().unwrap()); @@ -1219,7 +1259,7 @@ mod tests { #[test] fn test_parse_regex_expression() { let run_test_success = |input: &str, test: &str| { - let mut result = KqlPestParser::parse(Rule::regex_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::regex_expression, input).unwrap(); let d = parse_regex_expression(result.next().unwrap()).unwrap(); @@ -1230,7 +1270,7 @@ mod tests { }; let run_test_failure = |input: &str| { - let mut result = KqlPestParser::parse(Rule::regex_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::regex_expression, input).unwrap(); let d = parse_regex_expression(result.next().unwrap()); @@ -1247,7 +1287,7 @@ mod tests { #[test] fn test_parse_dynamic_expression() { let run_test = |input: &str, expected: StaticScalarExpression| { - let mut result = KqlPestParser::parse(Rule::dynamic_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::dynamic_expression, input).unwrap(); let actual = parse_dynamic_expression(result.next().unwrap()).unwrap(); @@ -1352,7 +1392,7 @@ mod tests { #[test] fn test_pest_parse_real_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( 
Rule::real_expression, &["real(1.0)", "real(1)", "real(+inf)", "real(-inf)"], &["real(.1)", "real()", "real(abc)"], @@ -1362,7 +1402,7 @@ mod tests { #[test] fn test_parse_real_expression() { let run_test = |input: &str, expected: f64| { - let mut result = KqlPestParser::parse(Rule::real_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::real_expression, input).unwrap(); let r = parse_real_expression(result.next().unwrap()); @@ -1383,7 +1423,7 @@ mod tests { #[test] fn test_pest_parse_identifier_literal_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::identifier_literal, &["Abc", "abc_123", "_abc"], &[], @@ -1392,7 +1432,7 @@ mod tests { #[test] fn test_pest_parse_identifier_or_pattern_literal_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::identifier_or_pattern_literal, &["*", "abc*", "a*b*c", "['something.*']"], &[], @@ -1402,7 +1442,7 @@ mod tests { #[test] fn test_pest_parse_accessor_expression_rule() { pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "Abc").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "Abc").unwrap(), &[ (Rule::accessor_expression, "Abc"), (Rule::identifier_literal, "Abc"), @@ -1410,7 +1450,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "['hello world']").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "['hello world']").unwrap(), &[ (Rule::accessor_expression, "['hello world']"), (Rule::string_literal, "'hello world'"), @@ -1418,7 +1458,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "abc_123").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "abc_123").unwrap(), &[ (Rule::accessor_expression, "abc_123"), (Rule::identifier_literal, "abc_123"), @@ -1426,7 +1466,7 @@ mod tests { ); 
pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "_abc").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "_abc").unwrap(), &[ (Rule::accessor_expression, "_abc"), (Rule::identifier_literal, "_abc"), @@ -1434,7 +1474,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "array[0]").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "array[0]").unwrap(), &[ (Rule::accessor_expression, "array[0]"), (Rule::identifier_literal, "array"), @@ -1443,7 +1483,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "array[-1]").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "array[-1]").unwrap(), &[ (Rule::accessor_expression, "array[-1]"), (Rule::identifier_literal, "array"), @@ -1452,7 +1492,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "abc.name").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "abc.name").unwrap(), &[ (Rule::accessor_expression, "abc.name"), (Rule::identifier_literal, "abc"), @@ -1461,7 +1501,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "abc['name']").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "abc['name']").unwrap(), &[ (Rule::accessor_expression, "abc['name']"), (Rule::identifier_literal, "abc"), @@ -1470,7 +1510,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "abc[-'name']").unwrap(), + BasePestParser::parse(Rule::accessor_expression, "abc[-'name']").unwrap(), &[ (Rule::accessor_expression, "abc[-'name']"), (Rule::identifier_literal, "abc"), @@ -1483,7 +1523,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse(Rule::accessor_expression, "abc.name1.name2").unwrap(), + 
BasePestParser::parse(Rule::accessor_expression, "abc.name1.name2").unwrap(), &[ (Rule::accessor_expression, "abc.name1.name2"), (Rule::identifier_literal, "abc"), @@ -1493,7 +1533,7 @@ mod tests { ); pest_test_helpers::test_compound_pest_rule( - KqlPestParser::parse( + BasePestParser::parse( Rule::accessor_expression, "abc['~name-!'].name1[0][-sub].name2", ) @@ -1516,7 +1556,7 @@ mod tests { ], ); - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::accessor_expression, &[], &["123", "+name", "-name", "~name", ".name"], @@ -1526,7 +1566,7 @@ mod tests { #[test] fn test_parse_accessor_expression_from_source() { let mut result = - KqlPestParser::parse(Rule::accessor_expression, "source.subkey['array'][0]").unwrap(); + BasePestParser::parse(Rule::accessor_expression, "source.subkey['array'][0]").unwrap(); let expression = parse_accessor_expression( result.next().unwrap(), @@ -1558,7 +1598,7 @@ mod tests { #[test] fn test_parse_accessor_expression_implicit_source() { - let mut result = KqlPestParser::parse( + let mut result = BasePestParser::parse( Rule::accessor_expression, "['sub.key thing'][var][-neg_attr]", ) @@ -1607,7 +1647,7 @@ mod tests { #[test] fn test_parse_accessor_expression_implicit_source_and_default_map() { let run_test = |query: &str, expected: &[ScalarExpression]| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, query).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, query).unwrap(); let expression = parse_accessor_expression( result.next().unwrap(), @@ -1671,7 +1711,7 @@ mod tests { #[test] fn test_parse_accessor_expression_implicit_source_and_souce_keys() { let run_test_success = |query: &str, expected: SourceScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, query).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, query).unwrap(); let expression = parse_accessor_expression( result.next().unwrap(), @@ 
-1695,7 +1735,7 @@ mod tests { }; let run_test_failure = |query: &str, expected_id: &str, expected_msg: &str| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, query).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, query).unwrap(); let error = parse_accessor_expression( result.next().unwrap(), @@ -1762,7 +1802,7 @@ mod tests { #[test] fn test_parse_accessor_expression_from_attached() { let mut result = - KqlPestParser::parse(Rule::accessor_expression, "resource['~at\\'tr~']").unwrap(); + BasePestParser::parse(Rule::accessor_expression, "resource['~at\\'tr~']").unwrap(); let expression = parse_accessor_expression( result.next().unwrap(), @@ -1790,7 +1830,7 @@ mod tests { #[test] fn test_parse_accessor_expression_from_variable() { - let mut result = KqlPestParser::parse(Rule::accessor_expression, "a[-1]").unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, "a[-1]").unwrap(); let state = ParserState::new("a[-1]"); @@ -1815,7 +1855,7 @@ mod tests { #[test] fn test_parse_accessor_expression_with_scalars_and_allow_root_scalars() { let run_test_success = |input: &str, expected: ScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new(input); @@ -1855,7 +1895,7 @@ mod tests { }; let run_test_failure = |input: &str, expected_id: &str, expected_msg: &str| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new(input); @@ -2011,7 +2051,7 @@ mod tests { #[test] fn test_parse_accessor_expression_with_scalars_and_disallow_root_scalars() { let run_test_success = |input: &str, expected: ScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = 
BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new(input); @@ -2064,7 +2104,7 @@ mod tests { #[test] fn test_parse_accessor_expression_with_default_map_schema() { let run_test_success = |input: &str, expected: ScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options( input, @@ -2103,7 +2143,7 @@ mod tests { }; let run_test_failure = |input: &str, expected_id: Option<&str>, expected_msg: &str| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options( input, @@ -2334,7 +2374,7 @@ mod tests { #[test] fn test_parse_accessor_expression_with_default_map_schema_and_allow_undefined_keys() { let run_test_success = |input: &str, expected: ScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options( input, @@ -2394,7 +2434,7 @@ mod tests { #[test] fn test_parse_accessor_expression_with_schema() { let run_test_success = |input: &str, expected: ScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options( input, @@ -2411,7 +2451,7 @@ mod tests { }; let run_test_failure = |input: &str, expected_id: Option<&str>, expected_msg: &str| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options( input, @@ -2467,7 +2507,7 @@ mod 
tests { #[test] fn test_parse_accessor_expression_with_schema_allow_undefined_keys() { let run_test_success = |input: &str, expected: ScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options( input, @@ -2516,7 +2556,7 @@ mod tests { #[test] fn test_parse_accessor_expression_without_source() { let run_test_failure = |input: &str, expected_id: Option<&str>, expected_msg: &str| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options(input, Default::default()); @@ -2567,7 +2607,7 @@ mod tests { #[test] fn test_parse_accessor_expression_argument() { let run_test_success = |input: &str, expected: ScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options(input, Default::default()); @@ -2608,7 +2648,7 @@ mod tests { #[test] fn test_parse_accessor_expression_local_variable() { let run_test_success = |input: &str, expected: ScalarExpression| { - let mut result = KqlPestParser::parse(Rule::accessor_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::accessor_expression, input).unwrap(); let state = ParserState::new_with_options(input, Default::default()); diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_string_function_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_string_function_expressions.rs index 1be6ca3d42..c6141475ac 100644 --- a/rust/experimental/query_engine/kql-parser/src/scalar_string_function_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_string_function_expressions.rs @@ -3,17 
+3,24 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; -use crate::{Rule, scalar_expression::parse_scalar_expression}; +use crate::{ + base_parser::{Rule, TryAsBaseRule}, + scalar_expression::{ScalarExprRules, parse_scalar_expression}, +}; -pub(crate) fn parse_string_unary_expressions( - string_unary_expressions_rule: Pair, +pub(crate) fn parse_string_unary_expressions<'a, R>( + string_unary_expressions_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let rule = string_unary_expressions_rule.into_inner().next().unwrap(); - match rule.as_rule() { + match rule.try_as_base_rule()? { Rule::strlen_expression => parse_strlen_expression(rule, scope), Rule::replace_string_expression => parse_replace_string_expression(rule, scope), Rule::substring_expression => parse_substring_expression(rule, scope), @@ -24,10 +31,14 @@ pub(crate) fn parse_string_unary_expressions( } } -fn parse_strlen_expression( - strlen_expression_rule: Pair, +fn parse_strlen_expression<'a, R>( + strlen_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&strlen_expression_rule); let mut strlen_rules = strlen_expression_rule.into_inner(); @@ -52,10 +63,14 @@ fn parse_strlen_expression( ))) } -fn parse_replace_string_expression( - replace_string_expression_rule: Pair, +fn parse_replace_string_expression<'a, R>( + replace_string_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&replace_string_expression_rule); let mut replace_string_rules = replace_string_expression_rule.into_inner(); @@ -70,10 +85,14 @@ fn 
parse_replace_string_expression( ), ))); - fn parse_and_validate_string_rule( - rule: Pair, + fn parse_and_validate_string_rule<'a, R>( + rule: Pair<'a, R>, scope: &dyn ParserScope, - ) -> Result { + ) -> Result + where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, + { let location = to_query_location(&rule); let mut scalar = parse_scalar_expression(rule, scope)?; @@ -92,10 +111,14 @@ fn parse_replace_string_expression( } } -fn parse_substring_expression( - substring_expression_rule: Pair, +fn parse_substring_expression<'a, R>( + substring_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&substring_expression_rule); let mut substring_rules = substring_expression_rule.into_inner(); @@ -142,10 +165,14 @@ fn parse_substring_expression( ))) } -fn parse_strcat_expression( - strcat_expression_rule: Pair, +fn parse_strcat_expression<'a, R>( + strcat_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&strcat_expression_rule); let strcat_rules = strcat_expression_rule @@ -172,10 +199,14 @@ fn parse_strcat_expression( ))) } -fn parse_strcat_delim_expression( - strcat_delim_expression_rule: Pair, +fn parse_strcat_delim_expression<'a, R>( + strcat_delim_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&strcat_delim_expression_rule); let mut strcat_delim_rules = strcat_delim_expression_rule.into_inner(); @@ -201,10 +232,14 @@ fn parse_strcat_delim_expression( ))) } -fn parse_extract_expression( - extract_expression_rule: Pair, +fn parse_extract_expression<'a, R>( + extract_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> 
Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&extract_expression_rule); let mut extract_rules = extract_expression_rule.into_inner(); @@ -262,13 +297,13 @@ mod tests { use pest::Parser; use regex::Regex; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use super::*; #[test] fn test_pest_parse_string_scalar_expression_rule() { - pest_test_helpers::test_pest_rule::( + pest_test_helpers::test_pest_rule::( Rule::scalar_expression, &[ "strlen(\"hello\")", @@ -288,7 +323,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -300,7 +335,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let error = parse_scalar_expression(result.next().unwrap(), &state).unwrap_err(); @@ -341,7 +376,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -353,7 +388,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let error = parse_scalar_expression(result.next().unwrap(), &state).unwrap_err(); @@ -416,7 +451,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result 
= BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -428,7 +463,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let error = parse_scalar_expression(result.next().unwrap(), &state).unwrap_err(); @@ -495,7 +530,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -545,7 +580,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -601,7 +636,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let mut expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); @@ -617,7 +652,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let error = match parse_scalar_expression(result.next().unwrap(), &state) { Err(e) => e, diff --git a/rust/experimental/query_engine/kql-parser/src/scalar_temporal_function_expressions.rs b/rust/experimental/query_engine/kql-parser/src/scalar_temporal_function_expressions.rs index 21fe33ba2a..3ba1632dbd 100644 --- 
a/rust/experimental/query_engine/kql-parser/src/scalar_temporal_function_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/scalar_temporal_function_expressions.rs @@ -3,26 +3,37 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; -use crate::{Rule, scalar_expression::parse_scalar_expression}; +use crate::{ + base_parser::{Rule, TryAsBaseRule}, + scalar_expression::{ScalarExprRules, parse_scalar_expression}, +}; -pub(crate) fn parse_temporal_unary_expressions( - temporal_unary_expressions_rule: Pair, +pub(crate) fn parse_temporal_unary_expressions<'a, R>( + temporal_unary_expressions_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let rule = temporal_unary_expressions_rule.into_inner().next().unwrap(); - match rule.as_rule() { + match rule.try_as_base_rule()? { Rule::now_expression => parse_now_expression(rule, scope), _ => panic!("Unexpected rule in temporal_unary_expressions: {rule}"), } } -fn parse_now_expression( - now_expression_rule: Pair, +fn parse_now_expression<'a, R>( + now_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&now_expression_rule); let mut now_rules = now_expression_rule.into_inner(); @@ -31,7 +42,7 @@ fn parse_now_expression( None => Ok(ScalarExpression::Temporal(TemporalScalarExpression::Now( NowScalarExpression::new(query_location), ))), - Some(r) => match r.as_rule() { + Some(r) => match r.try_as_base_rule()? 
{ Rule::scalar_expression => { let offset = parse_scalar_expression(r, scope)?; @@ -55,7 +66,7 @@ mod tests { use chrono::TimeDelta; use pest::Parser; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use super::*; @@ -66,7 +77,7 @@ mod tests { let state = ParserState::new(input); - let mut result = KqlPestParser::parse(Rule::scalar_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::scalar_expression, input).unwrap(); let expression = parse_scalar_expression(result.next().unwrap(), &state).unwrap(); diff --git a/rust/experimental/query_engine/kql-parser/src/shared_expressions.rs b/rust/experimental/query_engine/kql-parser/src/shared_expressions.rs index 179f37e2c6..9b4a288f23 100644 --- a/rust/experimental/query_engine/kql-parser/src/shared_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/shared_expressions.rs @@ -3,14 +3,21 @@ use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; use crate::{ - Rule, scalar_expression::parse_scalar_expression, + base_parser::{Rule, TryAsBaseRule}, + scalar_expression::{ScalarExprRules, parse_scalar_expression}, scalar_primitive_expressions::parse_accessor_expression, }; -pub(crate) fn parse_typeof_expression(typeof_expression_rule: Pair) -> Option { +pub(crate) fn parse_typeof_expression<'a, R>( + typeof_expression_rule: Pair<'a, R>, +) -> Option +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let typeof_rules = typeof_expression_rule.into_inner(); parse_type_literal(typeof_rules.as_str()) @@ -34,10 +41,14 @@ pub(crate) fn parse_type_literal(type_literal: &str) -> Option { } } -pub(crate) fn parse_source_assignment_expression( - assignment_expression_rule: Pair, +pub(crate) fn parse_source_assignment_expression<'a, R>( + assignment_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result<(QueryLocation, ScalarExpression, SourceScalarExpression), 
ParserError> { +) -> Result<(QueryLocation, ScalarExpression, SourceScalarExpression), ParserError> +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&assignment_expression_rule); let mut assignment_rules = assignment_expression_rule.into_inner(); @@ -46,7 +57,7 @@ pub(crate) fn parse_source_assignment_expression( let destination_rule_location = to_query_location(&destination_rule); let destination_rule_str = destination_rule.as_str(); - let accessor = match destination_rule.as_rule() { + let accessor = match destination_rule.try_as_base_rule()? { // Note: Root-level static accessors are not valid in an assignment // expression so allow_root_scalar=false is passed here. Example: // accessor(some_constant1) = [expression] cannot be folded as @@ -84,7 +95,7 @@ pub(crate) fn parse_source_assignment_expression( let source_rule = assignment_rules.next().unwrap(); - let scalar = match source_rule.as_rule() { + let scalar = match source_rule.try_as_base_rule()? 
{ Rule::scalar_expression => parse_scalar_expression(source_rule, scope)?, _ => panic!("Unexpected rule in assignment_expression: {source_rule}"), }; @@ -92,10 +103,14 @@ pub(crate) fn parse_source_assignment_expression( Ok((query_location, scalar, destination)) } -pub(crate) fn parse_variable_definition_expression( - variable_definition_expression_rule: Pair, +pub(crate) fn parse_variable_definition_expression<'a, R>( + variable_definition_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&variable_definition_expression_rule); let mut variable_definition_rules = variable_definition_expression_rule.into_inner(); @@ -132,7 +147,7 @@ pub(crate) fn parse_variable_definition_expression( mod tests { use pest::Parser; - use crate::KqlPestParser; + use crate::base_parser::BasePestParser; use super::*; @@ -146,7 +161,7 @@ mod tests { state.push_variable_name("variable"); - let mut result = KqlPestParser::parse(Rule::assignment_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::assignment_expression, input).unwrap(); let (query_location, source, destination) = parse_source_assignment_expression(result.next().unwrap(), &state).unwrap(); @@ -169,7 +184,7 @@ mod tests { state.push_variable_name("variable"); - let mut result = KqlPestParser::parse(Rule::assignment_expression, input).unwrap(); + let mut result = BasePestParser::parse(Rule::assignment_expression, input).unwrap(); let error = parse_source_assignment_expression(result.next().unwrap(), &state).unwrap_err(); @@ -236,7 +251,7 @@ mod tests { state.push_variable_name("variable"); let mut result = - KqlPestParser::parse(Rule::variable_definition_expression, input).unwrap(); + BasePestParser::parse(Rule::variable_definition_expression, input).unwrap(); let expression = parse_variable_definition_expression(result.next().unwrap(), &state).unwrap(); @@ 
-253,7 +268,7 @@ mod tests { state.push_variable_name("variable"); let mut result = - KqlPestParser::parse(Rule::variable_definition_expression, input).unwrap(); + BasePestParser::parse(Rule::variable_definition_expression, input).unwrap(); let error = parse_variable_definition_expression(result.next().unwrap(), &state).unwrap_err(); diff --git a/rust/experimental/query_engine/kql-parser/src/tabular_expressions.rs b/rust/experimental/query_engine/kql-parser/src/tabular_expressions.rs index ea0ef2664b..8d9dd4f116 100644 --- a/rust/experimental/query_engine/kql-parser/src/tabular_expressions.rs +++ b/rust/experimental/query_engine/kql-parser/src/tabular_expressions.rs @@ -5,29 +5,34 @@ use std::collections::{HashMap, HashSet}; use data_engine_expressions::*; use data_engine_parser_abstractions::*; -use pest::iterators::Pair; +use pest::{RuleType, iterators::Pair}; use regex::Regex; use crate::{ - Rule, + Rule, ScalarExprRules, aggregate_expressions::parse_aggregate_expression, + base_parser::{Rule as BaseRule, TryAsBaseRule}, logical_expressions::parse_logical_expression, scalar_expression::{parse_scalar_expression, try_resolve_identifier}, scalar_primitive_expressions::{parse_accessor_expression, parse_string_literal}, shared_expressions::parse_source_assignment_expression, }; -pub(crate) fn parse_extend_expression( - extend_expression_rule: Pair, +pub(crate) fn parse_extend_expression<'a, R>( + extend_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result, ParserError> { +) -> Result, ParserError> +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let extend_rules = extend_expression_rule.into_inner(); let mut set_expressions = Vec::new(); for rule in extend_rules { - match rule.as_rule() { - Rule::assignment_expression => { + match rule.try_as_base_rule()? 
{ + BaseRule::assignment_expression => { let (query_location, source, destination) = parse_source_assignment_expression(rule, scope)?; @@ -44,10 +49,14 @@ pub(crate) fn parse_extend_expression( Ok(set_expressions) } -pub(crate) fn parse_project_expression( - project_expression_rule: Pair, +pub(crate) fn parse_project_expression<'a, R>( + project_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result, ParserError> { +) -> Result, ParserError> +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&project_expression_rule); let project_rules = project_expression_rule.into_inner(); @@ -59,8 +68,8 @@ pub(crate) fn parse_project_expression( for rule in project_rules { let rule_location = to_query_location(&rule); - match rule.as_rule() { - Rule::assignment_expression => { + match rule.try_as_base_rule()? { + BaseRule::assignment_expression => { let (query_location, source, destination) = parse_source_assignment_expression(rule, scope)?; @@ -77,7 +86,7 @@ pub(crate) fn parse_project_expression( MutableValueExpression::Source(destination), ))); } - Rule::accessor_expression => { + BaseRule::accessor_expression => { let accessor_expression = parse_accessor_expression(rule, scope, true)?; if let ScalarExpression::Source(s) = &accessor_expression { @@ -113,10 +122,14 @@ pub(crate) fn parse_project_expression( Ok(expressions) } -pub(crate) fn parse_project_keep_expression( - project_keep_expression_rule: Pair, +pub(crate) fn parse_project_keep_expression<'a, R>( + project_keep_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result, ParserError> { +) -> Result, ParserError> +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&project_keep_expression_rule); let project_keep_rules = project_keep_expression_rule.into_inner(); @@ -128,8 +141,8 @@ pub(crate) fn parse_project_keep_expression( for rule in project_keep_rules { let rule_location 
= to_query_location(&rule); - match rule.as_rule() { - Rule::identifier_or_pattern_literal => { + match rule.try_as_base_rule()? { + BaseRule::identifier_or_pattern_literal => { if let Some(identifier_or_pattern) = parse_identifier_or_pattern_literal(scope, rule_location.clone(), rule)? { @@ -152,7 +165,7 @@ pub(crate) fn parse_project_keep_expression( )); } } - Rule::accessor_expression => { + BaseRule::accessor_expression => { let accessor_expression = parse_accessor_expression(rule, scope, true)?; if let ScalarExpression::Source(s) = &accessor_expression { @@ -188,10 +201,14 @@ pub(crate) fn parse_project_keep_expression( Ok(expressions) } -pub(crate) fn parse_project_away_expression( - project_away_expression_rule: Pair, +pub(crate) fn parse_project_away_expression<'a, R>( + project_away_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result, ParserError> { +) -> Result, ParserError> +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&project_away_expression_rule); let project_away_rules = project_away_expression_rule.into_inner(); @@ -203,8 +220,8 @@ pub(crate) fn parse_project_away_expression( for rule in project_away_rules { let rule_location = to_query_location(&rule); - match rule.as_rule() { - Rule::identifier_or_pattern_literal => { + match rule.try_as_base_rule()? { + BaseRule::identifier_or_pattern_literal => { if let Some(identifier_or_pattern) = parse_identifier_or_pattern_literal(scope, rule_location.clone(), rule)? 
{ @@ -227,7 +244,7 @@ pub(crate) fn parse_project_away_expression( )); } } - Rule::accessor_expression => { + BaseRule::accessor_expression => { let accessor_expression = parse_accessor_expression(rule, scope, true)?; if let ScalarExpression::Source(s) = &accessor_expression { @@ -263,10 +280,14 @@ pub(crate) fn parse_project_away_expression( Ok(expressions) } -pub(crate) fn parse_project_rename_expression( - project_rename_expression_rule: Pair, +pub(crate) fn parse_project_rename_expression<'a, R>( + project_rename_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&project_rename_expression_rule); let project_rename_rules = project_rename_expression_rule.into_inner(); @@ -274,8 +295,8 @@ pub(crate) fn parse_project_rename_expression( let mut expressions = Vec::new(); for rule in project_rename_rules { - match rule.as_rule() { - Rule::assignment_expression => { + match rule.try_as_base_rule()? { + BaseRule::assignment_expression => { let e = parse_source_assignment_expression(rule, scope)?; if let ScalarExpression::Source(s) = e.1 { expressions.push((e.0, s, e.2)); @@ -324,16 +345,20 @@ pub(crate) fn parse_project_rename_expression( } } -pub(crate) fn parse_where_expression( - where_expression_rule: Pair, +pub(crate) fn parse_where_expression<'a, R>( + where_expression_rule: Pair<'a, R>, scope: &dyn ParserScope, -) -> Result { +) -> Result +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let query_location = to_query_location(&where_expression_rule); let where_rule = where_expression_rule.into_inner().next().unwrap(); - let predicate = match where_rule.as_rule() { - Rule::logical_expression => parse_logical_expression(where_rule, scope)?, + let predicate = match where_rule.try_as_base_rule()? 
{ + BaseRule::logical_expression => parse_logical_expression(where_rule, scope)?, _ => panic!("Unexpected rule in where_expression: {where_rule}"), }; @@ -516,21 +541,42 @@ pub(crate) fn parse_tabular_expression_rule( let mut expressions = Vec::new(); match tabular_expression_rule.as_rule() { - Rule::extend_expression => { + Rule::summarize_expression => { + expressions.push(parse_summarize_expression(tabular_expression_rule, scope)?) + } + _ => return parse_common_tabular_expression_rule(tabular_expression_rule, scope), + } + + Ok(expressions) +} + +/// parse tabular expression rules that are common to multiple parsers +pub fn parse_common_tabular_expression_rule<'a, R>( + tabular_expression_rule: Pair<'a, R>, + scope: &dyn ParserScope, +) -> Result, ParserError> +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ + let mut expressions = Vec::new(); + + match tabular_expression_rule.try_as_base_rule()? { + BaseRule::extend_expression => { let extend_expressions = parse_extend_expression(tabular_expression_rule, scope)?; for e in extend_expressions { expressions.push(DataExpression::Transform(e)); } } - Rule::project_expression => { + BaseRule::project_expression => { let project_expressions = parse_project_expression(tabular_expression_rule, scope)?; for e in project_expressions { expressions.push(DataExpression::Transform(e)); } } - Rule::project_keep_expression => { + BaseRule::project_keep_expression => { let project_keep_expressions = parse_project_keep_expression(tabular_expression_rule, scope)?; @@ -538,7 +584,7 @@ pub(crate) fn parse_tabular_expression_rule( expressions.push(DataExpression::Transform(e)); } } - Rule::project_away_expression => { + BaseRule::project_away_expression => { let project_away_expressions = parse_project_away_expression(tabular_expression_rule, scope)?; @@ -546,16 +592,19 @@ pub(crate) fn parse_tabular_expression_rule( expressions.push(DataExpression::Transform(e)); } } - Rule::project_rename_expression => 
expressions.push(DataExpression::Transform( + BaseRule::project_rename_expression => expressions.push(DataExpression::Transform( parse_project_rename_expression(tabular_expression_rule, scope)?, )), - Rule::where_expression => { + BaseRule::where_expression => { expressions.push(parse_where_expression(tabular_expression_rule, scope)?) } - Rule::summarize_expression => { - expressions.push(parse_summarize_expression(tabular_expression_rule, scope)?) + other => { + let query_location = to_query_location(&tabular_expression_rule); + return Err(ParserError::SyntaxError( + query_location, + format!("Unexpected rule in tabular_expression: {other:?}"), + )); } - _ => panic!("Unexpected rule in tabular_expression: {tabular_expression_rule}"), } Ok(expressions) @@ -588,16 +637,20 @@ enum IdentifierOrPattern { Pattern(RegexScalarExpression), } -fn parse_identifier_or_pattern_literal( +fn parse_identifier_or_pattern_literal<'a, R>( scope: &dyn ParserScope, location: QueryLocation, - identifier_or_pattern_literal: Pair, -) -> Result, ParserError> { + identifier_or_pattern_literal: Pair<'a, R>, +) -> Result, ParserError> +where + R: RuleType + ScalarExprRules, + Pair<'a, R>: TryAsBaseRule, +{ let raw = identifier_or_pattern_literal.as_str(); let value: Box = match identifier_or_pattern_literal.into_inner().next() { - Some(r) => match r.as_rule() { - Rule::string_literal => match parse_string_literal(r) { + Some(r) => match r.try_as_base_rule()? 
{ + BaseRule::string_literal => match parse_string_literal(r) { StaticScalarExpression::String(v) => v.get_value().into(), _ => panic!("Unexpected type returned from parse_string_literal"), }, diff --git a/rust/experimental/query_engine/parser-abstractions/src/parser_error.rs b/rust/experimental/query_engine/parser-abstractions/src/parser_error.rs index d1d3ba1c32..7d98063400 100644 --- a/rust/experimental/query_engine/parser-abstractions/src/parser_error.rs +++ b/rust/experimental/query_engine/parser-abstractions/src/parser_error.rs @@ -1,6 +1,8 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 +use std::convert::Infallible; + use data_engine_expressions::{ExpressionError, QueryLocation}; use thiserror::Error; @@ -27,6 +29,9 @@ pub enum ParserError { location: QueryLocation, key: String, }, + + #[error("{0}")] + RuleConversionError(String), } impl From<&ExpressionError> for ParserError { @@ -34,3 +39,9 @@ impl From<&ExpressionError> for ParserError { ParserError::SyntaxError(value.get_query_location().clone(), value.to_string()) } } + +impl From for ParserError { + fn from(_value: Infallible) -> Self { + unreachable!("infallible error never occurs") + } +} diff --git a/rust/otap-dataflow/Cargo.toml b/rust/otap-dataflow/Cargo.toml index 06a284e100..8e57bf61fd 100644 --- a/rust/otap-dataflow/Cargo.toml +++ b/rust/otap-dataflow/Cargo.toml @@ -53,6 +53,8 @@ otap-df-otap = { path = "crates/otap" } quiver = { package = "otap-df-quiver", path = "crates/quiver" } data_engine_expressions = { path = "../experimental/query_engine/expressions" } data_engine_kql_parser = { path = "../experimental/query_engine/kql-parser" } +data_engine_kql_parser_macros = { path = "../experimental/query_engine/kql-parser/src/macros" } +data_engine_parser_abstractions = { path = "../experimental/query_engine/parser-abstractions"} ahash = "0.8.11" arc-swap = "1.7" @@ -112,6 +114,8 @@ opentelemetry-prometheus = "0.31.0" parking_lot = "0.12.5" paste = "1" parquet = { 
version = "57.0", default-features = false, features = ["arrow", "async", "object_store"]} +pest = "2.8" +pest_derive = "2.8" portpicker = "0.1.1" pretty_assertions = "1.4.1" proc-macro2 = "1.0" diff --git a/rust/otap-dataflow/crates/opl/Cargo.toml b/rust/otap-dataflow/crates/opl/Cargo.toml new file mode 100644 index 0000000000..fb877b0ef8 --- /dev/null +++ b/rust/otap-dataflow/crates/opl/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "otap-df-opl" +version.workspace = true +authors.workspace = true +repository.workspace = true +license.workspace = true +publish.workspace = true +edition.workspace = true +rust-version.workspace = true + +[dependencies] +data_engine_expressions = { workspace = true } +data_engine_parser_abstractions = { workspace = true } +data_engine_kql_parser_macros = { workspace = true } +data_engine_kql_parser = { workspace = true } +pest = { workspace = true } +pest_derive = { workspace = true } \ No newline at end of file diff --git a/rust/otap-dataflow/crates/opl/src/lib.rs b/rust/otap-dataflow/crates/opl/src/lib.rs new file mode 100644 index 0000000000..1122c82cc1 --- /dev/null +++ b/rust/otap-dataflow/crates/opl/src/lib.rs @@ -0,0 +1,4 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +pub mod parser; diff --git a/rust/otap-dataflow/crates/opl/src/opl.pest b/rust/otap-dataflow/crates/opl/src/opl.pest new file mode 100644 index 0000000000..79f12f0f2b --- /dev/null +++ b/rust/otap-dataflow/crates/opl/src/opl.pest @@ -0,0 +1,26 @@ +// Grammar for OPL, a KQL-inspired processing language used for observability data +// +// This contains definition of the query, as well the tabular expressions and non-standard KQL +// expressions supported by this language + +tabular_expressions = _{ + extend_expression + | project_expression + | project_keep_expression + | project_away_expression + | project_rename_expression + | where_expression +} + +tabular_expression = { + identifier_literal ~ ("|" ~ tabular_expressions)* 
+} + + +query_expressions = _{ + tabular_expression +} + +query = { + SOI ~ (query_expressions ~ (";" ~ query_expressions)* ~ ";"?)? ~ EOI +} \ No newline at end of file diff --git a/rust/otap-dataflow/crates/opl/src/parser.rs b/rust/otap-dataflow/crates/opl/src/parser.rs new file mode 100644 index 0000000000..1b7d05bd69 --- /dev/null +++ b/rust/otap-dataflow/crates/opl/src/parser.rs @@ -0,0 +1,146 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//! Utilities for parsing OPL programs +//! +use data_engine_expressions::QueryLocation; +use data_engine_kql_parser::map_parse_error; +use data_engine_kql_parser_macros::BaseRuleCompatible; +use data_engine_parser_abstractions::{ + Parser, ParserError, ParserOptions, ParserResult, ParserState, to_query_location, +}; +use pest::Parser as _; + +use crate::parser::tabular_expression::parse_tabular_expression; + +mod tabular_expression; + +#[derive(pest_derive::Parser, BaseRuleCompatible)] +#[grammar = "../../../../experimental/query_engine/kql-parser/src/base.pest"] +#[grammar = "opl.pest"] +struct OplPestParser {} + +/// Parser for OPL programs +pub struct OplParser {} + +impl Parser for OplParser { + fn parse_with_options( + query: &str, + options: ParserOptions, + ) -> Result> { + let state = ParserState::new_with_options(query, options); + let mut errors = Vec::new(); + + // parse query + let mut parser_rules = match OplPestParser::parse(Rule::query, query) { + Ok(query_rules) => query_rules, + Err(pest_error) => { + errors.push(map_parse_error(query, pest_error)); + return Err(errors); + } + }; + + // get the main query rule + let query_rule = match parser_rules.next() { + Some(rule) => rule, + None => { + // query is invalid at the start + let query_location = + QueryLocation::new(1, query.len(), 1, 1).expect("valid query location"); + + errors.push(ParserError::SyntaxError( + query_location, + "No query found".to_string(), + )); + return Err(errors); + } + }; + + // build pipeline 
from query rules + for rule in query_rule.into_inner() { + match rule.as_rule() { + Rule::tabular_expression => { + let expressions = match parse_tabular_expression(rule, &state) { + Ok(exprs) => exprs, + Err(e) => { + errors.push(e); + continue; + } + }; + for expr in expressions { + state.push_expression(expr); + } + } + Rule::EOI => {} + other => errors.push(ParserError::SyntaxError( + to_query_location(&rule), + format!("Unexpected rule in OPL query: {:?}", other), + )), + } + } + + if !errors.is_empty() { + return Err(errors); + } + + Ok(ParserResult::new(state.build()?)) + } +} + +#[cfg(test)] +mod test { + use data_engine_expressions::{ + DataExpression, DiscardDataExpression, GreaterThanLogicalExpression, + IntegerScalarExpression, LogicalExpression, NotLogicalExpression, QueryLocation, + ScalarExpression, SourceScalarExpression, StaticScalarExpression, StringScalarExpression, + ValueAccessor, + }; + use data_engine_kql_parser::Parser; + + use crate::parser::OplParser; + + #[test] + fn test_olp_parser() { + // smoke test to ensure we can parse transform expressions from the query + let result = OplParser::parse("logs | where severity_number > 0"); + assert!(result.is_ok()); + + let pipeline = result.unwrap().pipeline; + let expressions = &pipeline.get_expressions(); + assert_eq!( + expressions, + &[DataExpression::Discard( + DiscardDataExpression::new(QueryLocation::new_fake()).with_predicate( + LogicalExpression::Not(NotLogicalExpression::new( + QueryLocation::new_fake(), + LogicalExpression::GreaterThan(GreaterThanLogicalExpression::new( + QueryLocation::new_fake(), + ScalarExpression::Source(SourceScalarExpression::new( + QueryLocation::new_fake(), + ValueAccessor::new_with_selectors(vec![ScalarExpression::Static( + StaticScalarExpression::String(StringScalarExpression::new( + QueryLocation::new_fake(), + "severity_number", + )) + ),]) + )), + ScalarExpression::Static(StaticScalarExpression::Integer( + 
IntegerScalarExpression::new(QueryLocation::new_fake(), 0), + )), + )), + )), + ), + ),] + ) + } + + #[test] + fn test_parse_empty_query() { + let result = OplParser::parse(""); + assert!(result.is_ok()); + + let pipeline = result.unwrap().pipeline; + let expressions = &pipeline.get_expressions(); + assert_eq!(expressions.len(), 0); + } +} diff --git a/rust/otap-dataflow/crates/opl/src/parser/tabular_expression.rs b/rust/otap-dataflow/crates/opl/src/parser/tabular_expression.rs new file mode 100644 index 0000000000..68cd1a69ea --- /dev/null +++ b/rust/otap-dataflow/crates/opl/src/parser/tabular_expression.rs @@ -0,0 +1,40 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +use data_engine_expressions::DataExpression; +use data_engine_kql_parser::{ + ParserError, tabular_expressions::parse_common_tabular_expression_rule, +}; +use data_engine_parser_abstractions::ParserScope; +use pest::iterators::Pair; + +use crate::parser::Rule; + +pub(crate) fn parse_tabular_expression( + tabular_expression_rule: Pair, + scope: &dyn ParserScope, +) -> Result, ParserError> { + let mut rules = tabular_expression_rule.into_inner(); + + // Note: This is the identifier. In a query like logs | extend a=b the + // identifier is "logs" which is not currently used for anything. + let _ = rules.next(); + + let mut expressions = Vec::new(); + + for rule in rules { + for e in parse_tabular_expression_rule(rule, scope)? { + expressions.push(e); + } + } + + Ok(expressions) +} + +pub(crate) fn parse_tabular_expression_rule( + tabular_expression_rule: Pair, + scope: &dyn ParserScope, +) -> Result, ParserError> { + // TODO handle custom tabular expressions here + parse_common_tabular_expression_rule(tabular_expression_rule, scope) +}