Skip to content
Closed
5 changes: 5 additions & 0 deletions rust/experimental/query_engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ members = [
"expressions",
"parser-abstractions",
"kql-parser",
"kql-parser/src/macros",
"ottl-parser",
"engine-recordset",
"engine-recordset-otlp-bridge",
Expand All @@ -26,7 +27,11 @@ hex = "0.4.3"
opentelemetry-proto = "0.31.0"
pest = "2.8"
pest_derive = "2.8"
pest_meta = "2.8"
proc-macro-crate = "3.4.0"
proc-macro2 = "1.0"
prost = "0.14"
quote = "1.0"
regex = "1.11.1"
serde_json = "1.0.140"
sha2 = "0.10.8"
Expand Down
3 changes: 3 additions & 0 deletions rust/experimental/query_engine/kql-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ rust-version.workspace = true
chrono = { workspace = true }
pest = { workspace = true }
pest_derive = { workspace = true }
pest_meta = { workspace = true }
proc-macro2 = { workspace = true }
regex = { workspace = true }

data_engine_expressions = { path = "../expressions" }
data_engine_kql_parser_macros = { path = "src/macros" }
data_engine_parser_abstractions = { path = "../parser-abstractions" }
329 changes: 329 additions & 0 deletions rust/experimental/query_engine/kql-parser/src/base.pest
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
// Base KQL Grammar is heavily influenced by the following sources:
// https://github.com/microsoft/Kusto-Query-Language/blob/master/grammar/KqlTokens.g4
// https://github.com/microsoft/Kusto-Query-Language/blob/master/grammar/Kql.g4

// Parsers that support KQL expressions can use this grammar and provide an additional grammar
// with queries, tabular expressions, etc. that the language supports, as well as any special
// language features.

// These two special rules, when defined, are implicitly allowed:
// - between the elements of every sequence (split by ~)
// - between the iterations of every repetition (+ or *)
// Atomic rules (marked with @ or $) are excluded
// See https://pest.rs/book/grammars/syntax.html#implicit-whitespace
// Implicit whitespace: a space, any NEWLINE, or a tab. Silent (_), so it never
// shows up in the parse output.
WHITESPACE = _{ " " | NEWLINE | "\t" }
// Implicit comments: a /* ... */ block comment, which ends at the first "*/"
// (so block comments do not nest), or a // comment that runs up to, but does
// not include, the next "\n". Silent, like WHITESPACE.
COMMENT = _{ ("/*" ~ (!"*/" ~ ANY)* ~ "*/") | ("//" ~ (!"\n" ~ ANY)*) }

// Math Tokens
// Single-character arithmetic operators.
minus_token = { "-" }
plus_token = { "+" }
multiply_token = { "*" }
divide_token = { "/" }
modulo_token = { "%" }
// Signed infinity spellings accepted inside real(...) (see real_expression).
positive_infinity_token = { "+inf" }
negative_infinity_token = { "-inf" }

// Logical Tokens
// All of these are atomic (@), so no implicit whitespace or comments may appear
// inside a token.
// A lone "=" that is not the start of "==" or "=~". It is listed alongside the
// real comparison operators in scalar_logical_binary_expression.
// NOTE(review): presumably this exists so that "=" used as a comparison can be
// reported with a targeted error - confirm against the consuming code.
invalid_equals_token = @{ "=" ~ !("="|"~") }
equals_token = @{ "==" }
equals_insensitive_token = @{ "=~" }
not_equals_token = @{ "!=" }
not_equals_insensitive_token = @{ "!~" }
// The negative lookahead keeps ">" from matching the first character of ">=".
greater_than_token = @{ ">" ~ !"=" }
greater_than_or_equal_to_token = @{ ">=" }
// The negative lookahead keeps "<" from matching the first character of "<=".
less_than_token = @{ "<" ~ !"=" }
less_than_or_equal_to_token = @{ "<=" }
// Keyword operators. They match the bare text only; nothing here checks the
// character that follows the keyword.
and_token = @{ "and" }
or_token = @{ "or" }

// Comparison Tokens
// String and set comparison keywords, each matched as bare text by an atomic
// rule. Several tokens are prefixes of others ("contains" / "contains_cs",
// "has" / "has_cs", "in" / "in~", and their "!" forms), so any rule that
// chooses between them has to list the longer token first, as
// scalar_logical_binary_expression does.
contains_token = @{ "contains" }
contains_cs_token = @{ "contains_cs" }
has_token = @{ "has" }
has_cs_token = @{ "has_cs" }
in_token = @{ "in" }
in_insensitive_token = @{ "in~" }
not_contains_token = @{ "!contains" }
not_contains_cs_token = @{ "!contains_cs" }
not_has_token = @{ "!has" }
not_has_cs_token = @{ "!has_cs" }
not_in_token = @{ "!in" }
not_in_insensitive_token = @{ "!in~" }
// A single literal containing one space: because the rule is atomic, exactly
// one space character must separate "matches" and "regex".
matches_regex_token = @{ "matches regex" }

// Misc Tokens
// Positive lookahead only: succeeds when the next character is ";" but does not
// consume it, so the ";" is still available to whatever rule follows.
statement_end_token = { &";" }

// Literals
// Boolean literals in the three accepted casings. The trailing negative
// lookahead stops the keyword from matching the front of a longer identifier
// (e.g. "trueCount" or "false_positives"); without it such a name is split into
// a boolean followed by stray text and never reaches accessor_expression. The
// lookahead consumes nothing, so the matched text is still just the keyword.
true_literal = @{ ("true" | "True" | "TRUE") ~ !("_" | ASCII_ALPHANUMERIC) }
false_literal = @{ ("false" | "False" | "FALSE") ~ !("_" | ASCII_ALPHANUMERIC) }
// Optionally negative integer: either a non-zero digit followed by any number
// of digits, or a single "0". A leading zero is never followed by more digits
// within the same match. Atomic, so no whitespace is allowed inside the number.
integer_literal = @{ "-"? ~ (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* | "0") }
// One character inside a double-quoted string: any character other than the
// closing quote or a backslash, or a backslash escape (\" \\ \n \r \t or
// \u followed by exactly four hex digits). Silent.
double_quote_string_char = _{
    !("\"" | "\\") ~ ANY
    | "\\" ~ ("\"" | "\\" | "n" | "r" | "t" | "u" ~ ASCII_HEX_DIGIT{4})
}
// One character inside a single-quoted string: same as above, with \' in place
// of \" as the quote escape. Silent.
single_quote_string_char = _{
    !("'" | "\\") ~ ANY
    | "\\" ~ ("'" | "\\" | "n" | "r" | "t" | "u" ~ ASCII_HEX_DIGIT{4})
}
// Exponent suffix of a double: "e" or "E" (^ makes the literal
// case-insensitive), an optional sign token, then integer_literal. Note that
// integer_literal also accepts its own leading "-". The rule is not atomic by
// itself, but it is only referenced from the atomic double_literal here.
exponent_literal = { ^"e" ~ (plus_token | minus_token)? ~ integer_literal }
// Floating point literal: either <integer>.<digits> with an optional exponent,
// or <integer> with a mandatory exponent. Atomic, so no whitespace is allowed
// inside the number.
// The fractional part is ASCII_DIGIT+ rather than integer_literal because
// integer_literal stops after a leading "0" and accepts its own "-" sign: with
// it, "1.05" matched only "1.0" (leaving a stray "5") and "1.-5" was accepted.
double_literal = @{
    (integer_literal ~ "." ~ ASCII_DIGIT+ ~ exponent_literal?)
    | (integer_literal ~ exponent_literal)
}
// A string in double or single quotes. Atomic, so whitespace and comment
// markers between the quotes are part of the string rather than being skipped.
// The matched text includes the surrounding quotes.
string_literal = @{
("\"" ~ double_quote_string_char* ~ "\"")
| ("'" ~ single_quote_string_char* ~ "'")
}
// Names of the scalar types recognised by this grammar. Used by null_literal
// and typeof_expression.
type_literal = {
"bool"
| "datetime"
| "decimal"
| "double"
| "dynamic"
| "guid"
| "int"
| "long"
| "real"
| "regex"
| "string"
| "timespan"
}
// A typed null: a type name followed by "(null)", e.g. int(null).
null_literal = { type_literal ~ "(" ~ "null" ~ ")" }
// A plain identifier: "_" or an ASCII letter, followed by any number of "_" or
// ASCII letters/digits.
identifier_literal = @{ ("_" | ASCII_ALPHA) ~ ("_" | ASCII_ALPHANUMERIC)* }
// Either an identifier that may also contain "*" at any position and is not
// immediately followed by "[" or ".", or a string literal wrapped in square
// brackets. Compound-atomic ($): no implicit whitespace inside, but inner rules
// such as string_literal still produce their own pairs.
// NOTE(review): the "[" / "." lookahead presumably leaves indexed and dotted
// paths to accessor_expression in project-keep / project-away - confirm.
identifier_or_pattern_literal = ${
(("_" | ASCII_ALPHA | "*") ~ ("_" | ASCII_ALPHANUMERIC | "*")* ~ !("["|"."))
| ("[" ~ string_literal ~ "]")
}
// Unquoted datetime text: one or more ASCII letters/digits or any of
// - + : / . ,
// The rule is not atomic, so implicit WHITESPACE/COMMENT is permitted between
// repetitions and the matched span can therefore contain spaces. No date
// validation happens at the grammar level.
datetime_literal = { (ASCII_ALPHANUMERIC|"-"|"+"|":"|"/"|"."|",")+ }
// Unquoted timespan text: optional "-", an optional leading digit group ending
// in ".", three ":"-separated digit groups, and an optional "." fraction.
// NOTE(review): presumably [-][d.]hh:mm:ss[.fraction] - confirm with the
// consuming code.
time_literal = {
"-"? ~ (ASCII_DIGIT+ ~ ".")? ~ ASCII_DIGIT+ ~ ":" ~ ASCII_DIGIT+ ~ ":" ~ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)?
}
// Unit suffix for a timespan value. Long names accept an optional plural "s";
// they are tried before the abbreviations so that, for example, "ms" and
// "minutes" are not cut short by the single-letter "m".
time_units_literal = @{
    ("day" | "hour" | "microsecond" | "millisecond" | "minute" | "second" | "tick") ~ "s"?
    | "ms"
    | "d"
    | "h"
    | "m"
    | "s"
}

// Expressions
// A bracketed index: an integer literal, a string literal, or a scalar
// expression optionally preceded by "-". Silent, so only the inner pairs appear
// in the output. The alternatives are an ordered choice, so an index that
// begins with an integer or string literal is taken by that alternative first.
accessor_index = _{ "[" ~ (integer_literal | string_literal | (minus_token? ~ scalar_expression)) ~ "]" }
// One path segment: an identifier or a bracketed string literal, optionally
// followed by a single index. Silent.
accessor = _{ (identifier_literal | ("[" ~ string_literal ~ "]")) ~ accessor_index? }
// A full access path: a first segment followed by any mix of "." + segment and
// bracketed indexes.
accessor_expression = { accessor ~ (("." ~ accessor)|accessor_index)* }

// real(...) wrapping +inf, -inf, a double, or an integer. "real(" is a single
// literal, so no whitespace is accepted between "real" and "(".
real_expression = { "real(" ~ (positive_infinity_token|negative_infinity_token|double_literal|integer_literal) ~ ")" }
// datetime(...) wrapping a quoted string or bare datetime text. "datetime(" is
// a single literal.
datetime_expression = { "datetime(" ~ (string_literal|datetime_literal) ~ ")" }
// A timespan value, in one of three forms tried in order:
// - a number followed by a unit
// - timespan(...) wrapping bare time text or a quoted string
// - timespan(...) wrapping a number with an optional unit
time_expression = {
(double_literal|integer_literal) ~ time_units_literal
| "timespan(" ~ (time_literal|string_literal) ~ ")"
| "timespan(" ~ (double_literal|integer_literal) ~ time_units_literal? ~ ")"
}
// regex(...) taking one string literal and an optional second string literal.
// NOTE(review): the second string is presumably regex options - confirm.
regex_expression = { "regex(" ~ string_literal ~ ("," ~ string_literal)? ~ ")" }
// A possibly empty, comma-separated array of dynamic values in [ ].
dynamic_array_expression = { "[" ~ (dynamic_inner_expression ~ ("," ~ dynamic_inner_expression)*)? ~ "]" }
// One map entry: a string-literal key, ":", and a dynamic value.
dynamic_map_item_expression = { string_literal ~ ":" ~ dynamic_inner_expression }
// A possibly empty, comma-separated map of entries in { }.
dynamic_map_expression = { "{" ~ (dynamic_map_item_expression ~ ("," ~ dynamic_map_item_expression)*)? ~ "}" }
// Any value allowed inside dynamic(...): an array, a map, or a typed literal.
// Arrays and maps can nest because their items refer back to this rule. Silent.
dynamic_inner_expression = _{ dynamic_array_expression|dynamic_map_expression|type_unary_expressions }
// dynamic(...) wrapping exactly one dynamic value.
dynamic_expression = { "dynamic" ~ "(" ~ dynamic_inner_expression ~ ")" }
// Literal and typed-constructor values. The alternatives are an ordered choice:
// forms that begin with a type name or a number plus suffix (null_literal,
// real_expression, datetime_expression, time_expression, ...) are listed before
// the bare double/integer/string literals so the longer form wins.
type_unary_expressions = {
null_literal
| real_expression
| datetime_expression
| time_expression
| regex_expression
| dynamic_expression
| true_literal
| false_literal
| double_literal
| integer_literal
| string_literal
}

// gettype(<scalar>): takes any scalar expression.
get_type_expression = { "gettype" ~ "(" ~ scalar_expression ~ ")" }
// typeof(<type>): takes one of the names listed in type_literal.
typeof_expression = { "typeof" ~ "(" ~ type_literal ~ ")" }

// iff(condition, a, b) or iif(condition, a, b): exactly three arguments.
conditional_expression = { ("iff"|"iif") ~ "(" ~ logical_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" }
// case(condition, value [, condition, value]..., final): one or more
// condition/value pairs followed by one trailing scalar expression.
case_expression = { "case" ~ "(" ~ logical_expression ~ "," ~ scalar_expression ~ ("," ~ logical_expression ~ "," ~ scalar_expression)* ~ "," ~ scalar_expression ~ ")" }
// coalesce(a, b, ...): at least two arguments.
coalesce_expression = { "coalesce" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")" }
// Groups the conditional functions under one rule.
conditional_unary_expressions = {
conditional_expression
| case_expression
| coalesce_expression
}

// Conversion functions. Each takes exactly one scalar expression argument.
tostring_expression = { "tostring" ~ "(" ~ scalar_expression ~ ")" }
toint_expression = { "toint" ~ "(" ~ scalar_expression ~ ")" }
tobool_expression = { "tobool" ~ "(" ~ scalar_expression ~ ")" }
tofloat_expression = { "tofloat" ~ "(" ~ scalar_expression ~ ")" }
tolong_expression = { "tolong" ~ "(" ~ scalar_expression ~ ")" }
toreal_expression = { "toreal" ~ "(" ~ scalar_expression ~ ")" }
todouble_expression = { "todouble" ~ "(" ~ scalar_expression ~ ")" }
todatetime_expression = { "todatetime" ~ "(" ~ scalar_expression ~ ")" }
totimespan_expression = { "totimespan" ~ "(" ~ scalar_expression ~ ")" }
// Groups the conversion functions under one rule.
conversion_unary_expressions = {
tostring_expression
| toint_expression
| tobool_expression
| tofloat_expression
| tolong_expression
| toreal_expression
| todouble_expression
| todatetime_expression
| totimespan_expression
}

// strlen(a): one argument.
strlen_expression = { "strlen" ~ "(" ~ scalar_expression ~ ")" }
// replace_string(a, b, c): exactly three arguments.
replace_string_expression = { "replace_string" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" }
// substring(a, b [, c]): two or three arguments.
substring_expression = { "substring" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)? ~ ")" }
// strcat(a, ...): one or more arguments, parsed as a scalar_list_expression.
strcat_expression = { "strcat" ~ scalar_list_expression }
// strcat_delim(a, b, ...): at least two arguments.
strcat_delim_expression = { "strcat_delim" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")" }
// extract(a, b, c): exactly three arguments.
extract_expression = { "extract" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ "," ~ scalar_expression ~ ")" }
// Groups the string functions under one rule. strcat_expression is listed
// before strcat_delim_expression; this works because "strcat" must be followed
// by "(", so "strcat_delim(" fails the first alternative and reaches the second.
string_unary_expressions = {
strlen_expression
| replace_string_expression
| substring_expression
| strcat_expression
| strcat_delim_expression
| extract_expression
}

// parse_json(a): one argument.
parse_json_expression = { "parse_json" ~ "(" ~ scalar_expression ~ ")" }
// parse_regex(a [, b]): one or two arguments.
parse_regex_expression = { "parse_regex" ~ "(" ~ scalar_expression ~ ("," ~ scalar_expression)? ~ ")" }
// Groups the parse functions under one rule.
parse_unary_expressions = {
parse_json_expression
| parse_regex_expression
}

// array_concat(a, ...): one or more arguments, parsed as a
// scalar_list_expression.
array_concat_expression = { "array_concat" ~ scalar_list_expression }
// Groups the array functions under one rule (currently a single member).
array_unary_expressions = {
array_concat_expression
}

// Unary minus applied to a unary expression. In scalar_unary_expression the
// literal rules are tried first, so a negative number such as -1 is matched by
// integer_literal and does not reach this rule.
negate_expression = { "-" ~ scalar_unary_expression }
// bin(a, b): exactly two arguments.
bin_expression = { "bin" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ")" }
// Groups the math functions under one rule.
math_unary_expressions = {
negate_expression
| bin_expression
}

// now() or now(a): the single argument is optional.
now_expression = { "now" ~ "(" ~ scalar_expression? ~ ")" }
// Groups the temporal functions under one rule (currently a single member).
temporal_unary_expressions = {
now_expression
}

// not(condition): one logical expression argument.
not_expression = { "not" ~ "(" ~ logical_expression ~ ")" }
// Groups the logical functions under one rule (currently a single member).
logical_unary_expressions = {
not_expression
}

// extract_json(a, b [, typeof(<type>)]): two scalar arguments and an optional
// typeof(...) third argument.
extract_json_expression = { "extract_json" ~ "(" ~ scalar_expression ~ "," ~ scalar_expression ~ ("," ~ typeof_expression)? ~ ")" }

// One argument of a function call: either name = value or a bare value. The
// named form is tried first; if no "=" follows the identifier the whole
// argument is re-parsed as a scalar expression.
invoke_function_argument_expression = {
(identifier_literal ~ "=" ~ scalar_expression)
| scalar_expression
}
// A call to a function by identifier, with zero or more comma-separated
// arguments.
invoke_function_expression = { identifier_literal ~ "(" ~ (invoke_function_argument_expression ~ ("," ~ invoke_function_argument_expression)*)? ~ ")" }

/* Note: Order is important here. Pest uses ordered choice: once an alternative
has matched, the alternatives after it are never tried. For example, if
integer_literal were listed before time_expression, "1h" would be parsed as
integer_literal(1) and the remaining "h" would be fed into the next rule. */
// A single operand of a scalar expression. Alternatives are tried top to
// bottom: literals and typed constructors first, then the built-in function
// groups, then a generic function call, then an accessor path, and finally a
// parenthesised scalar expression. The built-in function rules come before
// invoke_function_expression so their names are not treated as generic calls.
scalar_unary_expression = {
type_unary_expressions
| get_type_expression
| conditional_unary_expressions
| conversion_unary_expressions
| string_unary_expressions
| array_unary_expressions
| math_unary_expressions
| temporal_unary_expressions
| logical_unary_expressions
| parse_unary_expressions
| extract_json_expression
| invoke_function_expression
| accessor_expression
| "(" ~ scalar_expression ~ ")"
}

// An arithmetic operator followed by its right-hand operand. Silent, so the
// operator token and operand appear directly under the enclosing rule.
scalar_arithmetic_binary_expression = _{
(multiply_token|divide_token|modulo_token) ~ scalar_unary_expression
| (plus_token|minus_token) ~ scalar_unary_expression
}
// A comparison or logical operator followed by its right-hand side. Silent.
// Within each group the longer tokens come first ("!contains_cs" before
// "!contains", "in~" before "in", ...) because the shorter token is a prefix of
// the longer one. The in / !in family takes a parenthesised list; every other
// operator takes a single unary expression.
scalar_logical_binary_expression = _{
(equals_token|equals_insensitive_token|not_equals_token|not_equals_insensitive_token|greater_than_token|greater_than_or_equal_to_token|less_than_token|less_than_or_equal_to_token|invalid_equals_token) ~ scalar_unary_expression
| matches_regex_token ~ scalar_unary_expression
| (not_contains_cs_token|not_contains_token|not_has_cs_token|not_has_token|contains_cs_token|contains_token|has_cs_token|has_token) ~ scalar_unary_expression
| (not_in_insensitive_token|not_in_token|in_insensitive_token|in_token) ~ scalar_list_expression
| (and_token|or_token) ~ scalar_unary_expression
}

// A flat chain: one operand followed by any number of (operator, operand)
// pairs. The grammar does not encode operator precedence or associativity.
// NOTE(review): precedence is presumably resolved by the code that consumes the
// pairs - confirm.
scalar_expression = {
scalar_unary_expression ~ (scalar_arithmetic_binary_expression|scalar_logical_binary_expression)*
}

// A parenthesised, comma-separated list of one or more scalar expressions.
scalar_list_expression = {
"(" ~ scalar_expression ~ ("," ~ scalar_expression)* ~ ")"
}

// A scalar expression used where a condition is expected. The grammar accepts
// any scalar expression here; it does not check that the result is boolean.
logical_expression = {
scalar_expression
}

// <accessor path> = <scalar expression>.
assignment_expression = { accessor_expression ~ "=" ~ scalar_expression }

// extend: one or more comma-separated assignments.
extend_expression = { "extend" ~ assignment_expression ~ ("," ~ assignment_expression)* }
// project: one or more comma-separated items, each an assignment or a bare
// accessor path. The assignment form is tried first.
project_expression = { "project" ~ (assignment_expression|accessor_expression) ~ ("," ~ (assignment_expression|accessor_expression))* }
// project-keep: one or more comma-separated items, each an identifier/pattern
// (may contain "*") or an accessor path.
project_keep_expression = { "project-keep" ~ (identifier_or_pattern_literal|accessor_expression) ~ ("," ~ (identifier_or_pattern_literal|accessor_expression))* }
// project-away: same item syntax as project-keep.
project_away_expression = { "project-away" ~ (identifier_or_pattern_literal|accessor_expression) ~ ("," ~ (identifier_or_pattern_literal|accessor_expression))* }
// project-rename: one or more comma-separated assignments.
project_rename_expression = { "project-rename" ~ assignment_expression ~ ("," ~ assignment_expression)* }
// where: a single condition.
where_expression = { "where" ~ logical_expression }

// Aggregate functions. avg, max, min and sum take exactly one scalar
// expression; count takes no arguments.
average_aggregate_expression = { "avg" ~ "(" ~ scalar_expression ~ ")" }
count_aggregate_expression = { "count" ~ "(" ~ ")" }
maximum_aggregate_expression = { "max" ~ "(" ~ scalar_expression ~ ")" }
minimum_aggregate_expression = { "min" ~ "(" ~ scalar_expression ~ ")" }
sum_aggregate_expression = { "sum" ~ "(" ~ scalar_expression ~ ")" }

// Any one of the aggregate functions. Silent, so the specific aggregate rule
// appears directly under aggregate_expression.
aggregate_expressions = _{
average_aggregate_expression
| count_aggregate_expression
| maximum_aggregate_expression
| minimum_aggregate_expression
| sum_aggregate_expression
}

// An aggregate, optionally given a result name with name = aggregate. The named
// form is tried first.
aggregate_expression = {
identifier_literal ~ "=" ~ aggregate_expressions
| aggregate_expressions
}

// A grouping key: a scalar expression, optionally given a name with
// name = expression. The named form is tried first.
group_by_expression = {
identifier_literal ~ "=" ~ scalar_expression
| scalar_expression
}

// let <name> = <scalar expression>, which must be followed by ";".
// statement_end_token only looks ahead for the ";" and does not consume it.
variable_definition_expression = { "let" ~ identifier_literal ~ "=" ~ scalar_expression ~ statement_end_token }


Loading
Loading