From d3ccb4927357c55c29af0b85fe96ba9d7698e8f0 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Tue, 13 Oct 2020 18:42:13 +0100 Subject: [PATCH 0001/1036] Initial commit: cargo-generated boilerplate --- .gitignore | 1 + Cargo.lock | 5 +++++ Cargo.toml | 9 +++++++++ src/main.rs | 3 +++ 4 files changed, 18 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000000..ea8c4bf7f35f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 000000000000..37384c782bdc --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "codeql-ruby" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000000..671b9fbd4635 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "codeql-ruby" +version = "0.1.0" +authors = ["Nick Rolfe "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 000000000000..e7a11a969c03 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} From 89959b2e0d3d81572c5d89bb85ec20976fb04ffc Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Wed, 14 Oct 2020 11:15:59 +0100 Subject: [PATCH 0002/1036] Add tree-sitter-ruby submodule --- .gitmodules | 3 +++ tree-sitter-ruby | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 tree-sitter-ruby diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000000..7ea9bdc539c1 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tree-sitter-ruby"] + path = tree-sitter-ruby + url = https://github.com/tree-sitter/tree-sitter-ruby.git diff --git a/tree-sitter-ruby b/tree-sitter-ruby new file mode 160000 index 000000000000..724eedf253d1 --- /dev/null +++ b/tree-sitter-ruby @@ -0,0 +1 @@ +Subproject commit 724eedf253d172e1102d48f14ea1fc41a512f4fa From b677a91fea5c8ae63a8e768e701c171eccbc726f Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Wed, 14 Oct 2020 11:16:28 +0100 Subject: [PATCH 0003/1036] Add VSCode workspace --- codeql-ruby.code-workspace | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 codeql-ruby.code-workspace diff --git a/codeql-ruby.code-workspace b/codeql-ruby.code-workspace new file mode 100644 index 000000000000..a66afb9fef76 --- /dev/null +++ b/codeql-ruby.code-workspace @@ -0,0 +1,10 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "editor.formatOnSave": true + } +} \ No newline at end of file From 6c697bf9b5586ccb47716652d5be01b07ad4f7b4 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Thu, 15 Oct 2020 13:20:11 +0100 Subject: [PATCH 0004/1036] Split into generator and extractor packages --- Cargo.lock | 156 +++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 11 +-- extractor/Cargo.toml | 13 ++++ extractor/build.rs | 11 +++ extractor/src/main.rs | 18 +++++ generator/Cargo.toml | 11 +++ generator/src/main.rs | 3 + src/main.rs | 3 - 8 files changed, 213 insertions(+), 13 deletions(-) create mode 100644 extractor/Cargo.toml create mode 100644 extractor/build.rs create mode 100644 extractor/src/main.rs create mode 100644 generator/Cargo.toml create mode 100644 generator/src/main.rs delete mode 100644 src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 37384c782bdc..7e69efbce252 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,159 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. [[package]] -name = "codeql-ruby" +name = "aho-corasick" +version = "0.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b476ce7103678b0c6d3d395dbbae31d48ff910bd28be979ba5d48c6351131d0d" +dependencies = [ + "memchr", +] + +[[package]] +name = "cc" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed67cbde08356238e75fc4656be4749481eeffb09e19f320a25237d5221c985d" + +[[package]] +name = "generator" version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "itoa" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "memchr" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8963b85b8ce3074fecffde43b4b0dded83ce2f367dc8d363afc56679f3ee820b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cab7a364d15cde1e505267766a2d3c4e22a843e1a601f0fa7564c0f82ced11c" + +[[package]] +name = "ruby-extractor" +version = "0.1.0" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "serde" +version = "1.0.116" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96fe57af81d28386a513cbc6858332abc6117cfdb5999647c6444b8f43a370a5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.116" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f630a6370fd8e457873b4bd2ffdae75408bc291ba72be773772a4c2a065d9ae8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcac07dbffa1c65e7f816ab9eba78eb142c6d44410f4eeba1e26e4f5dfa56b95" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e03e57e4fcbfe7749842d53e24ccb9aa12b7252dbe5e91d2acad31834c8b8fdd" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "thread_local" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "tree-sitter" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ee7370fec3aecde3862a7d64c571048f70a7298daef1815e8fc68b9de54b5c" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" diff --git a/Cargo.toml b/Cargo.toml index 671b9fbd4635..8a3ea43b0786 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,2 @@ -[package] -name = "codeql-ruby" -version = "0.1.0" -authors = ["Nick Rolfe "] -edition = "2018" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] +[workspace] +members = ["extractor", "generator"] diff --git a/extractor/Cargo.toml b/extractor/Cargo.toml new file mode 100644 index 000000000000..5a0bbbaf3430 --- /dev/null +++ b/extractor/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "ruby-extractor" +version = "0.1.0" +authors = ["GitHub"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +tree-sitter = "0.17.0" + +[build-dependencies] +cc="*" diff --git a/extractor/build.rs b/extractor/build.rs new file mode 100644 index 000000000000..2b849cd3ba58 --- /dev/null +++ b/extractor/build.rs @@ -0,0 +1,11 @@ +use std::path::PathBuf; + +fn main() { + let dir: PathBuf = ["../tree-sitter-ruby", "src"].iter().collect(); + + cc::Build::new() + .include(&dir) + .file(dir.join("parser.c")) + .file(dir.join("scanner.cc")) + .compile("tree-sitter-ruby"); +} diff --git a/extractor/src/main.rs b/extractor/src/main.rs new file mode 100644 index 000000000000..b1361cedf4e6 --- /dev/null +++ b/extractor/src/main.rs @@ -0,0 +1,18 @@ +use tree_sitter::{Language, Parser}; + +fn main() { + let mut parser = Parser::new(); + + extern "C" { + fn tree_sitter_ruby() -> Language; + } + + let language = unsafe { tree_sitter_ruby() }; + parser.set_language(language).unwrap(); + + let src = "def foo\n puts \"hello\"\nend"; + let tree = parser.parse(src, None).unwrap(); + let root_node = tree.root_node(); + + println!("Root: {}", root_node.to_sexp()); +} diff --git a/generator/Cargo.toml b/generator/Cargo.toml new file mode 100644 index 000000000000..8e789c7aff62 --- /dev/null +++ b/generator/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "generator" +version = "0.1.0" +authors = ["GitHub"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" diff --git a/generator/src/main.rs b/generator/src/main.rs new file mode 100644 index 000000000000..684368c2cfa1 --- /dev/null +++ b/generator/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("generator"); +} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index e7a11a969c03..000000000000 --- a/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello, world!"); -} From ffbb57a8e2dbbb42e2e054d5c6fe7c1c94ae1a81 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Thu, 15 Oct 2020 13:20:37 +0100 Subject: [PATCH 0005/1036] Make VSCode default to unix line endings --- codeql-ruby.code-workspace | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codeql-ruby.code-workspace b/codeql-ruby.code-workspace index a66afb9fef76..2a2ad9f01fe5 100644 --- a/codeql-ruby.code-workspace +++ b/codeql-ruby.code-workspace @@ -5,6 +5,7 @@ } ], "settings": { - "editor.formatOnSave": true + "editor.formatOnSave": true, + "files.eol": "\n" } } \ No newline at end of file From a837c65bc4db13124174235a9b12cf3bc984b3af Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Thu, 15 Oct 2020 13:21:12 +0100 Subject: [PATCH 0006/1036] Add VSCode build task for cargo build --- .vscode/tasks.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .vscode/tasks.json diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 000000000000..c28cd789fcae --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,14 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "cargo", + "subcommand": "build", + "problemMatcher": [ + "$rustc" + ], + "group": "build", + "label": "Rust: cargo build" + } + ] +} \ No newline at end of file From 735fde7a22589fab69d08703eb58f0f04985392a Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Thu, 15 Oct 2020 13:26:13 +0100 Subject: [PATCH 0007/1036] Add README --- README.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000000..efd22e317369 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# Ruby analysis support for CodeQL + +Under development. \ No newline at end of file From 97181d1c21954877eaa7fe740d495ccc92e3b53c Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Tue, 20 Oct 2020 15:09:06 +0100 Subject: [PATCH 0008/1036] Basic dbscheme generation from node-types.json --- generator/src/dbscheme.rs | 173 ++++ generator/src/main.rs | 315 +++++- generator/src/node_types.rs | 39 + ruby.dbscheme | 1864 +++++++++++++++++++++++++++++++++++ 4 files changed, 2390 insertions(+), 1 deletion(-) create mode 100644 generator/src/dbscheme.rs create mode 100644 generator/src/node_types.rs create mode 100644 ruby.dbscheme diff --git a/generator/src/dbscheme.rs b/generator/src/dbscheme.rs new file mode 100644 index 000000000000..b4f03ff7ba00 --- /dev/null +++ b/generator/src/dbscheme.rs @@ -0,0 +1,173 @@ +/// Represents a distinct entry in the database schema. +pub enum Entry { + /// An entry defining a database table. + Table(Table), + + /// An entry defining type that is a union of other types. + Union { name: String, members: Vec }, +} + +/// A table in the database schema. +pub struct Table { + pub name: String, + pub columns: Vec, + pub keysets: Vec>, +} + +/// A column in a table. +pub struct Column { + pub db_type: DbColumnType, + pub name: String, + pub unique: bool, + pub ql_type: QlColumnType, + pub ql_type_is_ref: bool, +} + +/// The database column type. +pub enum DbColumnType { + Int, + String, +} + +// The QL type of a column. +pub enum QlColumnType { + /// Primitive `int` type. + Int, + + /// Primitive `string` type. + String, + + /// A custom type, defined elsewhere by a table or union. + Custom(String), +} + +const RESERVED_KEYWORDS: [&'static str; 14] = [ + "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype", + "type", "unique", "varchar", +]; + +/// Returns a string that's a copy of `name` but suitably escaped to be a valid +/// QL identifier. +pub fn escape_name(name: &str) -> String { + let mut result = String::new(); + + // If there's a leading underscore, replace it with 'underscore_'. + if let Some(c) = name.chars().next() { + if c == '_' { + result.push_str("underscore"); + } + } + for c in name.chars() { + match c { + '{' => result.push_str("lbrace"), + '}' => result.push_str("rbrace"), + '<' => result.push_str("langle"), + '>' => result.push_str("rangle"), + '[' => result.push_str("lbracket"), + ']' => result.push_str("rbracket"), + '(' => result.push_str("lparen"), + ')' => result.push_str("rparen"), + '|' => result.push_str("pipe"), + '=' => result.push_str("equal"), + '~' => result.push_str("tilde"), + '?' => result.push_str("question"), + '`' => result.push_str("backtick"), + '^' => result.push_str("caret"), + '!' => result.push_str("bang"), + '#' => result.push_str("hash"), + '%' => result.push_str("percent"), + '&' => result.push_str("ampersand"), + '.' => result.push_str("dot"), + ',' => result.push_str("comma"), + '/' => result.push_str("slash"), + ':' => result.push_str("colon"), + ';' => result.push_str("semicolon"), + '"' => result.push_str("dquote"), + '*' => result.push_str("star"), + '+' => result.push_str("plus"), + '-' => result.push_str("minus"), + '@' => result.push_str("at"), + _ => result.push_str(&c.to_lowercase().to_string()), + } + } + + for &keyword in &RESERVED_KEYWORDS { + if result == keyword { + result.push_str("__"); + break; + } + } + + result +} + +/// Generates the dbscheme by writing the given dbscheme `entries` to the `file`. +pub fn write(file: &mut dyn std::io::Write, entries: &[Entry]) -> Result<(), std::io::Error> { + write!(file, "// CodeQL database schema for Ruby\n")?; + write!( + file, + "// Automatically generated from the tree-sitter grammar; do not edit\n\n" + )?; + + for entry in entries { + match entry { + Entry::Table(table) => { + for keyset in &table.keysets { + write!(file, "#keyset[")?; + for (key_index, key) in keyset.iter().enumerate() { + if key_index > 0 { + write!(file, ", ")?; + } + write!(file, "{}", key)?; + } + write!(file, "]\n")?; + } + + write!(file, "{}(\n", table.name)?; + for (column_index, column) in table.columns.iter().enumerate() { + write!(file, " ")?; + if column.unique { + write!(file, "unique ")?; + } + write!( + file, + "{} ", + match column.db_type { + DbColumnType::Int => "int", + DbColumnType::String => "string", + } + )?; + write!(file, "{}: ", column.name)?; + match &column.ql_type { + QlColumnType::Int => write!(file, "int")?, + QlColumnType::String => write!(file, "string")?, + QlColumnType::Custom(name) => write!(file, "@{}", name)?, + } + if column.ql_type_is_ref { + write!(file, " ref")?; + } + if column_index + 1 != table.columns.len() { + write!(file, ",")?; + } + write!(file, "\n")?; + } + write!(file, ");\n\n")?; + } + Entry::Union { name, members } => { + write!(file, "@{} = ", name)?; + let mut first = true; + for member in members { + if first { + first = false; + } else { + write!(file, " | ")?; + } + write!(file, "@{}", member)?; + } + write!(file, "\n\n")?; + } + } + } + + Ok(()) +} diff --git a/generator/src/main.rs b/generator/src/main.rs index 684368c2cfa1..9393fbaa8d9c 100644 --- a/generator/src/main.rs +++ b/generator/src/main.rs @@ -1,3 +1,316 @@ +use std::fs::File; +use std::io::LineWriter; +use std::path::Path; + +mod dbscheme; +mod node_types; +use node_types::{FieldInfo, NodeInfo}; + +fn read_node_types() -> Option> { + let json_data = match std::fs::read_to_string(Path::new("tree-sitter-ruby/src/node-types.json")) + { + Ok(s) => s, + Err(_) => return None, + }; + let nodes: Vec = match serde_json::from_str(&json_data) { + Ok(n) => n, + Err(_) => return None, + }; + + Some(nodes) +} + +/// Given a tree-sitter node type's (kind, named) pair, returns a single string +/// representing the (unescaped) name we'll use to refer to corresponding QL +/// type. +fn node_type_name(kind: &str, named: bool) -> String { + if named { + kind.to_string() + } else { + format!("{}_unnamed", kind) + } +} + +/// Given the name of the parent node, and its field information, returns the +/// name of the field's type. This may be an ad-hoc union of all the possible +/// types the field can take, in which case the union is added to `entries`. +fn make_field_type( + parent_name: &str, + field_name: &str, + field_info: &FieldInfo, + entries: &mut Vec, +) -> String { + if field_info.types.len() == 1 { + // This field can only have a single type. + let t = &field_info.types[0]; + dbscheme::escape_name(&node_type_name(&t.kind, t.named)) + } else { + // This field can have one of several types. Create an ad-hoc QL union + // type to represent them. + let field_union_name = format!("{}_{}_type", parent_name, field_name); + let field_union_name = dbscheme::escape_name(&field_union_name); + let mut members: Vec = Vec::new(); + for field_type in &field_info.types { + members.push(dbscheme::escape_name(&node_type_name( + &field_type.kind, + field_type.named, + ))); + } + entries.push(dbscheme::Entry::Union { + name: field_union_name.clone(), + members, + }); + field_union_name + } +} + +/// Adds the appropriate dbscheme information for the given field, either as a +/// column on `main_table`, or as an auxiliary table. +fn add_field( + main_table: &mut dbscheme::Table, + parent_name: &str, + field_name: &str, + field_info: &FieldInfo, + entries: &mut Vec, +) { + if field_info.multiple || !field_info.required { + // This field can appear zero or multiple times, so put + // it in an auxiliary table. + let field_type = make_field_type(parent_name, field_name, field_info, entries); + let field_table = dbscheme::Table { + name: format!("{}_{}", parent_name, field_name), + columns: vec![ + // First column is a reference to the parent. + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: dbscheme::escape_name(parent_name), + ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(parent_name)), + ql_type_is_ref: true, + }, + // Then an index column. + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: "index".to_string(), + ql_type: dbscheme::QlColumnType::Int, + ql_type_is_ref: true, + }, + // And then the field + dbscheme::Column { + unique: true, + db_type: dbscheme::DbColumnType::Int, + name: field_type.clone(), + ql_type: dbscheme::QlColumnType::Custom(field_type), + ql_type_is_ref: true, + }, + ], + // In addition to the field being unique, the combination of + // parent+index is unique, so add a keyset for them. + keysets: vec![vec![ + dbscheme::escape_name(parent_name), + "index".to_string(), + ]], + }; + entries.push(dbscheme::Entry::Table(field_table)); + } else { + // This field must appear exactly once, so we add it as + // a column to the main table for the node type. + let field_type = make_field_type(parent_name, field_name, field_info, entries); + main_table.columns.push(dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: String::from(field_name), + ql_type: dbscheme::QlColumnType::Custom(field_type), + ql_type_is_ref: true, + }); + } +} + +/// Converts the given tree-sitter node types into CodeQL dbscheme entries. +fn convert_nodes(nodes: &[NodeInfo]) -> Vec { + let mut entries: Vec = Vec::new(); + let mut top_members: Vec = Vec::new(); + + for node in nodes { + if let Some(subtypes) = &node.subtypes { + // It's a tree-sitter supertype node, for which we create a union + // type. + let mut members: Vec = Vec::new(); + for subtype in subtypes { + members.push(dbscheme::escape_name(&node_type_name( + &subtype.kind, + subtype.named, + ))) + } + entries.push(dbscheme::Entry::Union { + name: dbscheme::escape_name(&node_type_name(&node.kind, node.named)), + members, + }); + } else { + // It's a product type, defined by a table. + let name = node_type_name(&node.kind, node.named); + let mut main_table = dbscheme::Table { + name: dbscheme::escape_name(&(format!("{}_def", name))), + columns: vec![dbscheme::Column { + db_type: dbscheme::DbColumnType::Int, + name: "id".to_string(), + unique: true, + ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(&name)), + ql_type_is_ref: false, + }], + keysets: vec![], + }; + top_members.push(dbscheme::escape_name(&name)); + + let mut is_leaf = true; + + // If the type also has fields or children, then we create either + // auxiliary tables or columns in the defining table for them. + if let Some(fields) = &node.fields { + for (field_name, field_info) in fields { + is_leaf = false; + add_field(&mut main_table, &name, field_name, field_info, &mut entries); + } + } + if let Some(children) = &node.children { + is_leaf = false; + + // Treat children as if they were a field called 'child'. + add_field(&mut main_table, &name, "child", children, &mut entries); + } + + if is_leaf { + // There were no fields and no children, so it's a leaf node in + // the TS grammar. Add a column for the node text. + main_table.columns.push(dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::String, + name: "text".to_string(), + ql_type: dbscheme::QlColumnType::String, + ql_type_is_ref: true, + }); + } + + // Finally, the type's defining table also includes the location. + main_table.columns.push(dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: "loc".to_string(), + ql_type: dbscheme::QlColumnType::Custom("location".to_string()), + ql_type_is_ref: true, + }); + + entries.push(dbscheme::Entry::Table(main_table)); + } + } + + // Create a union of all database types. + entries.push(dbscheme::Entry::Union { + name: "top".to_string(), + members: top_members, + }); + + entries +} + +fn write_dbscheme(entries: &[dbscheme::Entry]) -> std::io::Result<()> { + // TODO: figure out proper output path and/or take it from the command line. + let path = Path::new("ruby.dbscheme"); + println!( + "Writing to '{}'", + match path.to_str() { + None => "", + Some(p) => p, + } + ); + let file = File::create(path)?; + let mut file = LineWriter::new(file); + dbscheme::write(&mut file, &entries) +} + +fn create_location_entry() -> dbscheme::Entry { + dbscheme::Entry::Table(dbscheme::Table { + name: "location".to_string(), + keysets: Vec::new(), + columns: vec![ + dbscheme::Column { + unique: true, + db_type: dbscheme::DbColumnType::Int, + name: "id".to_string(), + ql_type: dbscheme::QlColumnType::Custom("location".to_string()), + ql_type_is_ref: false, + }, + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::String, + name: "file_path".to_string(), + ql_type: dbscheme::QlColumnType::String, + ql_type_is_ref: true, + }, + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: "start_line".to_string(), + ql_type: dbscheme::QlColumnType::Int, + ql_type_is_ref: true, + }, + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: "start_column".to_string(), + ql_type: dbscheme::QlColumnType::Int, + ql_type_is_ref: true, + }, + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: "end_line".to_string(), + ql_type: dbscheme::QlColumnType::Int, + ql_type_is_ref: true, + }, + dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::Int, + name: "end_column".to_string(), + ql_type: dbscheme::QlColumnType::Int, + ql_type_is_ref: true, + }, + ], + }) +} + +fn create_source_location_prefix_entry() -> dbscheme::Entry { + dbscheme::Entry::Table(dbscheme::Table { + name: "sourceLocationPrefix".to_string(), + keysets: Vec::new(), + columns: vec![dbscheme::Column { + unique: false, + db_type: dbscheme::DbColumnType::String, + name: "prefix".to_string(), + ql_type: dbscheme::QlColumnType::String, + ql_type_is_ref: true, + }], + }) +} + fn main() { - println!("generator"); + match read_node_types() { + None => { + println!("Failed to read node types"); + std::process::exit(1); + } + Some(nodes) => { + let mut dbscheme_entries = convert_nodes(&nodes); + dbscheme_entries.push(create_location_entry()); + dbscheme_entries.push(create_source_location_prefix_entry()); + match write_dbscheme(&dbscheme_entries) { + Err(e) => { + println!("Failed to write dbscheme: {}", e); + std::process::exit(2); + } + Ok(()) => {} + } + } + } } diff --git a/generator/src/node_types.rs b/generator/src/node_types.rs new file mode 100644 index 000000000000..faee47e410eb --- /dev/null +++ b/generator/src/node_types.rs @@ -0,0 +1,39 @@ +use serde::Deserialize; +use std::collections::BTreeMap; + +#[derive(Deserialize)] +pub struct NodeInfo { + #[serde(rename = "type")] + pub kind: String, + pub named: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub fields: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub children: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub subtypes: Option>, +} + +#[derive(Deserialize)] +pub struct NodeType { + #[serde(rename = "type")] + pub kind: String, + pub named: bool, +} + +#[derive(Deserialize)] +pub struct FieldInfo { + pub multiple: bool, + pub required: bool, + pub types: Vec, +} + +impl Default for FieldInfo { + fn default() -> Self { + FieldInfo { + multiple: false, + required: true, + types: Vec::new(), + } + } +} diff --git a/ruby.dbscheme b/ruby.dbscheme new file mode 100644 index 000000000000..c0631bbb80bb --- /dev/null +++ b/ruby.dbscheme @@ -0,0 +1,1864 @@ +// CodeQL database schema for Ruby +// Automatically generated from the tree-sitter grammar; do not edit + +@underscore_arg = @underscore_primary | @assignment | @binary | @conditional | @operator_assignment | @range | @unary + +@underscore_lhs = @underscore_variable | @call | @element_reference | @false | @method_call | @nil | @scope_resolution | @true + +@underscore_method_name = @class_variable | @constant | @global_variable | @identifier | @instance_variable | @operator | @setter | @symbol + +@underscore_primary = @underscore_lhs | @array | @begin | @break | @case__ | @chained_string | @character | @class | @complex | @float__ | @for | @hash | @heredoc_beginning | @if | @integer | @lambda | @method | @module | @next | @parenthesized_statements | @rational | @redo | @regex | @retry | @return | @singleton_class | @singleton_method | @string__ | @string_array | @subshell | @symbol | @symbol_array | @unary | @unless | @until | @while | @yield + +@underscore_statement = @underscore_arg | @alias | @assignment | @begin_block | @binary | @break | @call | @end_block | @if_modifier | @method_call | @next | @operator_assignment | @rescue_modifier | @return | @unary | @undef | @unless_modifier | @until_modifier | @while_modifier | @yield + +@underscore_variable = @class_variable | @constant | @global_variable | @identifier | @instance_variable | @self | @super + +alias_def( + unique int id: @alias, + int alias: @underscore_method_name ref, + int name: @underscore_method_name ref, + int loc: @location ref +); + +@argument_list_child_type = @underscore_arg | @block_argument | @break | @call | @hash_splat_argument | @method_call | @next | @pair | @return | @splat_argument | @yield + +#keyset[argument_list, index] +argument_list_child( + int argument_list: @argument_list ref, + int index: int ref, + unique int argument_list_child_type: @argument_list_child_type ref +); + +argument_list_def( + unique int id: @argument_list, + int loc: @location ref +); + +@array_child_type = @underscore_arg | @block_argument | @break | @call | @hash_splat_argument | @method_call | @next | @pair | @return | @splat_argument | @yield + +#keyset[array, index] +array_child( + int array: @array ref, + int index: int ref, + unique int array_child_type: @array_child_type ref +); + +array_def( + unique int id: @array, + int loc: @location ref +); + +@assignment_left_type = @underscore_lhs | @left_assignment_list + +@assignment_right_type = @underscore_arg | @break | @call | @method_call | @next | @return | @right_assignment_list | @splat_argument | @yield + +assignment_def( + unique int id: @assignment, + int left: @assignment_left_type ref, + int right: @assignment_right_type ref, + int loc: @location ref +); + +@bare_string_child_type = @escape_sequence | @interpolation + +#keyset[bare_string, index] +bare_string_child( + int bare_string: @bare_string ref, + int index: int ref, + unique int bare_string_child_type: @bare_string_child_type ref +); + +bare_string_def( + unique int id: @bare_string, + int loc: @location ref +); + +@bare_symbol_child_type = @escape_sequence | @interpolation + +#keyset[bare_symbol, index] +bare_symbol_child( + int bare_symbol: @bare_symbol ref, + int index: int ref, + unique int bare_symbol_child_type: @bare_symbol_child_type ref +); + +bare_symbol_def( + unique int id: @bare_symbol, + int loc: @location ref +); + +@begin_child_type = @underscore_statement | @else | @empty_statement | @ensure | @rescue + +#keyset[begin, index] +begin_child( + int begin: @begin ref, + int index: int ref, + unique int begin_child_type: @begin_child_type ref +); + +begin_def( + unique int id: @begin, + int loc: @location ref +); + +@begin_block_child_type = @underscore_statement | @empty_statement + +#keyset[begin_block, index] +begin_block_child( + int begin_block: @begin_block ref, + int index: int ref, + unique int begin_block_child_type: @begin_block_child_type ref +); + +begin_block_def( + unique int id: @begin_block, + int loc: @location ref +); + +@binary_left_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +@binary_operator_type = @bangequal_unnamed | @bangtilde_unnamed | @percent_unnamed | @ampersand_unnamed | @ampersandampersand_unnamed | @star_unnamed | @starstar_unnamed | @plus_unnamed | @minus_unnamed | @slash_unnamed | @langle_unnamed | @langlelangle_unnamed | @langleequal_unnamed | @langleequalrangle_unnamed | @equalequal_unnamed | @equalequalequal_unnamed | @equaltilde_unnamed | @rangle_unnamed | @rangleequal_unnamed | @ranglerangle_unnamed | @caret_unnamed | @and_unnamed | @or_unnamed | @pipe_unnamed | @pipepipe_unnamed + +@binary_right_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +binary_def( + unique int id: @binary, + int left: @binary_left_type ref, + int operator: @binary_operator_type ref, + int right: @binary_right_type ref, + int loc: @location ref +); + +@block_child_type = @underscore_statement | @block_parameters | @empty_statement + +#keyset[block, index] +block_child( + int block: @block ref, + int index: int ref, + unique int block_child_type: @block_child_type ref +); + +block_def( + unique int id: @block, + int loc: @location ref +); + +block_argument_def( + unique int id: @block_argument, + int child: @underscore_arg ref, + int loc: @location ref +); + +block_parameter_def( + unique int id: @block_parameter, + int name: @identifier ref, + int loc: @location ref +); + +@block_parameters_child_type = @block_parameter | @destructured_parameter | @hash_splat_parameter | @identifier | @keyword_parameter | @optional_parameter | @splat_parameter + +#keyset[block_parameters, index] +block_parameters_child( + int block_parameters: @block_parameters ref, + int index: int ref, + unique int block_parameters_child_type: @block_parameters_child_type ref +); + +block_parameters_def( + unique int id: @block_parameters, + int loc: @location ref +); + +#keyset[break, index] +break_child( + int break: @break ref, + int index: int ref, + unique int argument_list: @argument_list ref +); + +break_def( + unique int id: @break, + int loc: @location ref +); + +@call_method_type = @argument_list | @constant | @identifier | @operator + +@call_receiver_type = @underscore_primary | @method_call + +call_def( + unique int id: @call, + int method: @call_method_type ref, + int receiver: @call_receiver_type ref, + int loc: @location ref +); + +#keyset[case__, index] +case_value( + int case__: @case__ ref, + int index: int ref, + unique int underscore_statement: @underscore_statement ref +); + +@case_child_type = @else | @when + +#keyset[case__, index] +case_child( + int case__: @case__ ref, + int index: int ref, + unique int case_child_type: @case_child_type ref +); + +case_def( + unique int id: @case__, + int loc: @location ref +); + +#keyset[chained_string, index] +chained_string_child( + int chained_string: @chained_string ref, + int index: int ref, + unique int string__: @string__ ref +); + +chained_string_def( + unique int id: @chained_string, + int loc: @location ref +); + +@class_name_type = @constant | @scope_resolution + +@class_child_type = @underscore_statement | @else | @empty_statement | @ensure | @rescue | @superclass + +#keyset[class, index] +class_child( + int class: @class ref, + int index: int ref, + unique int class_child_type: @class_child_type ref +); + +class_def( + unique int id: @class, + int name: @class_name_type ref, + int loc: @location ref +); + +conditional_def( + unique int id: @conditional, + int alternative: @underscore_arg ref, + int condition: @underscore_arg ref, + int consequence: @underscore_arg ref, + int loc: @location ref +); + +@destructured_left_assignment_child_type = @underscore_lhs | @destructured_left_assignment | @rest_assignment + +#keyset[destructured_left_assignment, index] +destructured_left_assignment_child( + int destructured_left_assignment: @destructured_left_assignment ref, + int index: int ref, + unique int destructured_left_assignment_child_type: @destructured_left_assignment_child_type ref +); + +destructured_left_assignment_def( + unique int id: @destructured_left_assignment, + int loc: @location ref +); + +@destructured_parameter_child_type = @block_parameter | @destructured_parameter | @hash_splat_parameter | @identifier | @keyword_parameter | @optional_parameter | @splat_parameter + +#keyset[destructured_parameter, index] +destructured_parameter_child( + int destructured_parameter: @destructured_parameter ref, + int index: int ref, + unique int destructured_parameter_child_type: @destructured_parameter_child_type ref +); + +destructured_parameter_def( + unique int id: @destructured_parameter, + int loc: @location ref +); + +@do_child_type = @underscore_statement | @empty_statement + +#keyset[do, index] +do_child( + int do: @do ref, + int index: int ref, + unique int do_child_type: @do_child_type ref +); + +do_def( + unique int id: @do, + int loc: @location ref +); + +@do_block_child_type = @underscore_statement | @block_parameters | @else | @empty_statement | @ensure | @rescue + +#keyset[do_block, index] +do_block_child( + int do_block: @do_block ref, + int index: int ref, + unique int do_block_child_type: @do_block_child_type ref +); + +do_block_def( + unique int id: @do_block, + int loc: @location ref +); + +@element_reference_child_type = @underscore_arg | @block_argument | @break | @call | @hash_splat_argument | @method_call | @next | @pair | @return | @splat_argument | @yield + +#keyset[element_reference, index] +element_reference_child( + int element_reference: @element_reference ref, + int index: int ref, + unique int element_reference_child_type: @element_reference_child_type ref +); + +element_reference_def( + unique int id: @element_reference, + int object: @underscore_primary ref, + int loc: @location ref +); + +#keyset[else, index] +else_condition( + int else: @else ref, + int index: int ref, + unique int semicolon_unnamed: @semicolon_unnamed ref +); + +@else_child_type = @underscore_statement | @empty_statement + +#keyset[else, index] +else_child( + int else: @else ref, + int index: int ref, + unique int else_child_type: @else_child_type ref +); + +else_def( + unique int id: @else, + int loc: @location ref +); + +@elsif_alternative_type = @else | @elsif + +#keyset[elsif, index] +elsif_alternative( + int elsif: @elsif ref, + int index: int ref, + unique int elsif_alternative_type: @elsif_alternative_type ref +); + +#keyset[elsif, index] +elsif_consequence( + int elsif: @elsif ref, + int index: int ref, + unique int then: @then ref +); + +elsif_def( + unique int id: @elsif, + int condition: @underscore_statement ref, + int loc: @location ref +); + +empty_statement_def( + unique int id: @empty_statement, + string text: string ref, + int loc: @location ref +); + +@end_block_child_type = @underscore_statement | @empty_statement + +#keyset[end_block, index] +end_block_child( + int end_block: @end_block ref, + int index: int ref, + unique int end_block_child_type: @end_block_child_type ref +); + +end_block_def( + unique int id: @end_block, + int loc: @location ref +); + +@ensure_child_type = @underscore_statement | @empty_statement + +#keyset[ensure, index] +ensure_child( + int ensure: @ensure ref, + int index: int ref, + unique int ensure_child_type: @ensure_child_type ref +); + +ensure_def( + unique int id: @ensure, + int loc: @location ref +); + +exception_variable_def( + unique int id: @exception_variable, + int child: @underscore_lhs ref, + int loc: @location ref +); + +@exceptions_child_type = @underscore_arg | @splat_argument + +#keyset[exceptions, index] +exceptions_child( + int exceptions: @exceptions ref, + int index: int ref, + unique int exceptions_child_type: @exceptions_child_type ref +); + +exceptions_def( + unique int id: @exceptions, + int loc: @location ref +); + +@for_pattern_type = @underscore_lhs | @destructured_left_assignment | @rest_assignment + +#keyset[for, index] +for_pattern( + int for: @for ref, + int index: int ref, + unique int for_pattern_type: @for_pattern_type ref +); + +for_def( + unique int id: @for, + int body: @do ref, + int value: @in ref, + int loc: @location ref +); + +@hash_child_type = @hash_splat_argument | @pair + +#keyset[hash, index] +hash_child( + int hash: @hash ref, + int index: int ref, + unique int hash_child_type: @hash_child_type ref +); + +hash_def( + unique int id: @hash, + int loc: @location ref +); + +hash_splat_argument_def( + unique int id: @hash_splat_argument, + int child: @underscore_arg ref, + int loc: @location ref +); + +#keyset[hash_splat_parameter, index] +hash_splat_parameter_name( + int hash_splat_parameter: @hash_splat_parameter ref, + int index: int ref, + unique int identifier: @identifier ref +); + +hash_splat_parameter_def( + unique int id: @hash_splat_parameter, + int loc: @location ref +); + +@if_alternative_type = @else | @elsif + +#keyset[if, index] +if_alternative( + int if: @if ref, + int index: int ref, + unique int if_alternative_type: @if_alternative_type ref +); + +#keyset[if, index] +if_consequence( + int if: @if ref, + int index: int ref, + unique int then: @then ref +); + +if_def( + unique int id: @if, + int condition: @underscore_statement ref, + int loc: @location ref +); + +@if_modifier_condition_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +if_modifier_def( + unique int id: @if_modifier, + int body: @underscore_statement ref, + int condition: @if_modifier_condition_type ref, + int loc: @location ref +); + +in_def( + unique int id: @in, + int child: @underscore_arg ref, + int loc: @location ref +); + +interpolation_def( + unique int id: @interpolation, + int child: @underscore_statement ref, + int loc: @location ref +); + +#keyset[keyword_parameter, index] +keyword_parameter_value( + int keyword_parameter: @keyword_parameter ref, + int index: int ref, + unique int underscore_arg: @underscore_arg ref +); + +keyword_parameter_def( + unique int id: @keyword_parameter, + int name: @identifier ref, + int loc: @location ref +); + +@lambda_body_type = @block | @do_block + +#keyset[lambda, index] +lambda_parameters( + int lambda: @lambda ref, + int index: int ref, + unique int lambda_parameters: @lambda_parameters ref +); + +lambda_def( + unique int id: @lambda, + int body: @lambda_body_type ref, + int loc: @location ref +); + +@lambda_parameters_child_type = @block_parameter | @destructured_parameter | @hash_splat_parameter | @identifier | @keyword_parameter | @optional_parameter | @splat_parameter + +#keyset[lambda_parameters, index] +lambda_parameters_child( + int lambda_parameters: @lambda_parameters ref, + int index: int ref, + unique int lambda_parameters_child_type: @lambda_parameters_child_type ref +); + +lambda_parameters_def( + unique int id: @lambda_parameters, + int loc: @location ref +); + +@left_assignment_list_child_type = @underscore_lhs | @destructured_left_assignment | @rest_assignment + +#keyset[left_assignment_list, index] +left_assignment_list_child( + int left_assignment_list: @left_assignment_list ref, + int index: int ref, + unique int left_assignment_list_child_type: @left_assignment_list_child_type ref +); + +left_assignment_list_def( + unique int id: @left_assignment_list, + int loc: @location ref +); + +#keyset[method, index] +method_parameters( + int method: @method ref, + int index: int ref, + unique int method_parameters: @method_parameters ref +); + +@method_child_type = @underscore_statement | @else | @empty_statement | @ensure | @rescue + +#keyset[method, index] +method_child( + int method: @method ref, + int index: int ref, + unique int method_child_type: @method_child_type ref +); + +method_def( + unique int id: @method, + int name: @underscore_method_name ref, + int loc: @location ref +); + +#keyset[method_call, index] +method_call_arguments( + int method_call: @method_call ref, + int index: int ref, + unique int argument_list: @argument_list ref +); + +@method_call_block_type = @block | @do_block + +#keyset[method_call, index] +method_call_block( + int method_call: @method_call ref, + int index: int ref, + unique int method_call_block_type: @method_call_block_type ref +); + +@method_call_method_type = @underscore_variable | @call | @scope_resolution + +method_call_def( + unique int id: @method_call, + int method: @method_call_method_type ref, + int loc: @location ref +); + +@method_parameters_child_type = @block_parameter | @destructured_parameter | @hash_splat_parameter | @identifier | @keyword_parameter | @optional_parameter | @splat_parameter + +#keyset[method_parameters, index] +method_parameters_child( + int method_parameters: @method_parameters ref, + int index: int ref, + unique int method_parameters_child_type: @method_parameters_child_type ref +); + +method_parameters_def( + unique int id: @method_parameters, + int loc: @location ref +); + +@module_name_type = @constant | @scope_resolution + +@module_child_type = @underscore_statement | @else | @empty_statement | @ensure | @rescue + +#keyset[module, index] +module_child( + int module: @module ref, + int index: int ref, + unique int module_child_type: @module_child_type ref +); + +module_def( + unique int id: @module, + int name: @module_name_type ref, + int loc: @location ref +); + +#keyset[next, index] +next_child( + int next: @next ref, + int index: int ref, + unique int argument_list: @argument_list ref +); + +next_def( + unique int id: @next, + int loc: @location ref +); + +operator_def( + unique int id: @operator, + string text: string ref, + int loc: @location ref +); + +@operator_assignment_right_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +operator_assignment_def( + unique int id: @operator_assignment, + int left: @underscore_lhs ref, + int right: @operator_assignment_right_type ref, + int loc: @location ref +); + +optional_parameter_def( + unique int id: @optional_parameter, + int name: @identifier ref, + int value: @underscore_arg ref, + int loc: @location ref +); + +@pair_key_type = @underscore_arg | @string__ | @symbol + +pair_def( + unique int id: @pair, + int key: @pair_key_type ref, + int value: @underscore_arg ref, + int loc: @location ref +); + +@parenthesized_statements_child_type = @underscore_statement | @empty_statement + +#keyset[parenthesized_statements, index] +parenthesized_statements_child( + int parenthesized_statements: @parenthesized_statements ref, + int index: int ref, + unique int parenthesized_statements_child_type: @parenthesized_statements_child_type ref +); + +parenthesized_statements_def( + unique int id: @parenthesized_statements, + int loc: @location ref +); + +@pattern_child_type = @underscore_arg | @splat_argument + +pattern_def( + unique int id: @pattern, + int child: @pattern_child_type ref, + int loc: @location ref +); + +@program_child_type = @underscore_statement | @empty_statement | @uninterpreted + +#keyset[program, index] +program_child( + int program: @program ref, + int index: int ref, + unique int program_child_type: @program_child_type ref +); + +program_def( + unique int id: @program, + int loc: @location ref +); + +#keyset[range, index] +range_child( + int range: @range ref, + int index: int ref, + unique int underscore_arg: @underscore_arg ref +); + +range_def( + unique int id: @range, + int loc: @location ref +); + +rational_def( + unique int id: @rational, + int child: @integer ref, + int loc: @location ref +); + +#keyset[redo, index] +redo_child( + int redo: @redo ref, + int index: int ref, + unique int argument_list: @argument_list ref +); + +redo_def( + unique int id: @redo, + int loc: @location ref +); + +@regex_child_type = @escape_sequence | @interpolation + +#keyset[regex, index] +regex_child( + int regex: @regex ref, + int index: int ref, + unique int regex_child_type: @regex_child_type ref +); + +regex_def( + unique int id: @regex, + int loc: @location ref +); + +#keyset[rescue, index] +rescue_body( + int rescue: @rescue ref, + int index: int ref, + unique int then: @then ref +); + +#keyset[rescue, index] +rescue_exceptions( + int rescue: @rescue ref, + int index: int ref, + unique int exceptions: @exceptions ref +); + +#keyset[rescue, index] +rescue_variable( + int rescue: @rescue ref, + int index: int ref, + unique int exception_variable: @exception_variable ref +); + +rescue_def( + unique int id: @rescue, + int loc: @location ref +); + +@rescue_modifier_handler_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +rescue_modifier_def( + unique int id: @rescue_modifier, + int body: @underscore_statement ref, + int handler: @rescue_modifier_handler_type ref, + int loc: @location ref +); + +#keyset[rest_assignment, index] +rest_assignment_child( + int rest_assignment: @rest_assignment ref, + int index: int ref, + unique int underscore_lhs: @underscore_lhs ref +); + +rest_assignment_def( + unique int id: @rest_assignment, + int loc: @location ref +); + +#keyset[retry, index] +retry_child( + int retry: @retry ref, + int index: int ref, + unique int argument_list: @argument_list ref +); + +retry_def( + unique int id: @retry, + int loc: @location ref +); + +#keyset[return, index] +return_child( + int return: @return ref, + int index: int ref, + unique int argument_list: @argument_list ref +); + +return_def( + unique int id: @return, + int loc: @location ref +); + +@right_assignment_list_child_type = @underscore_arg | @splat_argument + +#keyset[right_assignment_list, index] +right_assignment_list_child( + int right_assignment_list: @right_assignment_list ref, + int index: int ref, + unique int right_assignment_list_child_type: @right_assignment_list_child_type ref +); + +right_assignment_list_def( + unique int id: @right_assignment_list, + int loc: @location ref +); + +@scope_resolution_name_type = @constant | @identifier + +#keyset[scope_resolution, index] +scope_resolution_scope( + int scope_resolution: @scope_resolution ref, + int index: int ref, + unique int underscore_primary: @underscore_primary ref +); + +scope_resolution_def( + unique int id: @scope_resolution, + int name: @scope_resolution_name_type ref, + int loc: @location ref +); + +setter_def( + unique int id: @setter, + int child: @identifier ref, + int loc: @location ref +); + +@singleton_class_child_type = @underscore_statement | @else | @empty_statement | @ensure | @rescue + +#keyset[singleton_class, index] +singleton_class_child( + int singleton_class: @singleton_class ref, + int index: int ref, + unique int singleton_class_child_type: @singleton_class_child_type ref +); + +singleton_class_def( + unique int id: @singleton_class, + int value: @underscore_arg ref, + int loc: @location ref +); + +@singleton_method_object_type = @underscore_arg | @underscore_variable + +#keyset[singleton_method, index] +singleton_method_parameters( + int singleton_method: @singleton_method ref, + int index: int ref, + unique int method_parameters: @method_parameters ref +); + +@singleton_method_child_type = @underscore_statement | @else | @empty_statement | @ensure | @rescue + +#keyset[singleton_method, index] +singleton_method_child( + int singleton_method: @singleton_method ref, + int index: int ref, + unique int singleton_method_child_type: @singleton_method_child_type ref +); + +singleton_method_def( + unique int id: @singleton_method, + int name: @underscore_method_name ref, + int object: @singleton_method_object_type ref, + int loc: @location ref +); + +splat_argument_def( + unique int id: @splat_argument, + int child: @underscore_arg ref, + int loc: @location ref +); + +#keyset[splat_parameter, index] +splat_parameter_name( + int splat_parameter: @splat_parameter ref, + int index: int ref, + unique int identifier: @identifier ref +); + +splat_parameter_def( + unique int id: @splat_parameter, + int loc: @location ref +); + +@string_child_type = @escape_sequence | @interpolation + +#keyset[string__, index] +string_child( + int string__: @string__ ref, + int index: int ref, + unique int string_child_type: @string_child_type ref +); + +string_def( + unique int id: @string__, + int loc: @location ref +); + +#keyset[string_array, index] +string_array_child( + int string_array: @string_array ref, + int index: int ref, + unique int bare_string: @bare_string ref +); + +string_array_def( + unique int id: @string_array, + int loc: @location ref +); + +@subshell_child_type = @escape_sequence | @interpolation + +#keyset[subshell, index] +subshell_child( + int subshell: @subshell ref, + int index: int ref, + unique int subshell_child_type: @subshell_child_type ref +); + +subshell_def( + unique int id: @subshell, + int loc: @location ref +); + +@superclass_child_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +superclass_def( + unique int id: @superclass, + int child: @superclass_child_type ref, + int loc: @location ref +); + +@symbol_child_type = @escape_sequence | @interpolation + +#keyset[symbol, index] +symbol_child( + int symbol: @symbol ref, + int index: int ref, + unique int symbol_child_type: @symbol_child_type ref +); + +symbol_def( + unique int id: @symbol, + int loc: @location ref +); + +#keyset[symbol_array, index] +symbol_array_child( + int symbol_array: @symbol_array ref, + int index: int ref, + unique int bare_symbol: @bare_symbol ref +); + +symbol_array_def( + unique int id: @symbol_array, + int loc: @location ref +); + +@then_child_type = @underscore_statement | @empty_statement + +#keyset[then, index] +then_child( + int then: @then ref, + int index: int ref, + unique int then_child_type: @then_child_type ref +); + +then_def( + unique int id: @then, + int loc: @location ref +); + +@unary_child_type = @underscore_arg | @break | @call | @float__ | @integer | @method_call | @next | @parenthesized_statements | @return | @yield + +unary_def( + unique int id: @unary, + int child: @unary_child_type ref, + int loc: @location ref +); + +#keyset[undef, index] +undef_child( + int undef: @undef ref, + int index: int ref, + unique int underscore_method_name: @underscore_method_name ref +); + +undef_def( + unique int id: @undef, + int loc: @location ref +); + +@unless_alternative_type = @else | @elsif + +#keyset[unless, index] +unless_alternative( + int unless: @unless ref, + int index: int ref, + unique int unless_alternative_type: @unless_alternative_type ref +); + +#keyset[unless, index] +unless_consequence( + int unless: @unless ref, + int index: int ref, + unique int then: @then ref +); + +unless_def( + unique int id: @unless, + int condition: @underscore_statement ref, + int loc: @location ref +); + +@unless_modifier_condition_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +unless_modifier_def( + unique int id: @unless_modifier, + int body: @underscore_statement ref, + int condition: @unless_modifier_condition_type ref, + int loc: @location ref +); + +until_def( + unique int id: @until, + int body: @do ref, + int condition: @underscore_statement ref, + int loc: @location ref +); + +@until_modifier_condition_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +until_modifier_def( + unique int id: @until_modifier, + int body: @underscore_statement ref, + int condition: @until_modifier_condition_type ref, + int loc: @location ref +); + +#keyset[when, index] +when_body( + int when: @when ref, + int index: int ref, + unique int then: @then ref +); + +@when_pattern_type = @comma_unnamed | @pattern + +#keyset[when, index] +when_pattern( + int when: @when ref, + int index: int ref, + unique int when_pattern_type: @when_pattern_type ref +); + +when_def( + unique int id: @when, + int loc: @location ref +); + +while_def( + unique int id: @while, + int body: @do ref, + int condition: @underscore_statement ref, + int loc: @location ref +); + +@while_modifier_condition_type = @underscore_arg | @break | @call | @method_call | @next | @return | @yield + +while_modifier_def( + unique int id: @while_modifier, + int body: @underscore_statement ref, + int condition: @while_modifier_condition_type ref, + int loc: @location ref +); + +#keyset[yield, index] +yield_child( + int yield: @yield ref, + int index: int ref, + unique int argument_list: @argument_list ref +); + +yield_def( + unique int id: @yield, + int loc: @location ref +); + +bang_unnamed_def( + unique int id: @bang_unnamed, + string text: string ref, + int loc: @location ref +); + +bangequal_unnamed_def( + unique int id: @bangequal_unnamed, + string text: string ref, + int loc: @location ref +); + +bangtilde_unnamed_def( + unique int id: @bangtilde_unnamed, + string text: string ref, + int loc: @location ref +); + +dquote_unnamed_def( + unique int id: @dquote_unnamed, + string text: string ref, + int loc: @location ref +); + +hashlbrace_unnamed_def( + unique int id: @hashlbrace_unnamed, + string text: string ref, + int loc: @location ref +); + +percent_unnamed_def( + unique int id: @percent_unnamed, + string text: string ref, + int loc: @location ref +); + +percentequal_unnamed_def( + unique int id: @percentequal_unnamed, + string text: string ref, + int loc: @location ref +); + +percentilparen_unnamed_def( + unique int id: @percentilparen_unnamed, + string text: string ref, + int loc: @location ref +); + +percentwlparen_unnamed_def( + unique int id: @percentwlparen_unnamed, + string text: string ref, + int loc: @location ref +); + +ampersand_unnamed_def( + unique int id: @ampersand_unnamed, + string text: string ref, + int loc: @location ref +); + +ampersandampersand_unnamed_def( + unique int id: @ampersandampersand_unnamed, + string text: string ref, + int loc: @location ref +); + +ampersandampersandequal_unnamed_def( + unique int id: @ampersandampersandequal_unnamed, + string text: string ref, + int loc: @location ref +); + +ampersanddot_unnamed_def( + unique int id: @ampersanddot_unnamed, + string text: string ref, + int loc: @location ref +); + +ampersandequal_unnamed_def( + unique int id: @ampersandequal_unnamed, + string text: string ref, + int loc: @location ref +); + +lparen_unnamed_def( + unique int id: @lparen_unnamed, + string text: string ref, + int loc: @location ref +); + +rparen_unnamed_def( + unique int id: @rparen_unnamed, + string text: string ref, + int loc: @location ref +); + +star_unnamed_def( + unique int id: @star_unnamed, + string text: string ref, + int loc: @location ref +); + +starstar_unnamed_def( + unique int id: @starstar_unnamed, + string text: string ref, + int loc: @location ref +); + +starstarequal_unnamed_def( + unique int id: @starstarequal_unnamed, + string text: string ref, + int loc: @location ref +); + +starequal_unnamed_def( + unique int id: @starequal_unnamed, + string text: string ref, + int loc: @location ref +); + +plus_unnamed_def( + unique int id: @plus_unnamed, + string text: string ref, + int loc: @location ref +); + +plusequal_unnamed_def( + unique int id: @plusequal_unnamed, + string text: string ref, + int loc: @location ref +); + +plusat_unnamed_def( + unique int id: @plusat_unnamed, + string text: string ref, + int loc: @location ref +); + +comma_unnamed_def( + unique int id: @comma_unnamed, + string text: string ref, + int loc: @location ref +); + +minus_unnamed_def( + unique int id: @minus_unnamed, + string text: string ref, + int loc: @location ref +); + +minusequal_unnamed_def( + unique int id: @minusequal_unnamed, + string text: string ref, + int loc: @location ref +); + +minusrangle_unnamed_def( + unique int id: @minusrangle_unnamed, + string text: string ref, + int loc: @location ref +); + +minusat_unnamed_def( + unique int id: @minusat_unnamed, + string text: string ref, + int loc: @location ref +); + +dot_unnamed_def( + unique int id: @dot_unnamed, + string text: string ref, + int loc: @location ref +); + +dotdot_unnamed_def( + unique int id: @dotdot_unnamed, + string text: string ref, + int loc: @location ref +); + +dotdotdot_unnamed_def( + unique int id: @dotdotdot_unnamed, + string text: string ref, + int loc: @location ref +); + +slash_unnamed_def( + unique int id: @slash_unnamed, + string text: string ref, + int loc: @location ref +); + +slashequal_unnamed_def( + unique int id: @slashequal_unnamed, + string text: string ref, + int loc: @location ref +); + +colon_unnamed_def( + unique int id: @colon_unnamed, + string text: string ref, + int loc: @location ref +); + +colondquote_unnamed_def( + unique int id: @colondquote_unnamed, + string text: string ref, + int loc: @location ref +); + +coloncolon_unnamed_def( + unique int id: @coloncolon_unnamed, + string text: string ref, + int loc: @location ref +); + +semicolon_unnamed_def( + unique int id: @semicolon_unnamed, + string text: string ref, + int loc: @location ref +); + +langle_unnamed_def( + unique int id: @langle_unnamed, + string text: string ref, + int loc: @location ref +); + +langlelangle_unnamed_def( + unique int id: @langlelangle_unnamed, + string text: string ref, + int loc: @location ref +); + +langlelangleequal_unnamed_def( + unique int id: @langlelangleequal_unnamed, + string text: string ref, + int loc: @location ref +); + +langleequal_unnamed_def( + unique int id: @langleequal_unnamed, + string text: string ref, + int loc: @location ref +); + +langleequalrangle_unnamed_def( + unique int id: @langleequalrangle_unnamed, + string text: string ref, + int loc: @location ref +); + +equal_unnamed_def( + unique int id: @equal_unnamed, + string text: string ref, + int loc: @location ref +); + +equalequal_unnamed_def( + unique int id: @equalequal_unnamed, + string text: string ref, + int loc: @location ref +); + +equalequalequal_unnamed_def( + unique int id: @equalequalequal_unnamed, + string text: string ref, + int loc: @location ref +); + +equalrangle_unnamed_def( + unique int id: @equalrangle_unnamed, + string text: string ref, + int loc: @location ref +); + +equaltilde_unnamed_def( + unique int id: @equaltilde_unnamed, + string text: string ref, + int loc: @location ref +); + +rangle_unnamed_def( + unique int id: @rangle_unnamed, + string text: string ref, + int loc: @location ref +); + +rangleequal_unnamed_def( + unique int id: @rangleequal_unnamed, + string text: string ref, + int loc: @location ref +); + +ranglerangle_unnamed_def( + unique int id: @ranglerangle_unnamed, + string text: string ref, + int loc: @location ref +); + +ranglerangleequal_unnamed_def( + unique int id: @ranglerangleequal_unnamed, + string text: string ref, + int loc: @location ref +); + +question_unnamed_def( + unique int id: @question_unnamed, + string text: string ref, + int loc: @location ref +); + +begin_unnamed_def( + unique int id: @begin_unnamed, + string text: string ref, + int loc: @location ref +); + +end_unnamed_def( + unique int id: @end_unnamed, + string text: string ref, + int loc: @location ref +); + +lbracket_unnamed_def( + unique int id: @lbracket_unnamed, + string text: string ref, + int loc: @location ref +); + +lbracketrbracket_unnamed_def( + unique int id: @lbracketrbracket_unnamed, + string text: string ref, + int loc: @location ref +); + +lbracketrbracketequal_unnamed_def( + unique int id: @lbracketrbracketequal_unnamed, + string text: string ref, + int loc: @location ref +); + +rbracket_unnamed_def( + unique int id: @rbracket_unnamed, + string text: string ref, + int loc: @location ref +); + +caret_unnamed_def( + unique int id: @caret_unnamed, + string text: string ref, + int loc: @location ref +); + +caretequal_unnamed_def( + unique int id: @caretequal_unnamed, + string text: string ref, + int loc: @location ref +); + +underscore__end___unnamed_def( + unique int id: @underscore__end___unnamed, + string text: string ref, + int loc: @location ref +); + +backtick_unnamed_def( + unique int id: @backtick_unnamed, + string text: string ref, + int loc: @location ref +); + +alias_unnamed_def( + unique int id: @alias_unnamed, + string text: string ref, + int loc: @location ref +); + +and_unnamed_def( + unique int id: @and_unnamed, + string text: string ref, + int loc: @location ref +); + +begin_unnamed_def( + unique int id: @begin_unnamed, + string text: string ref, + int loc: @location ref +); + +break_unnamed_def( + unique int id: @break_unnamed, + string text: string ref, + int loc: @location ref +); + +case_unnamed_def( + unique int id: @case_unnamed, + string text: string ref, + int loc: @location ref +); + +character_def( + unique int id: @character, + string text: string ref, + int loc: @location ref +); + +class_unnamed_def( + unique int id: @class_unnamed, + string text: string ref, + int loc: @location ref +); + +class_variable_def( + unique int id: @class_variable, + string text: string ref, + int loc: @location ref +); + +complex_def( + unique int id: @complex, + string text: string ref, + int loc: @location ref +); + +constant_def( + unique int id: @constant, + string text: string ref, + int loc: @location ref +); + +def_unnamed_def( + unique int id: @def_unnamed, + string text: string ref, + int loc: @location ref +); + +definedquestion_unnamed_def( + unique int id: @definedquestion_unnamed, + string text: string ref, + int loc: @location ref +); + +do_unnamed_def( + unique int id: @do_unnamed, + string text: string ref, + int loc: @location ref +); + +else_unnamed_def( + unique int id: @else_unnamed, + string text: string ref, + int loc: @location ref +); + +elsif_unnamed_def( + unique int id: @elsif_unnamed, + string text: string ref, + int loc: @location ref +); + +end_unnamed_def( + unique int id: @end_unnamed, + string text: string ref, + int loc: @location ref +); + +ensure_unnamed_def( + unique int id: @ensure_unnamed, + string text: string ref, + int loc: @location ref +); + +escape_sequence_def( + unique int id: @escape_sequence, + string text: string ref, + int loc: @location ref +); + +false_def( + unique int id: @false, + string text: string ref, + int loc: @location ref +); + +float_def( + unique int id: @float__, + string text: string ref, + int loc: @location ref +); + +for_unnamed_def( + unique int id: @for_unnamed, + string text: string ref, + int loc: @location ref +); + +global_variable_def( + unique int id: @global_variable, + string text: string ref, + int loc: @location ref +); + +heredoc_beginning_def( + unique int id: @heredoc_beginning, + string text: string ref, + int loc: @location ref +); + +heredoc_end_def( + unique int id: @heredoc_end, + string text: string ref, + int loc: @location ref +); + +identifier_def( + unique int id: @identifier, + string text: string ref, + int loc: @location ref +); + +if_unnamed_def( + unique int id: @if_unnamed, + string text: string ref, + int loc: @location ref +); + +in_unnamed_def( + unique int id: @in_unnamed, + string text: string ref, + int loc: @location ref +); + +instance_variable_def( + unique int id: @instance_variable, + string text: string ref, + int loc: @location ref +); + +integer_def( + unique int id: @integer, + string text: string ref, + int loc: @location ref +); + +module_unnamed_def( + unique int id: @module_unnamed, + string text: string ref, + int loc: @location ref +); + +next_unnamed_def( + unique int id: @next_unnamed, + string text: string ref, + int loc: @location ref +); + +nil_def( + unique int id: @nil, + string text: string ref, + int loc: @location ref +); + +not_unnamed_def( + unique int id: @not_unnamed, + string text: string ref, + int loc: @location ref +); + +or_unnamed_def( + unique int id: @or_unnamed, + string text: string ref, + int loc: @location ref +); + +r_unnamed_def( + unique int id: @r_unnamed, + string text: string ref, + int loc: @location ref +); + +redo_unnamed_def( + unique int id: @redo_unnamed, + string text: string ref, + int loc: @location ref +); + +rescue_unnamed_def( + unique int id: @rescue_unnamed, + string text: string ref, + int loc: @location ref +); + +retry_unnamed_def( + unique int id: @retry_unnamed, + string text: string ref, + int loc: @location ref +); + +return_unnamed_def( + unique int id: @return_unnamed, + string text: string ref, + int loc: @location ref +); + +self_def( + unique int id: @self, + string text: string ref, + int loc: @location ref +); + +super_def( + unique int id: @super, + string text: string ref, + int loc: @location ref +); + +then_unnamed_def( + unique int id: @then_unnamed, + string text: string ref, + int loc: @location ref +); + +true_def( + unique int id: @true, + string text: string ref, + int loc: @location ref +); + +undef_unnamed_def( + unique int id: @undef_unnamed, + string text: string ref, + int loc: @location ref +); + +uninterpreted_def( + unique int id: @uninterpreted, + string text: string ref, + int loc: @location ref +); + +unless_unnamed_def( + unique int id: @unless_unnamed, + string text: string ref, + int loc: @location ref +); + +until_unnamed_def( + unique int id: @until_unnamed, + string text: string ref, + int loc: @location ref +); + +when_unnamed_def( + unique int id: @when_unnamed, + string text: string ref, + int loc: @location ref +); + +while_unnamed_def( + unique int id: @while_unnamed, + string text: string ref, + int loc: @location ref +); + +yield_unnamed_def( + unique int id: @yield_unnamed, + string text: string ref, + int loc: @location ref +); + +lbrace_unnamed_def( + unique int id: @lbrace_unnamed, + string text: string ref, + int loc: @location ref +); + +pipe_unnamed_def( + unique int id: @pipe_unnamed, + string text: string ref, + int loc: @location ref +); + +pipeequal_unnamed_def( + unique int id: @pipeequal_unnamed, + string text: string ref, + int loc: @location ref +); + +pipepipe_unnamed_def( + unique int id: @pipepipe_unnamed, + string text: string ref, + int loc: @location ref +); + +pipepipeequal_unnamed_def( + unique int id: @pipepipeequal_unnamed, + string text: string ref, + int loc: @location ref +); + +rbrace_unnamed_def( + unique int id: @rbrace_unnamed, + string text: string ref, + int loc: @location ref +); + +tilde_unnamed_def( + unique int id: @tilde_unnamed, + string text: string ref, + int loc: @location ref +); + +@top = @alias | @argument_list | @array | @assignment | @bare_string | @bare_symbol | @begin | @begin_block | @binary | @block | @block_argument | @block_parameter | @block_parameters | @break | @call | @case__ | @chained_string | @class | @conditional | @destructured_left_assignment | @destructured_parameter | @do | @do_block | @element_reference | @else | @elsif | @empty_statement | @end_block | @ensure | @exception_variable | @exceptions | @for | @hash | @hash_splat_argument | @hash_splat_parameter | @if | @if_modifier | @in | @interpolation | @keyword_parameter | @lambda | @lambda_parameters | @left_assignment_list | @method | @method_call | @method_parameters | @module | @next | @operator | @operator_assignment | @optional_parameter | @pair | @parenthesized_statements | @pattern | @program | @range | @rational | @redo | @regex | @rescue | @rescue_modifier | @rest_assignment | @retry | @return | @right_assignment_list | @scope_resolution | @setter | @singleton_class | @singleton_method | @splat_argument | @splat_parameter | @string__ | @string_array | @subshell | @superclass | @symbol | @symbol_array | @then | @unary | @undef | @unless | @unless_modifier | @until | @until_modifier | @when | @while | @while_modifier | @yield | @bang_unnamed | @bangequal_unnamed | @bangtilde_unnamed | @dquote_unnamed | @hashlbrace_unnamed | @percent_unnamed | @percentequal_unnamed | @percentilparen_unnamed | @percentwlparen_unnamed | @ampersand_unnamed | @ampersandampersand_unnamed | @ampersandampersandequal_unnamed | @ampersanddot_unnamed | @ampersandequal_unnamed | @lparen_unnamed | @rparen_unnamed | @star_unnamed | @starstar_unnamed | @starstarequal_unnamed | @starequal_unnamed | @plus_unnamed | @plusequal_unnamed | @plusat_unnamed | @comma_unnamed | @minus_unnamed | @minusequal_unnamed | @minusrangle_unnamed | @minusat_unnamed | @dot_unnamed | @dotdot_unnamed | @dotdotdot_unnamed | @slash_unnamed | @slashequal_unnamed | @colon_unnamed | @colondquote_unnamed | @coloncolon_unnamed | @semicolon_unnamed | @langle_unnamed | @langlelangle_unnamed | @langlelangleequal_unnamed | @langleequal_unnamed | @langleequalrangle_unnamed | @equal_unnamed | @equalequal_unnamed | @equalequalequal_unnamed | @equalrangle_unnamed | @equaltilde_unnamed | @rangle_unnamed | @rangleequal_unnamed | @ranglerangle_unnamed | @ranglerangleequal_unnamed | @question_unnamed | @begin_unnamed | @end_unnamed | @lbracket_unnamed | @lbracketrbracket_unnamed | @lbracketrbracketequal_unnamed | @rbracket_unnamed | @caret_unnamed | @caretequal_unnamed | @underscore__end___unnamed | @backtick_unnamed | @alias_unnamed | @and_unnamed | @begin_unnamed | @break_unnamed | @case_unnamed | @character | @class_unnamed | @class_variable | @complex | @constant | @def_unnamed | @definedquestion_unnamed | @do_unnamed | @else_unnamed | @elsif_unnamed | @end_unnamed | @ensure_unnamed | @escape_sequence | @false | @float__ | @for_unnamed | @global_variable | @heredoc_beginning | @heredoc_end | @identifier | @if_unnamed | @in_unnamed | @instance_variable | @integer | @module_unnamed | @next_unnamed | @nil | @not_unnamed | @or_unnamed | @r_unnamed | @redo_unnamed | @rescue_unnamed | @retry_unnamed | @return_unnamed | @self | @super | @then_unnamed | @true | @undef_unnamed | @uninterpreted | @unless_unnamed | @until_unnamed | @when_unnamed | @while_unnamed | @yield_unnamed | @lbrace_unnamed | @pipe_unnamed | @pipeequal_unnamed | @pipepipe_unnamed | @pipepipeequal_unnamed | @rbrace_unnamed | @tilde_unnamed + +location( + unique int id: @location, + string file_path: string ref, + int start_line: int ref, + int start_column: int ref, + int end_line: int ref, + int end_column: int ref +); + +sourceLocationPrefix( + string prefix: string ref +); + From fd1f8b22e2830045428c50defbdbeff0637e8916 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Wed, 21 Oct 2020 11:06:53 +0100 Subject: [PATCH 0009/1036] Simplify keysets to Option> --- generator/src/dbscheme.rs | 2 +- generator/src/main.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generator/src/dbscheme.rs b/generator/src/dbscheme.rs index b4f03ff7ba00..164622092b09 100644 --- a/generator/src/dbscheme.rs +++ b/generator/src/dbscheme.rs @@ -11,7 +11,7 @@ pub enum Entry { pub struct Table { pub name: String, pub columns: Vec, - pub keysets: Vec>, + pub keysets: Option>, } /// A column in a table. diff --git a/generator/src/main.rs b/generator/src/main.rs index 9393fbaa8d9c..d7c28142c23a 100644 --- a/generator/src/main.rs +++ b/generator/src/main.rs @@ -107,10 +107,10 @@ fn add_field( ], // In addition to the field being unique, the combination of // parent+index is unique, so add a keyset for them. - keysets: vec![vec![ + keysets: Some(vec![ dbscheme::escape_name(parent_name), "index".to_string(), - ]], + ]), }; entries.push(dbscheme::Entry::Table(field_table)); } else { @@ -159,7 +159,7 @@ fn convert_nodes(nodes: &[NodeInfo]) -> Vec { ql_type: dbscheme::QlColumnType::Custom(dbscheme::escape_name(&name)), ql_type_is_ref: false, }], - keysets: vec![], + keysets: None, }; top_members.push(dbscheme::escape_name(&name)); @@ -232,7 +232,7 @@ fn write_dbscheme(entries: &[dbscheme::Entry]) -> std::io::Result<()> { fn create_location_entry() -> dbscheme::Entry { dbscheme::Entry::Table(dbscheme::Table { name: "location".to_string(), - keysets: Vec::new(), + keysets: None, columns: vec![ dbscheme::Column { unique: true, @@ -283,7 +283,7 @@ fn create_location_entry() -> dbscheme::Entry { fn create_source_location_prefix_entry() -> dbscheme::Entry { dbscheme::Entry::Table(dbscheme::Table { name: "sourceLocationPrefix".to_string(), - keysets: Vec::new(), + keysets: None, columns: vec![dbscheme::Column { unique: false, db_type: dbscheme::DbColumnType::String, From 47c8a3d6fb10c39ffdd292e458750bb7c3816fad Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Wed, 21 Oct 2020 11:26:23 +0100 Subject: [PATCH 0010/1036] Simplify to std::io::Result --- generator/src/dbscheme.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generator/src/dbscheme.rs b/generator/src/dbscheme.rs index 164622092b09..0edbfcb2b746 100644 --- a/generator/src/dbscheme.rs +++ b/generator/src/dbscheme.rs @@ -102,7 +102,7 @@ pub fn escape_name(name: &str) -> String { } /// Generates the dbscheme by writing the given dbscheme `entries` to the `file`. -pub fn write(file: &mut dyn std::io::Write, entries: &[Entry]) -> Result<(), std::io::Error> { +pub fn write(file: &mut dyn std::io::Write, entries: &[Entry]) -> std::io::Result<()> { write!(file, "// CodeQL database schema for Ruby\n")?; write!( file, From a7a18b8b0f659c60b3986cd6f5d245871727bbd3 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Wed, 21 Oct 2020 11:29:25 +0100 Subject: [PATCH 0011/1036] Gather all hard-coded Ruby-specific names/paths in one struct. --- generator/src/dbscheme.rs | 8 ++++++-- generator/src/language.rs | 7 +++++++ generator/src/main.rs | 36 +++++++++++++++++++++--------------- 3 files changed, 34 insertions(+), 17 deletions(-) create mode 100644 generator/src/language.rs diff --git a/generator/src/dbscheme.rs b/generator/src/dbscheme.rs index 0edbfcb2b746..e6503707986e 100644 --- a/generator/src/dbscheme.rs +++ b/generator/src/dbscheme.rs @@ -102,8 +102,12 @@ pub fn escape_name(name: &str) -> String { } /// Generates the dbscheme by writing the given dbscheme `entries` to the `file`. -pub fn write(file: &mut dyn std::io::Write, entries: &[Entry]) -> std::io::Result<()> { - write!(file, "// CodeQL database schema for Ruby\n")?; +pub fn write( + language_name: &str, + file: &mut dyn std::io::Write, + entries: &[Entry], +) -> std::io::Result<()> { + write!(file, "// CodeQL database schema for {}\n", language_name)?; write!( file, "// Automatically generated from the tree-sitter grammar; do not edit\n\n" diff --git a/generator/src/language.rs b/generator/src/language.rs new file mode 100644 index 000000000000..7f289c27de2d --- /dev/null +++ b/generator/src/language.rs @@ -0,0 +1,7 @@ +use std::path::PathBuf; + +pub struct Language { + pub name: String, + pub node_types_path: PathBuf, + pub dbscheme_path: PathBuf, +} diff --git a/generator/src/main.rs b/generator/src/main.rs index d7c28142c23a..06fc7f250c6a 100644 --- a/generator/src/main.rs +++ b/generator/src/main.rs @@ -1,14 +1,15 @@ -use std::fs::File; -use std::io::LineWriter; -use std::path::Path; - mod dbscheme; +mod language; mod node_types; + +use language::Language; use node_types::{FieldInfo, NodeInfo}; +use std::fs::File; +use std::io::LineWriter; +use std::path::PathBuf; -fn read_node_types() -> Option> { - let json_data = match std::fs::read_to_string(Path::new("tree-sitter-ruby/src/node-types.json")) - { +fn read_node_types(language: &Language) -> Option> { + let json_data = match std::fs::read_to_string(&language.node_types_path) { Ok(s) => s, Err(_) => return None, }; @@ -214,19 +215,17 @@ fn convert_nodes(nodes: &[NodeInfo]) -> Vec { entries } -fn write_dbscheme(entries: &[dbscheme::Entry]) -> std::io::Result<()> { - // TODO: figure out proper output path and/or take it from the command line. - let path = Path::new("ruby.dbscheme"); +fn write_dbscheme(language: &Language, entries: &[dbscheme::Entry]) -> std::io::Result<()> { println!( "Writing to '{}'", - match path.to_str() { + match language.dbscheme_path.to_str() { None => "", Some(p) => p, } ); - let file = File::create(path)?; + let file = File::create(&language.dbscheme_path)?; let mut file = LineWriter::new(file); - dbscheme::write(&mut file, &entries) + dbscheme::write(&language.name, &mut file, &entries) } fn create_location_entry() -> dbscheme::Entry { @@ -295,7 +294,14 @@ fn create_source_location_prefix_entry() -> dbscheme::Entry { } fn main() { - match read_node_types() { + // TODO: figure out proper dbscheme output path and/or take it from the + // command line. + let ruby = Language { + name: "Ruby".to_string(), + node_types_path: PathBuf::from("tree-sitter-ruby/src/node-types.json"), + dbscheme_path: PathBuf::from("ruby.dbscheme"), + }; + match read_node_types(&ruby) { None => { println!("Failed to read node types"); std::process::exit(1); @@ -304,7 +310,7 @@ fn main() { let mut dbscheme_entries = convert_nodes(&nodes); dbscheme_entries.push(create_location_entry()); dbscheme_entries.push(create_source_location_prefix_entry()); - match write_dbscheme(&dbscheme_entries) { + match write_dbscheme(&ruby, &dbscheme_entries) { Err(e) => { println!("Failed to write dbscheme: {}", e); std::process::exit(2); From 5e3544fcc3df67c5d8ea6303281b868edca089bc Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Wed, 21 Oct 2020 12:45:54 +0100 Subject: [PATCH 0012/1036] Use fmt::Display trait for writing dbscheme --- generator/src/dbscheme.rs | 130 ++++++++++++++++++++++---------------- generator/src/main.rs | 12 ++-- 2 files changed, 80 insertions(+), 62 deletions(-) diff --git a/generator/src/dbscheme.rs b/generator/src/dbscheme.rs index e6503707986e..f4296beb605d 100644 --- a/generator/src/dbscheme.rs +++ b/generator/src/dbscheme.rs @@ -1,10 +1,12 @@ +use std::fmt; + /// Represents a distinct entry in the database schema. pub enum Entry { /// An entry defining a database table. Table(Table), /// An entry defining type that is a union of other types. - Union { name: String, members: Vec }, + Union(Union), } /// A table in the database schema. @@ -14,6 +16,12 @@ pub struct Table { pub keysets: Option>, } +/// A union in the database schema. +pub struct Union { + pub name: String, + pub members: Vec, +} + /// A column in a table. pub struct Column { pub db_type: DbColumnType, @@ -101,6 +109,69 @@ pub fn escape_name(name: &str) -> String { result } +impl fmt::Display for Table { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for keyset in &self.keysets { + write!(f, "#keyset[")?; + for (key_index, key) in keyset.iter().enumerate() { + if key_index > 0 { + write!(f, ", ")?; + } + write!(f, "{}", key)?; + } + write!(f, "]\n")?; + } + + write!(f, "{}(\n", self.name)?; + for (column_index, column) in self.columns.iter().enumerate() { + write!(f, " ")?; + if column.unique { + write!(f, "unique ")?; + } + write!( + f, + "{} ", + match column.db_type { + DbColumnType::Int => "int", + DbColumnType::String => "string", + } + )?; + write!(f, "{}: ", column.name)?; + match &column.ql_type { + QlColumnType::Int => write!(f, "int")?, + QlColumnType::String => write!(f, "string")?, + QlColumnType::Custom(name) => write!(f, "@{}", name)?, + } + if column.ql_type_is_ref { + write!(f, " ref")?; + } + if column_index + 1 != self.columns.len() { + write!(f, ",")?; + } + write!(f, "\n")?; + } + write!(f, ");")?; + + Ok(()) + } +} + +impl fmt::Display for Union { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "@{} = ", self.name)?; + let mut first = true; + for member in &self.members { + if first { + first = false; + } else { + write!(f, " | ")?; + } + write!(f, "@{}", member)?; + } + Ok(()) + } +} + /// Generates the dbscheme by writing the given dbscheme `entries` to the `file`. pub fn write( language_name: &str, @@ -115,61 +186,8 @@ pub fn write( for entry in entries { match entry { - Entry::Table(table) => { - for keyset in &table.keysets { - write!(file, "#keyset[")?; - for (key_index, key) in keyset.iter().enumerate() { - if key_index > 0 { - write!(file, ", ")?; - } - write!(file, "{}", key)?; - } - write!(file, "]\n")?; - } - - write!(file, "{}(\n", table.name)?; - for (column_index, column) in table.columns.iter().enumerate() { - write!(file, " ")?; - if column.unique { - write!(file, "unique ")?; - } - write!( - file, - "{} ", - match column.db_type { - DbColumnType::Int => "int", - DbColumnType::String => "string", - } - )?; - write!(file, "{}: ", column.name)?; - match &column.ql_type { - QlColumnType::Int => write!(file, "int")?, - QlColumnType::String => write!(file, "string")?, - QlColumnType::Custom(name) => write!(file, "@{}", name)?, - } - if column.ql_type_is_ref { - write!(file, " ref")?; - } - if column_index + 1 != table.columns.len() { - write!(file, ",")?; - } - write!(file, "\n")?; - } - write!(file, ");\n\n")?; - } - Entry::Union { name, members } => { - write!(file, "@{} = ", name)?; - let mut first = true; - for member in members { - if first { - first = false; - } else { - write!(file, " | ")?; - } - write!(file, "@{}", member)?; - } - write!(file, "\n\n")?; - } + Entry::Table(table) => write!(file, "{}\n\n", table)?, + Entry::Union(union) => write!(file, "{}\n\n", union)?, } } diff --git a/generator/src/main.rs b/generator/src/main.rs index 06fc7f250c6a..c4a39f50192a 100644 --- a/generator/src/main.rs +++ b/generator/src/main.rs @@ -57,10 +57,10 @@ fn make_field_type( field_type.named, ))); } - entries.push(dbscheme::Entry::Union { + entries.push(dbscheme::Entry::Union(dbscheme::Union { name: field_union_name.clone(), members, - }); + })); field_union_name } } @@ -144,10 +144,10 @@ fn convert_nodes(nodes: &[NodeInfo]) -> Vec { subtype.named, ))) } - entries.push(dbscheme::Entry::Union { + entries.push(dbscheme::Entry::Union(dbscheme::Union { name: dbscheme::escape_name(&node_type_name(&node.kind, node.named)), members, - }); + })); } else { // It's a product type, defined by a table. let name = node_type_name(&node.kind, node.named); @@ -207,10 +207,10 @@ fn convert_nodes(nodes: &[NodeInfo]) -> Vec { } // Create a union of all database types. - entries.push(dbscheme::Entry::Union { + entries.push(dbscheme::Entry::Union(dbscheme::Union { name: "top".to_string(), members: top_members, - }); + })); entries } From e018f3f20b81ac7db19a087f9bf8897821fd76c0 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Wed, 21 Oct 2020 12:51:10 +0100 Subject: [PATCH 0013/1036] Use `if let` instead of iterating over Option --- generator/src/dbscheme.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generator/src/dbscheme.rs b/generator/src/dbscheme.rs index f4296beb605d..27078ce97179 100644 --- a/generator/src/dbscheme.rs +++ b/generator/src/dbscheme.rs @@ -111,7 +111,7 @@ pub fn escape_name(name: &str) -> String { impl fmt::Display for Table { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for keyset in &self.keysets { + if let Some(keyset) = &self.keysets { write!(f, "#keyset[")?; for (key_index, key) in keyset.iter().enumerate() { if key_index > 0 { From 36823d7804df68bf48c6d000c602e7f473437d3c Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Thu, 22 Oct 2020 11:10:05 +0100 Subject: [PATCH 0014/1036] Move deserialization to node_types module; propagate errors to caller --- generator/src/main.rs | 28 +++++++++++----------------- generator/src/node_types.rs | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/generator/src/main.rs b/generator/src/main.rs index c4a39f50192a..121865b7be80 100644 --- a/generator/src/main.rs +++ b/generator/src/main.rs @@ -8,19 +8,6 @@ use std::fs::File; use std::io::LineWriter; use std::path::PathBuf; -fn read_node_types(language: &Language) -> Option> { - let json_data = match std::fs::read_to_string(&language.node_types_path) { - Ok(s) => s, - Err(_) => return None, - }; - let nodes: Vec = match serde_json::from_str(&json_data) { - Ok(n) => n, - Err(_) => return None, - }; - - Some(nodes) -} - /// Given a tree-sitter node type's (kind, named) pair, returns a single string /// representing the (unescaped) name we'll use to refer to corresponding QL /// type. @@ -301,12 +288,19 @@ fn main() { node_types_path: PathBuf::from("tree-sitter-ruby/src/node-types.json"), dbscheme_path: PathBuf::from("ruby.dbscheme"), }; - match read_node_types(&ruby) { - None => { - println!("Failed to read node types"); + match node_types::read(&ruby.node_types_path) { + Err(e) => { + println!( + "Failed to read '{}': {}", + match ruby.node_types_path.to_str() { + None => "", + Some(p) => p, + }, + e + ); std::process::exit(1); } - Some(nodes) => { + Ok(nodes) => { let mut dbscheme_entries = convert_nodes(&nodes); dbscheme_entries.push(create_location_entry()); dbscheme_entries.push(create_source_location_prefix_entry()); diff --git a/generator/src/node_types.rs b/generator/src/node_types.rs index faee47e410eb..f8c06f29c679 100644 --- a/generator/src/node_types.rs +++ b/generator/src/node_types.rs @@ -1,5 +1,7 @@ use serde::Deserialize; use std::collections::BTreeMap; +use std::fmt; +use std::path::Path; #[derive(Deserialize)] pub struct NodeInfo { @@ -37,3 +39,36 @@ impl Default for FieldInfo { } } } + +pub enum Error { + IOError(std::io::Error), + JsonError(serde_json::error::Error), +} + +impl From for Error { + fn from(error: std::io::Error) -> Self { + Error::IOError(error) + } +} + +impl From for Error { + fn from(error: serde_json::Error) -> Self { + Error::JsonError(error) + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::IOError(e) => write!(f, "{}", e), + Error::JsonError(e) => write!(f, "{}", e), + } + } +} + +/// Deserializes the node types from the JSON at the given `path`. +pub fn read(path: &Path) -> Result, Error> { + let json_data = std::fs::read_to_string(path)?; + let node_types: Vec = serde_json::from_str(&json_data)?; + Ok(node_types) +} From e16b85e51109c8a88d8f972085508695c0ebcd46 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Wed, 21 Oct 2020 18:06:31 +0200 Subject: [PATCH 0015/1036] Add codeql-extractor config --- codeql-extractor.yml | 9 +++++++++ tools/autobuild.cmd | 9 +++++++++ tools/autobuild.sh | 9 +++++++++ tools/index-files.cmd | 8 ++++++++ tools/index-files.sh | 8 ++++++++ 5 files changed, 43 insertions(+) create mode 100644 codeql-extractor.yml create mode 100644 tools/autobuild.cmd create mode 100755 tools/autobuild.sh create mode 100755 tools/index-files.cmd create mode 100755 tools/index-files.sh diff --git a/codeql-extractor.yml b/codeql-extractor.yml new file mode 100644 index 000000000000..0e0626e69582 --- /dev/null +++ b/codeql-extractor.yml @@ -0,0 +1,9 @@ +name: "ruby" +display_name: "Ruby" +version: 0.1 +column_kind: "utf8" +file_types: + - name: ruby + display_name: Ruby files + extensions: + - .rb diff --git a/tools/autobuild.cmd b/tools/autobuild.cmd new file mode 100644 index 000000000000..023c8d9be169 --- /dev/null +++ b/tools/autobuild.cmd @@ -0,0 +1,9 @@ +@echo off + +type NUL && "%CODEQL_DIST%\codeql.exe" database index-files ^ + --include-extension=.rb ^ + --size-limit=5m ^ + --language=ruby ^ + "%CODEQL_EXTRACTOR_RUBY_WIP_DATABASE%" + +exit /b %ERRORLEVEL% diff --git a/tools/autobuild.sh b/tools/autobuild.sh new file mode 100755 index 000000000000..b4388be2eac8 --- /dev/null +++ b/tools/autobuild.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +set -eu + +exec "${CODEQL_DIST}/codeql" database index-files \ + --include-extension=.rb \ + --size-limit=5m \ + --language=ruby \ + "$CODEQL_EXTRACTOR_RUBY_WIP_DATABASE" diff --git a/tools/index-files.cmd b/tools/index-files.cmd new file mode 100755 index 000000000000..bc96895a9882 --- /dev/null +++ b/tools/index-files.cmd @@ -0,0 +1,8 @@ +@echo off + +type NUL && "%CODEQL_EXTRACTOR_RUBY_ROOT%\tools\win64\extractor.exe" ^ + --file-list "%1" ^ + --source-archive-dir "%CODEQL_EXTRACTOR_RUBY_SOURCE_ARCHIVE_DIR%" ^ + --output-dir "%CODEQL_EXTRACTOR_RUBY_TRAP_DIR%" + +exit /b %ERRORLEVEL% diff --git a/tools/index-files.sh b/tools/index-files.sh new file mode 100755 index 000000000000..cfd5a360a275 --- /dev/null +++ b/tools/index-files.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +set -eu + +exec "${CODEQL_EXTRACTOR_RUBY_ROOT}/tools/${CODEQL_PLATFORM}/extractor" \ + --file-list "$1" \ + --source-archive-dir "$CODEQL_EXTRACTOR_RUBY_SOURCE_ARCHIVE_DIR" \ + --output-dir "$CODEQL_EXTRACTOR_RUBY_TRAP_DIR" From 849e10958344f422d8ac7ea8bd3a5620f4417ca4 Mon Sep 17 00:00:00 2001 From: Nick Rolfe Date: Fri, 23 Oct 2020 13:01:17 +0100 Subject: [PATCH 0016/1036] Add library package for shared code --- Cargo.lock | 23 ++++++++++++------- Cargo.toml | 6 ++++- generator/Cargo.toml | 3 +-- generator/src/main.rs | 1 - node-types/Cargo.toml | 11 +++++++++ .../node_types.rs => node-types/src/lib.rs | 0 6 files changed, 32 insertions(+), 12 deletions(-) create mode 100644 node-types/Cargo.toml rename generator/src/node_types.rs => node-types/src/lib.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 7e69efbce252..046bb12a1f82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,8 +19,7 @@ checksum = "ed67cbde08356238e75fc4656be4749481eeffb09e19f320a25237d5221c985d" name = "generator" version = "0.1.0" dependencies = [ - "serde", - "serde_json", + "node-types", ] [[package]] @@ -41,6 +40,14 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" +[[package]] +name = "node-types" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "proc-macro2" version = "1.0.24" @@ -93,18 +100,18 @@ checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" [[package]] name = "serde" -version = "1.0.116" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96fe57af81d28386a513cbc6858332abc6117cfdb5999647c6444b8f43a370a5" +checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.116" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f630a6370fd8e457873b4bd2ffdae75408bc291ba72be773772a4c2a065d9ae8" +checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e" dependencies = [ "proc-macro2", "quote", @@ -124,9 +131,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.44" +version = "1.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e03e57e4fcbfe7749842d53e24ccb9aa12b7252dbe5e91d2acad31834c8b8fdd" +checksum = "5ad5de3220ea04da322618ded2c42233d02baca219d6f160a3e9c87cda16c942" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 8a3ea43b0786..fe2bfbea7ac1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,6 @@ [workspace] -members = ["extractor", "generator"] +members = [ + "extractor", + "generator", + "node-types", +] diff --git a/generator/Cargo.toml b/generator/Cargo.toml index 8e789c7aff62..b597ca7a826a 100644 --- a/generator/Cargo.toml +++ b/generator/Cargo.toml @@ -7,5 +7,4 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" +node-types = { path = "../node-types" } diff --git a/generator/src/main.rs b/generator/src/main.rs index 121865b7be80..88f144e06202 100644 --- a/generator/src/main.rs +++ b/generator/src/main.rs @@ -1,6 +1,5 @@ mod dbscheme; mod language; -mod node_types; use language::Language; use node_types::{FieldInfo, NodeInfo}; diff --git a/node-types/Cargo.toml b/node-types/Cargo.toml new file mode 100644 index 000000000000..c751d7360d60 --- /dev/null +++ b/node-types/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "node-types" +version = "0.1.0" +authors = ["GitHub"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" \ No newline at end of file diff --git a/generator/src/node_types.rs b/node-types/src/lib.rs similarity index 100% rename from generator/src/node_types.rs rename to node-types/src/lib.rs From d00c956028d04ac22ab2ffd92914ce9b0c9418b6 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Wed, 21 Oct 2020 18:53:31 +0200 Subject: [PATCH 0017/1036] Build with clang for non-windows platforms --- extractor/build.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/extractor/build.rs b/extractor/build.rs index 2b849cd3ba58..c152a1f49431 100644 --- a/extractor/build.rs +++ b/extractor/build.rs @@ -2,10 +2,13 @@ use std::path::PathBuf; fn main() { let dir: PathBuf = ["../tree-sitter-ruby", "src"].iter().collect(); - - cc::Build::new() + let mut build = cc::Build::new(); + build .include(&dir) - .file(dir.join("parser.c")) - .file(dir.join("scanner.cc")) - .compile("tree-sitter-ruby"); + .file(&dir.join("parser.c")) + .file(&dir.join("scanner.cc")); + if !cfg!(windows) { + build.cpp(true).compiler("clang"); + } + build.compile("tree-sitter-ruby"); } From 47ccc33ab3321f035913d4c48d78cd9cb43f68cc Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Tue, 20 Oct 2020 13:08:13 +0200 Subject: [PATCH 0018/1036] Initial version of extractor based on tree-sitter grammar --- Cargo.lock | 108 +++++++++ extractor/Cargo.toml | 5 +- extractor/src/extractor.rs | 430 +++++++++++++++++++++++++++++++++++ extractor/src/main.rs | 86 ++++++- extractor/src/nodes_types.rs | 170 ++++++++++++++ 5 files changed, 789 insertions(+), 10 deletions(-) create mode 100644 extractor/src/extractor.rs create mode 100644 extractor/src/nodes_types.rs diff --git a/Cargo.lock b/Cargo.lock index 046bb12a1f82..5406d36570b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,12 +9,53 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + [[package]] name = "cc" version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed67cbde08356238e75fc4656be4749481eeffb09e19f320a25237d5221c985d" +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "generator" version = "0.1.0" @@ -22,6 +63,15 @@ dependencies = [ "node-types", ] +[[package]] +name = "hermit-abi" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8" +dependencies = [ + "libc", +] + [[package]] name = "itoa" version = "0.4.6" @@ -34,6 +84,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "libc" +version = "0.2.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" + [[package]] name = "memchr" version = "2.3.3" @@ -89,6 +145,9 @@ name = "ruby-extractor" version = "0.1.0" dependencies = [ "cc", + "clap", + "serde", + "serde_json", "tree-sitter", ] @@ -129,6 +188,12 @@ dependencies = [ "serde", ] +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + [[package]] name = "syn" version = "1.0.46" @@ -140,6 +205,15 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thread_local" version = "1.0.1" @@ -159,8 +233,42 @@ dependencies = [ "regex", ] +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + [[package]] name = "unicode-xid" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/extractor/Cargo.toml b/extractor/Cargo.toml index 5a0bbbaf3430..5189c2b93da9 100644 --- a/extractor/Cargo.toml +++ b/extractor/Cargo.toml @@ -8,6 +8,9 @@ edition = "2018" [dependencies] tree-sitter = "0.17.0" - +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +clap = "2.33" [build-dependencies] cc="*" + diff --git a/extractor/src/extractor.rs b/extractor/src/extractor.rs new file mode 100644 index 000000000000..7a32f8e8df0c --- /dev/null +++ b/extractor/src/extractor.rs @@ -0,0 +1,430 @@ +use super::nodes_types::{Entry, Field, Storage, TypeName}; + +use std::collections::BTreeMap as Map; +use std::collections::BTreeSet as Set; +use std::fmt; +use std::path::Path; +use tree_sitter::{Language, Node, Parser, Tree}; + +pub struct Extractor { + pub parser: Parser, + pub schema: Vec, +} + +pub fn create(language: Language, schema: Vec) -> Extractor { + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + + Extractor { parser, schema } +} +impl Extractor { + pub fn extract<'a>(&'a mut self, path: &Path) -> std::io::Result { + let source = std::fs::read(&path)?; + let tree = &self + .parser + .parse(&source, None) + .expect("Failed to parse file"); + let mut visitor = Visitor { + source: &source, + program: vec![Fact::Comment(format!( + "Auto-generated FACT file for {}, generated by the cool kids", + path.display() + ))], + counter: -1, + // TODO: should we handle path strings that are not valid UTF8 better? + path: format!("{}", path.display()), + stack: Vec::new(), + tables: build_schema_lookup(&self.schema), + union_types: build_union_type_lookup(&self.schema), + }; + traverse(&tree, &mut visitor); + + &self.parser.reset(); + Ok(Program(visitor.program)) + } +} + +fn build_schema_lookup<'a>(schema: &'a Vec) -> Map<&'a TypeName, &'a Entry> { + let mut map = std::collections::BTreeMap::new(); + for entry in schema { + if let Entry::Table { type_name, .. } = entry { + map.insert(type_name, entry); + } + } + map +} + +fn build_union_type_lookup<'a>(schema: &'a Vec) -> Map<&'a TypeName, &'a Set> { + let mut union_types = std::collections::BTreeMap::new(); + for entry in schema { + if let Entry::Union { type_name, members } = entry { + union_types.insert(type_name, members); + } + } + union_types +} + +struct Visitor<'a> { + source: &'a Vec, + program: Vec, + counter: i32, + path: String, + stack: Vec, Id, TypeName)>>, + tables: Map<&'a TypeName, &'a Entry>, + union_types: Map<&'a TypeName, &'a Set>, +} + +impl Visitor<'_> { + fn enter_node(&mut self, node: Node) { + if node.is_extra() { + return; + } + self.stack.push(Vec::new()); + } + + fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) { + if node.is_extra() { + return; + } + let child_nodes = self.stack.pop().expect("Vistor: empty stack"); + let table = self.tables.get(&TypeName { + kind: node.kind().to_owned(), + named: node.is_named(), + }); + if let Some(Entry::Table { fields, .. }) = table { + self.counter += 1; + let id = Id(self.counter); + let loc = Loc(self.counter); + self.program.push(Fact::New(Arg::IdArg(id))); + self.program.push(Fact::New(Arg::LocArg(loc))); + self.program.push(location_for(&self.path, loc, node)); + let table_name = node_type_name(node.kind(), node.is_named()); + let args: Option>; + if fields.is_empty() { + args = Some(vec![sliced_source_arg(self.source, node)]); + } else { + args = self.complex_node(fields, child_nodes, id); + } + if let Some(args) = args { + self.program + .push(Fact::Definition(table_name, id, args, loc)); + } + if let Some(parent) = self.stack.last_mut() { + parent.push(( + field_name, + id, + TypeName { + kind: node.kind().to_owned(), + named: node.is_named(), + }, + )) + }; + } else { + panic!(format!("Unknown table type: '{}'", node.kind())) + } + } + + fn complex_node( + &mut self, + fields: &Vec, + child_nodes: Vec<(Option<&str>, Id, TypeName)>, + parent_id: Id, + ) -> Option> { + let mut map: Map<&Option, (&Field, Vec)> = std::collections::BTreeMap::new(); + for field in fields { + map.insert(&field.name, (field, Vec::new())); + } + for (child_field, child_id, child_type) in child_nodes { + if let Some((field, values)) = map.get_mut(&child_field.map(|x| x.to_owned())) { + //TODO: handle error and missing nodes + if self.type_matches(&child_type, &field.types) { + values.push(child_id); + } else if field.name.is_some() { + println!( + "Type mismatch for field {:?} with type {:?} != {:?}", + child_field, child_type, field.types + ) + } + } else { + println!( + "Value for unknown field: {:?} and type {:?}", + &child_field, &child_type + ); + } + } + let mut args = Vec::new(); + let mut is_valid = true; + for field in fields { + let child_ids = &map.get(&field.name).unwrap().1; + match &field.storage { + Storage::Column => { + if child_ids.len() == 1 { + args.push(Arg::IdArg(*child_ids.first().unwrap())); + } else { + is_valid = false; + println!("Argument count mismatch for field {:?}", field.name); + } + } + Storage::Table { parent, index } => { + for child_id in child_ids { + self.program.push(Fact::ChildOf( + node_type_name(&parent.kind, parent.named), + parent_id, + match &field.name { + Some(name) => name.to_owned(), + None => "child".to_owned(), + }, + Index(*index), + *child_id, + )); + } + } + } + } + if is_valid { + Some(args) + } else { + None + } + } + fn type_matches(&self, tp: &TypeName, types: &Set) -> bool { + if types.contains(tp) { + return true; + } + for other in types.iter() { + if let Some(x) = self.union_types.get(other) { + if self.type_matches(tp, x) { + return true; + } + } + } + return false; + } +} + +// Emit a slice of a source file as an Arg. +fn sliced_source_arg(source: &Vec, n: Node) -> Arg { + let range = n.byte_range(); + Arg::StringArg(String::from( + std::str::from_utf8(&source[range.start..range.end]).expect("Failed to decode string"), + )) +} + +// Emit a 'Located' fact for the provided node, appropriately calibrated. +fn location_for<'a>(fp: &String, ident: Loc, n: Node) -> Fact { + let start_line = n.start_position().row; + let start_col = n.start_position().column; + let end_line = n.end_position().row; + let end_col = n.end_position().column; + Fact::Located(vec![ + Arg::LocArg(ident), + Arg::StringArg(fp.to_owned()), + Arg::IntArg(start_line), + Arg::IntArg(start_col), + Arg::IntArg(end_line), + Arg::IntArg(end_col), + ]) +} + +fn traverse(tree: &Tree, visitor: &mut Visitor) { + let cursor = &mut tree.walk(); + visitor.enter_node(cursor.node()); + let mut recurse = true; + loop { + if recurse && cursor.goto_first_child() { + visitor.enter_node(cursor.node()); + } else { + visitor.leave_node(cursor.field_name(), cursor.node()); + + if cursor.goto_next_sibling() { + recurse = true; + visitor.enter_node(cursor.node()); + } else if cursor.goto_parent() { + recurse = false; + } else { + break; + } + } + } +} +pub struct Program(Vec); + +impl fmt::Display for Program { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut text = String::new(); + for fact in &self.0 { + text.push_str(&format!("{}\n", fact)); + } + write!(f, "{}", text) + } +} +#[derive(Debug)] +enum Fact { + // @id = *@ + New(Arg), + // @node_def(self, arg?, location)@ + Definition(String, Id, Vec, Loc), + // @node_child(self, index, parent)@ + ChildOf(String, Id, String, Index, Id), + // @location(loc, path, r1, c1, r2, c2) + Located(Vec), + Comment(String), +} +impl fmt::Display for Fact { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Fact::New(id) => write!(f, "{} = *", id), + Fact::Definition(n, id, args, loc) => { + let mut args_str = String::new(); + for arg in args { + args_str.push_str(&format!("{}, ", arg)); + } + write!( + f, + "{}({}, {}{})", + escape_name(&format!("{}_def", &n)), + id, + args_str, + loc + ) + } + Fact::ChildOf(pname, id, fname, idx, p) => write!( + f, + "{}({}, {}, {})", + escape_name(&format!("{}_{}", &pname, &fname)), + id, + idx, + p + ), + Fact::Located(args) => write!( + f, + "location({}, {}, {}, {}, {}, {})", + args.get(0).unwrap(), + args.get(1).unwrap(), + args.get(2).unwrap(), + args.get(3).unwrap(), + args.get(4).unwrap(), + args.get(5).unwrap(), + ), + Fact::Comment(line) => write!(f, "// {}", line), + } + } +} +// Identifiers of the form #0, #1... +#[derive(Debug, Copy, Clone)] +struct Id(i32); + +impl fmt::Display for Id { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "#{}", self.0) + } +} +// Locative identifiers of the form #0_loc, #1_loc... +#[derive(Debug, Copy, Clone)] +struct Loc(i32); + +impl fmt::Display for Loc { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "#{}_loc", self.0) + } +} +// Numeric indices. +#[derive(Debug, Copy, Clone)] +struct Index(usize); + +impl fmt::Display for Index { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +// Some untyped argument to a fact. +#[derive(Debug)] +enum Arg { + IntArg(usize), + StringArg(String), + IdArg(Id), + LocArg(Loc), +} + +impl fmt::Display for Arg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Arg::IntArg(x) => write!(f, "{}", x), + Arg::StringArg(x) => write!(f, "\"{}\"", x.replace("\"", "\"\"")), + Arg::IdArg(x) => write!(f, "{}", x), + Arg::LocArg(x) => write!(f, "{}", x), + } + } +} + +const RESERVED_KEYWORDS: [&'static str; 14] = [ + "boolean", "case", "date", "float", "int", "key", "of", "order", "ref", "string", "subtype", + "type", "unique", "varchar", +]; + +/// Returns a string that's a copy of `name` but suitably escaped to be a valid +/// QL identifier. +pub fn escape_name(name: &str) -> String { + let mut result = String::new(); + + // If there's a leading underscore, replace it with 'underscore_'. + if let Some(c) = name.chars().next() { + if c == '_' { + result.push_str("underscore"); + } + } + for c in name.chars() { + match c { + '{' => result.push_str("lbrace"), + '}' => result.push_str("rbrace"), + '<' => result.push_str("langle"), + '>' => result.push_str("rangle"), + '[' => result.push_str("lbracket"), + ']' => result.push_str("rbracket"), + '(' => result.push_str("lparen"), + ')' => result.push_str("rparen"), + '|' => result.push_str("pipe"), + '=' => result.push_str("equal"), + '~' => result.push_str("tilde"), + '?' => result.push_str("question"), + '`' => result.push_str("backtick"), + '^' => result.push_str("caret"), + '!' => result.push_str("bang"), + '#' => result.push_str("hash"), + '%' => result.push_str("percent"), + '&' => result.push_str("ampersand"), + '.' => result.push_str("dot"), + ',' => result.push_str("comma"), + '/' => result.push_str("slash"), + ':' => result.push_str("colon"), + ';' => result.push_str("semicolon"), + '"' => result.push_str("dquote"), + '*' => result.push_str("star"), + '+' => result.push_str("plus"), + '-' => result.push_str("minus"), + '@' => result.push_str("at"), + _ => result.push_str(&c.to_lowercase().to_string()), + } + } + + for &keyword in &RESERVED_KEYWORDS { + if result == keyword { + result.push_str("__"); + break; + } + } + + result +} + +/// Given a tree-sitter node type's (kind, named) pair, returns a single string +/// representing the (unescaped) name we'll use to refer to corresponding QL +/// type. +fn node_type_name(kind: &str, named: bool) -> String { + if named { + kind.to_string() + } else { + format!("{}_unnamed", kind) + } +} diff --git a/extractor/src/main.rs b/extractor/src/main.rs index b1361cedf4e6..2200ab366b53 100644 --- a/extractor/src/main.rs +++ b/extractor/src/main.rs @@ -1,18 +1,86 @@ -use tree_sitter::{Language, Parser}; +mod extractor; +mod nodes_types; -fn main() { - let mut parser = Parser::new(); +use clap; +use std::fs; +use std::io::BufRead; +use std::path::{Path, PathBuf}; +use tree_sitter::Language; +fn main() -> std::io::Result<()> { extern "C" { fn tree_sitter_ruby() -> Language; } - let language = unsafe { tree_sitter_ruby() }; - parser.set_language(language).unwrap(); + let matches = clap::App::new("Ruby extractor") + .version("1.0") + .author("GitHub") + .about("CodeQL Ruby extractor") + .args_from_usage( + "--source-archive-dir= 'Sets a custom source archive folder' + --output-dir= 'Sets a custom trap folder' + --file-list= 'A text files containing the paths of the files to extract'", + ) + .get_matches(); + let src_archive_dir = matches + .value_of("source-archive-dir") + .expect("missing --source-archive-dir"); + let src_archive_dir = PathBuf::from(src_archive_dir); + + let trap_dir = matches + .value_of("output-dir") + .expect("missing --output-dir"); + let trap_dir = PathBuf::from(trap_dir); - let src = "def foo\n puts \"hello\"\nend"; - let tree = parser.parse(src, None).unwrap(); - let root_node = tree.root_node(); + let file_list = matches.value_of("file-list").expect("missing --file-list"); + let file_list = fs::File::open(file_list)?; - println!("Root: {}", root_node.to_sexp()); + let node_types_path = PathBuf::from("tree-sitter-ruby/src/node-types.json"); + let language = unsafe { tree_sitter_ruby() }; + let schema = nodes_types::read_node_types(&node_types_path)?; + let mut extractor = extractor::create(language, schema); + for line in std::io::BufReader::new(file_list).lines() { + let path = PathBuf::from(line?); + let trap_file = path_for(&trap_dir, &path, ".trap"); + let src_archive_file = path_for(&src_archive_dir, &path, ""); + let trap = extractor.extract(&path)?; + std::fs::create_dir_all(&src_archive_file.parent().unwrap())?; + std::fs::copy(&path, &src_archive_file)?; + std::fs::create_dir_all(&trap_file.parent().unwrap())?; + let mut trap_file = std::fs::File::create(&trap_file)?; + let trap_file: &mut dyn std::io::Write = &mut trap_file; + write!(trap_file, "{}", trap)?; + } + return Ok(()); +} +fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf { + let mut result = PathBuf::from(dir); + for component in path.components() { + match component { + std::path::Component::Prefix(_) => { + // skip for now + // TODO: handle this properly for Windows + } + std::path::Component::RootDir => { + // skip + } + std::path::Component::Normal(_) => { + result.push(component); + } + std::path::Component::CurDir => { + // skip + } + std::path::Component::ParentDir => { + result.pop(); + } + } + } + if let Some(x) = result.extension() { + let mut new_ext = x.to_os_string(); + new_ext.push(ext); + result.set_extension(new_ext); + } else { + result.set_extension(ext); + } + result } diff --git a/extractor/src/nodes_types.rs b/extractor/src/nodes_types.rs new file mode 100644 index 000000000000..67363d408e0b --- /dev/null +++ b/extractor/src/nodes_types.rs @@ -0,0 +1,170 @@ +use serde::Deserialize; + +use std::collections::BTreeMap as Map; +use std::collections::BTreeSet as Set; +use std::fs; +use std::path::Path; + +#[derive(Debug)] +pub enum Entry { + Union { + type_name: TypeName, + members: Set, + }, + Table { + type_name: TypeName, + fields: Vec, + }, +} + +#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)] +pub struct TypeName { + pub kind: String, + pub named: bool, +} + +#[derive(Debug)] +pub struct Field { + pub types: Set, + /// The name of the field or None for the anonymous 'children' + /// entry from node_types.json + pub name: Option, + pub storage: Storage, +} + +#[derive(Debug)] +pub enum Storage { + /// the field is stored as a column in the parent table + Column, + // the field is store in a link table + Table { + parent: TypeName, + index: usize, + }, +} + +pub fn read_node_types(node_types_path: &Path) -> std::io::Result> { + let file = fs::File::open(node_types_path)?; + let node_types = serde_json::from_reader(file)?; + Ok(convert_nodes(node_types)) +} + +fn convert_type(node_type: &NodeType) -> TypeName { + TypeName { + kind: node_type.kind.to_string(), + named: node_type.named, + } +} + +fn convert_types(node_types: &Vec) -> Set { + let iter = node_types.iter().map(convert_type).collect(); + std::collections::BTreeSet::from(iter) +} +pub fn convert_nodes(nodes: Vec) -> Vec { + let mut entries: Vec = Vec::new(); + + for node in nodes { + if let Some(subtypes) = &node.subtypes { + // It's a tree-sitter supertype node, for which we create a union + // type. + entries.push(Entry::Union { + type_name: TypeName { + kind: node.kind, + named: node.named, + }, + members: convert_types(&subtypes), + }); + } else { + // It's a product type, defined by a table. + let type_name = TypeName { + kind: node.kind, + named: node.named, + }; + let mut fields = Vec::new(); + + // If the type also has fields or children, then we create either + // auxiliary tables or columns in the defining table for them. + if let Some(node_fields) = &node.fields { + for (field_name, field_info) in node_fields { + add_field( + &type_name, + Some(field_name.to_string()), + field_info, + &mut fields, + ); + } + } + if let Some(children) = &node.children { + // Treat children as if they were a field called 'child'. + add_field(&type_name, None, children, &mut fields); + } + entries.push(Entry::Table { type_name, fields }); + } + } + entries +} + +fn add_field( + parent_type_name: &TypeName, + field_name: Option, + field_info: &FieldInfo, + fields: &mut Vec, +) { + let storage; + if !field_info.multiple && field_info.required { + // This field must appear exactly once, so we add it as + // a column to the main table for the node type. + storage = Storage::Column; + } else { + // This field can appear zero or multiple times, so put + // it in an auxiliary table. + storage = Storage::Table { + parent: TypeName { + kind: parent_type_name.kind.to_string(), + named: parent_type_name.named, + }, + index: fields.len(), + }; + } + fields.push(Field { + types: convert_types(&field_info.types), + name: field_name, + storage, + }); +} +#[derive(Deserialize)] +pub struct NodeInfo { + #[serde(rename = "type")] + pub kind: String, + pub named: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub fields: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub children: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub subtypes: Option>, +} + +#[derive(Deserialize)] +pub struct NodeType { + #[serde(rename = "type")] + pub kind: String, + pub named: bool, +} + +#[derive(Deserialize)] +pub struct FieldInfo { + pub multiple: bool, + pub required: bool, + pub types: Vec, +} + +impl Default for FieldInfo { + fn default() -> Self { + FieldInfo { + multiple: false, + required: true, + types: Vec::new(), + } + } +} From fd39524c5e3aff72f7bd67901139d2ae5057b1af Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Mon, 26 Oct 2020 14:02:07 +0100 Subject: [PATCH 0019/1036] Improve error messages Include file path and line number and emit better descriptions --- extractor/src/extractor.rs | 71 ++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 11 deletions(-) diff --git a/extractor/src/extractor.rs b/extractor/src/extractor.rs index 7a32f8e8df0c..8bbd33d05e10 100644 --- a/extractor/src/extractor.rs +++ b/extractor/src/extractor.rs @@ -76,14 +76,33 @@ struct Visitor<'a> { impl Visitor<'_> { fn enter_node(&mut self, node: Node) { + if node.is_error() { + println!( + "error: {}:{}: parse error", + &self.path, + node.start_position().row, + ); + return; + } + if node.is_missing() { + println!( + "error: {}:{}: parse error: expecting '{}'", + &self.path, + node.start_position().row, + node.kind() + ); + return; + } + if node.is_extra() { return; } + self.stack.push(Vec::new()); } fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) { - if node.is_extra() { + if node.is_extra() || node.is_error() || node.is_missing() { return; } let child_nodes = self.stack.pop().expect("Vistor: empty stack"); @@ -103,7 +122,7 @@ impl Visitor<'_> { if fields.is_empty() { args = Some(vec![sliced_source_arg(self.source, node)]); } else { - args = self.complex_node(fields, child_nodes, id); + args = self.complex_node(&node, fields, child_nodes, id); } if let Some(args) = args { self.program @@ -120,12 +139,17 @@ impl Visitor<'_> { )) }; } else { - panic!(format!("Unknown table type: '{}'", node.kind())) + println!( + "error: {}:{}: unknown table type: '{}'", + &self.path, + node.start_position().row, + node.kind() + ); } } - fn complex_node( &mut self, + node: &Node, fields: &Vec, child_nodes: Vec<(Option<&str>, Id, TypeName)>, parent_id: Id, @@ -141,15 +165,26 @@ impl Visitor<'_> { values.push(child_id); } else if field.name.is_some() { println!( - "Type mismatch for field {:?} with type {:?} != {:?}", - child_field, child_type, field.types + "error: {}:{}: type mismatch for field {}::{} with type {:?} != {:?}", + &self.path, + node.start_position().row, + node.kind(), + child_field.unwrap_or("child"), + child_type, + field.types ) } } else { - println!( - "Value for unknown field: {:?} and type {:?}", - &child_field, &child_type - ); + if child_field.is_some() || child_type.named { + println!( + "error: {}:{}: value for unknown field: {}::{} and type {:?}", + &self.path, + node.start_position().row, + node.kind(), + &child_field.unwrap_or("child"), + &child_type + ); + } } } let mut args = Vec::new(); @@ -162,7 +197,21 @@ impl Visitor<'_> { args.push(Arg::IdArg(*child_ids.first().unwrap())); } else { is_valid = false; - println!("Argument count mismatch for field {:?}", field.name); + println!( + "error: {}:{}: {} for field: {}::{}", + &self.path, + node.start_position().row, + if child_ids.is_empty() { + "missing value" + } else { + "too many values" + }, + node.kind(), + match field.name.as_ref() { + Some(x) => x, + None => "child", + } + ) } } Storage::Table { parent, index } => { From 1d36b5085ad27ecc7843025efb31c71be5a01e29 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Mon, 26 Oct 2020 15:00:00 +0100 Subject: [PATCH 0020/1036] Do not recurse into 'extra' nodes for now --- extractor/src/extractor.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/extractor/src/extractor.rs b/extractor/src/extractor.rs index 8bbd33d05e10..db328b631d0b 100644 --- a/extractor/src/extractor.rs +++ b/extractor/src/extractor.rs @@ -75,14 +75,14 @@ struct Visitor<'a> { } impl Visitor<'_> { - fn enter_node(&mut self, node: Node) { + fn enter_node(&mut self, node: Node) -> bool { if node.is_error() { println!( "error: {}:{}: parse error", &self.path, node.start_position().row, ); - return; + return false; } if node.is_missing() { println!( @@ -91,14 +91,15 @@ impl Visitor<'_> { node.start_position().row, node.kind() ); - return; + return false; } if node.is_extra() { - return; + return false; } self.stack.push(Vec::new()); + return true; } fn leave_node(&mut self, field_name: Option<&'static str>, node: Node) { @@ -281,13 +282,12 @@ fn traverse(tree: &Tree, visitor: &mut Visitor) { let mut recurse = true; loop { if recurse && cursor.goto_first_child() { - visitor.enter_node(cursor.node()); + recurse = visitor.enter_node(cursor.node()); } else { visitor.leave_node(cursor.field_name(), cursor.node()); if cursor.goto_next_sibling() { - recurse = true; - visitor.enter_node(cursor.node()); + recurse = visitor.enter_node(cursor.node()); } else if cursor.goto_parent() { recurse = false; } else { From 0f576fe29a446c5cbdcb932cdb6332609fcb6c64 Mon Sep 17 00:00:00 2001 From: Arthur Baars Date: Mon, 26 Oct 2020 19:10:44 +0100 Subject: [PATCH 0021/1036] Address review comments --- extractor/src/extractor.rs | 136 ++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 63 deletions(-) diff --git a/extractor/src/extractor.rs b/extractor/src/extractor.rs index db328b631d0b..b724ce384a23 100644 --- a/extractor/src/extractor.rs +++ b/extractor/src/extractor.rs @@ -26,8 +26,8 @@ impl Extractor { .expect("Failed to parse file"); let mut visitor = Visitor { source: &source, - program: vec![Fact::Comment(format!( - "Auto-generated FACT file for {}, generated by the cool kids", + trap_output: vec![TrapEntry::Comment(format!( + "Auto-generated TRAP file for {}", path.display() ))], counter: -1, @@ -40,7 +40,7 @@ impl Extractor { traverse(&tree, &mut visitor); &self.parser.reset(); - Ok(Program(visitor.program)) + Ok(Program(visitor.trap_output)) } } @@ -65,13 +65,24 @@ fn build_union_type_lookup<'a>(schema: &'a Vec) -> Map<&'a TypeName, &'a } struct Visitor<'a> { + /// The file path of the source code (as string) + path: String, + /// The source code as a UTF-8 byte array source: &'a Vec, - program: Vec, + /// The accumulated trap entries + trap_output: Vec, + /// A counter for generating fresh labels counter: i32, - path: String, - stack: Vec, Id, TypeName)>>, + /// A lookup table from type name to dbscheme table entries tables: Map<&'a TypeName, &'a Entry>, + /// A lookup table for union types mapping a type name to its direct members union_types: Map<&'a TypeName, &'a Set>, + /// A stack for gathering information from hild nodes. Whenever a node is entered + /// an empty list is pushed. All children append their data (field name, label, type) to + /// the the list. When the visitor leaves a node the list containing the child data is popped + /// from the stack and matched against the dbscheme for the node. If the expectations are met + /// the corresponding row definitions are added to the trap_output. + stack: Vec, Label, TypeName)>>, } impl Visitor<'_> { @@ -113,11 +124,11 @@ impl Visitor<'_> { }); if let Some(Entry::Table { fields, .. }) = table { self.counter += 1; - let id = Id(self.counter); - let loc = Loc(self.counter); - self.program.push(Fact::New(Arg::IdArg(id))); - self.program.push(Fact::New(Arg::LocArg(loc))); - self.program.push(location_for(&self.path, loc, node)); + let id = Label::Normal(self.counter); + let loc = Label::Location(self.counter); + self.trap_output.push(TrapEntry::New(id)); + self.trap_output.push(TrapEntry::New(loc)); + self.trap_output.push(location_for(&self.path, loc, node)); let table_name = node_type_name(node.kind(), node.is_named()); let args: Option>; if fields.is_empty() { @@ -126,8 +137,8 @@ impl Visitor<'_> { args = self.complex_node(&node, fields, child_nodes, id); } if let Some(args) = args { - self.program - .push(Fact::Definition(table_name, id, args, loc)); + self.trap_output + .push(TrapEntry::Definition(table_name, id, args, loc)); } if let Some(parent) = self.stack.last_mut() { parent.push(( @@ -152,10 +163,10 @@ impl Visitor<'_> { &mut self, node: &Node, fields: &Vec, - child_nodes: Vec<(Option<&str>, Id, TypeName)>, - parent_id: Id, + child_nodes: Vec<(Option<&str>, Label, TypeName)>, + parent_id: Label, ) -> Option> { - let mut map: Map<&Option, (&Field, Vec)> = std::collections::BTreeMap::new(); + let mut map: Map<&Option, (&Field, Vec