Skip to content

Commit 84e41cd

Browse files
committed
fixup! Allow to parse macro identifiers in variable decls
1 parent 74ae22f commit 84e41cd

File tree

1 file changed

+231
-0
lines changed

1 file changed

+231
-0
lines changed
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2+
From: Diego Alonso <[email protected]>
3+
Date: Thu, 26 Jun 2025 14:04:51 +0200
4+
Subject: Allow to parse macro identifiers in variable decls
5+
6+
---
7+
grammar.js | 2 +
8+
src/scanner.c | 128 +++++++++++++++++++++++++++++++++++++++-----------
9+
2 files changed, 103 insertions(+), 27 deletions(-)
10+
11+
diff --git a/grammar.js b/grammar.js
12+
index 6e79004..40ac8b7 100644
13+
--- a/grammar.js
14+
+++ b/grammar.js
15+
@@ -67,6 +67,7 @@ module.exports = grammar({
16+
$._external_end_of_statement,
17+
$._preproc_unary_operator,
18+
$.hollerith_constant,
19+
+ $.macro_identifier,
20+
],
21+
22+
extras: $ => [
23+
@@ -870,6 +871,7 @@ module.exports = grammar({
24+
$.derived_type,
25+
alias($.procedure_declaration, $.procedure),
26+
$.declared_type,
27+
+ $.macro_identifier,
28+
)),
29+
optional(seq(',',
30+
commaSep1(
31+
diff --git a/src/scanner.c b/src/scanner.c
32+
index b768d99..e477df4 100644
33+
--- a/src/scanner.c
34+
+++ b/src/scanner.c
35+
@@ -1,4 +1,5 @@
36+
#include "tree_sitter/alloc.h"
37+
+#include "tree_sitter/array.h"
38+
#include "tree_sitter/parser.h"
39+
#include <ctype.h>
40+
#include <wctype.h>
41+
@@ -13,10 +14,12 @@ enum TokenType {
42+
END_OF_STATEMENT,
43+
PREPROC_UNARY_OPERATOR,
44+
HOLLERITH_CONSTANT,
45+
+ MACRO_IDENTIFIER,
46+
};
47+
48+
typedef struct {
49+
bool in_line_continuation;
50+
+ Array(char *) MacroIdentifiers;
51+
} Scanner;
52+
53+
typedef enum {
54+
@@ -301,31 +304,43 @@ static bool scan_end_line_continuation(Scanner *scanner, TSLexer *lexer) {
55+
return true;
56+
}
57+
58+
-static bool scan_string_literal_kind(TSLexer *lexer) {
59+
- // Strictly, it's allowed for the kind to be an integer literal, in
60+
- // practice I've not seen it
61+
+typedef Array(char) String;
62+
+
63+
+// Returns NULL if no identifier was found, otherwise a heap-allocated String that the caller must free
64+
+static String *scan_identifier(TSLexer *lexer) {
65+
if (!iswalpha(lexer->lookahead)) {
66+
+ return NULL;
67+
+ }
68+
+ String *possible_identifier = ts_calloc(1, sizeof(String));
69+
+ while (is_identifier_char(lexer->lookahead) && !lexer->eof(lexer)) {
70+
+ array_push(possible_identifier, lexer->lookahead);
71+
+ // Don't capture the trailing underscore as part of the kind identifier
72+
+ // If another user of this function wants to mark the end again after
73+
+ // the identifier, they're free to do so
74+
+ if (lexer->lookahead == '_') {
75+
+ lexer->mark_end(lexer);
76+
+ }
77+
+ advance(lexer);
78+
+ }
79+
+ if (possible_identifier->size == 0) {
80+
+ ts_free(possible_identifier);
81+
+ return NULL;
82+
+ }
83+
+ return possible_identifier;
84+
+}
85+
+
86+
+static bool scan_string_literal_kind(TSLexer *lexer, String *identifier) {
87+
+ if (identifier->size == 0) {
88+
+ return false;
89+
+ }
90+
+
91+
+ char last_char = identifier->contents[identifier->size - 1];
92+
+ if ((last_char != '_') ||
93+
+ (lexer->lookahead != '"' && lexer->lookahead != '\'')) {
94+
return false;
95+
}
96+
97+
lexer->result_symbol = STRING_LITERAL_KIND;
98+
-
99+
- // We need two characters of lookahead to see `_"`
100+
- char current_char = '\0';
101+
-
102+
- while (is_identifier_char(lexer->lookahead) && !lexer->eof(lexer)) {
103+
- current_char = lexer->lookahead;
104+
- // Don't capture the trailing underscore as part of the kind identifier
105+
- if (lexer->lookahead == '_') {
106+
- lexer->mark_end(lexer);
107+
- }
108+
- advance(lexer);
109+
- }
110+
-
111+
- if ((current_char != '_') || (lexer->lookahead != '"' && lexer->lookahead != '\'')) {
112+
- return false;
113+
- }
114+
-
115+
return true;
116+
}
117+
118+
@@ -393,6 +408,28 @@ static bool scan_string_literal(TSLexer *lexer) {
119+
return false;
120+
}
121+
122+
+static bool scan_macro_identifier(Scanner *scanner, TSLexer *lexer,
123+
+ String *identifier) {
124+
+ unsigned num_macro_ids = scanner->MacroIdentifiers.size;
125+
+ if (num_macro_ids == 0) {
126+
+ return false;
127+
+ }
128+
+
129+
+ for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
130+
+ char *macro_id = *array_get(&scanner->MacroIdentifiers, i);
131+
+ unsigned macro_id_len = strlen(macro_id);
132+
+ if (identifier->size != macro_id_len) {
133+
+ continue;
134+
+ }
135+
+ if (strncmp(macro_id, identifier->contents, identifier->size) == 0) {
136+
+ lexer->mark_end(lexer);
137+
+ lexer->result_symbol = MACRO_IDENTIFIER;
138+
+ return true;
139+
+ }
140+
+ }
141+
+ return false;
142+
+}
143+
+
144+
/// Need an external scanner to catch '!' before its parsed as a comment
145+
static bool scan_preproc_unary_operator(TSLexer *lexer) {
146+
const char next_char = lexer->lookahead;
147+
@@ -467,19 +504,50 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
148+
return true;
149+
}
150+
151+
- if (valid_symbols[STRING_LITERAL_KIND]) {
152+
+ // These symbols both scan for an identifier, so we need to combine the logic
153+
+ // and they must always be the last ones we look for, since we can't backtrack
154+
+ if (valid_symbols[STRING_LITERAL_KIND] || valid_symbols[MACRO_IDENTIFIER]) {
155+
+ String *identifier = scan_identifier(lexer);
156+
+ bool identifier_result = false;
157+
// This may need a lot of lookahead, so should (probably) always
158+
// be the last token to look for
159+
- if (scan_string_literal_kind(lexer)) {
160+
+ if (identifier && valid_symbols[STRING_LITERAL_KIND]) {
161+
+ if (scan_string_literal_kind(lexer, identifier)) {
162+
+ identifier_result = true;
163+
+ }
164+
+ }
165+
+ if (!identifier_result && identifier && valid_symbols[MACRO_IDENTIFIER]) {
166+
+ if (scan_macro_identifier(scanner, lexer, identifier)) {
167+
+ identifier_result = true;
168+
+ }
169+
+ }
170+
+ if (identifier) {
171+
+ ts_free(identifier);
172+
+ }
173+
+ if (identifier_result) {
174+
return true;
175+
}
176+
}
177+
-
178+
return false;
179+
}
180+
181+
void *tree_sitter_fortran_external_scanner_create() {
182+
- return ts_calloc(1, sizeof(bool));
183+
+ Scanner *result = (Scanner *)ts_calloc(1, sizeof(Scanner));
184+
+ char *macro_ids = getenv("CODEE_TS_MACRO_IDS");
185+
+ if (!macro_ids) {
186+
+ return result;
187+
+ }
188+
+ char *macro_id = strtok(macro_ids, ":");
189+
+ Array(char *) *macroIdsResult = &result->MacroIdentifiers;
190+
+ while (macro_id) {
191+
+ int length = strlen(macro_id);
192+
+ char *new_str = (char *)ts_malloc((length + 1) * sizeof(char));
193+
+ strncpy(new_str, macro_id, length);
194+
+ array_push(macroIdsResult, new_str);
195+
+ // Keep splitting
196+
+ macro_id = strtok(NULL, ":");
197+
+ }
198+
+ return result;
199+
}
200+
201+
bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
202+
@@ -491,8 +559,9 @@ bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
203+
unsigned tree_sitter_fortran_external_scanner_serialize(void *payload,
204+
char *buffer) {
205+
Scanner *scanner = (Scanner *)payload;
206+
- buffer[0] = (char)scanner->in_line_continuation;
207+
- return 1;
208+
+ unsigned size = sizeof(*scanner);
209+
+ memcpy(buffer, scanner, size);
210+
+ return size;
211+
}
212+
213+
void tree_sitter_fortran_external_scanner_deserialize(void *payload,
214+
@@ -500,11 +569,16 @@ void tree_sitter_fortran_external_scanner_deserialize(void *payload,
215+
unsigned length) {
216+
Scanner *scanner = (Scanner *)payload;
217+
if (length > 0) {
218+
- scanner->in_line_continuation = buffer[0];
219+
+ unsigned size = sizeof(*scanner);
220+
+ memcpy(scanner, buffer, size);
221+
}
222+
}
223+
224+
void tree_sitter_fortran_external_scanner_destroy(void *payload) {
225+
Scanner *scanner = (Scanner *)payload;
226+
+ for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
227+
+ char *str = *array_get(&scanner->MacroIdentifiers, i);
228+
+ ts_free(str);
229+
+ }
230+
ts_free(scanner);
231+
}

0 commit comments

Comments
 (0)