diff --git a/src/scanner.c b/src/scanner.c index 7e55ff7..75e1dcd 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -29,6 +29,7 @@ typedef enum { Format = 1 << 4, Triple = 1 << 5, Bytes = 1 << 6, + Template = 1 << 7, } Flags; typedef struct { @@ -45,6 +46,8 @@ static inline bool is_triple(Delimiter *delimiter) { return delimiter->flags & T static inline bool is_bytes(Delimiter *delimiter) { return delimiter->flags & Bytes; } +static inline bool is_template(Delimiter *delimiter) { return delimiter->flags & Template; } + static inline int32_t end_character(Delimiter *delimiter) { if (delimiter->flags & SingleQuote) { return '\''; @@ -66,6 +69,8 @@ static inline void set_triple(Delimiter *delimiter) { delimiter->flags |= Triple static inline void set_bytes(Delimiter *delimiter) { delimiter->flags |= Bytes; } +static inline void set_template(Delimiter *delimiter) { delimiter->flags |= (Template | Format); } + static inline void set_end_character(Delimiter *delimiter, int32_t character) { switch (character) { case '\'': @@ -85,7 +90,7 @@ static inline void set_end_character(Delimiter *delimiter, int32_t character) { typedef struct { Array(uint16_t) indents; Array(Delimiter) delimiters; - bool inside_f_string; + bool inside_interpolated_string; } Scanner; static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } @@ -102,7 +107,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.size > 0 && (lexer->lookahead == '{' || lexer->lookahead == '}') && !error_recovery_mode) { Delimiter *delimiter = array_back(&scanner->delimiters); - if (is_format(delimiter)) { + if (is_format(delimiter) || is_template(delimiter)) { lexer->mark_end(lexer); bool is_left_brace = lexer->lookahead == '{'; advance(lexer); @@ -122,7 +127,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con int32_t end_char = end_character(delimiter); bool has_content = advanced_once; while (lexer->lookahead) { - if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && is_format(delimiter)) { + if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && (is_format(delimiter) || is_template(delimiter))) { lexer->mark_end(lexer); lexer->result_symbol = STRING_CONTENT; return has_content; @@ -177,7 +182,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con lexer->mark_end(lexer); array_pop(&scanner->delimiters); lexer->result_symbol = STRING_END; - scanner->inside_f_string = false; + scanner->inside_interpolated_string = false; } return true; } @@ -195,7 +200,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con advance(lexer); array_pop(&scanner->delimiters); lexer->result_symbol = STRING_END; - scanner->inside_f_string = false; + scanner->inside_interpolated_string = false; } lexer->mark_end(lexer); return true; @@ -280,7 +285,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con if ((valid_symbols[DEDENT] || (!valid_symbols[NEWLINE] && !(valid_symbols[STRING_START] && next_tok_is_string_start) && !within_brackets)) && - indent_length < current_indent_length && !scanner->inside_f_string && + indent_length < current_indent_length && !scanner->inside_interpolated_string && // Wait to create a dedent token until we've consumed any // comments @@ -309,6 +314,8 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con set_raw(&delimiter); } else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') { set_bytes(&delimiter); + } else if (lexer->lookahead == 't' || lexer->lookahead == 'T') { + set_template(&delimiter); } else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') { break; } @@ -349,7 +356,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con if (end_character(&delimiter)) { array_push(&scanner->delimiters, delimiter); lexer->result_symbol = STRING_START; - scanner->inside_f_string = is_format(&delimiter); + scanner->inside_interpolated_string = is_format(&delimiter) || is_template(&delimiter); return true; } if (has_flags) { @@ -365,7 +372,7 @@ unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buff size_t size = 0; - buffer[size++] = (char)scanner->inside_f_string; + buffer[size++] = (char)scanner->inside_interpolated_string; size_t delimiter_count = scanner->delimiters.size; if (delimiter_count > UINT8_MAX) { @@ -398,7 +405,7 @@ void tree_sitter_python_external_scanner_deserialize(void *payload, const char * if (length > 0) { size_t size = 0; - scanner->inside_f_string = (bool)buffer[size++]; + scanner->inside_interpolated_string = (bool)buffer[size++]; size_t delimiter_count = (uint8_t)buffer[size++]; if (delimiter_count > 0) { diff --git a/test/corpus/template_strings.txt b/test/corpus/template_strings.txt new file mode 100644 index 0000000..9cfb290 --- /dev/null +++ b/test/corpus/template_strings.txt @@ -0,0 +1,152 @@ +================================================================================ +Simple template string +================================================================================ + +t"Hello, {name}!" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end)))) + +================================================================================ +Template string with format spec +================================================================================ + +t"Price: {price:.2f}" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier) + (format_specifier)) + (string_end)))) + +================================================================================ +Raw template string +================================================================================ + +tr"Path: {path}\n" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end)))) + +================================================================================ +Triple quoted template string +================================================================================ + +t""" +Multi-line template +with {variable} +""" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end)))) + +================================================================================ +Template string with multiple interpolations +================================================================================ + +t"Hello {first_name} {last_name}!" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier)) + (string_content) + (interpolation + (identifier)) + (string_content) + (string_end)))) + +================================================================================ +Template string with expression +================================================================================ + +t"Result: {a + b}" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (binary_operator + (identifier) + (identifier))) + (string_end)))) + +================================================================================ +Template string with escaped braces +================================================================================ + +t"Use {{braces}} to escape" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content + (escape_interpolation) + (escape_interpolation)) + (string_end)))) +================================================================================ +Template string with format spec (copy from f-string) +================================================================================ + +t"a {b:2} {c:34.5}" + +-------------------------------------------------------------------------------- + +(module + (expression_statement + (string + (string_start) + (string_content) + (interpolation + (identifier) + (format_specifier)) + (string_content) + (interpolation + (identifier) + (format_specifier)) + (string_end)))) \ No newline at end of file