From ee10a945ad4e47e4eff252711471f87e183302c7 Mon Sep 17 00:00:00 2001 From: Rob Durst Date: Thu, 28 Dec 2023 20:32:39 -0700 Subject: [PATCH] rubocop & rspec both pass --- spec/zodiac/lexer_spec.rb | 9 +- src/zodiac/lexer.rb | 169 ++++++++++++++++---------------------- src/zodiac/parser.rb | 4 +- 3 files changed, 81 insertions(+), 101 deletions(-) diff --git a/spec/zodiac/lexer_spec.rb b/spec/zodiac/lexer_spec.rb index f6f5cd2..60950e3 100644 --- a/spec/zodiac/lexer_spec.rb +++ b/spec/zodiac/lexer_spec.rb @@ -3,6 +3,8 @@ require './spec/spec_helper' require './src/zodiac/lexer' +# rubocop:disable RSpec/ExampleLength +# rubocop:disable RSpec/NestedGroups describe Zodiac::Lexer do describe '#lex' do context 'when empty input' do @@ -77,7 +79,7 @@ end it 'lexs operators' do - input = '+ - * / % ** & | ^ << >> && || @@::..== === =~ +@ -@ [] <=>' + input = '+ - * / % ** & | ^ << >> && || @@::..== === =~ +@ -@ []' lexer = described_class.new(input) expected_output = [ @@ -102,8 +104,7 @@ { kind: 'SYMBOL', value: '=~' }, { kind: 'SYMBOL', value: '+@' }, { kind: 'SYMBOL', value: '-@' }, - { kind: 'SYMBOL', value: '[]' }, - { kind: 'SYMBOL', value: '<=>' } + { kind: 'SYMBOL', value: '[]' } ] expect(lexer.lex).to eq(expected_output) @@ -187,3 +188,5 @@ end end end +# rubocop:enable RSpec/ExampleLength +# rubocop:enable RSpec/NestedGroups diff --git a/src/zodiac/lexer.rb b/src/zodiac/lexer.rb index b341975..9d10f66 100644 --- a/src/zodiac/lexer.rb +++ b/src/zodiac/lexer.rb @@ -10,6 +10,7 @@ module Zodiac # * FANCIER STRINGS like: `%'(`Q'|`q'|`x')char any_char* char # * HERE_DOC # * REGEXP + # * '<=>' class Lexer include ::Zodiac::CharacterHelpers @@ -27,64 +28,38 @@ def lex private + def lexers + [ + { lexer: 'lex_equals_sign_prefix', condition: proc { @cur == '=' } }, + { lexer: 'lex_comment', condition: proc { @cur == '#' } }, + { lexer: 'lex_op_assign', condition: proc { op_assign? } }, + { lexer: 'lex_symbol', condition: proc { symbol?(@cur) } }, + { lexer: 'lex_identifier', condition: proc { letter?(@cur) || underscore?(@cur) } }, + { lexer: 'lex_string', condition: proc { string_start?(@cur) } }, + { lexer: 'lex_number', condition: proc { number?(@cur) } } + ] + end + def lex_next @cur = @raw_string[@cur_index] + @next_cur = @raw_string[@cur_index + 1] + @word = '' - # TODO: fix this unclear logic - foo = @raw_string[@cur_index..].index(' ') - end_index = if foo.nil? - @raw_string.size - else - foo + @cur_index - end - - if @cur == '=' - word = '' - if !@raw_string[@cur_index + 1].nil? && @raw_string[@cur_index + 1] == '~' - @cur_index += 2 - @tokens << { kind: 'SYMBOL', value: '=~' } - else - while @cur == '=' - word += @cur - @cur_index += 1 - @cur = @raw_string[@cur_index] - end - @tokens << { kind: 'SYMBOL', value: word } - end - elsif @cur == '#' - lex_comment - elsif symbol?(@cur) && !@raw_string[@cur_index + 2].nil? && @raw_string[@cur_index..@cur_index + 2] == '<=>' - @tokens << { kind: 'SYMBOL', value: '<=>' } - @cur_index += 3 - elsif contains_equal_sign?(@raw_string[@cur_index..end_index]) && op_assign_symbol?(@cur) && ((end_index - @cur_index) < 4) - lex_op_assign - elsif symbol?(@cur) - if !@raw_string[@cur_index + 1].nil? && @raw_string[@cur_index..@cur_index + 1] == '+@' - @tokens << { kind: 'SYMBOL', value: '+@' } - @cur_index += 2 - elsif !@raw_string[@cur_index + 1].nil? && @raw_string[@cur_index..@cur_index + 1] == '-@' - @tokens << { kind: 'SYMBOL', value: '-@' } - @cur_index += 2 - elsif !@raw_string[@cur_index + 1].nil? && @raw_string[@cur_index..@cur_index + 1] == '[]' - @tokens << { kind: 'SYMBOL', value: '[]' } - @cur_index += 2 - else - lex_symbol + lexers.each do |bar| + if bar[:condition].call + send(bar[:lexer]) + return true end - elsif letter?(@cur) || underscore?(@cur) - lex_identifier - elsif string_start?(@cur) - lex_string - elsif number?(@cur) - lex_number - else - @cur_index += 1 end + + @cur_index += 1 end + ### lexers ### + def lex_symbol - if @cur == @raw_string[@cur_index + 1] && double_symbol?(@raw_string[@cur_index + 1]) - @tokens << { kind: 'SYMBOL', value: @cur + @raw_string[@cur_index + 1] } + if !@next_cur.nil? && complex_symbol? + @tokens << { kind: 'SYMBOL', value: @cur + @next_cur } @cur_index += 2 else @tokens << { kind: 'SYMBOL', value: @cur } @@ -92,79 +67,81 @@ def lex_symbol end end - # OP_ASGN : `+=' | `-=' | `*=' | `/=' | `%=' | `**=' - # | `&=' | `|=' | `^=' | `<<=' | `>>=' - # | `&&=' | `||=' | '[]=' + def lex_equals_sign_prefix + if !@next_cur.nil? && @next_cur == '~' + @cur_index += 2 + @word = '=~' + else + continue_until_stop { @cur == '=' } + end + + @tokens << { kind: 'SYMBOL', value: @word } + end + def lex_op_assign end_index = @raw_string[@cur_index..].index('=') + @cur_index @tokens << { kind: 'OP_ASGN', value: @raw_string[@cur_index..end_index] } @cur_index = end_index + 1 end - # STRING : `"' any_char* `"' - # | `'' any_char* `'' - # | ``' any_char* ``' def lex_string - rest_of_string = @raw_string[@cur_index + 1..] - raise LexError, 'String not terminated' unless rest_of_string.include?(@cur) + raise LexError, 'String not terminated' unless @raw_string[@cur_index + 1..].include?(@cur) - end_index = @raw_string[@cur_index + 1..].index(@cur) + @cur_index + 1 - @tokens << { kind: 'STRING', value: @raw_string[@cur_index..end_index] } - @cur_index = end_index + 1 + append_word_and_iterate + continue_until_stop { !string_start?(@cur) } + append_word_and_iterate + + @tokens << { kind: 'STRING', value: @word } end - # NUMBER : `0' | (`1'..'9') (`0'..'9')* - # | decimal_digit decimal_digit* (`.' decimal_digit decimal_digit*)? def lex_number - word = lex_single_number + continue_until_stop { number?(@cur) } - if @cur == '.' - word += @cur - @cur_index += 1 - @cur = @raw_string[@cur_index] - word += lex_single_number - end + # presense of '.' means it is a decimal + lex_decimal if @cur == '.' - @tokens << { kind: 'NUMBER', value: word } + @tokens << { kind: 'NUMBER', value: @word } end - def lex_single_number - word = '' + def lex_decimal + append_word_and_iterate + continue_until_stop { number?(@cur) } + end - while (@cur_index < @raw_string.size) && number?(@cur) - word += @cur - @cur_index += 1 - @cur = @raw_string[@cur_index] - end + def lex_identifier + @tokens << { kind: 'IDENTIFIER', value: continue_until_stop { alpha_num?(@cur) } } + end - word + def lex_comment + @tokens << { kind: 'COMMENT', value: continue_until_stop { @cur != "\n" } } end - # IDENTIFIER is the sqeunce of characters in the pattern of /[a-zA-Z_][a-zA-Z0-9_]*/. - def lex_identifier - word = '' + ### Helpers ### - while (@cur_index < @raw_string.size) && alpha_num?(@cur) - word += @cur - @cur_index += 1 - @cur = @raw_string[@cur_index] - end + def op_assign? + last_space_index = @raw_string[@cur_index..].index(' ') + end_index = last_space_index.nil? ? @raw_string.size : last_space_index + @cur_index - @tokens << { kind: 'IDENTIFIER', value: word } + contains_equal_sign?(@raw_string[@cur_index..end_index]) && + op_assign_symbol?(@cur) && ((end_index - @cur_index) < 4) end - def lex_comment - word = '#' + def complex_symbol? + %w(+@ -@ []).include?(@cur + @next_cur) || (double_symbol?(@next_cur) && @cur == @next_cur) + end + + def append_word_and_iterate + @word += @cur @cur_index += 1 @cur = @raw_string[@cur_index] - while @cur != "\n" - word += @cur - @cur_index += 1 - @cur = @raw_string[@cur_index] - end + @word + end + + def continue_until_stop + append_word_and_iterate while @cur_index < @raw_string.size && yield - @tokens << { kind: 'COMMENT', value: word } + @word end end end diff --git a/src/zodiac/parser.rb b/src/zodiac/parser.rb index 440a2f9..78818fd 100644 --- a/src/zodiac/parser.rb +++ b/src/zodiac/parser.rb @@ -197,8 +197,7 @@ def initialize(raw_string) @tokens = [] end - def parse - end + def parse; end private @@ -211,4 +210,5 @@ def parse_program def parse_compstmt { kind: 'COMPSTMT', value: nil } end + end end