From c86cb3e98d86d3e45bb24be922ab8bd65a68859d Mon Sep 17 00:00:00 2001
From: David Isaksson
Date: Mon, 15 Jul 2024 20:19:40 +0200
Subject: [PATCH] asm: Add parse error for invalid hex and binary literals

---
Review notes (commentary only: text between the "---" marker and the first
"diff --git" line is not part of the commit).

What the patch does:
  * A "$" or "%" prefix that is not followed by any digit now makes
    `next_token` return `LexerError::UnexpectedCharacter` instead of a
    HexNumber/BinaryNumber token whose text is empty.
  * The two match arms are moved into the helpers `read_hex_literal` and
    `read_binary_literal`, which `next_token` calls with `?`.
  * `LexerError` additionally derives `PartialEq` so the new test can compare
    the returned `Result` with `assert_eq!`.

About this copy of the patch:
  * It was recovered from a rendering that had collapsed all newlines and
    dropped angle-bracketed text. The return types `Result<Token, LexerError>`
    (both helpers) and `Result<Option<Token>, LexerError>` (`next_token`) were
    restored from the function bodies and the test; confirm against the
    repository.
  * Leading whitespace was reconstructed in rustfmt style. The nesting depth
    of the match arms inside `next_token` is assumed; if context lines do not
    match, apply with `git apply --ignore-whitespace`.
  * The author's e-mail address was lost in the same way and has not been
    reconstructed.

NOTE(review): the error carries `self.current_source_position` as read after
the prefix has been consumed (the test expects 1:2 for a lone "$"), paired
with the prefix character itself. Confirm that this position/character pairing
is the intended diagnostic.

 src/assembler/lexer.rs | 74 +++++++++++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 19 deletions(-)

diff --git a/src/assembler/lexer.rs b/src/assembler/lexer.rs
index acbeea3..066abce 100644
--- a/src/assembler/lexer.rs
+++ b/src/assembler/lexer.rs
@@ -9,7 +9,7 @@ use source_position::SourcePosition;
 pub mod source_position;
 pub mod token;
 
-#[derive(Error, Debug)]
+#[derive(Error, Debug, PartialEq)]
 pub enum LexerError {
     #[error("Unexpected character at {0}: '{1}'")]
     UnexpectedCharacter(SourcePosition, char),
@@ -127,6 +127,38 @@ impl<'a> Lexer<'a> {
         self.read_while(|ch| ch.is_ascii_digit())
     }
 
+    fn read_binary_literal(&mut self, ch: char) -> Result<Token, LexerError> {
+        let (_, span_1) = self.read_one_char();
+        let (text, span_2) = self.read_binary();
+        if text.is_empty() {
+            return Err(LexerError::UnexpectedCharacter(
+                self.current_source_position,
+                ch,
+            ));
+        }
+        Ok(Token::new(
+            TokenType::BinaryNumber,
+            text,
+            SourcePositionSpan::new(span_1.start, span_2.end),
+        ))
+    }
+
+    fn read_hex_literal(&mut self, ch: char) -> Result<Token, LexerError> {
+        let (_, span_1) = self.read_one_char();
+        let (text, span_2) = self.read_hex();
+        if text.is_empty() {
+            return Err(LexerError::UnexpectedCharacter(
+                self.current_source_position,
+                ch,
+            ));
+        }
+        Ok(Token::new(
+            TokenType::HexNumber,
+            text,
+            SourcePositionSpan::new(span_1.start, span_2.end),
+        ))
+    }
+
     #[tracing::instrument]
     pub fn next_token(&mut self) -> Result<Option<Token>, LexerError> {
         self.skip_whitespace();
@@ -163,24 +195,8 @@ impl<'a> Lexer<'a> {
                     Some(Token::new(TokenType::Pound, text, span))
                 }
                 // Number literals
-                '$' => {
-                    let (_, span_1) = self.read_one_char();
-                    let (text, span_2) = self.read_hex();
-                    Some(Token::new(
-                        TokenType::HexNumber,
-                        text,
-                        SourcePositionSpan::new(span_1.start, span_2.end),
-                    ))
-                }
-                '%' => {
-                    let (_, span_1) = self.read_one_char();
-                    let (text, span_2) = self.read_binary();
-                    Some(Token::new(
-                        TokenType::BinaryNumber,
-                        text,
-                        SourcePositionSpan::new(span_1.start, span_2.end),
-                    ))
-                }
+                '$' => Some(self.read_hex_literal(ch)?),
+                '%' => Some(self.read_binary_literal(ch)?),
                 '0'..='9' => {
                     let (text, span) = self.read_decimal();
                     Some(Token::new(TokenType::DecimalNumber, text, span))
@@ -256,6 +272,26 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_invalid_hex_and_binary() -> anyhow::Result<()> {
+        let tests = vec![
+            (
+                "$",
+                LexerError::UnexpectedCharacter(SourcePosition::new(1, 2), '$'),
+            ),
+            (
+                "%",
+                LexerError::UnexpectedCharacter(SourcePosition::new(1, 2), '%'),
+            ),
+        ];
+        for (input, expected) in tests {
+            let mut lexer = Lexer::new(input);
+            let result = lexer.next_token();
+            assert_eq!(result, Err(expected));
+        }
+        Ok(())
+    }
+
     #[test]
     fn test_decimal() -> anyhow::Result<()> {
         let tests = vec![