From 90f6ca3fc5045764078cd10ab7d32e636b79ab3f Mon Sep 17 00:00:00 2001
From: Adam Christiansen
Date: Wed, 2 Apr 2025 11:47:35 -0600
Subject: [PATCH 1/3] feat: add indentation to brace tokens

Automatically inserted `{` and `}` track the indentation (i.e., column
number) of the context that they delimit.
---
 fuyu-core/src/parse/lexer.rs |  8 ++++----
 fuyu-core/src/parse/token.rs | 12 ++++++++++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/fuyu-core/src/parse/lexer.rs b/fuyu-core/src/parse/lexer.rs
index 2b9865d..e04758f 100644
--- a/fuyu-core/src/parse/lexer.rs
+++ b/fuyu-core/src/parse/lexer.rs
@@ -276,10 +276,10 @@ impl<'a> Lexer<'a> {
             [Some('['), ..] => self.advance_by_and_emit(1, Token::LeftSquare),
             [Some('\\'), ..] => self.advance_by_and_emit(1, Token::BackSlash),
             [Some(']'), ..] => self.advance_by_and_emit(1, Token::RightSquare),
-            [Some('{'), ..] => self.advance_by_and_emit(1, Token::LeftBrace),
+            [Some('{'), ..] => self.advance_by_and_emit(1, Token::LeftBrace(None)),
             [Some('|'), Some('|'), ..] => self.advance_by_and_emit(2, Token::PipePipe),
             [Some('|'), ..] => self.advance_by_and_emit(1, Token::Pipe),
-            [Some('}'), ..] => self.advance_by_and_emit(1, Token::RightBrace),
+            [Some('}'), ..] => self.advance_by_and_emit(1, Token::RightBrace(None)),
             //-------------------------------------------------------------------------------------
             // Numbers.
             //-------------------------------------------------------------------------------------
@@ -720,10 +720,10 @@ mod tests {
 
     #[test]
     fn scan_operators_and_punctuation() {
-        scan!("{", ok: Token::LeftBrace);
+        scan!("{", ok: Token::LeftBrace(None));
         scan!("[", ok: Token::LeftSquare);
         scan!("(", ok: Token::LeftParen);
-        scan!("}", ok: Token::RightBrace);
+        scan!("}", ok: Token::RightBrace(None));
         scan!("]", ok: Token::RightSquare);
         scan!(")", ok: Token::RightParen);
         scan!("&[", ok: Token::AmpLeftSquare);
diff --git a/fuyu-core/src/parse/token.rs b/fuyu-core/src/parse/token.rs
index 6b1e272..cc4a6cd 100644
--- a/fuyu-core/src/parse/token.rs
+++ b/fuyu-core/src/parse/token.rs
@@ -84,13 +84,21 @@ pub enum Token {
     /// The content is not checked for validity.
     RawString,
     /// `{`.
-    LeftBrace,
+    ///
+    /// The associated value is `None` if the brace appears in the source. When the brace is
+    /// inserted by the lexer, the value is `Some` with the 1-indexed indentation of the column
+    /// where the brace is inserted.
+    LeftBrace(Option<usize>),
     /// `[`.
     LeftSquare,
     /// `(`.
     LeftParen,
     /// `}`.
-    RightBrace,
+    ///
+    /// The associated value is `None` if the brace appears in the source. When the brace is
+    /// inserted by the lexer, the value is `Some` with the 1-indexed indentation of the column
+    /// of the matching automatically inserted [`LeftBrace`][Token::LeftBrace].
+    RightBrace(Option<usize>),
     /// `]`.
     RightSquare,
     /// `)`.

From 2acafbd3166394cde5f3a85f1a017495668f1087 Mon Sep 17 00:00:00 2001
From: Adam Christiansen
Date: Thu, 3 Apr 2025 20:14:03 -0600
Subject: [PATCH 2/3] feat: add textchars iterator

A `TextChars` iterator was added for use in the lexer. It does the
following:

- Handles the byte order mark (BOM).
- Rejects invalid control characters (closes #74).
- Tracks the current column position.
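As an illustrative sketch of the intended behavior (not part of this patch;
`Text::from` stands in for whatever constructor `Text` actually provides):

    let text = Text::from("\u{feff}a\r\nb");
    let mut chars = TextChars::new(&text);
    assert_eq!(chars.next(), Some('a'));  // The BOM is skipped.
    assert_eq!(chars.next(), Some('\n')); // `\r\n` is collapsed into `\n`.
    assert_eq!(chars.next(), Some('b'));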
---
 fuyu-core/src/parse/lexer.rs | 319 +++++++++++++++++------------------
 fuyu-core/src/parse/text.rs  | 130 ++++++++++++++
 2 files changed, 282 insertions(+), 167 deletions(-)

diff --git a/fuyu-core/src/parse/lexer.rs b/fuyu-core/src/parse/lexer.rs
index e04758f..c74e398 100644
--- a/fuyu-core/src/parse/lexer.rs
+++ b/fuyu-core/src/parse/lexer.rs
@@ -1,33 +1,27 @@
 // SPDX-License-Identifier: MPL-2.0
 
-use crate::parse::{ByteIdx, Text, Token};
-use std::iter;
+use crate::parse::{ByteIdx, Text, TextChars, Token};
+use std::collections::VecDeque;
+use std::iter::FusedIterator;
 use std::str::Chars;
 
-/// The Unicode byte order mark (BOM).
-const BOM: char = '\u{feff}';
-
-/// The lexer looks ahead by 3 tokens.
-const LOOKAHEAD: usize = 3;
-
 /// The `Lexer` produces an iterator of [`Spanned`] [`Token`]s from a source [`Text`].
 #[derive(Debug)]
 pub struct Lexer<'a> {
     /// The source text.
     text: &'a Text,
     /// An iterator over the characters in the source text.
-    chars: Chars<'a>,
-    /// The lexer works based on a window of [`LOOKAHEAD`] characters.
-    ///
-    /// The first character in the window (i.e., `window[0]`) is considered the next character in the source.
-    window: [Option<char>; LOOKAHEAD],
-    /// The current position of the iterator.
-    idx: ByteIdx,
+    text_chars: TextChars<'a>,
     /// The marked start index (inclusive) that tracks the start of a marked region (e.g., token in
     /// a lexer).
     ///
-    /// It is guaranteed that `idx >= mark_idx`.
+    /// It is guaranteed that `self.text_chars.idx() >= self.mark_idx`.
     mark_idx: ByteIdx,
+    // TODO: Use this to (1) track indentation and (2) emit `{`, `}`, and `;`.
+    /// The indentations tracked by the lexer.
+    indent_stack: Vec<usize>,
+    /// Queued tokens to emit from the iterator the next time it is advanced.
+    queued: VecDeque<Spanned>,
 }
 
 /// The values that are produced by the [`Lexer`].
@@ -95,7 +89,7 @@ macro_rules! scan_int {
             return $lexer.emit_error(LexicalError::Number);
         }
         $lexer.advance_while(pat!($pattern | '_'));
-        if !$lexer.window[0].map_or(false, is_text_like) {
+        if !$lexer.peek().map_or(false, is_text_like) {
             $lexer.emit($kind)
         } else {
             $lexer.advance_while(is_text_like);
@@ -109,40 +103,58 @@ impl<'a> Lexer<'a> {
     pub fn new(text: &'a Text) -> Self {
         let mut lexer = Self {
            text,
-            chars: text.as_str().chars(),
-            window: [None; LOOKAHEAD],
-            idx: 0,
+            text_chars: TextChars::new(text),
             mark_idx: 0,
+            indent_stack: vec![],
+            queued: VecDeque::new(),
         };
-        // Fill the window.
-        for i in 0..LOOKAHEAD {
-            lexer.window[i] = lexer.chars.next();
-        }
-        // The BOM is skipped so nothing else has to deal with it.
-        lexer.advance_if(pat!(BOM));
+        // The index might not start at 0.
        lexer.mark();
+        // Queue up the shebang comment here instead of the main loop since it can appear only at the start.
+        if lexer.peek_pair() == (Some('#'), Some('!')) {
+            lexer.advance_until(pat!('\n'));
+            lexer.emit_queue(Token::ShebangComment);
+        }
         lexer
     }
 
+    /// Shorthand for accessing the index.
+    fn idx(&self) -> usize {
+        self.text_chars.idx()
+    }
+
     /// Set the mark index to the current index.
     fn mark(&mut self) {
-        self.mark_idx = self.idx;
+        self.mark_idx = self.idx();
     }
 
     /// Get the string from the mark index (inclusive) to the current index (exclusive).
     fn as_marked_str(&self) -> &str {
-        &self.text.as_str()[self.mark_idx..self.idx]
+        &self.text.as_str()[self.mark_idx..self.idx()]
+    }
+
+    /// Peek the next character without advancing.
+    fn peek(&mut self) -> Option<char> {
+        self.text_chars.peek()
+    }
+
+    /// Peek the next two characters without advancing.
+    fn peek_pair(&mut self) -> (Option<char>, Option<char>) {
+        (self.text_chars.peek_nth(0), self.text_chars.peek_nth(1))
+    }
+
+    /// Peek the next three characters without advancing.
+    fn peek_triplet(&mut self) -> (Option<char>, Option<char>, Option<char>) {
+        (
+            self.text_chars.peek_nth(0),
+            self.text_chars.peek_nth(1),
+            self.text_chars.peek_nth(2),
+        )
     }
 
     /// Move the lexer ahead by one character.
     fn advance(&mut self) {
-        // Update the position.
-        if let Some(c) = self.window[0] {
-            self.idx += c.len_utf8();
-        }
-        // Shift the next character into the window.
-        self.window.rotate_left(1);
-        self.window[LOOKAHEAD - 1] = self.chars.next();
+        self.text_chars.next();
     }
 
     /// Call [`Self::advance()`] `n` times.
@@ -162,7 +174,7 @@
     where
         P: Fn(char) -> bool,
     {
-        self.window[0]
+        self.peek()
             .is_some_and(pred)
             .then(|| self.advance())
             .is_some()
@@ -196,7 +208,13 @@
 
     /// Create a token that spans from the marked start index to the current index.
     fn emit(&mut self, kind: Token) -> Option<Spanned> {
-        Some(Ok((self.mark_idx, kind, self.idx)))
+        Some(Ok((self.mark_idx, kind, self.idx())))
+    }
+
+    /// Create a token that spans from the marked start index to the current index and push it on
+    /// to the queue.
+    fn emit_queue(&mut self, kind: Token) {
+        self.queued.push_back(Ok((self.mark_idx, kind, self.idx())));
     }
 
     /// Advance the lexer by `n` characters and then [`Self::emit()`] a token of the given `kind`.
@@ -207,18 +225,7 @@
 
     /// Create an error that spans from the marked start index to the current index.
     fn emit_error(&mut self, error: LexicalError) -> Option<Spanned> {
-        Some(Err((self.mark_idx, error, self.idx)))
-    }
-
-    /// Check if a [`Token::ShebangComment`] is allowed at the current location.
-    fn shebang_comment_location(&self) -> bool {
-        // Check if the current location is at the start of the text.
-        if self.idx == 0 {
-            return true;
-        }
-        // Check if the current location is at the second character in the text and the first
-        // character was a BOM.
-        self.idx == BOM.len_utf8() && self.text.as_str().starts_with('\u{feff}')
+        Some(Err((self.mark_idx, error, self.idx())))
     }
 
     /// Produce the next token and advance the lexer.
@@ -226,73 +233,72 @@
     /// This returns `None` at the end of the source text, and once `None` is returned, it will
     /// always return `None` on subsequent calls.
     fn scan(&mut self) -> Option<Spanned> {
+        if !self.queued.is_empty() {
+            return self.queued.pop_front();
+        }
         self.advance_while(char::is_whitespace);
         self.mark();
-        match self.window {
+        match self.peek_triplet() {
             //-------------------------------------------------------------------------------------
             // Comments.
             //-------------------------------------------------------------------------------------
-            [Some('#'), Some('!'), ..] if self.shebang_comment_location() => {
-                self.advance_until(pat!('\n'));
-                self.emit(Token::ShebangComment)
-            }
-            [Some('/'), Some('/'), Some('/'), ..] => {
+            (Some('/'), Some('/'), Some('/'), ..) => {
                 self.advance_until(pat!('\n'));
                 self.emit(Token::DocComment)
             }
-            [Some('/'), Some('/'), ..] => {
+            (Some('/'), Some('/'), ..) => {
                 self.advance_until(pat!('\n'));
                 self.emit(Token::LineComment)
             }
             //-------------------------------------------------------------------------------------
             // Operators and punctuation.
             //-------------------------------------------------------------------------------------
-            [Some('#'), Some('['), ..] => self.advance_by_and_emit(2, Token::HashLeftSquare),
-            [Some('#'), ..] => self.advance_by_and_emit(1, Token::Hash),
-            [Some('%'), ..]
=> self.advance_by_and_emit(1, Token::Percent),
-            [Some('&'), Some('&'), ..] => self.advance_by_and_emit(2, Token::AmpAmp),
-            [Some('&'), Some('['), ..] => self.advance_by_and_emit(2, Token::AmpLeftSquare),
-            [Some('('), ..] => self.advance_by_and_emit(1, Token::LeftParen),
-            [Some(')'), ..] => self.advance_by_and_emit(1, Token::RightParen),
-            [Some('*'), Some('*'), ..] => self.advance_by_and_emit(2, Token::StarStar),
-            [Some('*'), ..] => self.advance_by_and_emit(1, Token::Star),
-            [Some('+'), ..] => self.advance_by_and_emit(1, Token::Plus),
-            [Some(','), ..] => self.advance_by_and_emit(1, Token::Comma),
-            [Some('-'), Some('>'), ..] => self.advance_by_and_emit(2, Token::MinusGt),
-            [Some('-'), ..] => self.advance_by_and_emit(1, Token::Minus),
-            [Some('.'), Some('.'), ..] => self.advance_by_and_emit(2, Token::DotDot),
-            [Some('.'), ..] => self.advance_by_and_emit(1, Token::Dot),
-            [Some('/'), Some('='), ..] => self.advance_by_and_emit(2, Token::SlashEq),
-            [Some('/'), ..] => self.advance_by_and_emit(1, Token::Slash),
-            [Some(':'), ..] => self.advance_by_and_emit(1, Token::Colon),
-            [Some(';'), ..] => self.advance_by_and_emit(1, Token::Semicolon),
-            [Some('<'), Some('='), ..] => self.advance_by_and_emit(2, Token::LtEq),
-            [Some('<'), ..] => self.advance_by_and_emit(1, Token::Lt),
-            [Some('='), Some('='), ..] => self.advance_by_and_emit(2, Token::EqEq),
-            [Some('='), ..] => self.advance_by_and_emit(1, Token::Eq),
-            [Some('>'), Some('='), ..] => self.advance_by_and_emit(2, Token::GtEq),
-            [Some('>'), ..] => self.advance_by_and_emit(1, Token::Gt),
-            [Some('@'), ..] => self.advance_by_and_emit(1, Token::At),
-            [Some('['), ..] => self.advance_by_and_emit(1, Token::LeftSquare),
-            [Some('\\'), ..] => self.advance_by_and_emit(1, Token::BackSlash),
-            [Some(']'), ..] => self.advance_by_and_emit(1, Token::RightSquare),
-            [Some('{'), ..] => self.advance_by_and_emit(1, Token::LeftBrace(None)),
-            [Some('|'), Some('|'), ..] => self.advance_by_and_emit(2, Token::PipePipe),
-            [Some('|'), ..] => self.advance_by_and_emit(1, Token::Pipe),
-            [Some('}'), ..] => self.advance_by_and_emit(1, Token::RightBrace(None)),
+            (Some('#'), Some('['), ..) => self.advance_by_and_emit(2, Token::HashLeftSquare),
+            (Some('#'), ..) => self.advance_by_and_emit(1, Token::Hash),
+            (Some('%'), ..) => self.advance_by_and_emit(1, Token::Percent),
+            (Some('&'), Some('&'), ..) => self.advance_by_and_emit(2, Token::AmpAmp),
+            (Some('&'), Some('['), ..) => self.advance_by_and_emit(2, Token::AmpLeftSquare),
+            (Some('('), ..) => self.advance_by_and_emit(1, Token::LeftParen),
+            (Some(')'), ..) => self.advance_by_and_emit(1, Token::RightParen),
+            (Some('*'), Some('*'), ..) => self.advance_by_and_emit(2, Token::StarStar),
+            (Some('*'), ..) => self.advance_by_and_emit(1, Token::Star),
+            (Some('+'), ..) => self.advance_by_and_emit(1, Token::Plus),
+            (Some(','), ..) => self.advance_by_and_emit(1, Token::Comma),
+            (Some('-'), Some('>'), ..) => self.advance_by_and_emit(2, Token::MinusGt),
+            (Some('-'), ..) => self.advance_by_and_emit(1, Token::Minus),
+            (Some('.'), Some('.'), ..) => self.advance_by_and_emit(2, Token::DotDot),
+            (Some('.'), ..) => self.advance_by_and_emit(1, Token::Dot),
+            (Some('/'), Some('='), ..) => self.advance_by_and_emit(2, Token::SlashEq),
+            (Some('/'), ..) => self.advance_by_and_emit(1, Token::Slash),
+            (Some(':'), ..) => self.advance_by_and_emit(1, Token::Colon),
+            (Some(';'), ..) => self.advance_by_and_emit(1, Token::Semicolon),
+            (Some('<'), Some('='), ..) => self.advance_by_and_emit(2, Token::LtEq),
+            (Some('<'), ..)
=> self.advance_by_and_emit(1, Token::Lt),
+            (Some('='), Some('='), ..) => self.advance_by_and_emit(2, Token::EqEq),
+            (Some('='), ..) => self.advance_by_and_emit(1, Token::Eq),
+            (Some('>'), Some('='), ..) => self.advance_by_and_emit(2, Token::GtEq),
+            (Some('>'), ..) => self.advance_by_and_emit(1, Token::Gt),
+            (Some('@'), ..) => self.advance_by_and_emit(1, Token::At),
+            (Some('['), ..) => self.advance_by_and_emit(1, Token::LeftSquare),
+            (Some('\\'), ..) => self.advance_by_and_emit(1, Token::BackSlash),
+            (Some(']'), ..) => self.advance_by_and_emit(1, Token::RightSquare),
+            (Some('{'), ..) => self.advance_by_and_emit(1, Token::LeftBrace(None)),
+            (Some('|'), Some('|'), ..) => self.advance_by_and_emit(2, Token::PipePipe),
+            (Some('|'), ..) => self.advance_by_and_emit(1, Token::Pipe),
+            (Some('}'), ..) => self.advance_by_and_emit(1, Token::RightBrace(None)),
             //-------------------------------------------------------------------------------------
             // Numbers.
             //-------------------------------------------------------------------------------------
-            [Some('0'), Some('b'), ..] => scan_int!(self, '0'..='1', Token::BinInt),
-            [Some('0'), Some('o'), ..] => scan_int!(self, '0'..='7', Token::OctInt),
-            [Some('0'), Some('x'), ..] => {
+            (Some('0'), Some('b'), ..) => scan_int!(self, '0'..='1', Token::BinInt),
+            (Some('0'), Some('o'), ..) => scan_int!(self, '0'..='7', Token::OctInt),
+            (Some('0'), Some('x'), ..) => {
                 scan_int!(self, '0'..='9' | 'A'..='F' | 'a'..='f', Token::HexInt)
             }
-            [Some('0'), Some('B' | 'O' | 'X'), ..] => {
+            (Some('0'), Some('B' | 'O' | 'X'), ..) => {
                 self.advance_while(is_text_like);
                 self.emit_error(LexicalError::BaseMarker)
             }
-            [Some('0'..='9'), ..] => {
+            (Some('0'..='9'), ..) => {
                 // Decimal number.
                 // Assume the number is an integer until proven otherwise.
                 let mut kind_or_error = Ok(Token::DecInt);
@@ -311,13 +317,13 @@
                 // least one character.
                 self.advance_while(pat!('0'..='9' | '_'));
                 // Phase 2: (\.\d[_\d]*)?
-                if matches!(self.window, [Some('.'), Some('0'..='9'), ..]) {
+                if matches!(self.peek_pair(), (Some('.'), Some('0'..='9'))) {
                     kind_or_error = Ok(Token::Float);
                     self.advance(); // Consume '.'.
                     self.advance_while(pat!('0'..='9' | '_'));
                 }
                 // Phase 3: (e[+-]?\d[_\d]*)?
-                if matches!(self.window, [Some('e'), ..]) {
+                if self.peek() == Some('e') {
                     kind_or_error = Ok(Token::Float);
                     self.advance(); // Consume `e`.
                     if self.advance_while(pat!('_')) > 0 {
@@ -329,7 +335,7 @@
                     }
                     self.advance_while(pat!('0'..='9' | '_'));
                 }
-                if self.window[0].is_some_and(is_text_like) {
+                if self.peek().is_some_and(is_text_like) {
                     kind_or_error = Err(LexicalError::Number);
                     self.advance_while(is_text_like);
                 }
@@ -341,15 +347,15 @@
             //-------------------------------------------------------------------------------------
             // Strings.
             //-------------------------------------------------------------------------------------
-            [Some('"'), Some('"'), Some('"'), ..] => {
+            (Some('"'), Some('"'), Some('"'), ..) => {
                 self.advance_by(3); // Consume `"""`.
                 self.scan_string(Token::BlockString, 3, 0, true)
             }
-            [Some('"'), ..] => {
+            (Some('"'), ..) => {
                 self.advance(); // Consume `"`.
                 self.scan_string(Token::String, 1, 0, true)
             }
-            [Some('r'), Some('"'), ..] | [Some('r'), Some('#'), Some('#' | '"'), ..] => {
+            (Some('r'), Some('"'), ..) | (Some('r'), Some('#'), Some('#' | '"'), ..) => {
                 self.advance(); // Consume `r`.
                let hashes = self.advance_while(pat!('#'));
                 if !self.advance_if(pat!('"')) {
@@ -361,7 +367,7 @@
             //-------------------------------------------------------------------------------------
             // Identifiers and keywords.
             //-------------------------------------------------------------------------------------
-            [Some('A'..='Z' | 'a'..='z' | '_'), ..] => {
+            (Some('A'..='Z' | 'a'..='z' | '_'), ..) => {
                 self.advance_while(|c| is_text_like(c) || c == '#');
                 match self.as_marked_str() {
                     // Keywords.
@@ -419,12 +425,12 @@
             // Other.
             //-------------------------------------------------------------------------------------
             // Illegal character to start a token.
-            [Some(_), ..] => {
+            (Some(_), ..) => {
                 self.advance();
                 self.emit_error(LexicalError::Char)
             }
             // All done.
-            [None, ..] => None,
+            (None, ..) => None,
         }
     }
 
@@ -445,20 +451,21 @@
         escapes: bool,
     ) -> Option<Spanned> {
         loop {
-            if matches!(self.window, [None, ..]) {
+            if self.peek().is_none() {
                 return self.emit_error(LexicalError::UnclosedString);
             }
             // Advance the lexer until it is sitting on a `"` that can start the closing delimiter.
             if escapes {
                 // Advance to the next string-like terminator or escape.
                 self.advance_until(pat!('"' | '\\'));
-                match self.window {
-                    [Some('"'), ..] => { /* Do nothing. */ }
-                    [Some('\\'), ..] => {
+                match self.peek() {
+                    Some('"') => { /* Do nothing. */ }
+                    Some('\\') => {
+                        // TODO: Need more advanced logic. What about `\u{...}` escapes?
                         self.advance_by(2); // Skip the escape (e.g., the sequence `\"`).
                         continue;
                     }
-                    [None, ..] => continue, // End of input.
+                    None => continue, // End of input.
                     _ => unreachable!(),
                 }
             } else {
@@ -487,7 +494,7 @@
         }
     }
 }
 
-impl iter::FusedIterator for Lexer<'_> {}
+impl FusedIterator for Lexer<'_> {}
 
 #[cfg(test)]
 mod tests {
@@ -524,34 +531,35 @@
 
     #[test]
     fn new() {
-        lexer!(lexer <- "abc");
+        lexer!(mut lexer <- "abc");
         assert_eq!(lexer.text.as_str(), "abc");
-        assert_eq!(lexer.window, [Some('a'), Some('b'), Some('c')]);
-        assert_eq!(lexer.idx, 0);
+        assert_eq!(lexer.peek(), Some('a'));
+        assert_eq!(lexer.idx(), 0);
         assert_eq!(lexer.mark_idx, 0);
         assert_eq!(lexer.text.as_str(), "abc");
     }
 
     #[test]
     fn new_with_bom() {
-        lexer!(lexer <- "\u{feff}abc");
+        // The byte order mark is U+FEFF.
+        lexer!(mut lexer <- "\u{feff}abc");
         assert_eq!(lexer.text.as_str(), "\u{feff}abc");
-        assert_eq!(lexer.window, [Some('a'), Some('b'), Some('c')]);
-        assert_eq!(lexer.idx, BOM.len_utf8());
-        assert_eq!(lexer.mark_idx, BOM.len_utf8());
+        assert_eq!(lexer.peek(), Some('a'));
+        assert_eq!(lexer.idx(), '\u{feff}'.len_utf8());
+        assert_eq!(lexer.mark_idx, '\u{feff}'.len_utf8());
         assert_eq!(lexer.text.as_str(), "\u{feff}abc");
     }
 
     #[test]
     fn mark() {
         lexer!(mut lexer <- "abcdef");
-        assert_eq!(lexer.idx, 0);
+        assert_eq!(lexer.idx(), 0);
         assert_eq!(lexer.mark_idx, 0);
         lexer.advance();
-        assert_eq!(lexer.idx, 1);
+        assert_eq!(lexer.idx(), 1);
         assert_eq!(lexer.mark_idx, 0);
         lexer.mark();
-        assert_eq!(lexer.idx, 1);
+        assert_eq!(lexer.idx(), 1);
         assert_eq!(lexer.mark_idx, 1);
     }
 
@@ -571,28 +579,28 @@
     #[test]
     fn advance() {
         lexer!(mut lexer <- "abc");
-        assert_eq!(lexer.idx, 0);
-        assert_eq!(lexer.window, [Some('a'), Some('b'), Some('c')]);
+        assert_eq!(lexer.idx(), 0);
+        assert_eq!(lexer.peek(), Some('a'));
         lexer.advance();
-        assert_eq!(lexer.idx, 1);
-        assert_eq!(lexer.window, [Some('b'), Some('c'), None]);
+        assert_eq!(lexer.idx(), 1);
+        assert_eq!(lexer.peek(), Some('b'));
         lexer.advance();
-        assert_eq!(lexer.idx, 2);
-        assert_eq!(lexer.window, [Some('c'), None, None]);
+        assert_eq!(lexer.idx(), 2);
+        assert_eq!(lexer.peek(), Some('c'));
         lexer.advance();
-        assert_eq!(lexer.idx, 3);
-        assert_eq!(lexer.window, [None, None, None]);
+        assert_eq!(lexer.idx(), 3);
+        assert_eq!(lexer.peek(), None);
         lexer.advance(); // Test idempotency.
-        assert_eq!(lexer.idx, 3);
-        assert_eq!(lexer.window, [None, None, None]);
+        assert_eq!(lexer.idx(), 3);
+        assert_eq!(lexer.peek(), None);
     }
 
     #[test]
     fn advance_by() {
         lexer!(mut lexer <- "abcdef");
         lexer.advance_by(3);
-        assert_eq!(lexer.idx, 3);
+        assert_eq!(lexer.idx(), 3);
     }
 
     #[test]
@@ -602,42 +610,42 @@
         // - U+001E8D (ẍ): Latin Extended Additional requires 3 bytes in UTF-8.
         // - U+01D50D (𝔍): Mathematical Alphanumeric Symbols require 4 bytes in UTF-8.
        lexer!(mut lexer <- "\u{0041}\u{003B2}\u{001E8D}\u{01D50D}");
-        assert_eq!(lexer.idx, 0);
+        assert_eq!(lexer.idx(), 0);
         lexer.advance();
-        assert_eq!(lexer.idx, 1);
+        assert_eq!(lexer.idx(), 1);
         lexer.advance();
-        assert_eq!(lexer.idx, 3);
+        assert_eq!(lexer.idx(), 3);
         lexer.advance();
-        assert_eq!(lexer.idx, 6);
+        assert_eq!(lexer.idx(), 6);
         lexer.advance();
-        assert_eq!(lexer.idx, 10);
+        assert_eq!(lexer.idx(), 10);
     }
 
     #[test]
     fn advance_if() {
         lexer!(mut lexer <- "abcdef");
         lexer.advance_if(|_| false);
-        assert_eq!(lexer.idx, 0);
+        assert_eq!(lexer.idx(), 0);
         lexer.advance_if(|_| true);
-        assert_eq!(lexer.idx, 1);
+        assert_eq!(lexer.idx(), 1);
     }
 
     #[test]
     fn advance_while() {
         lexer!(mut lexer <- "abcdef");
         lexer.advance_while(|_| false);
-        assert_eq!(lexer.idx, 0);
+        assert_eq!(lexer.idx(), 0);
         lexer.advance_while(pat!('a'..='c'));
-        assert_eq!(lexer.idx, 3);
+        assert_eq!(lexer.idx(), 3);
     }
 
     #[test]
     fn advance_until() {
         lexer!(mut lexer <- "abcdef");
         lexer.advance_until(|_| true);
-        assert_eq!(lexer.idx, 0);
+        assert_eq!(lexer.idx(), 0);
         lexer.advance_until(pat!('d'..='f'));
-        assert_eq!(lexer.idx, 3);
+        assert_eq!(lexer.idx(), 3);
     }
 
     #[test]
@@ -666,35 +674,12 @@
         assert_eq!(error, (0, LexicalError::Ident, 3));
     }
 
-    #[test]
-    fn shebang_comment_location() {
-        lexer!(mut lexer <- "#!shebang");
-        assert!(lexer.shebang_comment_location());
-        lexer.advance();
-        assert!(!lexer.shebang_comment_location());
-    }
-
-    #[test]
-    fn shebang_comment_location_with_bom() {
-        lexer!(mut lexer <- "\u{feff}#!shebang");
-        assert!(lexer.shebang_comment_location());
-        lexer.advance();
-        assert!(!lexer.shebang_comment_location());
-    }
-
     #[test]
     fn scan_shebang_comment() {
         // Valid.
         scan!("#!x", ok: Token::ShebangComment);
         scan!("#!x\na", Some(Ok((0, Token::ShebangComment, 3))));
-        scan!(
-            "\u{feff}#!x\na",
-            Some(Ok((
-                BOM.len_utf8(),
-                Token::ShebangComment,
-                BOM.len_utf8() + 3
-            )))
-        );
+        scan!("\u{feff}#!bom\na", Some(Ok((3, Token::ShebangComment, 8))));
         // Not a shebang comment.
         scan!(" #!x\na", Some(Ok((1, Token::Hash, 2))));
     }
@@ -992,7 +977,7 @@
 
     #[test]
     fn scan_illegal_char() {
-        // There are lots of illegal chars but only 2 are tested.
+        // There are lots of illegal characters, but only 2 are tested.
         scan!("$", err: LexicalError::Char);
         scan!("Γ€", err: LexicalError::Char);
     }
@@ -1000,7 +985,7 @@
     #[test]
     fn scan_eof() {
         scan!("", None);
-        scan!("\r\n \t", None);
+        scan!("\n \r\n ", None);
     }
 
     #[test]
diff --git a/fuyu-core/src/parse/text.rs b/fuyu-core/src/parse/text.rs
index 494136d..d046242 100644
--- a/fuyu-core/src/parse/text.rs
+++ b/fuyu-core/src/parse/text.rs
@@ -4,6 +4,9 @@
 //!
 //! [`Text`] is optimized for quick lookups of line and column position based on a [`ByteIdx`].
 
+use std::collections::VecDeque;
+use std::str::Chars;
+
 /// An index into a [`Text`] aligned at the byte level.
 ///
 /// Byte indices are used rather than `char` indices for performance.
@@ -119,6 +122,133 @@
 pub struct LineCol {
     /// The line in the text.
     pub line: usize,
     /// The column in the text.
     pub col: usize,
 }
 
+/// The Unicode byte order mark (BOM).
+const BOM: char = '\u{feff}';
+
+// TODO: Tests for this.
+/// An iterator over the characters of a [`Text`].
+///
+/// This iterator has some benefits that simplify processing:
+///
+/// - The byte order mark (BOM) (U+FEFF) is automatically skipped if it is the first character.
+/// - Sequences of `\r\n` are collapsed into `\n`. The column is the location of the `\r`.
+///
+/// # πŸ’€ Warning πŸ’€
+///
+/// The current implementation of this iterator does not play "nice".
It panics when any of the
+/// following are encountered:
+///
+/// - An `\r` not immediately followed by an `\n`.
+/// - Any `\t` (tabs are not allowed).
+/// - In general, any control character other than `\r` or `\n`.
+///
+/// These rules may be relaxed in the future, but for now are present to simplify error handling
+/// during development.
+#[derive(Debug)]
+pub(super) struct TextChars<'a> {
+    /// The underlying iterator.
+    chars: Chars<'a>,
+    /// The characters that were peaked.
+    peaked: VecDeque<char>,
+    /// The byte index of the next character (if any) to be emitted.
+    idx: ByteIdx,
+    /// The 0-indexed column of the next character (if any) to be emitted.
+    ///
+    /// Columns are counted in Unicode scalars.
+    column: usize,
+}
+
+impl<'a> TextChars<'a> {
+    /// Create a new [`TextChars`] iterator over a [`Text`].
+    pub fn new(text: &'a Text) -> Self {
+        let mut text_chars = Self {
+            chars: text.as_str().chars(),
+            peaked: VecDeque::new(),
+            idx: 0,
+            column: 0,
+        };
+        // Skip the BOM without counting it toward the column position.
+        if text_chars.peek() == Some(BOM) {
+            text_chars.next_char();
+            text_chars.idx += BOM.len_utf8();
+        }
+        text_chars
+    }
+
+    /// Get the next character in the text.
+    ///
+    /// This pulls from the queue (if possible) before advancing the underlying iterator.
+    fn next_char(&mut self) -> Option<char> {
+        if !self.peaked.is_empty() {
+            self.peaked.pop_front()
+        } else {
+            self.chars.next()
+        }
+    }
+
+    /// Peek the next character in the text without advancing the iterator.
+    pub fn peek(&mut self) -> Option<char> {
+        self.peek_nth(0)
+    }
+
+    /// Peek the next character `n` positions ahead in the text without advancing the iterator.
+    pub fn peek_nth(&mut self, n: usize) -> Option<char> {
+        // Make sure that the peak buffer has at least `n + 1` characters (unless the end of the
+        // iterator is encountered).
+        while n >= self.peaked.len() {
+            match self.chars.next() {
+                Some(c) => self.peaked.push_back(c),
+                None => return None,
+            }
+        }
+        // At this point it is guaranteed that `n < self.peaked.len()`.
+        Some(self.peaked[n])
+    }
+
+    /// The byte index of the next character (if any) to be emitted.
+    pub fn idx(&self) -> usize {
+        self.idx
+    }
+
+    /// The 0-indexed column of the next character (if any) to be emitted.
+    ///
+    /// Columns are counted in Unicode scalars.
+    pub fn column(&self) -> usize {
+        self.column
+    }
+}
+
+impl Iterator for TextChars<'_> {
+    type Item = char;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.next_char() {
+            // Tabs are specific enough to get their own special case.
+            Some('\t') => panic!("Tabs (U+0009) are illegal."),
+            // The only control characters that are allowed are `\r` and `\n`.
+            Some(c) if c.is_control() && c != '\r' && c != '\n' => {
+                panic!("Illegal control character: U+{:06X}.", c as u32)
+            }
+            // All `\r` must be followed by `\n`.
+            Some('\r') => {
+                if let Some('\n') = self.next_char() {
+                    // Treat this sequence as if it was just a `\n`.
+                    self.idx += '\r'.len_utf8() + '\n'.len_utf8();
+                    self.column = 0;
+                    Some('\n')
+                } else {
+                    panic!("Carriage return (U+000D) must be followed by linefeed (U+000A).")
+                }
+            }
+            Some(c) => {
+                self.idx += c.len_utf8();
+                self.column = if c == '\n' { 0 } else { self.column + 1 };
+                Some(c)
+            }
+            None => None,
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From f8ebb3a64ad191e90528e5ea57ba67613f40ea92 Mon Sep 17 00:00:00 2001
From: Adam Christiansen
Date: Tue, 8 Apr 2025 16:57:56 -0600
Subject: [PATCH 3/3] feat: add `;`, `{`, and `}` inference

---
 fuyu-core/src/parse/lexer.rs | 351 ++++++++++++++++++++++++++++++++---
 fuyu-core/src/parse/text.rs  |  40 ++--
 fuyu-core/src/parse/token.rs |   6 +-
 3 files changed, 351 insertions(+), 46 deletions(-)

diff --git a/fuyu-core/src/parse/lexer.rs b/fuyu-core/src/parse/lexer.rs
index c74e398..1062f54 100644
--- a/fuyu-core/src/parse/lexer.rs
+++ b/fuyu-core/src/parse/lexer.rs
@@ -3,6 +3,7 @@
 use crate::parse::{ByteIdx, Text, TextChars, Token};
 use std::collections::VecDeque;
 use std::iter::FusedIterator;
+use std::num::NonZeroUsize;
 use std::str::Chars;
 
 /// The `Lexer` produces an iterator of [`Spanned`] [`Token`]s from a source [`Text`].
 #[derive(Debug)]
 pub struct Lexer<'a> {
     /// The source text.
     text: &'a Text,
     /// An iterator over the characters in the source text.
     text_chars: TextChars<'a>,
     /// The marked start index (inclusive) that tracks the start of a marked region (e.g., token in
     /// a lexer).
     ///
     /// It is guaranteed that `self.text_chars.idx() >= self.mark_idx`.
     mark_idx: ByteIdx,
-    // TODO: Use this to (1) track indentation and (2) emit `{`, `}`, and `;`.
     /// The indentations tracked by the lexer.
-    indent_stack: Vec<usize>,
+    ///
+    /// The indentation is 1-indexed. When an indentation is `Some`, it corresponds to a real
+    /// indentation level; when an indentation is `None`, it corresponds to `{` and
+    /// `}` explicitly present in the source.
+    indent_stack: Vec<Option<NonZeroUsize>>,
     /// Queued tokens to emit from the iterator the next time it is advanced.
     queued: VecDeque<Spanned>,
+    /// Tracks whether a token was just emitted that should be followed by a `{`.
+    next_must_be_brace: bool,
+    /// The previous token that was emitted.
+    prev_token: Option<Token>,
 }
 
 /// The values that are produced by the [`Lexer`].
     pub fn new(text: &'a Text) -> Self {
         let mut lexer = Self {
             text,
             text_chars: TextChars::new(text),
             mark_idx: 0,
-            indent_stack: vec![],
+            indent_stack: vec![NonZeroUsize::new(1)], // `NonZeroUsize::new(1)` is `Some(1)`.
             queued: VecDeque::new(),
+            next_must_be_brace: false,
+            prev_token: None,
         };
         // The index might not start at 0.
         lexer.mark();
@@ -123,6 +133,11 @@
         self.text_chars.idx()
     }
 
+    /// Shorthand for accessing the column.
+    fn column(&self) -> NonZeroUsize {
+        self.text_chars.column()
+    }
+
     /// Set the mark index to the current index.
     fn mark(&mut self) {
         self.mark_idx = self.idx();
 
     /// Create a token that spans from the marked start index to the current index.
     fn emit(&mut self, kind: Token) -> Option<Spanned> {
+        self.prev_token = Some(kind.clone());
         Some(Ok((self.mark_idx, kind, self.idx())))
     }
 
     /// This returns `None` at the end of the source text, and once `None` is returned, it will
     /// always return `None` on subsequent calls.
     fn scan(&mut self) -> Option<Spanned> {
+        // Pop from the queue if there is anything ready.
+        if !self.queued.is_empty() {
+            return self.queued.pop_front();
+        }
+        // Skip the whitespace and maybe put tokens in the queue.
+        self.scan_through_whitespace();
         if !self.queued.is_empty() {
             return self.queued.pop_front();
         }
+        // At the end of the text, go through the indentation stack and emit `}` for all unclosed
+        // indentation.
+        if self.peek().is_none() && self.indent_stack.len() > 1 {
+            if let Some(Some(indent)) = self.indent_stack.pop() {
+                return self.emit(Token::RightBrace(Some(indent)));
+            } else {
+                todo!("TODO: There was an unclosed `{{` in the source.")
+            }
+            // TODO: Does this also need to emit a semicolon?
+        }
+        // Nothing was found so try to find the next token normally. The lexer is now sitting on a
+        // character that can start a token.
+        self.scan_next()
+    }
+
+    /// Scan for tokens in the whitespace that follows.
+    ///
+    /// This does not return any tokens, but may put some in the queue. All whitespace is skipped.
+    fn scan_through_whitespace(&mut self) {
+        // Skip all whitespace and detect if a newline was seen.
+        self.advance_while(|c| char::is_whitespace(c) && c != '\n');
+        let saw_newline = self.advance_if(pat!('\n'));
         self.advance_while(char::is_whitespace);
         self.mark();
+        // It is possible to emit a `;` and zero or more `}` based on indentation.
+        if saw_newline {
+            self.scan_through_whitespace_handle_newline();
+        }
+        // When a brace is expected, either confirm that it is there or insert one.
+        self.scan_through_whitespace_infer_braces();
+    }
+
+    /// A helper for [`Self::scan_through_whitespace()`] that emits `;` and `}` tokens as required
+    /// after a newline.
+    ///
+    /// This should be called after skipping whitespace if at least one newline was present in the
+    /// skipped space.
+    fn scan_through_whitespace_handle_newline(&mut self) {
+        macro_rules! emit_semicolon {
+            ($lexer:expr) => {
+                // A semicolon is emitted when all of the following hold:
+                //
+                // - The top of the indent stack is an inferred `{`.
+                // - The previous token was not a semicolon.
+                // - At least one token has been emitted so far.
+                if $lexer.indent_stack.last().unwrap().is_some()
+                    && $lexer.prev_token != Some(Token::Semicolon)
+                    && $lexer.prev_token.is_some()
+                {
+                    $lexer.emit_queue(Token::Semicolon);
+                }
+            };
+        }
+        let Some(Some(indent)) = self.indent_stack.last() else {
+            return;
+        };
+        if self.column() == *indent {
+            emit_semicolon!(self);
+        } else if self.column() > *indent {
+            // Do nothing, as the expression on the previous line continues on this one.
+        } else {
+            emit_semicolon!(self);
+            loop {
+                let Some(Some(indent)) = self.indent_stack.last() else {
+                    return;
+                };
+                if *indent > self.column() {
+                    // Close this inferred block: its indentation is deeper than the column where
+                    // the new line starts.
+                    self.emit_queue(Token::RightBrace(Some(*indent)));
+                    self.indent_stack.pop();
+                } else {
+                    break;
+                }
+            }
+            // The top of the indentation stack must equal the current column, otherwise the
+            // program is malformed.
+            let indent = self.indent_stack.last().unwrap().unwrap();
+            if indent != self.column() {
+                // TODO: Emit an error instead of panicking.
+                panic!("Indentation did not return to an allowed level.");
+            }
+        }
+    }
+
+    /// A helper for [`Self::scan_through_whitespace()`] that infers `{` if it is flagged that a
+    /// `{` must appear at the current location.
+    fn scan_through_whitespace_infer_braces(&mut self) {
+        // Condition for whether to run this logic.
+        if !self.next_must_be_brace {
+            return;
+        }
+        self.next_must_be_brace = false;
+        // Handle the `{` token.
+        if self.peek() == Some('{') {
+            // Do nothing, as this token will be lexed normally.
+        } else {
+            // No explicit `{` follows, so infer one: record the current column as a new
+            // indentation level and queue a `{` carrying that column so the matching `}` can be
+            // inferred later.
+            let indent = Some(self.column());
+            self.indent_stack.push(indent);
+            self.emit_queue(Token::LeftBrace(indent));
+        }
+    }
+
+    /// Scan the next token.
+    ///
+    /// It is assumed that the lexer is either on a character that can start a token or at the end
+    /// of the text.
+    fn scan_next(&mut self) -> Option<Spanned> {
         self.mark();
         match self.peek_triplet() {
             //-------------------------------------------------------------------------------------
@@ -282,10 +414,17 @@
             (Some('['), ..) => self.advance_by_and_emit(1, Token::LeftSquare),
             (Some('\\'), ..) => self.advance_by_and_emit(1, Token::BackSlash),
             (Some(']'), ..) => self.advance_by_and_emit(1, Token::RightSquare),
-            (Some('{'), ..) => self.advance_by_and_emit(1, Token::LeftBrace(None)),
+            (Some('{'), ..) => {
+                self.indent_stack.push(None);
+                self.advance_by_and_emit(1, Token::LeftBrace(None))
+            }
             (Some('|'), Some('|'), ..) => self.advance_by_and_emit(2, Token::PipePipe),
             (Some('|'), ..) => self.advance_by_and_emit(1, Token::Pipe),
-            (Some('}'), ..) => self.advance_by_and_emit(1, Token::RightBrace(None)),
+            (Some('}'), ..) => {
+                // TODO: Check that the popped entry is an explicit `{` (i.e., `None`).
+                self.indent_stack.pop();
+                self.advance_by_and_emit(1, Token::RightBrace(None))
+            }
             //-------------------------------------------------------------------------------------
             // Numbers.
             //-------------------------------------------------------------------------------------
@@ -372,14 +511,20 @@
             match self.as_marked_str() {
                 // Keywords.
                 "as" => self.emit(Token::KwAs),
-                "do" => self.emit(Token::KwDo),
+                "do" => {
+                    self.next_must_be_brace = true;
+                    self.emit(Token::KwDo)
+                }
                 "else" => self.emit(Token::KwElse),
                 "for" => self.emit(Token::KwFor),
                 "if" => self.emit(Token::KwIf),
                 "immediate" => self.emit(Token::KwImmediate),
                 "import" => self.emit(Token::KwImport),
                 "let" => self.emit(Token::KwLet),
-                "match" => self.emit(Token::KwMatch),
+                "match" => {
+                    self.next_must_be_brace = true;
+                    self.emit(Token::KwMatch)
+                }
                 "panic" => self.emit(Token::KwPanic),
                 "require" => self.emit(Token::KwRequire),
                 "return" => self.emit(Token::KwReturn),
 
                 "unimplemented" => self.emit(Token::KwUnimplemented),
                 "unreachable" => self.emit(Token::KwUnreachable),
                 "use" => self.emit(Token::KwUse),
-                "when" => self.emit(Token::KwWhen),
+                "when" => {
+                    self.next_must_be_brace = true;
+                    self.emit(Token::KwWhen)
+                }
                 // If not a keyword then this must be an identifier.
                 "_" => self.emit(Token::Underscore),
                 ident => {
@@ -512,6 +660,15 @@
         };
     }
 
+    /// Collect all of the tokens from the lexer into a vector, ignoring the positions.
+    ///
+    /// This panics if the lexer emits any errors.
+    macro_rules! collect_tokens {
+        ($lexer:expr) => {
+            $lexer.map(|spanned| spanned.unwrap().1).collect::<Vec<_>>()
+        };
+    }
+
     /// Shorthand for asserting that a scan produces a value.
     macro_rules! scan {
         // Assert that the `source` string produces the specified [`Spanned`] `token`.
 
     #[test]
-    fn iter() {
+    fn iter_basic() {
         // This does not test every token, only a handful are needed to test the iterator.
        lexer!(lexer <- "let x = 123;");
         assert_eq!(
-            lexer.collect::<Vec<_>>(),
+            collect_tokens!(lexer),
             vec![
-                Ok((0, Token::KwLet, 3)),
-                Ok((4, Token::LowerIdent, 5)),
-                Ok((6, Token::Eq, 7)),
-                Ok((8, Token::DecInt, 11)),
-                Ok((11, Token::Semicolon, 12)),
+                Token::KwLet,
+                Token::LowerIdent,
+                Token::Eq,
+                Token::DecInt,
+                Token::Semicolon,
             ]
         );
         lexer!(lexer <- "  let x = 123;\n");
         assert_eq!(
-            lexer.collect::<Vec<_>>(),
+            collect_tokens!(lexer),
             vec![
-                Ok((2, Token::KwLet, 5)),
-                Ok((6, Token::LowerIdent, 7)),
-                Ok((8, Token::Eq, 9)),
-                Ok((10, Token::DecInt, 13)),
-                Ok((13, Token::Semicolon, 14)),
+                Token::KwLet,
+                Token::LowerIdent,
+                Token::Eq,
+                Token::DecInt,
+                Token::Semicolon,
             ]
         );
         lexer!(lexer <- "let x=123;");
         assert_eq!(
-            lexer.collect::<Vec<_>>(),
+            collect_tokens!(lexer),
+            vec![
+                Token::KwLet,
+                Token::LowerIdent,
+                Token::Eq,
+                Token::DecInt,
+                Token::Semicolon,
+            ]
+        );
+    }
+
+    #[test]
+    fn iter_infer_semicolons() {
+        // Semicolons should be inferred after `x`, `y`, and `z`.
+        lexer!(lexer <- concat!(
+            "x\n",
+            "y\n",
+            "z\n",
+        ));
+        assert_eq!(
+            collect_tokens!(lexer),
+            vec![
+                Token::LowerIdent,
+                Token::Semicolon,
+                Token::LowerIdent,
+                Token::Semicolon,
+                Token::LowerIdent,
+                Token::Semicolon,
+            ]
+        );
+        // Semicolons should be inferred after `x` and `z`.
+        lexer!(lexer <- concat!(
+            "x\n",
+            "  y\n",
+            "z\n",
+        ));
+        assert_eq!(
+            collect_tokens!(lexer),
+            vec![
+                Token::LowerIdent,
+                Token::LowerIdent,
+                Token::Semicolon,
+                Token::LowerIdent,
+                Token::Semicolon,
+            ]
+        );
+    }
+
+    #[test]
+    fn iter_infer_braces() {
+        lexer!(lexer <- "do");
+        assert_eq!(
+            collect_tokens!(lexer),
+            vec![
+                Token::KwDo,
+                Token::LeftBrace(Some(NonZeroUsize::new(3).unwrap())),
+                Token::RightBrace(Some(NonZeroUsize::new(3).unwrap())),
+            ]
+        );
+        lexer!(lexer <- "match");
+        assert_eq!(
+            collect_tokens!(lexer),
+            vec![
+                Token::KwMatch,
+                Token::LeftBrace(Some(NonZeroUsize::new(6).unwrap())),
+                Token::RightBrace(Some(NonZeroUsize::new(6).unwrap())),
+            ]
+        );
+        lexer!(lexer <- "when");
+        assert_eq!(
+            collect_tokens!(lexer),
+            vec![
+                Token::KwWhen,
+                Token::LeftBrace(Some(NonZeroUsize::new(5).unwrap())),
+                Token::RightBrace(Some(NonZeroUsize::new(5).unwrap())),
+            ]
+        );
+    }
+
+    #[test]
+    fn iter_infer_nested_braces() {
+        lexer!(lexer <- concat!(
+            "do\n",
+            "  do\n",
+            "    x\n",
+        ));
+        assert_eq!(
+            collect_tokens!(lexer),
+            vec![
+                Token::KwDo,
+                Token::LeftBrace(Some(NonZeroUsize::new(3).unwrap())),
+                Token::KwDo,
+                Token::LeftBrace(Some(NonZeroUsize::new(5).unwrap())),
+                Token::LowerIdent,
+                Token::Semicolon,
+                Token::RightBrace(Some(NonZeroUsize::new(5).unwrap())),
+                Token::RightBrace(Some(NonZeroUsize::new(3).unwrap())),
+            ]
+        );
+    }
+
+    #[test]
+    fn iter_do_not_infer_semicolons_in_explicit_braces() {
+        // Do not infer semicolons within `{ ... }`.
+        lexer!(lexer <- concat!(
+            "do {\n",
+            "    x\n",
+            "    y\n",
+            "}\n",
+        ));
+        assert_eq!(
+            collect_tokens!(lexer),
+            vec![
+                Token::KwDo,
+                Token::LeftBrace(None),
+                Token::LowerIdent,
+                Token::LowerIdent,
+                Token::RightBrace(None),
+                Token::Semicolon,
+            ]
+        );
+    }
+
+    #[test]
+    fn iter_infer_semicolons_in_inferred_braces() {
+        // Infer semicolons inside an inferred brace block inside an explicit `{ ... }`.
+        lexer!(lexer <- concat!(
+            "do {\n",
+            "  do \n",
+            "    x\n",
+            "    y\n",
+            "}\n",
+        ));
+        assert_eq!(
+            collect_tokens!(lexer),
             vec![
-                Ok((0, Token::KwLet, 3)),
-                Ok((4, Token::LowerIdent, 5)),
-                Ok((5, Token::Eq, 6)),
-                Ok((6, Token::DecInt, 9)),
-                Ok((9, Token::Semicolon, 10)),
+                Token::KwDo,
+                Token::LeftBrace(None),
+                Token::KwDo,
+                Token::LeftBrace(Some(NonZeroUsize::new(5).unwrap())),
+                Token::LowerIdent,
+                Token::Semicolon,
+                Token::LowerIdent,
+                Token::Semicolon,
+                Token::RightBrace(Some(NonZeroUsize::new(5).unwrap())),
+                Token::RightBrace(None),
+                Token::Semicolon,
             ]
         );
     }
diff --git a/fuyu-core/src/parse/text.rs b/fuyu-core/src/parse/text.rs
index d046242..e103cf2 100644
--- a/fuyu-core/src/parse/text.rs
+++ b/fuyu-core/src/parse/text.rs
@@ -5,6 +5,7 @@
 //! [`Text`] is optimized for quick lookups of line and column position based on a [`ByteIdx`].
 
 use std::collections::VecDeque;
+use std::num::NonZeroUsize;
 use std::str::Chars;
 
 /// An index into a [`Text`] aligned at the byte level.
@@ -111,6 +112,7 @@
     }
 }
 
+// TODO: Make these NonZeroUsize.
 /// A line and column position in a source [`Text`].
 ///
 /// Both the line and column are 0-indexed.
@@ -148,14 +150,14 @@
 pub(super) struct TextChars<'a> {
     /// The underlying iterator.
     chars: Chars<'a>,
-    /// The characters that were peaked.
-    peaked: VecDeque<char>,
+    /// The characters that were peeked.
+    peeked: VecDeque<char>,
     /// The byte index of the next character (if any) to be emitted.
     idx: ByteIdx,
-    /// The 0-indexed column of the next character (if any) to be emitted.
+    /// The 1-indexed column of the next character (if any) to be emitted.
     ///
     /// Columns are counted in Unicode scalars.
-    column: usize,
+    column: NonZeroUsize,
 }
 
 impl<'a> TextChars<'a> {
     /// Create a new [`TextChars`] iterator over a [`Text`].
     pub fn new(text: &'a Text) -> Self {
         let mut text_chars = Self {
             chars: text.as_str().chars(),
-            peaked: VecDeque::new(),
+            peeked: VecDeque::new(),
             idx: 0,
-            column: 0,
+            column: NonZeroUsize::new(1).unwrap(),
         };
         // Skip the BOM without counting it toward the column position.
         if text_chars.peek() == Some(BOM) {
@@ -178,8 +180,8 @@
     fn next_char(&mut self) -> Option<char> {
-        if !self.peaked.is_empty() {
-            self.peaked.pop_front()
+        if !self.peeked.is_empty() {
+            self.peeked.pop_front()
         } else {
             self.chars.next()
         }
@@ -192,16 +194,16 @@
 
     /// Peek the next character `n` positions ahead in the text without advancing the iterator.
     pub fn peek_nth(&mut self, n: usize) -> Option<char> {
-        // Make sure that the peak buffer has at least `n + 1` characters (unless the end of the
+        // Make sure that the peek buffer has at least `n + 1` characters (unless the end of the
         // iterator is encountered).
-        while n >= self.peaked.len() {
+        while n >= self.peeked.len() {
             match self.chars.next() {
-                Some(c) => self.peaked.push_back(c),
+                Some(c) => self.peeked.push_back(c),
                 None => return None,
             }
         }
-        // At this point it is guaranteed that `n < self.peaked.len()`.
-        Some(self.peaked[n])
+        // At this point it is guaranteed that `n < self.peeked.len()`.
+        Some(self.peeked[n])
     }
 
     /// The byte index of the next character (if any) to be emitted.
@@ -209,10 +211,10 @@
         self.idx
     }
 
-    /// The 0-indexed column of the next character (if any) to be emitted.
+    /// The 1-indexed column of the next character (if any) to be emitted.
     ///
     /// Columns are counted in Unicode scalars.
-    pub fn column(&self) -> usize {
+    pub fn column(&self) -> NonZeroUsize {
         self.column
     }
 }
@@ -233,7 +235,7 @@
             // All `\r` must be followed by `\n`.
             Some('\r') => {
                 if let Some('\n') = self.next_char() {
                     // Treat this sequence as if it was just a `\n`.
                     self.idx += '\r'.len_utf8() + '\n'.len_utf8();
-                    self.column = 0;
+                    self.column = NonZeroUsize::new(1).unwrap();
                     Some('\n')
                 } else {
                     panic!("Carriage return (U+000D) must be followed by linefeed (U+000A).")
                 }
             }
             Some(c) => {
                 self.idx += c.len_utf8();
-                self.column = if c == '\n' { 0 } else { self.column + 1 };
+                self.column = if c == '\n' {
+                    NonZeroUsize::new(1).unwrap()
+                } else {
+                    self.column.checked_add(1).unwrap()
+                };
                 Some(c)
             }
             None => None,
diff --git a/fuyu-core/src/parse/token.rs b/fuyu-core/src/parse/token.rs
index cc4a6cd..5d70387 100644
--- a/fuyu-core/src/parse/token.rs
+++ b/fuyu-core/src/parse/token.rs
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: MPL-2.0
 
+use std::num::NonZeroUsize;
+
 /// A `Token` is an abstract representation of the token type not tied to any value.
 ///
 /// This value is often called the token "kind" throughout this project.
     /// The associated value is `None` if the brace appears in the source. When the brace is
     /// inserted by the lexer, the value is `Some` with the 1-indexed indentation of the column
     /// where the brace is inserted.
-    LeftBrace(Option<usize>),
+    LeftBrace(Option<NonZeroUsize>),
     /// `[`.
     LeftSquare,
     /// `(`.
     LeftParen,
     /// `}`.
     /// The associated value is `None` if the brace appears in the source. When the brace is
     /// inserted by the lexer, the value is `Some` with the 1-indexed indentation of the column
     /// of the matching automatically inserted [`LeftBrace`][Token::LeftBrace].
-    RightBrace(Option<usize>),
+    RightBrace(Option<NonZeroUsize>),
     /// `]`.
     RightSquare,
     /// `)`.
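Example of the combined effect of this series (a sketch, not part of any patch;
the 1-indexed columns assume the four-space indentation used in the tests, so
the block body starts at column 5). A layout-based block such as

    do
        x
        y

is expected to lex as if it were written `do { x; y; }`, producing the token
stream:

    KwDo
    LeftBrace(Some(5))   // Inferred `{` at column 5.
    LowerIdent           // `x`
    Semicolon            // Inferred `;` at the end of the line.
    LowerIdent           // `y`
    Semicolon
    RightBrace(Some(5))  // Inferred `}` closing the block at end of input.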