chenglou · sid-sadhana · Apr 17, 2026 · Apr 17, 2026
diff --git a/TODO.md b/TODO.md
@@ -16,8 +16,6 @@ Current priorities:
 - Expand the sampled font matrix only where a canary still looks genuinely imperfect.
 - Treat strongly font-sensitive or shaping-sensitive misses as boundary-finding for the current architecture, not automatic invitations for another local glue rule.
 - Keep the hot `layout()` path simple and allocation-light while the rich path absorbs more userland layout needs.
-- If chunk-heavy manual layout keeps growing, consider a stateful streaming variant or a cursor-carried chunk hint so sequential `layoutNextLine()` flows can stay overall linear instead of paying a lookup per emitted line.
-- If arbitrary interior rich cursors become common, consider a compact `segmentIndex -> chunkIndex` side table, ideally only on rich prepared handles or only when `chunks.length > 1`.
 
 3. Demo work
 

diff --git a/src/layout.ts b/src/layout.ts
@@ -1,37 +1,3 @@
-// Text measurement for browser environments using canvas measureText.
-//
-// Problem: DOM-based text measurement (getBoundingClientRect, offsetHeight)
-// forces synchronous layout reflow. When components independently measure text,
-// each measurement triggers a reflow of the entire document. This creates
-// read/write interleaving that can cost 30ms+ per frame for 500 text blocks.
-//
-// Solution: two-phase measurement centered around canvas measureText.
-//   prepare(text, font) — segments text via Intl.Segmenter, measures each word
-//     via canvas, caches widths, and does one cached DOM calibration read per
-//     font when emoji correction is needed. Call once when text first appears.
-//   layout(prepared, maxWidth, lineHeight) — walks cached word widths with pure
-//     arithmetic to count lines and compute height. Call on every resize.
-//     ~0.0002ms per text.
-//
-// i18n: Intl.Segmenter handles CJK (per-character breaking), Thai, Arabic, etc.
-//   Bidi: simplified rich-path metadata for mixed LTR/RTL custom rendering.
-//   Punctuation merging: "better." measured as one unit (matches CSS behavior).
-//   Trailing whitespace: hangs past line edge without triggering breaks (CSS behavior).
-//   overflow-wrap: pre-measured grapheme widths enable character-level word breaking.
-//
-// Emoji correction: Chrome/Firefox canvas measures emoji wider than DOM at font
-//   sizes <24px on macOS (Apple Color Emoji). The inflation is constant per emoji
-//   grapheme at a given size, font-independent. Auto-detected by comparing canvas
-//   vs actual DOM emoji width (one cached DOM read per font). Safari canvas and
-//   DOM agree (both wider than fontSize), so correction = 0 there.
-//
-// Limitations:
-//   - system-ui font: canvas resolves to different optical variants than DOM on macOS.
-//     Use named fonts (Helvetica, Inter, etc.) for guaranteed accuracy.
-//     See RESEARCH.md "Discovery: system-ui font resolution mismatch".
-//
-// Based on Sebastian Markbage's text-layout research (github.com/chenglou/text-layout).
-
 import { computeSegmentLevels } from './bidi.js'
 import {
   analyzeText,
@@ -82,45 +48,41 @@ function getSharedGraphemeSegmenter(): Intl.Segmenter {
   return sharedGraphemeSegmenter
 }
 
-// --- Public types ---
 
 declare const preparedTextBrand: unique symbol
 
 type PreparedCore = {
-  widths: number[] // Segment widths, e.g. [42.5, 4.4, 37.2]
-  lineEndFitAdvances: number[] // Width contribution when a line ends after this segment
-  lineEndPaintAdvances: number[] // Painted width contribution when a line ends after this segment
-  kinds: SegmentBreakKind[] // Break behavior per segment, e.g. ['text', 'space', 'text']
-  simpleLineWalkFastPath: boolean // Normal text can use the simpler old line walker across all layout APIs
-  segLevels: Int8Array | null // Rich-path bidi metadata for custom rendering; layout() never reads it
-  breakableFitAdvances: (number[] | null)[] // Per-grapheme fit advances for breakable segments, else null
-  discretionaryHyphenWidth: number // Visible width added when a soft hyphen is chosen as the break
-  tabStopAdvance: number // Absolute advance between tab stops for pre-wrap tab segments
-  chunks: PreparedLineChunk[] // Precompiled hard-break chunks for line walking
-}
-
-// Keep the main prepared handle opaque so the public API does not accidentally
-// calcify around the current parallel-array representation.
+  widths: number[] // Segment widths, e.g. [42.5, 4.4, 37.2]
+  lineEndFitAdvances: number[] // Width contribution when a line ends after this segment
+  lineEndPaintAdvances: number[] // Painted width contribution when a line ends after this segment
+  kinds: SegmentBreakKind[] // Break behavior per segment, e.g. ['text', 'space', 'text']
+  simpleLineWalkFastPath: boolean // Normal text can use the simpler old line walker across all layout APIs
+  segLevels: Int8Array | null // Rich-path bidi metadata for custom rendering; layout() never reads it
+  breakableFitAdvances: (number[] | null)[] // Per-grapheme fit advances for breakable segments, else null
+  discretionaryHyphenWidth: number // Visible width added when a soft hyphen is chosen as the break
+  tabStopAdvance: number // Absolute advance between tab stops for pre-wrap tab segments
+  chunks: PreparedLineChunk[] // Precompiled hard-break chunks for line walking
+  chunkBySegment: Uint32Array | null // Segment index -> chunk index side table; null unless built with segments and chunks.length > 1
+}
+
 export type PreparedText = {
   readonly [preparedTextBrand]: true
 }
 
 type InternalPreparedText = PreparedText & PreparedCore
 
-// Rich/diagnostic variant that still exposes the structural segment data.
-// Treat this as the unstable escape hatch for experiments and custom rendering.
 export type PreparedTextWithSegments = InternalPreparedText & {
-  segments: string[] // Segment text aligned with the parallel arrays, e.g. ['hello', ' ', 'world']
+  segments: string[] // Segment text aligned with the parallel arrays, e.g. ['hello', ' ', 'world']
 }
 
 export type LayoutCursor = {
-  segmentIndex: number // Segment index in `segments`
-  graphemeIndex: number // Grapheme index within that segment; `0` at segment boundaries
+  segmentIndex: number // Segment index in `segments`
+  graphemeIndex: number // Grapheme index within that segment; `0` at segment boundaries
 }
 
 export type LayoutResult = {
-  lineCount: number // Number of wrapped lines, e.g. 3
-  height: number // Total block height, e.g. lineCount * lineHeight = 57
+  lineCount: number // Number of wrapped lines, e.g. 3
+  height: number // Total block height, e.g. lineCount * lineHeight = 57
 }
 
 export type LineStats = {
@@ -129,20 +91,20 @@ export type LineStats = {
 }
 
 export type LayoutLine = {
-  text: string // Full text content of this line, e.g. 'hello world'
-  width: number // Measured width of this line, e.g. 87.5
-  start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
-  end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
+  text: string // Full text content of this line, e.g. 'hello world'
+  width: number // Measured width of this line, e.g. 87.5
+  start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
+  end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
 }
 
 export type LayoutLineRange = {
-  width: number // Measured width of this line, e.g. 87.5
-  start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
-  end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
+  width: number // Measured width of this line, e.g. 87.5
+  start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
+  end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
 }
 
 export type LayoutLinesResult = LayoutResult & {
-  lines: LayoutLine[] // Per-line text/width pairs for custom rendering
+  lines: LayoutLine[] // Per-line text/width pairs for custom rendering
 }
 
 export type WordBreakMode = AnalysisWordBreakMode
@@ -152,15 +114,12 @@ export type PrepareOptions = {
   wordBreak?: WordBreakMode
 }
 
-// Internal hard-break chunk hint for the line walker. Not public because
-// callers should not depend on the current chunking representation.
 type PreparedLineChunk = {
   startSegmentIndex: number
   endSegmentIndex: number
   consumedEndSegmentIndex: number
 }
 
-// --- Public API ---
 
 function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | PreparedTextWithSegments {
   if (includeSegments) {
@@ -175,6 +134,7 @@ function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | P
       discretionaryHyphenWidth: 0,
       tabStopAdvance: 0,
       chunks: [],
+      chunkBySegment: null,
       segments: [],
     } as unknown as PreparedTextWithSegments
   }
@@ -189,6 +149,7 @@ function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | P
     discretionaryHyphenWidth: 0,
     tabStopAdvance: 0,
     chunks: [],
+    chunkBySegment: null,
   } as unknown as InternalPreparedText
 }
 
@@ -472,6 +433,19 @@ function measureAnalysis(
 
   const chunks = mapAnalysisChunksToPreparedChunks(analysis.chunks, preparedStartByAnalysisIndex, widths.length)
   const segLevels = segStarts === null ? null : computeSegmentLevels(analysis.normalized, segStarts)
+
+  let chunkBySegment: Uint32Array | null = null // Stays null unless includeSegments is set and there is more than one chunk
+  if (includeSegments && chunks.length > 1) {
+    chunkBySegment = new Uint32Array(widths.length) // One slot per prepared segment
+    let c = 0 // Chunk cursor; it only moves forward as i grows
+    for (let i = 0; i < widths.length; i++) { // NOTE(review): presumably chunks are ordered by ascending consumedEndSegmentIndex; confirm
+      while (c < chunks.length && i >= chunks[c]!.consumedEndSegmentIndex) { // Skip chunks whose consumed end is at or before segment i
+        c++
+      }
+      chunkBySegment[i] = c // Equals chunks.length when no remaining chunk has a consumed end past i
+    }
+  }
+
   if (segments !== null) {
     return {
       widths,
@@ -484,6 +458,7 @@ function measureAnalysis(
       discretionaryHyphenWidth,
       tabStopAdvance,
       chunks,
+      chunkBySegment,
       segments,
     } as unknown as PreparedTextWithSegments
   }
@@ -498,6 +473,7 @@ function measureAnalysis(
     discretionaryHyphenWidth,
     tabStopAdvance,
     chunks,
+    chunkBySegment,
   } as unknown as InternalPreparedText
 }
 
@@ -542,26 +518,10 @@ function prepareInternal(
   return measureAnalysis(analysis, font, includeSegments, wordBreak)
 }
 
-// Prepare text for layout. Segments the text, measures each segment via canvas,
-// and stores the widths for fast relayout at any width. Call once per text block
-// (e.g. when a comment first appears). The result is width-independent — the
-// same PreparedText can be laid out at any maxWidth and lineHeight via layout().
-//
-// Steps:
-//   1. Normalize collapsible whitespace (CSS white-space: normal behavior)
-//   2. Segment via Intl.Segmenter (handles CJK, Thai, etc.)
-//   3. Merge punctuation into preceding word ("better." as one unit)
-//   4. Split CJK words into individual graphemes (per-character line breaks)
-//   5. Measure each segment via canvas measureText, cache by (segment, font)
-//   6. Pre-measure graphemes of long words (for overflow-wrap: break-word)
-//   7. Correct emoji canvas inflation (auto-detected per font size)
-//   8. Optionally compute rich-path bidi metadata for custom renderers
 export function prepare(text: string, font: string, options?: PrepareOptions): PreparedText {
   return prepareInternal(text, font, false, options) as PreparedText
 }
 
-// Rich variant used by callers that need enough information to render the
-// laid-out lines themselves.
 export function prepareWithSegments(text: string, font: string, options?: PrepareOptions): PreparedTextWithSegments {
   return prepareInternal(text, font, true, options) as PreparedTextWithSegments
 }
@@ -570,19 +530,7 @@ function getInternalPrepared(prepared: PreparedText): InternalPreparedText {
   return prepared as InternalPreparedText
 }
 
-// Layout prepared text at a given max width and caller-provided lineHeight.
-// Pure arithmetic on cached widths — no canvas calls, no DOM reads, no string
-// operations, no allocations.
-// ~0.0002ms per text block. Call on every resize.
-//
-// Line breaking rules (matching CSS white-space: normal + overflow-wrap: break-word):
-//   - Break before any non-space segment that would overflow the line
-//   - Trailing whitespace hangs past the line edge (doesn't trigger breaks)
-//   - Segments wider than maxWidth are broken at grapheme boundaries
 export function layout(prepared: PreparedText, maxWidth: number, lineHeight: number): LayoutResult {
-  // Keep the resize hot path specialized. `layoutWithLines()` shares the same
-  // break semantics but also tracks line ranges; the extra bookkeeping is too
-  // expensive to pay on every hot-path `layout()` call.
   const lineCount = countPreparedLines(getInternalPrepared(prepared), maxWidth)
   return { lineCount, height: lineCount * lineHeight }
 }
@@ -652,8 +600,6 @@ export function materializeLineRange(
   )
 }
 
-// Batch low-level line-range pass. This is the non-materializing counterpart
-// to layoutWithLines(), useful for shrinkwrap and other aggregate stats work.
 export function walkLineRanges(
   prepared: PreparedTextWithSegments,
   maxWidth: number,
@@ -683,9 +629,6 @@ export function measureLineStats(
   return measurePreparedLineGeometry(getInternalPrepared(prepared), maxWidth)
 }
 
-// Intrinsic-width helper for rich/userland layout work. This asks "how wide is
-// the prepared text when container width is not the thing forcing wraps?".
-// Explicit hard breaks still count, so this returns the widest forced line.
 export function measureNaturalWidth(prepared: PreparedTextWithSegments): number {
   let maxWidth = 0
   walkPreparedLinesRaw(getInternalPrepared(prepared), Number.POSITIVE_INFINITY, width => {
@@ -746,10 +689,6 @@ export function layoutNextLineRange(
   )
 }
 
-// Rich layout API for callers that want the actual line contents and widths.
-// Caller still supplies lineHeight at layout time. Mirrors layout()'s break
-// decisions, but keeps extra per-line bookkeeping so it should stay off the
-// resize hot path.
 export function layoutWithLines(prepared: PreparedTextWithSegments, maxWidth: number, lineHeight: number): LayoutLinesResult {
   const lines: LayoutLine[] = []
   if (prepared.widths.length === 0) return { lineCount: 0, height: 0, lines }

diff --git a/src/line-break.ts b/src/line-break.ts
@@ -20,6 +20,7 @@ export type PreparedLineBreakData = {
     endSegmentIndex: number
     consumedEndSegmentIndex: number
   }[]
+  chunkBySegment: Uint32Array | null
 }
 
 export type InternalLayoutLine = {
@@ -82,6 +83,11 @@ function fitSoftHyphenBreak(
 }
 
 function findChunkIndexForStart(prepared: PreparedLineBreakData, segmentIndex: number): number {
+  if (prepared.chunkBySegment !== null && segmentIndex >= 0 && segmentIndex < prepared.chunkBySegment.length) { // Side table present and index in range: answer from the table instead of the search below
+    const c = prepared.chunkBySegment[segmentIndex]!
+    return c < prepared.chunks.length ? c : -1 // A stored value that is not a valid chunk index is reported as -1
+  }
+
   let lo = 0
   let hi = prepared.chunks.length