diff --git a/TODO.md b/TODO.md index a65542f5..4e1218eb 100644 --- a/TODO.md +++ b/TODO.md @@ -16,8 +16,6 @@ Current priorities: - Expand the sampled font matrix only where a canary still looks genuinely imperfect. - Treat strongly font-sensitive or shaping-sensitive misses as boundary-finding for the current architecture, not automatic invitations for another local glue rule. - Keep the hot `layout()` path simple and allocation-light while the rich path absorbs more userland layout needs. -- If chunk-heavy manual layout keeps growing, consider a stateful streaming variant or a cursor-carried chunk hint so sequential `layoutNextLine()` flows can stay overall linear instead of paying a lookup per emitted line. -- If arbitrary interior rich cursors become common, consider a compact `segmentIndex -> chunkIndex` side table, ideally only on rich prepared handles or only when `chunks.length > 1`. 3. Demo work diff --git a/src/layout.ts b/src/layout.ts index ec8e0e8b..dd34c30d 100644 --- a/src/layout.ts +++ b/src/layout.ts @@ -1,37 +1,3 @@ -// Text measurement for browser environments using canvas measureText. -// -// Problem: DOM-based text measurement (getBoundingClientRect, offsetHeight) -// forces synchronous layout reflow. When components independently measure text, -// each measurement triggers a reflow of the entire document. This creates -// read/write interleaving that can cost 30ms+ per frame for 500 text blocks. -// -// Solution: two-phase measurement centered around canvas measureText. -// prepare(text, font) — segments text via Intl.Segmenter, measures each word -// via canvas, caches widths, and does one cached DOM calibration read per -// font when emoji correction is needed. Call once when text first appears. -// layout(prepared, maxWidth, lineHeight) — walks cached word widths with pure -// arithmetic to count lines and compute height. Call on every resize. -// ~0.0002ms per text. -// -// i18n: Intl.Segmenter handles CJK (per-character breaking), Thai, Arabic, etc. -// Bidi: simplified rich-path metadata for mixed LTR/RTL custom rendering. -// Punctuation merging: "better." measured as one unit (matches CSS behavior). -// Trailing whitespace: hangs past line edge without triggering breaks (CSS behavior). -// overflow-wrap: pre-measured grapheme widths enable character-level word breaking. -// -// Emoji correction: Chrome/Firefox canvas measures emoji wider than DOM at font -// sizes <24px on macOS (Apple Color Emoji). The inflation is constant per emoji -// grapheme at a given size, font-independent. Auto-detected by comparing canvas -// vs actual DOM emoji width (one cached DOM read per font). Safari canvas and -// DOM agree (both wider than fontSize), so correction = 0 there. -// -// Limitations: -// - system-ui font: canvas resolves to different optical variants than DOM on macOS. -// Use named fonts (Helvetica, Inter, etc.) for guaranteed accuracy. -// See RESEARCH.md "Discovery: system-ui font resolution mismatch". -// -// Based on Sebastian Markbage's text-layout research (github.com/chenglou/text-layout). - import { computeSegmentLevels } from './bidi.js' import { analyzeText, @@ -82,45 +48,41 @@ function getSharedGraphemeSegmenter(): Intl.Segmenter { return sharedGraphemeSegmenter } -// --- Public types --- declare const preparedTextBrand: unique symbol type PreparedCore = { - widths: number[] // Segment widths, e.g. [42.5, 4.4, 37.2] - lineEndFitAdvances: number[] // Width contribution when a line ends after this segment - lineEndPaintAdvances: number[] // Painted width contribution when a line ends after this segment - kinds: SegmentBreakKind[] // Break behavior per segment, e.g. ['text', 'space', 'text'] - simpleLineWalkFastPath: boolean // Normal text can use the simpler old line walker across all layout APIs - segLevels: Int8Array | null // Rich-path bidi metadata for custom rendering; layout() never reads it - breakableFitAdvances: (number[] | null)[] // Per-grapheme fit advances for breakable segments, else null - discretionaryHyphenWidth: number // Visible width added when a soft hyphen is chosen as the break - tabStopAdvance: number // Absolute advance between tab stops for pre-wrap tab segments - chunks: PreparedLineChunk[] // Precompiled hard-break chunks for line walking -} - -// Keep the main prepared handle opaque so the public API does not accidentally -// calcify around the current parallel-array representation. + widths: number[] + lineEndFitAdvances: number[] + lineEndPaintAdvances: number[] + kinds: SegmentBreakKind[] + simpleLineWalkFastPath: boolean + segLevels: Int8Array | null + breakableFitAdvances: (number[] | null)[] + discretionaryHyphenWidth: number + tabStopAdvance: number + chunks: PreparedLineChunk[] + chunkBySegment: Uint32Array | null +} + export type PreparedText = { readonly [preparedTextBrand]: true } type InternalPreparedText = PreparedText & PreparedCore -// Rich/diagnostic variant that still exposes the structural segment data. -// Treat this as the unstable escape hatch for experiments and custom rendering. export type PreparedTextWithSegments = InternalPreparedText & { - segments: string[] // Segment text aligned with the parallel arrays, e.g. ['hello', ' ', 'world'] + segments: string[] } export type LayoutCursor = { - segmentIndex: number // Segment index in `segments` - graphemeIndex: number // Grapheme index within that segment; `0` at segment boundaries + segmentIndex: number + graphemeIndex: number } export type LayoutResult = { - lineCount: number // Number of wrapped lines, e.g. 3 - height: number // Total block height, e.g. lineCount * lineHeight = 57 + lineCount: number + height: number } export type LineStats = { @@ -129,20 +91,20 @@ export type LineStats = { } export type LayoutLine = { - text: string // Full text content of this line, e.g. 'hello world' - width: number // Measured width of this line, e.g. 87.5 - start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes - end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes + text: string + width: number + start: LayoutCursor + end: LayoutCursor } export type LayoutLineRange = { - width: number // Measured width of this line, e.g. 87.5 - start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes - end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes + width: number + start: LayoutCursor + end: LayoutCursor } export type LayoutLinesResult = LayoutResult & { - lines: LayoutLine[] // Per-line text/width pairs for custom rendering + lines: LayoutLine[] } export type WordBreakMode = AnalysisWordBreakMode @@ -152,15 +114,12 @@ export type PrepareOptions = { wordBreak?: WordBreakMode } -// Internal hard-break chunk hint for the line walker. Not public because -// callers should not depend on the current chunking representation. type PreparedLineChunk = { startSegmentIndex: number endSegmentIndex: number consumedEndSegmentIndex: number } -// --- Public API --- function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | PreparedTextWithSegments { if (includeSegments) { @@ -175,6 +134,7 @@ function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | P discretionaryHyphenWidth: 0, tabStopAdvance: 0, chunks: [], + chunkBySegment: null, segments: [], } as unknown as PreparedTextWithSegments } @@ -189,6 +149,7 @@ function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | P discretionaryHyphenWidth: 0, tabStopAdvance: 0, chunks: [], + chunkBySegment: null, } as unknown as InternalPreparedText } @@ -472,6 +433,19 @@ function measureAnalysis( const chunks = mapAnalysisChunksToPreparedChunks(analysis.chunks, preparedStartByAnalysisIndex, widths.length) const segLevels = segStarts === null ? null : computeSegmentLevels(analysis.normalized, segStarts) + + let chunkBySegment: Uint32Array | null = null + if (includeSegments && chunks.length > 1) { + chunkBySegment = new Uint32Array(widths.length) + let c = 0 + for (let i = 0; i < widths.length; i++) { + while (c < chunks.length && i >= chunks[c]!.consumedEndSegmentIndex) { + c++ + } + chunkBySegment[i] = c + } + } + if (segments !== null) { return { widths, @@ -484,6 +458,7 @@ function measureAnalysis( discretionaryHyphenWidth, tabStopAdvance, chunks, + chunkBySegment, segments, } as unknown as PreparedTextWithSegments } @@ -498,6 +473,7 @@ function measureAnalysis( discretionaryHyphenWidth, tabStopAdvance, chunks, + chunkBySegment, } as unknown as InternalPreparedText } @@ -542,26 +518,10 @@ function prepareInternal( return measureAnalysis(analysis, font, includeSegments, wordBreak) } -// Prepare text for layout. Segments the text, measures each segment via canvas, -// and stores the widths for fast relayout at any width. Call once per text block -// (e.g. when a comment first appears). The result is width-independent — the -// same PreparedText can be laid out at any maxWidth and lineHeight via layout(). -// -// Steps: -// 1. Normalize collapsible whitespace (CSS white-space: normal behavior) -// 2. Segment via Intl.Segmenter (handles CJK, Thai, etc.) -// 3. Merge punctuation into preceding word ("better." as one unit) -// 4. Split CJK words into individual graphemes (per-character line breaks) -// 5. Measure each segment via canvas measureText, cache by (segment, font) -// 6. Pre-measure graphemes of long words (for overflow-wrap: break-word) -// 7. Correct emoji canvas inflation (auto-detected per font size) -// 8. Optionally compute rich-path bidi metadata for custom renderers export function prepare(text: string, font: string, options?: PrepareOptions): PreparedText { return prepareInternal(text, font, false, options) as PreparedText } -// Rich variant used by callers that need enough information to render the -// laid-out lines themselves. export function prepareWithSegments(text: string, font: string, options?: PrepareOptions): PreparedTextWithSegments { return prepareInternal(text, font, true, options) as PreparedTextWithSegments } @@ -570,19 +530,7 @@ function getInternalPrepared(prepared: PreparedText): InternalPreparedText { return prepared as InternalPreparedText } -// Layout prepared text at a given max width and caller-provided lineHeight. -// Pure arithmetic on cached widths — no canvas calls, no DOM reads, no string -// operations, no allocations. -// ~0.0002ms per text block. Call on every resize. -// -// Line breaking rules (matching CSS white-space: normal + overflow-wrap: break-word): -// - Break before any non-space segment that would overflow the line -// - Trailing whitespace hangs past the line edge (doesn't trigger breaks) -// - Segments wider than maxWidth are broken at grapheme boundaries export function layout(prepared: PreparedText, maxWidth: number, lineHeight: number): LayoutResult { - // Keep the resize hot path specialized. `layoutWithLines()` shares the same - // break semantics but also tracks line ranges; the extra bookkeeping is too - // expensive to pay on every hot-path `layout()` call. const lineCount = countPreparedLines(getInternalPrepared(prepared), maxWidth) return { lineCount, height: lineCount * lineHeight } } @@ -652,8 +600,6 @@ export function materializeLineRange( ) } -// Batch low-level line-range pass. This is the non-materializing counterpart -// to layoutWithLines(), useful for shrinkwrap and other aggregate stats work. export function walkLineRanges( prepared: PreparedTextWithSegments, maxWidth: number, @@ -683,9 +629,6 @@ export function measureLineStats( return measurePreparedLineGeometry(getInternalPrepared(prepared), maxWidth) } -// Intrinsic-width helper for rich/userland layout work. This asks "how wide is -// the prepared text when container width is not the thing forcing wraps?". -// Explicit hard breaks still count, so this returns the widest forced line. export function measureNaturalWidth(prepared: PreparedTextWithSegments): number { let maxWidth = 0 walkPreparedLinesRaw(getInternalPrepared(prepared), Number.POSITIVE_INFINITY, width => { @@ -746,10 +689,6 @@ export function layoutNextLineRange( ) } -// Rich layout API for callers that want the actual line contents and widths. -// Caller still supplies lineHeight at layout time. Mirrors layout()'s break -// decisions, but keeps extra per-line bookkeeping so it should stay off the -// resize hot path. export function layoutWithLines(prepared: PreparedTextWithSegments, maxWidth: number, lineHeight: number): LayoutLinesResult { const lines: LayoutLine[] = [] if (prepared.widths.length === 0) return { lineCount: 0, height: 0, lines } diff --git a/src/line-break.ts b/src/line-break.ts index 363e0d8c..ee1a6899 100644 --- a/src/line-break.ts +++ b/src/line-break.ts @@ -20,6 +20,7 @@ export type PreparedLineBreakData = { endSegmentIndex: number consumedEndSegmentIndex: number }[] + chunkBySegment: Uint32Array | null } export type InternalLayoutLine = { @@ -82,6 +83,11 @@ function fitSoftHyphenBreak( } function findChunkIndexForStart(prepared: PreparedLineBreakData, segmentIndex: number): number { + if (prepared.chunkBySegment !== null && segmentIndex >= 0 && segmentIndex < prepared.chunkBySegment.length) { + const c = prepared.chunkBySegment[segmentIndex]! + return c < prepared.chunks.length ? c : -1 + } + let lo = 0 let hi = prepared.chunks.length