Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ Current priorities:
- Expand the sampled font matrix only where a canary still looks genuinely imperfect.
- Treat strongly font-sensitive or shaping-sensitive misses as boundary-finding for the current architecture, not automatic invitations for another local glue rule.
- Keep the hot `layout()` path simple and allocation-light while the rich path absorbs more userland layout needs.
- If chunk-heavy manual layout keeps growing, consider a stateful streaming variant or a cursor-carried chunk hint so sequential `layoutNextLine()` flows can stay overall linear instead of paying a lookup per emitted line.
- If arbitrary interior rich cursors become common, consider a compact `segmentIndex -> chunkIndex` side table, ideally only on rich prepared handles or only when `chunks.length > 1`.

3. Demo work

Expand Down
147 changes: 43 additions & 104 deletions src/layout.ts
Original file line number Diff line number Diff line change
@@ -1,37 +1,3 @@
// Text measurement for browser environments using canvas measureText.
//
// Problem: DOM-based text measurement (getBoundingClientRect, offsetHeight)
// forces synchronous layout reflow. When components independently measure text,
// each measurement triggers a reflow of the entire document. This creates
// read/write interleaving that can cost 30ms+ per frame for 500 text blocks.
//
// Solution: two-phase measurement centered around canvas measureText.
// prepare(text, font) — segments text via Intl.Segmenter, measures each word
// via canvas, caches widths, and does one cached DOM calibration read per
// font when emoji correction is needed. Call once when text first appears.
// layout(prepared, maxWidth, lineHeight) — walks cached word widths with pure
// arithmetic to count lines and compute height. Call on every resize.
// ~0.0002ms per text.
//
// i18n: Intl.Segmenter handles CJK (per-character breaking), Thai, Arabic, etc.
// Bidi: simplified rich-path metadata for mixed LTR/RTL custom rendering.
// Punctuation merging: "better." measured as one unit (matches CSS behavior).
// Trailing whitespace: hangs past line edge without triggering breaks (CSS behavior).
// overflow-wrap: pre-measured grapheme widths enable character-level word breaking.
//
// Emoji correction: Chrome/Firefox canvas measures emoji wider than DOM at font
// sizes <24px on macOS (Apple Color Emoji). The inflation is constant per emoji
// grapheme at a given size, font-independent. Auto-detected by comparing canvas
// vs actual DOM emoji width (one cached DOM read per font). Safari canvas and
// DOM agree (both wider than fontSize), so correction = 0 there.
//
// Limitations:
// - system-ui font: canvas resolves to different optical variants than DOM on macOS.
// Use named fonts (Helvetica, Inter, etc.) for guaranteed accuracy.
// See RESEARCH.md "Discovery: system-ui font resolution mismatch".
//
// Based on Sebastian Markbage's text-layout research (github.com/chenglou/text-layout).

import { computeSegmentLevels } from './bidi.js'
import {
analyzeText,
Expand Down Expand Up @@ -82,45 +48,41 @@ function getSharedGraphemeSegmenter(): Intl.Segmenter {
return sharedGraphemeSegmenter
}

// --- Public types ---

// Type-level brand key used by PreparedText. Because it is only `declare`d,
// no runtime binding is emitted for it.
declare const preparedTextBrand: unique symbol

// Internal parallel-array representation behind every prepared handle.
// Entries at the same index in the per-segment arrays describe the same segment.
type PreparedCore = {
  widths: number[] // Segment widths, e.g. [42.5, 4.4, 37.2]
  lineEndFitAdvances: number[] // Width contribution when a line ends after this segment
  lineEndPaintAdvances: number[] // Painted width contribution when a line ends after this segment
  kinds: SegmentBreakKind[] // Break behavior per segment, e.g. ['text', 'space', 'text']
  simpleLineWalkFastPath: boolean // Normal text can use the simpler old line walker across all layout APIs
  segLevels: Int8Array | null // Rich-path bidi metadata for custom rendering; layout() never reads it
  breakableFitAdvances: (number[] | null)[] // Per-grapheme fit advances for breakable segments, else null
  discretionaryHyphenWidth: number // Visible width added when a soft hyphen is chosen as the break
  tabStopAdvance: number // Absolute advance between tab stops for pre-wrap tab segments
  chunks: PreparedLineChunk[] // Precompiled hard-break chunks for line walking
  // segmentIndex -> chunkIndex side table. Only built for rich (segment-carrying)
  // handles with more than one chunk; null otherwise. An entry may equal
  // `chunks.length` when the segment lies past every chunk's consumed end.
  chunkBySegment: Uint32Array | null
}

// Keep the main prepared handle opaque so the public API does not accidentally
// calcify around the current parallel-array representation.
export type PreparedText = {
  readonly [preparedTextBrand]: true
}

// Internal view of a prepared handle: the opaque public brand intersected with
// the PreparedCore data the layout code actually reads.
type InternalPreparedText = PreparedText & PreparedCore

// Rich/diagnostic variant that still exposes the structural segment data.
// Treat this as the unstable escape hatch for experiments and custom rendering.
export type PreparedTextWithSegments = InternalPreparedText & {
  segments: string[] // Segment text aligned with the parallel arrays, e.g. ['hello', ' ', 'world']
}

// Position inside prepared text, addressed by segment and then by grapheme.
export type LayoutCursor = {
  segmentIndex: number // Segment index in `segments`
  graphemeIndex: number // Grapheme index within that segment; `0` at segment boundaries
}

// Aggregate result of laying out a prepared text block.
export type LayoutResult = {
  lineCount: number // Number of wrapped lines, e.g. 3
  height: number // Total block height, e.g. lineCount * lineHeight = 57
}

export type LineStats = {
Expand All @@ -129,20 +91,20 @@ export type LineStats = {
}

// One materialized line: its text, measured width, and cursor range.
export type LayoutLine = {
  text: string // Full text content of this line, e.g. 'hello world'
  width: number // Measured width of this line, e.g. 87.5
  start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
  end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
}

// Same shape as LayoutLine minus the `text` field: measured width and cursor range only.
export type LayoutLineRange = {
  width: number // Measured width of this line, e.g. 87.5
  start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
  end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
}

// LayoutResult extended with the individual lines.
export type LayoutLinesResult = LayoutResult & {
  lines: LayoutLine[] // Per-line text/width pairs for custom rendering
}

// Public name for the word-break mode accepted via `PrepareOptions.wordBreak`;
// a plain alias of AnalysisWordBreakMode.
export type WordBreakMode = AnalysisWordBreakMode
Expand All @@ -152,15 +114,12 @@ export type PrepareOptions = {
wordBreak?: WordBreakMode
}

// Internal hard-break chunk hint for the line walker. Not public because
// callers should not depend on the current chunking representation.
type PreparedLineChunk = {
// First segment index of the chunk (presumably inclusive — confirm against the line walker).
startSegmentIndex: number
// End segment index of the chunk's content (presumably exclusive — confirm).
endSegmentIndex: number
// Segments at or past this index are attributed to a later chunk when the
// chunkBySegment side table is built in measureAnalysis.
consumedEndSegmentIndex: number
}

// --- Public API ---

function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | PreparedTextWithSegments {
if (includeSegments) {
Expand All @@ -175,6 +134,7 @@ function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | P
discretionaryHyphenWidth: 0,
tabStopAdvance: 0,
chunks: [],
chunkBySegment: null,
segments: [],
} as unknown as PreparedTextWithSegments
}
Expand All @@ -189,6 +149,7 @@ function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | P
discretionaryHyphenWidth: 0,
tabStopAdvance: 0,
chunks: [],
chunkBySegment: null,
} as unknown as InternalPreparedText
}

Expand Down Expand Up @@ -472,6 +433,19 @@ function measureAnalysis(

const chunks = mapAnalysisChunksToPreparedChunks(analysis.chunks, preparedStartByAnalysisIndex, widths.length)
const segLevels = segStarts === null ? null : computeSegmentLevels(analysis.normalized, segStarts)

let chunkBySegment: Uint32Array | null = null
if (includeSegments && chunks.length > 1) {
chunkBySegment = new Uint32Array(widths.length)
let c = 0
for (let i = 0; i < widths.length; i++) {
while (c < chunks.length && i >= chunks[c]!.consumedEndSegmentIndex) {
c++
}
chunkBySegment[i] = c
}
}

if (segments !== null) {
return {
widths,
Expand All @@ -484,6 +458,7 @@ function measureAnalysis(
discretionaryHyphenWidth,
tabStopAdvance,
chunks,
chunkBySegment,
segments,
} as unknown as PreparedTextWithSegments
}
Expand All @@ -498,6 +473,7 @@ function measureAnalysis(
discretionaryHyphenWidth,
tabStopAdvance,
chunks,
chunkBySegment,
} as unknown as InternalPreparedText
}

Expand Down Expand Up @@ -542,26 +518,10 @@ function prepareInternal(
return measureAnalysis(analysis, font, includeSegments, wordBreak)
}

// Prepare text for layout. Segments the text, measures each segment via canvas,
// and stores the widths for fast relayout at any width. Call once per text block
// (e.g. when a comment first appears). The result is width-independent — the
// same PreparedText can be laid out at any maxWidth and lineHeight via layout().
//
// Steps:
// 1. Normalize collapsible whitespace (CSS white-space: normal behavior)
// 2. Segment via Intl.Segmenter (handles CJK, Thai, etc.)
// 3. Merge punctuation into preceding word ("better." as one unit)
// 4. Split CJK words into individual graphemes (per-character line breaks)
// 5. Measure each segment via canvas measureText, cache by (segment, font)
// 6. Pre-measure graphemes of long words (for overflow-wrap: break-word)
// 7. Correct emoji canvas inflation (auto-detected per font size)
// 8. Optionally compute rich-path bidi metadata for custom renderers
export function prepare(text: string, font: string, options?: PrepareOptions): PreparedText {
  // Opaque variant: request no segment data and expose only the branded handle type.
  const includeSegments = false
  const handle = prepareInternal(text, font, includeSegments, options)
  return handle as PreparedText
}

// Rich variant used by callers that need enough information to render the
// laid-out lines themselves.
export function prepareWithSegments(text: string, font: string, options?: PrepareOptions): PreparedTextWithSegments {
  // Rich variant: request segment data and expose the segment-carrying handle type.
  const includeSegments = true
  const handle = prepareInternal(text, font, includeSegments, options)
  return handle as PreparedTextWithSegments
}
Expand All @@ -570,19 +530,7 @@ function getInternalPrepared(prepared: PreparedText): InternalPreparedText {
return prepared as InternalPreparedText
}

// Layout prepared text at a given max width and caller-provided lineHeight.
// Pure arithmetic on cached widths — no canvas calls, no DOM reads, no string
// operations, no allocations.
// ~0.0002ms per text block. Call on every resize.
//
// Line breaking rules (matching CSS white-space: normal + overflow-wrap: break-word):
// - Break before any non-space segment that would overflow the line
// - Trailing whitespace hangs past the line edge (doesn't trigger breaks)
// - Segments wider than maxWidth are broken at grapheme boundaries
export function layout(prepared: PreparedText, maxWidth: number, lineHeight: number): LayoutResult {
  // This is the resize hot path, so it stays specialized: `layoutWithLines()`
  // applies the same break semantics but also tracks line ranges, and that
  // bookkeeping is too costly to pay on every `layout()` call.
  const internal = getInternalPrepared(prepared)
  const lineCount = countPreparedLines(internal, maxWidth)
  const height = lineCount * lineHeight
  return { lineCount, height }
}
Expand Down Expand Up @@ -652,8 +600,6 @@ export function materializeLineRange(
)
}

// Batch low-level line-range pass. This is the non-materializing counterpart
// to layoutWithLines(), useful for shrinkwrap and other aggregate stats work.
export function walkLineRanges(
prepared: PreparedTextWithSegments,
maxWidth: number,
Expand Down Expand Up @@ -683,9 +629,6 @@ export function measureLineStats(
return measurePreparedLineGeometry(getInternalPrepared(prepared), maxWidth)
}

// Intrinsic-width helper for rich/userland layout work. This asks "how wide is
// the prepared text when container width is not the thing forcing wraps?".
// Explicit hard breaks still count, so this returns the widest forced line.
export function measureNaturalWidth(prepared: PreparedTextWithSegments): number {
let maxWidth = 0
walkPreparedLinesRaw(getInternalPrepared(prepared), Number.POSITIVE_INFINITY, width => {
Expand Down Expand Up @@ -746,10 +689,6 @@ export function layoutNextLineRange(
)
}

// Rich layout API for callers that want the actual line contents and widths.
// Caller still supplies lineHeight at layout time. Mirrors layout()'s break
// decisions, but keeps extra per-line bookkeeping so it should stay off the
// resize hot path.
export function layoutWithLines(prepared: PreparedTextWithSegments, maxWidth: number, lineHeight: number): LayoutLinesResult {
const lines: LayoutLine[] = []
if (prepared.widths.length === 0) return { lineCount: 0, height: 0, lines }
Expand Down
6 changes: 6 additions & 0 deletions src/line-break.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export type PreparedLineBreakData = {
endSegmentIndex: number
consumedEndSegmentIndex: number
}[]
chunkBySegment: Uint32Array | null
}

export type InternalLayoutLine = {
Expand Down Expand Up @@ -82,6 +83,11 @@ function fitSoftHyphenBreak(
}

function findChunkIndexForStart(prepared: PreparedLineBreakData, segmentIndex: number): number {
if (prepared.chunkBySegment !== null && segmentIndex >= 0 && segmentIndex < prepared.chunkBySegment.length) {
const c = prepared.chunkBySegment[segmentIndex]!
return c < prepared.chunks.length ? c : -1
}

let lo = 0
let hi = prepared.chunks.length

Expand Down