diff --git a/examples/ascii-arts/lynx.config.ts b/examples/ascii-arts/lynx.config.ts
index 857da94..da2bc0b 100644
--- a/examples/ascii-arts/lynx.config.ts
+++ b/examples/ascii-arts/lynx.config.ts
@@ -4,6 +4,10 @@ import { pluginReactLynx } from '@lynx-js/react-rsbuild-plugin'
import { pluginTypeCheck } from '@rsbuild/plugin-type-check'
export default defineConfig({
+ environments: {
+ lynx: {},
+ web: {},
+ },
source: {
entry: {
torus: './src/torus.tsx',
diff --git a/examples/basic/lynx.config.ts b/examples/basic/lynx.config.ts
index eeeb8c5..9fcdc69 100644
--- a/examples/basic/lynx.config.ts
+++ b/examples/basic/lynx.config.ts
@@ -4,6 +4,10 @@ import { pluginReactLynx } from '@lynx-js/react-rsbuild-plugin'
import { pluginTypeCheck } from '@rsbuild/plugin-type-check'
export default defineConfig({
+ environments: {
+ lynx: {},
+ web: {},
+ },
source: {
entry: {
main: './src/index.tsx',
diff --git a/examples/basic/src/accuracy.tsx b/examples/basic/src/accuracy.tsx
index 55b7af5..ecffa96 100644
--- a/examples/basic/src/accuracy.tsx
+++ b/examples/basic/src/accuracy.tsx
@@ -7,6 +7,11 @@ import {
} from 'lynx-pretext'
import { TEXTS, WIDTHS, FONT_SIZE, LINE_HEIGHT } from '../src/test-data'
+// Resolve the native text-measurement function once, when this module loads.
+// The `typeof` guards avoid a ReferenceError/TypeError when the `lynx` global
+// or its `getTextInfo` method is absent (the case this page reports as
+// "unavailable on Web"); the value is then `null` and callers must check it.
+// `bind(lynx)` lets the function be called without the `lynx.` receiver.
+const nativeGetTextInfo =
+ typeof lynx !== 'undefined' && typeof lynx.getTextInfo === 'function'
+ ? lynx.getTextInfo.bind(lynx)
+ : null
+
type TestResult = {
label: string
width: number
@@ -31,6 +36,19 @@ type Summary = {
}
function runAccuracyCheck(): Summary {
+ if (nativeGetTextInfo === null) {
+ return {
+ total: 0,
+ passed: 0,
+ failed: 0,
+ passRate: '0.0',
+ englishTotal: 0,
+ englishPassed: 0,
+ englishPassRate: '0.0',
+ results: [],
+ }
+ }
+
const results: TestResult[] = []
let total = 0
let passed = 0
@@ -49,7 +67,7 @@ function runAccuracyCheck(): Summary {
for (const width of WIDTHS) {
// Native oracle: getTextInfo with maxWidth
- const native = lynx.getTextInfo(text, {
+ const native = nativeGetTextInfo(text, {
fontSize: fontSizeStr,
maxWidth: `${width}px`,
})
@@ -131,6 +149,19 @@ export function AccuracyPage() {
)
}
+ if (nativeGetTextInfo === null) {
+ return (
+
+
+ Accuracy Validation
+
+
+ lynx.getTextInfo is unavailable on Web, so the native-oracle comparison page is disabled there.
+
+
+ )
+ }
+
const displayResults = showFailuresOnly
? summary.results.filter(r => !r.pass)
: summary.results
diff --git a/examples/basic/src/basic-height.tsx b/examples/basic/src/basic-height.tsx
index b27ac81..5fbedd3 100644
--- a/examples/basic/src/basic-height.tsx
+++ b/examples/basic/src/basic-height.tsx
@@ -11,6 +11,10 @@ const SAMPLE_TEXT =
const FONT_SIZE = 16
const LINE_HEIGHT = 24
const FONT = `${FONT_SIZE}px`
+// Native text-measurement function, or `null` when the `lynx` global or its
+// `getTextInfo` method is missing. Evaluated once at module load; the `typeof`
+// checks keep the lookup from throwing where `lynx` is not defined, and
+// `bind(lynx)` allows calling it without the `lynx.` receiver.
+const nativeGetTextInfo =
+ typeof lynx !== 'undefined' && typeof lynx.getTextInfo === 'function'
+ ? lynx.getTextInfo.bind(lynx)
+ : null
export function BasicHeightPage() {
const [maxWidth, setMaxWidth] = useState(360)
@@ -23,16 +27,22 @@ export function BasicHeightPage() {
const prepared = prepare(SAMPLE_TEXT, FONT)
const result = layout(prepared, contentWidth, LINE_HEIGHT)
- const native = lynx.getTextInfo(SAMPLE_TEXT, {
+ const native = nativeGetTextInfo?.(SAMPLE_TEXT, {
fontSize: `${FONT_SIZE}px`,
maxWidth: `${contentWidth}px`,
- })
- // Debug: log native result
- console.log('[basic-height] native.getTextInfo:', JSON.stringify(native), 'contentWidth:', contentWidth)
- const nativeContent = native.content ?? [SAMPLE_TEXT]
- const nativeLineCount = nativeContent.length
- const nativeHeight = nativeLineCount * LINE_HEIGHT
- const match = result.lineCount === nativeLineCount
+ }) ?? null
+ if (native !== null) {
+ console.log('[basic-height] native.getTextInfo:', JSON.stringify(native), 'contentWidth:', contentWidth)
+ }
+ const nativeContent = native?.content ?? null
+ const nativeLineCount = nativeContent?.length ?? null
+ const nativeHeight = nativeLineCount === null ? null : nativeLineCount * LINE_HEIGHT
+ const match = nativeLineCount === null ? null : result.lineCount === nativeLineCount
+ const comparisonText = match === null
+ ? 'lynx.getTextInfo is unavailable on Web, so this page only shows the pretext result.'
+ : match
+ ? `Both agree: ${result.lineCount} lines, ${result.height}px height`
+ : `Height diff: ${Math.abs(result.height - nativeHeight!)}px | Lines: pretext=${result.lineCount} native=${nativeLineCount!}`
// BTS FPS tick on every render
btsFpsTick()
@@ -71,10 +81,10 @@ export function BasicHeightPage() {
Native
- {`${nativeHeight}px`}
+ {nativeHeight === null ? 'N/A' : `${nativeHeight}px`}
- {`${nativeLineCount} lines`}
+ {nativeLineCount === null ? 'Unavailable on Web' : `${nativeLineCount} lines`}
@@ -89,14 +99,12 @@ export function BasicHeightPage() {
- {match ? 'MATCH' : 'MISMATCH'}
+ {match === null ? 'WEB FALLBACK' : match ? 'MATCH' : 'MISMATCH'}
- {match
- ? `Both agree: ${result.lineCount} lines, ${result.height}px height`
- : `Height diff: ${Math.abs(result.height - nativeHeight)}px | Lines: pretext=${result.lineCount} native=${nativeLineCount}`}
+ {comparisonText}
@@ -110,7 +118,7 @@ export function BasicHeightPage() {
-
+
0 && normalized.charCodeAt(normalized.length - 1) === 0x20) {
- normalized = normalized.slice(0, -1)
- }
- return normalized
-}
-
-export function normalizeWhitespacePreWrap(text: string): string {
- if (!/[\r\f]/.test(text)) return text.replace(/\r\n/g, '\n')
- return text
- .replace(/\r\n/g, '\n')
- .replace(/[\r\f]/g, '\n')
-}
-
-export function classifySegmentBreakChar(ch: string, whiteSpaceProfile: WhiteSpaceProfile): SegmentBreakKind {
- if (whiteSpaceProfile.preserveOrdinarySpaces || whiteSpaceProfile.preserveHardBreaks) {
- if (ch === ' ') return 'preserved-space'
- if (ch === '\t') return 'tab'
- if (whiteSpaceProfile.preserveHardBreaks && ch === '\n') return 'hard-break'
- }
- if (ch === ' ') return 'space'
- if (ch === '\u00A0' || ch === '\u202F' || ch === '\u2060' || ch === '\uFEFF') {
- return 'glue'
- }
- if (ch === '\u200B') return 'zero-width-break'
- if (ch === '\u00AD') return 'soft-hyphen'
- return 'text'
-}
-
-export function splitSegmentByBreakKind(
- segment: string,
- isWordLike: boolean,
- start: number,
- whiteSpaceProfile: WhiteSpaceProfile,
-): SegmentationPiece[] {
- const pieces: SegmentationPiece[] = []
- let currentKind: SegmentBreakKind | null = null
- let currentText = ''
- let currentStart = start
- let currentWordLike = false
- let offset = 0
-
- for (const ch of segment) {
- const kind = classifySegmentBreakChar(ch, whiteSpaceProfile)
- const wordLike = kind === 'text' && isWordLike
-
- if (currentKind !== null && kind === currentKind && wordLike === currentWordLike) {
- currentText += ch
- offset += ch.length
- continue
- }
-
- if (currentKind !== null) {
- pieces.push({
- text: currentText,
- isWordLike: currentWordLike,
- kind: currentKind,
- start: currentStart,
- })
- }
-
- currentKind = kind
- currentText = ch
- currentStart = start + offset
- currentWordLike = wordLike
- offset += ch.length
- }
-
- if (currentKind !== null) {
- pieces.push({
- text: currentText,
- isWordLike: currentWordLike,
- kind: currentKind,
- start: currentStart,
- })
- }
-
- return pieces
-}
-
-// --- Character sets and merge helpers (US-003) ---
-
-// PrimJS doesn't support \p{} unicode property escapes — use transpiled ranges
-const arabicScriptRe = /[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061C-\u061E\u0620-\u063F\u0641-\u064A\u0656-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u0870-\u0891\u0897-\u08E1\u08E3-\u08FF\uFB50-\uFD3D\uFD40-\uFDCF\uFDF0-\uFDFF\uFE70-\uFE74\uFE76-\uFEFC\u{10E60}-\u{10E7E}\u{10EC2}-\u{10EC7}\u{10ED0}-\u{10ED8}\u{10EFA}-\u{10EFF}\u{1EE00}-\u{1EE03}\u{1EE05}-\u{1EE1F}\u{1EE21}\u{1EE22}\u{1EE24}\u{1EE27}\u{1EE29}-\u{1EE32}\u{1EE34}-\u{1EE37}\u{1EE39}\u{1EE3B}\u{1EE42}\u{1EE47}\u{1EE49}\u{1EE4B}\u{1EE4D}-\u{1EE4F}\u{1EE51}\u{1EE52}\u{1EE54}\u{1EE57}\u{1EE59}\u{1EE5B}\u{1EE5D}\u{1EE5F}\u{1EE61}\u{1EE62}\u{1EE64}\u{1EE67}-\u{1EE6A}\u{1EE6C}-\u{1EE72}\u{1EE74}-\u{1EE77}\u{1EE79}-\u{1EE7C}\u{1EE7E}\u{1EE80}-\u{1EE89}\u{1EE8B}-\u{1EE9B}\u{1EEA1}-\u{1EEA3}\u{1EEA5}-\u{1EEA9}\u{1EEAB}-\u{1EEBB}\u{1EEF0}\u{1EEF1}]/u
-const combiningMarkRe = /[\u0300-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u0897-\u089F\u08CA-\u08E1\u08E3-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u09FE\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0AFA-\u0AFF\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B55-\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C00-\u0C04\u0C3C\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0CF3\u0D00-\u0D03\u0D3B\u0D3C\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D81-\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EBC\u0EC8-\u0ECE\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102B-\u103E\u1056-\u1059\u105E-\u1060\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F\u109A-\u109D\u135D-\u135F\u1712-\u1715\u1732-\u1734\u1752\u1753\u1772\u1773\u17B4-\u17D3\u17DD\u180B-\u180D\u180F\u1885\u1886\u18A9\u1920-\u192B\u1930-\u193B\u1A17-\u1A1B\u1A55-\u1A5E\u1A60-\u1A7C\u1A7F\u1AB0-\u1ADD\u1AE0-\u1AEB\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF4\u1CF7-\u1CF9\u1DC0-\u1DFF\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3099\u309A\uA66F-\uA672\uA674-\uA67D\uA69E\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA82C\uA880\uA881\uA8B4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uA9E5\uAA29-\uAA36
\uAA43\uAA4C\uAA4D\uAA7B-\uAA7D\uAAB0\uAAB2-\uAAB4\uAAB7\uAAB8\uAABE\uAABF\uAAC1\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\u{101FD}\u{102E0}\u{10376}-\u{1037A}\u{10A01}-\u{10A03}\u{10A05}\u{10A06}\u{10A0C}-\u{10A0F}\u{10A38}-\u{10A3A}\u{10A3F}\u{10AE5}\u{10AE6}\u{10D24}-\u{10D27}\u{10D69}-\u{10D6D}\u{10EAB}\u{10EAC}\u{10EFA}-\u{10EFF}\u{10F46}-\u{10F50}\u{10F82}-\u{10F85}\u{11000}-\u{11002}\u{11038}-\u{11046}\u{11070}\u{11073}\u{11074}\u{1107F}-\u{11082}\u{110B0}-\u{110BA}\u{110C2}\u{11100}-\u{11102}\u{11127}-\u{11134}\u{11145}\u{11146}\u{11173}\u{11180}-\u{11182}\u{111B3}-\u{111C0}\u{111C9}-\u{111CC}\u{111CE}\u{111CF}\u{1122C}-\u{11237}\u{1123E}\u{11241}\u{112DF}-\u{112EA}\u{11300}-\u{11303}\u{1133B}\u{1133C}\u{1133E}-\u{11344}\u{11347}\u{11348}\u{1134B}-\u{1134D}\u{11357}\u{11362}\u{11363}\u{11366}-\u{1136C}\u{11370}-\u{11374}\u{113B8}-\u{113C0}\u{113C2}\u{113C5}\u{113C7}-\u{113CA}\u{113CC}-\u{113D0}\u{113D2}\u{113E1}\u{113E2}\u{11435}-\u{11446}\u{1145E}\u{114B0}-\u{114C3}\u{115AF}-\u{115B5}\u{115B8}-\u{115C0}\u{115DC}\u{115DD}\u{11630}-\u{11640}\u{116AB}-\u{116B7}\u{1171D}-\u{1172B}\u{1182C}-\u{1183A}\u{11930}-\u{11935}\u{11937}\u{11938}\u{1193B}-\u{1193E}\u{11940}\u{11942}\u{11943}\u{119D1}-\u{119D7}\u{119DA}-\u{119E0}\u{119E4}\u{11A01}-\u{11A0A}\u{11A33}-\u{11A39}\u{11A3B}-\u{11A3E}\u{11A47}\u{11A51}-\u{11A5B}\u{11A8A}-\u{11A99}\u{11B60}-\u{11B67}\u{11C2F}-\u{11C36}\u{11C38}-\u{11C3F}\u{11C92}-\u{11CA7}\u{11CA9}-\u{11CB6}\u{11D31}-\u{11D36}\u{11D3A}\u{11D3C}\u{11D3D}\u{11D3F}-\u{11D45}\u{11D47}\u{11D8A}-\u{11D8E}\u{11D90}\u{11D91}\u{11D93}-\u{11D97}\u{11EF3}-\u{11EF6}\u{11F00}\u{11F01}\u{11F03}\u{11F34}-\u{11F3A}\u{11F3E}-\u{11F42}\u{11F5A}\u{13440}\u{13447}-\u{13455}\u{1611E}-\u{1612F}\u{16AF0}-\u{16AF4}\u{16B30}-\u{16B36}\u{16F4F}\u{16F51}-\u{16F87}\u{16F8F}-\u{16F92}\u{16FE4}\u{16FF0}\u{16FF1}\u{1BC9D}\u{1BC9E}\u{1CF00}-\u{1CF2D}\u{1CF30}-\u{1CF46}\u{1D165}-\u{1D169}\u{1D16D}-\u{1D172}\u{1D17B}-\u{1D182}\u{1D185}-\u{1D
18B}\u{1D1AA}-\u{1D1AD}\u{1D242}-\u{1D244}\u{1DA00}-\u{1DA36}\u{1DA3B}-\u{1DA6C}\u{1DA75}\u{1DA84}\u{1DA9B}-\u{1DA9F}\u{1DAA1}-\u{1DAAF}\u{1E000}-\u{1E006}\u{1E008}-\u{1E018}\u{1E01B}-\u{1E021}\u{1E023}\u{1E024}\u{1E026}-\u{1E02A}\u{1E08F}\u{1E130}-\u{1E136}\u{1E2AE}\u{1E2EC}-\u{1E2EF}\u{1E4EC}-\u{1E4EF}\u{1E5EE}\u{1E5EF}\u{1E6E3}\u{1E6E6}\u{1E6EE}\u{1E6EF}\u{1E6F5}\u{1E8D0}-\u{1E8D6}\u{1E944}-\u{1E94A}\u{E0100}-\u{E01EF}]/u
-const decimalDigitRe = /[0-9\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19\u{104A0}-\u{104A9}\u{10D30}-\u{10D39}\u{10D40}-\u{10D49}\u{11066}-\u{1106F}\u{110F0}-\u{110F9}\u{11136}-\u{1113F}\u{111D0}-\u{111D9}\u{112F0}-\u{112F9}\u{11450}-\u{11459}\u{114D0}-\u{114D9}\u{11650}-\u{11659}\u{116C0}-\u{116C9}\u{116D0}-\u{116E3}\u{11730}-\u{11739}\u{118E0}-\u{118E9}\u{11950}-\u{11959}\u{11BF0}-\u{11BF9}\u{11C50}-\u{11C59}\u{11D50}-\u{11D59}\u{11DA0}-\u{11DA9}\u{11DE0}-\u{11DE9}\u{11F50}-\u{11F59}\u{16130}-\u{16139}\u{16A60}-\u{16A69}\u{16AC0}-\u{16AC9}\u{16B50}-\u{16B59}\u{16D70}-\u{16D79}\u{1CCF0}-\u{1CCF9}\u{1D7CE}-\u{1D7FF}\u{1E140}-\u{1E149}\u{1E2F0}-\u{1E2F9}\u{1E4F0}-\u{1E4F9}\u{1E5F1}-\u{1E5FA}\u{1E950}-\u{1E959}\u{1FBF0}-\u{1FBF9}]/u
-
-function containsArabicScript(text: string): boolean {
- return arabicScriptRe.test(text)
-}
-
-export function isCJK(s: string): boolean {
- for (const ch of s) {
- const c = ch.codePointAt(0)!
- if ((c >= 0x4E00 && c <= 0x9FFF) ||
- (c >= 0x3400 && c <= 0x4DBF) ||
- (c >= 0x20000 && c <= 0x2A6DF) ||
- (c >= 0x2A700 && c <= 0x2B73F) ||
- (c >= 0x2B740 && c <= 0x2B81F) ||
- (c >= 0x2B820 && c <= 0x2CEAF) ||
- (c >= 0x2CEB0 && c <= 0x2EBEF) ||
- (c >= 0x30000 && c <= 0x3134F) ||
- (c >= 0xF900 && c <= 0xFAFF) ||
- (c >= 0x2F800 && c <= 0x2FA1F) ||
- (c >= 0x3000 && c <= 0x303F) ||
- (c >= 0x3040 && c <= 0x309F) ||
- (c >= 0x30A0 && c <= 0x30FF) ||
- (c >= 0xAC00 && c <= 0xD7AF) ||
- (c >= 0xFF00 && c <= 0xFFEF)) {
- return true
- }
- }
- return false
-}
-
-export const kinsokuStart = new Set([
- '\uFF0C',
- '\uFF0E',
- '\uFF01',
- '\uFF1A',
- '\uFF1B',
- '\uFF1F',
- '\u3001',
- '\u3002',
- '\u30FB',
- '\uFF09',
- '\u3015',
- '\u3009',
- '\u300B',
- '\u300D',
- '\u300F',
- '\u3011',
- '\u3017',
- '\u3019',
- '\u301B',
- '\u30FC',
- '\u3005',
- '\u303B',
- '\u309D',
- '\u309E',
- '\u30FD',
- '\u30FE',
-])
-
-export const kinsokuEnd = new Set([
- '"',
- '(', '[', '{',
- '\u201C', '\u2018', '\u00AB', '\u2039',
- '\uFF08',
- '\u3014',
- '\u3008',
- '\u300A',
- '\u300C',
- '\u300E',
- '\u3010',
- '\u3016',
- '\u3018',
- '\u301A',
-])
-
-const forwardStickyGlue = new Set([
- "'", '\u2018',
-])
-
-export const leftStickyPunctuation = new Set([
- '.', ',', '!', '?', ':', ';',
- '\u060C',
- '\u061B',
- '\u061F',
- '\u0964',
- '\u0965',
- '\u104A',
- '\u104B',
- '\u104C',
- '\u104D',
- '\u104F',
- ')', ']', '}',
- '%',
- '"',
- '\u201D', '\u2019', '\u00BB', '\u203A',
- '\u2026',
-])
-
-const arabicNoSpaceTrailingPunctuation = new Set([
- ':',
- '.',
- '\u060C',
- '\u061B',
-])
-
-const myanmarMedialGlue = new Set([
- '\u104F',
-])
-
-const closingQuoteChars = new Set([
- '\u201D', '\u2019', '\u00BB', '\u203A',
- '\u300D',
- '\u300F',
- '\u3011',
- '\u300B',
- '\u3009',
- '\u3015',
- '\uFF09',
-])
-
-export function isLeftStickyPunctuationSegment(segment: string): boolean {
- if (isEscapedQuoteClusterSegment(segment)) return true
- let sawPunctuation = false
- for (const ch of segment) {
- if (leftStickyPunctuation.has(ch)) {
- sawPunctuation = true
- continue
- }
- if (sawPunctuation && combiningMarkRe.test(ch)) continue
- return false
- }
- return sawPunctuation
-}
-
-function isCJKLineStartProhibitedSegment(segment: string): boolean {
- for (const ch of segment) {
- if (!kinsokuStart.has(ch) && !leftStickyPunctuation.has(ch)) return false
- }
- return segment.length > 0
-}
-
-export function isForwardStickyClusterSegment(segment: string): boolean {
- if (isEscapedQuoteClusterSegment(segment)) return true
- for (const ch of segment) {
- if (!kinsokuEnd.has(ch) && !forwardStickyGlue.has(ch) && !combiningMarkRe.test(ch)) return false
- }
- return segment.length > 0
-}
-
-export function isEscapedQuoteClusterSegment(segment: string): boolean {
- let sawQuote = false
- for (const ch of segment) {
- if (ch === '\\' || combiningMarkRe.test(ch)) continue
- if (kinsokuEnd.has(ch) || leftStickyPunctuation.has(ch) || forwardStickyGlue.has(ch)) {
- sawQuote = true
- continue
- }
- return false
- }
- return sawQuote
-}
-
-export function splitTrailingForwardStickyCluster(text: string): { head: string, tail: string } | null {
- const chars = Array.from(text)
- let splitIndex = chars.length
-
- while (splitIndex > 0) {
- const ch = chars[splitIndex - 1]!
- if (combiningMarkRe.test(ch)) {
- splitIndex--
- continue
- }
- if (kinsokuEnd.has(ch) || forwardStickyGlue.has(ch)) {
- splitIndex--
- continue
- }
- break
- }
-
- if (splitIndex <= 0 || splitIndex === chars.length) return null
- return {
- head: chars.slice(0, splitIndex).join(''),
- tail: chars.slice(splitIndex).join(''),
- }
-}
-
-function isRepeatedSingleCharRun(segment: string, ch: string): boolean {
- if (segment.length === 0) return false
- for (const part of segment) {
- if (part !== ch) return false
- }
- return true
-}
-
-function endsWithArabicNoSpacePunctuation(segment: string): boolean {
- if (!containsArabicScript(segment) || segment.length === 0) return false
- return arabicNoSpaceTrailingPunctuation.has(segment[segment.length - 1]!)
-}
-
-function endsWithMyanmarMedialGlue(segment: string): boolean {
- if (segment.length === 0) return false
- return myanmarMedialGlue.has(segment[segment.length - 1]!)
-}
-
-function splitLeadingSpaceAndMarks(segment: string): { space: string, marks: string } | null {
- if (segment.length < 2 || segment[0] !== ' ') return null
- const marks = segment.slice(1)
- if (marks.length > 0 && [...marks].every(ch => combiningMarkRe.test(ch))) {
- return { space: ' ', marks }
- }
- return null
-}
-
-export function endsWithClosingQuote(text: string): boolean {
- for (let i = text.length - 1; i >= 0; i--) {
- const ch = text[i]!
- if (closingQuoteChars.has(ch)) return true
- if (!leftStickyPunctuation.has(ch)) return false
- }
- return false
-}
-
-// --- Word segmenter management (US-004) ---
-
-let sharedWordSegmenter: Intl.Segmenter | null = null
-let segmenterLocale: string | undefined
-
-function getSharedWordSegmenter(): Intl.Segmenter {
- if (sharedWordSegmenter === null) {
- sharedWordSegmenter = new Intl.Segmenter(segmenterLocale, { granularity: 'word' })
- }
- return sharedWordSegmenter
-}
-
-export function clearAnalysisCaches(): void {
- sharedWordSegmenter = null
-}
-
-export function setAnalysisLocale(locale?: string): void {
- const nextLocale = locale && locale.length > 0 ? locale : undefined
- if (segmenterLocale === nextLocale) return
- segmenterLocale = nextLocale
- sharedWordSegmenter = null
-}
-
-// --- Merge pipeline (US-004) ---
-
-function isTextRunBoundary(kind: SegmentBreakKind): boolean {
- return (
- kind === 'space' ||
- kind === 'preserved-space' ||
- kind === 'zero-width-break' ||
- kind === 'hard-break'
- )
-}
-
-const urlSchemeSegmentRe = /^[A-Za-z][A-Za-z0-9+.-]*:$/
-
-function isUrlLikeRunStart(segmentation: MergedSegmentation, index: number): boolean {
- const text = segmentation.texts[index]!
- if (text.startsWith('www.')) return true
- return (
- urlSchemeSegmentRe.test(text) &&
- index + 1 < segmentation.len &&
- segmentation.kinds[index + 1] === 'text' &&
- segmentation.texts[index + 1] === '//'
- )
-}
-
-function isUrlQueryBoundarySegment(text: string): boolean {
- return text.includes('?') && (text.includes('://') || text.startsWith('www.'))
-}
-
-function mergeUrlLikeRuns(segmentation: MergedSegmentation): MergedSegmentation {
- const texts = segmentation.texts.slice()
- const isWordLike = segmentation.isWordLike.slice()
- const kinds = segmentation.kinds.slice()
- const starts = segmentation.starts.slice()
-
- for (let i = 0; i < segmentation.len; i++) {
- if (kinds[i] !== 'text' || !isUrlLikeRunStart(segmentation, i)) continue
-
- let j = i + 1
- while (j < segmentation.len && !isTextRunBoundary(kinds[j]!)) {
- texts[i] += texts[j]!
- isWordLike[i] = true
- const endsQueryPrefix = texts[j]!.includes('?')
- kinds[j] = 'text'
- texts[j] = ''
- j++
- if (endsQueryPrefix) break
- }
- }
-
- let compactLen = 0
- for (let read = 0; read < texts.length; read++) {
- const text = texts[read]!
- if (text.length === 0) continue
- if (compactLen !== read) {
- texts[compactLen] = text
- isWordLike[compactLen] = isWordLike[read]!
- kinds[compactLen] = kinds[read]!
- starts[compactLen] = starts[read]!
- }
- compactLen++
- }
-
- texts.length = compactLen
- isWordLike.length = compactLen
- kinds.length = compactLen
- starts.length = compactLen
-
- return {
- len: compactLen,
- texts,
- isWordLike,
- kinds,
- starts,
- }
-}
-
-function mergeUrlQueryRuns(segmentation: MergedSegmentation): MergedSegmentation {
- const texts: string[] = []
- const isWordLike: boolean[] = []
- const kinds: SegmentBreakKind[] = []
- const starts: number[] = []
-
- for (let i = 0; i < segmentation.len; i++) {
- const text = segmentation.texts[i]!
- texts.push(text)
- isWordLike.push(segmentation.isWordLike[i]!)
- kinds.push(segmentation.kinds[i]!)
- starts.push(segmentation.starts[i]!)
-
- if (!isUrlQueryBoundarySegment(text)) continue
-
- const nextIndex = i + 1
- if (
- nextIndex >= segmentation.len ||
- isTextRunBoundary(segmentation.kinds[nextIndex]!)
- ) {
- continue
- }
-
- let queryText = ''
- const queryStart = segmentation.starts[nextIndex]!
- let j = nextIndex
- while (j < segmentation.len && !isTextRunBoundary(segmentation.kinds[j]!)) {
- queryText += segmentation.texts[j]!
- j++
- }
-
- if (queryText.length > 0) {
- texts.push(queryText)
- isWordLike.push(true)
- kinds.push('text')
- starts.push(queryStart)
- i = j - 1
- }
- }
-
- return {
- len: texts.length,
- texts,
- isWordLike,
- kinds,
- starts,
- }
-}
-
-const numericJoinerChars = new Set([
- ':', '-', '/', '\u00D7', ',', '.', '+',
- '\u2013',
- '\u2014',
-])
-
-const asciiPunctuationChainSegmentRe = /^[A-Za-z0-9_]+[,:;]*$/
-const asciiPunctuationChainTrailingJoinersRe = /[,:;]+$/
-
-function segmentContainsDecimalDigit(text: string): boolean {
- for (const ch of text) {
- if (decimalDigitRe.test(ch)) return true
- }
- return false
-}
-
-function isNumericRunSegment(text: string): boolean {
- if (text.length === 0) return false
- for (const ch of text) {
- if (decimalDigitRe.test(ch) || numericJoinerChars.has(ch)) continue
- return false
- }
- return true
-}
-
-function mergeNumericRuns(segmentation: MergedSegmentation): MergedSegmentation {
- const texts: string[] = []
- const isWordLike: boolean[] = []
- const kinds: SegmentBreakKind[] = []
- const starts: number[] = []
-
- for (let i = 0; i < segmentation.len; i++) {
- const text = segmentation.texts[i]!
- const kind = segmentation.kinds[i]!
-
- if (kind === 'text' && isNumericRunSegment(text) && segmentContainsDecimalDigit(text)) {
- let mergedText = text
- let j = i + 1
- while (
- j < segmentation.len &&
- segmentation.kinds[j] === 'text' &&
- isNumericRunSegment(segmentation.texts[j]!)
- ) {
- mergedText += segmentation.texts[j]!
- j++
- }
-
- texts.push(mergedText)
- isWordLike.push(true)
- kinds.push('text')
- starts.push(segmentation.starts[i]!)
- i = j - 1
- continue
- }
-
- texts.push(text)
- isWordLike.push(segmentation.isWordLike[i]!)
- kinds.push(kind)
- starts.push(segmentation.starts[i]!)
- }
-
- return {
- len: texts.length,
- texts,
- isWordLike,
- kinds,
- starts,
- }
-}
-
-function mergeAsciiPunctuationChains(segmentation: MergedSegmentation): MergedSegmentation {
- const texts: string[] = []
- const isWordLike: boolean[] = []
- const kinds: SegmentBreakKind[] = []
- const starts: number[] = []
-
- for (let i = 0; i < segmentation.len; i++) {
- const text = segmentation.texts[i]!
- const kind = segmentation.kinds[i]!
- const wordLike = segmentation.isWordLike[i]!
-
- if (kind === 'text' && wordLike && asciiPunctuationChainSegmentRe.test(text)) {
- let mergedText = text
- let j = i + 1
-
- while (
- asciiPunctuationChainTrailingJoinersRe.test(mergedText) &&
- j < segmentation.len &&
- segmentation.kinds[j] === 'text' &&
- segmentation.isWordLike[j] &&
- asciiPunctuationChainSegmentRe.test(segmentation.texts[j]!)
- ) {
- mergedText += segmentation.texts[j]!
- j++
- }
-
- texts.push(mergedText)
- isWordLike.push(true)
- kinds.push('text')
- starts.push(segmentation.starts[i]!)
- i = j - 1
- continue
- }
-
- texts.push(text)
- isWordLike.push(wordLike)
- kinds.push(kind)
- starts.push(segmentation.starts[i]!)
- }
-
- return {
- len: texts.length,
- texts,
- isWordLike,
- kinds,
- starts,
- }
-}
-
-function splitHyphenatedNumericRuns(segmentation: MergedSegmentation): MergedSegmentation {
- const texts: string[] = []
- const isWordLike: boolean[] = []
- const kinds: SegmentBreakKind[] = []
- const starts: number[] = []
-
- for (let i = 0; i < segmentation.len; i++) {
- const text = segmentation.texts[i]!
- if (segmentation.kinds[i] === 'text' && text.includes('-')) {
- const parts = text.split('-')
- let shouldSplit = parts.length > 1
- for (let j = 0; j < parts.length; j++) {
- const part = parts[j]!
- if (!shouldSplit) break
- if (
- part.length === 0 ||
- !segmentContainsDecimalDigit(part) ||
- !isNumericRunSegment(part)
- ) {
- shouldSplit = false
- }
- }
-
- if (shouldSplit) {
- let offset = 0
- for (let j = 0; j < parts.length; j++) {
- const part = parts[j]!
- const splitText = j < parts.length - 1 ? `${part}-` : part
- texts.push(splitText)
- isWordLike.push(true)
- kinds.push('text')
- starts.push(segmentation.starts[i]! + offset)
- offset += splitText.length
- }
- continue
- }
- }
-
- texts.push(text)
- isWordLike.push(segmentation.isWordLike[i]!)
- kinds.push(segmentation.kinds[i]!)
- starts.push(segmentation.starts[i]!)
- }
-
- return {
- len: texts.length,
- texts,
- isWordLike,
- kinds,
- starts,
- }
-}
-
-function mergeGlueConnectedTextRuns(segmentation: MergedSegmentation): MergedSegmentation {
- const texts: string[] = []
- const isWordLike: boolean[] = []
- const kinds: SegmentBreakKind[] = []
- const starts: number[] = []
-
- let read = 0
- while (read < segmentation.len) {
- let text = segmentation.texts[read]!
- let wordLike = segmentation.isWordLike[read]!
- let kind = segmentation.kinds[read]!
- let start = segmentation.starts[read]!
-
- if (kind === 'glue') {
- let glueText = text
- const glueStart = start
- read++
- while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
- glueText += segmentation.texts[read]!
- read++
- }
-
- if (read < segmentation.len && segmentation.kinds[read] === 'text') {
- text = glueText + segmentation.texts[read]!
- wordLike = segmentation.isWordLike[read]!
- kind = 'text'
- start = glueStart
- read++
- } else {
- texts.push(glueText)
- isWordLike.push(false)
- kinds.push('glue')
- starts.push(glueStart)
- continue
- }
- } else {
- read++
- }
-
- if (kind === 'text') {
- while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
- let glueText = ''
- while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
- glueText += segmentation.texts[read]!
- read++
- }
-
- if (read < segmentation.len && segmentation.kinds[read] === 'text') {
- text += glueText + segmentation.texts[read]!
- wordLike = wordLike || segmentation.isWordLike[read]!
- read++
- continue
- }
-
- text += glueText
- }
- }
-
- texts.push(text)
- isWordLike.push(wordLike)
- kinds.push(kind)
- starts.push(start)
- }
-
- return {
- len: texts.length,
- texts,
- isWordLike,
- kinds,
- starts,
- }
-}
-
-function carryTrailingForwardStickyAcrossCJKBoundary(segmentation: MergedSegmentation): MergedSegmentation {
- const texts = segmentation.texts.slice()
- const isWordLike = segmentation.isWordLike.slice()
- const kinds = segmentation.kinds.slice()
- const starts = segmentation.starts.slice()
-
- for (let i = 0; i < texts.length - 1; i++) {
- if (kinds[i] !== 'text' || kinds[i + 1] !== 'text') continue
- if (!isCJK(texts[i]!) || !isCJK(texts[i + 1]!)) continue
-
- const split = splitTrailingForwardStickyCluster(texts[i]!)
- if (split === null) continue
-
- texts[i] = split.head
- texts[i + 1] = split.tail + texts[i + 1]!
- starts[i + 1] = starts[i]! + split.head.length
- }
-
- return {
- len: texts.length,
- texts,
- isWordLike,
- kinds,
- starts,
- }
-}
-
-function buildMergedSegmentation(
- normalized: string,
- profile: AnalysisProfile,
- whiteSpaceProfile: WhiteSpaceProfile,
-): MergedSegmentation {
- const wordSegmenter = getSharedWordSegmenter()
- let mergedLen = 0
- const mergedTexts: string[] = []
- const mergedWordLike: boolean[] = []
- const mergedKinds: SegmentBreakKind[] = []
- const mergedStarts: number[] = []
-
- for (const s of wordSegmenter.segment(normalized)) {
- for (const piece of splitSegmentByBreakKind(s.segment, s.isWordLike ?? false, s.index, whiteSpaceProfile)) {
- const isText = piece.kind === 'text'
-
- if (
- profile.carryCJKAfterClosingQuote &&
- isText &&
- mergedLen > 0 &&
- mergedKinds[mergedLen - 1] === 'text' &&
- isCJK(piece.text) &&
- isCJK(mergedTexts[mergedLen - 1]!) &&
- endsWithClosingQuote(mergedTexts[mergedLen - 1]!)
- ) {
- mergedTexts[mergedLen - 1] += piece.text
- mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike
- } else if (
- isText &&
- mergedLen > 0 &&
- mergedKinds[mergedLen - 1] === 'text' &&
- isCJKLineStartProhibitedSegment(piece.text) &&
- isCJK(mergedTexts[mergedLen - 1]!)
- ) {
- mergedTexts[mergedLen - 1] += piece.text
- mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike
- } else if (
- isText &&
- mergedLen > 0 &&
- mergedKinds[mergedLen - 1] === 'text' &&
- endsWithMyanmarMedialGlue(mergedTexts[mergedLen - 1]!)
- ) {
- mergedTexts[mergedLen - 1] += piece.text
- mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike
- } else if (
- isText &&
- mergedLen > 0 &&
- mergedKinds[mergedLen - 1] === 'text' &&
- piece.isWordLike &&
- containsArabicScript(piece.text) &&
- endsWithArabicNoSpacePunctuation(mergedTexts[mergedLen - 1]!)
- ) {
- mergedTexts[mergedLen - 1] += piece.text
- mergedWordLike[mergedLen - 1] = true
- } else if (
- isText &&
- !piece.isWordLike &&
- mergedLen > 0 &&
- mergedKinds[mergedLen - 1] === 'text' &&
- piece.text.length === 1 &&
- piece.text !== '-' &&
- piece.text !== '\u2014' &&
- isRepeatedSingleCharRun(mergedTexts[mergedLen - 1]!, piece.text)
- ) {
- mergedTexts[mergedLen - 1] += piece.text
- } else if (
- isText &&
- !piece.isWordLike &&
- mergedLen > 0 &&
- mergedKinds[mergedLen - 1] === 'text' &&
- (
- isLeftStickyPunctuationSegment(piece.text) ||
- (piece.text === '-' && mergedWordLike[mergedLen - 1]!)
- )
- ) {
- mergedTexts[mergedLen - 1] += piece.text
- } else {
- mergedTexts[mergedLen] = piece.text
- mergedWordLike[mergedLen] = piece.isWordLike
- mergedKinds[mergedLen] = piece.kind
- mergedStarts[mergedLen] = piece.start
- mergedLen++
- }
- }
- }
-
- for (let i = 1; i < mergedLen; i++) {
- if (
- mergedKinds[i] === 'text' &&
- !mergedWordLike[i]! &&
- isEscapedQuoteClusterSegment(mergedTexts[i]!) &&
- mergedKinds[i - 1] === 'text'
- ) {
- mergedTexts[i - 1] += mergedTexts[i]!
- mergedWordLike[i - 1] = mergedWordLike[i - 1]! || mergedWordLike[i]!
- mergedTexts[i] = ''
- }
- }
-
- for (let i = mergedLen - 2; i >= 0; i--) {
- if (mergedKinds[i] === 'text' && !mergedWordLike[i]! && isForwardStickyClusterSegment(mergedTexts[i]!)) {
- let j = i + 1
- while (j < mergedLen && mergedTexts[j] === '') j++
- if (j < mergedLen && mergedKinds[j] === 'text') {
- mergedTexts[j] = mergedTexts[i]! + mergedTexts[j]!
- mergedStarts[j] = mergedStarts[i]!
- mergedTexts[i] = ''
- }
- }
- }
-
- let compactLen = 0
- for (let read = 0; read < mergedLen; read++) {
- const text = mergedTexts[read]!
- if (text.length === 0) continue
- if (compactLen !== read) {
- mergedTexts[compactLen] = text
- mergedWordLike[compactLen] = mergedWordLike[read]!
- mergedKinds[compactLen] = mergedKinds[read]!
- mergedStarts[compactLen] = mergedStarts[read]!
- }
- compactLen++
- }
-
- mergedTexts.length = compactLen
- mergedWordLike.length = compactLen
- mergedKinds.length = compactLen
- mergedStarts.length = compactLen
-
- const compacted = mergeGlueConnectedTextRuns({
- len: compactLen,
- texts: mergedTexts,
- isWordLike: mergedWordLike,
- kinds: mergedKinds,
- starts: mergedStarts,
- })
- const withMergedUrls = carryTrailingForwardStickyAcrossCJKBoundary(
- mergeAsciiPunctuationChains(
- splitHyphenatedNumericRuns(mergeNumericRuns(mergeUrlQueryRuns(mergeUrlLikeRuns(compacted)))),
- ),
- )
-
- for (let i = 0; i < withMergedUrls.len - 1; i++) {
- const split = splitLeadingSpaceAndMarks(withMergedUrls.texts[i]!)
- if (split === null) continue
- if (
- (withMergedUrls.kinds[i] !== 'space' && withMergedUrls.kinds[i] !== 'preserved-space') ||
- withMergedUrls.kinds[i + 1] !== 'text' ||
- !containsArabicScript(withMergedUrls.texts[i + 1]!)
- ) {
- continue
- }
-
- withMergedUrls.texts[i] = split.space
- withMergedUrls.isWordLike[i] = false
- withMergedUrls.kinds[i] = withMergedUrls.kinds[i] === 'preserved-space' ? 'preserved-space' : 'space'
- withMergedUrls.texts[i + 1] = split.marks + withMergedUrls.texts[i + 1]!
- withMergedUrls.starts[i + 1] = withMergedUrls.starts[i]! + split.space.length
- }
-
- return withMergedUrls
-}
-
-function compileAnalysisChunks(segmentation: MergedSegmentation, whiteSpaceProfile: WhiteSpaceProfile): AnalysisChunk[] {
- if (segmentation.len === 0) return []
- if (!whiteSpaceProfile.preserveHardBreaks) {
- return [{
- startSegmentIndex: 0,
- endSegmentIndex: segmentation.len,
- consumedEndSegmentIndex: segmentation.len,
- }]
- }
-
- const chunks: AnalysisChunk[] = []
- let startSegmentIndex = 0
-
- for (let i = 0; i < segmentation.len; i++) {
- if (segmentation.kinds[i] !== 'hard-break') continue
-
- chunks.push({
- startSegmentIndex,
- endSegmentIndex: i,
- consumedEndSegmentIndex: i + 1,
- })
- startSegmentIndex = i + 1
- }
-
- if (startSegmentIndex < segmentation.len) {
- chunks.push({
- startSegmentIndex,
- endSegmentIndex: segmentation.len,
- consumedEndSegmentIndex: segmentation.len,
- })
- }
-
- return chunks
-}
-
-export function analyzeText(
- text: string,
- profile: AnalysisProfile,
- whiteSpace: WhiteSpaceMode = 'normal',
-): TextAnalysis {
- const whiteSpaceProfile = getWhiteSpaceProfile(whiteSpace)
- const normalized = whiteSpaceProfile.mode === 'pre-wrap'
- ? normalizeWhitespacePreWrap(text)
- : normalizeWhitespaceNormal(text)
- if (normalized.length === 0) {
- return {
- normalized,
- chunks: [],
- len: 0,
- texts: [],
- isWordLike: [],
- kinds: [],
- starts: [],
- }
- }
- const segmentation = buildMergedSegmentation(normalized, profile, whiteSpaceProfile)
- return {
- normalized,
- chunks: compileAnalysisChunks(segmentation, whiteSpaceProfile),
- ...segmentation,
- }
-}
+export * from './pretext/analysis.js'
diff --git a/packages/lynx-pretext/src/intl-segmenter.d.ts b/packages/lynx-pretext/src/intl-segmenter.d.ts
new file mode 100644
index 0000000..051cd5c
--- /dev/null
+++ b/packages/lynx-pretext/src/intl-segmenter.d.ts
@@ -0,0 +1,25 @@
+/**
+ * Ambient type declarations for the subset of `Intl.Segmenter` declared below.
+ * NOTE(review): presumably needed because the configured TS `lib` omits Intl.Segmenter — confirm.
+ */
+declare namespace Intl {
+  interface SegmenterOptions {
+    granularity?: 'grapheme' | 'word' | 'sentence'
+  }
+
+  /** One entry produced when iterating a `Segments` value. */
+  interface SegmentData {
+    segment: string
+    index: number
+    input?: string
+    isWordLike?: boolean
+  }
+
+  // `Iterable` is generic: without the element type this fails to compile (TS2314).
+  interface Segments extends Iterable<SegmentData> {}
+
+  class Segmenter {
+    constructor(locales?: string | string[], options?: SegmenterOptions)
+    segment(input: string): Segments
+  }
+}
diff --git a/packages/lynx-pretext/src/layout.test.mjs b/packages/lynx-pretext/src/layout.test.mjs
new file mode 100644
index 0000000..47125eb
--- /dev/null
+++ b/packages/lynx-pretext/src/layout.test.mjs
@@ -0,0 +1,33 @@
+import { beforeEach, expect, test } from 'bun:test'
+import { readFile } from 'node:fs/promises'
+import path from 'node:path'
+import { fileURLToPath } from 'node:url'
+
+let getTextInfoCalls = 0
+
+beforeEach(() => {
+ getTextInfoCalls = 0
+ globalThis.lynx = {
+ getTextInfo(text) {
+ getTextInfoCalls += 1
+ return { width: text.length * 8, content: [text] }
+ },
+ }
+})
+
+test('prepareWithSegments returns bidi metadata while measuring via lynx.getTextInfo', async () => {
+ const { prepareWithSegments } = await import('./layout.ts')
+ const prepared = prepareWithSegments('hello مرحبا', '16px Arial')
+
+ expect(getTextInfoCalls).toBeGreaterThan(0)
+ expect(prepared.segLevels).not.toBeNull()
+})
+
+test('package is publishable without a linked upstream pretext dependency', async () => {
+ const here = path.dirname(fileURLToPath(import.meta.url))
+ const packageJson = JSON.parse(await readFile(path.join(here, '..', 'package.json'), 'utf8'))
+ const layoutSource = await readFile(path.join(here, 'layout.ts'), 'utf8')
+
+ expect(packageJson.dependencies?.['@chenglou/pretext']).toBeUndefined()
+ expect(layoutSource).not.toContain('@chenglou/pretext')
+})
diff --git a/packages/lynx-pretext/src/layout.ts b/packages/lynx-pretext/src/layout.ts
index 51af429..61f58b4 100644
--- a/packages/lynx-pretext/src/layout.ts
+++ b/packages/lynx-pretext/src/layout.ts
@@ -1,621 +1,43 @@
-// Text measurement for Lynx using main-thread lynx.getTextInfo().
-//
-// Two-phase measurement:
-// prepare(text, font) — segments text via Intl.Segmenter polyfill, measures
-// each word via getTextInfo, caches widths. Call once when text first appears.
-// layout(prepared, maxWidth, lineHeight) — walks cached word widths with pure
-// arithmetic to count lines and compute height. Call on every resize.
-//
-// Based on chenglou/pretext, adapted for Lynx main thread.
-
-import {
- analyzeText,
- clearAnalysisCaches,
- endsWithClosingQuote,
- isCJK,
- kinsokuEnd,
- kinsokuStart,
- leftStickyPunctuation,
- setAnalysisLocale,
- type AnalysisChunk,
- type SegmentBreakKind,
- type TextAnalysis,
- type WhiteSpaceMode,
-} from './analysis'
-import {
- clearMeasurementCaches,
- getCorrectedSegmentWidth,
- getEngineProfile,
- getFontMeasurementState,
- getSegmentGraphemePrefixWidths,
- getSegmentGraphemeWidths,
- getSegmentMetrics,
- textMayContainEmoji,
-} from './measurement'
-import {
- countPreparedLines,
- layoutNextLineRange as stepPreparedLineRange,
- walkPreparedLines,
- type InternalLayoutLine,
-} from './line-break'
-
-let sharedGraphemeSegmenter: Intl.Segmenter | null = null
-// Rich-path only. Reuses grapheme splits while materializing multiple lines
-// from the same prepared handle, without pushing that cache into the API.
-let sharedLineTextCaches = new WeakMap>()
-
-function getSharedGraphemeSegmenter(): Intl.Segmenter {
- if (sharedGraphemeSegmenter === null) {
- sharedGraphemeSegmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme' })
- }
- return sharedGraphemeSegmenter
-}
-
-// Bidi stub for MVP — returns null (no bidi metadata).
-function computeSegmentLevels(
- _normalized: string,
- _segStarts: number[],
-): Int8Array | null {
- return null
-}
-
-// --- Public types ---
-
-declare const preparedTextBrand: unique symbol
-
-type PreparedCore = {
- widths: number[]
- lineEndFitAdvances: number[]
- lineEndPaintAdvances: number[]
- kinds: SegmentBreakKind[]
- simpleLineWalkFastPath: boolean
- segLevels: Int8Array | null
- breakableWidths: (number[] | null)[]
- breakablePrefixWidths: (number[] | null)[]
- discretionaryHyphenWidth: number
- tabStopAdvance: number
- chunks: PreparedLineChunk[]
-}
-
-export type PreparedText = {
- readonly [preparedTextBrand]: true
-}
-
-type InternalPreparedText = PreparedText & PreparedCore
-
-export type PreparedTextWithSegments = InternalPreparedText & {
- segments: string[]
-}
-
-export type LayoutCursor = {
- segmentIndex: number
- graphemeIndex: number
-}
-
-export type LayoutResult = {
- lineCount: number
- height: number
-}
-
-export type LayoutLine = {
- text: string
- width: number
- start: LayoutCursor
- end: LayoutCursor
-}
-
-export type LayoutLineRange = {
- width: number
- start: LayoutCursor
- end: LayoutCursor
-}
-
-export type LayoutLinesResult = LayoutResult & {
- lines: LayoutLine[]
-}
-
-export type PrepareOptions = {
- whiteSpace?: WhiteSpaceMode
-}
-
-export type PreparedLineChunk = {
- startSegmentIndex: number
- endSegmentIndex: number
- consumedEndSegmentIndex: number
-}
-
-// --- Internal helpers ---
-
-function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | PreparedTextWithSegments {
- const base = {
- widths: [],
- lineEndFitAdvances: [],
- lineEndPaintAdvances: [],
- kinds: [],
- simpleLineWalkFastPath: true,
- segLevels: null,
- breakableWidths: [],
- breakablePrefixWidths: [],
- discretionaryHyphenWidth: 0,
- tabStopAdvance: 0,
- chunks: [],
- }
- if (includeSegments) {
- return { ...base, segments: [] } as unknown as PreparedTextWithSegments
- }
- return base as unknown as InternalPreparedText
-}
-
-function measureAnalysis(
- analysis: TextAnalysis,
- font: string,
- includeSegments: boolean,
-): InternalPreparedText | PreparedTextWithSegments {
- const graphemeSegmenter = getSharedGraphemeSegmenter()
- const engineProfile = getEngineProfile()
- const { cache, emojiCorrection } = getFontMeasurementState(
- font,
- textMayContainEmoji(analysis.normalized),
- )
- const discretionaryHyphenWidth = getCorrectedSegmentWidth('-', getSegmentMetrics('-', cache), emojiCorrection)
- const spaceWidth = getCorrectedSegmentWidth(' ', getSegmentMetrics(' ', cache), emojiCorrection)
- const tabStopAdvance = spaceWidth * 8
-
- if (analysis.len === 0) return createEmptyPrepared(includeSegments)
-
- const widths: number[] = []
- const lineEndFitAdvances: number[] = []
- const lineEndPaintAdvances: number[] = []
- const kinds: SegmentBreakKind[] = []
- let simpleLineWalkFastPath = analysis.chunks.length <= 1
- const segStarts = includeSegments ? [] as number[] : null
- const breakableWidths: (number[] | null)[] = []
- const breakablePrefixWidths: (number[] | null)[] = []
- const segments = includeSegments ? [] as string[] : null
- const preparedStartByAnalysisIndex = Array.from({ length: analysis.len })
- const preparedEndByAnalysisIndex = Array.from({ length: analysis.len })
-
- function pushMeasuredSegment(
- text: string,
- width: number,
- lineEndFitAdvance: number,
- lineEndPaintAdvance: number,
- kind: SegmentBreakKind,
- start: number,
- breakable: number[] | null,
- breakablePrefix: number[] | null,
- ): void {
- if (kind !== 'text' && kind !== 'space' && kind !== 'zero-width-break') {
- simpleLineWalkFastPath = false
- }
- widths.push(width)
- lineEndFitAdvances.push(lineEndFitAdvance)
- lineEndPaintAdvances.push(lineEndPaintAdvance)
- kinds.push(kind)
- segStarts?.push(start)
- breakableWidths.push(breakable)
- breakablePrefixWidths.push(breakablePrefix)
- if (segments !== null) segments.push(text)
- }
-
- for (let mi = 0; mi < analysis.len; mi++) {
- preparedStartByAnalysisIndex[mi] = widths.length
- const segText = analysis.texts[mi]!
- const segWordLike = analysis.isWordLike[mi]!
- const segKind = analysis.kinds[mi]!
- const segStart = analysis.starts[mi]!
-
- if (segKind === 'soft-hyphen') {
- pushMeasuredSegment(
- segText,
- 0,
- discretionaryHyphenWidth,
- discretionaryHyphenWidth,
- segKind,
- segStart,
- null,
- null,
- )
- preparedEndByAnalysisIndex[mi] = widths.length
- continue
- }
-
- if (segKind === 'hard-break') {
- pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null)
- preparedEndByAnalysisIndex[mi] = widths.length
- continue
- }
-
- if (segKind === 'tab') {
- pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null)
- preparedEndByAnalysisIndex[mi] = widths.length
- continue
- }
-
- const segMetrics = getSegmentMetrics(segText, cache)
-
- if (segKind === 'text' && segMetrics.containsCJK) {
- let unitText = ''
- let unitStart = 0
-
- for (const gs of graphemeSegmenter.segment(segText)) {
- const grapheme = gs.segment
-
- if (unitText.length === 0) {
- unitText = grapheme
- unitStart = gs.index
- continue
- }
-
- if (
- kinsokuEnd.has(unitText) ||
- kinsokuStart.has(grapheme) ||
- leftStickyPunctuation.has(grapheme) ||
- (engineProfile.carryCJKAfterClosingQuote &&
- isCJK(grapheme) &&
- endsWithClosingQuote(unitText))
- ) {
- unitText += grapheme
- continue
- }
-
- const unitMetrics = getSegmentMetrics(unitText, cache)
- const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection)
- pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, null, null)
-
- unitText = grapheme
- unitStart = gs.index
- }
-
- if (unitText.length > 0) {
- const unitMetrics = getSegmentMetrics(unitText, cache)
- const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection)
- pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, null, null)
- }
- preparedEndByAnalysisIndex[mi] = widths.length
- continue
- }
-
- const w = getCorrectedSegmentWidth(segText, segMetrics, emojiCorrection)
- const lineEndFitAdvance =
- segKind === 'space' || segKind === 'preserved-space' || segKind === 'zero-width-break'
- ? 0
- : w
- const lineEndPaintAdvance =
- segKind === 'space' || segKind === 'zero-width-break'
- ? 0
- : w
-
- if (segWordLike && segText.length > 1) {
- const graphemeWidths = getSegmentGraphemeWidths(segText, segMetrics, cache, emojiCorrection)
- const graphemePrefixWidths = engineProfile.preferPrefixWidthsForBreakableRuns
- ? getSegmentGraphemePrefixWidths(segText, segMetrics, cache, emojiCorrection)
- : null
- pushMeasuredSegment(
- segText,
- w,
- lineEndFitAdvance,
- lineEndPaintAdvance,
- segKind,
- segStart,
- graphemeWidths,
- graphemePrefixWidths,
- )
- } else {
- pushMeasuredSegment(
- segText,
- w,
- lineEndFitAdvance,
- lineEndPaintAdvance,
- segKind,
- segStart,
- null,
- null,
- )
- }
- preparedEndByAnalysisIndex[mi] = widths.length
- }
-
- const chunks = mapAnalysisChunksToPreparedChunks(analysis.chunks, preparedStartByAnalysisIndex, preparedEndByAnalysisIndex)
- const segLevels = segStarts === null ? null : computeSegmentLevels(analysis.normalized, segStarts)
- if (segments !== null) {
- return {
- widths,
- lineEndFitAdvances,
- lineEndPaintAdvances,
- kinds,
- simpleLineWalkFastPath,
- segLevels,
- breakableWidths,
- breakablePrefixWidths,
- discretionaryHyphenWidth,
- tabStopAdvance,
- chunks,
- segments,
- } as unknown as PreparedTextWithSegments
- }
- return {
- widths,
- lineEndFitAdvances,
- lineEndPaintAdvances,
- kinds,
- simpleLineWalkFastPath,
- segLevels,
- breakableWidths,
- breakablePrefixWidths,
- discretionaryHyphenWidth,
- tabStopAdvance,
- chunks,
- } as unknown as InternalPreparedText
-}
-
-function mapAnalysisChunksToPreparedChunks(
- chunks: AnalysisChunk[],
- preparedStartByAnalysisIndex: number[],
- preparedEndByAnalysisIndex: number[],
-): PreparedLineChunk[] {
- const preparedChunks: PreparedLineChunk[] = []
- for (let i = 0; i < chunks.length; i++) {
- const chunk = chunks[i]!
- const startSegmentIndex =
- chunk.startSegmentIndex < preparedStartByAnalysisIndex.length
- ? preparedStartByAnalysisIndex[chunk.startSegmentIndex]!
- : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0
- const endSegmentIndex =
- chunk.endSegmentIndex < preparedStartByAnalysisIndex.length
- ? preparedStartByAnalysisIndex[chunk.endSegmentIndex]!
- : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0
- const consumedEndSegmentIndex =
- chunk.consumedEndSegmentIndex < preparedStartByAnalysisIndex.length
- ? preparedStartByAnalysisIndex[chunk.consumedEndSegmentIndex]!
- : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0
-
- preparedChunks.push({
- startSegmentIndex,
- endSegmentIndex,
- consumedEndSegmentIndex,
- })
- }
- return preparedChunks
-}
-
-function prepareInternal(
- text: string,
- font: string,
- includeSegments: boolean,
- options?: PrepareOptions,
-): InternalPreparedText | PreparedTextWithSegments {
- const analysis = analyzeText(text, getEngineProfile(), options?.whiteSpace)
- return measureAnalysis(analysis, font, includeSegments)
-}
-
-function getInternalPrepared(prepared: PreparedText): InternalPreparedText {
- return prepared as InternalPreparedText
-}
-
-// --- Public API ---
-
-export function prepare(text: string, font: string, options?: PrepareOptions): PreparedText {
- return prepareInternal(text, font, false, options) as PreparedText
-}
-
-export function prepareWithSegments(text: string, font: string, options?: PrepareOptions): PreparedTextWithSegments {
- return prepareInternal(text, font, true, options) as PreparedTextWithSegments
-}
-
-export function layout(prepared: PreparedText, maxWidth: number, lineHeight: number): LayoutResult {
- const lineCount = countPreparedLines(getInternalPrepared(prepared), maxWidth)
- return { lineCount, height: lineCount * lineHeight }
-}
-
-// --- Rich-path helpers (used by layoutWithLines, walkLineRanges, layoutNextLine) ---
-
-function getSegmentGraphemes(
- segmentIndex: number,
- segments: string[],
- cache: Map,
-): string[] {
- let graphemes = cache.get(segmentIndex)
- if (graphemes !== undefined) return graphemes
-
- graphemes = []
- const graphemeSegmenter = getSharedGraphemeSegmenter()
- for (const gs of graphemeSegmenter.segment(segments[segmentIndex]!)) {
- graphemes.push(gs.segment)
- }
- cache.set(segmentIndex, graphemes)
- return graphemes
-}
-
-function getLineTextCache(prepared: PreparedTextWithSegments): Map {
- let cache = sharedLineTextCaches.get(prepared)
- if (cache !== undefined) return cache
-
- cache = new Map()
- sharedLineTextCaches.set(prepared, cache)
- return cache
-}
-
-function lineHasDiscretionaryHyphen(
- kinds: SegmentBreakKind[],
- startSegmentIndex: number,
- startGraphemeIndex: number,
- endSegmentIndex: number,
-): boolean {
- return (
- endSegmentIndex > 0 &&
- kinds[endSegmentIndex - 1] === 'soft-hyphen' &&
- !(startSegmentIndex === endSegmentIndex && startGraphemeIndex > 0)
- )
-}
-
-function buildLineTextFromRange(
- segments: string[],
- kinds: SegmentBreakKind[],
- cache: Map,
- startSegmentIndex: number,
- startGraphemeIndex: number,
- endSegmentIndex: number,
- endGraphemeIndex: number,
-): string {
- let text = ''
- const endsWithDiscretionaryHyphen = lineHasDiscretionaryHyphen(
- kinds,
- startSegmentIndex,
- startGraphemeIndex,
- endSegmentIndex,
- )
-
- for (let i = startSegmentIndex; i < endSegmentIndex; i++) {
- if (kinds[i] === 'soft-hyphen' || kinds[i] === 'hard-break') continue
- if (i === startSegmentIndex && startGraphemeIndex > 0) {
- text += getSegmentGraphemes(i, segments, cache).slice(startGraphemeIndex).join('')
- } else {
- text += segments[i]!
- }
- }
-
- if (endGraphemeIndex > 0) {
- if (endsWithDiscretionaryHyphen) text += '-'
- text += getSegmentGraphemes(endSegmentIndex, segments, cache).slice(
- startSegmentIndex === endSegmentIndex ? startGraphemeIndex : 0,
- endGraphemeIndex,
- ).join('')
- } else if (endsWithDiscretionaryHyphen) {
- text += '-'
- }
-
- return text
-}
-
-function createLayoutLine(
- prepared: PreparedTextWithSegments,
- cache: Map,
- width: number,
- startSegmentIndex: number,
- startGraphemeIndex: number,
- endSegmentIndex: number,
- endGraphemeIndex: number,
-): LayoutLine {
- return {
- text: buildLineTextFromRange(
- prepared.segments,
- prepared.kinds,
- cache,
- startSegmentIndex,
- startGraphemeIndex,
- endSegmentIndex,
- endGraphemeIndex,
- ),
- width,
- start: {
- segmentIndex: startSegmentIndex,
- graphemeIndex: startGraphemeIndex,
- },
- end: {
- segmentIndex: endSegmentIndex,
- graphemeIndex: endGraphemeIndex,
- },
- }
-}
-
-function materializeLayoutLine(
- prepared: PreparedTextWithSegments,
- cache: Map,
- line: InternalLayoutLine,
-): LayoutLine {
- return createLayoutLine(
- prepared,
- cache,
- line.width,
- line.startSegmentIndex,
- line.startGraphemeIndex,
- line.endSegmentIndex,
- line.endGraphemeIndex,
- )
-}
-
-function toLayoutLineRange(line: InternalLayoutLine): LayoutLineRange {
- return {
- width: line.width,
- start: {
- segmentIndex: line.startSegmentIndex,
- graphemeIndex: line.startGraphemeIndex,
- },
- end: {
- segmentIndex: line.endSegmentIndex,
- graphemeIndex: line.endGraphemeIndex,
- },
- }
-}
-
-function stepLineRange(
- prepared: PreparedTextWithSegments,
- start: LayoutCursor,
- maxWidth: number,
-): LayoutLineRange | null {
- const line = stepPreparedLineRange(prepared, start, maxWidth)
- if (line === null) return null
- return toLayoutLineRange(line)
-}
-
-function materializeLine(
- prepared: PreparedTextWithSegments,
- line: LayoutLineRange,
-): LayoutLine {
- return createLayoutLine(
- prepared,
- getLineTextCache(prepared),
- line.width,
- line.start.segmentIndex,
- line.start.graphemeIndex,
- line.end.segmentIndex,
- line.end.graphemeIndex,
- )
-}
-
-export function walkLineRanges(
- prepared: PreparedTextWithSegments,
- maxWidth: number,
- onLine: (line: LayoutLineRange) => void,
-): number {
- if (prepared.widths.length === 0) return 0
-
- return walkPreparedLines(getInternalPrepared(prepared), maxWidth, line => {
- onLine(toLayoutLineRange(line))
- })
-}
-
-export function layoutNextLine(
- prepared: PreparedTextWithSegments,
- start: LayoutCursor,
- maxWidth: number,
-): LayoutLine | null {
- const line = stepLineRange(prepared, start, maxWidth)
- if (line === null) return null
- return materializeLine(prepared, line)
-}
-
-export function layoutWithLines(prepared: PreparedTextWithSegments, maxWidth: number, lineHeight: number): LayoutLinesResult {
- const lines: LayoutLine[] = []
- if (prepared.widths.length === 0) return { lineCount: 0, height: 0, lines }
-
- const graphemeCache = getLineTextCache(prepared)
- const lineCount = walkPreparedLines(getInternalPrepared(prepared), maxWidth, line => {
- lines.push(materializeLayoutLine(prepared, graphemeCache, line))
- })
-
- return { lineCount, height: lineCount * lineHeight, lines }
-}
-
-export function clearCache(): void {
- clearAnalysisCaches()
- sharedGraphemeSegmenter = null
- sharedLineTextCaches = new WeakMap>()
- clearMeasurementCaches()
-}
-
-export function setLocale(locale?: string): void {
- setAnalysisLocale(locale)
- clearCache()
+import './intl-shim'
+import './segmenter-polyfill'
+
+import { createPretext } from './pretext/host.js'
+import type {
+ LayoutCursor,
+ LayoutLine,
+ LayoutLineRange,
+ LayoutLinesResult,
+ LayoutResult,
+ PrepareOptions,
+ PrepareProfile,
+ PreparedText,
+ PreparedTextWithSegments,
+} from './pretext/layout.js'
+
+import { lynxMeasurementHost } from './measurement'
+
+const pretext = createPretext({
+ measurement: lynxMeasurementHost,
+})
+
+export const profilePrepare = pretext.profilePrepare
+export const prepare = pretext.prepare
+export const prepareWithSegments = pretext.prepareWithSegments
+export const layout = pretext.layout
+export const walkLineRanges = pretext.walkLineRanges
+export const layoutNextLine = pretext.layoutNextLine
+export const layoutWithLines = pretext.layoutWithLines
+export const clearCache = pretext.clearCache
+export const setLocale = pretext.setLocale
+
+export type {
+ LayoutCursor,
+ LayoutLine,
+ LayoutLineRange,
+ LayoutLinesResult,
+ LayoutResult,
+ PrepareOptions,
+ PrepareProfile,
+ PreparedText,
+ PreparedTextWithSegments,
}
diff --git a/packages/lynx-pretext/src/line-break.ts b/packages/lynx-pretext/src/line-break.ts
index 7564161..ac6ee2b 100644
--- a/packages/lynx-pretext/src/line-break.ts
+++ b/packages/lynx-pretext/src/line-break.ts
@@ -1,1056 +1 @@
-import type { SegmentBreakKind } from './analysis'
-import { getEngineProfile } from './measurement'
-
-export type LineBreakCursor = {
- segmentIndex: number
- graphemeIndex: number
-}
-
-export type PreparedLineBreakData = {
- widths: number[]
- lineEndFitAdvances: number[]
- lineEndPaintAdvances: number[]
- kinds: SegmentBreakKind[]
- simpleLineWalkFastPath: boolean
- breakableWidths: (number[] | null)[]
- breakablePrefixWidths: (number[] | null)[]
- discretionaryHyphenWidth: number
- tabStopAdvance: number
- chunks: {
- startSegmentIndex: number
- endSegmentIndex: number
- consumedEndSegmentIndex: number
- }[]
-}
-
-export type InternalLayoutLine = {
- startSegmentIndex: number
- startGraphemeIndex: number
- endSegmentIndex: number
- endGraphemeIndex: number
- width: number
-}
-
-function canBreakAfter(kind: SegmentBreakKind): boolean {
- return (
- kind === 'space' ||
- kind === 'preserved-space' ||
- kind === 'tab' ||
- kind === 'zero-width-break' ||
- kind === 'soft-hyphen'
- )
-}
-
-function isSimpleCollapsibleSpace(kind: SegmentBreakKind): boolean {
- return kind === 'space'
-}
-
-function getTabAdvance(lineWidth: number, tabStopAdvance: number): number {
- if (tabStopAdvance <= 0) return 0
-
- const remainder = lineWidth % tabStopAdvance
- if (Math.abs(remainder) <= 1e-6) return tabStopAdvance
- return tabStopAdvance - remainder
-}
-
-function getBreakableAdvance(
- graphemeWidths: number[],
- graphemePrefixWidths: number[] | null,
- graphemeIndex: number,
- preferPrefixWidths: boolean,
-): number {
- if (!preferPrefixWidths || graphemePrefixWidths === null) {
- return graphemeWidths[graphemeIndex]!
- }
- return graphemePrefixWidths[graphemeIndex]! - (graphemeIndex > 0 ? graphemePrefixWidths[graphemeIndex - 1]! : 0)
-}
-
-function fitSoftHyphenBreak(
- graphemeWidths: number[],
- initialWidth: number,
- maxWidth: number,
- lineFitEpsilon: number,
- discretionaryHyphenWidth: number,
- cumulativeWidths: boolean,
-): { fitCount: number, fittedWidth: number } {
- let fitCount = 0
- let fittedWidth = initialWidth
-
- while (fitCount < graphemeWidths.length) {
- const nextWidth = cumulativeWidths
- ? initialWidth + graphemeWidths[fitCount]!
- : fittedWidth + graphemeWidths[fitCount]!
- const nextLineWidth = fitCount + 1 < graphemeWidths.length
- ? nextWidth + discretionaryHyphenWidth
- : nextWidth
- if (nextLineWidth > maxWidth + lineFitEpsilon) break
- fittedWidth = nextWidth
- fitCount++
- }
-
- return { fitCount, fittedWidth }
-}
-
-function findChunkIndexForStart(prepared: PreparedLineBreakData, segmentIndex: number): number {
- for (let i = 0; i < prepared.chunks.length; i++) {
- const chunk = prepared.chunks[i]!
- if (segmentIndex < chunk.consumedEndSegmentIndex) return i
- }
- return -1
-}
-
-export function normalizeLineStart(
- prepared: PreparedLineBreakData,
- start: LineBreakCursor,
-): LineBreakCursor | null {
- let segmentIndex = start.segmentIndex
- const graphemeIndex = start.graphemeIndex
-
- if (segmentIndex >= prepared.widths.length) return null
- if (graphemeIndex > 0) return start
-
- const chunkIndex = findChunkIndexForStart(prepared, segmentIndex)
- if (chunkIndex < 0) return null
-
- const chunk = prepared.chunks[chunkIndex]!
- if (chunk.startSegmentIndex === chunk.endSegmentIndex && segmentIndex === chunk.startSegmentIndex) {
- return { segmentIndex, graphemeIndex: 0 }
- }
-
- if (segmentIndex < chunk.startSegmentIndex) segmentIndex = chunk.startSegmentIndex
- while (segmentIndex < chunk.endSegmentIndex) {
- const kind = prepared.kinds[segmentIndex]!
- if (kind !== 'space' && kind !== 'zero-width-break' && kind !== 'soft-hyphen') {
- return { segmentIndex, graphemeIndex: 0 }
- }
- segmentIndex++
- }
-
- if (chunk.consumedEndSegmentIndex >= prepared.widths.length) return null
- return { segmentIndex: chunk.consumedEndSegmentIndex, graphemeIndex: 0 }
-}
-
-export function countPreparedLines(prepared: PreparedLineBreakData, maxWidth: number): number {
- if (prepared.simpleLineWalkFastPath) {
- return countPreparedLinesSimple(prepared, maxWidth)
- }
- return walkPreparedLines(prepared, maxWidth)
-}
-
-function countPreparedLinesSimple(prepared: PreparedLineBreakData, maxWidth: number): number {
- const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared
- if (widths.length === 0) return 0
-
- const engineProfile = getEngineProfile()
- const lineFitEpsilon = engineProfile.lineFitEpsilon
-
- let lineCount = 0
- let lineW = 0
- let hasContent = false
-
- function placeOnFreshLine(segmentIndex: number): void {
- const w = widths[segmentIndex]!
- if (w > maxWidth && breakableWidths[segmentIndex] !== null) {
- const gWidths = breakableWidths[segmentIndex]!
- const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
- lineW = 0
- for (let g = 0; g < gWidths.length; g++) {
- const gw = getBreakableAdvance(
- gWidths,
- gPrefixWidths,
- g,
- engineProfile.preferPrefixWidthsForBreakableRuns,
- )
- if (lineW > 0 && lineW + gw > maxWidth + lineFitEpsilon) {
- lineCount++
- lineW = gw
- } else {
- if (lineW === 0) lineCount++
- lineW += gw
- }
- }
- } else {
- lineW = w
- lineCount++
- }
- hasContent = true
- }
-
- for (let i = 0; i < widths.length; i++) {
- const w = widths[i]!
- const kind = kinds[i]!
-
- if (!hasContent) {
- placeOnFreshLine(i)
- continue
- }
-
- const newW = lineW + w
- if (newW > maxWidth + lineFitEpsilon) {
- if (isSimpleCollapsibleSpace(kind)) continue
- lineW = 0
- hasContent = false
- placeOnFreshLine(i)
- continue
- }
-
- lineW = newW
- }
-
- if (!hasContent) return lineCount + 1
- return lineCount
-}
-
-function walkPreparedLinesSimple(
- prepared: PreparedLineBreakData,
- maxWidth: number,
- onLine?: (line: InternalLayoutLine) => void,
-): number {
- const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared
- if (widths.length === 0) return 0
-
- const engineProfile = getEngineProfile()
- const lineFitEpsilon = engineProfile.lineFitEpsilon
-
- let lineCount = 0
- let lineW = 0
- let hasContent = false
- let lineStartSegmentIndex = 0
- let lineStartGraphemeIndex = 0
- let lineEndSegmentIndex = 0
- let lineEndGraphemeIndex = 0
- let pendingBreakSegmentIndex = -1
- let pendingBreakPaintWidth = 0
-
- function clearPendingBreak(): void {
- pendingBreakSegmentIndex = -1
- pendingBreakPaintWidth = 0
- }
-
- function emitCurrentLine(
- endSegmentIndex = lineEndSegmentIndex,
- endGraphemeIndex = lineEndGraphemeIndex,
- width = lineW,
- ): void {
- lineCount++
- onLine?.({
- startSegmentIndex: lineStartSegmentIndex,
- startGraphemeIndex: lineStartGraphemeIndex,
- endSegmentIndex,
- endGraphemeIndex,
- width,
- })
- lineW = 0
- hasContent = false
- clearPendingBreak()
- }
-
- function startLineAtSegment(segmentIndex: number, width: number): void {
- hasContent = true
- lineStartSegmentIndex = segmentIndex
- lineStartGraphemeIndex = 0
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- lineW = width
- }
-
- function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void {
- hasContent = true
- lineStartSegmentIndex = segmentIndex
- lineStartGraphemeIndex = graphemeIndex
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = graphemeIndex + 1
- lineW = width
- }
-
- function appendWholeSegment(segmentIndex: number, width: number): void {
- if (!hasContent) {
- startLineAtSegment(segmentIndex, width)
- return
- }
- lineW += width
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- }
-
- function updatePendingBreak(segmentIndex: number, segmentWidth: number): void {
- if (!canBreakAfter(kinds[segmentIndex]!)) return
- pendingBreakSegmentIndex = segmentIndex + 1
- pendingBreakPaintWidth = lineW - segmentWidth
- }
-
- function appendBreakableSegment(segmentIndex: number): void {
- appendBreakableSegmentFrom(segmentIndex, 0)
- }
-
- function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void {
- const gWidths = breakableWidths[segmentIndex]!
- const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
- for (let g = startGraphemeIndex; g < gWidths.length; g++) {
- const gw = getBreakableAdvance(
- gWidths,
- gPrefixWidths,
- g,
- engineProfile.preferPrefixWidthsForBreakableRuns,
- )
-
- if (!hasContent) {
- startLineAtGrapheme(segmentIndex, g, gw)
- continue
- }
-
- if (lineW + gw > maxWidth + lineFitEpsilon) {
- emitCurrentLine()
- startLineAtGrapheme(segmentIndex, g, gw)
- } else {
- lineW += gw
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = g + 1
- }
- }
-
- if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) {
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- }
- }
-
- let i = 0
- while (i < widths.length) {
- const w = widths[i]!
- const kind = kinds[i]!
-
- if (!hasContent) {
- if (w > maxWidth && breakableWidths[i] !== null) {
- appendBreakableSegment(i)
- } else {
- startLineAtSegment(i, w)
- }
- updatePendingBreak(i, w)
- i++
- continue
- }
-
- const newW = lineW + w
- if (newW > maxWidth + lineFitEpsilon) {
- if (canBreakAfter(kind)) {
- appendWholeSegment(i, w)
- emitCurrentLine(i + 1, 0, lineW - w)
- i++
- continue
- }
-
- if (pendingBreakSegmentIndex >= 0) {
- emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
- continue
- }
-
- if (w > maxWidth && breakableWidths[i] !== null) {
- emitCurrentLine()
- appendBreakableSegment(i)
- i++
- continue
- }
-
- emitCurrentLine()
- continue
- }
-
- appendWholeSegment(i, w)
- updatePendingBreak(i, w)
- i++
- }
-
- if (hasContent) emitCurrentLine()
- return lineCount
-}
-
-export function walkPreparedLines(
- prepared: PreparedLineBreakData,
- maxWidth: number,
- onLine?: (line: InternalLayoutLine) => void,
-): number {
- if (prepared.simpleLineWalkFastPath) {
- return walkPreparedLinesSimple(prepared, maxWidth, onLine)
- }
-
- const {
- widths,
- lineEndFitAdvances,
- lineEndPaintAdvances,
- kinds,
- breakableWidths,
- breakablePrefixWidths,
- discretionaryHyphenWidth,
- tabStopAdvance,
- chunks,
- } = prepared
- if (widths.length === 0 || chunks.length === 0) return 0
-
- const engineProfile = getEngineProfile()
- const lineFitEpsilon = engineProfile.lineFitEpsilon
-
- let lineCount = 0
- let lineW = 0
- let hasContent = false
- let lineStartSegmentIndex = 0
- let lineStartGraphemeIndex = 0
- let lineEndSegmentIndex = 0
- let lineEndGraphemeIndex = 0
- let pendingBreakSegmentIndex = -1
- let pendingBreakFitWidth = 0
- let pendingBreakPaintWidth = 0
- let pendingBreakKind: SegmentBreakKind | null = null
-
- function clearPendingBreak(): void {
- pendingBreakSegmentIndex = -1
- pendingBreakFitWidth = 0
- pendingBreakPaintWidth = 0
- pendingBreakKind = null
- }
-
- function emitCurrentLine(
- endSegmentIndex = lineEndSegmentIndex,
- endGraphemeIndex = lineEndGraphemeIndex,
- width = lineW,
- ): void {
- lineCount++
- onLine?.({
- startSegmentIndex: lineStartSegmentIndex,
- startGraphemeIndex: lineStartGraphemeIndex,
- endSegmentIndex,
- endGraphemeIndex,
- width,
- })
- lineW = 0
- hasContent = false
- clearPendingBreak()
- }
-
- function startLineAtSegment(segmentIndex: number, width: number): void {
- hasContent = true
- lineStartSegmentIndex = segmentIndex
- lineStartGraphemeIndex = 0
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- lineW = width
- }
-
- function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void {
- hasContent = true
- lineStartSegmentIndex = segmentIndex
- lineStartGraphemeIndex = graphemeIndex
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = graphemeIndex + 1
- lineW = width
- }
-
- function appendWholeSegment(segmentIndex: number, width: number): void {
- if (!hasContent) {
- startLineAtSegment(segmentIndex, width)
- return
- }
- lineW += width
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- }
-
- function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void {
- if (!canBreakAfter(kinds[segmentIndex]!)) return
- const fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]!
- const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]!
- pendingBreakSegmentIndex = segmentIndex + 1
- pendingBreakFitWidth = lineW - segmentWidth + fitAdvance
- pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance
- pendingBreakKind = kinds[segmentIndex]!
- }
-
- function appendBreakableSegment(segmentIndex: number): void {
- appendBreakableSegmentFrom(segmentIndex, 0)
- }
-
- function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void {
- const gWidths = breakableWidths[segmentIndex]!
- const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
- for (let g = startGraphemeIndex; g < gWidths.length; g++) {
- const gw = getBreakableAdvance(
- gWidths,
- gPrefixWidths,
- g,
- engineProfile.preferPrefixWidthsForBreakableRuns,
- )
-
- if (!hasContent) {
- startLineAtGrapheme(segmentIndex, g, gw)
- continue
- }
-
- if (lineW + gw > maxWidth + lineFitEpsilon) {
- emitCurrentLine()
- startLineAtGrapheme(segmentIndex, g, gw)
- } else {
- lineW += gw
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = g + 1
- }
- }
-
- if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) {
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- }
- }
-
- function continueSoftHyphenBreakableSegment(segmentIndex: number): boolean {
- if (pendingBreakKind !== 'soft-hyphen') return false
- const gWidths = breakableWidths[segmentIndex]!
- if (gWidths === null) return false
- const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns
- ? breakablePrefixWidths[segmentIndex] ?? gWidths
- : gWidths
- const usesPrefixWidths = fitWidths !== gWidths
- const { fitCount, fittedWidth } = fitSoftHyphenBreak(
- fitWidths,
- lineW,
- maxWidth,
- lineFitEpsilon,
- discretionaryHyphenWidth,
- usesPrefixWidths,
- )
- if (fitCount === 0) return false
-
- lineW = fittedWidth
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = fitCount
- clearPendingBreak()
-
- if (fitCount === gWidths.length) {
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- return true
- }
-
- emitCurrentLine(
- segmentIndex,
- fitCount,
- fittedWidth + discretionaryHyphenWidth,
- )
- appendBreakableSegmentFrom(segmentIndex, fitCount)
- return true
- }
-
- function emitEmptyChunk(chunk: { startSegmentIndex: number, consumedEndSegmentIndex: number }): void {
- lineCount++
- onLine?.({
- startSegmentIndex: chunk.startSegmentIndex,
- startGraphemeIndex: 0,
- endSegmentIndex: chunk.consumedEndSegmentIndex,
- endGraphemeIndex: 0,
- width: 0,
- })
- clearPendingBreak()
- }
-
- for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
- const chunk = chunks[chunkIndex]!
- if (chunk.startSegmentIndex === chunk.endSegmentIndex) {
- emitEmptyChunk(chunk)
- continue
- }
-
- hasContent = false
- lineW = 0
- lineStartSegmentIndex = chunk.startSegmentIndex
- lineStartGraphemeIndex = 0
- lineEndSegmentIndex = chunk.startSegmentIndex
- lineEndGraphemeIndex = 0
- clearPendingBreak()
-
- let i = chunk.startSegmentIndex
- while (i < chunk.endSegmentIndex) {
- const kind = kinds[i]!
- const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]!
-
- if (kind === 'soft-hyphen') {
- if (hasContent) {
- lineEndSegmentIndex = i + 1
- lineEndGraphemeIndex = 0
- pendingBreakSegmentIndex = i + 1
- pendingBreakFitWidth = lineW + discretionaryHyphenWidth
- pendingBreakPaintWidth = lineW + discretionaryHyphenWidth
- pendingBreakKind = kind
- }
- i++
- continue
- }
-
- if (!hasContent) {
- if (w > maxWidth && breakableWidths[i] !== null) {
- appendBreakableSegment(i)
- } else {
- startLineAtSegment(i, w)
- }
- updatePendingBreakForWholeSegment(i, w)
- i++
- continue
- }
-
- const newW = lineW + w
- if (newW > maxWidth + lineFitEpsilon) {
- const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!)
- const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!)
-
- if (
- pendingBreakKind === 'soft-hyphen' &&
- engineProfile.preferEarlySoftHyphenBreak &&
- pendingBreakFitWidth <= maxWidth + lineFitEpsilon
- ) {
- emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
- continue
- }
-
- if (pendingBreakKind === 'soft-hyphen' && continueSoftHyphenBreakableSegment(i)) {
- i++
- continue
- }
-
- if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) {
- appendWholeSegment(i, w)
- emitCurrentLine(i + 1, 0, currentBreakPaintWidth)
- i++
- continue
- }
-
- if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) {
- emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
- continue
- }
-
- if (w > maxWidth && breakableWidths[i] !== null) {
- emitCurrentLine()
- appendBreakableSegment(i)
- i++
- continue
- }
-
- emitCurrentLine()
- continue
- }
-
- appendWholeSegment(i, w)
- updatePendingBreakForWholeSegment(i, w)
- i++
- }
-
- if (hasContent) {
- const finalPaintWidth =
- pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex
- ? pendingBreakPaintWidth
- : lineW
- emitCurrentLine(chunk.consumedEndSegmentIndex, 0, finalPaintWidth)
- }
- }
-
- return lineCount
-}
-
-export function layoutNextLineRange(
- prepared: PreparedLineBreakData,
- start: LineBreakCursor,
- maxWidth: number,
-): InternalLayoutLine | null {
- const normalizedStart = normalizeLineStart(prepared, start)
- if (normalizedStart === null) return null
-
- if (prepared.simpleLineWalkFastPath) {
- return layoutNextLineRangeSimple(prepared, normalizedStart, maxWidth)
- }
-
- const chunkIndex = findChunkIndexForStart(prepared, normalizedStart.segmentIndex)
- if (chunkIndex < 0) return null
-
- const chunk = prepared.chunks[chunkIndex]!
- if (chunk.startSegmentIndex === chunk.endSegmentIndex) {
- return {
- startSegmentIndex: chunk.startSegmentIndex,
- startGraphemeIndex: 0,
- endSegmentIndex: chunk.consumedEndSegmentIndex,
- endGraphemeIndex: 0,
- width: 0,
- }
- }
-
- const {
- widths,
- lineEndFitAdvances,
- lineEndPaintAdvances,
- kinds,
- breakableWidths,
- breakablePrefixWidths,
- discretionaryHyphenWidth,
- tabStopAdvance,
- } = prepared
- const engineProfile = getEngineProfile()
- const lineFitEpsilon = engineProfile.lineFitEpsilon
-
- let lineW = 0
- let hasContent = false
- const lineStartSegmentIndex = normalizedStart.segmentIndex
- const lineStartGraphemeIndex = normalizedStart.graphemeIndex
- let lineEndSegmentIndex = lineStartSegmentIndex
- let lineEndGraphemeIndex = lineStartGraphemeIndex
- let pendingBreakSegmentIndex = -1
- let pendingBreakFitWidth = 0
- let pendingBreakPaintWidth = 0
- let pendingBreakKind: SegmentBreakKind | null = null
-
- function clearPendingBreak(): void {
- pendingBreakSegmentIndex = -1
- pendingBreakFitWidth = 0
- pendingBreakPaintWidth = 0
- pendingBreakKind = null
- }
-
- function finishLine(
- endSegmentIndex = lineEndSegmentIndex,
- endGraphemeIndex = lineEndGraphemeIndex,
- width = lineW,
- ): InternalLayoutLine | null {
- if (!hasContent) return null
-
- return {
- startSegmentIndex: lineStartSegmentIndex,
- startGraphemeIndex: lineStartGraphemeIndex,
- endSegmentIndex,
- endGraphemeIndex,
- width,
- }
- }
-
- function startLineAtSegment(segmentIndex: number, width: number): void {
- hasContent = true
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- lineW = width
- }
-
- function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void {
- hasContent = true
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = graphemeIndex + 1
- lineW = width
- }
-
- function appendWholeSegment(segmentIndex: number, width: number): void {
- if (!hasContent) {
- startLineAtSegment(segmentIndex, width)
- return
- }
- lineW += width
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- }
-
- function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void {
- if (!canBreakAfter(kinds[segmentIndex]!)) return
- const fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]!
- const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]!
- pendingBreakSegmentIndex = segmentIndex + 1
- pendingBreakFitWidth = lineW - segmentWidth + fitAdvance
- pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance
- pendingBreakKind = kinds[segmentIndex]!
- }
-
- function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null {
- const gWidths = breakableWidths[segmentIndex]!
- const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
- for (let g = startGraphemeIndex; g < gWidths.length; g++) {
- const gw = getBreakableAdvance(
- gWidths,
- gPrefixWidths,
- g,
- engineProfile.preferPrefixWidthsForBreakableRuns,
- )
-
- if (!hasContent) {
- startLineAtGrapheme(segmentIndex, g, gw)
- continue
- }
-
- if (lineW + gw > maxWidth + lineFitEpsilon) {
- return finishLine()
- }
-
- lineW += gw
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = g + 1
- }
-
- if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) {
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- }
- return null
- }
-
- function maybeFinishAtSoftHyphen(segmentIndex: number): InternalLayoutLine | null {
- if (pendingBreakKind !== 'soft-hyphen' || pendingBreakSegmentIndex < 0) return null
-
- const gWidths = breakableWidths[segmentIndex] ?? null
- if (gWidths !== null) {
- const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns
- ? breakablePrefixWidths[segmentIndex] ?? gWidths
- : gWidths
- const usesPrefixWidths = fitWidths !== gWidths
- const { fitCount, fittedWidth } = fitSoftHyphenBreak(
- fitWidths,
- lineW,
- maxWidth,
- lineFitEpsilon,
- discretionaryHyphenWidth,
- usesPrefixWidths,
- )
-
- if (fitCount === gWidths.length) {
- lineW = fittedWidth
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- clearPendingBreak()
- return null
- }
-
- if (fitCount > 0) {
- return finishLine(
- segmentIndex,
- fitCount,
- fittedWidth + discretionaryHyphenWidth,
- )
- }
- }
-
- if (pendingBreakFitWidth <= maxWidth + lineFitEpsilon) {
- return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
- }
-
- return null
- }
-
- for (let i = normalizedStart.segmentIndex; i < chunk.endSegmentIndex; i++) {
- const kind = kinds[i]!
- const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0
- const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]!
-
- if (kind === 'soft-hyphen' && startGraphemeIndex === 0) {
- if (hasContent) {
- lineEndSegmentIndex = i + 1
- lineEndGraphemeIndex = 0
- pendingBreakSegmentIndex = i + 1
- pendingBreakFitWidth = lineW + discretionaryHyphenWidth
- pendingBreakPaintWidth = lineW + discretionaryHyphenWidth
- pendingBreakKind = kind
- }
- continue
- }
-
- if (!hasContent) {
- if (startGraphemeIndex > 0) {
- const line = appendBreakableSegmentFrom(i, startGraphemeIndex)
- if (line !== null) return line
- } else if (w > maxWidth && breakableWidths[i] !== null) {
- const line = appendBreakableSegmentFrom(i, 0)
- if (line !== null) return line
- } else {
- startLineAtSegment(i, w)
- }
- updatePendingBreakForWholeSegment(i, w)
- continue
- }
-
- const newW = lineW + w
- if (newW > maxWidth + lineFitEpsilon) {
- const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!)
- const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!)
-
- if (
- pendingBreakKind === 'soft-hyphen' &&
- engineProfile.preferEarlySoftHyphenBreak &&
- pendingBreakFitWidth <= maxWidth + lineFitEpsilon
- ) {
- return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
- }
-
- const softBreakLine = maybeFinishAtSoftHyphen(i)
- if (softBreakLine !== null) return softBreakLine
-
- if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) {
- appendWholeSegment(i, w)
- return finishLine(i + 1, 0, currentBreakPaintWidth)
- }
-
- if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) {
- return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
- }
-
- if (w > maxWidth && breakableWidths[i] !== null) {
- const currentLine = finishLine()
- if (currentLine !== null) return currentLine
- const line = appendBreakableSegmentFrom(i, 0)
- if (line !== null) return line
- }
-
- return finishLine()
- }
-
- appendWholeSegment(i, w)
- updatePendingBreakForWholeSegment(i, w)
- }
-
- if (pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex && lineEndGraphemeIndex === 0) {
- return finishLine(chunk.consumedEndSegmentIndex, 0, pendingBreakPaintWidth)
- }
-
- return finishLine(chunk.consumedEndSegmentIndex, 0, lineW)
-}
-
-function layoutNextLineRangeSimple(
- prepared: PreparedLineBreakData,
- normalizedStart: LineBreakCursor,
- maxWidth: number,
-): InternalLayoutLine | null {
- const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared
- const engineProfile = getEngineProfile()
- const lineFitEpsilon = engineProfile.lineFitEpsilon
-
- let lineW = 0
- let hasContent = false
- const lineStartSegmentIndex = normalizedStart.segmentIndex
- const lineStartGraphemeIndex = normalizedStart.graphemeIndex
- let lineEndSegmentIndex = lineStartSegmentIndex
- let lineEndGraphemeIndex = lineStartGraphemeIndex
- let pendingBreakSegmentIndex = -1
- let pendingBreakPaintWidth = 0
-
- function finishLine(
- endSegmentIndex = lineEndSegmentIndex,
- endGraphemeIndex = lineEndGraphemeIndex,
- width = lineW,
- ): InternalLayoutLine | null {
- if (!hasContent) return null
-
- return {
- startSegmentIndex: lineStartSegmentIndex,
- startGraphemeIndex: lineStartGraphemeIndex,
- endSegmentIndex,
- endGraphemeIndex,
- width,
- }
- }
-
- function startLineAtSegment(segmentIndex: number, width: number): void {
- hasContent = true
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- lineW = width
- }
-
- function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void {
- hasContent = true
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = graphemeIndex + 1
- lineW = width
- }
-
- function appendWholeSegment(segmentIndex: number, width: number): void {
- if (!hasContent) {
- startLineAtSegment(segmentIndex, width)
- return
- }
- lineW += width
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- }
-
- function updatePendingBreak(segmentIndex: number, segmentWidth: number): void {
- if (!canBreakAfter(kinds[segmentIndex]!)) return
- pendingBreakSegmentIndex = segmentIndex + 1
- pendingBreakPaintWidth = lineW - segmentWidth
- }
-
- function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null {
- const gWidths = breakableWidths[segmentIndex]!
- const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
- for (let g = startGraphemeIndex; g < gWidths.length; g++) {
- const gw = getBreakableAdvance(
- gWidths,
- gPrefixWidths,
- g,
- engineProfile.preferPrefixWidthsForBreakableRuns,
- )
-
- if (!hasContent) {
- startLineAtGrapheme(segmentIndex, g, gw)
- continue
- }
-
- if (lineW + gw > maxWidth + lineFitEpsilon) {
- return finishLine()
- }
-
- lineW += gw
- lineEndSegmentIndex = segmentIndex
- lineEndGraphemeIndex = g + 1
- }
-
- if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) {
- lineEndSegmentIndex = segmentIndex + 1
- lineEndGraphemeIndex = 0
- }
- return null
- }
-
- for (let i = normalizedStart.segmentIndex; i < widths.length; i++) {
- const w = widths[i]!
- const kind = kinds[i]!
- const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0
-
- if (!hasContent) {
- if (startGraphemeIndex > 0) {
- const line = appendBreakableSegmentFrom(i, startGraphemeIndex)
- if (line !== null) return line
- } else if (w > maxWidth && breakableWidths[i] !== null) {
- const line = appendBreakableSegmentFrom(i, 0)
- if (line !== null) return line
- } else {
- startLineAtSegment(i, w)
- }
- updatePendingBreak(i, w)
- continue
- }
-
- const newW = lineW + w
- if (newW > maxWidth + lineFitEpsilon) {
- if (canBreakAfter(kind)) {
- appendWholeSegment(i, w)
- return finishLine(i + 1, 0, lineW - w)
- }
-
- if (pendingBreakSegmentIndex >= 0) {
- return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
- }
-
- if (w > maxWidth && breakableWidths[i] !== null) {
- const currentLine = finishLine()
- if (currentLine !== null) return currentLine
- const line = appendBreakableSegmentFrom(i, 0)
- if (line !== null) return line
- }
-
- return finishLine()
- }
-
- appendWholeSegment(i, w)
- updatePendingBreak(i, w)
- }
-
- return finishLine()
-}
+export * from './pretext/line-break.js'
diff --git a/packages/lynx-pretext/src/measurement.ts b/packages/lynx-pretext/src/measurement.ts
index 11d71c2..b15fda7 100644
--- a/packages/lynx-pretext/src/measurement.ts
+++ b/packages/lynx-pretext/src/measurement.ts
@@ -1,4 +1,5 @@
import { isCJK } from './analysis'
+import type { MeasurementHost } from './pretext/host.js'
export type SegmentMetrics = {
width: number
@@ -15,6 +16,12 @@ export type EngineProfile = {
preferEarlySoftHyphenBreak: boolean
}
+// Shape of the value returned by getFontMeasurementState(): a metrics cache
+// together with the parsed font size and an emoji width correction.
+export type FontMeasurementState = {
+  // Type arguments restored: a bare `Map` does not type-check in TypeScript.
+  // NOTE(review): presumably keyed by segment text -- confirm against
+  // getSegmentMetrics.
+  cache: Map<string, SegmentMetrics>
+  fontSize: number
+  emojiCorrection: number
+}
+
// Module-level font context (replaces Canvas ctx.font)
let currentFontSizeStr: string = '16px'
let currentFontFamily: string | undefined = undefined
@@ -127,11 +134,7 @@ export function getSegmentGraphemePrefixWidths(
return metrics.graphemePrefixWidths
}
-export function getFontMeasurementState(font: string, needsEmojiCorrection: boolean): {
- cache: Map
- fontSize: number
- emojiCorrection: number
-} {
+export function getFontMeasurementState(font: string, needsEmojiCorrection: boolean): FontMeasurementState {
const fontSize = parseFontSize(font)
const fontFamily = parseFontFamily(font)
// Set module-level font context for getSegmentMetrics
@@ -147,3 +150,16 @@ export function clearMeasurementCaches(): void {
segmentMetricCaches.clear()
sharedGraphemeSegmenter = null
}
+
+// Bundles this module's measurement functions into a single object typed as
+// MeasurementHost (the type imported from './pretext/host.js').
+export const lynxMeasurementHost: MeasurementHost = {
+ clearMeasurementCaches,
+ getSegmentMetrics,
+ getEngineProfile,
+ getCorrectedSegmentWidth,
+ getSegmentGraphemeWidths,
+ getSegmentGraphemePrefixWidths,
+ getFontMeasurementState,
+ textMayContainEmoji,
+}
+
+export type { MeasurementHost }
diff --git a/packages/lynx-pretext/src/pretext/analysis.ts b/packages/lynx-pretext/src/pretext/analysis.ts
new file mode 100644
index 0000000..a4200d9
--- /dev/null
+++ b/packages/lynx-pretext/src/pretext/analysis.ts
@@ -0,0 +1,1019 @@
+export type WhiteSpaceMode = 'normal' | 'pre-wrap'
+
+// Break classification attached to each segment. classifySegmentBreakChar()
+// derives these from single characters; anything it does not recognize is 'text'.
+export type SegmentBreakKind =
+ | 'text'
+ | 'space' // ' ' when white space is not preserved
+ | 'preserved-space' // ' ' under a preserving white-space profile
+ | 'tab' // '\t' under a preserving white-space profile
+ | 'glue' // U+00A0, U+202F, U+2060, U+FEFF
+ | 'zero-width-break' // U+200B
+ | 'soft-hyphen' // U+00AD
+ | 'hard-break' // '\n' when hard breaks are preserved
+
+type SegmentationPiece = {
+ text: string
+ isWordLike: boolean
+ kind: SegmentBreakKind
+ start: number
+}
+
+export type MergedSegmentation = {
+ len: number
+ texts: string[]
+ isWordLike: boolean[]
+ kinds: SegmentBreakKind[]
+ starts: number[]
+}
+
+export type AnalysisChunk = {
+ startSegmentIndex: number
+ endSegmentIndex: number
+ consumedEndSegmentIndex: number
+}
+
+export type TextAnalysis = { normalized: string, chunks: AnalysisChunk[] } & MergedSegmentation
+
+export type AnalysisProfile = {
+ carryCJKAfterClosingQuote: boolean
+}
+
+// Any run of space, tab, LF, CR or FF; normalizeWhitespaceNormal() replaces each run with one space.
+const collapsibleWhitespaceRunRe = /[ \t\n\r\f]+/g
+// Cheap pre-check: a tab/LF/CR/FF anywhere, two or more consecutive spaces, or a leading/trailing space.
+const needsWhitespaceNormalizationRe = /[\t\n\r\f]| {2,}|^ | $/
+
+// Explicit flags derived from a WhiteSpaceMode by getWhiteSpaceProfile();
+// both preserve* flags are true only for 'pre-wrap'.
+type WhiteSpaceProfile = {
+ mode: WhiteSpaceMode
+ preserveOrdinarySpaces: boolean
+ preserveHardBreaks: boolean
+}
+
+// Expands an optional white-space mode into explicit preservation flags.
+// A missing mode is treated as 'normal'; both flags are set only for 'pre-wrap'.
+function getWhiteSpaceProfile(whiteSpace?: WhiteSpaceMode): WhiteSpaceProfile {
+  const mode = whiteSpace ?? 'normal'
+  const preserves = mode === 'pre-wrap'
+  return {
+    mode,
+    preserveOrdinarySpaces: preserves,
+    preserveHardBreaks: preserves,
+  }
+}
+
+// Collapses every run of space/tab/LF/CR/FF to a single space and drops one
+// leading and one trailing space. Text that needs no change is returned as-is.
+export function normalizeWhitespaceNormal(text: string): string {
+  if (!needsWhitespaceNormalizationRe.test(text)) {
+    return text
+  }
+
+  const collapsed = text.replace(collapsibleWhitespaceRunRe, ' ')
+  // After collapsing, at most one space can remain at either edge.
+  const withoutLead = collapsed.startsWith(' ') ? collapsed.slice(1) : collapsed
+  return withoutLead.endsWith(' ') ? withoutLead.slice(0, -1) : withoutLead
+}
+
+// Rewrites every CRLF pair, lone CR and form feed to a single '\n'; all other
+// characters (including spaces and tabs) are left untouched.
+function normalizeWhitespacePreWrap(text: string): string {
+  // `\r\n?` consumes a CRLF pair as one match, so it yields one '\n', not two.
+  return text.replace(/\r\n?|\f/g, '\n')
+}
+
+// Word segmenter created on first use by getSharedWordSegmenter(); set back to null by clearAnalysisCaches() and by a locale change.
+let sharedWordSegmenter: Intl.Segmenter | null = null
+// Locale handed to Intl.Segmenter; undefined until setAnalysisLocale() is given a non-empty locale.
+let segmenterLocale: string | undefined
+
+// Returns the module-wide word-granularity segmenter, constructing it with the
+// current segmenterLocale the first time it is needed.
+function getSharedWordSegmenter(): Intl.Segmenter {
+  const existing = sharedWordSegmenter
+  if (existing !== null) return existing
+
+  const created = new Intl.Segmenter(segmenterLocale, { granularity: 'word' })
+  sharedWordSegmenter = created
+  return created
+}
+
+// Drops the cached word segmenter; getSharedWordSegmenter() builds a new one on its next call.
+export function clearAnalysisCaches(): void {
+ sharedWordSegmenter = null
+}
+
+// Sets the locale used for word segmentation. An omitted or empty locale means
+// "no explicit locale". The cached segmenter is discarded only when the
+// effective locale actually changes.
+export function setAnalysisLocale(locale?: string): void {
+  const requested = locale ? locale : undefined
+  if (requested !== segmenterLocale) {
+    segmenterLocale = requested
+    sharedWordSegmenter = null
+  }
+}
+
+const arabicScriptFallbackRe = /[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061C-\u061E\u0620-\u063F\u0641-\u064A\u0656-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u0870-\u0891\u0897-\u08E1\u08E3-\u08FF\uFB50-\uFD3D\uFD40-\uFDCF\uFDF0-\uFDFF\uFE70-\uFE74\uFE76-\uFEFC\u{10E60}-\u{10E7E}\u{10EC2}-\u{10EC7}\u{10ED0}-\u{10ED8}\u{10EFA}-\u{10EFF}\u{1EE00}-\u{1EE03}\u{1EE05}-\u{1EE1F}\u{1EE21}\u{1EE22}\u{1EE24}\u{1EE27}\u{1EE29}-\u{1EE32}\u{1EE34}-\u{1EE37}\u{1EE39}\u{1EE3B}\u{1EE42}\u{1EE47}\u{1EE49}\u{1EE4B}\u{1EE4D}-\u{1EE4F}\u{1EE51}\u{1EE52}\u{1EE54}\u{1EE57}\u{1EE59}\u{1EE5B}\u{1EE5D}\u{1EE5F}\u{1EE61}\u{1EE62}\u{1EE64}\u{1EE67}-\u{1EE6A}\u{1EE6C}-\u{1EE72}\u{1EE74}-\u{1EE77}\u{1EE79}-\u{1EE7C}\u{1EE7E}\u{1EE80}-\u{1EE89}\u{1EE8B}-\u{1EE9B}\u{1EEA1}-\u{1EEA3}\u{1EEA5}-\u{1EEA9}\u{1EEAB}-\u{1EEBB}\u{1EEF0}\u{1EEF1}]/u
+const combiningMarkFallbackRe = /[\u0300-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u0897-\u089F\u08CA-\u08E1\u08E3-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u09FE\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0AFA-\u0AFF\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B55-\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C00-\u0C04\u0C3C\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0CF3\u0D00-\u0D03\u0D3B\u0D3C\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D81-\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EBC\u0EC8-\u0ECE\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102B-\u103E\u1056-\u1059\u105E-\u1060\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F\u109A-\u109D\u135D-\u135F\u1712-\u1715\u1732-\u1734\u1752\u1753\u1772\u1773\u17B4-\u17D3\u17DD\u180B-\u180D\u180F\u1885\u1886\u18A9\u1920-\u192B\u1930-\u193B\u1A17-\u1A1B\u1A55-\u1A5E\u1A60-\u1A7C\u1A7F\u1AB0-\u1ADD\u1AE0-\u1AEB\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF4\u1CF7-\u1CF9\u1DC0-\u1DFF\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3099\u309A\uA66F-\uA672\uA674-\uA67D\uA69E\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA82C\uA880\uA881\uA8B4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uA9E5\uAA2
9-\uAA36\uAA43\uAA4C\uAA4D\uAA7B-\uAA7D\uAAB0\uAAB2-\uAAB4\uAAB7\uAAB8\uAABE\uAABF\uAAC1\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\u{101FD}\u{102E0}\u{10376}-\u{1037A}\u{10A01}-\u{10A03}\u{10A05}\u{10A06}\u{10A0C}-\u{10A0F}\u{10A38}-\u{10A3A}\u{10A3F}\u{10AE5}\u{10AE6}\u{10D24}-\u{10D27}\u{10D69}-\u{10D6D}\u{10EAB}\u{10EAC}\u{10EFA}-\u{10EFF}\u{10F46}-\u{10F50}\u{10F82}-\u{10F85}\u{11000}-\u{11002}\u{11038}-\u{11046}\u{11070}\u{11073}\u{11074}\u{1107F}-\u{11082}\u{110B0}-\u{110BA}\u{110C2}\u{11100}-\u{11102}\u{11127}-\u{11134}\u{11145}\u{11146}\u{11173}\u{11180}-\u{11182}\u{111B3}-\u{111C0}\u{111C9}-\u{111CC}\u{111CE}\u{111CF}\u{1122C}-\u{11237}\u{1123E}\u{11241}\u{112DF}-\u{112EA}\u{11300}-\u{11303}\u{1133B}\u{1133C}\u{1133E}-\u{11344}\u{11347}\u{11348}\u{1134B}-\u{1134D}\u{11357}\u{11362}\u{11363}\u{11366}-\u{1136C}\u{11370}-\u{11374}\u{113B8}-\u{113C0}\u{113C2}\u{113C5}\u{113C7}-\u{113CA}\u{113CC}-\u{113D0}\u{113D2}\u{113E1}\u{113E2}\u{11435}-\u{11446}\u{1145E}\u{114B0}-\u{114C3}\u{115AF}-\u{115B5}\u{115B8}-\u{115C0}\u{115DC}\u{115DD}\u{11630}-\u{11640}\u{116AB}-\u{116B7}\u{1171D}-\u{1172B}\u{1182C}-\u{1183A}\u{11930}-\u{11935}\u{11937}\u{11938}\u{1193B}-\u{1193E}\u{11940}\u{11942}\u{11943}\u{119D1}-\u{119D7}\u{119DA}-\u{119E0}\u{119E4}\u{11A01}-\u{11A0A}\u{11A33}-\u{11A39}\u{11A3B}-\u{11A3E}\u{11A47}\u{11A51}-\u{11A5B}\u{11A8A}-\u{11A99}\u{11B60}-\u{11B67}\u{11C2F}-\u{11C36}\u{11C38}-\u{11C3F}\u{11C92}-\u{11CA7}\u{11CA9}-\u{11CB6}\u{11D31}-\u{11D36}\u{11D3A}\u{11D3C}\u{11D3D}\u{11D3F}-\u{11D45}\u{11D47}\u{11D8A}-\u{11D8E}\u{11D90}\u{11D91}\u{11D93}-\u{11D97}\u{11EF3}-\u{11EF6}\u{11F00}\u{11F01}\u{11F03}\u{11F34}-\u{11F3A}\u{11F3E}-\u{11F42}\u{11F5A}\u{13440}\u{13447}-\u{13455}\u{1611E}-\u{1612F}\u{16AF0}-\u{16AF4}\u{16B30}-\u{16B36}\u{16F4F}\u{16F51}-\u{16F87}\u{16F8F}-\u{16F92}\u{16FE4}\u{16FF0}\u{16FF1}\u{1BC9D}\u{1BC9E}\u{1CF00}-\u{1CF2D}\u{1CF30}-\u{1CF46}\u{1D165}-\u{1D169}\u{1D16D}-\u{1D172}\u{1D17B}-\u{1D182}\u{1D18
5}-\u{1D18B}\u{1D1AA}-\u{1D1AD}\u{1D242}-\u{1D244}\u{1DA00}-\u{1DA36}\u{1DA3B}-\u{1DA6C}\u{1DA75}\u{1DA84}\u{1DA9B}-\u{1DA9F}\u{1DAA1}-\u{1DAAF}\u{1E000}-\u{1E006}\u{1E008}-\u{1E018}\u{1E01B}-\u{1E021}\u{1E023}\u{1E024}\u{1E026}-\u{1E02A}\u{1E08F}\u{1E130}-\u{1E136}\u{1E2AE}\u{1E2EC}-\u{1E2EF}\u{1E4EC}-\u{1E4EF}\u{1E5EE}\u{1E5EF}\u{1E6E3}\u{1E6E6}\u{1E6EE}\u{1E6EF}\u{1E6F5}\u{1E8D0}-\u{1E8D6}\u{1E944}-\u{1E94A}\u{E0100}-\u{E01EF}]/u
+const decimalDigitFallbackRe = /[0-9\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19\u{104A0}-\u{104A9}\u{10D30}-\u{10D39}\u{10D40}-\u{10D49}\u{11066}-\u{1106F}\u{110F0}-\u{110F9}\u{11136}-\u{1113F}\u{111D0}-\u{111D9}\u{112F0}-\u{112F9}\u{11450}-\u{11459}\u{114D0}-\u{114D9}\u{11650}-\u{11659}\u{116C0}-\u{116C9}\u{116D0}-\u{116E3}\u{11730}-\u{11739}\u{118E0}-\u{118E9}\u{11950}-\u{11959}\u{11BF0}-\u{11BF9}\u{11C50}-\u{11C59}\u{11D50}-\u{11D59}\u{11DA0}-\u{11DA9}\u{11DE0}-\u{11DE9}\u{11F50}-\u{11F59}\u{16130}-\u{16139}\u{16A60}-\u{16A69}\u{16AC0}-\u{16AC9}\u{16B50}-\u{16B59}\u{16D70}-\u{16D79}\u{1CCF0}-\u{1CCF9}\u{1D7CE}-\u{1D7FF}\u{1E140}-\u{1E149}\u{1E2F0}-\u{1E2F9}\u{1E4F0}-\u{1E4F9}\u{1E5F1}-\u{1E5FA}\u{1E950}-\u{1E959}\u{1FBF0}-\u{1FBF9}]/u
+
+// Compiles `source` with the 'u' flag; if the RegExp constructor throws (for
+// example because the engine rejects the pattern), returns `fallback` instead.
+function createUnicodePropertyRegex(source: string, fallback: RegExp): RegExp {
+  let compiled: RegExp
+  try {
+    compiled = new RegExp(source, 'u')
+  } catch {
+    compiled = fallback
+  }
+  return compiled
+}
+
+// Each matcher tries a Unicode property escape first and falls back to the
+// hand-written *FallbackRe character class if constructing that RegExp throws.
+const arabicScriptRe = createUnicodePropertyRegex('\\p{Script=Arabic}', arabicScriptFallbackRe)
+const combiningMarkRe = createUnicodePropertyRegex('\\p{M}', combiningMarkFallbackRe)
+const decimalDigitRe = createUnicodePropertyRegex('\\p{Nd}', decimalDigitFallbackRe)
+
+// True when arabicScriptRe matches anywhere in `text`.
+function containsArabicScript(text: string): boolean {
+ return arabicScriptRe.test(text)
+}
+
+// True when at least one code point of `s` lies in one of the hard-coded CJK
+// ranges below. Adjacent ranges are written as a single span.
+export function isCJK(s: string): boolean {
+  let index = 0
+  while (index < s.length) {
+    const cp = s.codePointAt(index)!
+    // Astral code points occupy two UTF-16 units.
+    index += cp > 0xFFFF ? 2 : 1
+
+    const inRange =
+      (cp >= 0x3000 && cp <= 0x30FF) || // CJK symbols/punctuation, Hiragana, Katakana
+      (cp >= 0x3400 && cp <= 0x4DBF) || // Extension A
+      (cp >= 0x4E00 && cp <= 0x9FFF) || // Unified ideographs
+      (cp >= 0xAC00 && cp <= 0xD7AF) || // Hangul syllables
+      (cp >= 0xF900 && cp <= 0xFAFF) || // Compatibility ideographs
+      (cp >= 0xFF00 && cp <= 0xFFEF) || // Halfwidth and fullwidth forms
+      (cp >= 0x20000 && cp <= 0x2A6DF) || // Extension B
+      (cp >= 0x2A700 && cp <= 0x2EBEF) || // Extensions C through F
+      (cp >= 0x2F800 && cp <= 0x2FA1F) || // Compatibility ideographs supplement
+      (cp >= 0x30000 && cp <= 0x3134F) // Extension G
+    if (inRange) return true
+  }
+  return false
+}
+
+// Characters that isCJKLineStartProhibitedSegment() accepts (together with
+// leftStickyPunctuation) when deciding a segment is line-start prohibited.
+export const kinsokuStart = new Set([
+ '\uFF0C', // ，
+ '\uFF0E', // ．
+ '\uFF01', // ！
+ '\uFF1A', // ：
+ '\uFF1B', // ；
+ '\uFF1F', // ？
+ '\u3001', // 、
+ '\u3002', // 。
+ '\u30FB', // ・
+ '\uFF09', // ）
+ '\u3015', // 〕
+ '\u3009', // 〉
+ '\u300B', // 》
+ '\u300D', // 」
+ '\u300F', // 』
+ '\u3011', // 】
+ '\u3017', // 〗
+ '\u3019', // 〙
+ '\u301B', // 〛
+ '\u30FC', // ー
+ '\u3005', // 々
+ '\u303B', // 〻
+ '\u309D', // ゝ
+ '\u309E', // ゞ
+ '\u30FD', // ヽ
+ '\u30FE', // ヾ
+])
+
+// Opening quotes and brackets. isForwardStickyClusterSegment() and
+// splitTrailingForwardStickyCluster() treat these as forward-sticky characters.
+export const kinsokuEnd = new Set([
+ '"',
+ '(', '[', '{',
+ '“', '‘', '«', '‹',
+ '\uFF08', // （
+ '\u3014', // 〔
+ '\u3008', // 〈
+ '\u300A', // 《
+ '\u300C', // 「
+ '\u300E', // 『
+ '\u3010', // 【
+ '\u3016', // 〖
+ '\u3018', // 〘
+ '\u301A', // 〚
+])
+
+// Apostrophe forms that the forward-sticky checks accept alongside kinsokuEnd.
+const forwardStickyGlue = new Set([
+ "'", '’',
+])
+
+// Punctuation recognized by isLeftStickyPunctuationSegment(); also consulted by
+// isCJKLineStartProhibitedSegment() and endsWithClosingQuote().
+export const leftStickyPunctuation = new Set([
+ '.', ',', '!', '?', ':', ';',
+ '\u060C', // Arabic comma
+ '\u061B', // Arabic semicolon
+ '\u061F', // Arabic question mark
+ '\u0964', // Devanagari danda
+ '\u0965', // Devanagari double danda
+ '\u104A', // Myanmar sign little section
+ '\u104B', // Myanmar sign section
+ '\u104C', // Myanmar symbol locative
+ '\u104D', // Myanmar symbol completed
+ '\u104F', // Myanmar symbol genitive
+ ')', ']', '}',
+ '%',
+ '"',
+ '”', '’', '»', '›',
+ '…',
+])
+
+// Final characters that endsWithArabicNoSpacePunctuation() looks for on
+// segments containing Arabic script.
+const arabicNoSpaceTrailingPunctuation = new Set([
+ ':',
+ '.',
+ '\u060C', // Arabic comma
+ '\u061B', // Arabic semicolon
+])
+
+// Final characters that endsWithMyanmarMedialGlue() looks for.
+const myanmarMedialGlue = new Set([
+ '\u104F', // Myanmar symbol genitive
+])
+
+// Closing quotes and brackets that endsWithClosingQuote() searches for at the
+// end of a text.
+const closingQuoteChars = new Set([
+ '”', '’', '»', '›',
+ '\u300D', // 」
+ '\u300F', // 』
+ '\u3011', // 】
+ '\u300B', // 》
+ '\u3009', // 〉
+ '\u3015', // 〕
+ '\uFF09', // ）
+])
+
+// True for an escaped-quote cluster, or for a segment made only of
+// leftStickyPunctuation characters where combining marks are tolerated once the
+// first punctuation character has been seen. An empty segment is false.
+function isLeftStickyPunctuationSegment(segment: string): boolean {
+  if (isEscapedQuoteClusterSegment(segment)) return true
+
+  let punctuationSeen = false
+  for (const ch of segment) {
+    if (leftStickyPunctuation.has(ch)) {
+      punctuationSeen = true
+    } else if (!(punctuationSeen && combiningMarkRe.test(ch))) {
+      // Neither punctuation nor a mark attached to earlier punctuation.
+      return false
+    }
+  }
+  return punctuationSeen
+}
+
+// True when the segment is non-empty and every character belongs to
+// kinsokuStart or leftStickyPunctuation.
+function isCJKLineStartProhibitedSegment(segment: string): boolean {
+  if (segment.length === 0) return false
+  for (const ch of segment) {
+    const prohibited = kinsokuStart.has(ch) || leftStickyPunctuation.has(ch)
+    if (!prohibited) return false
+  }
+  return true
+}
+
+// True for an escaped-quote cluster, or for a non-empty segment whose every
+// character is in kinsokuEnd, in forwardStickyGlue, or is a combining mark.
+function isForwardStickyClusterSegment(segment: string): boolean {
+  if (isEscapedQuoteClusterSegment(segment)) return true
+  if (segment.length === 0) return false
+
+  for (const ch of segment) {
+    const sticky = kinsokuEnd.has(ch) || forwardStickyGlue.has(ch) || combiningMarkRe.test(ch)
+    if (!sticky) return false
+  }
+  return true
+}
+
+// True when the segment consists solely of backslashes, combining marks and
+// characters from kinsokuEnd / leftStickyPunctuation / forwardStickyGlue, and
+// contains at least one character from those three sets.
+function isEscapedQuoteClusterSegment(segment: string): boolean {
+  let quoteLikeCount = 0
+  for (const ch of segment) {
+    const ignorable = ch === '\\' || combiningMarkRe.test(ch)
+    if (ignorable) continue
+
+    const quoteLike =
+      kinsokuEnd.has(ch) || leftStickyPunctuation.has(ch) || forwardStickyGlue.has(ch)
+    if (!quoteLike) return false
+    quoteLikeCount++
+  }
+  return quoteLikeCount > 0
+}
+
+// Splits `text` into { head, tail }, where `tail` is the longest trailing run of
+// combining marks, kinsokuEnd characters and forwardStickyGlue characters.
+// Returns null when there is no such run or when it covers the whole text.
+function splitTrailingForwardStickyCluster(text: string): { head: string, tail: string } | null {
+  const codePoints = Array.from(text)
+  let cut = codePoints.length
+
+  // Walk backwards while the preceding code point still belongs to the run.
+  for (; cut > 0; cut--) {
+    const ch = codePoints[cut - 1]!
+    const inRun = combiningMarkRe.test(ch) || kinsokuEnd.has(ch) || forwardStickyGlue.has(ch)
+    if (!inRun) break
+  }
+
+  if (cut <= 0 || cut === codePoints.length) return null
+
+  const head = codePoints.slice(0, cut).join('')
+  const tail = codePoints.slice(cut).join('')
+  return { head, tail }
+}
+
+function isRepeatedSingleCharRun(segment: string, ch: string): boolean {
+  // True only for a non-empty segment whose every code point equals `ch`.
+  return (
+    segment.length > 0 &&
+    Array.from(segment).every((unit) => unit === ch)
+  )
+}
+
+function endsWithArabicNoSpacePunctuation(segment: string): boolean {
+  const hasArabicText = containsArabicScript(segment) && segment.length > 0 // script check runs first, as before
+  return hasArabicText && arabicNoSpaceTrailingPunctuation.has(segment.charAt(segment.length - 1))
+}
+
+function endsWithMyanmarMedialGlue(segment: string): boolean {
+  const lastUnit = segment.slice(-1) // last UTF-16 unit, or '' for an empty segment
+  return lastUnit.length > 0 && myanmarMedialGlue.has(lastUnit)
+}
+
+function splitLeadingSpaceAndMarks(segment: string): { space: string, marks: string } | null {
+  // Matches exactly one leading space followed by one or more combining marks.
+  const startsWithSpace = segment.length >= 2 && segment[0] === ' '
+  if (!startsWithSpace) return null
+  const marks = segment.slice(1)
+  const onlyMarks = Array.from(marks).every((unit) => combiningMarkRe.test(unit))
+  return onlyMarks ? { space: ' ', marks } : null
+}
+
+export function endsWithClosingQuote(text: string): boolean {
+  let i = text.length // scan backwards over trailing left-sticky punctuation
+  while (i-- > 0) {
+    if (closingQuoteChars.has(text[i]!)) return true // reached a closing quote or bracket
+    if (!leftStickyPunctuation.has(text[i]!)) break
+  }
+  return false
+}
+
+function classifySegmentBreakChar(ch: string, whiteSpaceProfile: WhiteSpaceProfile): SegmentBreakKind {
+  // Profiles that preserve spaces or hard breaks keep spaces and tabs as distinct kinds.
+  const preserving = whiteSpaceProfile.preserveOrdinarySpaces || whiteSpaceProfile.preserveHardBreaks
+  if (ch === ' ') return preserving ? 'preserved-space' : 'space'
+  if (ch === '\t' && preserving) return 'tab'
+  if (ch === '\n' && whiteSpaceProfile.preserveHardBreaks) return 'hard-break'
+  switch (ch) {
+    // NO-BREAK SPACE, NARROW NO-BREAK SPACE, WORD JOINER and ZERO WIDTH NO-BREAK SPACE are classified as glue.
+    case '\u00A0': case '\u202F': case '\u2060': case '\uFEFF': return 'glue'
+    case '\u200B': return 'zero-width-break'
+    case '\u00AD': return 'soft-hyphen'
+    default: return 'text'
+  }
+}
+
+function splitSegmentByBreakKind( // Split one segment into maximal runs of characters that share a break kind and word-like flag.
+ segment: string,
+ isWordLike: boolean,
+ start: number, // offset of `segment`; each piece's start is this plus its offset inside the segment
+ whiteSpaceProfile: WhiteSpaceProfile,
+): SegmentationPiece[] {
+ const pieces: SegmentationPiece[] = []
+ let currentKind: SegmentBreakKind | null = null // null until the first character has been seen
+ let currentText = ''
+ let currentStart = start
+ let currentWordLike = false
+ let offset = 0 // UTF-16 units consumed so far within `segment`
+
+ for (const ch of segment) {
+ const kind = classifySegmentBreakChar(ch, whiteSpaceProfile)
+ const wordLike = kind === 'text' && isWordLike // only 'text' pieces inherit the segment's word-like flag
+
+ if (currentKind !== null && kind === currentKind && wordLike === currentWordLike) { // same run: extend it
+ currentText += ch
+ offset += ch.length
+ continue
+ }
+
+ if (currentKind !== null) { // the kind changed: flush the finished run
+ pieces.push({
+ text: currentText,
+ isWordLike: currentWordLike,
+ kind: currentKind,
+ start: currentStart,
+ })
+ }
+
+ currentKind = kind // begin a new run at the current character
+ currentText = ch
+ currentStart = start + offset
+ currentWordLike = wordLike
+ offset += ch.length
+ }
+
+ if (currentKind !== null) { // flush the last run; nothing is emitted for an empty segment
+ pieces.push({
+ text: currentText,
+ isWordLike: currentWordLike,
+ kind: currentKind,
+ start: currentStart,
+ })
+ }
+
+ return pieces
+}
+
+function isTextRunBoundary(kind: SegmentBreakKind): boolean {
+  // Ordinary and preserved spaces, zero-width breaks and hard breaks end a text run.
+  switch (kind) {
+    case 'space': case 'preserved-space': case 'zero-width-break': case 'hard-break':
+      return true
+    default: return false
+  }
+}
+
+const urlSchemeSegmentRe = /^[A-Za-z][A-Za-z0-9+.-]*:$/ // a whole segment that is a URL scheme plus ':' (e.g. "https:")
+
+function isUrlLikeRunStart(segmentation: MergedSegmentation, index: number): boolean {
+  // A run starts at "www." or at a scheme segment ("https:") directly followed by a "//" text segment.
+  const head = segmentation.texts[index]!
+  if (head.startsWith('www.')) return true
+  if (!urlSchemeSegmentRe.test(head)) return false
+  const next = index + 1
+  if (next >= segmentation.len) return false
+  if (segmentation.kinds[next] !== 'text') return false
+  return segmentation.texts[next] === '//'
+}
+
+function isUrlQueryBoundarySegment(text: string): boolean { // URL-looking segment that contains a '?'
+  return (text.startsWith('www.') || text.includes('://')) && text.includes('?')
+}
+
+function mergeUrlLikeRuns(segmentation: MergedSegmentation): MergedSegmentation { // Fold each URL-like run into its first segment, stopping after a segment that contains '?'.
+ const texts = segmentation.texts.slice() // work on copies; the input arrays are not mutated
+ const isWordLike = segmentation.isWordLike.slice()
+ const kinds = segmentation.kinds.slice()
+ const starts = segmentation.starts.slice()
+
+ for (let i = 0; i < segmentation.len; i++) {
+ if (kinds[i] !== 'text' || !isUrlLikeRunStart(segmentation, i)) continue // run detection reads the original segmentation
+
+ let j = i + 1
+ while (j < segmentation.len && !isTextRunBoundary(kinds[j]!)) { // absorb segments up to the next space or break
+ texts[i] += texts[j]!
+ isWordLike[i] = true
+ const endsQueryPrefix = texts[j]!.includes('?')
+ kinds[j] = 'text'
+ texts[j] = '' // emptied slots are dropped by the compaction pass below
+ j++
+ if (endsQueryPrefix) break // stop once the segment containing '?' has been absorbed
+ }
+ }
+
+ let compactLen = 0 // write cursor for the in-place compaction
+ for (let read = 0; read < texts.length; read++) {
+ const text = texts[read]!
+ if (text.length === 0) continue // skip slots that were merged away
+ if (compactLen !== read) {
+ texts[compactLen] = text
+ isWordLike[compactLen] = isWordLike[read]!
+ kinds[compactLen] = kinds[read]!
+ starts[compactLen] = starts[read]!
+ }
+ compactLen++
+ }
+
+ texts.length = compactLen // truncate all four parallel arrays to the compacted length
+ isWordLike.length = compactLen
+ kinds.length = compactLen
+ starts.length = compactLen
+
+ return {
+ len: compactLen,
+ texts,
+ isWordLike,
+ kinds,
+ starts,
+ }
+}
+
+function mergeUrlQueryRuns(segmentation: MergedSegmentation): MergedSegmentation { // Collapse everything after a URL's '?' segment, up to the next run boundary, into one text segment.
+ const texts: string[] = []
+ const isWordLike: boolean[] = []
+ const kinds: SegmentBreakKind[] = []
+ const starts: number[] = []
+
+ for (let i = 0; i < segmentation.len; i++) {
+ const text = segmentation.texts[i]!
+ texts.push(text) // every input segment is copied through unchanged first
+ isWordLike.push(segmentation.isWordLike[i]!)
+ kinds.push(segmentation.kinds[i]!)
+ starts.push(segmentation.starts[i]!)
+
+ if (!isUrlQueryBoundarySegment(text)) continue
+
+ const nextIndex = i + 1
+ if (
+ nextIndex >= segmentation.len ||
+ isTextRunBoundary(segmentation.kinds[nextIndex]!)
+ ) {
+ continue // nothing follows the '?' segment before a boundary
+ }
+
+ let queryText = ''
+ const queryStart = segmentation.starts[nextIndex]!
+ let j = nextIndex
+ while (j < segmentation.len && !isTextRunBoundary(segmentation.kinds[j]!)) { // gather the rest of the run
+ queryText += segmentation.texts[j]!
+ j++
+ }
+
+ if (queryText.length > 0) {
+ texts.push(queryText) // emitted as a single word-like text segment
+ isWordLike.push(true)
+ kinds.push('text')
+ starts.push(queryStart)
+ i = j - 1 // skip the consumed segments; the loop's i++ moves on to j
+ }
+ }
+
+ return {
+ len: texts.length,
+ texts,
+ isWordLike,
+ kinds,
+ starts,
+ }
+}
+
+const numericJoinerChars = new Set([ // Non-digit characters that isNumericRunSegment() accepts inside a numeric run.
+ ':', '-', '/', '×', ',', '.', '+',
+ '\u2013', // U+2013 EN DASH
+ '\u2014', // U+2014 EM DASH
+])
+
+const asciiPunctuationChainSegmentRe = /^[A-Za-z0-9_]+[,:;]*$/ // ASCII word characters optionally followed by ',', ':' or ';'
+const asciiPunctuationChainTrailingJoinersRe = /[,:;]+$/ // text that ends in at least one ',', ':' or ';'
+
+function segmentContainsDecimalDigit(text: string): boolean {
+  // True when at least one code point of `text` matches decimalDigitRe.
+  return Array.from(text).some(
+    (unit) => decimalDigitRe.test(unit),
+  )
+}
+
+function isNumericRunSegment(text: string): boolean {
+  // Non-empty text made up only of decimal digits and numeric joiner characters.
+  if (text.length === 0) return false
+  const isNumericUnit = (unit: string): boolean =>
+    decimalDigitRe.test(unit) || numericJoinerChars.has(unit)
+  const units = Array.from(text)
+  return units.every(isNumericUnit)
+}
+
+function mergeNumericRuns(segmentation: MergedSegmentation): MergedSegmentation { // Join adjacent numeric text segments (digits and joiners) into one word-like segment.
+ const texts: string[] = []
+ const isWordLike: boolean[] = []
+ const kinds: SegmentBreakKind[] = []
+ const starts: number[] = []
+
+ for (let i = 0; i < segmentation.len; i++) {
+ const text = segmentation.texts[i]!
+ const kind = segmentation.kinds[i]!
+
+ if (kind === 'text' && isNumericRunSegment(text) && segmentContainsDecimalDigit(text)) { // a run must start on a segment with a digit
+ let mergedText = text
+ let j = i + 1
+ while (
+ j < segmentation.len &&
+ segmentation.kinds[j] === 'text' &&
+ isNumericRunSegment(segmentation.texts[j]!) // followers may be joiner-only
+ ) {
+ mergedText += segmentation.texts[j]!
+ j++
+ }
+
+ texts.push(mergedText)
+ isWordLike.push(true) // the merged run is always marked word-like
+ kinds.push('text')
+ starts.push(segmentation.starts[i]!)
+ i = j - 1 // skip the absorbed segments
+ continue
+ }
+
+ texts.push(text) // everything else passes through unchanged
+ isWordLike.push(segmentation.isWordLike[i]!)
+ kinds.push(kind)
+ starts.push(segmentation.starts[i]!)
+ }
+
+ return {
+ len: texts.length,
+ texts,
+ isWordLike,
+ kinds,
+ starts,
+ }
+}
+
+function mergeAsciiPunctuationChains(segmentation: MergedSegmentation): MergedSegmentation { // Join ASCII words chained by trailing ',', ':' or ';' with no boundary between them (e.g. "a,b;c").
+ const texts: string[] = []
+ const isWordLike: boolean[] = []
+ const kinds: SegmentBreakKind[] = []
+ const starts: number[] = []
+
+ for (let i = 0; i < segmentation.len; i++) {
+ const text = segmentation.texts[i]!
+ const kind = segmentation.kinds[i]!
+ const wordLike = segmentation.isWordLike[i]!
+
+ if (kind === 'text' && wordLike && asciiPunctuationChainSegmentRe.test(text)) {
+ let mergedText = text
+ let j = i + 1
+
+ while (
+ asciiPunctuationChainTrailingJoinersRe.test(mergedText) && // only continue while the text so far ends in a joiner
+ j < segmentation.len &&
+ segmentation.kinds[j] === 'text' &&
+ segmentation.isWordLike[j] &&
+ asciiPunctuationChainSegmentRe.test(segmentation.texts[j]!)
+ ) {
+ mergedText += segmentation.texts[j]!
+ j++
+ }
+
+ texts.push(mergedText) // pushed even when nothing was merged (j === i + 1)
+ isWordLike.push(true)
+ kinds.push('text')
+ starts.push(segmentation.starts[i]!)
+ i = j - 1 // skip the absorbed segments
+ continue
+ }
+
+ texts.push(text) // non-matching segments pass through unchanged
+ isWordLike.push(wordLike)
+ kinds.push(kind)
+ starts.push(segmentation.starts[i]!)
+ }
+
+ return {
+ len: texts.length,
+ texts,
+ isWordLike,
+ kinds,
+ starts,
+ }
+}
+
+function splitHyphenatedNumericRuns(segmentation: MergedSegmentation): MergedSegmentation { // Split text like "2020-2021" after each '-' when every hyphen-separated part is numeric.
+ const texts: string[] = []
+ const isWordLike: boolean[] = []
+ const kinds: SegmentBreakKind[] = []
+ const starts: number[] = []
+
+ for (let i = 0; i < segmentation.len; i++) {
+ const text = segmentation.texts[i]!
+ if (segmentation.kinds[i] === 'text' && text.includes('-')) {
+ const parts = text.split('-')
+ let shouldSplit = parts.length > 1
+ for (let j = 0; j < parts.length; j++) {
+ const part = parts[j]!
+ if (!shouldSplit) break
+ if (
+ part.length === 0 || // leading, trailing or doubled hyphen
+ !segmentContainsDecimalDigit(part) ||
+ !isNumericRunSegment(part)
+ ) {
+ shouldSplit = false
+ }
+ }
+
+ if (shouldSplit) {
+ let offset = 0 // UTF-16 offset of the current part within `text`
+ for (let j = 0; j < parts.length; j++) {
+ const part = parts[j]!
+ const splitText = j < parts.length - 1 ? `${part}-` : part // each hyphen stays with the part before it
+ texts.push(splitText)
+ isWordLike.push(true)
+ kinds.push('text')
+ starts.push(segmentation.starts[i]! + offset)
+ offset += splitText.length
+ }
+ continue
+ }
+ }
+
+ texts.push(text) // segments that are not split pass through unchanged
+ isWordLike.push(segmentation.isWordLike[i]!)
+ kinds.push(segmentation.kinds[i]!)
+ starts.push(segmentation.starts[i]!)
+ }
+
+ return {
+ len: texts.length,
+ texts,
+ isWordLike,
+ kinds,
+ starts,
+ }
+}
+
+function mergeGlueConnectedTextRuns(segmentation: MergedSegmentation): MergedSegmentation { // Fuse text segments joined by 'glue' segments into single text segments.
+ const texts: string[] = []
+ const isWordLike: boolean[] = []
+ const kinds: SegmentBreakKind[] = []
+ const starts: number[] = []
+
+ let read = 0 // read cursor into the input segmentation
+ while (read < segmentation.len) {
+ let text = segmentation.texts[read]!
+ let wordLike = segmentation.isWordLike[read]!
+ let kind = segmentation.kinds[read]!
+ let start = segmentation.starts[read]!
+
+ if (kind === 'glue') { // leading glue: collect the whole glue run first
+ let glueText = text
+ const glueStart = start
+ read++
+ while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
+ glueText += segmentation.texts[read]!
+ read++
+ }
+
+ if (read < segmentation.len && segmentation.kinds[read] === 'text') { // glue followed by text: prepend the glue to that text
+ text = glueText + segmentation.texts[read]!
+ wordLike = segmentation.isWordLike[read]!
+ kind = 'text'
+ start = glueStart
+ read++
+ } else { // glue with no text after it stays a standalone glue segment
+ texts.push(glueText)
+ isWordLike.push(false)
+ kinds.push('glue')
+ starts.push(glueStart)
+ continue
+ }
+ } else {
+ read++
+ }
+
+ if (kind === 'text') { // keep absorbing glue (+ text) pairs that follow the current text
+ while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
+ let glueText = ''
+ while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
+ glueText += segmentation.texts[read]!
+ read++
+ }
+
+ if (read < segmentation.len && segmentation.kinds[read] === 'text') {
+ text += glueText + segmentation.texts[read]!
+ wordLike = wordLike || segmentation.isWordLike[read]!
+ read++
+ continue
+ }
+
+ text += glueText // trailing glue with no text after it is appended to the current text
+ }
+ }
+
+ texts.push(text)
+ isWordLike.push(wordLike)
+ kinds.push(kind)
+ starts.push(start)
+ }
+
+ return {
+ len: texts.length,
+ texts,
+ isWordLike,
+ kinds,
+ starts,
+ }
+}
+
+function carryTrailingForwardStickyAcrossCJKBoundary(segmentation: MergedSegmentation): MergedSegmentation { // Move a trailing forward-sticky cluster from one CJK text segment onto the next CJK text segment.
+ const texts = segmentation.texts.slice() // copies; the input arrays are not mutated
+ const isWordLike = segmentation.isWordLike.slice()
+ const kinds = segmentation.kinds.slice()
+ const starts = segmentation.starts.slice()
+
+ for (let i = 0; i < texts.length - 1; i++) {
+ if (kinds[i] !== 'text' || kinds[i + 1] !== 'text') continue
+ if (!isCJK(texts[i]!) || !isCJK(texts[i + 1]!)) continue
+
+ const split = splitTrailingForwardStickyCluster(texts[i]!)
+ if (split === null) continue // no trailing cluster, or the segment is nothing but the cluster
+
+ texts[i] = split.head
+ texts[i + 1] = split.tail + texts[i + 1]!
+ starts[i + 1] = starts[i]! + split.head.length // the next segment now begins where the moved tail began
+ }
+
+ return {
+ len: texts.length, // segment count is unchanged; only the boundaries move
+ texts,
+ isWordLike,
+ kinds,
+ starts,
+ }
+}
+
+
+function buildMergedSegmentation( // Segment `normalized` into words, then apply the punctuation/script merge rules and the run-level merge passes.
+ normalized: string,
+ profile: AnalysisProfile,
+ whiteSpaceProfile: WhiteSpaceProfile,
+): MergedSegmentation {
+ const wordSegmenter = getSharedWordSegmenter() // presumably an Intl.Segmenter with word granularity — TODO confirm
+ let mergedLen = 0 // number of live entries in the four parallel arrays below
+ const mergedTexts: string[] = []
+ const mergedWordLike: boolean[] = []
+ const mergedKinds: SegmentBreakKind[] = []
+ const mergedStarts: number[] = []
+
+ for (const s of wordSegmenter.segment(normalized)) {
+ for (const piece of splitSegmentByBreakKind(s.segment, s.isWordLike ?? false, s.index, whiteSpaceProfile)) {
+ const isText = piece.kind === 'text'
+
+ if ( // rule 1 (profile-gated): CJK piece after CJK text that ends in a closing quote
+ profile.carryCJKAfterClosingQuote &&
+ isText &&
+ mergedLen > 0 &&
+ mergedKinds[mergedLen - 1] === 'text' &&
+ isCJK(piece.text) &&
+ isCJK(mergedTexts[mergedLen - 1]!) &&
+ endsWithClosingQuote(mergedTexts[mergedLen - 1]!)
+ ) {
+ mergedTexts[mergedLen - 1] += piece.text
+ mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike
+ } else if ( // rule 2: line-start-prohibited punctuation attaches to the preceding CJK text
+ isText &&
+ mergedLen > 0 &&
+ mergedKinds[mergedLen - 1] === 'text' &&
+ isCJKLineStartProhibitedSegment(piece.text) &&
+ isCJK(mergedTexts[mergedLen - 1]!)
+ ) {
+ mergedTexts[mergedLen - 1] += piece.text
+ mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike
+ } else if ( // rule 3: text following a segment that ends in Myanmar medial glue
+ isText &&
+ mergedLen > 0 &&
+ mergedKinds[mergedLen - 1] === 'text' &&
+ endsWithMyanmarMedialGlue(mergedTexts[mergedLen - 1]!)
+ ) {
+ mergedTexts[mergedLen - 1] += piece.text
+ mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike
+ } else if ( // rule 4: Arabic word directly after Arabic text ending in no-space punctuation
+ isText &&
+ mergedLen > 0 &&
+ mergedKinds[mergedLen - 1] === 'text' &&
+ piece.isWordLike &&
+ containsArabicScript(piece.text) &&
+ endsWithArabicNoSpacePunctuation(mergedTexts[mergedLen - 1]!)
+ ) {
+ mergedTexts[mergedLen - 1] += piece.text
+ mergedWordLike[mergedLen - 1] = true
+ } else if ( // rule 5: extend a run of one repeated non-word character (excluding '-' and '—')
+ isText &&
+ !piece.isWordLike &&
+ mergedLen > 0 &&
+ mergedKinds[mergedLen - 1] === 'text' &&
+ piece.text.length === 1 &&
+ piece.text !== '-' &&
+ piece.text !== '—' &&
+ isRepeatedSingleCharRun(mergedTexts[mergedLen - 1]!, piece.text)
+ ) {
+ mergedTexts[mergedLen - 1] += piece.text
+ } else if ( // rule 6: left-sticky punctuation, or a '-' that follows word-like text
+ isText &&
+ !piece.isWordLike &&
+ mergedLen > 0 &&
+ mergedKinds[mergedLen - 1] === 'text' &&
+ (
+ isLeftStickyPunctuationSegment(piece.text) ||
+ (piece.text === '-' && mergedWordLike[mergedLen - 1]!)
+ )
+ ) {
+ mergedTexts[mergedLen - 1] += piece.text
+ } else { // no rule matched: the piece starts a new segment
+ mergedTexts[mergedLen] = piece.text
+ mergedWordLike[mergedLen] = piece.isWordLike
+ mergedKinds[mergedLen] = piece.kind
+ mergedStarts[mergedLen] = piece.start
+ mergedLen++
+ }
+ }
+ }
+
+ for (let i = 1; i < mergedLen; i++) { // backward pass: non-word escaped-quote clusters join the preceding text segment
+ if (
+ mergedKinds[i] === 'text' &&
+ !mergedWordLike[i]! &&
+ isEscapedQuoteClusterSegment(mergedTexts[i]!) &&
+ mergedKinds[i - 1] === 'text'
+ ) {
+ mergedTexts[i - 1] += mergedTexts[i]!
+ mergedWordLike[i - 1] = mergedWordLike[i - 1]! || mergedWordLike[i]!
+ mergedTexts[i] = '' // emptied slots are removed by the compaction below
+ }
+ }
+
+ for (let i = mergedLen - 2; i >= 0; i--) { // forward pass, iterated right to left: forward-sticky clusters join the next text segment
+ if (mergedKinds[i] === 'text' && !mergedWordLike[i]! && isForwardStickyClusterSegment(mergedTexts[i]!)) {
+ let j = i + 1
+ while (j < mergedLen && mergedTexts[j] === '') j++ // skip slots that were already merged away
+ if (j < mergedLen && mergedKinds[j] === 'text') {
+ mergedTexts[j] = mergedTexts[i]! + mergedTexts[j]!
+ mergedStarts[j] = mergedStarts[i]! // the target now starts where the cluster started
+ mergedTexts[i] = ''
+ }
+ }
+ }
+
+ let compactLen = 0 // write cursor for the in-place compaction of emptied slots
+ for (let read = 0; read < mergedLen; read++) {
+ const text = mergedTexts[read]!
+ if (text.length === 0) continue
+ if (compactLen !== read) {
+ mergedTexts[compactLen] = text
+ mergedWordLike[compactLen] = mergedWordLike[read]!
+ mergedKinds[compactLen] = mergedKinds[read]!
+ mergedStarts[compactLen] = mergedStarts[read]!
+ }
+ compactLen++
+ }
+
+ mergedTexts.length = compactLen
+ mergedWordLike.length = compactLen
+ mergedKinds.length = compactLen
+ mergedStarts.length = compactLen
+
+ const compacted = mergeGlueConnectedTextRuns({
+ len: compactLen,
+ texts: mergedTexts,
+ isWordLike: mergedWordLike,
+ kinds: mergedKinds,
+ starts: mergedStarts,
+ })
+ const withMergedUrls = carryTrailingForwardStickyAcrossCJKBoundary( // passes run innermost first: URL runs, URL queries, numeric runs, hyphen split, ASCII chains, CJK carry
+ mergeAsciiPunctuationChains(
+ splitHyphenatedNumericRuns(mergeNumericRuns(mergeUrlQueryRuns(mergeUrlLikeRuns(compacted)))),
+ ),
+ )
+
+ for (let i = 0; i < withMergedUrls.len - 1; i++) { // final fix-up: a space segment carrying combining marks, directly before Arabic text
+ const split = splitLeadingSpaceAndMarks(withMergedUrls.texts[i]!)
+ if (split === null) continue
+ if (
+ (withMergedUrls.kinds[i] !== 'space' && withMergedUrls.kinds[i] !== 'preserved-space') ||
+ withMergedUrls.kinds[i + 1] !== 'text' ||
+ !containsArabicScript(withMergedUrls.texts[i + 1]!)
+ ) {
+ continue
+ }
+
+ withMergedUrls.texts[i] = split.space // the space segment keeps only the space
+ withMergedUrls.isWordLike[i] = false
+ withMergedUrls.kinds[i] = withMergedUrls.kinds[i] === 'preserved-space' ? 'preserved-space' : 'space'
+ withMergedUrls.texts[i + 1] = split.marks + withMergedUrls.texts[i + 1]! // the marks move onto the following Arabic text
+ withMergedUrls.starts[i + 1] = withMergedUrls.starts[i]! + split.space.length
+ }
+
+ return withMergedUrls
+}
+
+function compileAnalysisChunks(segmentation: MergedSegmentation, whiteSpaceProfile: WhiteSpaceProfile): AnalysisChunk[] {
+  // Split the segment list into chunks at hard breaks. Each chunk covers
+  // [startSegmentIndex, endSegmentIndex), and consumedEndSegmentIndex
+  // additionally steps over the hard-break segment that ended the chunk.
+  const total = segmentation.len
+  if (total === 0) return []
+
+  const makeChunk = (from: number, to: number, consumedTo: number): AnalysisChunk => ({
+    startSegmentIndex: from,
+    endSegmentIndex: to,
+    consumedEndSegmentIndex: consumedTo,
+  })
+
+  // Without hard-break preservation the whole text is a single chunk.
+  if (!whiteSpaceProfile.preserveHardBreaks) return [makeChunk(0, total, total)]
+
+  const chunks: AnalysisChunk[] = []
+  let chunkStart = 0
+  // Every hard break closes the chunk that began at chunkStart.
+  for (let at = 0; at < total; at++) {
+    if (segmentation.kinds[at] === 'hard-break') {
+      chunks.push(makeChunk(chunkStart, at, at + 1))
+      chunkStart = at + 1
+    }
+  }
+
+  // Segments after the last hard break form the final chunk; text that
+  // ends on a hard break gets no extra empty chunk.
+  if (chunkStart < total) {
+    chunks.push(makeChunk(chunkStart, total, total))
+  }
+
+  return chunks
+}
+
+export function analyzeText(
+  text: string,
+  profile: AnalysisProfile,
+  whiteSpace: WhiteSpaceMode = 'normal',
+): TextAnalysis {
+  // Normalize whitespace for the requested mode, segment the result, and attach the chunk table.
+  const whiteSpaceProfile = getWhiteSpaceProfile(whiteSpace)
+  const preWrap = whiteSpaceProfile.mode === 'pre-wrap'
+  const normalized = preWrap ? normalizeWhitespacePreWrap(text) : normalizeWhitespaceNormal(text)
+
+  if (normalized.length > 0) {
+    const segmentation = buildMergedSegmentation(normalized, profile, whiteSpaceProfile)
+    const chunks = compileAnalysisChunks(segmentation, whiteSpaceProfile)
+    return { normalized, chunks, ...segmentation }
+  }
+
+  // Empty after normalization: return an analysis with no segments and no chunks.
+  return {
+    normalized,
+    chunks: [],
+    len: 0,
+    texts: [],
+    isWordLike: [],
+    kinds: [],
+    starts: [],
+  }
+}
diff --git a/packages/lynx-pretext/src/pretext/bidi.ts b/packages/lynx-pretext/src/pretext/bidi.ts
new file mode 100644
index 0000000..f530ff9
--- /dev/null
+++ b/packages/lynx-pretext/src/pretext/bidi.ts
@@ -0,0 +1,173 @@
+// Simplified bidi metadata helper for the rich prepareWithSegments() path,
+// forked from pdf.js via Sebastian's text-layout. It classifies characters
+// into bidi types, computes embedding levels, and maps them onto prepared
+// segments for custom rendering. The line-breaking engine does not consume
+// these levels.
+
+type BidiType = 'L' | 'R' | 'AL' | 'AN' | 'EN' | 'ES' | 'ET' | 'CS' | // bidi character class abbreviations as used in UAX #9
+ 'ON' | 'BN' | 'B' | 'S' | 'WS' | 'NSM'
+
+const baseTypes: BidiType[] = [ // Bidi class per char code; classifyChar() indexes it directly for codes <= 0x00FF.
+ 'BN','BN','BN','BN','BN','BN','BN','BN','BN','S','B','S','WS',
+ 'B','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN',
+ 'BN','BN','B','B','B','S','WS','ON','ON','ET','ET','ET','ON',
+ 'ON','ON','ON','ON','ON','CS','ON','CS','ON','EN','EN','EN',
+ 'EN','EN','EN','EN','EN','EN','EN','ON','ON','ON','ON','ON',
+ 'ON','ON','L','L','L','L','L','L','L','L','L','L','L','L','L',
+ 'L','L','L','L','L','L','L','L','L','L','L','L','L','ON','ON',
+ 'ON','ON','ON','ON','L','L','L','L','L','L','L','L','L','L',
+ 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L',
+ 'L','ON','ON','ON','ON','BN','BN','BN','BN','BN','BN','B','BN',
+ 'BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN',
+ 'BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN',
+ 'BN','CS','ON','ET','ET','ET','ET','ON','ON','ON','ON','L','ON',
+ 'ON','ON','ON','ON','ET','ET','EN','EN','ON','L','ON','ON','ON',
+ 'EN','L','ON','ON','ON','ON','ON','L','L','L','L','L','L','L',
+ 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L',
+ 'L','ON','L','L','L','L','L','L','L','L','L','L','L','L','L',
+ 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L',
+ 'L','L','L','ON','L','L','L','L','L','L','L','L'
+]
+
+const arabicTypes: BidiType[] = [ // Bidi class for U+0600..U+06FF; classifyChar() indexes it with (charCode & 0xff).
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'CS','AL','ON','ON','NSM','NSM','NSM','NSM','NSM','NSM','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','NSM','NSM','NSM','NSM','NSM','NSM','NSM',
+ 'NSM','NSM','NSM','NSM','NSM','NSM','NSM','AL','AL','AL','AL',
+ 'AL','AL','AL','AN','AN','AN','AN','AN','AN','AN','AN','AN',
+ 'AN','ET','AN','AN','AL','AL','AL','NSM','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM',
+ 'NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','ON','NSM',
+ 'NSM','NSM','NSM','AL','AL','AL','AL','AL','AL','AL','AL','AL',
+ 'AL','AL','AL','AL','AL','AL','AL','AL','AL'
+]
+
+function classifyChar(charCode: number): BidiType {
+  // Table lookups for codes up to 0xFF and for U+0600..U+06FF; fixed classes for the other ranges.
+  if (charCode <= 0x00ff) return baseTypes[charCode]!
+  if (charCode >= 0x0590 && charCode <= 0x05f4) return 'R'
+  if (charCode >= 0x0600 && charCode <= 0x06ff) return arabicTypes[charCode & 0xff]!
+  return charCode >= 0x0700 && charCode <= 0x08ac ? 'AL' : 'L'
+}
+
+function computeBidiLevels(str: string): Int8Array | null { // One embedding level per UTF-16 unit; null when str is empty or has no R/AL/AN characters.
+  const len = str.length
+  if (len === 0) return null
+
+  // eslint-disable-next-line unicorn/no-new-array
+  const types: BidiType[] = new Array(len)
+  let numBidi = 0 // number of units classified R, AL or AN
+
+  for (let i = 0; i < len; i++) {
+    const t = classifyChar(str.charCodeAt(i))
+    if (t === 'R' || t === 'AL' || t === 'AN') numBidi++
+    types[i] = t
+  }
+
+  if (numBidi === 0) return null
+
+  const startLevel = (numBidi / len) < 0.3 ? 0 : 1 // LTR base when under 30% of units are RTL; the previous len / numBidi was always >= 1, forcing level 1
+  const levels = new Int8Array(len)
+  for (let i = 0; i < len; i++) levels[i] = startLevel
+
+  const e: BidiType = (startLevel & 1) ? 'R' : 'L' // embedding direction of the base level
+  const sor = e // start-of-run type used as the initial "previous" type
+
+  // W1-W7
+  let lastType: BidiType = sor
+  for (let i = 0; i < len; i++) { // NSM takes the type of the preceding character
+    if (types[i] === 'NSM') types[i] = lastType
+    else lastType = types[i]!
+  }
+  lastType = sor
+  for (let i = 0; i < len; i++) { // EN becomes AN when the last strong type was AL
+    const t = types[i]!
+    if (t === 'EN') types[i] = lastType === 'AL' ? 'AN' : 'EN'
+    else if (t === 'R' || t === 'L' || t === 'AL') lastType = t
+  }
+  for (let i = 0; i < len; i++) { // AL becomes R
+    if (types[i] === 'AL') types[i] = 'R'
+  }
+  for (let i = 1; i < len - 1; i++) { // a single separator between two numbers of the same type joins them
+    if (types[i] === 'ES' && types[i - 1] === 'EN' && types[i + 1] === 'EN') {
+      types[i] = 'EN'
+    }
+    if (
+      types[i] === 'CS' &&
+      (types[i - 1] === 'EN' || types[i - 1] === 'AN') &&
+      types[i + 1] === types[i - 1]
+    ) {
+      types[i] = types[i - 1]!
+    }
+  }
+  for (let i = 0; i < len; i++) { // ET runs adjacent to an EN become EN
+    if (types[i] !== 'EN') continue
+    let j
+    for (j = i - 1; j >= 0 && types[j] === 'ET'; j--) types[j] = 'EN'
+    for (j = i + 1; j < len && types[j] === 'ET'; j++) types[j] = 'EN'
+  }
+  for (let i = 0; i < len; i++) { // remaining whitespace, separators and terminators become ON
+    const t = types[i]!
+    if (t === 'WS' || t === 'ES' || t === 'ET' || t === 'CS') types[i] = 'ON'
+  }
+  lastType = sor
+  for (let i = 0; i < len; i++) { // EN becomes L when the last strong type was L
+    const t = types[i]!
+    if (t === 'EN') types[i] = lastType === 'L' ? 'L' : 'EN'
+    else if (t === 'R' || t === 'L') lastType = t
+  }
+
+  // N1-N2
+  for (let i = 0; i < len; i++) {
+    if (types[i] !== 'ON') continue
+    let end = i + 1
+    while (end < len && types[end] === 'ON') end++ // [i, end) is one run of neutrals
+    const before: BidiType = i > 0 ? types[i - 1]! : sor
+    const after: BidiType = end < len ? types[end]! : sor
+    const bDir: BidiType = before !== 'L' ? 'R' : 'L' // anything that is not L counts as R here
+    const aDir: BidiType = after !== 'L' ? 'R' : 'L'
+    if (bDir === aDir) {
+      for (let j = i; j < end; j++) types[j] = bDir
+    }
+    i = end - 1
+  }
+  for (let i = 0; i < len; i++) { // neutrals still unresolved take the embedding direction
+    if (types[i] === 'ON') types[i] = e
+  }
+
+  // I1-I2
+  for (let i = 0; i < len; i++) {
+    const t = types[i]!
+    if ((levels[i]! & 1) === 0) { // even (LTR) level
+      if (t === 'R') levels[i]!++
+      else if (t === 'AN' || t === 'EN') levels[i]! += 2
+    } else if (t === 'L' || t === 'AN' || t === 'EN') { // odd (RTL) level
+      levels[i]!++
+    }
+  }
+
+  return levels
+}
+
+export function computeSegmentLevels(normalized: string, segStarts: number[]): Int8Array | null {
+  // One bidi level per segment, read at the segment's start offset; null whenever computeBidiLevels returns null.
+  const charLevels = computeBidiLevels(normalized)
+  if (charLevels === null) return null
+  const perSegment = new Int8Array(segStarts.length)
+  segStarts.forEach((offset, slot) => {
+    perSegment[slot] = charLevels[offset]!
+  })
+  return perSegment
+}
diff --git a/packages/lynx-pretext/src/pretext/host.ts b/packages/lynx-pretext/src/pretext/host.ts
new file mode 100644
index 0000000..a9cfccd
--- /dev/null
+++ b/packages/lynx-pretext/src/pretext/host.ts
@@ -0,0 +1,67 @@
+import {
+ clearCache,
+ layout,
+ layoutNextLine,
+ layoutWithLines,
+ prepare,
+ prepareWithSegments,
+ profilePrepare,
+ setLocale,
+ walkLineRanges,
+ type LayoutCursor,
+ type LayoutLine,
+ type LayoutLineRange,
+ type LayoutLinesResult,
+ type LayoutResult,
+ type PrepareOptions,
+ type PrepareProfile,
+ type PreparedText,
+ type PreparedTextWithSegments,
+} from './layout.js'
+import { withMeasurementHost, type MeasurementHost } from './measurement.js'
+
+export type PretextHostConfig = { // Configuration accepted by createPretext().
+ measurement: MeasurementHost // passed to withMeasurementHost() around each bound API call
+}
+
+export type PretextHostApi = { // Shape of the object returned by createPretext(); the members match the functions imported from './layout.js'.
+ profilePrepare(text: string, font: string, options?: PrepareOptions): PrepareProfile
+ prepare(text: string, font: string, options?: PrepareOptions): PreparedText
+ prepareWithSegments(text: string, font: string, options?: PrepareOptions): PreparedTextWithSegments
+ layout(prepared: PreparedText, maxWidth: number, lineHeight: number): LayoutResult
+ walkLineRanges(
+ prepared: PreparedTextWithSegments,
+ maxWidth: number,
+ onLine?: (line: LayoutLineRange) => void,
+ ): number
+ layoutNextLine(
+ prepared: PreparedTextWithSegments,
+ start: LayoutCursor,
+ maxWidth: number,
+ ): LayoutLine | null
+ layoutWithLines(prepared: PreparedTextWithSegments, maxWidth: number, lineHeight: number): LayoutLinesResult
+ clearCache(): void
+ setLocale(locale?: string): void
+}
+
+export { type MeasurementHost } from './measurement.js'
+
+export function createPretext(config: PretextHostConfig): PretextHostApi { // Build an API object whose calls run under config.measurement.
+  const bind = <Args extends unknown[], Result>( // Args/Result must be declared as type parameters; they were previously undeclared names
+    fn: (...args: Args) => Result,
+  ): ((...args: Args) => Result) => {
+    return (...args: Args) => withMeasurementHost(config.measurement, () => fn(...args))
+  }
+
+  return {
+    profilePrepare: bind(profilePrepare),
+    prepare: bind(prepare),
+    prepareWithSegments: bind(prepareWithSegments),
+    layout: bind(layout),
+    walkLineRanges: bind(walkLineRanges),
+    layoutNextLine: bind(layoutNextLine),
+    layoutWithLines: bind(layoutWithLines),
+    clearCache: bind(clearCache),
+    setLocale, // NOTE(review): the only member not wrapped in bind(); confirm setLocale never needs the measurement host
+  }
+}
diff --git a/packages/lynx-pretext/src/pretext/layout.ts b/packages/lynx-pretext/src/pretext/layout.ts
new file mode 100644
index 0000000..465a067
--- /dev/null
+++ b/packages/lynx-pretext/src/pretext/layout.ts
@@ -0,0 +1,717 @@
+// Text measurement for browser environments using canvas measureText.
+//
+// Problem: DOM-based text measurement (getBoundingClientRect, offsetHeight)
+// forces synchronous layout reflow. When components independently measure text,
+// each measurement triggers a reflow of the entire document. This creates
+// read/write interleaving that can cost 30ms+ per frame for 500 text blocks.
+//
+// Solution: two-phase measurement centered around canvas measureText.
+// prepare(text, font) — segments text via Intl.Segmenter, measures each word
+// via canvas, caches widths, and does one cached DOM calibration read per
+// font when emoji correction is needed. Call once when text first appears.
+// layout(prepared, maxWidth, lineHeight) — walks cached word widths with pure
+// arithmetic to count lines and compute height. Call on every resize.
+// ~0.0002ms per text.
+//
+// i18n: Intl.Segmenter handles CJK (per-character breaking), Thai, Arabic, etc.
+// Bidi: simplified rich-path metadata for mixed LTR/RTL custom rendering.
+// Punctuation merging: "better." measured as one unit (matches CSS behavior).
+// Trailing whitespace: hangs past line edge without triggering breaks (CSS behavior).
+// overflow-wrap: pre-measured grapheme widths enable character-level word breaking.
+//
+// Emoji correction: Chrome/Firefox canvas measures emoji wider than DOM at font
+// sizes <24px on macOS (Apple Color Emoji). The inflation is constant per emoji
+// grapheme at a given size, font-independent. Auto-detected by comparing canvas
+// vs actual DOM emoji width (one cached DOM read per font). Safari canvas and
+// DOM agree (both wider than fontSize), so correction = 0 there.
+//
+// Limitations:
+// - system-ui font: canvas resolves to different optical variants than DOM on macOS.
+// Use named fonts (Helvetica, Inter, etc.) for guaranteed accuracy.
+// See RESEARCH.md "Discovery: system-ui font resolution mismatch".
+//
+// Based on Sebastian Markbage's text-layout research (github.com/chenglou/text-layout).
+
+import { computeSegmentLevels } from './bidi.js'
+import {
+ analyzeText,
+ clearAnalysisCaches,
+ endsWithClosingQuote,
+ isCJK,
+ kinsokuEnd,
+ kinsokuStart,
+ leftStickyPunctuation,
+ setAnalysisLocale,
+ type AnalysisChunk,
+ type SegmentBreakKind,
+ type TextAnalysis,
+ type WhiteSpaceMode,
+} from './analysis.js'
+import {
+ clearMeasurementCaches,
+ getCorrectedSegmentWidth,
+ getEngineProfile,
+ getFontMeasurementState,
+ getSegmentGraphemePrefixWidths,
+ getSegmentGraphemeWidths,
+ getSegmentMetrics,
+ textMayContainEmoji,
+} from './measurement.js'
+import {
+ countPreparedLines,
+ layoutNextLineRange as stepPreparedLineRange,
+ walkPreparedLines,
+ type InternalLayoutLine,
+} from './line-break.js'
+
+// Lazily created by getSharedGraphemeSegmenter(); reset to null by clearCache().
+let sharedGraphemeSegmenter: Intl.Segmenter | null = null
+// Rich-path only. Reuses grapheme splits while materializing multiple lines
+// from the same prepared handle, without pushing that cache into the API.
+// Keyed by prepared handle; the inner map goes from segment index to that
+// segment's graphemes.
+let sharedLineTextCaches = new WeakMap<PreparedTextWithSegments, Map<number, string[]>>()
+
+// Returns the module-wide grapheme segmenter, creating it on first use.
+function getSharedGraphemeSegmenter(): Intl.Segmenter {
+  if (sharedGraphemeSegmenter !== null) return sharedGraphemeSegmenter
+
+  const segmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme' })
+  sharedGraphemeSegmenter = segmenter
+  return segmenter
+}
+
+// --- Public types ---
+
+declare const preparedTextBrand: unique symbol
+
+// Data carried by every prepared handle. The per-segment arrays are parallel:
+// measureAnalysis() pushes one entry to each of them per prepared segment.
+type PreparedCore = {
+  widths: number[] // Segment widths, e.g. [42.5, 4.4, 37.2]
+  lineEndFitAdvances: number[] // Width contribution when a line ends after this segment
+  lineEndPaintAdvances: number[] // Painted width contribution when a line ends after this segment
+  kinds: SegmentBreakKind[] // Break behavior per segment, e.g. ['text', 'space', 'text']
+  simpleLineWalkFastPath: boolean // Normal text can use the simpler old line walker across all layout APIs
+  segLevels: Int8Array | null // Rich-path bidi metadata for custom rendering; layout() never reads it
+  breakableWidths: (number[] | null)[] // Grapheme widths for overflow-wrap segments, else null
+  breakablePrefixWidths: (number[] | null)[] // Cumulative grapheme prefix widths for narrow browser-policy shims
+  discretionaryHyphenWidth: number // Visible width added when a soft hyphen is chosen as the break
+  tabStopAdvance: number // Absolute advance between tab stops for pre-wrap tab segments
+  chunks: PreparedLineChunk[] // Precompiled hard-break chunks for line walking
+}
+
+// Keep the main prepared handle opaque so the public API does not accidentally
+// calcify around the current parallel-array representation.
+// The only declared member is a brand keyed by a `declare const` unique
+// symbol; the real data lives in PreparedCore and is reached through
+// getInternalPrepared().
+export type PreparedText = {
+  readonly [preparedTextBrand]: true
+}
+
+type InternalPreparedText = PreparedText & PreparedCore
+
+// Rich/diagnostic variant that still exposes the structural segment data.
+// Treat this as the unstable escape hatch for experiments and custom rendering.
+// Produced by prepareWithSegments().
+export type PreparedTextWithSegments = InternalPreparedText & {
+  segments: string[] // Segment text aligned with the parallel arrays, e.g. ['hello', ' ', 'world']
+}
+
+// Position inside prepared text: a segment plus a grapheme offset within it.
+// Used as the start/end of line ranges and as the resume point for
+// layoutNextLine().
+export type LayoutCursor = {
+  segmentIndex: number // Segment index in `segments`
+  graphemeIndex: number // Grapheme index within that segment; `0` at segment boundaries
+}
+
+// Result of layout(): the line count and the block height derived from it.
+export type LayoutResult = {
+  lineCount: number // Number of wrapped lines, e.g. 3
+  height: number // Total block height, e.g. lineCount * lineHeight = 57
+}
+
+// One materialized line: its text plus the same geometry as LayoutLineRange.
+export type LayoutLine = {
+  text: string // Full text content of this line, e.g. 'hello world'
+  width: number // Measured width of this line, e.g. 87.5
+  start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
+  end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
+}
+
+// Geometry-only description of a line (no text); what walkLineRanges() passes
+// to its callback.
+export type LayoutLineRange = {
+  width: number // Measured width of this line, e.g. 87.5
+  start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes
+  end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes
+}
+
+// Result of layoutWithLines(): LayoutResult plus the materialized lines.
+export type LayoutLinesResult = LayoutResult & {
+  lines: LayoutLine[] // Per-line text/width pairs for custom rendering
+}
+
+// Timing and size report returned by profilePrepare().
+export type PrepareProfile = {
+  analysisMs: number // performance.now() delta around analyzeText()
+  measureMs: number // performance.now() delta around measureAnalysis()
+  totalMs: number // Delta from before analysis to after measurement
+  analysisSegments: number // analysis.len
+  preparedSegments: number // Length of the prepared `widths` array
+  breakableSegments: number // Count of non-null entries in `breakableWidths`
+}
+
+// Optional settings for the prepare functions.
+export type PrepareOptions = {
+  // Forwarded unchanged to analyzeText() as its third argument.
+  whiteSpace?: WhiteSpaceMode
+}
+
+// An analysis chunk translated into prepared-segment indices by
+// mapAnalysisChunksToPreparedChunks().
+export type PreparedLineChunk = {
+  startSegmentIndex: number // First prepared segment of the chunk
+  endSegmentIndex: number // Upper bound (exclusive) of the scan in normalizeLineStart()
+  consumedEndSegmentIndex: number // Where normalizeLineStart() resumes once the chunk is exhausted
+}
+
+// --- Public API ---
+
+// Prepared handle for empty input: every per-segment array is empty, the fast
+// line-walk path is enabled, and `segments` is present only when requested.
+function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | PreparedTextWithSegments {
+  const emptyCore = {
+    widths: [],
+    lineEndFitAdvances: [],
+    lineEndPaintAdvances: [],
+    kinds: [],
+    simpleLineWalkFastPath: true,
+    segLevels: null,
+    breakableWidths: [],
+    breakablePrefixWidths: [],
+    discretionaryHyphenWidth: 0,
+    tabStopAdvance: 0,
+    chunks: [],
+  }
+
+  if (!includeSegments) return emptyCore as unknown as InternalPreparedText
+  return { ...emptyCore, segments: [] } as unknown as PreparedTextWithSegments
+}
+
+// Turns a TextAnalysis into the prepared parallel arrays.
+//
+// For each analysis segment this pushes one or more prepared segments:
+//   - soft-hyphen: zero width, but both line-end advances are the '-' width
+//   - hard-break / tab: zero width and zero line-end advances
+//   - text containing CJK: split into grapheme-based units, each pushed as its
+//     own 'text' segment
+//   - everything else: one segment, with per-grapheme widths attached when the
+//     segment is word-like and longer than one UTF-16 code unit
+//
+// `includeSegments` additionally records segment text and start offsets and
+// computes bidi levels from them; otherwise `segLevels` is null.
+function measureAnalysis(
+  analysis: TextAnalysis,
+  font: string,
+  includeSegments: boolean,
+): InternalPreparedText | PreparedTextWithSegments {
+  const graphemeSegmenter = getSharedGraphemeSegmenter()
+  const engineProfile = getEngineProfile()
+  const { cache, emojiCorrection } = getFontMeasurementState(
+    font,
+    textMayContainEmoji(analysis.normalized),
+  )
+  const discretionaryHyphenWidth = getCorrectedSegmentWidth('-', getSegmentMetrics('-', cache), emojiCorrection)
+  const spaceWidth = getCorrectedSegmentWidth(' ', getSegmentMetrics(' ', cache), emojiCorrection)
+  // Tab stops are spaced eight space-widths apart.
+  const tabStopAdvance = spaceWidth * 8
+
+  if (analysis.len === 0) return createEmptyPrepared(includeSegments)
+
+  const widths: number[] = []
+  const lineEndFitAdvances: number[] = []
+  const lineEndPaintAdvances: number[] = []
+  const kinds: SegmentBreakKind[] = []
+  // Starts optimistic; pushMeasuredSegment() clears it on any kind other than
+  // 'text', 'space' or 'zero-width-break'.
+  let simpleLineWalkFastPath = analysis.chunks.length <= 1
+  const segStarts = includeSegments ? [] as number[] : null
+  const breakableWidths: (number[] | null)[] = []
+  const breakablePrefixWidths: (number[] | null)[] = []
+  const segments = includeSegments ? [] as string[] : null
+  // One analysis segment can expand into several prepared segments, so record
+  // the prepared [start, end) range of each analysis index for chunk mapping.
+  const preparedStartByAnalysisIndex = Array.from<number>({ length: analysis.len })
+  const preparedEndByAnalysisIndex = Array.from<number>({ length: analysis.len })
+
+  // Appends one prepared segment to every parallel array.
+  function pushMeasuredSegment(
+    text: string,
+    width: number,
+    lineEndFitAdvance: number,
+    lineEndPaintAdvance: number,
+    kind: SegmentBreakKind,
+    start: number,
+    breakable: number[] | null,
+    breakablePrefix: number[] | null,
+  ): void {
+    if (kind !== 'text' && kind !== 'space' && kind !== 'zero-width-break') {
+      simpleLineWalkFastPath = false
+    }
+    widths.push(width)
+    lineEndFitAdvances.push(lineEndFitAdvance)
+    lineEndPaintAdvances.push(lineEndPaintAdvance)
+    kinds.push(kind)
+    segStarts?.push(start)
+    breakableWidths.push(breakable)
+    breakablePrefixWidths.push(breakablePrefix)
+    if (segments !== null) segments.push(text)
+  }
+
+  // Measures `unitText` and pushes it as a plain, non-breakable 'text' segment.
+  function pushTextUnit(unitText: string, start: number): void {
+    const unitMetrics = getSegmentMetrics(unitText, cache)
+    const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection)
+    pushMeasuredSegment(unitText, w, w, w, 'text', start, null, null)
+  }
+
+  for (let mi = 0; mi < analysis.len; mi++) {
+    preparedStartByAnalysisIndex[mi] = widths.length
+    const segText = analysis.texts[mi]!
+    const segWordLike = analysis.isWordLike[mi]!
+    const segKind = analysis.kinds[mi]!
+    const segStart = analysis.starts[mi]!
+
+    if (segKind === 'soft-hyphen') {
+      pushMeasuredSegment(
+        segText,
+        0,
+        discretionaryHyphenWidth,
+        discretionaryHyphenWidth,
+        segKind,
+        segStart,
+        null,
+        null,
+      )
+      preparedEndByAnalysisIndex[mi] = widths.length
+      continue
+    }
+
+    // Hard breaks and tabs are stored with zero width and are never measured.
+    if (segKind === 'hard-break' || segKind === 'tab') {
+      pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null)
+      preparedEndByAnalysisIndex[mi] = widths.length
+      continue
+    }
+
+    const segMetrics = getSegmentMetrics(segText, cache)
+
+    if (segKind === 'text' && segMetrics.containsCJK) {
+      let unitText = ''
+      let unitStart = 0
+
+      for (const gs of graphemeSegmenter.segment(segText)) {
+        const grapheme = gs.segment
+
+        if (unitText.length === 0) {
+          unitText = grapheme
+          unitStart = gs.index
+          continue
+        }
+
+        // Keep the grapheme attached to the current unit when one of the
+        // kinsoku / sticky-punctuation / closing-quote conditions holds.
+        if (
+          kinsokuEnd.has(unitText) ||
+          kinsokuStart.has(grapheme) ||
+          leftStickyPunctuation.has(grapheme) ||
+          (engineProfile.carryCJKAfterClosingQuote &&
+            isCJK(grapheme) &&
+            endsWithClosingQuote(unitText))
+        ) {
+          unitText += grapheme
+          continue
+        }
+
+        pushTextUnit(unitText, segStart + unitStart)
+
+        unitText = grapheme
+        unitStart = gs.index
+      }
+
+      if (unitText.length > 0) pushTextUnit(unitText, segStart + unitStart)
+      preparedEndByAnalysisIndex[mi] = widths.length
+      continue
+    }
+
+    const w = getCorrectedSegmentWidth(segText, segMetrics, emojiCorrection)
+    const lineEndFitAdvance =
+      segKind === 'space' || segKind === 'preserved-space' || segKind === 'zero-width-break'
+        ? 0
+        : w
+    const lineEndPaintAdvance =
+      segKind === 'space' || segKind === 'zero-width-break'
+        ? 0
+        : w
+
+    // Only word-like segments longer than one code unit get grapheme widths.
+    const isBreakable = segWordLike && segText.length > 1
+    const graphemeWidths = isBreakable
+      ? getSegmentGraphemeWidths(segText, segMetrics, cache, emojiCorrection)
+      : null
+    const graphemePrefixWidths = isBreakable && engineProfile.preferPrefixWidthsForBreakableRuns
+      ? getSegmentGraphemePrefixWidths(segText, segMetrics, cache, emojiCorrection)
+      : null
+    pushMeasuredSegment(
+      segText,
+      w,
+      lineEndFitAdvance,
+      lineEndPaintAdvance,
+      segKind,
+      segStart,
+      graphemeWidths,
+      graphemePrefixWidths,
+    )
+    preparedEndByAnalysisIndex[mi] = widths.length
+  }
+
+  const chunks = mapAnalysisChunksToPreparedChunks(analysis.chunks, preparedStartByAnalysisIndex, preparedEndByAnalysisIndex)
+  const segLevels = segStarts === null ? null : computeSegmentLevels(analysis.normalized, segStarts)
+  const core = {
+    widths,
+    lineEndFitAdvances,
+    lineEndPaintAdvances,
+    kinds,
+    simpleLineWalkFastPath,
+    segLevels,
+    breakableWidths,
+    breakablePrefixWidths,
+    discretionaryHyphenWidth,
+    tabStopAdvance,
+    chunks,
+  }
+  if (segments !== null) {
+    return { ...core, segments } as unknown as PreparedTextWithSegments
+  }
+  return core as unknown as InternalPreparedText
+}
+
+// Rewrites each analysis chunk's three indices into prepared-segment indices.
+// An index inside the lookup table maps to that analysis segment's prepared
+// start; an index at or past its end maps to the last recorded prepared end
+// (or 0 when the table is empty).
+function mapAnalysisChunksToPreparedChunks(
+  chunks: AnalysisChunk[],
+  preparedStartByAnalysisIndex: number[],
+  preparedEndByAnalysisIndex: number[],
+): PreparedLineChunk[] {
+  const toPreparedIndex = (analysisIndex: number): number => {
+    if (analysisIndex < preparedStartByAnalysisIndex.length) {
+      return preparedStartByAnalysisIndex[analysisIndex]!
+    }
+    return preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0
+  }
+
+  return chunks.map(chunk => ({
+    startSegmentIndex: toPreparedIndex(chunk.startSegmentIndex),
+    endSegmentIndex: toPreparedIndex(chunk.endSegmentIndex),
+    consumedEndSegmentIndex: toPreparedIndex(chunk.consumedEndSegmentIndex),
+  }))
+}
+
+// Shared implementation of prepare() and prepareWithSegments(): analyse the
+// text with the current engine profile, then measure the analysis.
+function prepareInternal(
+  text: string,
+  font: string,
+  includeSegments: boolean,
+  options?: PrepareOptions,
+): InternalPreparedText | PreparedTextWithSegments {
+  const engineProfile = getEngineProfile()
+  const whiteSpace = options?.whiteSpace
+  return measureAnalysis(analyzeText(text, engineProfile, whiteSpace), font, includeSegments)
+}
+
+// Diagnostic helper: runs the same analysis and measurement phases as the
+// prepare functions, timing each with performance.now() and reporting
+// segment counts.
+export function profilePrepare(text: string, font: string, options?: PrepareOptions): PrepareProfile {
+  const startedAt = performance.now()
+  const analysis = analyzeText(text, getEngineProfile(), options?.whiteSpace)
+  const analysedAt = performance.now()
+  const prepared = measureAnalysis(analysis, font, false) as InternalPreparedText
+  const measuredAt = performance.now()
+
+  const breakableSegments = prepared.breakableWidths.reduce(
+    (count, graphemeWidths) => (graphemeWidths === null ? count : count + 1),
+    0,
+  )
+
+  return {
+    analysisMs: analysedAt - startedAt,
+    measureMs: measuredAt - analysedAt,
+    totalMs: measuredAt - startedAt,
+    analysisSegments: analysis.len,
+    preparedSegments: prepared.widths.length,
+    breakableSegments,
+  }
+}
+
+// Prepare text for layout: segment it, measure every segment, and keep the
+// widths so the text can be re-laid-out cheaply. Call once per text block
+// (e.g. when a comment first appears). The result does not depend on width,
+// so one PreparedText can be passed to layout() with any maxWidth and
+// lineHeight.
+//
+// Steps:
+//   1. Normalize collapsible whitespace (CSS white-space: normal behavior)
+//   2. Segment via Intl.Segmenter (handles CJK, Thai, etc.)
+//   3. Merge punctuation into preceding word ("better." as one unit)
+//   4. Split CJK words into individual graphemes (per-character line breaks)
+//   5. Measure each segment, cache by (segment, font)
+//   6. Pre-measure graphemes of long words (for overflow-wrap: break-word)
+//   7. Correct emoji canvas inflation (auto-detected per font size)
+//   8. Optionally compute rich-path bidi metadata for custom renderers
+export function prepare(text: string, font: string, options?: PrepareOptions): PreparedText {
+  const prepared = prepareInternal(text, font, false, options)
+  return prepared as PreparedText
+}
+
+// Same as prepare(), but the returned handle also carries segment text (and
+// bidi levels) so callers can render the laid-out lines themselves.
+export function prepareWithSegments(text: string, font: string, options?: PrepareOptions): PreparedTextWithSegments {
+  const prepared = prepareInternal(text, font, true, options)
+  return prepared as PreparedTextWithSegments
+}
+
+// Reinterprets the opaque public handle as its internal representation.
+// Type-level only; the object itself is returned unchanged.
+function getInternalPrepared(prepared: PreparedText): InternalPreparedText {
+  const internal = prepared as InternalPreparedText
+  return internal
+}
+
+// Lay out prepared text at `maxWidth`, using the caller's `lineHeight` to turn
+// the line count into a height. Works purely on the cached widths, so it is
+// the function to call on every resize.
+//
+// Line breaking rules (matching CSS white-space: normal + overflow-wrap: break-word):
+//   - Break before any non-space segment that would overflow the line
+//   - Trailing whitespace hangs past the line edge (doesn't trigger breaks)
+//   - Segments wider than maxWidth are broken at grapheme boundaries
+export function layout(prepared: PreparedText, maxWidth: number, lineHeight: number): LayoutResult {
+  // Deliberately uses the count-only walker: layoutWithLines() applies the
+  // same break semantics but also tracks line ranges, which is bookkeeping
+  // this hot path should not pay for.
+  const internal = getInternalPrepared(prepared)
+  const lineCount = countPreparedLines(internal, maxWidth)
+  const height = lineCount * lineHeight
+  return { lineCount, height }
+}
+
+// Returns the graphemes of `segments[segmentIndex]`, memoized in `cache` under
+// the segment index so repeated line materialization does not re-segment.
+function getSegmentGraphemes(
+  segmentIndex: number,
+  segments: string[],
+  cache: Map<number, string[]>,
+): string[] {
+  let graphemes = cache.get(segmentIndex)
+  if (graphemes !== undefined) return graphemes
+
+  graphemes = []
+  const graphemeSegmenter = getSharedGraphemeSegmenter()
+  for (const gs of graphemeSegmenter.segment(segments[segmentIndex]!)) {
+    graphemes.push(gs.segment)
+  }
+  cache.set(segmentIndex, graphemes)
+  return graphemes
+}
+
+// Returns the per-handle grapheme cache (segment index -> graphemes),
+// creating and registering an empty one the first time a handle is seen.
+function getLineTextCache(prepared: PreparedTextWithSegments): Map<number, string[]> {
+  let cache = sharedLineTextCaches.get(prepared)
+  if (cache !== undefined) return cache
+
+  cache = new Map<number, string[]>()
+  sharedLineTextCaches.set(prepared, cache)
+  return cache
+}
+
+// True when the segment just before the line's end is a soft hyphen, unless
+// the line starts mid-segment inside the very segment it ends in.
+function lineHasDiscretionaryHyphen(
+  kinds: SegmentBreakKind[],
+  startSegmentIndex: number,
+  startGraphemeIndex: number,
+  endSegmentIndex: number,
+): boolean {
+  if (endSegmentIndex <= 0) return false
+  if (kinds[endSegmentIndex - 1] !== 'soft-hyphen') return false
+
+  const startsInsideEndSegment = startSegmentIndex === endSegmentIndex && startGraphemeIndex > 0
+  return !startsInsideEndSegment
+}
+
+// Concatenates the text of the line spanning
+// [startSegment:startGrapheme, endSegment:endGrapheme).
+// Soft-hyphen and hard-break segments contribute no text of their own; a '-'
+// is added when lineHasDiscretionaryHyphen() reports that the line ends on a
+// soft hyphen. `cache` memoizes grapheme splits per segment index.
+function buildLineTextFromRange(
+  segments: string[],
+  kinds: SegmentBreakKind[],
+  cache: Map<number, string[]>,
+  startSegmentIndex: number,
+  startGraphemeIndex: number,
+  endSegmentIndex: number,
+  endGraphemeIndex: number,
+): string {
+  let text = ''
+  const endsWithDiscretionaryHyphen = lineHasDiscretionaryHyphen(
+    kinds,
+    startSegmentIndex,
+    startGraphemeIndex,
+    endSegmentIndex,
+  )
+
+  // Whole segments before the end segment; the first may start mid-segment.
+  for (let i = startSegmentIndex; i < endSegmentIndex; i++) {
+    if (kinds[i] === 'soft-hyphen' || kinds[i] === 'hard-break') continue
+    if (i === startSegmentIndex && startGraphemeIndex > 0) {
+      text += getSegmentGraphemes(i, segments, cache).slice(startGraphemeIndex).join('')
+    } else {
+      text += segments[i]!
+    }
+  }
+
+  if (endGraphemeIndex > 0) {
+    // The line ends inside the end segment: take only its leading graphemes
+    // (from startGraphemeIndex when the line also starts in that segment).
+    if (endsWithDiscretionaryHyphen) text += '-'
+    text += getSegmentGraphemes(endSegmentIndex, segments, cache).slice(
+      startSegmentIndex === endSegmentIndex ? startGraphemeIndex : 0,
+      endGraphemeIndex,
+    ).join('')
+  } else if (endsWithDiscretionaryHyphen) {
+    text += '-'
+  }
+
+  return text
+}
+
+// Assembles a public LayoutLine: builds the line text from the given range
+// and packs the start/end indices into cursors.
+function createLayoutLine(
+  prepared: PreparedTextWithSegments,
+  cache: Map<number, string[]>,
+  width: number,
+  startSegmentIndex: number,
+  startGraphemeIndex: number,
+  endSegmentIndex: number,
+  endGraphemeIndex: number,
+): LayoutLine {
+  return {
+    text: buildLineTextFromRange(
+      prepared.segments,
+      prepared.kinds,
+      cache,
+      startSegmentIndex,
+      startGraphemeIndex,
+      endSegmentIndex,
+      endGraphemeIndex,
+    ),
+    width,
+    start: {
+      segmentIndex: startSegmentIndex,
+      graphemeIndex: startGraphemeIndex,
+    },
+    end: {
+      segmentIndex: endSegmentIndex,
+      graphemeIndex: endGraphemeIndex,
+    },
+  }
+}
+
+// Converts a line emitted by the line walker into a public LayoutLine, using
+// the caller-supplied grapheme cache.
+function materializeLayoutLine(
+  prepared: PreparedTextWithSegments,
+  cache: Map<number, string[]>,
+  line: InternalLayoutLine,
+): LayoutLine {
+  return createLayoutLine(
+    prepared,
+    cache,
+    line.width,
+    line.startSegmentIndex,
+    line.startGraphemeIndex,
+    line.endSegmentIndex,
+    line.endGraphemeIndex,
+  )
+}
+
+// Repackages the walker's flat line record as a public LayoutLineRange with
+// start/end cursors.
+function toLayoutLineRange(line: InternalLayoutLine): LayoutLineRange {
+  const {
+    width,
+    startSegmentIndex,
+    startGraphemeIndex,
+    endSegmentIndex,
+    endGraphemeIndex,
+  } = line
+  const start = { segmentIndex: startSegmentIndex, graphemeIndex: startGraphemeIndex }
+  const end = { segmentIndex: endSegmentIndex, graphemeIndex: endGraphemeIndex }
+  return { width, start, end }
+}
+
+// Computes the next line's range from `start`, or null when the line-break
+// stepper yields no line.
+function stepLineRange(
+  prepared: PreparedTextWithSegments,
+  start: LayoutCursor,
+  maxWidth: number,
+): LayoutLineRange | null {
+  const internalLine = stepPreparedLineRange(prepared, start, maxWidth)
+  return internalLine === null ? null : toLayoutLineRange(internalLine)
+}
+
+// Turns a line range into a LayoutLine with text, using the handle's shared
+// grapheme cache.
+function materializeLine(
+  prepared: PreparedTextWithSegments,
+  line: LayoutLineRange,
+): LayoutLine {
+  const { width, start, end } = line
+  const graphemeCache = getLineTextCache(prepared)
+  return createLayoutLine(
+    prepared,
+    graphemeCache,
+    width,
+    start.segmentIndex,
+    start.graphemeIndex,
+    end.segmentIndex,
+    end.graphemeIndex,
+  )
+}
+
+// Batch low-level line geometry pass. This is the non-materializing counterpart
+// to layoutWithLines(), useful for shrinkwrap and other aggregate geometry work.
+//
+// Returns the number of lines. `onLine` is optional: when it is omitted the
+// lines are only counted and no LayoutLineRange objects are created.
+export function walkLineRanges(
+  prepared: PreparedTextWithSegments,
+  maxWidth: number,
+  onLine?: (line: LayoutLineRange) => void,
+): number {
+  if (prepared.widths.length === 0) return 0
+
+  const internal = getInternalPrepared(prepared)
+  if (onLine === undefined) return walkPreparedLines(internal, maxWidth)
+
+  return walkPreparedLines(internal, maxWidth, line => {
+    onLine(toLayoutLineRange(line))
+  })
+}
+
+// Lays out a single line starting at `start`; returns it with its text, or
+// null when no further line is produced.
+export function layoutNextLine(
+  prepared: PreparedTextWithSegments,
+  start: LayoutCursor,
+  maxWidth: number,
+): LayoutLine | null {
+  const range = stepLineRange(prepared, start, maxWidth)
+  return range === null ? null : materializeLine(prepared, range)
+}
+
+// Rich layout API: like layout(), but also returns every line's text, width
+// and cursors. The caller supplies lineHeight here too. Because it allocates
+// per line, keep it off the resize hot path.
+export function layoutWithLines(prepared: PreparedTextWithSegments, maxWidth: number, lineHeight: number): LayoutLinesResult {
+  const lines: LayoutLine[] = []
+  if (prepared.widths.length === 0) {
+    return { lineCount: 0, height: 0, lines }
+  }
+
+  const graphemeCache = getLineTextCache(prepared)
+  const internal = getInternalPrepared(prepared)
+  const lineCount = walkPreparedLines(internal, maxWidth, walkedLine => {
+    lines.push(materializeLayoutLine(prepared, graphemeCache, walkedLine))
+  })
+  const height = lineCount * lineHeight
+
+  return { lineCount, height, lines }
+}
+
+// Drops every cache this module owns or delegates to: the analysis caches,
+// the shared grapheme segmenter, the per-handle line-text caches, and the
+// measurement caches.
+export function clearCache(): void {
+  clearAnalysisCaches()
+  sharedGraphemeSegmenter = null
+  sharedLineTextCaches = new WeakMap<PreparedTextWithSegments, Map<number, string[]>>()
+  clearMeasurementCaches()
+}
+
+// Sets the analysis locale, then clears all caches via clearCache().
+export function setLocale(locale?: string): void {
+  setAnalysisLocale(locale)
+  clearCache()
+}
diff --git a/packages/lynx-pretext/src/pretext/line-break.ts b/packages/lynx-pretext/src/pretext/line-break.ts
new file mode 100644
index 0000000..57fa113
--- /dev/null
+++ b/packages/lynx-pretext/src/pretext/line-break.ts
@@ -0,0 +1,1056 @@
+import type { SegmentBreakKind } from './analysis.js'
+import { getEngineProfile } from './measurement.js'
+
+// Line start position: a segment index plus a grapheme offset inside it.
+export type LineBreakCursor = {
+  segmentIndex: number
+  graphemeIndex: number // normalizeLineStart() returns the cursor unchanged when this is > 0
+}
+
+// The prepared-text fields the line breaker reads. The per-segment arrays are
+// indexed by prepared segment.
+export type PreparedLineBreakData = {
+  widths: number[]
+  lineEndFitAdvances: number[]
+  lineEndPaintAdvances: number[]
+  kinds: SegmentBreakKind[]
+  // When true, countPreparedLines()/walkPreparedLines() use the *Simple walkers.
+  simpleLineWalkFastPath: boolean
+  // Per-grapheme widths for segments that may be split, else null.
+  breakableWidths: (number[] | null)[]
+  // Cumulative counterpart; read only when the engine profile prefers prefix widths.
+  breakablePrefixWidths: (number[] | null)[]
+  discretionaryHyphenWidth: number
+  tabStopAdvance: number
+  chunks: {
+    startSegmentIndex: number
+    endSegmentIndex: number
+    consumedEndSegmentIndex: number
+  }[]
+}
+
+// Flat line record handed to the walkers' `onLine` callback: start and end
+// positions as segment/grapheme index pairs, plus the line width.
+export type InternalLayoutLine = {
+  startSegmentIndex: number
+  startGraphemeIndex: number
+  endSegmentIndex: number
+  endGraphemeIndex: number
+  width: number
+}
+
+// Whether a line may end right after a segment of this kind.
+function canBreakAfter(kind: SegmentBreakKind): boolean {
+  switch (kind) {
+    case 'space':
+    case 'preserved-space':
+    case 'tab':
+    case 'zero-width-break':
+    case 'soft-hyphen':
+      return true
+    default:
+      return false
+  }
+}
+
+// Only the plain 'space' kind counts as collapsible on the simple path.
+function isSimpleCollapsibleSpace(kind: SegmentBreakKind): boolean {
+  const collapsibleKind: SegmentBreakKind = 'space'
+  return kind === collapsibleKind
+}
+
+// Distance from `lineWidth` to the next tab stop. Returns 0 when tab stops are
+// disabled (advance <= 0) and a full stop when the line already sits on one
+// (within 1e-6).
+function getTabAdvance(lineWidth: number, tabStopAdvance: number): number {
+  if (tabStopAdvance <= 0) return 0
+
+  const pastLastStop = lineWidth % tabStopAdvance
+  const sitsOnStop = Math.abs(pastLastStop) <= 1e-6
+  return sitsOnStop ? tabStopAdvance : tabStopAdvance - pastLastStop
+}
+
+// Advance contributed by one grapheme of a breakable segment. Uses the
+// difference of neighbouring prefix widths when those are preferred and
+// available, otherwise the grapheme's own width.
+function getBreakableAdvance(
+  graphemeWidths: number[],
+  graphemePrefixWidths: number[] | null,
+  graphemeIndex: number,
+  preferPrefixWidths: boolean,
+): number {
+  if (preferPrefixWidths && graphemePrefixWidths !== null) {
+    const widthBefore = graphemeIndex > 0 ? graphemePrefixWidths[graphemeIndex - 1]! : 0
+    return graphemePrefixWidths[graphemeIndex]! - widthBefore
+  }
+  return graphemeWidths[graphemeIndex]!
+}
+
+// Counts how many leading entries of `graphemeWidths` fit on a line that
+// already holds `initialWidth`. Every candidate except the last entry must
+// also leave room for the discretionary hyphen. With `cumulativeWidths` the
+// entries are treated as running totals measured from `initialWidth`;
+// otherwise they are added one after another.
+function fitSoftHyphenBreak(
+  graphemeWidths: number[],
+  initialWidth: number,
+  maxWidth: number,
+  lineFitEpsilon: number,
+  discretionaryHyphenWidth: number,
+  cumulativeWidths: boolean,
+): { fitCount: number, fittedWidth: number } {
+  const total = graphemeWidths.length
+  let fittedWidth = initialWidth
+  let fitCount = 0
+
+  for (; fitCount < total; fitCount++) {
+    const base = cumulativeWidths ? initialWidth : fittedWidth
+    const candidateWidth = base + graphemeWidths[fitCount]!
+    const isLastGrapheme = fitCount + 1 >= total
+    const candidateLineWidth = isLastGrapheme
+      ? candidateWidth
+      : candidateWidth + discretionaryHyphenWidth
+    if (candidateLineWidth > maxWidth + lineFitEpsilon) break
+    fittedWidth = candidateWidth
+  }
+
+  return { fitCount, fittedWidth }
+}
+
+// Index of the first chunk whose consumed end lies beyond `segmentIndex`, or
+// -1 when there is none.
+function findChunkIndexForStart(prepared: PreparedLineBreakData, segmentIndex: number): number {
+  return prepared.chunks.findIndex(chunk => segmentIndex < chunk.consumedEndSegmentIndex)
+}
+
+// Moves a requested line start to the first segment a line can actually begin
+// on. Returns null when nothing is left to lay out.
+//   - A cursor past the last segment yields null.
+//   - A cursor inside a segment (graphemeIndex > 0) is returned as is.
+//   - Otherwise leading 'space', 'zero-width-break' and 'soft-hyphen'
+//     segments of the containing chunk are skipped; when the chunk is
+//     exhausted the start moves to the chunk's consumed end.
+export function normalizeLineStart(
+  prepared: PreparedLineBreakData,
+  start: LineBreakCursor,
+): LineBreakCursor | null {
+  const segmentCount = prepared.widths.length
+  if (start.segmentIndex >= segmentCount) return null
+  if (start.graphemeIndex > 0) return start
+
+  const chunkIndex = findChunkIndexForStart(prepared, start.segmentIndex)
+  if (chunkIndex < 0) return null
+  const chunk = prepared.chunks[chunkIndex]!
+
+  // An empty chunk addressed exactly at its start is returned as is.
+  const chunkIsEmpty = chunk.startSegmentIndex === chunk.endSegmentIndex
+  if (chunkIsEmpty && start.segmentIndex === chunk.startSegmentIndex) {
+    return { segmentIndex: start.segmentIndex, graphemeIndex: 0 }
+  }
+
+  const scanFrom = start.segmentIndex < chunk.startSegmentIndex
+    ? chunk.startSegmentIndex
+    : start.segmentIndex
+  for (let candidate = scanFrom; candidate < chunk.endSegmentIndex; candidate++) {
+    const kind = prepared.kinds[candidate]!
+    const skippable = kind === 'space' || kind === 'zero-width-break' || kind === 'soft-hyphen'
+    if (!skippable) return { segmentIndex: candidate, graphemeIndex: 0 }
+  }
+
+  if (chunk.consumedEndSegmentIndex >= segmentCount) return null
+  return { segmentIndex: chunk.consumedEndSegmentIndex, graphemeIndex: 0 }
+}
+
+// Counts wrapped lines at `maxWidth`, using the simple counter when the
+// prepared text qualifies for the fast path and the general walker otherwise.
+export function countPreparedLines(prepared: PreparedLineBreakData, maxWidth: number): number {
+  return prepared.simpleLineWalkFastPath
+    ? countPreparedLinesSimple(prepared, maxWidth)
+    : walkPreparedLines(prepared, maxWidth)
+}
+
+// Count-only line walker for the simple fast path. Tracks just the running
+// line width and the line count; no line ranges are recorded.
+function countPreparedLinesSimple(prepared: PreparedLineBreakData, maxWidth: number): number {
+  const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared
+  if (widths.length === 0) return 0
+
+  const engineProfile = getEngineProfile()
+  const lineFitEpsilon = engineProfile.lineFitEpsilon
+
+  let lineCount = 0
+  let lineW = 0
+  let hasContent = false
+
+  // Starts a new line with the given segment. A segment wider than maxWidth
+  // that has grapheme widths is spread over as many lines as needed.
+  function placeOnFreshLine(segmentIndex: number): void {
+    const w = widths[segmentIndex]!
+    if (w > maxWidth && breakableWidths[segmentIndex] !== null) {
+      const gWidths = breakableWidths[segmentIndex]!
+      const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
+      lineW = 0
+      for (let g = 0; g < gWidths.length; g++) {
+        const gw = getBreakableAdvance(
+          gWidths,
+          gPrefixWidths,
+          g,
+          engineProfile.preferPrefixWidthsForBreakableRuns,
+        )
+        if (lineW > 0 && lineW + gw > maxWidth + lineFitEpsilon) {
+          lineCount++
+          lineW = gw
+        } else {
+          // The first grapheme placed on an empty line opens (and counts) it.
+          if (lineW === 0) lineCount++
+          lineW += gw
+        }
+      }
+    } else {
+      lineW = w
+      lineCount++
+    }
+    hasContent = true
+  }
+
+  for (let i = 0; i < widths.length; i++) {
+    const w = widths[i]!
+    const kind = kinds[i]!
+
+    if (!hasContent) {
+      placeOnFreshLine(i)
+      continue
+    }
+
+    const newW = lineW + w
+    if (newW > maxWidth + lineFitEpsilon) {
+      // An overflowing collapsible space is skipped: it adds no width and
+      // does not start a new line.
+      if (isSimpleCollapsibleSpace(kind)) continue
+      lineW = 0
+      hasContent = false
+      placeOnFreshLine(i)
+      continue
+    }
+
+    lineW = newW
+  }
+
+  // hasContent is set by the first iteration and every later reset is followed
+  // immediately by placeOnFreshLine(), so this guard is purely defensive.
+  if (!hasContent) return lineCount + 1
+  return lineCount
+}
+
+// Line walker for the simple fast path. Returns the number of lines and, when
+// `onLine` is given, reports each line's start/end indices and width.
+//
+// A line's end is exclusive. When a line ends right after a breakable segment
+// (see canBreakAfter), the reported width excludes that trailing segment.
+function walkPreparedLinesSimple(
+  prepared: PreparedLineBreakData,
+  maxWidth: number,
+  onLine?: (line: InternalLayoutLine) => void,
+): number {
+  const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared
+  if (widths.length === 0) return 0
+
+  const engineProfile = getEngineProfile()
+  const lineFitEpsilon = engineProfile.lineFitEpsilon
+
+  let lineCount = 0
+  let lineW = 0
+  let hasContent = false
+  let lineStartSegmentIndex = 0
+  let lineStartGraphemeIndex = 0
+  let lineEndSegmentIndex = 0
+  let lineEndGraphemeIndex = 0
+  // Segment index just past the most recent breakable segment on the current
+  // line (-1 when none), and the line width measured before that segment.
+  let pendingBreakSegmentIndex = -1
+  let pendingBreakPaintWidth = 0
+
+  function clearPendingBreak(): void {
+    pendingBreakSegmentIndex = -1
+    pendingBreakPaintWidth = 0
+  }
+
+  // Reports the current line (by default ending at the tracked end cursor
+  // with the tracked width) and resets the per-line state.
+  function emitCurrentLine(
+    endSegmentIndex = lineEndSegmentIndex,
+    endGraphemeIndex = lineEndGraphemeIndex,
+    width = lineW,
+  ): void {
+    lineCount++
+    onLine?.({
+      startSegmentIndex: lineStartSegmentIndex,
+      startGraphemeIndex: lineStartGraphemeIndex,
+      endSegmentIndex,
+      endGraphemeIndex,
+      width,
+    })
+    lineW = 0
+    hasContent = false
+    clearPendingBreak()
+  }
+
+  function startLineAtSegment(segmentIndex: number, width: number): void {
+    hasContent = true
+    lineStartSegmentIndex = segmentIndex
+    lineStartGraphemeIndex = 0
+    lineEndSegmentIndex = segmentIndex + 1
+    lineEndGraphemeIndex = 0
+    lineW = width
+  }
+
+  function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void {
+    hasContent = true
+    lineStartSegmentIndex = segmentIndex
+    lineStartGraphemeIndex = graphemeIndex
+    lineEndSegmentIndex = segmentIndex
+    lineEndGraphemeIndex = graphemeIndex + 1
+    lineW = width
+  }
+
+  function appendWholeSegment(segmentIndex: number, width: number): void {
+    if (!hasContent) {
+      startLineAtSegment(segmentIndex, width)
+      return
+    }
+    lineW += width
+    lineEndSegmentIndex = segmentIndex + 1
+    lineEndGraphemeIndex = 0
+  }
+
+  // Remembers a break opportunity after `segmentIndex` when its kind allows one.
+  function updatePendingBreak(segmentIndex: number, segmentWidth: number): void {
+    if (!canBreakAfter(kinds[segmentIndex]!)) return
+    pendingBreakSegmentIndex = segmentIndex + 1
+    pendingBreakPaintWidth = lineW - segmentWidth
+  }
+
+  function appendBreakableSegment(segmentIndex: number): void {
+    appendBreakableSegmentFrom(segmentIndex, 0)
+  }
+
+  // Places a segment grapheme by grapheme, emitting a line whenever the next
+  // grapheme would overflow.
+  function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void {
+    const gWidths = breakableWidths[segmentIndex]!
+    const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
+    for (let g = startGraphemeIndex; g < gWidths.length; g++) {
+      const gw = getBreakableAdvance(
+        gWidths,
+        gPrefixWidths,
+        g,
+        engineProfile.preferPrefixWidthsForBreakableRuns,
+      )
+
+      if (!hasContent) {
+        startLineAtGrapheme(segmentIndex, g, gw)
+        continue
+      }
+
+      if (lineW + gw > maxWidth + lineFitEpsilon) {
+        emitCurrentLine()
+        startLineAtGrapheme(segmentIndex, g, gw)
+      } else {
+        lineW += gw
+        lineEndSegmentIndex = segmentIndex
+        lineEndGraphemeIndex = g + 1
+      }
+    }
+
+    // A line that consumed the whole segment ends at the next segment boundary.
+    if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) {
+      lineEndSegmentIndex = segmentIndex + 1
+      lineEndGraphemeIndex = 0
+    }
+  }
+
+  let i = 0
+  while (i < widths.length) {
+    const w = widths[i]!
+    const kind = kinds[i]!
+
+    if (!hasContent) {
+      if (w > maxWidth && breakableWidths[i] !== null) {
+        appendBreakableSegment(i)
+      } else {
+        startLineAtSegment(i, w)
+      }
+      updatePendingBreak(i, w)
+      i++
+      continue
+    }
+
+    const newW = lineW + w
+    if (newW > maxWidth + lineFitEpsilon) {
+      if (canBreakAfter(kind)) {
+        // The overflowing breakable segment stays on this line but is left
+        // out of the reported width.
+        appendWholeSegment(i, w)
+        emitCurrentLine(i + 1, 0, lineW - w)
+        i++
+        continue
+      }
+
+      if (pendingBreakSegmentIndex >= 0) {
+        // Segments appended after the pending break already belong to this
+        // line. Cutting the line back to the pending break while resuming at
+        // `i` would leave them in no line at all, so in that case end the
+        // line where it currently stands (as countPreparedLinesSimple does).
+        const hasContentPastPendingBreak =
+          lineEndSegmentIndex > pendingBreakSegmentIndex ||
+          (lineEndSegmentIndex === pendingBreakSegmentIndex && lineEndGraphemeIndex > 0)
+        if (hasContentPastPendingBreak) {
+          emitCurrentLine()
+        } else {
+          emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
+        }
+        // `i` is not advanced: the segment is retried on a fresh line.
+        continue
+      }
+
+      if (w > maxWidth && breakableWidths[i] !== null) {
+        emitCurrentLine()
+        appendBreakableSegment(i)
+        i++
+        continue
+      }
+
+      // `i` is not advanced: the segment is retried on a fresh line.
+      emitCurrentLine()
+      continue
+    }
+
+    appendWholeSegment(i, w)
+    updatePendingBreak(i, w)
+    i++
+  }
+
+  if (hasContent) emitCurrentLine()
+  return lineCount
+}
+
+export function walkPreparedLines( // Walks every chunk of `prepared`, splitting it into lines that fit `maxWidth`; returns the number of lines produced.
+ prepared: PreparedLineBreakData,
+ maxWidth: number,
+ onLine?: (line: InternalLayoutLine) => void, // Optional callback invoked once per produced line.
+): number {
+ if (prepared.simpleLineWalkFastPath) { // Prepared data is flagged for the simple walker: delegate entirely.
+ return walkPreparedLinesSimple(prepared, maxWidth, onLine)
+ }
+
+ const {
+ widths,
+ lineEndFitAdvances,
+ lineEndPaintAdvances,
+ kinds,
+ breakableWidths,
+ breakablePrefixWidths,
+ discretionaryHyphenWidth,
+ tabStopAdvance,
+ chunks,
+ } = prepared
+ if (widths.length === 0 || chunks.length === 0) return 0 // No segments or no chunks: zero lines.
+
+ const engineProfile = getEngineProfile()
+ const lineFitEpsilon = engineProfile.lineFitEpsilon // Slack added to maxWidth in every fit test below.
+
+ let lineCount = 0
+ let lineW = 0 // Accumulated width of the line being built.
+ let hasContent = false // True once the current line holds at least one segment or grapheme.
+ let lineStartSegmentIndex = 0
+ let lineStartGraphemeIndex = 0
+ let lineEndSegmentIndex = 0
+ let lineEndGraphemeIndex = 0
+ let pendingBreakSegmentIndex = -1 // -1 means no break opportunity is recorded for the current line.
+ let pendingBreakFitWidth = 0
+ let pendingBreakPaintWidth = 0
+ let pendingBreakKind: SegmentBreakKind | null = null
+
+ function clearPendingBreak(): void { // Forgets the recorded break opportunity.
+ pendingBreakSegmentIndex = -1
+ pendingBreakFitWidth = 0
+ pendingBreakPaintWidth = 0
+ pendingBreakKind = null
+ }
+
+ function emitCurrentLine( // Reports the current line (defaults: tracked end cursor and width), then resets per-line state.
+ endSegmentIndex = lineEndSegmentIndex,
+ endGraphemeIndex = lineEndGraphemeIndex,
+ width = lineW,
+ ): void {
+ lineCount++
+ onLine?.({
+ startSegmentIndex: lineStartSegmentIndex,
+ startGraphemeIndex: lineStartGraphemeIndex,
+ endSegmentIndex,
+ endGraphemeIndex,
+ width,
+ })
+ lineW = 0
+ hasContent = false
+ clearPendingBreak()
+ }
+
+ function startLineAtSegment(segmentIndex: number, width: number): void { // Begins a line with a whole segment; the end cursor points just past it.
+ hasContent = true
+ lineStartSegmentIndex = segmentIndex
+ lineStartGraphemeIndex = 0
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ lineW = width
+ }
+
+ function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { // Begins a line at one grapheme inside a segment.
+ hasContent = true
+ lineStartSegmentIndex = segmentIndex
+ lineStartGraphemeIndex = graphemeIndex
+ lineEndSegmentIndex = segmentIndex
+ lineEndGraphemeIndex = graphemeIndex + 1 // End cursor is exclusive: one past the grapheme just placed.
+ lineW = width
+ }
+
+ function appendWholeSegment(segmentIndex: number, width: number): void { // Adds a whole segment to the line, starting the line if it is empty.
+ if (!hasContent) {
+ startLineAtSegment(segmentIndex, width)
+ return
+ }
+ lineW += width
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ }
+
+ function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { // Records a break opportunity after `segmentIndex` when its kind allows breaking after it.
+ if (!canBreakAfter(kinds[segmentIndex]!)) return
+ const fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! // A tab contributes 0 to the fit width at a line end.
+ const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! // A tab paints at its resolved width.
+ pendingBreakSegmentIndex = segmentIndex + 1
+ pendingBreakFitWidth = lineW - segmentWidth + fitAdvance // In the whole-segment callers lineW already includes segmentWidth, so this swaps it for the line-end advance.
+ pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance
+ pendingBreakKind = kinds[segmentIndex]!
+ }
+
+ function appendBreakableSegment(segmentIndex: number): void { // Grapheme-splits a breakable segment from its first grapheme.
+ appendBreakableSegmentFrom(segmentIndex, 0)
+ }
+
+ function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void { // Places graphemes one at a time, emitting a line whenever the next grapheme would overflow.
+ const gWidths = breakableWidths[segmentIndex]!
+ const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
+ for (let g = startGraphemeIndex; g < gWidths.length; g++) {
+ const gw = getBreakableAdvance(
+ gWidths,
+ gPrefixWidths,
+ g,
+ engineProfile.preferPrefixWidthsForBreakableRuns,
+ )
+
+ if (!hasContent) { // First grapheme on a line is always placed, even if wider than maxWidth.
+ startLineAtGrapheme(segmentIndex, g, gw)
+ continue
+ }
+
+ if (lineW + gw > maxWidth + lineFitEpsilon) {
+ emitCurrentLine()
+ startLineAtGrapheme(segmentIndex, g, gw)
+ } else {
+ lineW += gw
+ lineEndSegmentIndex = segmentIndex
+ lineEndGraphemeIndex = g + 1
+ }
+ }
+
+ if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { // Line ends on the segment's last grapheme: normalize the cursor to the start of the next segment.
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ }
+ }
+
+ function continueSoftHyphenBreakableSegment(segmentIndex: number): boolean { // After a pending soft hyphen, tries to fit leading graphemes of a breakable segment; true means the segment was consumed.
+ if (pendingBreakKind !== 'soft-hyphen') return false
+ const gWidths = breakableWidths[segmentIndex]!
+ if (gWidths === null) return false // Segment has no per-grapheme widths, so it cannot be split here.
+ const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns
+ ? breakablePrefixWidths[segmentIndex] ?? gWidths
+ : gWidths
+ const usesPrefixWidths = fitWidths !== gWidths
+ const { fitCount, fittedWidth } = fitSoftHyphenBreak(
+ fitWidths,
+ lineW,
+ maxWidth,
+ lineFitEpsilon,
+ discretionaryHyphenWidth,
+ usesPrefixWidths,
+ )
+ if (fitCount === 0) return false // Nothing fits: let the caller try the other break strategies.
+
+ lineW = fittedWidth
+ lineEndSegmentIndex = segmentIndex
+ lineEndGraphemeIndex = fitCount
+ clearPendingBreak()
+
+ if (fitCount === gWidths.length) { // The whole segment fit: advance the cursor past it and keep building the line.
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ return true
+ }
+
+ emitCurrentLine( // Partial fit: end the line inside the segment, reporting a width that includes the discretionary hyphen.
+ segmentIndex,
+ fitCount,
+ fittedWidth + discretionaryHyphenWidth,
+ )
+ appendBreakableSegmentFrom(segmentIndex, fitCount) // Lay out the remaining graphemes on following lines.
+ return true
+ }
+
+ function emitEmptyChunk(chunk: { startSegmentIndex: number, consumedEndSegmentIndex: number }): void { // Reports a zero-width line for a chunk with no segments of its own.
+ lineCount++
+ onLine?.({
+ startSegmentIndex: chunk.startSegmentIndex,
+ startGraphemeIndex: 0,
+ endSegmentIndex: chunk.consumedEndSegmentIndex,
+ endGraphemeIndex: 0,
+ width: 0,
+ })
+ clearPendingBreak()
+ }
+
+ for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
+ const chunk = chunks[chunkIndex]!
+ if (chunk.startSegmentIndex === chunk.endSegmentIndex) { // Empty chunk: still counts as one line.
+ emitEmptyChunk(chunk)
+ continue
+ }
+
+ hasContent = false // Reset per-line state at the start of each chunk.
+ lineW = 0
+ lineStartSegmentIndex = chunk.startSegmentIndex
+ lineStartGraphemeIndex = 0
+ lineEndSegmentIndex = chunk.startSegmentIndex
+ lineEndGraphemeIndex = 0
+ clearPendingBreak()
+
+ let i = chunk.startSegmentIndex
+ while (i < chunk.endSegmentIndex) {
+ const kind = kinds[i]!
+ const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]! // Tab width is computed from the current line width; other kinds use the prepared width.
+
+ if (kind === 'soft-hyphen') { // Adds no width here; only recorded as a break opportunity when the line already has content.
+ if (hasContent) {
+ lineEndSegmentIndex = i + 1
+ lineEndGraphemeIndex = 0
+ pendingBreakSegmentIndex = i + 1
+ pendingBreakFitWidth = lineW + discretionaryHyphenWidth
+ pendingBreakPaintWidth = lineW + discretionaryHyphenWidth
+ pendingBreakKind = kind
+ }
+ i++
+ continue
+ }
+
+ if (!hasContent) { // First segment of a line: always placed, grapheme-split when it is too wide and breakable.
+ if (w > maxWidth && breakableWidths[i] !== null) {
+ appendBreakableSegment(i)
+ } else {
+ startLineAtSegment(i, w)
+ }
+ updatePendingBreakForWholeSegment(i, w)
+ i++
+ continue
+ }
+
+ const newW = lineW + w
+ if (newW > maxWidth + lineFitEpsilon) { // Segment i would overflow: choose where to break.
+ const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!)
+ const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!)
+
+ if ( // Engine profile prefers breaking at a fitting soft hyphen before trying anything else.
+ pendingBreakKind === 'soft-hyphen' &&
+ engineProfile.preferEarlySoftHyphenBreak &&
+ pendingBreakFitWidth <= maxWidth + lineFitEpsilon
+ ) {
+ emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
+ continue // i is not advanced: segment i is retried on the new line.
+ }
+
+ if (pendingBreakKind === 'soft-hyphen' && continueSoftHyphenBreakableSegment(i)) {
+ i++
+ continue
+ }
+
+ if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) { // Segment fits when measured by its line-end fit advance: keep it on this line and break after it.
+ appendWholeSegment(i, w)
+ emitCurrentLine(i + 1, 0, currentBreakPaintWidth)
+ i++
+ continue
+ }
+
+ if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { // Fall back to the last recorded break opportunity.
+ emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
+ continue
+ }
+
+ if (w > maxWidth && breakableWidths[i] !== null) { // Oversized breakable segment: end the line and split the segment by grapheme.
+ emitCurrentLine()
+ appendBreakableSegment(i)
+ i++
+ continue
+ }
+
+ emitCurrentLine() // No usable break: emit the line as-is and retry segment i on a fresh line.
+ continue
+ }
+
+ appendWholeSegment(i, w)
+ updatePendingBreakForWholeSegment(i, w)
+ i++
+ }
+
+ if (hasContent) { // Flush the chunk's last line; it ends at the chunk's consumed end.
+ const finalPaintWidth =
+ pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex // The last recorded break sits exactly at the consumed end: use its paint width.
+ ? pendingBreakPaintWidth
+ : lineW
+ emitCurrentLine(chunk.consumedEndSegmentIndex, 0, finalPaintWidth)
+ }
+ }
+
+ return lineCount
+}
+
+export function layoutNextLineRange( // Lays out one line beginning at `start`; returns its range and width, or null when no line can be produced.
+ prepared: PreparedLineBreakData,
+ start: LineBreakCursor,
+ maxWidth: number,
+): InternalLayoutLine | null {
+ const normalizedStart = normalizeLineStart(prepared, start)
+ if (normalizedStart === null) return null // The helper yielded no usable start position.
+
+ if (prepared.simpleLineWalkFastPath) { // Prepared data is flagged for the simple variant: delegate entirely.
+ return layoutNextLineRangeSimple(prepared, normalizedStart, maxWidth)
+ }
+
+ const chunkIndex = findChunkIndexForStart(prepared, normalizedStart.segmentIndex)
+ if (chunkIndex < 0) return null // No chunk was found for the start segment.
+
+ const chunk = prepared.chunks[chunkIndex]!
+ if (chunk.startSegmentIndex === chunk.endSegmentIndex) { // Empty chunk: report a zero-width line spanning to its consumed end.
+ return {
+ startSegmentIndex: chunk.startSegmentIndex,
+ startGraphemeIndex: 0,
+ endSegmentIndex: chunk.consumedEndSegmentIndex,
+ endGraphemeIndex: 0,
+ width: 0,
+ }
+ }
+
+ const {
+ widths,
+ lineEndFitAdvances,
+ lineEndPaintAdvances,
+ kinds,
+ breakableWidths,
+ breakablePrefixWidths,
+ discretionaryHyphenWidth,
+ tabStopAdvance,
+ } = prepared
+ const engineProfile = getEngineProfile()
+ const lineFitEpsilon = engineProfile.lineFitEpsilon // Slack added to maxWidth in every fit test below.
+
+ let lineW = 0 // Accumulated width of the line being built.
+ let hasContent = false // True once the line holds at least one segment or grapheme.
+ const lineStartSegmentIndex = normalizedStart.segmentIndex
+ const lineStartGraphemeIndex = normalizedStart.graphemeIndex
+ let lineEndSegmentIndex = lineStartSegmentIndex
+ let lineEndGraphemeIndex = lineStartGraphemeIndex
+ let pendingBreakSegmentIndex = -1 // -1 means no break opportunity is recorded.
+ let pendingBreakFitWidth = 0
+ let pendingBreakPaintWidth = 0
+ let pendingBreakKind: SegmentBreakKind | null = null
+
+ function clearPendingBreak(): void { // Forgets the recorded break opportunity.
+ pendingBreakSegmentIndex = -1
+ pendingBreakFitWidth = 0
+ pendingBreakPaintWidth = 0
+ pendingBreakKind = null
+ }
+
+ function finishLine( // Builds the result from the fixed start cursor and the given (or tracked) end cursor and width.
+ endSegmentIndex = lineEndSegmentIndex,
+ endGraphemeIndex = lineEndGraphemeIndex,
+ width = lineW,
+ ): InternalLayoutLine | null {
+ if (!hasContent) return null // Nothing was placed, so there is no line to report.
+
+ return {
+ startSegmentIndex: lineStartSegmentIndex,
+ startGraphemeIndex: lineStartGraphemeIndex,
+ endSegmentIndex,
+ endGraphemeIndex,
+ width,
+ }
+ }
+
+ function startLineAtSegment(segmentIndex: number, width: number): void { // Places the first whole segment; the start cursor is fixed, only the end cursor moves.
+ hasContent = true
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ lineW = width
+ }
+
+ function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { // Places the first grapheme of the line.
+ hasContent = true
+ lineEndSegmentIndex = segmentIndex
+ lineEndGraphemeIndex = graphemeIndex + 1 // End cursor is exclusive: one past the grapheme just placed.
+ lineW = width
+ }
+
+ function appendWholeSegment(segmentIndex: number, width: number): void { // Adds a whole segment to the line, starting the line if it is empty.
+ if (!hasContent) {
+ startLineAtSegment(segmentIndex, width)
+ return
+ }
+ lineW += width
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ }
+
+ function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { // Records a break opportunity after `segmentIndex` when its kind allows breaking after it.
+ if (!canBreakAfter(kinds[segmentIndex]!)) return
+ const fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! // A tab contributes 0 to the fit width at a line end.
+ const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! // A tab paints at its resolved width.
+ pendingBreakSegmentIndex = segmentIndex + 1
+ pendingBreakFitWidth = lineW - segmentWidth + fitAdvance // In the whole-segment callers lineW already includes segmentWidth, so this swaps it for the line-end advance.
+ pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance
+ pendingBreakKind = kinds[segmentIndex]!
+ }
+
+ function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null { // Places graphemes one at a time; returns the finished line on overflow, or null when the rest of the segment fit.
+ const gWidths = breakableWidths[segmentIndex]!
+ const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
+ for (let g = startGraphemeIndex; g < gWidths.length; g++) {
+ const gw = getBreakableAdvance(
+ gWidths,
+ gPrefixWidths,
+ g,
+ engineProfile.preferPrefixWidthsForBreakableRuns,
+ )
+
+ if (!hasContent) { // First grapheme on the line is always placed, even if wider than maxWidth.
+ startLineAtGrapheme(segmentIndex, g, gw)
+ continue
+ }
+
+ if (lineW + gw > maxWidth + lineFitEpsilon) {
+ return finishLine() // Line ends before grapheme g.
+ }
+
+ lineW += gw
+ lineEndSegmentIndex = segmentIndex
+ lineEndGraphemeIndex = g + 1
+ }
+
+ if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { // Line ends on the segment's last grapheme: normalize the cursor to the start of the next segment.
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ }
+ return null
+ }
+
+ function maybeFinishAtSoftHyphen(segmentIndex: number): InternalLayoutLine | null { // With a pending soft hyphen, either finishes the line or absorbs the whole segment (then returns null).
+ if (pendingBreakKind !== 'soft-hyphen' || pendingBreakSegmentIndex < 0) return null
+
+ const gWidths = breakableWidths[segmentIndex] ?? null
+ if (gWidths !== null) {
+ const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns
+ ? breakablePrefixWidths[segmentIndex] ?? gWidths
+ : gWidths
+ const usesPrefixWidths = fitWidths !== gWidths
+ const { fitCount, fittedWidth } = fitSoftHyphenBreak(
+ fitWidths,
+ lineW,
+ maxWidth,
+ lineFitEpsilon,
+ discretionaryHyphenWidth,
+ usesPrefixWidths,
+ )
+
+ if (fitCount === gWidths.length) { // The whole segment fit: absorb it and keep building the line.
+ lineW = fittedWidth
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ clearPendingBreak()
+ return null
+ }
+
+ if (fitCount > 0) { // Partial fit: end the line inside the segment, reporting a width that includes the discretionary hyphen.
+ return finishLine(
+ segmentIndex,
+ fitCount,
+ fittedWidth + discretionaryHyphenWidth,
+ )
+ }
+ }
+
+ if (pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { // Otherwise break at the soft hyphen itself if that fits.
+ return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
+ }
+
+ return null
+ }
+
+ for (let i = normalizedStart.segmentIndex; i < chunk.endSegmentIndex; i++) {
+ const kind = kinds[i]!
+ const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0 // Only the first segment can be entered mid-way.
+ const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]! // Tab width is computed from the current line width; other kinds use the prepared width.
+
+ if (kind === 'soft-hyphen' && startGraphemeIndex === 0) { // Adds no width here; only recorded as a break opportunity when the line already has content.
+ if (hasContent) {
+ lineEndSegmentIndex = i + 1
+ lineEndGraphemeIndex = 0
+ pendingBreakSegmentIndex = i + 1
+ pendingBreakFitWidth = lineW + discretionaryHyphenWidth
+ pendingBreakPaintWidth = lineW + discretionaryHyphenWidth
+ pendingBreakKind = kind
+ }
+ continue
+ }
+
+ if (!hasContent) { // First placement on the line.
+ if (startGraphemeIndex > 0) { // Resuming inside a segment: continue grapheme by grapheme.
+ const line = appendBreakableSegmentFrom(i, startGraphemeIndex)
+ if (line !== null) return line
+ } else if (w > maxWidth && breakableWidths[i] !== null) { // Too wide and breakable: split by grapheme.
+ const line = appendBreakableSegmentFrom(i, 0)
+ if (line !== null) return line
+ } else {
+ startLineAtSegment(i, w)
+ }
+ updatePendingBreakForWholeSegment(i, w)
+ continue
+ }
+
+ const newW = lineW + w
+ if (newW > maxWidth + lineFitEpsilon) { // Segment i would overflow: choose where the line ends.
+ const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!)
+ const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!)
+
+ if ( // Engine profile prefers breaking at a fitting soft hyphen before trying anything else.
+ pendingBreakKind === 'soft-hyphen' &&
+ engineProfile.preferEarlySoftHyphenBreak &&
+ pendingBreakFitWidth <= maxWidth + lineFitEpsilon
+ ) {
+ return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
+ }
+
+ const softBreakLine = maybeFinishAtSoftHyphen(i)
+ if (softBreakLine !== null) return softBreakLine
+
+ if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) { // Segment fits when measured by its line-end fit advance: keep it on this line and break after it.
+ appendWholeSegment(i, w)
+ return finishLine(i + 1, 0, currentBreakPaintWidth)
+ }
+
+ if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { // Fall back to the last recorded break opportunity.
+ return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
+ }
+
+ if (w > maxWidth && breakableWidths[i] !== null) {
+ const currentLine = finishLine() // hasContent is true on this path, so this yields the current line.
+ if (currentLine !== null) return currentLine
+ const line = appendBreakableSegmentFrom(i, 0)
+ if (line !== null) return line
+ }
+
+ return finishLine() // No usable break: end the line before segment i.
+ }
+
+ appendWholeSegment(i, w)
+ updatePendingBreakForWholeSegment(i, w)
+ }
+
+ if (pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex && lineEndGraphemeIndex === 0) { // The last recorded break sits exactly at the chunk's consumed end: use its paint width.
+ return finishLine(chunk.consumedEndSegmentIndex, 0, pendingBreakPaintWidth)
+ }
+
+ return finishLine(chunk.consumedEndSegmentIndex, 0, lineW) // Chunk exhausted: the line runs to the chunk's consumed end (null if nothing was placed).
+}
+
+function layoutNextLineRangeSimple( // Fast-path variant: lays out one line from `normalizedStart` using only widths, kinds and per-grapheme widths.
+ prepared: PreparedLineBreakData,
+ normalizedStart: LineBreakCursor,
+ maxWidth: number,
+): InternalLayoutLine | null {
+ const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared
+ const engineProfile = getEngineProfile()
+ const lineFitEpsilon = engineProfile.lineFitEpsilon // Slack added to maxWidth in every fit test below.
+
+ let lineW = 0 // Accumulated width of the line being built.
+ let hasContent = false // True once the line holds at least one segment or grapheme.
+ const lineStartSegmentIndex = normalizedStart.segmentIndex
+ const lineStartGraphemeIndex = normalizedStart.graphemeIndex
+ let lineEndSegmentIndex = lineStartSegmentIndex
+ let lineEndGraphemeIndex = lineStartGraphemeIndex
+ let pendingBreakSegmentIndex = -1 // -1 means no break opportunity is recorded.
+ let pendingBreakPaintWidth = 0
+
+ function finishLine( // Builds the result from the fixed start cursor and the given (or tracked) end cursor and width.
+ endSegmentIndex = lineEndSegmentIndex,
+ endGraphemeIndex = lineEndGraphemeIndex,
+ width = lineW,
+ ): InternalLayoutLine | null {
+ if (!hasContent) return null // Nothing was placed, so there is no line to report.
+
+ return {
+ startSegmentIndex: lineStartSegmentIndex,
+ startGraphemeIndex: lineStartGraphemeIndex,
+ endSegmentIndex,
+ endGraphemeIndex,
+ width,
+ }
+ }
+
+ function startLineAtSegment(segmentIndex: number, width: number): void { // Places the first whole segment; only the end cursor moves.
+ hasContent = true
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ lineW = width
+ }
+
+ function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { // Places the first grapheme of the line.
+ hasContent = true
+ lineEndSegmentIndex = segmentIndex
+ lineEndGraphemeIndex = graphemeIndex + 1 // End cursor is exclusive: one past the grapheme just placed.
+ lineW = width
+ }
+
+ function appendWholeSegment(segmentIndex: number, width: number): void { // Adds a whole segment to the line, starting the line if it is empty.
+ if (!hasContent) {
+ startLineAtSegment(segmentIndex, width)
+ return
+ }
+ lineW += width
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ }
+
+ function updatePendingBreak(segmentIndex: number, segmentWidth: number): void { // Records a break opportunity after `segmentIndex` when its kind allows breaking after it.
+ if (!canBreakAfter(kinds[segmentIndex]!)) return
+ pendingBreakSegmentIndex = segmentIndex + 1
+ pendingBreakPaintWidth = lineW - segmentWidth // Reported width at this break excludes the break segment's own width.
+ }
+
+ function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null { // Places graphemes one at a time; returns the finished line on overflow, or null when the rest of the segment fit.
+ const gWidths = breakableWidths[segmentIndex]!
+ const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null
+ for (let g = startGraphemeIndex; g < gWidths.length; g++) {
+ const gw = getBreakableAdvance(
+ gWidths,
+ gPrefixWidths,
+ g,
+ engineProfile.preferPrefixWidthsForBreakableRuns,
+ )
+
+ if (!hasContent) { // First grapheme on the line is always placed, even if wider than maxWidth.
+ startLineAtGrapheme(segmentIndex, g, gw)
+ continue
+ }
+
+ if (lineW + gw > maxWidth + lineFitEpsilon) {
+ return finishLine() // Line ends before grapheme g.
+ }
+
+ lineW += gw
+ lineEndSegmentIndex = segmentIndex
+ lineEndGraphemeIndex = g + 1
+ }
+
+ if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { // Line ends on the segment's last grapheme: normalize the cursor to the start of the next segment.
+ lineEndSegmentIndex = segmentIndex + 1
+ lineEndGraphemeIndex = 0
+ }
+ return null
+ }
+
+ for (let i = normalizedStart.segmentIndex; i < widths.length; i++) {
+ const w = widths[i]!
+ const kind = kinds[i]!
+ const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0 // Only the first segment can be entered mid-way.
+
+ if (!hasContent) { // First placement on the line.
+ if (startGraphemeIndex > 0) { // Resuming inside a segment: continue grapheme by grapheme.
+ const line = appendBreakableSegmentFrom(i, startGraphemeIndex)
+ if (line !== null) return line
+ } else if (w > maxWidth && breakableWidths[i] !== null) { // Too wide and breakable: split by grapheme.
+ const line = appendBreakableSegmentFrom(i, 0)
+ if (line !== null) return line
+ } else {
+ startLineAtSegment(i, w)
+ }
+ updatePendingBreak(i, w)
+ continue
+ }
+
+ const newW = lineW + w
+ if (newW > maxWidth + lineFitEpsilon) { // Segment i would overflow: choose where the line ends.
+ if (canBreakAfter(kind)) { // Overflowing segment is itself a break point: consume it but report the width without it.
+ appendWholeSegment(i, w)
+ return finishLine(i + 1, 0, lineW - w)
+ }
+
+ if (pendingBreakSegmentIndex >= 0) { // Fall back to the last recorded break opportunity.
+ return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth)
+ }
+
+ if (w > maxWidth && breakableWidths[i] !== null) {
+ const currentLine = finishLine() // hasContent is true on this path, so this yields the current line.
+ if (currentLine !== null) return currentLine
+ const line = appendBreakableSegmentFrom(i, 0)
+ if (line !== null) return line
+ }
+
+ return finishLine() // No usable break: end the line before segment i.
+ }
+
+ appendWholeSegment(i, w)
+ updatePendingBreak(i, w)
+ }
+
+ return finishLine() // Segments exhausted: return whatever was accumulated (null if nothing).
+}
diff --git a/packages/lynx-pretext/src/pretext/measurement.ts b/packages/lynx-pretext/src/pretext/measurement.ts
new file mode 100644
index 0000000..892417a
--- /dev/null
+++ b/packages/lynx-pretext/src/pretext/measurement.ts
@@ -0,0 +1,350 @@
+import { isCJK } from './analysis.js'
+
+export type SegmentMetrics = { // Cached measurement record for one piece of text under one font.
+ width: number // Measured width before any emoji correction is applied.
+ containsCJK: boolean // The browser host stores isCJK(seg) here.
+ emojiCount?: number // Filled lazily by getEmojiCount; undefined until first requested.
+ graphemeWidths?: number[] | null // Filled lazily with per-grapheme widths; null when there is at most one grapheme.
+ graphemePrefixWidths?: number[] | null // Filled lazily with the width of each growing grapheme prefix; null when there is at most one grapheme.
+}
+
+export type EngineProfile = { // Engine-specific layout tuning; the browser host derives it from the user agent.
+ lineFitEpsilon: number // Tolerance the line breaker adds to maxWidth when testing whether content fits.
+ carryCJKAfterClosingQuote: boolean // Consumed outside this module; the browser host enables it for Chromium-family user agents.
+ preferPrefixWidthsForBreakableRuns: boolean // When true, breakable runs are advanced using prefix widths where they are available.
+ preferEarlySoftHyphenBreak: boolean // When true, an overflowing line breaks at a fitting pending soft hyphen before other strategies are tried.
+}
+
+export type FontMeasurementState = { // Per-font measurement inputs returned by getFontMeasurementState.
+  cache: Map<string, SegmentMetrics> // Segment text -> cached metrics for this font.
+  fontSize: number // Font size in pixels (the browser host parses it from the font string).
+  emojiCorrection: number // Width subtracted per emoji grapheme; 0 when no correction applies.
+}
+
+export type MeasurementHost = { // Pluggable measurement backend; the exported wrappers in this module dispatch to the active host.
+  clearMeasurementCaches(): void
+  getSegmentMetrics(seg: string, cache: Map<string, SegmentMetrics>): SegmentMetrics // `cache` maps segment text to its metrics.
+  getEngineProfile(): EngineProfile
+  getCorrectedSegmentWidth(seg: string, metrics: SegmentMetrics, emojiCorrection: number): number
+  getSegmentGraphemeWidths(
+    seg: string,
+    metrics: SegmentMetrics,
+    cache: Map<string, SegmentMetrics>,
+    emojiCorrection: number,
+  ): number[] | null
+  getSegmentGraphemePrefixWidths(
+    seg: string,
+    metrics: SegmentMetrics,
+    cache: Map<string, SegmentMetrics>,
+    emojiCorrection: number,
+  ): number[] | null
+  getFontMeasurementState(font: string, needsEmojiCorrection: boolean): FontMeasurementState
+  textMayContainEmoji(text: string): boolean
+}
+
+let measureContext: CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D | null = null // Shared 2D context, created on first use by getMeasureContext.
+const segmentMetricCaches = new Map<string, Map<string, SegmentMetrics>>() // Font string -> (segment text -> metrics).
+let cachedEngineProfile: EngineProfile | null = null // Memoized result of browserGetEngineProfile.
+let measurementHostOverride: MeasurementHost | null = null // Set only while withMeasurementHost is running its callback.
+
+const maybeEmojiFallbackRe = /[\xA9\xAE\u203C\u2049\u20E3\u2122\u2139\u2194-\u2199\u21A9\u21AA\u231A\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA\u24C2\u25AA\u25AB\u25B6\u25C0\u25FB-\u25FE\u2600-\u2604\u260E\u2611\u2614\u2615\u2618\u261D\u2620\u2622\u2623\u2626\u262A\u262E\u262F\u2638-\u263A\u2640\u2642\u2648-\u2653\u265F\u2660\u2663\u2665\u2666\u2668\u267B\u267E\u267F\u2692-\u2697\u2699\u269B\u269C\u26A0\u26A1\u26A7\u26AA\u26AB\u26B0\u26B1\u26BD\u26BE\u26C4\u26C5\u26C8\u26CE\u26CF\u26D1\u26D3\u26D4\u26E9\u26EA\u26F0-\u26F5\u26F7-\u26FA\u26FD\u2702\u2705\u2708-\u270D\u270F\u2712\u2714\u2716\u271D\u2721\u2728\u2733\u2734\u2744\u2747\u274C\u274E\u2753-\u2755\u2757\u2763\u2764\u2795-\u2797\u27A1\u27B0\u27BF\u2934\u2935\u2B05-\u2B07\u2B1B\u2B1C\u2B50\u2B55\u3030\u303D\u3297\u3299\uFE0F\u{1F004}\u{1F02C}-\u{1F02F}\u{1F094}-\u{1F09F}\u{1F0AF}\u{1F0B0}\u{1F0C0}\u{1F0CF}\u{1F0D0}\u{1F0F6}-\u{1F0FF}\u{1F170}\u{1F171}\u{1F17E}\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1AE}-\u{1F1FF}\u{1F201}-\u{1F20F}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F23C}-\u{1F23F}\u{1F249}-\u{1F25F}\u{1F266}-\u{1F321}\u{1F324}-\u{1F393}\u{1F396}\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}-\u{1F3F0}\u{1F3F3}-\u{1F3F5}\u{1F3F8}-\u{1F4FD}\u{1F3F7}-\u{1F3FA}\u{1F4FF}-\u{1F53D}\u{1F549}-\u{1F54E}\u{1F550}-\u{1F567}\u{1F56F}\u{1F570}\u{1F573}-\u{1F57A}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F590}\u{1F595}\u{1F596}\u{1F5A4}\u{1F5A5}\u{1F5A8}\u{1F5B1}\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5E8}\u{1F5EF}\u{1F5F3}\u{1F5FA}-\u{1F64F}\u{1F680}-\u{1F6C5}\u{1F6CB}-\u{1F6D2}\u{1F6D5}-\u{1F6E5}\u{1F6E9}\u{1F6EB}-\u{1F6F0}\u{1F6F3}-\u{1F6FF}\u{1F7DA}-\u{1F7FF}\u{1F80C}-\u{1F80F}\u{1F848}-\u{1F84F}\u{1F85A}-\u{1F85F}\u{1F888}-\u{1F88F}\u{1F8AE}\u{1F8AF}\u{1F8BC}-\u{1F8BF}\u{1F8C2}-\u{1F8CF}\u{1F8D9}-\u{1F8FF}\u{1F90C}-\u{1F93A}\u{1F93C}-\u{1F945}\u{1F947}-\u{1F9FF}\u{1FA58}-\u{1FA5F}\u{1FA6E}-\u{1FAFF}\u{1FC00}-\u{1FFFD}]/u // Explicit code-point class used by both emoji regex getters when a Unicode property-escape pattern cannot be constructed.
+let emojiPresentationRe: RegExp | null = null // Built lazily by getEmojiPresentationRe.
+let maybeEmojiRe: RegExp | null = null // Built lazily by getMaybeEmojiRe.
+let sharedGraphemeSegmenter: Intl.Segmenter | null = null // Built lazily by getSharedGraphemeSegmenter; reset when caches are cleared.
+const emojiCorrectionCache = new Map<string, number>() // Font string -> per-emoji width correction.
+
+function getEmojiPresentationRe(): RegExp { // Returns the memoized Emoji_Presentation matcher, building it on first call.
+ if (emojiPresentationRe !== null) return emojiPresentationRe
+ try {
+ emojiPresentationRe = new RegExp('\\p{Emoji_Presentation}', 'u')
+ } catch { // Pattern could not be constructed (e.g. no property-escape support): use the explicit fallback class.
+ emojiPresentationRe = maybeEmojiFallbackRe
+ }
+ return emojiPresentationRe
+}
+
+function getMaybeEmojiRe(): RegExp { // Returns the memoized broad "might contain emoji" matcher, building it on first call.
+ if (maybeEmojiRe !== null) return maybeEmojiRe
+ try {
+ maybeEmojiRe = new RegExp( // Character class of emoji-related properties plus U+FE0F and U+20E3.
+ '[\\p{Emoji_Presentation}\\p{Extended_Pictographic}\\p{Regional_Indicator}\\uFE0F\\u20E3]',
+ 'u',
+ )
+ } catch { // Pattern could not be constructed: use the explicit fallback class.
+ maybeEmojiRe = maybeEmojiFallbackRe
+ }
+ return maybeEmojiRe
+}
+
+export function getMeasureContext(): CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D { // Returns the shared 2D context, creating it on first call; throws when no canvas API exists.
+ if (measureContext !== null) return measureContext
+
+ if (typeof OffscreenCanvas !== 'undefined') { // OffscreenCanvas is tried first.
+ measureContext = new OffscreenCanvas(1, 1).getContext('2d')! // NOTE(review): getContext can return null; the `!` assumes a 2D context is always available.
+ return measureContext
+ }
+
+ if (typeof document !== 'undefined') { // Otherwise fall back to a detached DOM canvas.
+ measureContext = document.createElement('canvas').getContext('2d')! // NOTE(review): same non-null assumption as above.
+ return measureContext
+ }
+
+ throw new Error('Text measurement requires OffscreenCanvas or a DOM canvas context.')
+}
+
+export function getSegmentMetricCache(font: string): Map<string, SegmentMetrics> { // Returns the segment cache for `font`, creating and registering it on first use.
+  let cache = segmentMetricCaches.get(font)
+  if (!cache) {
+    cache = new Map() // Element types are inferred from the declared type of `cache`.
+    segmentMetricCaches.set(font, cache)
+  }
+  return cache
+}
+
+function browserGetSegmentMetrics(seg: string, cache: Map<string, SegmentMetrics>): SegmentMetrics { // Returns cached metrics for `seg`, measuring and caching on a miss.
+  let metrics = cache.get(seg)
+  if (metrics === undefined) {
+    const ctx = getMeasureContext() // The font is not set here: the context's current font is used.
+    metrics = {
+      width: ctx.measureText(seg).width,
+      containsCJK: isCJK(seg),
+    }
+    cache.set(seg, metrics)
+  }
+  return metrics
+}
+
+function browserGetEngineProfile(): EngineProfile { // Returns the memoized engine profile, deriving it from navigator on first call.
+ if (cachedEngineProfile !== null) return cachedEngineProfile
+
+ if (typeof navigator === 'undefined') { // No navigator available: use the default profile with every engine-specific flag off.
+ cachedEngineProfile = {
+ lineFitEpsilon: 0.005,
+ carryCJKAfterClosingQuote: false,
+ preferPrefixWidthsForBreakableRuns: false,
+ preferEarlySoftHyphenBreak: false,
+ }
+ return cachedEngineProfile
+ }
+
+ const ua = navigator.userAgent
+ const vendor = navigator.vendor
+ const isSafari = // Apple vendor string plus a Safari token, with none of the listed other-browser tokens present.
+ vendor === 'Apple Computer, Inc.' &&
+ ua.includes('Safari/') &&
+ !ua.includes('Chrome/') &&
+ !ua.includes('Chromium/') &&
+ !ua.includes('CriOS/') &&
+ !ua.includes('FxiOS/') &&
+ !ua.includes('EdgiOS/')
+ const isChromium = // Any of the listed Chromium-family tokens in the user agent.
+ ua.includes('Chrome/') ||
+ ua.includes('Chromium/') ||
+ ua.includes('CriOS/') ||
+ ua.includes('Edg/')
+
+ cachedEngineProfile = {
+ lineFitEpsilon: isSafari ? 1 / 64 : 0.005, // Safari gets a larger fit tolerance than the default.
+ carryCJKAfterClosingQuote: isChromium,
+ preferPrefixWidthsForBreakableRuns: isSafari,
+ preferEarlySoftHyphenBreak: isSafari,
+ }
+ return cachedEngineProfile
+}
+
+export function parseFontSize(font: string): number { // Extracts the first `<number>px` size from a font string; 16 when none is present.
+  const pxMatch = /(\d+(?:\.\d+)?)\s*px/.exec(font)
+  return pxMatch === null ? 16 : parseFloat(pxMatch[1]!)
+}
+
+function getSharedGraphemeSegmenter(): Intl.Segmenter { // Returns the module-wide grapheme segmenter, constructing it on first use.
+  if (sharedGraphemeSegmenter !== null) return sharedGraphemeSegmenter
+  const created = new Intl.Segmenter(undefined, { granularity: 'grapheme' })
+  sharedGraphemeSegmenter = created
+  return created
+}
+
+function isEmojiGrapheme(g: string): boolean { // True when `g` matches the emoji-presentation regex or contains the U+FE0F variation selector.
+ return getEmojiPresentationRe().test(g) || g.includes('\uFE0F')
+}
+
+function browserTextMayContainEmoji(text: string): boolean { // True when any character of `text` matches the broad emoji regex.
+ return getMaybeEmojiRe().test(text)
+}
+
+function getEmojiCorrection(font: string, fontSize: number): number { // Returns the memoized per-emoji width correction for `font`: canvas width minus DOM width when they disagree, else 0.
+ let correction = emojiCorrectionCache.get(font)
+ if (correction !== undefined) return correction
+
+ const ctx = getMeasureContext()
+ ctx.font = font
+ const canvasW = ctx.measureText('\u{1F600}').width // Canvas width of a single probe emoji (U+1F600).
+ correction = 0
+ if ( // Probe the DOM only when the canvas width exceeds the font size by more than 0.5 and a document body exists.
+ canvasW > fontSize + 0.5 &&
+ typeof document !== 'undefined' &&
+ document.body !== null
+ ) {
+ const span = document.createElement('span') // Hidden, absolutely positioned probe element, removed right after measuring.
+ span.style.font = font
+ span.style.display = 'inline-block'
+ span.style.visibility = 'hidden'
+ span.style.position = 'absolute'
+ span.textContent = '\u{1F600}'
+ document.body.appendChild(span)
+ const domW = span.getBoundingClientRect().width
+ document.body.removeChild(span)
+ if (canvasW - domW > 0.5) { // Differences of 0.5 or less are ignored.
+ correction = canvasW - domW
+ }
+ }
+ emojiCorrectionCache.set(font, correction) // A zero result is cached too, so each font is probed only once.
+ return correction
+}
+
+function countEmojiGraphemes(text: string): number { // Counts the graphemes of `text` that isEmojiGrapheme classifies as emoji.
+  let emojiTotal = 0
+  const graphemes = getSharedGraphemeSegmenter().segment(text)
+  for (const { segment } of graphemes) {
+    emojiTotal += isEmojiGrapheme(segment) ? 1 : 0
+  }
+  return emojiTotal
+}
+
+function getEmojiCount(seg: string, metrics: SegmentMetrics): number { // Returns the emoji count for `seg`, memoizing it on `metrics.emojiCount`.
+  const cachedCount = metrics.emojiCount
+  if (cachedCount !== undefined) return cachedCount
+  const freshCount = countEmojiGraphemes(seg)
+  metrics.emojiCount = freshCount
+  return freshCount
+}
+
+function browserGetCorrectedSegmentWidth( // Measured width minus `emojiCorrection` per emoji grapheme; emoji are not counted when the correction is 0.
+  seg: string,
+  metrics: SegmentMetrics,
+  emojiCorrection: number,
+): number {
+  const totalCorrection = emojiCorrection === 0 ? 0 : getEmojiCount(seg, metrics) * emojiCorrection
+  return metrics.width - totalCorrection
+}
+
+function browserGetSegmentGraphemeWidths( // Returns corrected per-grapheme widths for `seg`, memoized on `metrics`; null when there is at most one grapheme.
+  seg: string,
+  metrics: SegmentMetrics,
+  cache: Map<string, SegmentMetrics>,
+  emojiCorrection: number,
+): number[] | null {
+  if (metrics.graphemeWidths !== undefined) return metrics.graphemeWidths // Already computed (possibly as null).
+
+  const widths: number[] = []
+  const graphemeSegmenter = getSharedGraphemeSegmenter()
+  for (const gs of graphemeSegmenter.segment(seg)) {
+    const graphemeMetrics = browserGetSegmentMetrics(gs.segment, cache) // Each grapheme is measured and cached as its own entry.
+    widths.push(browserGetCorrectedSegmentWidth(gs.segment, graphemeMetrics, emojiCorrection))
+  }
+
+  metrics.graphemeWidths = widths.length > 1 ? widths : null
+  return metrics.graphemeWidths
+}
+
+function browserGetSegmentGraphemePrefixWidths( // Returns corrected widths of each growing grapheme prefix of `seg`, memoized on `metrics`; null when there is at most one grapheme.
+  seg: string,
+  metrics: SegmentMetrics,
+  cache: Map<string, SegmentMetrics>,
+  emojiCorrection: number,
+): number[] | null {
+  if (metrics.graphemePrefixWidths !== undefined) return metrics.graphemePrefixWidths // Already computed (possibly as null).
+
+  const prefixWidths: number[] = []
+  const graphemeSegmenter = getSharedGraphemeSegmenter()
+  let prefix = ''
+  for (const gs of graphemeSegmenter.segment(seg)) {
+    prefix += gs.segment
+    const prefixMetrics = browserGetSegmentMetrics(prefix, cache) // Each prefix string is measured and cached as its own entry.
+    prefixWidths.push(browserGetCorrectedSegmentWidth(prefix, prefixMetrics, emojiCorrection))
+  }
+
+  metrics.graphemePrefixWidths = prefixWidths.length > 1 ? prefixWidths : null
+  return metrics.graphemePrefixWidths
+}
+
+function browserGetFontMeasurementState(font: string, needsEmojiCorrection: boolean): FontMeasurementState { // Selects `font` on the shared context and returns its cache, parsed size and emoji correction.
+ const ctx = getMeasureContext()
+ ctx.font = font // Later measurements on the shared context use this font.
+ const cache = getSegmentMetricCache(font)
+ const fontSize = parseFontSize(font)
+ const emojiCorrection = needsEmojiCorrection ? getEmojiCorrection(font, fontSize) : 0 // The correction is computed only when the caller asks for it.
+ return { cache, fontSize, emojiCorrection }
+}
+
+function browserClearMeasurementCaches(): void { // Drops per-font segment caches, emoji corrections and the shared segmenter; the context, engine profile and compiled regexes are kept.
+ segmentMetricCaches.clear()
+ emojiCorrectionCache.clear()
+ sharedGraphemeSegmenter = null
+}
+
+export const browserMeasurementHost: MeasurementHost = { // Default host, assembled from the browser* functions in this module.
+ clearMeasurementCaches: browserClearMeasurementCaches,
+ getSegmentMetrics: browserGetSegmentMetrics,
+ getEngineProfile: browserGetEngineProfile,
+ getCorrectedSegmentWidth: browserGetCorrectedSegmentWidth,
+ getSegmentGraphemeWidths: browserGetSegmentGraphemeWidths,
+ getSegmentGraphemePrefixWidths: browserGetSegmentGraphemePrefixWidths,
+ getFontMeasurementState: browserGetFontMeasurementState,
+ textMayContainEmoji: browserTextMayContainEmoji,
+}
+
+function getActiveMeasurementHost(): MeasurementHost { // The override installed by withMeasurementHost if one is set, otherwise the browser host.
+ return measurementHostOverride ?? browserMeasurementHost
+}
+
+export function withMeasurementHost<T>(measurementHost: MeasurementHost, fn: () => T): T { // Runs `fn` with `measurementHost` as the active host and returns its result.
+  const previousHost = measurementHostOverride // Remembered so nested calls restore the outer override.
+  measurementHostOverride = measurementHost
+  try {
+    return fn()
+  } finally {
+    measurementHostOverride = previousHost // Restored even when `fn` throws.
+  }
+}
+
+export function getSegmentMetrics(seg: string, cache: Map<string, SegmentMetrics>): SegmentMetrics { // Delegates to the active measurement host.
+  return getActiveMeasurementHost().getSegmentMetrics(seg, cache)
+}
+
+export function getEngineProfile(): EngineProfile { // Delegates to the active measurement host.
+ return getActiveMeasurementHost().getEngineProfile()
+}
+
+export function textMayContainEmoji(text: string): boolean { // Delegates to the active measurement host.
+ return getActiveMeasurementHost().textMayContainEmoji(text)
+}
+
+export function getCorrectedSegmentWidth(seg: string, metrics: SegmentMetrics, emojiCorrection: number): number { // Delegates to the active measurement host.
+ return getActiveMeasurementHost().getCorrectedSegmentWidth(seg, metrics, emojiCorrection)
+}
+
+export function getSegmentGraphemeWidths( // Delegates to the active measurement host.
+  seg: string,
+  metrics: SegmentMetrics,
+  cache: Map<string, SegmentMetrics>,
+  emojiCorrection: number,
+): number[] | null {
+  return getActiveMeasurementHost().getSegmentGraphemeWidths(seg, metrics, cache, emojiCorrection)
+}
+
+export function getSegmentGraphemePrefixWidths( // Delegates to the active measurement host.
+  seg: string,
+  metrics: SegmentMetrics,
+  cache: Map<string, SegmentMetrics>,
+  emojiCorrection: number,
+): number[] | null {
+  return getActiveMeasurementHost().getSegmentGraphemePrefixWidths(seg, metrics, cache, emojiCorrection)
+}
+
+export function getFontMeasurementState(font: string, needsEmojiCorrection: boolean): FontMeasurementState { // Delegates to the active measurement host.
+ return getActiveMeasurementHost().getFontMeasurementState(font, needsEmojiCorrection)
+}
+
+export function clearMeasurementCaches(): void { // Delegates to the active measurement host.
+ getActiveMeasurementHost().clearMeasurementCaches()
+}
diff --git a/packages/lynx-pretext/tsconfig.json b/packages/lynx-pretext/tsconfig.json
index b89bf66..3f01f1e 100644
--- a/packages/lynx-pretext/tsconfig.json
+++ b/packages/lynx-pretext/tsconfig.json
@@ -2,6 +2,7 @@
"extends": "../../tsconfig.json",
"compilerOptions": {
"composite": true,
+ "lib": ["ES2021", "DOM", "ESNext.Intl"],
"module": "ESNext",
"moduleResolution": "Bundler",
"noEmit": true,