diff --git a/examples/ascii-arts/lynx.config.ts b/examples/ascii-arts/lynx.config.ts index 857da94..da2bc0b 100644 --- a/examples/ascii-arts/lynx.config.ts +++ b/examples/ascii-arts/lynx.config.ts @@ -4,6 +4,10 @@ import { pluginReactLynx } from '@lynx-js/react-rsbuild-plugin' import { pluginTypeCheck } from '@rsbuild/plugin-type-check' export default defineConfig({ + environments: { + lynx: {}, + web: {}, + }, source: { entry: { torus: './src/torus.tsx', diff --git a/examples/basic/lynx.config.ts b/examples/basic/lynx.config.ts index eeeb8c5..9fcdc69 100644 --- a/examples/basic/lynx.config.ts +++ b/examples/basic/lynx.config.ts @@ -4,6 +4,10 @@ import { pluginReactLynx } from '@lynx-js/react-rsbuild-plugin' import { pluginTypeCheck } from '@rsbuild/plugin-type-check' export default defineConfig({ + environments: { + lynx: {}, + web: {}, + }, source: { entry: { main: './src/index.tsx', diff --git a/examples/basic/src/accuracy.tsx b/examples/basic/src/accuracy.tsx index 55b7af5..ecffa96 100644 --- a/examples/basic/src/accuracy.tsx +++ b/examples/basic/src/accuracy.tsx @@ -7,6 +7,11 @@ import { } from 'lynx-pretext' import { TEXTS, WIDTHS, FONT_SIZE, LINE_HEIGHT } from '../src/test-data' +const nativeGetTextInfo = + typeof lynx !== 'undefined' && typeof lynx.getTextInfo === 'function' + ? 
lynx.getTextInfo.bind(lynx) + : null + type TestResult = { label: string width: number @@ -31,6 +36,19 @@ type Summary = { } function runAccuracyCheck(): Summary { + if (nativeGetTextInfo === null) { + return { + total: 0, + passed: 0, + failed: 0, + passRate: '0.0', + englishTotal: 0, + englishPassed: 0, + englishPassRate: '0.0', + results: [], + } + } + const results: TestResult[] = [] let total = 0 let passed = 0 @@ -49,7 +67,7 @@ function runAccuracyCheck(): Summary { for (const width of WIDTHS) { // Native oracle: getTextInfo with maxWidth - const native = lynx.getTextInfo(text, { + const native = nativeGetTextInfo(text, { fontSize: fontSizeStr, maxWidth: `${width}px`, }) @@ -131,6 +149,19 @@ export function AccuracyPage() { ) } + if (nativeGetTextInfo === null) { + return ( + + + Accuracy Validation + + + lynx.getTextInfo is unavailable on Web, so the native-oracle comparison page is disabled there. + + + ) + } + const displayResults = showFailuresOnly ? summary.results.filter(r => !r.pass) : summary.results diff --git a/examples/basic/src/basic-height.tsx b/examples/basic/src/basic-height.tsx index b27ac81..5fbedd3 100644 --- a/examples/basic/src/basic-height.tsx +++ b/examples/basic/src/basic-height.tsx @@ -11,6 +11,10 @@ const SAMPLE_TEXT = const FONT_SIZE = 16 const LINE_HEIGHT = 24 const FONT = `${FONT_SIZE}px` +const nativeGetTextInfo = + typeof lynx !== 'undefined' && typeof lynx.getTextInfo === 'function' + ? 
lynx.getTextInfo.bind(lynx) + : null export function BasicHeightPage() { const [maxWidth, setMaxWidth] = useState(360) @@ -23,16 +27,22 @@ export function BasicHeightPage() { const prepared = prepare(SAMPLE_TEXT, FONT) const result = layout(prepared, contentWidth, LINE_HEIGHT) - const native = lynx.getTextInfo(SAMPLE_TEXT, { + const native = nativeGetTextInfo?.(SAMPLE_TEXT, { fontSize: `${FONT_SIZE}px`, maxWidth: `${contentWidth}px`, - }) - // Debug: log native result - console.log('[basic-height] native.getTextInfo:', JSON.stringify(native), 'contentWidth:', contentWidth) - const nativeContent = native.content ?? [SAMPLE_TEXT] - const nativeLineCount = nativeContent.length - const nativeHeight = nativeLineCount * LINE_HEIGHT - const match = result.lineCount === nativeLineCount + }) ?? null + if (native !== null) { + console.log('[basic-height] native.getTextInfo:', JSON.stringify(native), 'contentWidth:', contentWidth) + } + const nativeContent = native?.content ?? null + const nativeLineCount = nativeContent?.length ?? null + const nativeHeight = nativeLineCount === null ? null : nativeLineCount * LINE_HEIGHT + const match = nativeLineCount === null ? null : result.lineCount === nativeLineCount + const comparisonText = match === null + ? 'lynx.getTextInfo is unavailable on Web, so this page only shows the pretext result.' + : match + ? `Both agree: ${result.lineCount} lines, ${result.height}px height` + : `Height diff: ${Math.abs(result.height - nativeHeight!)}px | Lines: pretext=${result.lineCount} native=${nativeLineCount!}` // BTS FPS tick on every render btsFpsTick() @@ -71,10 +81,10 @@ export function BasicHeightPage() { Native - {`${nativeHeight}px`} + {nativeHeight === null ? 'N/A' : `${nativeHeight}px`} - {`${nativeLineCount} lines`} + {nativeLineCount === null ? 'Unavailable on Web' : `${nativeLineCount} lines`} @@ -89,14 +99,12 @@ export function BasicHeightPage() { - {match ? 'MATCH' : 'MISMATCH'} + {match === null ? 'WEB FALLBACK' : match ? 
'MATCH' : 'MISMATCH'} - {match - ? `Both agree: ${result.lineCount} lines, ${result.height}px height` - : `Height diff: ${Math.abs(result.height - nativeHeight)}px | Lines: pretext=${result.lineCount} native=${nativeLineCount}`} + {comparisonText} @@ -110,7 +118,7 @@ export function BasicHeightPage() { - + 0 && normalized.charCodeAt(normalized.length - 1) === 0x20) { - normalized = normalized.slice(0, -1) - } - return normalized -} - -export function normalizeWhitespacePreWrap(text: string): string { - if (!/[\r\f]/.test(text)) return text.replace(/\r\n/g, '\n') - return text - .replace(/\r\n/g, '\n') - .replace(/[\r\f]/g, '\n') -} - -export function classifySegmentBreakChar(ch: string, whiteSpaceProfile: WhiteSpaceProfile): SegmentBreakKind { - if (whiteSpaceProfile.preserveOrdinarySpaces || whiteSpaceProfile.preserveHardBreaks) { - if (ch === ' ') return 'preserved-space' - if (ch === '\t') return 'tab' - if (whiteSpaceProfile.preserveHardBreaks && ch === '\n') return 'hard-break' - } - if (ch === ' ') return 'space' - if (ch === '\u00A0' || ch === '\u202F' || ch === '\u2060' || ch === '\uFEFF') { - return 'glue' - } - if (ch === '\u200B') return 'zero-width-break' - if (ch === '\u00AD') return 'soft-hyphen' - return 'text' -} - -export function splitSegmentByBreakKind( - segment: string, - isWordLike: boolean, - start: number, - whiteSpaceProfile: WhiteSpaceProfile, -): SegmentationPiece[] { - const pieces: SegmentationPiece[] = [] - let currentKind: SegmentBreakKind | null = null - let currentText = '' - let currentStart = start - let currentWordLike = false - let offset = 0 - - for (const ch of segment) { - const kind = classifySegmentBreakChar(ch, whiteSpaceProfile) - const wordLike = kind === 'text' && isWordLike - - if (currentKind !== null && kind === currentKind && wordLike === currentWordLike) { - currentText += ch - offset += ch.length - continue - } - - if (currentKind !== null) { - pieces.push({ - text: currentText, - isWordLike: currentWordLike, - 
kind: currentKind, - start: currentStart, - }) - } - - currentKind = kind - currentText = ch - currentStart = start + offset - currentWordLike = wordLike - offset += ch.length - } - - if (currentKind !== null) { - pieces.push({ - text: currentText, - isWordLike: currentWordLike, - kind: currentKind, - start: currentStart, - }) - } - - return pieces -} - -// --- Character sets and merge helpers (US-003) --- - -// PrimJS doesn't support \p{} unicode property escapes — use transpiled ranges -const arabicScriptRe = /[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061C-\u061E\u0620-\u063F\u0641-\u064A\u0656-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u0870-\u0891\u0897-\u08E1\u08E3-\u08FF\uFB50-\uFD3D\uFD40-\uFDCF\uFDF0-\uFDFF\uFE70-\uFE74\uFE76-\uFEFC\u{10E60}-\u{10E7E}\u{10EC2}-\u{10EC7}\u{10ED0}-\u{10ED8}\u{10EFA}-\u{10EFF}\u{1EE00}-\u{1EE03}\u{1EE05}-\u{1EE1F}\u{1EE21}\u{1EE22}\u{1EE24}\u{1EE27}\u{1EE29}-\u{1EE32}\u{1EE34}-\u{1EE37}\u{1EE39}\u{1EE3B}\u{1EE42}\u{1EE47}\u{1EE49}\u{1EE4B}\u{1EE4D}-\u{1EE4F}\u{1EE51}\u{1EE52}\u{1EE54}\u{1EE57}\u{1EE59}\u{1EE5B}\u{1EE5D}\u{1EE5F}\u{1EE61}\u{1EE62}\u{1EE64}\u{1EE67}-\u{1EE6A}\u{1EE6C}-\u{1EE72}\u{1EE74}-\u{1EE77}\u{1EE79}-\u{1EE7C}\u{1EE7E}\u{1EE80}-\u{1EE89}\u{1EE8B}-\u{1EE9B}\u{1EEA1}-\u{1EEA3}\u{1EEA5}-\u{1EEA9}\u{1EEAB}-\u{1EEBB}\u{1EEF0}\u{1EEF1}]/u -const combiningMarkRe = 
/[\u0300-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u0897-\u089F\u08CA-\u08E1\u08E3-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u09FE\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0AFA-\u0AFF\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B55-\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C00-\u0C04\u0C3C\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0CF3\u0D00-\u0D03\u0D3B\u0D3C\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D81-\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EBC\u0EC8-\u0ECE\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102B-\u103E\u1056-\u1059\u105E-\u1060\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F\u109A-\u109D\u135D-\u135F\u1712-\u1715\u1732-\u1734\u1752\u1753\u1772\u1773\u17B4-\u17D3\u17DD\u180B-\u180D\u180F\u1885\u1886\u18A9\u1920-\u192B\u1930-\u193B\u1A17-\u1A1B\u1A55-\u1A5E\u1A60-\u1A7C\u1A7F\u1AB0-\u1ADD\u1AE0-\u1AEB\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF4\u1CF7-\u1CF9\u1DC0-\u1DFF\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3099\u309A\uA66F-\uA672\uA674-\uA67D\uA69E\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA82C\uA880\uA881\uA8B4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uA9E5\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAA7B-
\uAA7D\uAAB0\uAAB2-\uAAB4\uAAB7\uAAB8\uAABE\uAABF\uAAC1\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\u{101FD}\u{102E0}\u{10376}-\u{1037A}\u{10A01}-\u{10A03}\u{10A05}\u{10A06}\u{10A0C}-\u{10A0F}\u{10A38}-\u{10A3A}\u{10A3F}\u{10AE5}\u{10AE6}\u{10D24}-\u{10D27}\u{10D69}-\u{10D6D}\u{10EAB}\u{10EAC}\u{10EFA}-\u{10EFF}\u{10F46}-\u{10F50}\u{10F82}-\u{10F85}\u{11000}-\u{11002}\u{11038}-\u{11046}\u{11070}\u{11073}\u{11074}\u{1107F}-\u{11082}\u{110B0}-\u{110BA}\u{110C2}\u{11100}-\u{11102}\u{11127}-\u{11134}\u{11145}\u{11146}\u{11173}\u{11180}-\u{11182}\u{111B3}-\u{111C0}\u{111C9}-\u{111CC}\u{111CE}\u{111CF}\u{1122C}-\u{11237}\u{1123E}\u{11241}\u{112DF}-\u{112EA}\u{11300}-\u{11303}\u{1133B}\u{1133C}\u{1133E}-\u{11344}\u{11347}\u{11348}\u{1134B}-\u{1134D}\u{11357}\u{11362}\u{11363}\u{11366}-\u{1136C}\u{11370}-\u{11374}\u{113B8}-\u{113C0}\u{113C2}\u{113C5}\u{113C7}-\u{113CA}\u{113CC}-\u{113D0}\u{113D2}\u{113E1}\u{113E2}\u{11435}-\u{11446}\u{1145E}\u{114B0}-\u{114C3}\u{115AF}-\u{115B5}\u{115B8}-\u{115C0}\u{115DC}\u{115DD}\u{11630}-\u{11640}\u{116AB}-\u{116B7}\u{1171D}-\u{1172B}\u{1182C}-\u{1183A}\u{11930}-\u{11935}\u{11937}\u{11938}\u{1193B}-\u{1193E}\u{11940}\u{11942}\u{11943}\u{119D1}-\u{119D7}\u{119DA}-\u{119E0}\u{119E4}\u{11A01}-\u{11A0A}\u{11A33}-\u{11A39}\u{11A3B}-\u{11A3E}\u{11A47}\u{11A51}-\u{11A5B}\u{11A8A}-\u{11A99}\u{11B60}-\u{11B67}\u{11C2F}-\u{11C36}\u{11C38}-\u{11C3F}\u{11C92}-\u{11CA7}\u{11CA9}-\u{11CB6}\u{11D31}-\u{11D36}\u{11D3A}\u{11D3C}\u{11D3D}\u{11D3F}-\u{11D45}\u{11D47}\u{11D8A}-\u{11D8E}\u{11D90}\u{11D91}\u{11D93}-\u{11D97}\u{11EF3}-\u{11EF6}\u{11F00}\u{11F01}\u{11F03}\u{11F34}-\u{11F3A}\u{11F3E}-\u{11F42}\u{11F5A}\u{13440}\u{13447}-\u{13455}\u{1611E}-\u{1612F}\u{16AF0}-\u{16AF4}\u{16B30}-\u{16B36}\u{16F4F}\u{16F51}-\u{16F87}\u{16F8F}-\u{16F92}\u{16FE4}\u{16FF0}\u{16FF1}\u{1BC9D}\u{1BC9E}\u{1CF00}-\u{1CF2D}\u{1CF30}-\u{1CF46}\u{1D165}-\u{1D169}\u{1D16D}-\u{1D172}\u{1D17B}-\u{1D182}\u{1D185}-\u{1D18B}\u{1D1AA}-\u{1D1AD}\u
{1D242}-\u{1D244}\u{1DA00}-\u{1DA36}\u{1DA3B}-\u{1DA6C}\u{1DA75}\u{1DA84}\u{1DA9B}-\u{1DA9F}\u{1DAA1}-\u{1DAAF}\u{1E000}-\u{1E006}\u{1E008}-\u{1E018}\u{1E01B}-\u{1E021}\u{1E023}\u{1E024}\u{1E026}-\u{1E02A}\u{1E08F}\u{1E130}-\u{1E136}\u{1E2AE}\u{1E2EC}-\u{1E2EF}\u{1E4EC}-\u{1E4EF}\u{1E5EE}\u{1E5EF}\u{1E6E3}\u{1E6E6}\u{1E6EE}\u{1E6EF}\u{1E6F5}\u{1E8D0}-\u{1E8D6}\u{1E944}-\u{1E94A}\u{E0100}-\u{E01EF}]/u -const decimalDigitRe = /[0-9\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0DE6-\u0DEF\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u1946-\u194F\u19D0-\u19D9\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59\uABF0-\uABF9\uFF10-\uFF19\u{104A0}-\u{104A9}\u{10D30}-\u{10D39}\u{10D40}-\u{10D49}\u{11066}-\u{1106F}\u{110F0}-\u{110F9}\u{11136}-\u{1113F}\u{111D0}-\u{111D9}\u{112F0}-\u{112F9}\u{11450}-\u{11459}\u{114D0}-\u{114D9}\u{11650}-\u{11659}\u{116C0}-\u{116C9}\u{116D0}-\u{116E3}\u{11730}-\u{11739}\u{118E0}-\u{118E9}\u{11950}-\u{11959}\u{11BF0}-\u{11BF9}\u{11C50}-\u{11C59}\u{11D50}-\u{11D59}\u{11DA0}-\u{11DA9}\u{11DE0}-\u{11DE9}\u{11F50}-\u{11F59}\u{16130}-\u{16139}\u{16A60}-\u{16A69}\u{16AC0}-\u{16AC9}\u{16B50}-\u{16B59}\u{16D70}-\u{16D79}\u{1CCF0}-\u{1CCF9}\u{1D7CE}-\u{1D7FF}\u{1E140}-\u{1E149}\u{1E2F0}-\u{1E2F9}\u{1E4F0}-\u{1E4F9}\u{1E5F1}-\u{1E5FA}\u{1E950}-\u{1E959}\u{1FBF0}-\u{1FBF9}]/u - -function containsArabicScript(text: string): boolean { - return arabicScriptRe.test(text) -} - -export function isCJK(s: string): boolean { - for (const ch of s) { - const c = ch.codePointAt(0)! 
- if ((c >= 0x4E00 && c <= 0x9FFF) || - (c >= 0x3400 && c <= 0x4DBF) || - (c >= 0x20000 && c <= 0x2A6DF) || - (c >= 0x2A700 && c <= 0x2B73F) || - (c >= 0x2B740 && c <= 0x2B81F) || - (c >= 0x2B820 && c <= 0x2CEAF) || - (c >= 0x2CEB0 && c <= 0x2EBEF) || - (c >= 0x30000 && c <= 0x3134F) || - (c >= 0xF900 && c <= 0xFAFF) || - (c >= 0x2F800 && c <= 0x2FA1F) || - (c >= 0x3000 && c <= 0x303F) || - (c >= 0x3040 && c <= 0x309F) || - (c >= 0x30A0 && c <= 0x30FF) || - (c >= 0xAC00 && c <= 0xD7AF) || - (c >= 0xFF00 && c <= 0xFFEF)) { - return true - } - } - return false -} - -export const kinsokuStart = new Set([ - '\uFF0C', - '\uFF0E', - '\uFF01', - '\uFF1A', - '\uFF1B', - '\uFF1F', - '\u3001', - '\u3002', - '\u30FB', - '\uFF09', - '\u3015', - '\u3009', - '\u300B', - '\u300D', - '\u300F', - '\u3011', - '\u3017', - '\u3019', - '\u301B', - '\u30FC', - '\u3005', - '\u303B', - '\u309D', - '\u309E', - '\u30FD', - '\u30FE', -]) - -export const kinsokuEnd = new Set([ - '"', - '(', '[', '{', - '\u201C', '\u2018', '\u00AB', '\u2039', - '\uFF08', - '\u3014', - '\u3008', - '\u300A', - '\u300C', - '\u300E', - '\u3010', - '\u3016', - '\u3018', - '\u301A', -]) - -const forwardStickyGlue = new Set([ - "'", '\u2018', -]) - -export const leftStickyPunctuation = new Set([ - '.', ',', '!', '?', ':', ';', - '\u060C', - '\u061B', - '\u061F', - '\u0964', - '\u0965', - '\u104A', - '\u104B', - '\u104C', - '\u104D', - '\u104F', - ')', ']', '}', - '%', - '"', - '\u201D', '\u2019', '\u00BB', '\u203A', - '\u2026', -]) - -const arabicNoSpaceTrailingPunctuation = new Set([ - ':', - '.', - '\u060C', - '\u061B', -]) - -const myanmarMedialGlue = new Set([ - '\u104F', -]) - -const closingQuoteChars = new Set([ - '\u201D', '\u2019', '\u00BB', '\u203A', - '\u300D', - '\u300F', - '\u3011', - '\u300B', - '\u3009', - '\u3015', - '\uFF09', -]) - -export function isLeftStickyPunctuationSegment(segment: string): boolean { - if (isEscapedQuoteClusterSegment(segment)) return true - let sawPunctuation = false - for 
(const ch of segment) { - if (leftStickyPunctuation.has(ch)) { - sawPunctuation = true - continue - } - if (sawPunctuation && combiningMarkRe.test(ch)) continue - return false - } - return sawPunctuation -} - -function isCJKLineStartProhibitedSegment(segment: string): boolean { - for (const ch of segment) { - if (!kinsokuStart.has(ch) && !leftStickyPunctuation.has(ch)) return false - } - return segment.length > 0 -} - -export function isForwardStickyClusterSegment(segment: string): boolean { - if (isEscapedQuoteClusterSegment(segment)) return true - for (const ch of segment) { - if (!kinsokuEnd.has(ch) && !forwardStickyGlue.has(ch) && !combiningMarkRe.test(ch)) return false - } - return segment.length > 0 -} - -export function isEscapedQuoteClusterSegment(segment: string): boolean { - let sawQuote = false - for (const ch of segment) { - if (ch === '\\' || combiningMarkRe.test(ch)) continue - if (kinsokuEnd.has(ch) || leftStickyPunctuation.has(ch) || forwardStickyGlue.has(ch)) { - sawQuote = true - continue - } - return false - } - return sawQuote -} - -export function splitTrailingForwardStickyCluster(text: string): { head: string, tail: string } | null { - const chars = Array.from(text) - let splitIndex = chars.length - - while (splitIndex > 0) { - const ch = chars[splitIndex - 1]! 
- if (combiningMarkRe.test(ch)) { - splitIndex-- - continue - } - if (kinsokuEnd.has(ch) || forwardStickyGlue.has(ch)) { - splitIndex-- - continue - } - break - } - - if (splitIndex <= 0 || splitIndex === chars.length) return null - return { - head: chars.slice(0, splitIndex).join(''), - tail: chars.slice(splitIndex).join(''), - } -} - -function isRepeatedSingleCharRun(segment: string, ch: string): boolean { - if (segment.length === 0) return false - for (const part of segment) { - if (part !== ch) return false - } - return true -} - -function endsWithArabicNoSpacePunctuation(segment: string): boolean { - if (!containsArabicScript(segment) || segment.length === 0) return false - return arabicNoSpaceTrailingPunctuation.has(segment[segment.length - 1]!) -} - -function endsWithMyanmarMedialGlue(segment: string): boolean { - if (segment.length === 0) return false - return myanmarMedialGlue.has(segment[segment.length - 1]!) -} - -function splitLeadingSpaceAndMarks(segment: string): { space: string, marks: string } | null { - if (segment.length < 2 || segment[0] !== ' ') return null - const marks = segment.slice(1) - if (marks.length > 0 && [...marks].every(ch => combiningMarkRe.test(ch))) { - return { space: ' ', marks } - } - return null -} - -export function endsWithClosingQuote(text: string): boolean { - for (let i = text.length - 1; i >= 0; i--) { - const ch = text[i]! 
- if (closingQuoteChars.has(ch)) return true - if (!leftStickyPunctuation.has(ch)) return false - } - return false -} - -// --- Word segmenter management (US-004) --- - -let sharedWordSegmenter: Intl.Segmenter | null = null -let segmenterLocale: string | undefined - -function getSharedWordSegmenter(): Intl.Segmenter { - if (sharedWordSegmenter === null) { - sharedWordSegmenter = new Intl.Segmenter(segmenterLocale, { granularity: 'word' }) - } - return sharedWordSegmenter -} - -export function clearAnalysisCaches(): void { - sharedWordSegmenter = null -} - -export function setAnalysisLocale(locale?: string): void { - const nextLocale = locale && locale.length > 0 ? locale : undefined - if (segmenterLocale === nextLocale) return - segmenterLocale = nextLocale - sharedWordSegmenter = null -} - -// --- Merge pipeline (US-004) --- - -function isTextRunBoundary(kind: SegmentBreakKind): boolean { - return ( - kind === 'space' || - kind === 'preserved-space' || - kind === 'zero-width-break' || - kind === 'hard-break' - ) -} - -const urlSchemeSegmentRe = /^[A-Za-z][A-Za-z0-9+.-]*:$/ - -function isUrlLikeRunStart(segmentation: MergedSegmentation, index: number): boolean { - const text = segmentation.texts[index]! 
- if (text.startsWith('www.')) return true - return ( - urlSchemeSegmentRe.test(text) && - index + 1 < segmentation.len && - segmentation.kinds[index + 1] === 'text' && - segmentation.texts[index + 1] === '//' - ) -} - -function isUrlQueryBoundarySegment(text: string): boolean { - return text.includes('?') && (text.includes('://') || text.startsWith('www.')) -} - -function mergeUrlLikeRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts = segmentation.texts.slice() - const isWordLike = segmentation.isWordLike.slice() - const kinds = segmentation.kinds.slice() - const starts = segmentation.starts.slice() - - for (let i = 0; i < segmentation.len; i++) { - if (kinds[i] !== 'text' || !isUrlLikeRunStart(segmentation, i)) continue - - let j = i + 1 - while (j < segmentation.len && !isTextRunBoundary(kinds[j]!)) { - texts[i] += texts[j]! - isWordLike[i] = true - const endsQueryPrefix = texts[j]!.includes('?') - kinds[j] = 'text' - texts[j] = '' - j++ - if (endsQueryPrefix) break - } - } - - let compactLen = 0 - for (let read = 0; read < texts.length; read++) { - const text = texts[read]! - if (text.length === 0) continue - if (compactLen !== read) { - texts[compactLen] = text - isWordLike[compactLen] = isWordLike[read]! - kinds[compactLen] = kinds[read]! - starts[compactLen] = starts[read]! - } - compactLen++ - } - - texts.length = compactLen - isWordLike.length = compactLen - kinds.length = compactLen - starts.length = compactLen - - return { - len: compactLen, - texts, - isWordLike, - kinds, - starts, - } -} - -function mergeUrlQueryRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] - - for (let i = 0; i < segmentation.len; i++) { - const text = segmentation.texts[i]! - texts.push(text) - isWordLike.push(segmentation.isWordLike[i]!) - kinds.push(segmentation.kinds[i]!) - starts.push(segmentation.starts[i]!) 
- - if (!isUrlQueryBoundarySegment(text)) continue - - const nextIndex = i + 1 - if ( - nextIndex >= segmentation.len || - isTextRunBoundary(segmentation.kinds[nextIndex]!) - ) { - continue - } - - let queryText = '' - const queryStart = segmentation.starts[nextIndex]! - let j = nextIndex - while (j < segmentation.len && !isTextRunBoundary(segmentation.kinds[j]!)) { - queryText += segmentation.texts[j]! - j++ - } - - if (queryText.length > 0) { - texts.push(queryText) - isWordLike.push(true) - kinds.push('text') - starts.push(queryStart) - i = j - 1 - } - } - - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } -} - -const numericJoinerChars = new Set([ - ':', '-', '/', '\u00D7', ',', '.', '+', - '\u2013', - '\u2014', -]) - -const asciiPunctuationChainSegmentRe = /^[A-Za-z0-9_]+[,:;]*$/ -const asciiPunctuationChainTrailingJoinersRe = /[,:;]+$/ - -function segmentContainsDecimalDigit(text: string): boolean { - for (const ch of text) { - if (decimalDigitRe.test(ch)) return true - } - return false -} - -function isNumericRunSegment(text: string): boolean { - if (text.length === 0) return false - for (const ch of text) { - if (decimalDigitRe.test(ch) || numericJoinerChars.has(ch)) continue - return false - } - return true -} - -function mergeNumericRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] - - for (let i = 0; i < segmentation.len; i++) { - const text = segmentation.texts[i]! - const kind = segmentation.kinds[i]! - - if (kind === 'text' && isNumericRunSegment(text) && segmentContainsDecimalDigit(text)) { - let mergedText = text - let j = i + 1 - while ( - j < segmentation.len && - segmentation.kinds[j] === 'text' && - isNumericRunSegment(segmentation.texts[j]!) - ) { - mergedText += segmentation.texts[j]! 
- j++ - } - - texts.push(mergedText) - isWordLike.push(true) - kinds.push('text') - starts.push(segmentation.starts[i]!) - i = j - 1 - continue - } - - texts.push(text) - isWordLike.push(segmentation.isWordLike[i]!) - kinds.push(kind) - starts.push(segmentation.starts[i]!) - } - - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } -} - -function mergeAsciiPunctuationChains(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] - - for (let i = 0; i < segmentation.len; i++) { - const text = segmentation.texts[i]! - const kind = segmentation.kinds[i]! - const wordLike = segmentation.isWordLike[i]! - - if (kind === 'text' && wordLike && asciiPunctuationChainSegmentRe.test(text)) { - let mergedText = text - let j = i + 1 - - while ( - asciiPunctuationChainTrailingJoinersRe.test(mergedText) && - j < segmentation.len && - segmentation.kinds[j] === 'text' && - segmentation.isWordLike[j] && - asciiPunctuationChainSegmentRe.test(segmentation.texts[j]!) - ) { - mergedText += segmentation.texts[j]! - j++ - } - - texts.push(mergedText) - isWordLike.push(true) - kinds.push('text') - starts.push(segmentation.starts[i]!) - i = j - 1 - continue - } - - texts.push(text) - isWordLike.push(wordLike) - kinds.push(kind) - starts.push(segmentation.starts[i]!) - } - - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } -} - -function splitHyphenatedNumericRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] - - for (let i = 0; i < segmentation.len; i++) { - const text = segmentation.texts[i]! 
- if (segmentation.kinds[i] === 'text' && text.includes('-')) { - const parts = text.split('-') - let shouldSplit = parts.length > 1 - for (let j = 0; j < parts.length; j++) { - const part = parts[j]! - if (!shouldSplit) break - if ( - part.length === 0 || - !segmentContainsDecimalDigit(part) || - !isNumericRunSegment(part) - ) { - shouldSplit = false - } - } - - if (shouldSplit) { - let offset = 0 - for (let j = 0; j < parts.length; j++) { - const part = parts[j]! - const splitText = j < parts.length - 1 ? `${part}-` : part - texts.push(splitText) - isWordLike.push(true) - kinds.push('text') - starts.push(segmentation.starts[i]! + offset) - offset += splitText.length - } - continue - } - } - - texts.push(text) - isWordLike.push(segmentation.isWordLike[i]!) - kinds.push(segmentation.kinds[i]!) - starts.push(segmentation.starts[i]!) - } - - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } -} - -function mergeGlueConnectedTextRuns(segmentation: MergedSegmentation): MergedSegmentation { - const texts: string[] = [] - const isWordLike: boolean[] = [] - const kinds: SegmentBreakKind[] = [] - const starts: number[] = [] - - let read = 0 - while (read < segmentation.len) { - let text = segmentation.texts[read]! - let wordLike = segmentation.isWordLike[read]! - let kind = segmentation.kinds[read]! - let start = segmentation.starts[read]! - - if (kind === 'glue') { - let glueText = text - const glueStart = start - read++ - while (read < segmentation.len && segmentation.kinds[read] === 'glue') { - glueText += segmentation.texts[read]! - read++ - } - - if (read < segmentation.len && segmentation.kinds[read] === 'text') { - text = glueText + segmentation.texts[read]! - wordLike = segmentation.isWordLike[read]! 
- kind = 'text' - start = glueStart - read++ - } else { - texts.push(glueText) - isWordLike.push(false) - kinds.push('glue') - starts.push(glueStart) - continue - } - } else { - read++ - } - - if (kind === 'text') { - while (read < segmentation.len && segmentation.kinds[read] === 'glue') { - let glueText = '' - while (read < segmentation.len && segmentation.kinds[read] === 'glue') { - glueText += segmentation.texts[read]! - read++ - } - - if (read < segmentation.len && segmentation.kinds[read] === 'text') { - text += glueText + segmentation.texts[read]! - wordLike = wordLike || segmentation.isWordLike[read]! - read++ - continue - } - - text += glueText - } - } - - texts.push(text) - isWordLike.push(wordLike) - kinds.push(kind) - starts.push(start) - } - - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } -} - -function carryTrailingForwardStickyAcrossCJKBoundary(segmentation: MergedSegmentation): MergedSegmentation { - const texts = segmentation.texts.slice() - const isWordLike = segmentation.isWordLike.slice() - const kinds = segmentation.kinds.slice() - const starts = segmentation.starts.slice() - - for (let i = 0; i < texts.length - 1; i++) { - if (kinds[i] !== 'text' || kinds[i + 1] !== 'text') continue - if (!isCJK(texts[i]!) || !isCJK(texts[i + 1]!)) continue - - const split = splitTrailingForwardStickyCluster(texts[i]!) - if (split === null) continue - - texts[i] = split.head - texts[i + 1] = split.tail + texts[i + 1]! - starts[i + 1] = starts[i]! 
+ split.head.length - } - - return { - len: texts.length, - texts, - isWordLike, - kinds, - starts, - } -} - -function buildMergedSegmentation( - normalized: string, - profile: AnalysisProfile, - whiteSpaceProfile: WhiteSpaceProfile, -): MergedSegmentation { - const wordSegmenter = getSharedWordSegmenter() - let mergedLen = 0 - const mergedTexts: string[] = [] - const mergedWordLike: boolean[] = [] - const mergedKinds: SegmentBreakKind[] = [] - const mergedStarts: number[] = [] - - for (const s of wordSegmenter.segment(normalized)) { - for (const piece of splitSegmentByBreakKind(s.segment, s.isWordLike ?? false, s.index, whiteSpaceProfile)) { - const isText = piece.kind === 'text' - - if ( - profile.carryCJKAfterClosingQuote && - isText && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - isCJK(piece.text) && - isCJK(mergedTexts[mergedLen - 1]!) && - endsWithClosingQuote(mergedTexts[mergedLen - 1]!) - ) { - mergedTexts[mergedLen - 1] += piece.text - mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike - } else if ( - isText && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - isCJKLineStartProhibitedSegment(piece.text) && - isCJK(mergedTexts[mergedLen - 1]!) - ) { - mergedTexts[mergedLen - 1] += piece.text - mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike - } else if ( - isText && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - endsWithMyanmarMedialGlue(mergedTexts[mergedLen - 1]!) - ) { - mergedTexts[mergedLen - 1] += piece.text - mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike - } else if ( - isText && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - piece.isWordLike && - containsArabicScript(piece.text) && - endsWithArabicNoSpacePunctuation(mergedTexts[mergedLen - 1]!) 
- ) { - mergedTexts[mergedLen - 1] += piece.text - mergedWordLike[mergedLen - 1] = true - } else if ( - isText && - !piece.isWordLike && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - piece.text.length === 1 && - piece.text !== '-' && - piece.text !== '\u2014' && - isRepeatedSingleCharRun(mergedTexts[mergedLen - 1]!, piece.text) - ) { - mergedTexts[mergedLen - 1] += piece.text - } else if ( - isText && - !piece.isWordLike && - mergedLen > 0 && - mergedKinds[mergedLen - 1] === 'text' && - ( - isLeftStickyPunctuationSegment(piece.text) || - (piece.text === '-' && mergedWordLike[mergedLen - 1]!) - ) - ) { - mergedTexts[mergedLen - 1] += piece.text - } else { - mergedTexts[mergedLen] = piece.text - mergedWordLike[mergedLen] = piece.isWordLike - mergedKinds[mergedLen] = piece.kind - mergedStarts[mergedLen] = piece.start - mergedLen++ - } - } - } - - for (let i = 1; i < mergedLen; i++) { - if ( - mergedKinds[i] === 'text' && - !mergedWordLike[i]! && - isEscapedQuoteClusterSegment(mergedTexts[i]!) && - mergedKinds[i - 1] === 'text' - ) { - mergedTexts[i - 1] += mergedTexts[i]! - mergedWordLike[i - 1] = mergedWordLike[i - 1]! || mergedWordLike[i]! - mergedTexts[i] = '' - } - } - - for (let i = mergedLen - 2; i >= 0; i--) { - if (mergedKinds[i] === 'text' && !mergedWordLike[i]! && isForwardStickyClusterSegment(mergedTexts[i]!)) { - let j = i + 1 - while (j < mergedLen && mergedTexts[j] === '') j++ - if (j < mergedLen && mergedKinds[j] === 'text') { - mergedTexts[j] = mergedTexts[i]! + mergedTexts[j]! - mergedStarts[j] = mergedStarts[i]! - mergedTexts[i] = '' - } - } - } - - let compactLen = 0 - for (let read = 0; read < mergedLen; read++) { - const text = mergedTexts[read]! - if (text.length === 0) continue - if (compactLen !== read) { - mergedTexts[compactLen] = text - mergedWordLike[compactLen] = mergedWordLike[read]! - mergedKinds[compactLen] = mergedKinds[read]! - mergedStarts[compactLen] = mergedStarts[read]! 
- } - compactLen++ - } - - mergedTexts.length = compactLen - mergedWordLike.length = compactLen - mergedKinds.length = compactLen - mergedStarts.length = compactLen - - const compacted = mergeGlueConnectedTextRuns({ - len: compactLen, - texts: mergedTexts, - isWordLike: mergedWordLike, - kinds: mergedKinds, - starts: mergedStarts, - }) - const withMergedUrls = carryTrailingForwardStickyAcrossCJKBoundary( - mergeAsciiPunctuationChains( - splitHyphenatedNumericRuns(mergeNumericRuns(mergeUrlQueryRuns(mergeUrlLikeRuns(compacted)))), - ), - ) - - for (let i = 0; i < withMergedUrls.len - 1; i++) { - const split = splitLeadingSpaceAndMarks(withMergedUrls.texts[i]!) - if (split === null) continue - if ( - (withMergedUrls.kinds[i] !== 'space' && withMergedUrls.kinds[i] !== 'preserved-space') || - withMergedUrls.kinds[i + 1] !== 'text' || - !containsArabicScript(withMergedUrls.texts[i + 1]!) - ) { - continue - } - - withMergedUrls.texts[i] = split.space - withMergedUrls.isWordLike[i] = false - withMergedUrls.kinds[i] = withMergedUrls.kinds[i] === 'preserved-space' ? 'preserved-space' : 'space' - withMergedUrls.texts[i + 1] = split.marks + withMergedUrls.texts[i + 1]! - withMergedUrls.starts[i + 1] = withMergedUrls.starts[i]! 
+ split.space.length - } - - return withMergedUrls -} - -function compileAnalysisChunks(segmentation: MergedSegmentation, whiteSpaceProfile: WhiteSpaceProfile): AnalysisChunk[] { - if (segmentation.len === 0) return [] - if (!whiteSpaceProfile.preserveHardBreaks) { - return [{ - startSegmentIndex: 0, - endSegmentIndex: segmentation.len, - consumedEndSegmentIndex: segmentation.len, - }] - } - - const chunks: AnalysisChunk[] = [] - let startSegmentIndex = 0 - - for (let i = 0; i < segmentation.len; i++) { - if (segmentation.kinds[i] !== 'hard-break') continue - - chunks.push({ - startSegmentIndex, - endSegmentIndex: i, - consumedEndSegmentIndex: i + 1, - }) - startSegmentIndex = i + 1 - } - - if (startSegmentIndex < segmentation.len) { - chunks.push({ - startSegmentIndex, - endSegmentIndex: segmentation.len, - consumedEndSegmentIndex: segmentation.len, - }) - } - - return chunks -} - -export function analyzeText( - text: string, - profile: AnalysisProfile, - whiteSpace: WhiteSpaceMode = 'normal', -): TextAnalysis { - const whiteSpaceProfile = getWhiteSpaceProfile(whiteSpace) - const normalized = whiteSpaceProfile.mode === 'pre-wrap' - ? 
normalizeWhitespacePreWrap(text) - : normalizeWhitespaceNormal(text) - if (normalized.length === 0) { - return { - normalized, - chunks: [], - len: 0, - texts: [], - isWordLike: [], - kinds: [], - starts: [], - } - } - const segmentation = buildMergedSegmentation(normalized, profile, whiteSpaceProfile) - return { - normalized, - chunks: compileAnalysisChunks(segmentation, whiteSpaceProfile), - ...segmentation, - } -} +export * from './pretext/analysis.js' diff --git a/packages/lynx-pretext/src/intl-segmenter.d.ts b/packages/lynx-pretext/src/intl-segmenter.d.ts new file mode 100644 index 0000000..051cd5c --- /dev/null +++ b/packages/lynx-pretext/src/intl-segmenter.d.ts @@ -0,0 +1,19 @@ +declare namespace Intl { + interface SegmenterOptions { + granularity?: 'grapheme' | 'word' | 'sentence' + } + + interface SegmentData { + segment: string + index: number + input?: string + isWordLike?: boolean + } + + interface Segments extends Iterable {} + + class Segmenter { + constructor(locales?: string | string[], options?: SegmenterOptions) + segment(input: string): Segments + } +} diff --git a/packages/lynx-pretext/src/layout.test.mjs b/packages/lynx-pretext/src/layout.test.mjs new file mode 100644 index 0000000..47125eb --- /dev/null +++ b/packages/lynx-pretext/src/layout.test.mjs @@ -0,0 +1,33 @@ +import { beforeEach, expect, test } from 'bun:test' +import { readFile } from 'node:fs/promises' +import path from 'node:path' +import { fileURLToPath } from 'node:url' + +let getTextInfoCalls = 0 + +beforeEach(() => { + getTextInfoCalls = 0 + globalThis.lynx = { + getTextInfo(text) { + getTextInfoCalls += 1 + return { width: text.length * 8, content: [text] } + }, + } +}) + +test('prepareWithSegments returns bidi metadata while measuring via lynx.getTextInfo', async () => { + const { prepareWithSegments } = await import('./layout.ts') + const prepared = prepareWithSegments('hello مرحبا', '16px Arial') + + expect(getTextInfoCalls).toBeGreaterThan(0) + 
expect(prepared.segLevels).not.toBeNull() +}) + +test('package is publishable without a linked upstream pretext dependency', async () => { + const here = path.dirname(fileURLToPath(import.meta.url)) + const packageJson = JSON.parse(await readFile(path.join(here, '..', 'package.json'), 'utf8')) + const layoutSource = await readFile(path.join(here, 'layout.ts'), 'utf8') + + expect(packageJson.dependencies?.['@chenglou/pretext']).toBeUndefined() + expect(layoutSource).not.toContain('@chenglou/pretext') +}) diff --git a/packages/lynx-pretext/src/layout.ts b/packages/lynx-pretext/src/layout.ts index 51af429..61f58b4 100644 --- a/packages/lynx-pretext/src/layout.ts +++ b/packages/lynx-pretext/src/layout.ts @@ -1,621 +1,43 @@ -// Text measurement for Lynx using main-thread lynx.getTextInfo(). -// -// Two-phase measurement: -// prepare(text, font) — segments text via Intl.Segmenter polyfill, measures -// each word via getTextInfo, caches widths. Call once when text first appears. -// layout(prepared, maxWidth, lineHeight) — walks cached word widths with pure -// arithmetic to count lines and compute height. Call on every resize. -// -// Based on chenglou/pretext, adapted for Lynx main thread. - -import { - analyzeText, - clearAnalysisCaches, - endsWithClosingQuote, - isCJK, - kinsokuEnd, - kinsokuStart, - leftStickyPunctuation, - setAnalysisLocale, - type AnalysisChunk, - type SegmentBreakKind, - type TextAnalysis, - type WhiteSpaceMode, -} from './analysis' -import { - clearMeasurementCaches, - getCorrectedSegmentWidth, - getEngineProfile, - getFontMeasurementState, - getSegmentGraphemePrefixWidths, - getSegmentGraphemeWidths, - getSegmentMetrics, - textMayContainEmoji, -} from './measurement' -import { - countPreparedLines, - layoutNextLineRange as stepPreparedLineRange, - walkPreparedLines, - type InternalLayoutLine, -} from './line-break' - -let sharedGraphemeSegmenter: Intl.Segmenter | null = null -// Rich-path only. 
Reuses grapheme splits while materializing multiple lines -// from the same prepared handle, without pushing that cache into the API. -let sharedLineTextCaches = new WeakMap>() - -function getSharedGraphemeSegmenter(): Intl.Segmenter { - if (sharedGraphemeSegmenter === null) { - sharedGraphemeSegmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme' }) - } - return sharedGraphemeSegmenter -} - -// Bidi stub for MVP — returns null (no bidi metadata). -function computeSegmentLevels( - _normalized: string, - _segStarts: number[], -): Int8Array | null { - return null -} - -// --- Public types --- - -declare const preparedTextBrand: unique symbol - -type PreparedCore = { - widths: number[] - lineEndFitAdvances: number[] - lineEndPaintAdvances: number[] - kinds: SegmentBreakKind[] - simpleLineWalkFastPath: boolean - segLevels: Int8Array | null - breakableWidths: (number[] | null)[] - breakablePrefixWidths: (number[] | null)[] - discretionaryHyphenWidth: number - tabStopAdvance: number - chunks: PreparedLineChunk[] -} - -export type PreparedText = { - readonly [preparedTextBrand]: true -} - -type InternalPreparedText = PreparedText & PreparedCore - -export type PreparedTextWithSegments = InternalPreparedText & { - segments: string[] -} - -export type LayoutCursor = { - segmentIndex: number - graphemeIndex: number -} - -export type LayoutResult = { - lineCount: number - height: number -} - -export type LayoutLine = { - text: string - width: number - start: LayoutCursor - end: LayoutCursor -} - -export type LayoutLineRange = { - width: number - start: LayoutCursor - end: LayoutCursor -} - -export type LayoutLinesResult = LayoutResult & { - lines: LayoutLine[] -} - -export type PrepareOptions = { - whiteSpace?: WhiteSpaceMode -} - -export type PreparedLineChunk = { - startSegmentIndex: number - endSegmentIndex: number - consumedEndSegmentIndex: number -} - -// --- Internal helpers --- - -function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | 
PreparedTextWithSegments { - const base = { - widths: [], - lineEndFitAdvances: [], - lineEndPaintAdvances: [], - kinds: [], - simpleLineWalkFastPath: true, - segLevels: null, - breakableWidths: [], - breakablePrefixWidths: [], - discretionaryHyphenWidth: 0, - tabStopAdvance: 0, - chunks: [], - } - if (includeSegments) { - return { ...base, segments: [] } as unknown as PreparedTextWithSegments - } - return base as unknown as InternalPreparedText -} - -function measureAnalysis( - analysis: TextAnalysis, - font: string, - includeSegments: boolean, -): InternalPreparedText | PreparedTextWithSegments { - const graphemeSegmenter = getSharedGraphemeSegmenter() - const engineProfile = getEngineProfile() - const { cache, emojiCorrection } = getFontMeasurementState( - font, - textMayContainEmoji(analysis.normalized), - ) - const discretionaryHyphenWidth = getCorrectedSegmentWidth('-', getSegmentMetrics('-', cache), emojiCorrection) - const spaceWidth = getCorrectedSegmentWidth(' ', getSegmentMetrics(' ', cache), emojiCorrection) - const tabStopAdvance = spaceWidth * 8 - - if (analysis.len === 0) return createEmptyPrepared(includeSegments) - - const widths: number[] = [] - const lineEndFitAdvances: number[] = [] - const lineEndPaintAdvances: number[] = [] - const kinds: SegmentBreakKind[] = [] - let simpleLineWalkFastPath = analysis.chunks.length <= 1 - const segStarts = includeSegments ? [] as number[] : null - const breakableWidths: (number[] | null)[] = [] - const breakablePrefixWidths: (number[] | null)[] = [] - const segments = includeSegments ? 
[] as string[] : null - const preparedStartByAnalysisIndex = Array.from({ length: analysis.len }) - const preparedEndByAnalysisIndex = Array.from({ length: analysis.len }) - - function pushMeasuredSegment( - text: string, - width: number, - lineEndFitAdvance: number, - lineEndPaintAdvance: number, - kind: SegmentBreakKind, - start: number, - breakable: number[] | null, - breakablePrefix: number[] | null, - ): void { - if (kind !== 'text' && kind !== 'space' && kind !== 'zero-width-break') { - simpleLineWalkFastPath = false - } - widths.push(width) - lineEndFitAdvances.push(lineEndFitAdvance) - lineEndPaintAdvances.push(lineEndPaintAdvance) - kinds.push(kind) - segStarts?.push(start) - breakableWidths.push(breakable) - breakablePrefixWidths.push(breakablePrefix) - if (segments !== null) segments.push(text) - } - - for (let mi = 0; mi < analysis.len; mi++) { - preparedStartByAnalysisIndex[mi] = widths.length - const segText = analysis.texts[mi]! - const segWordLike = analysis.isWordLike[mi]! - const segKind = analysis.kinds[mi]! - const segStart = analysis.starts[mi]! 
- - if (segKind === 'soft-hyphen') { - pushMeasuredSegment( - segText, - 0, - discretionaryHyphenWidth, - discretionaryHyphenWidth, - segKind, - segStart, - null, - null, - ) - preparedEndByAnalysisIndex[mi] = widths.length - continue - } - - if (segKind === 'hard-break') { - pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null) - preparedEndByAnalysisIndex[mi] = widths.length - continue - } - - if (segKind === 'tab') { - pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null) - preparedEndByAnalysisIndex[mi] = widths.length - continue - } - - const segMetrics = getSegmentMetrics(segText, cache) - - if (segKind === 'text' && segMetrics.containsCJK) { - let unitText = '' - let unitStart = 0 - - for (const gs of graphemeSegmenter.segment(segText)) { - const grapheme = gs.segment - - if (unitText.length === 0) { - unitText = grapheme - unitStart = gs.index - continue - } - - if ( - kinsokuEnd.has(unitText) || - kinsokuStart.has(grapheme) || - leftStickyPunctuation.has(grapheme) || - (engineProfile.carryCJKAfterClosingQuote && - isCJK(grapheme) && - endsWithClosingQuote(unitText)) - ) { - unitText += grapheme - continue - } - - const unitMetrics = getSegmentMetrics(unitText, cache) - const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection) - pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, null, null) - - unitText = grapheme - unitStart = gs.index - } - - if (unitText.length > 0) { - const unitMetrics = getSegmentMetrics(unitText, cache) - const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection) - pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, null, null) - } - preparedEndByAnalysisIndex[mi] = widths.length - continue - } - - const w = getCorrectedSegmentWidth(segText, segMetrics, emojiCorrection) - const lineEndFitAdvance = - segKind === 'space' || segKind === 'preserved-space' || segKind === 'zero-width-break' - ? 
0 - : w - const lineEndPaintAdvance = - segKind === 'space' || segKind === 'zero-width-break' - ? 0 - : w - - if (segWordLike && segText.length > 1) { - const graphemeWidths = getSegmentGraphemeWidths(segText, segMetrics, cache, emojiCorrection) - const graphemePrefixWidths = engineProfile.preferPrefixWidthsForBreakableRuns - ? getSegmentGraphemePrefixWidths(segText, segMetrics, cache, emojiCorrection) - : null - pushMeasuredSegment( - segText, - w, - lineEndFitAdvance, - lineEndPaintAdvance, - segKind, - segStart, - graphemeWidths, - graphemePrefixWidths, - ) - } else { - pushMeasuredSegment( - segText, - w, - lineEndFitAdvance, - lineEndPaintAdvance, - segKind, - segStart, - null, - null, - ) - } - preparedEndByAnalysisIndex[mi] = widths.length - } - - const chunks = mapAnalysisChunksToPreparedChunks(analysis.chunks, preparedStartByAnalysisIndex, preparedEndByAnalysisIndex) - const segLevels = segStarts === null ? null : computeSegmentLevels(analysis.normalized, segStarts) - if (segments !== null) { - return { - widths, - lineEndFitAdvances, - lineEndPaintAdvances, - kinds, - simpleLineWalkFastPath, - segLevels, - breakableWidths, - breakablePrefixWidths, - discretionaryHyphenWidth, - tabStopAdvance, - chunks, - segments, - } as unknown as PreparedTextWithSegments - } - return { - widths, - lineEndFitAdvances, - lineEndPaintAdvances, - kinds, - simpleLineWalkFastPath, - segLevels, - breakableWidths, - breakablePrefixWidths, - discretionaryHyphenWidth, - tabStopAdvance, - chunks, - } as unknown as InternalPreparedText -} - -function mapAnalysisChunksToPreparedChunks( - chunks: AnalysisChunk[], - preparedStartByAnalysisIndex: number[], - preparedEndByAnalysisIndex: number[], -): PreparedLineChunk[] { - const preparedChunks: PreparedLineChunk[] = [] - for (let i = 0; i < chunks.length; i++) { - const chunk = chunks[i]! - const startSegmentIndex = - chunk.startSegmentIndex < preparedStartByAnalysisIndex.length - ? 
preparedStartByAnalysisIndex[chunk.startSegmentIndex]! - : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0 - const endSegmentIndex = - chunk.endSegmentIndex < preparedStartByAnalysisIndex.length - ? preparedStartByAnalysisIndex[chunk.endSegmentIndex]! - : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0 - const consumedEndSegmentIndex = - chunk.consumedEndSegmentIndex < preparedStartByAnalysisIndex.length - ? preparedStartByAnalysisIndex[chunk.consumedEndSegmentIndex]! - : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0 - - preparedChunks.push({ - startSegmentIndex, - endSegmentIndex, - consumedEndSegmentIndex, - }) - } - return preparedChunks -} - -function prepareInternal( - text: string, - font: string, - includeSegments: boolean, - options?: PrepareOptions, -): InternalPreparedText | PreparedTextWithSegments { - const analysis = analyzeText(text, getEngineProfile(), options?.whiteSpace) - return measureAnalysis(analysis, font, includeSegments) -} - -function getInternalPrepared(prepared: PreparedText): InternalPreparedText { - return prepared as InternalPreparedText -} - -// --- Public API --- - -export function prepare(text: string, font: string, options?: PrepareOptions): PreparedText { - return prepareInternal(text, font, false, options) as PreparedText -} - -export function prepareWithSegments(text: string, font: string, options?: PrepareOptions): PreparedTextWithSegments { - return prepareInternal(text, font, true, options) as PreparedTextWithSegments -} - -export function layout(prepared: PreparedText, maxWidth: number, lineHeight: number): LayoutResult { - const lineCount = countPreparedLines(getInternalPrepared(prepared), maxWidth) - return { lineCount, height: lineCount * lineHeight } -} - -// --- Rich-path helpers (used by layoutWithLines, walkLineRanges, layoutNextLine) --- - -function getSegmentGraphemes( - segmentIndex: number, - segments: string[], - cache: Map, -): string[] { - 
let graphemes = cache.get(segmentIndex) - if (graphemes !== undefined) return graphemes - - graphemes = [] - const graphemeSegmenter = getSharedGraphemeSegmenter() - for (const gs of graphemeSegmenter.segment(segments[segmentIndex]!)) { - graphemes.push(gs.segment) - } - cache.set(segmentIndex, graphemes) - return graphemes -} - -function getLineTextCache(prepared: PreparedTextWithSegments): Map { - let cache = sharedLineTextCaches.get(prepared) - if (cache !== undefined) return cache - - cache = new Map() - sharedLineTextCaches.set(prepared, cache) - return cache -} - -function lineHasDiscretionaryHyphen( - kinds: SegmentBreakKind[], - startSegmentIndex: number, - startGraphemeIndex: number, - endSegmentIndex: number, -): boolean { - return ( - endSegmentIndex > 0 && - kinds[endSegmentIndex - 1] === 'soft-hyphen' && - !(startSegmentIndex === endSegmentIndex && startGraphemeIndex > 0) - ) -} - -function buildLineTextFromRange( - segments: string[], - kinds: SegmentBreakKind[], - cache: Map, - startSegmentIndex: number, - startGraphemeIndex: number, - endSegmentIndex: number, - endGraphemeIndex: number, -): string { - let text = '' - const endsWithDiscretionaryHyphen = lineHasDiscretionaryHyphen( - kinds, - startSegmentIndex, - startGraphemeIndex, - endSegmentIndex, - ) - - for (let i = startSegmentIndex; i < endSegmentIndex; i++) { - if (kinds[i] === 'soft-hyphen' || kinds[i] === 'hard-break') continue - if (i === startSegmentIndex && startGraphemeIndex > 0) { - text += getSegmentGraphemes(i, segments, cache).slice(startGraphemeIndex).join('') - } else { - text += segments[i]! - } - } - - if (endGraphemeIndex > 0) { - if (endsWithDiscretionaryHyphen) text += '-' - text += getSegmentGraphemes(endSegmentIndex, segments, cache).slice( - startSegmentIndex === endSegmentIndex ? 
startGraphemeIndex : 0, - endGraphemeIndex, - ).join('') - } else if (endsWithDiscretionaryHyphen) { - text += '-' - } - - return text -} - -function createLayoutLine( - prepared: PreparedTextWithSegments, - cache: Map, - width: number, - startSegmentIndex: number, - startGraphemeIndex: number, - endSegmentIndex: number, - endGraphemeIndex: number, -): LayoutLine { - return { - text: buildLineTextFromRange( - prepared.segments, - prepared.kinds, - cache, - startSegmentIndex, - startGraphemeIndex, - endSegmentIndex, - endGraphemeIndex, - ), - width, - start: { - segmentIndex: startSegmentIndex, - graphemeIndex: startGraphemeIndex, - }, - end: { - segmentIndex: endSegmentIndex, - graphemeIndex: endGraphemeIndex, - }, - } -} - -function materializeLayoutLine( - prepared: PreparedTextWithSegments, - cache: Map, - line: InternalLayoutLine, -): LayoutLine { - return createLayoutLine( - prepared, - cache, - line.width, - line.startSegmentIndex, - line.startGraphemeIndex, - line.endSegmentIndex, - line.endGraphemeIndex, - ) -} - -function toLayoutLineRange(line: InternalLayoutLine): LayoutLineRange { - return { - width: line.width, - start: { - segmentIndex: line.startSegmentIndex, - graphemeIndex: line.startGraphemeIndex, - }, - end: { - segmentIndex: line.endSegmentIndex, - graphemeIndex: line.endGraphemeIndex, - }, - } -} - -function stepLineRange( - prepared: PreparedTextWithSegments, - start: LayoutCursor, - maxWidth: number, -): LayoutLineRange | null { - const line = stepPreparedLineRange(prepared, start, maxWidth) - if (line === null) return null - return toLayoutLineRange(line) -} - -function materializeLine( - prepared: PreparedTextWithSegments, - line: LayoutLineRange, -): LayoutLine { - return createLayoutLine( - prepared, - getLineTextCache(prepared), - line.width, - line.start.segmentIndex, - line.start.graphemeIndex, - line.end.segmentIndex, - line.end.graphemeIndex, - ) -} - -export function walkLineRanges( - prepared: PreparedTextWithSegments, - maxWidth: 
number, - onLine: (line: LayoutLineRange) => void, -): number { - if (prepared.widths.length === 0) return 0 - - return walkPreparedLines(getInternalPrepared(prepared), maxWidth, line => { - onLine(toLayoutLineRange(line)) - }) -} - -export function layoutNextLine( - prepared: PreparedTextWithSegments, - start: LayoutCursor, - maxWidth: number, -): LayoutLine | null { - const line = stepLineRange(prepared, start, maxWidth) - if (line === null) return null - return materializeLine(prepared, line) -} - -export function layoutWithLines(prepared: PreparedTextWithSegments, maxWidth: number, lineHeight: number): LayoutLinesResult { - const lines: LayoutLine[] = [] - if (prepared.widths.length === 0) return { lineCount: 0, height: 0, lines } - - const graphemeCache = getLineTextCache(prepared) - const lineCount = walkPreparedLines(getInternalPrepared(prepared), maxWidth, line => { - lines.push(materializeLayoutLine(prepared, graphemeCache, line)) - }) - - return { lineCount, height: lineCount * lineHeight, lines } -} - -export function clearCache(): void { - clearAnalysisCaches() - sharedGraphemeSegmenter = null - sharedLineTextCaches = new WeakMap>() - clearMeasurementCaches() -} - -export function setLocale(locale?: string): void { - setAnalysisLocale(locale) - clearCache() +import './intl-shim' +import './segmenter-polyfill' + +import { createPretext } from './pretext/host.js' +import type { + LayoutCursor, + LayoutLine, + LayoutLineRange, + LayoutLinesResult, + LayoutResult, + PrepareOptions, + PrepareProfile, + PreparedText, + PreparedTextWithSegments, +} from './pretext/layout.js' + +import { lynxMeasurementHost } from './measurement' + +const pretext = createPretext({ + measurement: lynxMeasurementHost, +}) + +export const profilePrepare = pretext.profilePrepare +export const prepare = pretext.prepare +export const prepareWithSegments = pretext.prepareWithSegments +export const layout = pretext.layout +export const walkLineRanges = pretext.walkLineRanges +export 
const layoutNextLine = pretext.layoutNextLine +export const layoutWithLines = pretext.layoutWithLines +export const clearCache = pretext.clearCache +export const setLocale = pretext.setLocale + +export type { + LayoutCursor, + LayoutLine, + LayoutLineRange, + LayoutLinesResult, + LayoutResult, + PrepareOptions, + PrepareProfile, + PreparedText, + PreparedTextWithSegments, } diff --git a/packages/lynx-pretext/src/line-break.ts b/packages/lynx-pretext/src/line-break.ts index 7564161..ac6ee2b 100644 --- a/packages/lynx-pretext/src/line-break.ts +++ b/packages/lynx-pretext/src/line-break.ts @@ -1,1056 +1 @@ -import type { SegmentBreakKind } from './analysis' -import { getEngineProfile } from './measurement' - -export type LineBreakCursor = { - segmentIndex: number - graphemeIndex: number -} - -export type PreparedLineBreakData = { - widths: number[] - lineEndFitAdvances: number[] - lineEndPaintAdvances: number[] - kinds: SegmentBreakKind[] - simpleLineWalkFastPath: boolean - breakableWidths: (number[] | null)[] - breakablePrefixWidths: (number[] | null)[] - discretionaryHyphenWidth: number - tabStopAdvance: number - chunks: { - startSegmentIndex: number - endSegmentIndex: number - consumedEndSegmentIndex: number - }[] -} - -export type InternalLayoutLine = { - startSegmentIndex: number - startGraphemeIndex: number - endSegmentIndex: number - endGraphemeIndex: number - width: number -} - -function canBreakAfter(kind: SegmentBreakKind): boolean { - return ( - kind === 'space' || - kind === 'preserved-space' || - kind === 'tab' || - kind === 'zero-width-break' || - kind === 'soft-hyphen' - ) -} - -function isSimpleCollapsibleSpace(kind: SegmentBreakKind): boolean { - return kind === 'space' -} - -function getTabAdvance(lineWidth: number, tabStopAdvance: number): number { - if (tabStopAdvance <= 0) return 0 - - const remainder = lineWidth % tabStopAdvance - if (Math.abs(remainder) <= 1e-6) return tabStopAdvance - return tabStopAdvance - remainder -} - -function 
getBreakableAdvance( - graphemeWidths: number[], - graphemePrefixWidths: number[] | null, - graphemeIndex: number, - preferPrefixWidths: boolean, -): number { - if (!preferPrefixWidths || graphemePrefixWidths === null) { - return graphemeWidths[graphemeIndex]! - } - return graphemePrefixWidths[graphemeIndex]! - (graphemeIndex > 0 ? graphemePrefixWidths[graphemeIndex - 1]! : 0) -} - -function fitSoftHyphenBreak( - graphemeWidths: number[], - initialWidth: number, - maxWidth: number, - lineFitEpsilon: number, - discretionaryHyphenWidth: number, - cumulativeWidths: boolean, -): { fitCount: number, fittedWidth: number } { - let fitCount = 0 - let fittedWidth = initialWidth - - while (fitCount < graphemeWidths.length) { - const nextWidth = cumulativeWidths - ? initialWidth + graphemeWidths[fitCount]! - : fittedWidth + graphemeWidths[fitCount]! - const nextLineWidth = fitCount + 1 < graphemeWidths.length - ? nextWidth + discretionaryHyphenWidth - : nextWidth - if (nextLineWidth > maxWidth + lineFitEpsilon) break - fittedWidth = nextWidth - fitCount++ - } - - return { fitCount, fittedWidth } -} - -function findChunkIndexForStart(prepared: PreparedLineBreakData, segmentIndex: number): number { - for (let i = 0; i < prepared.chunks.length; i++) { - const chunk = prepared.chunks[i]! - if (segmentIndex < chunk.consumedEndSegmentIndex) return i - } - return -1 -} - -export function normalizeLineStart( - prepared: PreparedLineBreakData, - start: LineBreakCursor, -): LineBreakCursor | null { - let segmentIndex = start.segmentIndex - const graphemeIndex = start.graphemeIndex - - if (segmentIndex >= prepared.widths.length) return null - if (graphemeIndex > 0) return start - - const chunkIndex = findChunkIndexForStart(prepared, segmentIndex) - if (chunkIndex < 0) return null - - const chunk = prepared.chunks[chunkIndex]! 
- if (chunk.startSegmentIndex === chunk.endSegmentIndex && segmentIndex === chunk.startSegmentIndex) { - return { segmentIndex, graphemeIndex: 0 } - } - - if (segmentIndex < chunk.startSegmentIndex) segmentIndex = chunk.startSegmentIndex - while (segmentIndex < chunk.endSegmentIndex) { - const kind = prepared.kinds[segmentIndex]! - if (kind !== 'space' && kind !== 'zero-width-break' && kind !== 'soft-hyphen') { - return { segmentIndex, graphemeIndex: 0 } - } - segmentIndex++ - } - - if (chunk.consumedEndSegmentIndex >= prepared.widths.length) return null - return { segmentIndex: chunk.consumedEndSegmentIndex, graphemeIndex: 0 } -} - -export function countPreparedLines(prepared: PreparedLineBreakData, maxWidth: number): number { - if (prepared.simpleLineWalkFastPath) { - return countPreparedLinesSimple(prepared, maxWidth) - } - return walkPreparedLines(prepared, maxWidth) -} - -function countPreparedLinesSimple(prepared: PreparedLineBreakData, maxWidth: number): number { - const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared - if (widths.length === 0) return 0 - - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineCount = 0 - let lineW = 0 - let hasContent = false - - function placeOnFreshLine(segmentIndex: number): void { - const w = widths[segmentIndex]! - if (w > maxWidth && breakableWidths[segmentIndex] !== null) { - const gWidths = breakableWidths[segmentIndex]! - const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? 
null - lineW = 0 - for (let g = 0; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) - if (lineW > 0 && lineW + gw > maxWidth + lineFitEpsilon) { - lineCount++ - lineW = gw - } else { - if (lineW === 0) lineCount++ - lineW += gw - } - } - } else { - lineW = w - lineCount++ - } - hasContent = true - } - - for (let i = 0; i < widths.length; i++) { - const w = widths[i]! - const kind = kinds[i]! - - if (!hasContent) { - placeOnFreshLine(i) - continue - } - - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - if (isSimpleCollapsibleSpace(kind)) continue - lineW = 0 - hasContent = false - placeOnFreshLine(i) - continue - } - - lineW = newW - } - - if (!hasContent) return lineCount + 1 - return lineCount -} - -function walkPreparedLinesSimple( - prepared: PreparedLineBreakData, - maxWidth: number, - onLine?: (line: InternalLayoutLine) => void, -): number { - const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared - if (widths.length === 0) return 0 - - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineCount = 0 - let lineW = 0 - let hasContent = false - let lineStartSegmentIndex = 0 - let lineStartGraphemeIndex = 0 - let lineEndSegmentIndex = 0 - let lineEndGraphemeIndex = 0 - let pendingBreakSegmentIndex = -1 - let pendingBreakPaintWidth = 0 - - function clearPendingBreak(): void { - pendingBreakSegmentIndex = -1 - pendingBreakPaintWidth = 0 - } - - function emitCurrentLine( - endSegmentIndex = lineEndSegmentIndex, - endGraphemeIndex = lineEndGraphemeIndex, - width = lineW, - ): void { - lineCount++ - onLine?.({ - startSegmentIndex: lineStartSegmentIndex, - startGraphemeIndex: lineStartGraphemeIndex, - endSegmentIndex, - endGraphemeIndex, - width, - }) - lineW = 0 - hasContent = false - clearPendingBreak() - } - - function startLineAtSegment(segmentIndex: number, width: number): 
void { - hasContent = true - lineStartSegmentIndex = segmentIndex - lineStartGraphemeIndex = 0 - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - lineW = width - } - - function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { - hasContent = true - lineStartSegmentIndex = segmentIndex - lineStartGraphemeIndex = graphemeIndex - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = graphemeIndex + 1 - lineW = width - } - - function appendWholeSegment(segmentIndex: number, width: number): void { - if (!hasContent) { - startLineAtSegment(segmentIndex, width) - return - } - lineW += width - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - - function updatePendingBreak(segmentIndex: number, segmentWidth: number): void { - if (!canBreakAfter(kinds[segmentIndex]!)) return - pendingBreakSegmentIndex = segmentIndex + 1 - pendingBreakPaintWidth = lineW - segmentWidth - } - - function appendBreakableSegment(segmentIndex: number): void { - appendBreakableSegmentFrom(segmentIndex, 0) - } - - function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void { - const gWidths = breakableWidths[segmentIndex]! - const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? 
null - for (let g = startGraphemeIndex; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) - - if (!hasContent) { - startLineAtGrapheme(segmentIndex, g, gw) - continue - } - - if (lineW + gw > maxWidth + lineFitEpsilon) { - emitCurrentLine() - startLineAtGrapheme(segmentIndex, g, gw) - } else { - lineW += gw - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = g + 1 - } - } - - if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - } - - let i = 0 - while (i < widths.length) { - const w = widths[i]! - const kind = kinds[i]! - - if (!hasContent) { - if (w > maxWidth && breakableWidths[i] !== null) { - appendBreakableSegment(i) - } else { - startLineAtSegment(i, w) - } - updatePendingBreak(i, w) - i++ - continue - } - - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - if (canBreakAfter(kind)) { - appendWholeSegment(i, w) - emitCurrentLine(i + 1, 0, lineW - w) - i++ - continue - } - - if (pendingBreakSegmentIndex >= 0) { - emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - continue - } - - if (w > maxWidth && breakableWidths[i] !== null) { - emitCurrentLine() - appendBreakableSegment(i) - i++ - continue - } - - emitCurrentLine() - continue - } - - appendWholeSegment(i, w) - updatePendingBreak(i, w) - i++ - } - - if (hasContent) emitCurrentLine() - return lineCount -} - -export function walkPreparedLines( - prepared: PreparedLineBreakData, - maxWidth: number, - onLine?: (line: InternalLayoutLine) => void, -): number { - if (prepared.simpleLineWalkFastPath) { - return walkPreparedLinesSimple(prepared, maxWidth, onLine) - } - - const { - widths, - lineEndFitAdvances, - lineEndPaintAdvances, - kinds, - breakableWidths, - breakablePrefixWidths, - discretionaryHyphenWidth, - tabStopAdvance, - chunks, - } = 
prepared - if (widths.length === 0 || chunks.length === 0) return 0 - - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineCount = 0 - let lineW = 0 - let hasContent = false - let lineStartSegmentIndex = 0 - let lineStartGraphemeIndex = 0 - let lineEndSegmentIndex = 0 - let lineEndGraphemeIndex = 0 - let pendingBreakSegmentIndex = -1 - let pendingBreakFitWidth = 0 - let pendingBreakPaintWidth = 0 - let pendingBreakKind: SegmentBreakKind | null = null - - function clearPendingBreak(): void { - pendingBreakSegmentIndex = -1 - pendingBreakFitWidth = 0 - pendingBreakPaintWidth = 0 - pendingBreakKind = null - } - - function emitCurrentLine( - endSegmentIndex = lineEndSegmentIndex, - endGraphemeIndex = lineEndGraphemeIndex, - width = lineW, - ): void { - lineCount++ - onLine?.({ - startSegmentIndex: lineStartSegmentIndex, - startGraphemeIndex: lineStartGraphemeIndex, - endSegmentIndex, - endGraphemeIndex, - width, - }) - lineW = 0 - hasContent = false - clearPendingBreak() - } - - function startLineAtSegment(segmentIndex: number, width: number): void { - hasContent = true - lineStartSegmentIndex = segmentIndex - lineStartGraphemeIndex = 0 - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - lineW = width - } - - function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { - hasContent = true - lineStartSegmentIndex = segmentIndex - lineStartGraphemeIndex = graphemeIndex - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = graphemeIndex + 1 - lineW = width - } - - function appendWholeSegment(segmentIndex: number, width: number): void { - if (!hasContent) { - startLineAtSegment(segmentIndex, width) - return - } - lineW += width - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - - function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { - if (!canBreakAfter(kinds[segmentIndex]!)) return - const 
fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! - const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! - pendingBreakSegmentIndex = segmentIndex + 1 - pendingBreakFitWidth = lineW - segmentWidth + fitAdvance - pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance - pendingBreakKind = kinds[segmentIndex]! - } - - function appendBreakableSegment(segmentIndex: number): void { - appendBreakableSegmentFrom(segmentIndex, 0) - } - - function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void { - const gWidths = breakableWidths[segmentIndex]! - const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null - for (let g = startGraphemeIndex; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) - - if (!hasContent) { - startLineAtGrapheme(segmentIndex, g, gw) - continue - } - - if (lineW + gw > maxWidth + lineFitEpsilon) { - emitCurrentLine() - startLineAtGrapheme(segmentIndex, g, gw) - } else { - lineW += gw - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = g + 1 - } - } - - if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - } - - function continueSoftHyphenBreakableSegment(segmentIndex: number): boolean { - if (pendingBreakKind !== 'soft-hyphen') return false - const gWidths = breakableWidths[segmentIndex]! - if (gWidths === null) return false - const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns - ? breakablePrefixWidths[segmentIndex] ?? 
gWidths - : gWidths - const usesPrefixWidths = fitWidths !== gWidths - const { fitCount, fittedWidth } = fitSoftHyphenBreak( - fitWidths, - lineW, - maxWidth, - lineFitEpsilon, - discretionaryHyphenWidth, - usesPrefixWidths, - ) - if (fitCount === 0) return false - - lineW = fittedWidth - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = fitCount - clearPendingBreak() - - if (fitCount === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - return true - } - - emitCurrentLine( - segmentIndex, - fitCount, - fittedWidth + discretionaryHyphenWidth, - ) - appendBreakableSegmentFrom(segmentIndex, fitCount) - return true - } - - function emitEmptyChunk(chunk: { startSegmentIndex: number, consumedEndSegmentIndex: number }): void { - lineCount++ - onLine?.({ - startSegmentIndex: chunk.startSegmentIndex, - startGraphemeIndex: 0, - endSegmentIndex: chunk.consumedEndSegmentIndex, - endGraphemeIndex: 0, - width: 0, - }) - clearPendingBreak() - } - - for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) { - const chunk = chunks[chunkIndex]! - if (chunk.startSegmentIndex === chunk.endSegmentIndex) { - emitEmptyChunk(chunk) - continue - } - - hasContent = false - lineW = 0 - lineStartSegmentIndex = chunk.startSegmentIndex - lineStartGraphemeIndex = 0 - lineEndSegmentIndex = chunk.startSegmentIndex - lineEndGraphemeIndex = 0 - clearPendingBreak() - - let i = chunk.startSegmentIndex - while (i < chunk.endSegmentIndex) { - const kind = kinds[i]! - const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]! 
- - if (kind === 'soft-hyphen') { - if (hasContent) { - lineEndSegmentIndex = i + 1 - lineEndGraphemeIndex = 0 - pendingBreakSegmentIndex = i + 1 - pendingBreakFitWidth = lineW + discretionaryHyphenWidth - pendingBreakPaintWidth = lineW + discretionaryHyphenWidth - pendingBreakKind = kind - } - i++ - continue - } - - if (!hasContent) { - if (w > maxWidth && breakableWidths[i] !== null) { - appendBreakableSegment(i) - } else { - startLineAtSegment(i, w) - } - updatePendingBreakForWholeSegment(i, w) - i++ - continue - } - - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!) - const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!) - - if ( - pendingBreakKind === 'soft-hyphen' && - engineProfile.preferEarlySoftHyphenBreak && - pendingBreakFitWidth <= maxWidth + lineFitEpsilon - ) { - emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - continue - } - - if (pendingBreakKind === 'soft-hyphen' && continueSoftHyphenBreakableSegment(i)) { - i++ - continue - } - - if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) { - appendWholeSegment(i, w) - emitCurrentLine(i + 1, 0, currentBreakPaintWidth) - i++ - continue - } - - if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { - emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - continue - } - - if (w > maxWidth && breakableWidths[i] !== null) { - emitCurrentLine() - appendBreakableSegment(i) - i++ - continue - } - - emitCurrentLine() - continue - } - - appendWholeSegment(i, w) - updatePendingBreakForWholeSegment(i, w) - i++ - } - - if (hasContent) { - const finalPaintWidth = - pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex - ? 
pendingBreakPaintWidth - : lineW - emitCurrentLine(chunk.consumedEndSegmentIndex, 0, finalPaintWidth) - } - } - - return lineCount -} - -export function layoutNextLineRange( - prepared: PreparedLineBreakData, - start: LineBreakCursor, - maxWidth: number, -): InternalLayoutLine | null { - const normalizedStart = normalizeLineStart(prepared, start) - if (normalizedStart === null) return null - - if (prepared.simpleLineWalkFastPath) { - return layoutNextLineRangeSimple(prepared, normalizedStart, maxWidth) - } - - const chunkIndex = findChunkIndexForStart(prepared, normalizedStart.segmentIndex) - if (chunkIndex < 0) return null - - const chunk = prepared.chunks[chunkIndex]! - if (chunk.startSegmentIndex === chunk.endSegmentIndex) { - return { - startSegmentIndex: chunk.startSegmentIndex, - startGraphemeIndex: 0, - endSegmentIndex: chunk.consumedEndSegmentIndex, - endGraphemeIndex: 0, - width: 0, - } - } - - const { - widths, - lineEndFitAdvances, - lineEndPaintAdvances, - kinds, - breakableWidths, - breakablePrefixWidths, - discretionaryHyphenWidth, - tabStopAdvance, - } = prepared - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineW = 0 - let hasContent = false - const lineStartSegmentIndex = normalizedStart.segmentIndex - const lineStartGraphemeIndex = normalizedStart.graphemeIndex - let lineEndSegmentIndex = lineStartSegmentIndex - let lineEndGraphemeIndex = lineStartGraphemeIndex - let pendingBreakSegmentIndex = -1 - let pendingBreakFitWidth = 0 - let pendingBreakPaintWidth = 0 - let pendingBreakKind: SegmentBreakKind | null = null - - function clearPendingBreak(): void { - pendingBreakSegmentIndex = -1 - pendingBreakFitWidth = 0 - pendingBreakPaintWidth = 0 - pendingBreakKind = null - } - - function finishLine( - endSegmentIndex = lineEndSegmentIndex, - endGraphemeIndex = lineEndGraphemeIndex, - width = lineW, - ): InternalLayoutLine | null { - if (!hasContent) return null - - return { - startSegmentIndex: 
lineStartSegmentIndex, - startGraphemeIndex: lineStartGraphemeIndex, - endSegmentIndex, - endGraphemeIndex, - width, - } - } - - function startLineAtSegment(segmentIndex: number, width: number): void { - hasContent = true - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - lineW = width - } - - function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { - hasContent = true - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = graphemeIndex + 1 - lineW = width - } - - function appendWholeSegment(segmentIndex: number, width: number): void { - if (!hasContent) { - startLineAtSegment(segmentIndex, width) - return - } - lineW += width - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - - function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { - if (!canBreakAfter(kinds[segmentIndex]!)) return - const fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! - const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! - pendingBreakSegmentIndex = segmentIndex + 1 - pendingBreakFitWidth = lineW - segmentWidth + fitAdvance - pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance - pendingBreakKind = kinds[segmentIndex]! - } - - function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null { - const gWidths = breakableWidths[segmentIndex]! - const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? 
null - for (let g = startGraphemeIndex; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) - - if (!hasContent) { - startLineAtGrapheme(segmentIndex, g, gw) - continue - } - - if (lineW + gw > maxWidth + lineFitEpsilon) { - return finishLine() - } - - lineW += gw - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = g + 1 - } - - if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - return null - } - - function maybeFinishAtSoftHyphen(segmentIndex: number): InternalLayoutLine | null { - if (pendingBreakKind !== 'soft-hyphen' || pendingBreakSegmentIndex < 0) return null - - const gWidths = breakableWidths[segmentIndex] ?? null - if (gWidths !== null) { - const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns - ? breakablePrefixWidths[segmentIndex] ?? gWidths - : gWidths - const usesPrefixWidths = fitWidths !== gWidths - const { fitCount, fittedWidth } = fitSoftHyphenBreak( - fitWidths, - lineW, - maxWidth, - lineFitEpsilon, - discretionaryHyphenWidth, - usesPrefixWidths, - ) - - if (fitCount === gWidths.length) { - lineW = fittedWidth - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - clearPendingBreak() - return null - } - - if (fitCount > 0) { - return finishLine( - segmentIndex, - fitCount, - fittedWidth + discretionaryHyphenWidth, - ) - } - } - - if (pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { - return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - } - - return null - } - - for (let i = normalizedStart.segmentIndex; i < chunk.endSegmentIndex; i++) { - const kind = kinds[i]! - const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0 - const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]! 
- - if (kind === 'soft-hyphen' && startGraphemeIndex === 0) { - if (hasContent) { - lineEndSegmentIndex = i + 1 - lineEndGraphemeIndex = 0 - pendingBreakSegmentIndex = i + 1 - pendingBreakFitWidth = lineW + discretionaryHyphenWidth - pendingBreakPaintWidth = lineW + discretionaryHyphenWidth - pendingBreakKind = kind - } - continue - } - - if (!hasContent) { - if (startGraphemeIndex > 0) { - const line = appendBreakableSegmentFrom(i, startGraphemeIndex) - if (line !== null) return line - } else if (w > maxWidth && breakableWidths[i] !== null) { - const line = appendBreakableSegmentFrom(i, 0) - if (line !== null) return line - } else { - startLineAtSegment(i, w) - } - updatePendingBreakForWholeSegment(i, w) - continue - } - - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!) - const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!) - - if ( - pendingBreakKind === 'soft-hyphen' && - engineProfile.preferEarlySoftHyphenBreak && - pendingBreakFitWidth <= maxWidth + lineFitEpsilon - ) { - return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - } - - const softBreakLine = maybeFinishAtSoftHyphen(i) - if (softBreakLine !== null) return softBreakLine - - if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) { - appendWholeSegment(i, w) - return finishLine(i + 1, 0, currentBreakPaintWidth) - } - - if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { - return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - } - - if (w > maxWidth && breakableWidths[i] !== null) { - const currentLine = finishLine() - if (currentLine !== null) return currentLine - const line = appendBreakableSegmentFrom(i, 0) - if (line !== null) return line - } - - return finishLine() - } - - appendWholeSegment(i, w) - updatePendingBreakForWholeSegment(i, w) - } - - if 
(pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex && lineEndGraphemeIndex === 0) { - return finishLine(chunk.consumedEndSegmentIndex, 0, pendingBreakPaintWidth) - } - - return finishLine(chunk.consumedEndSegmentIndex, 0, lineW) -} - -function layoutNextLineRangeSimple( - prepared: PreparedLineBreakData, - normalizedStart: LineBreakCursor, - maxWidth: number, -): InternalLayoutLine | null { - const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared - const engineProfile = getEngineProfile() - const lineFitEpsilon = engineProfile.lineFitEpsilon - - let lineW = 0 - let hasContent = false - const lineStartSegmentIndex = normalizedStart.segmentIndex - const lineStartGraphemeIndex = normalizedStart.graphemeIndex - let lineEndSegmentIndex = lineStartSegmentIndex - let lineEndGraphemeIndex = lineStartGraphemeIndex - let pendingBreakSegmentIndex = -1 - let pendingBreakPaintWidth = 0 - - function finishLine( - endSegmentIndex = lineEndSegmentIndex, - endGraphemeIndex = lineEndGraphemeIndex, - width = lineW, - ): InternalLayoutLine | null { - if (!hasContent) return null - - return { - startSegmentIndex: lineStartSegmentIndex, - startGraphemeIndex: lineStartGraphemeIndex, - endSegmentIndex, - endGraphemeIndex, - width, - } - } - - function startLineAtSegment(segmentIndex: number, width: number): void { - hasContent = true - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - lineW = width - } - - function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { - hasContent = true - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = graphemeIndex + 1 - lineW = width - } - - function appendWholeSegment(segmentIndex: number, width: number): void { - if (!hasContent) { - startLineAtSegment(segmentIndex, width) - return - } - lineW += width - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - - function updatePendingBreak(segmentIndex: number, segmentWidth: number): void { - 
if (!canBreakAfter(kinds[segmentIndex]!)) return - pendingBreakSegmentIndex = segmentIndex + 1 - pendingBreakPaintWidth = lineW - segmentWidth - } - - function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null { - const gWidths = breakableWidths[segmentIndex]! - const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null - for (let g = startGraphemeIndex; g < gWidths.length; g++) { - const gw = getBreakableAdvance( - gWidths, - gPrefixWidths, - g, - engineProfile.preferPrefixWidthsForBreakableRuns, - ) - - if (!hasContent) { - startLineAtGrapheme(segmentIndex, g, gw) - continue - } - - if (lineW + gw > maxWidth + lineFitEpsilon) { - return finishLine() - } - - lineW += gw - lineEndSegmentIndex = segmentIndex - lineEndGraphemeIndex = g + 1 - } - - if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { - lineEndSegmentIndex = segmentIndex + 1 - lineEndGraphemeIndex = 0 - } - return null - } - - for (let i = normalizedStart.segmentIndex; i < widths.length; i++) { - const w = widths[i]! - const kind = kinds[i]! - const startGraphemeIndex = i === normalizedStart.segmentIndex ? 
normalizedStart.graphemeIndex : 0 - - if (!hasContent) { - if (startGraphemeIndex > 0) { - const line = appendBreakableSegmentFrom(i, startGraphemeIndex) - if (line !== null) return line - } else if (w > maxWidth && breakableWidths[i] !== null) { - const line = appendBreakableSegmentFrom(i, 0) - if (line !== null) return line - } else { - startLineAtSegment(i, w) - } - updatePendingBreak(i, w) - continue - } - - const newW = lineW + w - if (newW > maxWidth + lineFitEpsilon) { - if (canBreakAfter(kind)) { - appendWholeSegment(i, w) - return finishLine(i + 1, 0, lineW - w) - } - - if (pendingBreakSegmentIndex >= 0) { - return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) - } - - if (w > maxWidth && breakableWidths[i] !== null) { - const currentLine = finishLine() - if (currentLine !== null) return currentLine - const line = appendBreakableSegmentFrom(i, 0) - if (line !== null) return line - } - - return finishLine() - } - - appendWholeSegment(i, w) - updatePendingBreak(i, w) - } - - return finishLine() -} +export * from './pretext/line-break.js' diff --git a/packages/lynx-pretext/src/measurement.ts b/packages/lynx-pretext/src/measurement.ts index 11d71c2..b15fda7 100644 --- a/packages/lynx-pretext/src/measurement.ts +++ b/packages/lynx-pretext/src/measurement.ts @@ -1,4 +1,5 @@ import { isCJK } from './analysis' +import type { MeasurementHost } from './pretext/host.js' export type SegmentMetrics = { width: number @@ -15,6 +16,12 @@ export type EngineProfile = { preferEarlySoftHyphenBreak: boolean } +export type FontMeasurementState = { + cache: Map + fontSize: number + emojiCorrection: number +} + // Module-level font context (replaces Canvas ctx.font) let currentFontSizeStr: string = '16px' let currentFontFamily: string | undefined = undefined @@ -127,11 +134,7 @@ export function getSegmentGraphemePrefixWidths( return metrics.graphemePrefixWidths } -export function getFontMeasurementState(font: string, needsEmojiCorrection: boolean): { - cache: Map 
- fontSize: number - emojiCorrection: number -} { +export function getFontMeasurementState(font: string, needsEmojiCorrection: boolean): FontMeasurementState { const fontSize = parseFontSize(font) const fontFamily = parseFontFamily(font) // Set module-level font context for getSegmentMetrics @@ -147,3 +150,16 @@ export function clearMeasurementCaches(): void { segmentMetricCaches.clear() sharedGraphemeSegmenter = null } + +export const lynxMeasurementHost: MeasurementHost = { + clearMeasurementCaches, + getSegmentMetrics, + getEngineProfile, + getCorrectedSegmentWidth, + getSegmentGraphemeWidths, + getSegmentGraphemePrefixWidths, + getFontMeasurementState, + textMayContainEmoji, +} + +export type { MeasurementHost } diff --git a/packages/lynx-pretext/src/pretext/analysis.ts b/packages/lynx-pretext/src/pretext/analysis.ts new file mode 100644 index 0000000..a4200d9 --- /dev/null +++ b/packages/lynx-pretext/src/pretext/analysis.ts @@ -0,0 +1,1019 @@ +export type WhiteSpaceMode = 'normal' | 'pre-wrap' + +export type SegmentBreakKind = + | 'text' + | 'space' + | 'preserved-space' + | 'tab' + | 'glue' + | 'zero-width-break' + | 'soft-hyphen' + | 'hard-break' + +type SegmentationPiece = { + text: string + isWordLike: boolean + kind: SegmentBreakKind + start: number +} + +export type MergedSegmentation = { + len: number + texts: string[] + isWordLike: boolean[] + kinds: SegmentBreakKind[] + starts: number[] +} + +export type AnalysisChunk = { + startSegmentIndex: number + endSegmentIndex: number + consumedEndSegmentIndex: number +} + +export type TextAnalysis = { normalized: string, chunks: AnalysisChunk[] } & MergedSegmentation + +export type AnalysisProfile = { + carryCJKAfterClosingQuote: boolean +} + +const collapsibleWhitespaceRunRe = /[ \t\n\r\f]+/g +const needsWhitespaceNormalizationRe = /[\t\n\r\f]| {2,}|^ | $/ + +type WhiteSpaceProfile = { + mode: WhiteSpaceMode + preserveOrdinarySpaces: boolean + preserveHardBreaks: boolean +} + +function 
getWhiteSpaceProfile(whiteSpace?: WhiteSpaceMode): WhiteSpaceProfile { + const mode = whiteSpace ?? 'normal' + return mode === 'pre-wrap' + ? { mode, preserveOrdinarySpaces: true, preserveHardBreaks: true } + : { mode, preserveOrdinarySpaces: false, preserveHardBreaks: false } +} + +export function normalizeWhitespaceNormal(text: string): string { + if (!needsWhitespaceNormalizationRe.test(text)) return text + + let normalized = text.replace(collapsibleWhitespaceRunRe, ' ') + if (normalized.charCodeAt(0) === 0x20) { + normalized = normalized.slice(1) + } + if (normalized.length > 0 && normalized.charCodeAt(normalized.length - 1) === 0x20) { + normalized = normalized.slice(0, -1) + } + return normalized +} + +function normalizeWhitespacePreWrap(text: string): string { + if (!/[\r\f]/.test(text)) return text.replace(/\r\n/g, '\n') + return text + .replace(/\r\n/g, '\n') + .replace(/[\r\f]/g, '\n') +} + +let sharedWordSegmenter: Intl.Segmenter | null = null +let segmenterLocale: string | undefined + +function getSharedWordSegmenter(): Intl.Segmenter { + if (sharedWordSegmenter === null) { + sharedWordSegmenter = new Intl.Segmenter(segmenterLocale, { granularity: 'word' }) + } + return sharedWordSegmenter +} + +export function clearAnalysisCaches(): void { + sharedWordSegmenter = null +} + +export function setAnalysisLocale(locale?: string): void { + const nextLocale = locale && locale.length > 0 ? 
locale : undefined + if (segmenterLocale === nextLocale) return + segmenterLocale = nextLocale + sharedWordSegmenter = null +} + +const arabicScriptFallbackRe = /[\u0600-\u0604\u0606-\u060B\u060D-\u061A\u061C-\u061E\u0620-\u063F\u0641-\u064A\u0656-\u066F\u0671-\u06DC\u06DE-\u06FF\u0750-\u077F\u0870-\u0891\u0897-\u08E1\u08E3-\u08FF\uFB50-\uFD3D\uFD40-\uFDCF\uFDF0-\uFDFF\uFE70-\uFE74\uFE76-\uFEFC\u{10E60}-\u{10E7E}\u{10EC2}-\u{10EC7}\u{10ED0}-\u{10ED8}\u{10EFA}-\u{10EFF}\u{1EE00}-\u{1EE03}\u{1EE05}-\u{1EE1F}\u{1EE21}\u{1EE22}\u{1EE24}\u{1EE27}\u{1EE29}-\u{1EE32}\u{1EE34}-\u{1EE37}\u{1EE39}\u{1EE3B}\u{1EE42}\u{1EE47}\u{1EE49}\u{1EE4B}\u{1EE4D}-\u{1EE4F}\u{1EE51}\u{1EE52}\u{1EE54}\u{1EE57}\u{1EE59}\u{1EE5B}\u{1EE5D}\u{1EE5F}\u{1EE61}\u{1EE62}\u{1EE64}\u{1EE67}-\u{1EE6A}\u{1EE6C}-\u{1EE72}\u{1EE74}-\u{1EE77}\u{1EE79}-\u{1EE7C}\u{1EE7E}\u{1EE80}-\u{1EE89}\u{1EE8B}-\u{1EE9B}\u{1EEA1}-\u{1EEA3}\u{1EEA5}-\u{1EEA9}\u{1EEAB}-\u{1EEBB}\u{1EEF0}\u{1EEF1}]/u +const combiningMarkFallbackRe = /[\u0300-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u07FD\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u0897-\u089F\u08CA-\u08E1\u08E3-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u09FE\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0AFA-\u0AFF\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B55-\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C00-\u0C04\u0C3C\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0CF3\u0D00-\u0D03\u0D3B\u0D3C\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D81-\u0D8
3\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EBC\u0EC8-\u0ECE\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102B-\u103E\u1056-\u1059\u105E-\u1060\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F\u109A-\u109D\u135D-\u135F\u1712-\u1715\u1732-\u1734\u1752\u1753\u1772\u1773\u17B4-\u17D3\u17DD\u180B-\u180D\u180F\u1885\u1886\u18A9\u1920-\u192B\u1930-\u193B\u1A17-\u1A1B\u1A55-\u1A5E\u1A60-\u1A7C\u1A7F\u1AB0-\u1ADD\u1AE0-\u1AEB\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF4\u1CF7-\u1CF9\u1DC0-\u1DFF\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3099\u309A\uA66F-\uA672\uA674-\uA67D\uA69E\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA82C\uA880\uA881\uA8B4-\uA8C5\uA8E0-\uA8F1\uA8FF\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uA9E5\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAA7B-\uAA7D\uAAB0\uAAB2-\uAAB4\uAAB7\uAAB8\uAABE\uAABF\uAAC1\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\u{101FD}\u{102E0}\u{10376}-\u{1037A}\u{10A01}-\u{10A03}\u{10A05}\u{10A06}\u{10A0C}-\u{10A0F}\u{10A38}-\u{10A3A}\u{10A3F}\u{10AE5}\u{10AE6}\u{10D24}-\u{10D27}\u{10D69}-\u{10D6D}\u{10EAB}\u{10EAC}\u{10EFA}-\u{10EFF}\u{10F46}-\u{10F50}\u{10F82}-\u{10F85}\u{11000}-\u{11002}\u{11038}-\u{11046}\u{11070}\u{11073}\u{11074}\u{1107F}-\u{11082}\u{110B0}-\u{110BA}\u{110C2}\u{11100}-\u{11102}\u{11127}-\u{11134}\u{11145}\u{11146}\u{11173}\u{11180}-\u{11182}\u{111B3}-\u{111C0}\u{111C9}-\u{111CC}\u{111CE}\u{111CF}\u{1122C}-\u{11237}\u{1123E}\u{11241}\u{112DF}-\u{112EA}\u{11300}-\u{11303}\u{1133B}\u{1133C}\u{1133E}-\u{11344}\u{11347}\u{11348}\u{1134B}-\u{1134D}\u{11357}\u{11362}\u{11363}\u{11366}-\u{1136C}\u{11370}-\u{11374}\u{113B8}-\u{113C0}\u{113C2}\u{113C5}\u{113C7}-\u{113CA}\u{113CC}-\u{113D0}\u{113D2}\u{113E1}\u{113E2}\u{11435}-\u{11446}\u{1145E}\u{114B0}-\u{114C3}\u{1
15AF}-\u{115B5}\u{115B8}-\u{115C0}\u{115DC}\u{115DD}\u{11630}-\u{11640}\u{116AB}-\u{116B7}\u{1171D}-\u{1172B}\u{1182C}-\u{1183A}\u{11930}-\u{11935}\u{11937}\u{11938}\u{1193B}-\u{1193E}\u{11940}\u{11942}\u{11943}\u{119D1}-\u{119D7}\u{119DA}-\u{119E0}\u{119E4}\u{11A01}-\u{11A0A}\u{11A33}-\u{11A39}\u{11A3B}-\u{11A3E}\u{11A47}\u{11A51}-\u{11A5B}\u{11A8A}-\u{11A99}\u{11B60}-\u{11B67}\u{11C2F}-\u{11C36}\u{11C38}-\u{11C3F}\u{11C92}-\u{11CA7}\u{11CA9}-\u{11CB6}\u{11D31}-\u{11D36}\u{11D3A}\u{11D3C}\u{11D3D}\u{11D3F}-\u{11D45}\u{11D47}\u{11D8A}-\u{11D8E}\u{11D90}\u{11D91}\u{11D93}-\u{11D97}\u{11EF3}-\u{11EF6}\u{11F00}\u{11F01}\u{11F03}\u{11F34}-\u{11F3A}\u{11F3E}-\u{11F42}\u{11F5A}\u{13440}\u{13447}-\u{13455}\u{1611E}-\u{1612F}\u{16AF0}-\u{16AF4}\u{16B30}-\u{16B36}\u{16F4F}\u{16F51}-\u{16F87}\u{16F8F}-\u{16F92}\u{16FE4}\u{16FF0}\u{16FF1}\u{1BC9D}\u{1BC9E}\u{1CF00}-\u{1CF2D}\u{1CF30}-\u{1CF46}\u{1D165}-\u{1D169}\u{1D16D}-\u{1D172}\u{1D17B}-\u{1D182}\u{1D185}-\u{1D18B}\u{1D1AA}-\u{1D1AD}\u{1D242}-\u{1D244}\u{1DA00}-\u{1DA36}\u{1DA3B}-\u{1DA6C}\u{1DA75}\u{1DA84}\u{1DA9B}-\u{1DA9F}\u{1DAA1}-\u{1DAAF}\u{1E000}-\u{1E006}\u{1E008}-\u{1E018}\u{1E01B}-\u{1E021}\u{1E023}\u{1E024}\u{1E026}-\u{1E02A}\u{1E08F}\u{1E130}-\u{1E136}\u{1E2AE}\u{1E2EC}-\u{1E2EF}\u{1E4EC}-\u{1E4EF}\u{1E5EE}\u{1E5EF}\u{1E6E3}\u{1E6E6}\u{1E6EE}\u{1E6EF}\u{1E6F5}\u{1E8D0}-\u{1E8D6}\u{1E944}-\u{1E94A}\u{E0100}-\u{E01EF}]/u +const decimalDigitFallbackRe = 
// Inclusive [first, last] code point ranges that count as CJK for line breaking.
const cjkCodePointRanges: ReadonlyArray<readonly [number, number]> = [
  [0x4E00, 0x9FFF], // CJK Unified Ideographs
  [0x3400, 0x4DBF], // Extension A
  [0x20000, 0x2A6DF], // Extension B
  [0x2A700, 0x2B73F], // Extension C
  [0x2B740, 0x2B81F], // Extension D
  [0x2B820, 0x2CEAF], // Extension E
  [0x2CEB0, 0x2EBEF], // Extension F
  [0x30000, 0x3134F], // Extension G
  [0xF900, 0xFAFF], // CJK Compatibility Ideographs
  [0x2F800, 0x2FA1F], // CJK Compatibility Ideographs Supplement
  [0x3000, 0x303F], // CJK Symbols and Punctuation
  [0x3040, 0x309F], // Hiragana
  [0x30A0, 0x30FF], // Katakana
  [0xAC00, 0xD7AF], // Hangul Syllables
  [0xFF00, 0xFFEF], // Halfwidth and Fullwidth Forms
]

// Returns true as soon as any code point of `s` falls inside one of the CJK
// ranges above. Iterates by code point, so astral-plane ideographs are checked
// as a whole rather than as surrogate halves. Empty input yields false.
export function isCJK(s: string): boolean {
  for (const glyph of s) {
    const codePoint = glyph.codePointAt(0)!
    const inSomeRange = cjkCodePointRanges.some(
      ([first, last]) => codePoint >= first && codePoint <= last,
    )
    if (inSomeRange) return true
  }
  return false
}
// True when `segment` is non-empty and every code point in it equals `ch`
// (e.g. "///" with "/"). An empty segment is never a run.
function isRepeatedSingleCharRun(segment: string, ch: string): boolean {
  return segment.length > 0 && Array.from(segment).every((unit) => unit === ch)
}
// Maps a single character to the break behaviour it contributes to a segment.
// Space and tab are kept as their own preserved kinds when the profile
// preserves ordinary spaces or hard breaks; a newline becomes a hard break
// only when the profile preserves hard breaks. Anything not listed is 'text'.
function classifySegmentBreakChar(ch: string, whiteSpaceProfile: WhiteSpaceProfile): SegmentBreakKind {
  const preservesWhitespace =
    whiteSpaceProfile.preserveOrdinarySpaces || whiteSpaceProfile.preserveHardBreaks

  switch (ch) {
    case ' ':
      return preservesWhitespace ? 'preserved-space' : 'space'
    case '\t':
      return preservesWhitespace ? 'tab' : 'text'
    case '\n':
      return whiteSpaceProfile.preserveHardBreaks ? 'hard-break' : 'text'
    case '\u00A0':
    case '\u202F':
    case '\u2060':
    case '\uFEFF':
      return 'glue'
    case '\u200B':
      return 'zero-width-break'
    case '\u00AD':
      return 'soft-hyphen'
    default:
      return 'text'
  }
}
// Collapses each URL-like run into a single word-like text segment.
//
// A run starts at a text segment accepted by isUrlLikeRunStart (a "www."
// prefix, or a scheme segment followed by "//"). It then absorbs every
// following segment up to, but not including, the next text-run boundary.
// Absorption also stops right after a segment containing '?', so the query
// part stays separate for later passes.
//
// Returns a new segmentation; the input arrays are not mutated.
function mergeUrlLikeRuns(segmentation: MergedSegmentation): MergedSegmentation {
  const texts = segmentation.texts.slice()
  const isWordLike = segmentation.isWordLike.slice()
  const kinds = segmentation.kinds.slice()
  const starts = segmentation.starts.slice()

  for (let i = 0; i < segmentation.len; i++) {
    if (kinds[i] !== 'text' || !isUrlLikeRunStart(segmentation, i)) continue

    let j = i + 1
    while (j < segmentation.len && !isTextRunBoundary(kinds[j]!)) {
      texts[i] += texts[j]!
      isWordLike[i] = true
      const endsQueryPrefix = texts[j]!.includes('?')
      kinds[j] = 'text'
      texts[j] = '' // emptied slots are dropped by the compaction pass below
      j++
      if (endsQueryPrefix) break
    }

    // Resume after the absorbed range. isUrlLikeRunStart reads the ORIGINAL
    // texts, so an absorbed "www.example.com" inside "https://www.example.com/p?"
    // would otherwise be treated as a fresh run start. Its emptied slot would
    // then swallow the segments after the '?' while keeping its own, now
    // wrong, start offset.
    i = j - 1
  }

  // Compact in place: drop emptied slots, keeping the parallel arrays aligned.
  let compactLen = 0
  for (let read = 0; read < texts.length; read++) {
    const text = texts[read]!
    if (text.length === 0) continue
    if (compactLen !== read) {
      texts[compactLen] = text
      isWordLike[compactLen] = isWordLike[read]!
      kinds[compactLen] = kinds[read]!
      starts[compactLen] = starts[read]!
    }
    compactLen++
  }

  texts.length = compactLen
  isWordLike.length = compactLen
  kinds.length = compactLen
  starts.length = compactLen

  return {
    len: compactLen,
    texts,
    isWordLike,
    kinds,
    starts,
  }
}
// True when `text` is non-empty and consists solely of decimal digits and
// numeric joiner characters (the members of numericJoinerChars).
function isNumericRunSegment(text: string): boolean {
  if (text.length === 0) return false
  return Array.from(text).every(
    (ch) => decimalDigitRe.test(ch) || numericJoinerChars.has(ch),
  )
}
// Joins chains such as "a," + "b;" + "c" into one word-like text segment.
// A chain starts at a word-like text segment matching
// asciiPunctuationChainSegmentRe. It keeps growing while the text gathered so
// far ends in joiner punctuation and the next segment is also a word-like
// text segment matching the same pattern. All other segments pass through.
function mergeAsciiPunctuationChains(segmentation: MergedSegmentation): MergedSegmentation {
  const {
    len: total,
    texts: srcTexts,
    isWordLike: srcWordLike,
    kinds: srcKinds,
    starts: srcStarts,
  } = segmentation

  const texts: string[] = []
  const isWordLike: boolean[] = []
  const kinds: SegmentBreakKind[] = []
  const starts: number[] = []

  let cursor = 0
  while (cursor < total) {
    const head = srcTexts[cursor]!
    const headKind = srcKinds[cursor]!
    const headWordLike = srcWordLike[cursor]!
    const headStart = srcStarts[cursor]!
    cursor++

    const opensChain =
      headKind === 'text' && headWordLike && asciiPunctuationChainSegmentRe.test(head)

    if (!opensChain) {
      texts.push(head)
      isWordLike.push(headWordLike)
      kinds.push(headKind)
      starts.push(headStart)
      continue
    }

    let chain = head
    while (
      asciiPunctuationChainTrailingJoinersRe.test(chain) &&
      cursor < total &&
      srcKinds[cursor] === 'text' &&
      srcWordLike[cursor] &&
      asciiPunctuationChainSegmentRe.test(srcTexts[cursor]!)
    ) {
      chain += srcTexts[cursor]!
      cursor++
    }

    texts.push(chain)
    isWordLike.push(true)
    kinds.push('text')
    starts.push(headStart)
  }

  return {
    len: texts.length,
    texts,
    isWordLike,
    kinds,
    starts,
  }
}
// Folds 'glue' segments into the text segments they touch, so a
// text–glue–text sequence becomes one text segment.
//
// - Glue that precedes a text segment is prepended to it; the merged segment
//   starts at the glue's offset and takes the text segment's word-likeness.
// - Glue between two text segments joins them; the result is word-like if
//   either side was.
// - Glue that follows text but is not followed by more text is appended to
//   that text.
// - A glue run with no text on either side is emitted as a single 'glue'
//   segment with isWordLike false.
//
// Returns a new segmentation; the input is only read.
function mergeGlueConnectedTextRuns(segmentation: MergedSegmentation): MergedSegmentation {
  const texts: string[] = []
  const isWordLike: boolean[] = []
  const kinds: SegmentBreakKind[] = []
  const starts: number[] = []

  // `read` is the single cursor over the input; every branch below advances it.
  let read = 0
  while (read < segmentation.len) {
    let text = segmentation.texts[read]!
    let wordLike = segmentation.isWordLike[read]!
    let kind = segmentation.kinds[read]!
    let start = segmentation.starts[read]!

    if (kind === 'glue') {
      // Gather the whole leading glue run first.
      let glueText = text
      const glueStart = start
      read++
      while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
        glueText += segmentation.texts[read]!
        read++
      }

      if (read < segmentation.len && segmentation.kinds[read] === 'text') {
        // Leading glue attaches to the text that follows and becomes its head.
        text = glueText + segmentation.texts[read]!
        wordLike = segmentation.isWordLike[read]!
        kind = 'text'
        start = glueStart
        read++
      } else {
        // Glue with no text after it stays a standalone glue segment.
        texts.push(glueText)
        isWordLike.push(false)
        kinds.push('glue')
        starts.push(glueStart)
        continue
      }
    } else {
      read++
    }

    if (kind === 'text') {
      // Keep extending the current text across glue–text pairs.
      while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
        let glueText = ''
        while (read < segmentation.len && segmentation.kinds[read] === 'glue') {
          glueText += segmentation.texts[read]!
          read++
        }

        if (read < segmentation.len && segmentation.kinds[read] === 'text') {
          text += glueText + segmentation.texts[read]!
          wordLike = wordLike || segmentation.isWordLike[read]!
          read++
          continue
        }

        // Trailing glue not followed by text: keep it on the current text.
        // The outer condition then fails because the glue run was consumed.
        text += glueText
      }
    }

    texts.push(text)
    isWordLike.push(wordLike)
    kinds.push(kind)
    starts.push(start)
  }

  return {
    len: texts.length,
    texts,
    isWordLike,
    kinds,
    starts,
  }
}
&& + endsWithClosingQuote(mergedTexts[mergedLen - 1]!) + ) { + mergedTexts[mergedLen - 1] += piece.text + mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike + } else if ( + isText && + mergedLen > 0 && + mergedKinds[mergedLen - 1] === 'text' && + isCJKLineStartProhibitedSegment(piece.text) && + isCJK(mergedTexts[mergedLen - 1]!) + ) { + mergedTexts[mergedLen - 1] += piece.text + mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike + } else if ( + isText && + mergedLen > 0 && + mergedKinds[mergedLen - 1] === 'text' && + endsWithMyanmarMedialGlue(mergedTexts[mergedLen - 1]!) + ) { + mergedTexts[mergedLen - 1] += piece.text + mergedWordLike[mergedLen - 1] = mergedWordLike[mergedLen - 1]! || piece.isWordLike + } else if ( + isText && + mergedLen > 0 && + mergedKinds[mergedLen - 1] === 'text' && + piece.isWordLike && + containsArabicScript(piece.text) && + endsWithArabicNoSpacePunctuation(mergedTexts[mergedLen - 1]!) + ) { + mergedTexts[mergedLen - 1] += piece.text + mergedWordLike[mergedLen - 1] = true + } else if ( + isText && + !piece.isWordLike && + mergedLen > 0 && + mergedKinds[mergedLen - 1] === 'text' && + piece.text.length === 1 && + piece.text !== '-' && + piece.text !== '—' && + isRepeatedSingleCharRun(mergedTexts[mergedLen - 1]!, piece.text) + ) { + mergedTexts[mergedLen - 1] += piece.text + } else if ( + isText && + !piece.isWordLike && + mergedLen > 0 && + mergedKinds[mergedLen - 1] === 'text' && + ( + isLeftStickyPunctuationSegment(piece.text) || + (piece.text === '-' && mergedWordLike[mergedLen - 1]!) + ) + ) { + mergedTexts[mergedLen - 1] += piece.text + } else { + mergedTexts[mergedLen] = piece.text + mergedWordLike[mergedLen] = piece.isWordLike + mergedKinds[mergedLen] = piece.kind + mergedStarts[mergedLen] = piece.start + mergedLen++ + } + } + } + + for (let i = 1; i < mergedLen; i++) { + if ( + mergedKinds[i] === 'text' && + !mergedWordLike[i]! 
&& + isEscapedQuoteClusterSegment(mergedTexts[i]!) && + mergedKinds[i - 1] === 'text' + ) { + mergedTexts[i - 1] += mergedTexts[i]! + mergedWordLike[i - 1] = mergedWordLike[i - 1]! || mergedWordLike[i]! + mergedTexts[i] = '' + } + } + + for (let i = mergedLen - 2; i >= 0; i--) { + if (mergedKinds[i] === 'text' && !mergedWordLike[i]! && isForwardStickyClusterSegment(mergedTexts[i]!)) { + let j = i + 1 + while (j < mergedLen && mergedTexts[j] === '') j++ + if (j < mergedLen && mergedKinds[j] === 'text') { + mergedTexts[j] = mergedTexts[i]! + mergedTexts[j]! + mergedStarts[j] = mergedStarts[i]! + mergedTexts[i] = '' + } + } + } + + let compactLen = 0 + for (let read = 0; read < mergedLen; read++) { + const text = mergedTexts[read]! + if (text.length === 0) continue + if (compactLen !== read) { + mergedTexts[compactLen] = text + mergedWordLike[compactLen] = mergedWordLike[read]! + mergedKinds[compactLen] = mergedKinds[read]! + mergedStarts[compactLen] = mergedStarts[read]! + } + compactLen++ + } + + mergedTexts.length = compactLen + mergedWordLike.length = compactLen + mergedKinds.length = compactLen + mergedStarts.length = compactLen + + const compacted = mergeGlueConnectedTextRuns({ + len: compactLen, + texts: mergedTexts, + isWordLike: mergedWordLike, + kinds: mergedKinds, + starts: mergedStarts, + }) + const withMergedUrls = carryTrailingForwardStickyAcrossCJKBoundary( + mergeAsciiPunctuationChains( + splitHyphenatedNumericRuns(mergeNumericRuns(mergeUrlQueryRuns(mergeUrlLikeRuns(compacted)))), + ), + ) + + for (let i = 0; i < withMergedUrls.len - 1; i++) { + const split = splitLeadingSpaceAndMarks(withMergedUrls.texts[i]!) + if (split === null) continue + if ( + (withMergedUrls.kinds[i] !== 'space' && withMergedUrls.kinds[i] !== 'preserved-space') || + withMergedUrls.kinds[i + 1] !== 'text' || + !containsArabicScript(withMergedUrls.texts[i + 1]!) 
// Splits the segment list into chunks separated by hard breaks.
// Each chunk is [startSegmentIndex, endSegmentIndex), and
// consumedEndSegmentIndex additionally steps over the hard-break segment that
// ended the chunk. If the profile does not preserve hard breaks, the whole
// segmentation is one chunk. A segmentation ending in a hard break gets no
// trailing empty chunk.
function compileAnalysisChunks(segmentation: MergedSegmentation, whiteSpaceProfile: WhiteSpaceProfile): AnalysisChunk[] {
  const segmentCount = segmentation.len
  if (segmentCount === 0) return []

  // Chunk that runs from `from` to the end of the segmentation.
  const tailChunkFrom = (from: number): AnalysisChunk => ({
    startSegmentIndex: from,
    endSegmentIndex: segmentCount,
    consumedEndSegmentIndex: segmentCount,
  })

  if (!whiteSpaceProfile.preserveHardBreaks) return [tailChunkFrom(0)]

  const chunks: AnalysisChunk[] = []
  let chunkStart = 0
  for (let index = 0; index < segmentCount; index++) {
    if (segmentation.kinds[index] === 'hard-break') {
      chunks.push({
        startSegmentIndex: chunkStart,
        endSegmentIndex: index,
        consumedEndSegmentIndex: index + 1,
      })
      chunkStart = index + 1
    }
  }

  if (chunkStart < segmentCount) chunks.push(tailChunkFrom(chunkStart))
  return chunks
}
normalizeWhitespacePreWrap(text) + : normalizeWhitespaceNormal(text) + if (normalized.length === 0) { + return { + normalized, + chunks: [], + len: 0, + texts: [], + isWordLike: [], + kinds: [], + starts: [], + } + } + const segmentation = buildMergedSegmentation(normalized, profile, whiteSpaceProfile) + return { + normalized, + chunks: compileAnalysisChunks(segmentation, whiteSpaceProfile), + ...segmentation, + } +} diff --git a/packages/lynx-pretext/src/pretext/bidi.ts b/packages/lynx-pretext/src/pretext/bidi.ts new file mode 100644 index 0000000..f530ff9 --- /dev/null +++ b/packages/lynx-pretext/src/pretext/bidi.ts @@ -0,0 +1,173 @@ +// Simplified bidi metadata helper for the rich prepareWithSegments() path, +// forked from pdf.js via Sebastian's text-layout. It classifies characters +// into bidi types, computes embedding levels, and maps them onto prepared +// segments for custom rendering. The line-breaking engine does not consume +// these levels. + +type BidiType = 'L' | 'R' | 'AL' | 'AN' | 'EN' | 'ES' | 'ET' | 'CS' | + 'ON' | 'BN' | 'B' | 'S' | 'WS' | 'NSM' + +const baseTypes: BidiType[] = [ + 'BN','BN','BN','BN','BN','BN','BN','BN','BN','S','B','S','WS', + 'B','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN', + 'BN','BN','B','B','B','S','WS','ON','ON','ET','ET','ET','ON', + 'ON','ON','ON','ON','ON','CS','ON','CS','ON','EN','EN','EN', + 'EN','EN','EN','EN','EN','EN','EN','ON','ON','ON','ON','ON', + 'ON','ON','L','L','L','L','L','L','L','L','L','L','L','L','L', + 'L','L','L','L','L','L','L','L','L','L','L','L','L','ON','ON', + 'ON','ON','ON','ON','L','L','L','L','L','L','L','L','L','L', + 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L', + 'L','ON','ON','ON','ON','BN','BN','BN','BN','BN','BN','B','BN', + 'BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN', + 'BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN','BN', + 'BN','CS','ON','ET','ET','ET','ET','ON','ON','ON','ON','L','ON', + 
'ON','ON','ON','ON','ET','ET','EN','EN','ON','L','ON','ON','ON', + 'EN','L','ON','ON','ON','ON','ON','L','L','L','L','L','L','L', + 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L', + 'L','ON','L','L','L','L','L','L','L','L','L','L','L','L','L', + 'L','L','L','L','L','L','L','L','L','L','L','L','L','L','L', + 'L','L','L','ON','L','L','L','L','L','L','L','L' +] + +const arabicTypes: BidiType[] = [ + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'CS','AL','ON','ON','NSM','NSM','NSM','NSM','NSM','NSM','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','NSM','NSM','NSM','NSM','NSM','NSM','NSM', + 'NSM','NSM','NSM','NSM','NSM','NSM','NSM','AL','AL','AL','AL', + 'AL','AL','AL','AN','AN','AN','AN','AN','AN','AN','AN','AN', + 'AN','ET','AN','AN','AL','AL','AL','NSM','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM', + 'NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','NSM','ON','NSM', + 'NSM','NSM','NSM','AL','AL','AL','AL','AL','AL','AL','AL','AL', + 'AL','AL','AL','AL','AL','AL','AL','AL','AL' +] + +function classifyChar(charCode: number): BidiType { + if (charCode <= 0x00ff) return baseTypes[charCode]! 
// Computes a simplified per-UTF-16-code-unit bidi embedding level for `str`.
//
// Returns null for an empty string or when no code unit classifies as
// R, AL or AN, so callers can skip bidi handling entirely. Otherwise returns
// an Int8Array of length str.length holding the resolved level per code unit.
//
// The base level is a heuristic: when fewer than 30% of the code units are
// bidi (R/AL/AN) the string is treated as left-to-right (level 0), otherwise
// as right-to-left (level 1).
function computeBidiLevels(str: string): Int8Array | null {
  const len = str.length
  if (len === 0) return null

  // eslint-disable-next-line unicorn/no-new-array
  const types: BidiType[] = new Array(len)
  let numBidi = 0

  for (let i = 0; i < len; i++) {
    const t = classifyChar(str.charCodeAt(i))
    if (t === 'R' || t === 'AL' || t === 'AN') numBidi++
    types[i] = t
  }

  if (numBidi === 0) return null

  // Share of bidi code units in the string. The ratio must be numBidi / len;
  // the inverse (len / numBidi) is always >= 1, so the LTR branch could never
  // be taken and any string with one RTL character got an RTL base.
  const startLevel = numBidi / len < 0.3 ? 0 : 1
  const levels = new Int8Array(len).fill(startLevel)

  const e: BidiType = (startLevel & 1) ? 'R' : 'L'
  const sor = e

  // W1: a non-spacing mark takes the type of the preceding character.
  let lastType: BidiType = sor
  for (let i = 0; i < len; i++) {
    if (types[i] === 'NSM') types[i] = lastType
    else lastType = types[i]!
  }
  // W2: European numbers after an Arabic letter become Arabic numbers.
  lastType = sor
  for (let i = 0; i < len; i++) {
    const t = types[i]!
    if (t === 'EN') types[i] = lastType === 'AL' ? 'AN' : 'EN'
    else if (t === 'R' || t === 'L' || t === 'AL') lastType = t
  }
  // W3: Arabic letters are treated as R from here on.
  for (let i = 0; i < len; i++) {
    if (types[i] === 'AL') types[i] = 'R'
  }
  // W4: a single separator between two numbers of the same kind joins them.
  for (let i = 1; i < len - 1; i++) {
    if (types[i] === 'ES' && types[i - 1] === 'EN' && types[i + 1] === 'EN') {
      types[i] = 'EN'
    }
    if (
      types[i] === 'CS' &&
      (types[i - 1] === 'EN' || types[i - 1] === 'AN') &&
      types[i + 1] === types[i - 1]
    ) {
      types[i] = types[i - 1]!
    }
  }
  // W5: runs of European terminators adjacent to a European number join it.
  for (let i = 0; i < len; i++) {
    if (types[i] !== 'EN') continue
    let j: number
    for (j = i - 1; j >= 0 && types[j] === 'ET'; j--) types[j] = 'EN'
    for (j = i + 1; j < len && types[j] === 'ET'; j++) types[j] = 'EN'
  }
  // W6: remaining separators, terminators and whitespace become neutral.
  for (let i = 0; i < len; i++) {
    const t = types[i]!
    if (t === 'WS' || t === 'ES' || t === 'ET' || t === 'CS') types[i] = 'ON'
  }
  // W7: European numbers after a strong L become L.
  lastType = sor
  for (let i = 0; i < len; i++) {
    const t = types[i]!
    if (t === 'EN') types[i] = lastType === 'L' ? 'L' : 'EN'
    else if (t === 'R' || t === 'L') lastType = t
  }

  // N1-N2: a neutral run takes the direction of its neighbours when both
  // sides agree (anything that is not L counts as R); otherwise it takes the
  // base direction.
  for (let i = 0; i < len; i++) {
    if (types[i] !== 'ON') continue
    let end = i + 1
    while (end < len && types[end] === 'ON') end++
    const before: BidiType = i > 0 ? types[i - 1]! : sor
    const after: BidiType = end < len ? types[end]! : sor
    const bDir: BidiType = before !== 'L' ? 'R' : 'L'
    const aDir: BidiType = after !== 'L' ? 'R' : 'L'
    if (bDir === aDir) {
      for (let j = i; j < end; j++) types[j] = bDir
    }
    i = end - 1
  }
  for (let i = 0; i < len; i++) {
    if (types[i] === 'ON') types[i] = e
  }

  // I1-I2: raise the level of characters whose direction differs from the
  // parity of their current level.
  for (let i = 0; i < len; i++) {
    const t = types[i]!
    const level = levels[i]!
    if ((level & 1) === 0) {
      if (t === 'R') levels[i] = level + 1
      else if (t === 'AN' || t === 'EN') levels[i] = level + 2
    } else if (t === 'L' || t === 'AN' || t === 'EN') {
      levels[i] = level + 1
    }
  }

  return levels
}
// Builds a pretext API object whose measuring entry points run against the
// measurement host supplied in `config`.
//
// Every function except setLocale is wrapped so that the call executes inside
// withMeasurementHost(config.measurement, ...). setLocale is passed through
// unwrapped.
export function createPretext(config: PretextHostConfig): PretextHostApi {
  // Wraps `fn` so each invocation runs with this instance's measurement host.
  // The type parameters tie the wrapper's argument list and return type to
  // those of `fn`; without them `Args` and `Result` are undeclared names.
  const bind = <Args extends unknown[], Result>(
    fn: (...args: Args) => Result,
  ): ((...args: Args) => Result) => {
    return (...args: Args) => withMeasurementHost(config.measurement, () => fn(...args))
  }

  return {
    profilePrepare: bind(profilePrepare),
    prepare: bind(prepare),
    prepareWithSegments: bind(prepareWithSegments),
    layout: bind(layout),
    walkLineRanges: bind(walkLineRanges),
    layoutNextLine: bind(layoutNextLine),
    layoutWithLines: bind(layoutWithLines),
    clearCache: bind(clearCache),
    // NOTE(review): left unbound as in the original; presumably it needs no
    // measurement host, confirm against layout.ts.
    setLocale,
  }
}
Auto-detected by comparing canvas +// vs actual DOM emoji width (one cached DOM read per font). Safari canvas and +// DOM agree (both wider than fontSize), so correction = 0 there. +// +// Limitations: +// - system-ui font: canvas resolves to different optical variants than DOM on macOS. +// Use named fonts (Helvetica, Inter, etc.) for guaranteed accuracy. +// See RESEARCH.md "Discovery: system-ui font resolution mismatch". +// +// Based on Sebastian Markbage's text-layout research (github.com/chenglou/text-layout). + +import { computeSegmentLevels } from './bidi.js' +import { + analyzeText, + clearAnalysisCaches, + endsWithClosingQuote, + isCJK, + kinsokuEnd, + kinsokuStart, + leftStickyPunctuation, + setAnalysisLocale, + type AnalysisChunk, + type SegmentBreakKind, + type TextAnalysis, + type WhiteSpaceMode, +} from './analysis.js' +import { + clearMeasurementCaches, + getCorrectedSegmentWidth, + getEngineProfile, + getFontMeasurementState, + getSegmentGraphemePrefixWidths, + getSegmentGraphemeWidths, + getSegmentMetrics, + textMayContainEmoji, +} from './measurement.js' +import { + countPreparedLines, + layoutNextLineRange as stepPreparedLineRange, + walkPreparedLines, + type InternalLayoutLine, +} from './line-break.js' + +let sharedGraphemeSegmenter: Intl.Segmenter | null = null +// Rich-path only. Reuses grapheme splits while materializing multiple lines +// from the same prepared handle, without pushing that cache into the API. +let sharedLineTextCaches = new WeakMap>() + +function getSharedGraphemeSegmenter(): Intl.Segmenter { + if (sharedGraphemeSegmenter === null) { + sharedGraphemeSegmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme' }) + } + return sharedGraphemeSegmenter +} + +// --- Public types --- + +declare const preparedTextBrand: unique symbol + +type PreparedCore = { + widths: number[] // Segment widths, e.g. 
[42.5, 4.4, 37.2] + lineEndFitAdvances: number[] // Width contribution when a line ends after this segment + lineEndPaintAdvances: number[] // Painted width contribution when a line ends after this segment + kinds: SegmentBreakKind[] // Break behavior per segment, e.g. ['text', 'space', 'text'] + simpleLineWalkFastPath: boolean // Normal text can use the simpler old line walker across all layout APIs + segLevels: Int8Array | null // Rich-path bidi metadata for custom rendering; layout() never reads it + breakableWidths: (number[] | null)[] // Grapheme widths for overflow-wrap segments, else null + breakablePrefixWidths: (number[] | null)[] // Cumulative grapheme prefix widths for narrow browser-policy shims + discretionaryHyphenWidth: number // Visible width added when a soft hyphen is chosen as the break + tabStopAdvance: number // Absolute advance between tab stops for pre-wrap tab segments + chunks: PreparedLineChunk[] // Precompiled hard-break chunks for line walking +} + +// Keep the main prepared handle opaque so the public API does not accidentally +// calcify around the current parallel-array representation. +export type PreparedText = { + readonly [preparedTextBrand]: true +} + +type InternalPreparedText = PreparedText & PreparedCore + +// Rich/diagnostic variant that still exposes the structural segment data. +// Treat this as the unstable escape hatch for experiments and custom rendering. +export type PreparedTextWithSegments = InternalPreparedText & { + segments: string[] // Segment text aligned with the parallel arrays, e.g. ['hello', ' ', 'world'] +} + +export type LayoutCursor = { + segmentIndex: number // Segment index in `segments` + graphemeIndex: number // Grapheme index within that segment; `0` at segment boundaries +} + +export type LayoutResult = { + lineCount: number // Number of wrapped lines, e.g. 3 + height: number // Total block height, e.g. 
lineCount * lineHeight = 57 +} + +export type LayoutLine = { + text: string // Full text content of this line, e.g. 'hello world' + width: number // Measured width of this line, e.g. 87.5 + start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes + end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes +} + +export type LayoutLineRange = { + width: number // Measured width of this line, e.g. 87.5 + start: LayoutCursor // Inclusive start cursor in prepared segments/graphemes + end: LayoutCursor // Exclusive end cursor in prepared segments/graphemes +} + +export type LayoutLinesResult = LayoutResult & { + lines: LayoutLine[] // Per-line text/width pairs for custom rendering +} + +export type PrepareProfile = { + analysisMs: number + measureMs: number + totalMs: number + analysisSegments: number + preparedSegments: number + breakableSegments: number +} + +export type PrepareOptions = { + whiteSpace?: WhiteSpaceMode +} + +export type PreparedLineChunk = { + startSegmentIndex: number + endSegmentIndex: number + consumedEndSegmentIndex: number +} + +// --- Public API --- + +function createEmptyPrepared(includeSegments: boolean): InternalPreparedText | PreparedTextWithSegments { + if (includeSegments) { + return { + widths: [], + lineEndFitAdvances: [], + lineEndPaintAdvances: [], + kinds: [], + simpleLineWalkFastPath: true, + segLevels: null, + breakableWidths: [], + breakablePrefixWidths: [], + discretionaryHyphenWidth: 0, + tabStopAdvance: 0, + chunks: [], + segments: [], + } as unknown as PreparedTextWithSegments + } + return { + widths: [], + lineEndFitAdvances: [], + lineEndPaintAdvances: [], + kinds: [], + simpleLineWalkFastPath: true, + segLevels: null, + breakableWidths: [], + breakablePrefixWidths: [], + discretionaryHyphenWidth: 0, + tabStopAdvance: 0, + chunks: [], + } as unknown as InternalPreparedText +} + +function measureAnalysis( + analysis: TextAnalysis, + font: string, + includeSegments: boolean, +): 
InternalPreparedText | PreparedTextWithSegments { + const graphemeSegmenter = getSharedGraphemeSegmenter() + const engineProfile = getEngineProfile() + const { cache, emojiCorrection } = getFontMeasurementState( + font, + textMayContainEmoji(analysis.normalized), + ) + const discretionaryHyphenWidth = getCorrectedSegmentWidth('-', getSegmentMetrics('-', cache), emojiCorrection) + const spaceWidth = getCorrectedSegmentWidth(' ', getSegmentMetrics(' ', cache), emojiCorrection) + const tabStopAdvance = spaceWidth * 8 + + if (analysis.len === 0) return createEmptyPrepared(includeSegments) + + const widths: number[] = [] + const lineEndFitAdvances: number[] = [] + const lineEndPaintAdvances: number[] = [] + const kinds: SegmentBreakKind[] = [] + let simpleLineWalkFastPath = analysis.chunks.length <= 1 + const segStarts = includeSegments ? [] as number[] : null + const breakableWidths: (number[] | null)[] = [] + const breakablePrefixWidths: (number[] | null)[] = [] + const segments = includeSegments ? [] as string[] : null + const preparedStartByAnalysisIndex = Array.from({ length: analysis.len }) + const preparedEndByAnalysisIndex = Array.from({ length: analysis.len }) + + function pushMeasuredSegment( + text: string, + width: number, + lineEndFitAdvance: number, + lineEndPaintAdvance: number, + kind: SegmentBreakKind, + start: number, + breakable: number[] | null, + breakablePrefix: number[] | null, + ): void { + if (kind !== 'text' && kind !== 'space' && kind !== 'zero-width-break') { + simpleLineWalkFastPath = false + } + widths.push(width) + lineEndFitAdvances.push(lineEndFitAdvance) + lineEndPaintAdvances.push(lineEndPaintAdvance) + kinds.push(kind) + segStarts?.push(start) + breakableWidths.push(breakable) + breakablePrefixWidths.push(breakablePrefix) + if (segments !== null) segments.push(text) + } + + for (let mi = 0; mi < analysis.len; mi++) { + preparedStartByAnalysisIndex[mi] = widths.length + const segText = analysis.texts[mi]! 
+ const segWordLike = analysis.isWordLike[mi]! + const segKind = analysis.kinds[mi]! + const segStart = analysis.starts[mi]! + + if (segKind === 'soft-hyphen') { + pushMeasuredSegment( + segText, + 0, + discretionaryHyphenWidth, + discretionaryHyphenWidth, + segKind, + segStart, + null, + null, + ) + preparedEndByAnalysisIndex[mi] = widths.length + continue + } + + if (segKind === 'hard-break') { + pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null) + preparedEndByAnalysisIndex[mi] = widths.length + continue + } + + if (segKind === 'tab') { + pushMeasuredSegment(segText, 0, 0, 0, segKind, segStart, null, null) + preparedEndByAnalysisIndex[mi] = widths.length + continue + } + + const segMetrics = getSegmentMetrics(segText, cache) + + if (segKind === 'text' && segMetrics.containsCJK) { + let unitText = '' + let unitStart = 0 + + for (const gs of graphemeSegmenter.segment(segText)) { + const grapheme = gs.segment + + if (unitText.length === 0) { + unitText = grapheme + unitStart = gs.index + continue + } + + if ( + kinsokuEnd.has(unitText) || + kinsokuStart.has(grapheme) || + leftStickyPunctuation.has(grapheme) || + (engineProfile.carryCJKAfterClosingQuote && + isCJK(grapheme) && + endsWithClosingQuote(unitText)) + ) { + unitText += grapheme + continue + } + + const unitMetrics = getSegmentMetrics(unitText, cache) + const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection) + pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, null, null) + + unitText = grapheme + unitStart = gs.index + } + + if (unitText.length > 0) { + const unitMetrics = getSegmentMetrics(unitText, cache) + const w = getCorrectedSegmentWidth(unitText, unitMetrics, emojiCorrection) + pushMeasuredSegment(unitText, w, w, w, 'text', segStart + unitStart, null, null) + } + preparedEndByAnalysisIndex[mi] = widths.length + continue + } + + const w = getCorrectedSegmentWidth(segText, segMetrics, emojiCorrection) + const lineEndFitAdvance = + segKind === 
'space' || segKind === 'preserved-space' || segKind === 'zero-width-break' + ? 0 + : w + const lineEndPaintAdvance = + segKind === 'space' || segKind === 'zero-width-break' + ? 0 + : w + + if (segWordLike && segText.length > 1) { + const graphemeWidths = getSegmentGraphemeWidths(segText, segMetrics, cache, emojiCorrection) + const graphemePrefixWidths = engineProfile.preferPrefixWidthsForBreakableRuns + ? getSegmentGraphemePrefixWidths(segText, segMetrics, cache, emojiCorrection) + : null + pushMeasuredSegment( + segText, + w, + lineEndFitAdvance, + lineEndPaintAdvance, + segKind, + segStart, + graphemeWidths, + graphemePrefixWidths, + ) + } else { + pushMeasuredSegment( + segText, + w, + lineEndFitAdvance, + lineEndPaintAdvance, + segKind, + segStart, + null, + null, + ) + } + preparedEndByAnalysisIndex[mi] = widths.length + } + + const chunks = mapAnalysisChunksToPreparedChunks(analysis.chunks, preparedStartByAnalysisIndex, preparedEndByAnalysisIndex) + const segLevels = segStarts === null ? null : computeSegmentLevels(analysis.normalized, segStarts) + if (segments !== null) { + return { + widths, + lineEndFitAdvances, + lineEndPaintAdvances, + kinds, + simpleLineWalkFastPath, + segLevels, + breakableWidths, + breakablePrefixWidths, + discretionaryHyphenWidth, + tabStopAdvance, + chunks, + segments, + } as unknown as PreparedTextWithSegments + } + return { + widths, + lineEndFitAdvances, + lineEndPaintAdvances, + kinds, + simpleLineWalkFastPath, + segLevels, + breakableWidths, + breakablePrefixWidths, + discretionaryHyphenWidth, + tabStopAdvance, + chunks, + } as unknown as InternalPreparedText +} + +function mapAnalysisChunksToPreparedChunks( + chunks: AnalysisChunk[], + preparedStartByAnalysisIndex: number[], + preparedEndByAnalysisIndex: number[], +): PreparedLineChunk[] { + const preparedChunks: PreparedLineChunk[] = [] + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]! 
+ const startSegmentIndex = + chunk.startSegmentIndex < preparedStartByAnalysisIndex.length + ? preparedStartByAnalysisIndex[chunk.startSegmentIndex]! + : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0 + const endSegmentIndex = + chunk.endSegmentIndex < preparedStartByAnalysisIndex.length + ? preparedStartByAnalysisIndex[chunk.endSegmentIndex]! + : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0 + const consumedEndSegmentIndex = + chunk.consumedEndSegmentIndex < preparedStartByAnalysisIndex.length + ? preparedStartByAnalysisIndex[chunk.consumedEndSegmentIndex]! + : preparedEndByAnalysisIndex[preparedEndByAnalysisIndex.length - 1] ?? 0 + + preparedChunks.push({ + startSegmentIndex, + endSegmentIndex, + consumedEndSegmentIndex, + }) + } + return preparedChunks +} + +function prepareInternal( + text: string, + font: string, + includeSegments: boolean, + options?: PrepareOptions, +): InternalPreparedText | PreparedTextWithSegments { + const analysis = analyzeText(text, getEngineProfile(), options?.whiteSpace) + return measureAnalysis(analysis, font, includeSegments) +} + +// Diagnostic-only helper used by the browser benchmark harness to separate the +// text-analysis and measurement phases without duplicating the prepare logic. +export function profilePrepare(text: string, font: string, options?: PrepareOptions): PrepareProfile { + const t0 = performance.now() + const analysis = analyzeText(text, getEngineProfile(), options?.whiteSpace) + const t1 = performance.now() + const prepared = measureAnalysis(analysis, font, false) as InternalPreparedText + const t2 = performance.now() + + let breakableSegments = 0 + for (const widths of prepared.breakableWidths) { + if (widths !== null) breakableSegments++ + } + + return { + analysisMs: t1 - t0, + measureMs: t2 - t1, + totalMs: t2 - t0, + analysisSegments: analysis.len, + preparedSegments: prepared.widths.length, + breakableSegments, + } +} + +// Prepare text for layout. 
// Segments the text, measures every segment via canvas, and keeps the widths
// so the block can be re-laid-out quickly at any width. Call once per text
// block (e.g. when a comment first appears). The result is width-independent:
// one PreparedText can be passed to layout() with any maxWidth and lineHeight.
//
// Steps:
//   1. Normalize collapsible whitespace (CSS white-space: normal behavior)
//   2. Segment via Intl.Segmenter (handles CJK, Thai, etc.)
//   3. Merge punctuation into preceding word ("better." as one unit)
//   4. Split CJK words into individual graphemes (per-character line breaks)
//   5. Measure each segment via canvas measureText, cache by (segment, font)
//   6. Pre-measure graphemes of long words (for overflow-wrap: break-word)
//   7. Correct emoji canvas inflation (auto-detected per font size)
//   8. Optionally compute rich-path bidi metadata for custom renderers
export function prepare(text: string, font: string, options?: PrepareOptions): PreparedText {
  // `false`: do not retain per-segment text; callers only get the opaque handle.
  const opaqueHandle = prepareInternal(text, font, false, options)
  return opaqueHandle as PreparedText
}

// Rich variant for callers that render the laid-out lines themselves and
// therefore need the segment text kept alongside the measurements.
export function prepareWithSegments(text: string, font: string, options?: PrepareOptions): PreparedTextWithSegments {
  // `true`: retain per-segment text in the returned handle.
  const richHandle = prepareInternal(text, font, true, options)
  return richHandle as PreparedTextWithSegments
}

// Re-exposes the internal parallel-array view of an opaque PreparedText.
// This is a type-level cast only; nothing is copied or validated.
function getInternalPrepared(prepared: PreparedText): InternalPreparedText {
  const internalView = prepared as InternalPreparedText
  return internalView
}

// Layout prepared text at a given max width and caller-provided lineHeight.
// Pure arithmetic on cached widths — no canvas calls, no DOM reads, no string
// operations, no allocations.
// ~0.0002ms per text block. Call on every resize.
//
// Line breaking rules (matching CSS white-space: normal + overflow-wrap: break-word):
//   - Break before any non-space segment that would overflow the line
//   - Trailing whitespace hangs past the line edge (doesn't trigger breaks)
//   - Segments wider than maxWidth are broken at grapheme boundaries
export function layout(prepared: PreparedText, maxWidth: number, lineHeight: number): LayoutResult {
  // Keep the resize hot path specialized. `layoutWithLines()` shares the same
  // break semantics but also tracks line ranges; the extra bookkeeping is too
  // expensive to pay on every hot-path `layout()` call.
  const lineCount = countPreparedLines(getInternalPrepared(prepared), maxWidth)
  return { lineCount, height: lineCount * lineHeight }
}

// Returns the graphemes of `segments[segmentIndex]`, memoized in `cache` by
// segment index so a segment is split at most once per cache.
function getSegmentGraphemes(
  segmentIndex: number,
  segments: string[],
  cache: Map<number, string[]>,
): string[] {
  let graphemes = cache.get(segmentIndex)
  if (graphemes !== undefined) return graphemes

  graphemes = []
  const graphemeSegmenter = getSharedGraphemeSegmenter()
  for (const gs of graphemeSegmenter.segment(segments[segmentIndex]!)) {
    graphemes.push(gs.segment)
  }
  cache.set(segmentIndex, graphemes)
  return graphemes
}

// Returns the per-handle grapheme cache for `prepared`, creating and
// registering an empty one in `sharedLineTextCaches` on first use.
function getLineTextCache(prepared: PreparedTextWithSegments): Map<number, string[]> {
  let cache = sharedLineTextCaches.get(prepared)
  if (cache !== undefined) return cache

  cache = new Map<number, string[]>()
  sharedLineTextCaches.set(prepared, cache)
  return cache
}

// True when the segment just before the line's end is a soft hyphen, unless
// the line starts part-way through that same end segment (start and end
// segment equal with a non-zero start grapheme).
function lineHasDiscretionaryHyphen(
  kinds: SegmentBreakKind[],
  startSegmentIndex: number,
  startGraphemeIndex: number,
  endSegmentIndex: number,
): boolean {
  return (
    endSegmentIndex > 0 &&
    kinds[endSegmentIndex - 1] === 'soft-hyphen' &&
    !(startSegmentIndex === endSegmentIndex && startGraphemeIndex > 0)
  )
}

// Builds the visible text for the half-open range
// [start segment/grapheme, end segment/grapheme).
// Soft-hyphen and hard-break segments contribute no text of their own; a
// chosen soft-hyphen break is rendered as a literal '-'.
function buildLineTextFromRange(
  segments: string[],
  kinds: SegmentBreakKind[],
  cache: Map<number, string[]>,
  startSegmentIndex: number,
  startGraphemeIndex: number,
  endSegmentIndex: number,
  endGraphemeIndex: number,
): string {
  let text = ''
  const endsWithDiscretionaryHyphen = lineHasDiscretionaryHyphen(
    kinds,
    startSegmentIndex,
    startGraphemeIndex,
    endSegmentIndex,
  )

  // Whole segments before the end segment. Only the first one can be partial.
  for (let i = startSegmentIndex; i < endSegmentIndex; i++) {
    if (kinds[i] === 'soft-hyphen' || kinds[i] === 'hard-break') continue
    if (i === startSegmentIndex && startGraphemeIndex > 0) {
      text += getSegmentGraphemes(i, segments, cache).slice(startGraphemeIndex).join('')
    } else {
      text += segments[i]!
    }
  }

  if (endGraphemeIndex > 0) {
    // The line stops inside the end segment: append only its leading graphemes
    // (or the middle slice when the line also started inside that segment).
    if (endsWithDiscretionaryHyphen) text += '-'
    text += getSegmentGraphemes(endSegmentIndex, segments, cache).slice(
      startSegmentIndex === endSegmentIndex ? startGraphemeIndex : 0,
      endGraphemeIndex,
    ).join('')
  } else if (endsWithDiscretionaryHyphen) {
    text += '-'
  }

  return text
}

// Assembles a public LayoutLine (text + width + cursors) from raw indices.
function createLayoutLine(
  prepared: PreparedTextWithSegments,
  cache: Map<number, string[]>,
  width: number,
  startSegmentIndex: number,
  startGraphemeIndex: number,
  endSegmentIndex: number,
  endGraphemeIndex: number,
): LayoutLine {
  return {
    text: buildLineTextFromRange(
      prepared.segments,
      prepared.kinds,
      cache,
      startSegmentIndex,
      startGraphemeIndex,
      endSegmentIndex,
      endGraphemeIndex,
    ),
    width,
    start: {
      segmentIndex: startSegmentIndex,
      graphemeIndex: startGraphemeIndex,
    },
    end: {
      segmentIndex: endSegmentIndex,
      graphemeIndex: endGraphemeIndex,
    },
  }
}

// Converts a line-walker result into a LayoutLine using a caller-supplied
// grapheme cache (lets a batch pass look the cache up once).
function materializeLayoutLine(
  prepared: PreparedTextWithSegments,
  cache: Map<number, string[]>,
  line: InternalLayoutLine,
): LayoutLine {
  return createLayoutLine(
    prepared,
    cache,
    line.width,
    line.startSegmentIndex,
    line.startGraphemeIndex,
    line.endSegmentIndex,
    line.endGraphemeIndex,
  )
}

// Converts a line-walker result into the public cursor-based range shape
// without building any text.
function toLayoutLineRange(line: InternalLayoutLine): LayoutLineRange {
  return {
    width: line.width,
    start: {
      segmentIndex: line.startSegmentIndex,
      graphemeIndex: line.startGraphemeIndex,
    },
    end: {
      segmentIndex: line.endSegmentIndex,
      graphemeIndex: line.endGraphemeIndex,
    },
  }
}

// Steps one line forward from `start`; returns null when the line stepper
// reports that no further line exists.
function stepLineRange(
  prepared: PreparedTextWithSegments,
  start: LayoutCursor,
  maxWidth: number,
): LayoutLineRange | null {
  const line = stepPreparedLineRange(prepared, start, maxWidth)
  if (line === null) return null
  return toLayoutLineRange(line)
}

// Turns a public range back into a LayoutLine, using the shared per-handle
// grapheme cache.
function materializeLine(
  prepared: PreparedTextWithSegments,
  line: LayoutLineRange,
): LayoutLine {
  return createLayoutLine(
    prepared,
    getLineTextCache(prepared),
    line.width,
    line.start.segmentIndex,
    line.start.graphemeIndex,
    line.end.segmentIndex,
    line.end.graphemeIndex,
  )
}

// Batch low-level line geometry pass. This is the non-materializing counterpart
// to layoutWithLines(), useful for shrinkwrap and other aggregate geometry work.
//
// `onLine` is optional: when omitted the lines are only counted. Returns the
// number of lines (0 for an empty prepared handle).
export function walkLineRanges(
  prepared: PreparedTextWithSegments,
  maxWidth: number,
  onLine?: (line: LayoutLineRange) => void,
): number {
  if (prepared.widths.length === 0) return 0

  const internal = getInternalPrepared(prepared)
  // Without a callback there is nothing to convert; let the walker just count.
  // Calling an absent callback here would throw on the first emitted line.
  if (onLine === undefined) return walkPreparedLines(internal, maxWidth)

  return walkPreparedLines(internal, maxWidth, line => {
    onLine(toLayoutLineRange(line))
  })
}

// Lays out exactly one line starting at `start` and materializes its text.
// Returns null when no line can be produced from that cursor.
export function layoutNextLine(
  prepared: PreparedTextWithSegments,
  start: LayoutCursor,
  maxWidth: number,
): LayoutLine | null {
  const line = stepLineRange(prepared, start, maxWidth)
  if (line === null) return null
  return materializeLine(prepared, line)
}

// Rich layout API for callers that want the actual line contents and widths.
// Caller still supplies lineHeight at layout time. Mirrors layout()'s break
// decisions, but keeps extra per-line bookkeeping so it should stay off the
// resize hot path.
+export function layoutWithLines(prepared: PreparedTextWithSegments, maxWidth: number, lineHeight: number): LayoutLinesResult { + const lines: LayoutLine[] = [] + if (prepared.widths.length === 0) return { lineCount: 0, height: 0, lines } + + const graphemeCache = getLineTextCache(prepared) + const lineCount = walkPreparedLines(getInternalPrepared(prepared), maxWidth, line => { + lines.push(materializeLayoutLine(prepared, graphemeCache, line)) + }) + + return { lineCount, height: lineCount * lineHeight, lines } +} + +export function clearCache(): void { + clearAnalysisCaches() + sharedGraphemeSegmenter = null + sharedLineTextCaches = new WeakMap>() + clearMeasurementCaches() +} + +export function setLocale(locale?: string): void { + setAnalysisLocale(locale) + clearCache() +} diff --git a/packages/lynx-pretext/src/pretext/line-break.ts b/packages/lynx-pretext/src/pretext/line-break.ts new file mode 100644 index 0000000..57fa113 --- /dev/null +++ b/packages/lynx-pretext/src/pretext/line-break.ts @@ -0,0 +1,1056 @@ +import type { SegmentBreakKind } from './analysis.js' +import { getEngineProfile } from './measurement.js' + +export type LineBreakCursor = { + segmentIndex: number + graphemeIndex: number +} + +export type PreparedLineBreakData = { + widths: number[] + lineEndFitAdvances: number[] + lineEndPaintAdvances: number[] + kinds: SegmentBreakKind[] + simpleLineWalkFastPath: boolean + breakableWidths: (number[] | null)[] + breakablePrefixWidths: (number[] | null)[] + discretionaryHyphenWidth: number + tabStopAdvance: number + chunks: { + startSegmentIndex: number + endSegmentIndex: number + consumedEndSegmentIndex: number + }[] +} + +export type InternalLayoutLine = { + startSegmentIndex: number + startGraphemeIndex: number + endSegmentIndex: number + endGraphemeIndex: number + width: number +} + +function canBreakAfter(kind: SegmentBreakKind): boolean { + return ( + kind === 'space' || + kind === 'preserved-space' || + kind === 'tab' || + kind === 
'zero-width-break' || + kind === 'soft-hyphen' + ) +} + +function isSimpleCollapsibleSpace(kind: SegmentBreakKind): boolean { + return kind === 'space' +} + +function getTabAdvance(lineWidth: number, tabStopAdvance: number): number { + if (tabStopAdvance <= 0) return 0 + + const remainder = lineWidth % tabStopAdvance + if (Math.abs(remainder) <= 1e-6) return tabStopAdvance + return tabStopAdvance - remainder +} + +function getBreakableAdvance( + graphemeWidths: number[], + graphemePrefixWidths: number[] | null, + graphemeIndex: number, + preferPrefixWidths: boolean, +): number { + if (!preferPrefixWidths || graphemePrefixWidths === null) { + return graphemeWidths[graphemeIndex]! + } + return graphemePrefixWidths[graphemeIndex]! - (graphemeIndex > 0 ? graphemePrefixWidths[graphemeIndex - 1]! : 0) +} + +function fitSoftHyphenBreak( + graphemeWidths: number[], + initialWidth: number, + maxWidth: number, + lineFitEpsilon: number, + discretionaryHyphenWidth: number, + cumulativeWidths: boolean, +): { fitCount: number, fittedWidth: number } { + let fitCount = 0 + let fittedWidth = initialWidth + + while (fitCount < graphemeWidths.length) { + const nextWidth = cumulativeWidths + ? initialWidth + graphemeWidths[fitCount]! + : fittedWidth + graphemeWidths[fitCount]! + const nextLineWidth = fitCount + 1 < graphemeWidths.length + ? nextWidth + discretionaryHyphenWidth + : nextWidth + if (nextLineWidth > maxWidth + lineFitEpsilon) break + fittedWidth = nextWidth + fitCount++ + } + + return { fitCount, fittedWidth } +} + +function findChunkIndexForStart(prepared: PreparedLineBreakData, segmentIndex: number): number { + for (let i = 0; i < prepared.chunks.length; i++) { + const chunk = prepared.chunks[i]! 
+ if (segmentIndex < chunk.consumedEndSegmentIndex) return i + } + return -1 +} + +export function normalizeLineStart( + prepared: PreparedLineBreakData, + start: LineBreakCursor, +): LineBreakCursor | null { + let segmentIndex = start.segmentIndex + const graphemeIndex = start.graphemeIndex + + if (segmentIndex >= prepared.widths.length) return null + if (graphemeIndex > 0) return start + + const chunkIndex = findChunkIndexForStart(prepared, segmentIndex) + if (chunkIndex < 0) return null + + const chunk = prepared.chunks[chunkIndex]! + if (chunk.startSegmentIndex === chunk.endSegmentIndex && segmentIndex === chunk.startSegmentIndex) { + return { segmentIndex, graphemeIndex: 0 } + } + + if (segmentIndex < chunk.startSegmentIndex) segmentIndex = chunk.startSegmentIndex + while (segmentIndex < chunk.endSegmentIndex) { + const kind = prepared.kinds[segmentIndex]! + if (kind !== 'space' && kind !== 'zero-width-break' && kind !== 'soft-hyphen') { + return { segmentIndex, graphemeIndex: 0 } + } + segmentIndex++ + } + + if (chunk.consumedEndSegmentIndex >= prepared.widths.length) return null + return { segmentIndex: chunk.consumedEndSegmentIndex, graphemeIndex: 0 } +} + +export function countPreparedLines(prepared: PreparedLineBreakData, maxWidth: number): number { + if (prepared.simpleLineWalkFastPath) { + return countPreparedLinesSimple(prepared, maxWidth) + } + return walkPreparedLines(prepared, maxWidth) +} + +function countPreparedLinesSimple(prepared: PreparedLineBreakData, maxWidth: number): number { + const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared + if (widths.length === 0) return 0 + + const engineProfile = getEngineProfile() + const lineFitEpsilon = engineProfile.lineFitEpsilon + + let lineCount = 0 + let lineW = 0 + let hasContent = false + + function placeOnFreshLine(segmentIndex: number): void { + const w = widths[segmentIndex]! 
+ if (w > maxWidth && breakableWidths[segmentIndex] !== null) { + const gWidths = breakableWidths[segmentIndex]! + const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null + lineW = 0 + for (let g = 0; g < gWidths.length; g++) { + const gw = getBreakableAdvance( + gWidths, + gPrefixWidths, + g, + engineProfile.preferPrefixWidthsForBreakableRuns, + ) + if (lineW > 0 && lineW + gw > maxWidth + lineFitEpsilon) { + lineCount++ + lineW = gw + } else { + if (lineW === 0) lineCount++ + lineW += gw + } + } + } else { + lineW = w + lineCount++ + } + hasContent = true + } + + for (let i = 0; i < widths.length; i++) { + const w = widths[i]! + const kind = kinds[i]! + + if (!hasContent) { + placeOnFreshLine(i) + continue + } + + const newW = lineW + w + if (newW > maxWidth + lineFitEpsilon) { + if (isSimpleCollapsibleSpace(kind)) continue + lineW = 0 + hasContent = false + placeOnFreshLine(i) + continue + } + + lineW = newW + } + + if (!hasContent) return lineCount + 1 + return lineCount +} + +function walkPreparedLinesSimple( + prepared: PreparedLineBreakData, + maxWidth: number, + onLine?: (line: InternalLayoutLine) => void, +): number { + const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared + if (widths.length === 0) return 0 + + const engineProfile = getEngineProfile() + const lineFitEpsilon = engineProfile.lineFitEpsilon + + let lineCount = 0 + let lineW = 0 + let hasContent = false + let lineStartSegmentIndex = 0 + let lineStartGraphemeIndex = 0 + let lineEndSegmentIndex = 0 + let lineEndGraphemeIndex = 0 + let pendingBreakSegmentIndex = -1 + let pendingBreakPaintWidth = 0 + + function clearPendingBreak(): void { + pendingBreakSegmentIndex = -1 + pendingBreakPaintWidth = 0 + } + + function emitCurrentLine( + endSegmentIndex = lineEndSegmentIndex, + endGraphemeIndex = lineEndGraphemeIndex, + width = lineW, + ): void { + lineCount++ + onLine?.({ + startSegmentIndex: lineStartSegmentIndex, + startGraphemeIndex: lineStartGraphemeIndex, + 
endSegmentIndex, + endGraphemeIndex, + width, + }) + lineW = 0 + hasContent = false + clearPendingBreak() + } + + function startLineAtSegment(segmentIndex: number, width: number): void { + hasContent = true + lineStartSegmentIndex = segmentIndex + lineStartGraphemeIndex = 0 + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + lineW = width + } + + function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { + hasContent = true + lineStartSegmentIndex = segmentIndex + lineStartGraphemeIndex = graphemeIndex + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = graphemeIndex + 1 + lineW = width + } + + function appendWholeSegment(segmentIndex: number, width: number): void { + if (!hasContent) { + startLineAtSegment(segmentIndex, width) + return + } + lineW += width + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + } + + function updatePendingBreak(segmentIndex: number, segmentWidth: number): void { + if (!canBreakAfter(kinds[segmentIndex]!)) return + pendingBreakSegmentIndex = segmentIndex + 1 + pendingBreakPaintWidth = lineW - segmentWidth + } + + function appendBreakableSegment(segmentIndex: number): void { + appendBreakableSegmentFrom(segmentIndex, 0) + } + + function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void { + const gWidths = breakableWidths[segmentIndex]! + const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? 
null + for (let g = startGraphemeIndex; g < gWidths.length; g++) { + const gw = getBreakableAdvance( + gWidths, + gPrefixWidths, + g, + engineProfile.preferPrefixWidthsForBreakableRuns, + ) + + if (!hasContent) { + startLineAtGrapheme(segmentIndex, g, gw) + continue + } + + if (lineW + gw > maxWidth + lineFitEpsilon) { + emitCurrentLine() + startLineAtGrapheme(segmentIndex, g, gw) + } else { + lineW += gw + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = g + 1 + } + } + + if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + } + } + + let i = 0 + while (i < widths.length) { + const w = widths[i]! + const kind = kinds[i]! + + if (!hasContent) { + if (w > maxWidth && breakableWidths[i] !== null) { + appendBreakableSegment(i) + } else { + startLineAtSegment(i, w) + } + updatePendingBreak(i, w) + i++ + continue + } + + const newW = lineW + w + if (newW > maxWidth + lineFitEpsilon) { + if (canBreakAfter(kind)) { + appendWholeSegment(i, w) + emitCurrentLine(i + 1, 0, lineW - w) + i++ + continue + } + + if (pendingBreakSegmentIndex >= 0) { + emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) + continue + } + + if (w > maxWidth && breakableWidths[i] !== null) { + emitCurrentLine() + appendBreakableSegment(i) + i++ + continue + } + + emitCurrentLine() + continue + } + + appendWholeSegment(i, w) + updatePendingBreak(i, w) + i++ + } + + if (hasContent) emitCurrentLine() + return lineCount +} + +export function walkPreparedLines( + prepared: PreparedLineBreakData, + maxWidth: number, + onLine?: (line: InternalLayoutLine) => void, +): number { + if (prepared.simpleLineWalkFastPath) { + return walkPreparedLinesSimple(prepared, maxWidth, onLine) + } + + const { + widths, + lineEndFitAdvances, + lineEndPaintAdvances, + kinds, + breakableWidths, + breakablePrefixWidths, + discretionaryHyphenWidth, + tabStopAdvance, + chunks, + } = 
prepared + if (widths.length === 0 || chunks.length === 0) return 0 + + const engineProfile = getEngineProfile() + const lineFitEpsilon = engineProfile.lineFitEpsilon + + let lineCount = 0 + let lineW = 0 + let hasContent = false + let lineStartSegmentIndex = 0 + let lineStartGraphemeIndex = 0 + let lineEndSegmentIndex = 0 + let lineEndGraphemeIndex = 0 + let pendingBreakSegmentIndex = -1 + let pendingBreakFitWidth = 0 + let pendingBreakPaintWidth = 0 + let pendingBreakKind: SegmentBreakKind | null = null + + function clearPendingBreak(): void { + pendingBreakSegmentIndex = -1 + pendingBreakFitWidth = 0 + pendingBreakPaintWidth = 0 + pendingBreakKind = null + } + + function emitCurrentLine( + endSegmentIndex = lineEndSegmentIndex, + endGraphemeIndex = lineEndGraphemeIndex, + width = lineW, + ): void { + lineCount++ + onLine?.({ + startSegmentIndex: lineStartSegmentIndex, + startGraphemeIndex: lineStartGraphemeIndex, + endSegmentIndex, + endGraphemeIndex, + width, + }) + lineW = 0 + hasContent = false + clearPendingBreak() + } + + function startLineAtSegment(segmentIndex: number, width: number): void { + hasContent = true + lineStartSegmentIndex = segmentIndex + lineStartGraphemeIndex = 0 + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + lineW = width + } + + function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { + hasContent = true + lineStartSegmentIndex = segmentIndex + lineStartGraphemeIndex = graphemeIndex + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = graphemeIndex + 1 + lineW = width + } + + function appendWholeSegment(segmentIndex: number, width: number): void { + if (!hasContent) { + startLineAtSegment(segmentIndex, width) + return + } + lineW += width + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + } + + function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { + if (!canBreakAfter(kinds[segmentIndex]!)) return + const 
fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! + const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! + pendingBreakSegmentIndex = segmentIndex + 1 + pendingBreakFitWidth = lineW - segmentWidth + fitAdvance + pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance + pendingBreakKind = kinds[segmentIndex]! + } + + function appendBreakableSegment(segmentIndex: number): void { + appendBreakableSegmentFrom(segmentIndex, 0) + } + + function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): void { + const gWidths = breakableWidths[segmentIndex]! + const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null + for (let g = startGraphemeIndex; g < gWidths.length; g++) { + const gw = getBreakableAdvance( + gWidths, + gPrefixWidths, + g, + engineProfile.preferPrefixWidthsForBreakableRuns, + ) + + if (!hasContent) { + startLineAtGrapheme(segmentIndex, g, gw) + continue + } + + if (lineW + gw > maxWidth + lineFitEpsilon) { + emitCurrentLine() + startLineAtGrapheme(segmentIndex, g, gw) + } else { + lineW += gw + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = g + 1 + } + } + + if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + } + } + + function continueSoftHyphenBreakableSegment(segmentIndex: number): boolean { + if (pendingBreakKind !== 'soft-hyphen') return false + const gWidths = breakableWidths[segmentIndex]! + if (gWidths === null) return false + const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns + ? breakablePrefixWidths[segmentIndex] ?? 
gWidths + : gWidths + const usesPrefixWidths = fitWidths !== gWidths + const { fitCount, fittedWidth } = fitSoftHyphenBreak( + fitWidths, + lineW, + maxWidth, + lineFitEpsilon, + discretionaryHyphenWidth, + usesPrefixWidths, + ) + if (fitCount === 0) return false + + lineW = fittedWidth + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = fitCount + clearPendingBreak() + + if (fitCount === gWidths.length) { + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + return true + } + + emitCurrentLine( + segmentIndex, + fitCount, + fittedWidth + discretionaryHyphenWidth, + ) + appendBreakableSegmentFrom(segmentIndex, fitCount) + return true + } + + function emitEmptyChunk(chunk: { startSegmentIndex: number, consumedEndSegmentIndex: number }): void { + lineCount++ + onLine?.({ + startSegmentIndex: chunk.startSegmentIndex, + startGraphemeIndex: 0, + endSegmentIndex: chunk.consumedEndSegmentIndex, + endGraphemeIndex: 0, + width: 0, + }) + clearPendingBreak() + } + + for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) { + const chunk = chunks[chunkIndex]! + if (chunk.startSegmentIndex === chunk.endSegmentIndex) { + emitEmptyChunk(chunk) + continue + } + + hasContent = false + lineW = 0 + lineStartSegmentIndex = chunk.startSegmentIndex + lineStartGraphemeIndex = 0 + lineEndSegmentIndex = chunk.startSegmentIndex + lineEndGraphemeIndex = 0 + clearPendingBreak() + + let i = chunk.startSegmentIndex + while (i < chunk.endSegmentIndex) { + const kind = kinds[i]! + const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]! 
+ + if (kind === 'soft-hyphen') { + if (hasContent) { + lineEndSegmentIndex = i + 1 + lineEndGraphemeIndex = 0 + pendingBreakSegmentIndex = i + 1 + pendingBreakFitWidth = lineW + discretionaryHyphenWidth + pendingBreakPaintWidth = lineW + discretionaryHyphenWidth + pendingBreakKind = kind + } + i++ + continue + } + + if (!hasContent) { + if (w > maxWidth && breakableWidths[i] !== null) { + appendBreakableSegment(i) + } else { + startLineAtSegment(i, w) + } + updatePendingBreakForWholeSegment(i, w) + i++ + continue + } + + const newW = lineW + w + if (newW > maxWidth + lineFitEpsilon) { + const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!) + const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!) + + if ( + pendingBreakKind === 'soft-hyphen' && + engineProfile.preferEarlySoftHyphenBreak && + pendingBreakFitWidth <= maxWidth + lineFitEpsilon + ) { + emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) + continue + } + + if (pendingBreakKind === 'soft-hyphen' && continueSoftHyphenBreakableSegment(i)) { + i++ + continue + } + + if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) { + appendWholeSegment(i, w) + emitCurrentLine(i + 1, 0, currentBreakPaintWidth) + i++ + continue + } + + if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { + emitCurrentLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) + continue + } + + if (w > maxWidth && breakableWidths[i] !== null) { + emitCurrentLine() + appendBreakableSegment(i) + i++ + continue + } + + emitCurrentLine() + continue + } + + appendWholeSegment(i, w) + updatePendingBreakForWholeSegment(i, w) + i++ + } + + if (hasContent) { + const finalPaintWidth = + pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex + ? 
pendingBreakPaintWidth + : lineW + emitCurrentLine(chunk.consumedEndSegmentIndex, 0, finalPaintWidth) + } + } + + return lineCount +} + +export function layoutNextLineRange( + prepared: PreparedLineBreakData, + start: LineBreakCursor, + maxWidth: number, +): InternalLayoutLine | null { + const normalizedStart = normalizeLineStart(prepared, start) + if (normalizedStart === null) return null + + if (prepared.simpleLineWalkFastPath) { + return layoutNextLineRangeSimple(prepared, normalizedStart, maxWidth) + } + + const chunkIndex = findChunkIndexForStart(prepared, normalizedStart.segmentIndex) + if (chunkIndex < 0) return null + + const chunk = prepared.chunks[chunkIndex]! + if (chunk.startSegmentIndex === chunk.endSegmentIndex) { + return { + startSegmentIndex: chunk.startSegmentIndex, + startGraphemeIndex: 0, + endSegmentIndex: chunk.consumedEndSegmentIndex, + endGraphemeIndex: 0, + width: 0, + } + } + + const { + widths, + lineEndFitAdvances, + lineEndPaintAdvances, + kinds, + breakableWidths, + breakablePrefixWidths, + discretionaryHyphenWidth, + tabStopAdvance, + } = prepared + const engineProfile = getEngineProfile() + const lineFitEpsilon = engineProfile.lineFitEpsilon + + let lineW = 0 + let hasContent = false + const lineStartSegmentIndex = normalizedStart.segmentIndex + const lineStartGraphemeIndex = normalizedStart.graphemeIndex + let lineEndSegmentIndex = lineStartSegmentIndex + let lineEndGraphemeIndex = lineStartGraphemeIndex + let pendingBreakSegmentIndex = -1 + let pendingBreakFitWidth = 0 + let pendingBreakPaintWidth = 0 + let pendingBreakKind: SegmentBreakKind | null = null + + function clearPendingBreak(): void { + pendingBreakSegmentIndex = -1 + pendingBreakFitWidth = 0 + pendingBreakPaintWidth = 0 + pendingBreakKind = null + } + + function finishLine( + endSegmentIndex = lineEndSegmentIndex, + endGraphemeIndex = lineEndGraphemeIndex, + width = lineW, + ): InternalLayoutLine | null { + if (!hasContent) return null + + return { + startSegmentIndex: 
lineStartSegmentIndex, + startGraphemeIndex: lineStartGraphemeIndex, + endSegmentIndex, + endGraphemeIndex, + width, + } + } + + function startLineAtSegment(segmentIndex: number, width: number): void { + hasContent = true + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + lineW = width + } + + function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { + hasContent = true + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = graphemeIndex + 1 + lineW = width + } + + function appendWholeSegment(segmentIndex: number, width: number): void { + if (!hasContent) { + startLineAtSegment(segmentIndex, width) + return + } + lineW += width + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + } + + function updatePendingBreakForWholeSegment(segmentIndex: number, segmentWidth: number): void { + if (!canBreakAfter(kinds[segmentIndex]!)) return + const fitAdvance = kinds[segmentIndex] === 'tab' ? 0 : lineEndFitAdvances[segmentIndex]! + const paintAdvance = kinds[segmentIndex] === 'tab' ? segmentWidth : lineEndPaintAdvances[segmentIndex]! + pendingBreakSegmentIndex = segmentIndex + 1 + pendingBreakFitWidth = lineW - segmentWidth + fitAdvance + pendingBreakPaintWidth = lineW - segmentWidth + paintAdvance + pendingBreakKind = kinds[segmentIndex]! + } + + function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null { + const gWidths = breakableWidths[segmentIndex]! + const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? 
null + for (let g = startGraphemeIndex; g < gWidths.length; g++) { + const gw = getBreakableAdvance( + gWidths, + gPrefixWidths, + g, + engineProfile.preferPrefixWidthsForBreakableRuns, + ) + + if (!hasContent) { + startLineAtGrapheme(segmentIndex, g, gw) + continue + } + + if (lineW + gw > maxWidth + lineFitEpsilon) { + return finishLine() + } + + lineW += gw + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = g + 1 + } + + if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + } + return null + } + + function maybeFinishAtSoftHyphen(segmentIndex: number): InternalLayoutLine | null { + if (pendingBreakKind !== 'soft-hyphen' || pendingBreakSegmentIndex < 0) return null + + const gWidths = breakableWidths[segmentIndex] ?? null + if (gWidths !== null) { + const fitWidths = engineProfile.preferPrefixWidthsForBreakableRuns + ? breakablePrefixWidths[segmentIndex] ?? gWidths + : gWidths + const usesPrefixWidths = fitWidths !== gWidths + const { fitCount, fittedWidth } = fitSoftHyphenBreak( + fitWidths, + lineW, + maxWidth, + lineFitEpsilon, + discretionaryHyphenWidth, + usesPrefixWidths, + ) + + if (fitCount === gWidths.length) { + lineW = fittedWidth + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + clearPendingBreak() + return null + } + + if (fitCount > 0) { + return finishLine( + segmentIndex, + fitCount, + fittedWidth + discretionaryHyphenWidth, + ) + } + } + + if (pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { + return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) + } + + return null + } + + for (let i = normalizedStart.segmentIndex; i < chunk.endSegmentIndex; i++) { + const kind = kinds[i]! + const startGraphemeIndex = i === normalizedStart.segmentIndex ? normalizedStart.graphemeIndex : 0 + const w = kind === 'tab' ? getTabAdvance(lineW, tabStopAdvance) : widths[i]! 
+ + if (kind === 'soft-hyphen' && startGraphemeIndex === 0) { + if (hasContent) { + lineEndSegmentIndex = i + 1 + lineEndGraphemeIndex = 0 + pendingBreakSegmentIndex = i + 1 + pendingBreakFitWidth = lineW + discretionaryHyphenWidth + pendingBreakPaintWidth = lineW + discretionaryHyphenWidth + pendingBreakKind = kind + } + continue + } + + if (!hasContent) { + if (startGraphemeIndex > 0) { + const line = appendBreakableSegmentFrom(i, startGraphemeIndex) + if (line !== null) return line + } else if (w > maxWidth && breakableWidths[i] !== null) { + const line = appendBreakableSegmentFrom(i, 0) + if (line !== null) return line + } else { + startLineAtSegment(i, w) + } + updatePendingBreakForWholeSegment(i, w) + continue + } + + const newW = lineW + w + if (newW > maxWidth + lineFitEpsilon) { + const currentBreakFitWidth = lineW + (kind === 'tab' ? 0 : lineEndFitAdvances[i]!) + const currentBreakPaintWidth = lineW + (kind === 'tab' ? w : lineEndPaintAdvances[i]!) + + if ( + pendingBreakKind === 'soft-hyphen' && + engineProfile.preferEarlySoftHyphenBreak && + pendingBreakFitWidth <= maxWidth + lineFitEpsilon + ) { + return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) + } + + const softBreakLine = maybeFinishAtSoftHyphen(i) + if (softBreakLine !== null) return softBreakLine + + if (canBreakAfter(kind) && currentBreakFitWidth <= maxWidth + lineFitEpsilon) { + appendWholeSegment(i, w) + return finishLine(i + 1, 0, currentBreakPaintWidth) + } + + if (pendingBreakSegmentIndex >= 0 && pendingBreakFitWidth <= maxWidth + lineFitEpsilon) { + return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) + } + + if (w > maxWidth && breakableWidths[i] !== null) { + const currentLine = finishLine() + if (currentLine !== null) return currentLine + const line = appendBreakableSegmentFrom(i, 0) + if (line !== null) return line + } + + return finishLine() + } + + appendWholeSegment(i, w) + updatePendingBreakForWholeSegment(i, w) + } + + if 
(pendingBreakSegmentIndex === chunk.consumedEndSegmentIndex && lineEndGraphemeIndex === 0) { + return finishLine(chunk.consumedEndSegmentIndex, 0, pendingBreakPaintWidth) + } + + return finishLine(chunk.consumedEndSegmentIndex, 0, lineW) +} + +function layoutNextLineRangeSimple( + prepared: PreparedLineBreakData, + normalizedStart: LineBreakCursor, + maxWidth: number, +): InternalLayoutLine | null { + const { widths, kinds, breakableWidths, breakablePrefixWidths } = prepared + const engineProfile = getEngineProfile() + const lineFitEpsilon = engineProfile.lineFitEpsilon + + let lineW = 0 + let hasContent = false + const lineStartSegmentIndex = normalizedStart.segmentIndex + const lineStartGraphemeIndex = normalizedStart.graphemeIndex + let lineEndSegmentIndex = lineStartSegmentIndex + let lineEndGraphemeIndex = lineStartGraphemeIndex + let pendingBreakSegmentIndex = -1 + let pendingBreakPaintWidth = 0 + + function finishLine( + endSegmentIndex = lineEndSegmentIndex, + endGraphemeIndex = lineEndGraphemeIndex, + width = lineW, + ): InternalLayoutLine | null { + if (!hasContent) return null + + return { + startSegmentIndex: lineStartSegmentIndex, + startGraphemeIndex: lineStartGraphemeIndex, + endSegmentIndex, + endGraphemeIndex, + width, + } + } + + function startLineAtSegment(segmentIndex: number, width: number): void { + hasContent = true + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + lineW = width + } + + function startLineAtGrapheme(segmentIndex: number, graphemeIndex: number, width: number): void { + hasContent = true + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = graphemeIndex + 1 + lineW = width + } + + function appendWholeSegment(segmentIndex: number, width: number): void { + if (!hasContent) { + startLineAtSegment(segmentIndex, width) + return + } + lineW += width + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + } + + function updatePendingBreak(segmentIndex: number, segmentWidth: number): void { + 
if (!canBreakAfter(kinds[segmentIndex]!)) return + pendingBreakSegmentIndex = segmentIndex + 1 + pendingBreakPaintWidth = lineW - segmentWidth + } + + function appendBreakableSegmentFrom(segmentIndex: number, startGraphemeIndex: number): InternalLayoutLine | null { + const gWidths = breakableWidths[segmentIndex]! + const gPrefixWidths = breakablePrefixWidths[segmentIndex] ?? null + for (let g = startGraphemeIndex; g < gWidths.length; g++) { + const gw = getBreakableAdvance( + gWidths, + gPrefixWidths, + g, + engineProfile.preferPrefixWidthsForBreakableRuns, + ) + + if (!hasContent) { + startLineAtGrapheme(segmentIndex, g, gw) + continue + } + + if (lineW + gw > maxWidth + lineFitEpsilon) { + return finishLine() + } + + lineW += gw + lineEndSegmentIndex = segmentIndex + lineEndGraphemeIndex = g + 1 + } + + if (hasContent && lineEndSegmentIndex === segmentIndex && lineEndGraphemeIndex === gWidths.length) { + lineEndSegmentIndex = segmentIndex + 1 + lineEndGraphemeIndex = 0 + } + return null + } + + for (let i = normalizedStart.segmentIndex; i < widths.length; i++) { + const w = widths[i]! + const kind = kinds[i]! + const startGraphemeIndex = i === normalizedStart.segmentIndex ? 
normalizedStart.graphemeIndex : 0 + + if (!hasContent) { + if (startGraphemeIndex > 0) { + const line = appendBreakableSegmentFrom(i, startGraphemeIndex) + if (line !== null) return line + } else if (w > maxWidth && breakableWidths[i] !== null) { + const line = appendBreakableSegmentFrom(i, 0) + if (line !== null) return line + } else { + startLineAtSegment(i, w) + } + updatePendingBreak(i, w) + continue + } + + const newW = lineW + w + if (newW > maxWidth + lineFitEpsilon) { + if (canBreakAfter(kind)) { + appendWholeSegment(i, w) + return finishLine(i + 1, 0, lineW - w) + } + + if (pendingBreakSegmentIndex >= 0) { + return finishLine(pendingBreakSegmentIndex, 0, pendingBreakPaintWidth) + } + + if (w > maxWidth && breakableWidths[i] !== null) { + const currentLine = finishLine() + if (currentLine !== null) return currentLine + const line = appendBreakableSegmentFrom(i, 0) + if (line !== null) return line + } + + return finishLine() + } + + appendWholeSegment(i, w) + updatePendingBreak(i, w) + } + + return finishLine() +} diff --git a/packages/lynx-pretext/src/pretext/measurement.ts b/packages/lynx-pretext/src/pretext/measurement.ts new file mode 100644 index 0000000..892417a --- /dev/null +++ b/packages/lynx-pretext/src/pretext/measurement.ts @@ -0,0 +1,350 @@ +import { isCJK } from './analysis.js' + +export type SegmentMetrics = { + width: number + containsCJK: boolean + emojiCount?: number + graphemeWidths?: number[] | null + graphemePrefixWidths?: number[] | null +} + +export type EngineProfile = { + lineFitEpsilon: number + carryCJKAfterClosingQuote: boolean + preferPrefixWidthsForBreakableRuns: boolean + preferEarlySoftHyphenBreak: boolean +} + +export type FontMeasurementState = { + cache: Map + fontSize: number + emojiCorrection: number +} + +export type MeasurementHost = { + clearMeasurementCaches(): void + getSegmentMetrics(seg: string, cache: Map): SegmentMetrics + getEngineProfile(): EngineProfile + getCorrectedSegmentWidth(seg: string, metrics: 
SegmentMetrics, emojiCorrection: number): number + getSegmentGraphemeWidths( + seg: string, + metrics: SegmentMetrics, + cache: Map, + emojiCorrection: number, + ): number[] | null + getSegmentGraphemePrefixWidths( + seg: string, + metrics: SegmentMetrics, + cache: Map, + emojiCorrection: number, + ): number[] | null + getFontMeasurementState(font: string, needsEmojiCorrection: boolean): FontMeasurementState + textMayContainEmoji(text: string): boolean +} + +let measureContext: CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D | null = null +const segmentMetricCaches = new Map>() +let cachedEngineProfile: EngineProfile | null = null +let measurementHostOverride: MeasurementHost | null = null + +const maybeEmojiFallbackRe = /[\xA9\xAE\u203C\u2049\u20E3\u2122\u2139\u2194-\u2199\u21A9\u21AA\u231A\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA\u24C2\u25AA\u25AB\u25B6\u25C0\u25FB-\u25FE\u2600-\u2604\u260E\u2611\u2614\u2615\u2618\u261D\u2620\u2622\u2623\u2626\u262A\u262E\u262F\u2638-\u263A\u2640\u2642\u2648-\u2653\u265F\u2660\u2663\u2665\u2666\u2668\u267B\u267E\u267F\u2692-\u2697\u2699\u269B\u269C\u26A0\u26A1\u26A7\u26AA\u26AB\u26B0\u26B1\u26BD\u26BE\u26C4\u26C5\u26C8\u26CE\u26CF\u26D1\u26D3\u26D4\u26E9\u26EA\u26F0-\u26F5\u26F7-\u26FA\u26FD\u2702\u2705\u2708-\u270D\u270F\u2712\u2714\u2716\u271D\u2721\u2728\u2733\u2734\u2744\u2747\u274C\u274E\u2753-\u2755\u2757\u2763\u2764\u2795-\u2797\u27A1\u27B0\u27BF\u2934\u2935\u2B05-\u2B07\u2B1B\u2B1C\u2B50\u2B55\u3030\u303D\u3297\u3299\uFE0F\u{1F004}\u{1F02C}-\u{1F02F}\u{1F094}-\u{1F09F}\u{1F0AF}\u{1F0B0}\u{1F0C0}\u{1F0CF}\u{1F0D0}\u{1F0F6}-\u{1F0FF}\u{1F170}\u{1F171}\u{1F17E}\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1AE}-\u{1F1FF}\u{1F201}-\u{1F20F}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F23C}-\u{1F23F}\u{1F249}-\u{1F25F}\u{1F266}-\u{1F321}\u{1F324}-\u{1F393}\u{1F396}\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}-\u{1F3F0}\u{1F3F3}-\u{1F3F5}\u{1F3F8}-\u{1F4FD}\u{1F3F7}-\u{1F3FA}\u{1F4FF}-\u{1F53D}\u{1F549}-\u{1F54E}\u{1F550}-\u{1F56
7}\u{1F56F}\u{1F570}\u{1F573}-\u{1F57A}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F590}\u{1F595}\u{1F596}\u{1F5A4}\u{1F5A5}\u{1F5A8}\u{1F5B1}\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5E8}\u{1F5EF}\u{1F5F3}\u{1F5FA}-\u{1F64F}\u{1F680}-\u{1F6C5}\u{1F6CB}-\u{1F6D2}\u{1F6D5}-\u{1F6E5}\u{1F6E9}\u{1F6EB}-\u{1F6F0}\u{1F6F3}-\u{1F6FF}\u{1F7DA}-\u{1F7FF}\u{1F80C}-\u{1F80F}\u{1F848}-\u{1F84F}\u{1F85A}-\u{1F85F}\u{1F888}-\u{1F88F}\u{1F8AE}\u{1F8AF}\u{1F8BC}-\u{1F8BF}\u{1F8C2}-\u{1F8CF}\u{1F8D9}-\u{1F8FF}\u{1F90C}-\u{1F93A}\u{1F93C}-\u{1F945}\u{1F947}-\u{1F9FF}\u{1FA58}-\u{1FA5F}\u{1FA6E}-\u{1FAFF}\u{1FC00}-\u{1FFFD}]/u +let emojiPresentationRe: RegExp | null = null +let maybeEmojiRe: RegExp | null = null +let sharedGraphemeSegmenter: Intl.Segmenter | null = null +const emojiCorrectionCache = new Map() + +function getEmojiPresentationRe(): RegExp { + if (emojiPresentationRe !== null) return emojiPresentationRe + try { + emojiPresentationRe = new RegExp('\\p{Emoji_Presentation}', 'u') + } catch { + emojiPresentationRe = maybeEmojiFallbackRe + } + return emojiPresentationRe +} + +function getMaybeEmojiRe(): RegExp { + if (maybeEmojiRe !== null) return maybeEmojiRe + try { + maybeEmojiRe = new RegExp( + '[\\p{Emoji_Presentation}\\p{Extended_Pictographic}\\p{Regional_Indicator}\\uFE0F\\u20E3]', + 'u', + ) + } catch { + maybeEmojiRe = maybeEmojiFallbackRe + } + return maybeEmojiRe +} + +export function getMeasureContext(): CanvasRenderingContext2D | OffscreenCanvasRenderingContext2D { + if (measureContext !== null) return measureContext + + if (typeof OffscreenCanvas !== 'undefined') { + measureContext = new OffscreenCanvas(1, 1).getContext('2d')! + return measureContext + } + + if (typeof document !== 'undefined') { + measureContext = document.createElement('canvas').getContext('2d')! 
+ return measureContext + } + + throw new Error('Text measurement requires OffscreenCanvas or a DOM canvas context.') +} + +export function getSegmentMetricCache(font: string): Map { + let cache = segmentMetricCaches.get(font) + if (!cache) { + cache = new Map() + segmentMetricCaches.set(font, cache) + } + return cache +} + +function browserGetSegmentMetrics(seg: string, cache: Map): SegmentMetrics { + let metrics = cache.get(seg) + if (metrics === undefined) { + const ctx = getMeasureContext() + metrics = { + width: ctx.measureText(seg).width, + containsCJK: isCJK(seg), + } + cache.set(seg, metrics) + } + return metrics +} + +function browserGetEngineProfile(): EngineProfile { + if (cachedEngineProfile !== null) return cachedEngineProfile + + if (typeof navigator === 'undefined') { + cachedEngineProfile = { + lineFitEpsilon: 0.005, + carryCJKAfterClosingQuote: false, + preferPrefixWidthsForBreakableRuns: false, + preferEarlySoftHyphenBreak: false, + } + return cachedEngineProfile + } + + const ua = navigator.userAgent + const vendor = navigator.vendor + const isSafari = + vendor === 'Apple Computer, Inc.' && + ua.includes('Safari/') && + !ua.includes('Chrome/') && + !ua.includes('Chromium/') && + !ua.includes('CriOS/') && + !ua.includes('FxiOS/') && + !ua.includes('EdgiOS/') + const isChromium = + ua.includes('Chrome/') || + ua.includes('Chromium/') || + ua.includes('CriOS/') || + ua.includes('Edg/') + + cachedEngineProfile = { + lineFitEpsilon: isSafari ? 1 / 64 : 0.005, + carryCJKAfterClosingQuote: isChromium, + preferPrefixWidthsForBreakableRuns: isSafari, + preferEarlySoftHyphenBreak: isSafari, + } + return cachedEngineProfile +} + +export function parseFontSize(font: string): number { + const m = font.match(/(\d+(?:\.\d+)?)\s*px/) + return m ? parseFloat(m[1]!) 
: 16 +} + +function getSharedGraphemeSegmenter(): Intl.Segmenter { + if (sharedGraphemeSegmenter === null) { + sharedGraphemeSegmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme' }) + } + return sharedGraphemeSegmenter +} + +function isEmojiGrapheme(g: string): boolean { + return getEmojiPresentationRe().test(g) || g.includes('\uFE0F') +} + +function browserTextMayContainEmoji(text: string): boolean { + return getMaybeEmojiRe().test(text) +} + +function getEmojiCorrection(font: string, fontSize: number): number { + let correction = emojiCorrectionCache.get(font) + if (correction !== undefined) return correction + + const ctx = getMeasureContext() + ctx.font = font + const canvasW = ctx.measureText('\u{1F600}').width + correction = 0 + if ( + canvasW > fontSize + 0.5 && + typeof document !== 'undefined' && + document.body !== null + ) { + const span = document.createElement('span') + span.style.font = font + span.style.display = 'inline-block' + span.style.visibility = 'hidden' + span.style.position = 'absolute' + span.textContent = '\u{1F600}' + document.body.appendChild(span) + const domW = span.getBoundingClientRect().width + document.body.removeChild(span) + if (canvasW - domW > 0.5) { + correction = canvasW - domW + } + } + emojiCorrectionCache.set(font, correction) + return correction +} + +function countEmojiGraphemes(text: string): number { + let count = 0 + const graphemeSegmenter = getSharedGraphemeSegmenter() + for (const g of graphemeSegmenter.segment(text)) { + if (isEmojiGrapheme(g.segment)) count++ + } + return count +} + +function getEmojiCount(seg: string, metrics: SegmentMetrics): number { + if (metrics.emojiCount === undefined) { + metrics.emojiCount = countEmojiGraphemes(seg) + } + return metrics.emojiCount +} + +function browserGetCorrectedSegmentWidth( + seg: string, + metrics: SegmentMetrics, + emojiCorrection: number, +): number { + if (emojiCorrection === 0) return metrics.width + return metrics.width - getEmojiCount(seg, 
metrics) * emojiCorrection +} + +function browserGetSegmentGraphemeWidths( + seg: string, + metrics: SegmentMetrics, + cache: Map, + emojiCorrection: number, +): number[] | null { + if (metrics.graphemeWidths !== undefined) return metrics.graphemeWidths + + const widths: number[] = [] + const graphemeSegmenter = getSharedGraphemeSegmenter() + for (const gs of graphemeSegmenter.segment(seg)) { + const graphemeMetrics = browserGetSegmentMetrics(gs.segment, cache) + widths.push(browserGetCorrectedSegmentWidth(gs.segment, graphemeMetrics, emojiCorrection)) + } + + metrics.graphemeWidths = widths.length > 1 ? widths : null + return metrics.graphemeWidths +} + +function browserGetSegmentGraphemePrefixWidths( + seg: string, + metrics: SegmentMetrics, + cache: Map, + emojiCorrection: number, +): number[] | null { + if (metrics.graphemePrefixWidths !== undefined) return metrics.graphemePrefixWidths + + const prefixWidths: number[] = [] + const graphemeSegmenter = getSharedGraphemeSegmenter() + let prefix = '' + for (const gs of graphemeSegmenter.segment(seg)) { + prefix += gs.segment + const prefixMetrics = browserGetSegmentMetrics(prefix, cache) + prefixWidths.push(browserGetCorrectedSegmentWidth(prefix, prefixMetrics, emojiCorrection)) + } + + metrics.graphemePrefixWidths = prefixWidths.length > 1 ? prefixWidths : null + return metrics.graphemePrefixWidths +} + +function browserGetFontMeasurementState(font: string, needsEmojiCorrection: boolean): FontMeasurementState { + const ctx = getMeasureContext() + ctx.font = font + const cache = getSegmentMetricCache(font) + const fontSize = parseFontSize(font) + const emojiCorrection = needsEmojiCorrection ? 
getEmojiCorrection(font, fontSize) : 0 + return { cache, fontSize, emojiCorrection } +} + +function browserClearMeasurementCaches(): void { + segmentMetricCaches.clear() + emojiCorrectionCache.clear() + sharedGraphemeSegmenter = null +} + +export const browserMeasurementHost: MeasurementHost = { + clearMeasurementCaches: browserClearMeasurementCaches, + getSegmentMetrics: browserGetSegmentMetrics, + getEngineProfile: browserGetEngineProfile, + getCorrectedSegmentWidth: browserGetCorrectedSegmentWidth, + getSegmentGraphemeWidths: browserGetSegmentGraphemeWidths, + getSegmentGraphemePrefixWidths: browserGetSegmentGraphemePrefixWidths, + getFontMeasurementState: browserGetFontMeasurementState, + textMayContainEmoji: browserTextMayContainEmoji, +} + +function getActiveMeasurementHost(): MeasurementHost { + return measurementHostOverride ?? browserMeasurementHost +} + +export function withMeasurementHost(measurementHost: MeasurementHost, fn: () => T): T { + const previousHost = measurementHostOverride + measurementHostOverride = measurementHost + try { + return fn() + } finally { + measurementHostOverride = previousHost + } +} + +export function getSegmentMetrics(seg: string, cache: Map): SegmentMetrics { + return getActiveMeasurementHost().getSegmentMetrics(seg, cache) +} + +export function getEngineProfile(): EngineProfile { + return getActiveMeasurementHost().getEngineProfile() +} + +export function textMayContainEmoji(text: string): boolean { + return getActiveMeasurementHost().textMayContainEmoji(text) +} + +export function getCorrectedSegmentWidth(seg: string, metrics: SegmentMetrics, emojiCorrection: number): number { + return getActiveMeasurementHost().getCorrectedSegmentWidth(seg, metrics, emojiCorrection) +} + +export function getSegmentGraphemeWidths( + seg: string, + metrics: SegmentMetrics, + cache: Map, + emojiCorrection: number, +): number[] | null { + return getActiveMeasurementHost().getSegmentGraphemeWidths(seg, metrics, cache, emojiCorrection) +} + 
+export function getSegmentGraphemePrefixWidths( + seg: string, + metrics: SegmentMetrics, + cache: Map, + emojiCorrection: number, +): number[] | null { + return getActiveMeasurementHost().getSegmentGraphemePrefixWidths(seg, metrics, cache, emojiCorrection) +} + +export function getFontMeasurementState(font: string, needsEmojiCorrection: boolean): FontMeasurementState { + return getActiveMeasurementHost().getFontMeasurementState(font, needsEmojiCorrection) +} + +export function clearMeasurementCaches(): void { + getActiveMeasurementHost().clearMeasurementCaches() +} diff --git a/packages/lynx-pretext/tsconfig.json b/packages/lynx-pretext/tsconfig.json index b89bf66..3f01f1e 100644 --- a/packages/lynx-pretext/tsconfig.json +++ b/packages/lynx-pretext/tsconfig.json @@ -2,6 +2,7 @@ "extends": "../../tsconfig.json", "compilerOptions": { "composite": true, + "lib": ["ES2021", "DOM", "ESNext.Intl"], "module": "ESNext", "moduleResolution": "Bundler", "noEmit": true,