diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe9da6aa0ca..7bbf35c76a2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## [next]
 
+- feat(): Add Intl.Segmenter support for Textbox word splitting [#10791](https://github.com/fabricjs/fabric.js/pull/10791)
 - chore(): update major version of vitest [#10786](https://github.com/fabricjs/fabric.js/pull/10786)
 - fix(): Prototype pollution risk on text char cache [#10782](https://github.com/fabricjs/fabric.js/pull/10782)
 - chore(): update playwright [#10780](https://github.com/fabricjs/fabric.js/pull/10780)
diff --git a/src/shapes/Textbox.ts b/src/shapes/Textbox.ts
index d695c047b46..e237468d489 100644
--- a/src/shapes/Textbox.ts
+++ b/src/shapes/Textbox.ts
@@ -8,6 +8,7 @@ import type { SerializedITextProps, ITextProps } from './IText/IText';
 import type { ITextEvents } from './IText/ITextBehavior';
 import type { TextLinesInfo } from './Text/Text';
 import type { Control } from '../controls/Control';
+import { wordSplit } from '../util/lang_string';
 
 // @TODO: Many things here are configuration related and shouldn't be on the class nor prototype
 // regexes, list of properties that are not suppose to change by instances, magic consts.
@@ -408,7 +409,7 @@ export class Textbox<
    * @returns {string[]} array of words
    */
   wordSplit(value: string): string[] {
-    return value.split(this._wordJoiners);
+    return wordSplit(value, this._wordJoiners);
   }
 
   /**
diff --git a/src/util/lang_string.ts b/src/util/lang_string.ts
index 1f4810535d1..6ce87658629 100644
--- a/src/util/lang_string.ts
+++ b/src/util/lang_string.ts
@@ -26,18 +26,31 @@ export const escapeXml = (string: string): string =>
     .replace(/</g, '&lt;')
     .replace(/>/g, '&gt;');
 
-let segmenter: Intl.Segmenter | false;
+let graphemeSegmenter: Intl.Segmenter | false;
+let wordSegmenter: Intl.Segmenter | false;
 
-const getSegmenter = () => {
-  if (!segmenter) {
-    segmenter =
+const getGraphemeSegmenter = () => {
+  if (!graphemeSegmenter) {
+    graphemeSegmenter =
       'Intl' in getFabricWindow() &&
       'Segmenter' in Intl &&
       new Intl.Segmenter(undefined, {
         granularity: 'grapheme',
       });
   }
-  return segmenter;
+  return graphemeSegmenter;
+};
+
+const getWordSegmenter = () => {
+  if (!wordSegmenter) {
+    wordSegmenter =
+      'Intl' in getFabricWindow() &&
+      'Segmenter' in Intl &&
+      new Intl.Segmenter(undefined, {
+        granularity: 'word',
+      });
+  }
+  return wordSegmenter;
 };
 
 /**
@@ -46,9 +59,9 @@ const getSegmenter = () => {
  * @return {Array} array containing the graphemes
  */
 export const graphemeSplit = (textstring: string): string[] => {
-  segmenter || getSegmenter();
-  if (segmenter) {
-    const segments = segmenter.segment(textstring);
+  graphemeSegmenter || getGraphemeSegmenter();
+  if (graphemeSegmenter) {
+    const segments = graphemeSegmenter.segment(textstring);
     return Array.from(segments).map(({ segment }) => segment);
   }
 
@@ -56,6 +69,31 @@ export const graphemeSplit = (textstring: string): string[] => {
   return graphemeSplitImpl(textstring);
 };
 
+/**
+ * Divide a string into word segments.
+ * Uses `Intl.Segmenter` with `granularity: 'word'` when the runtime provides
+ * it, otherwise falls back to `String.prototype.split` with `splitRegex`.
+ * NOTE(review): the two paths do not return the same shape. `Intl.Segmenter`
+ * yields every segment, whitespace and punctuation included, while `split`
+ * drops the separators - confirm callers handle both.
+ * @param {String} textstring String to split into words
+ * @param {RegExp} [splitRegex] Regex used by the fallback split (default: /[ \t\r]/)
+ * @return {Array} array containing the words
+ */
+export const wordSplit = (
+  textstring: string,
+  splitRegex = /[ \t\r]/
+): string[] => {
+  wordSegmenter || getWordSegmenter();
+  if (wordSegmenter) {
+    const segments = wordSegmenter.segment(textstring);
+    return Array.from(segments).map(({ segment }) => segment);
+  }
+
+  // Fallback to regex-based split
+  return textstring.split(splitRegex);
+};
+
 const graphemeSplitImpl = (textstring: string): string[] => {
   const graphemes: string[] = [];
   for (let i = 0, chr; i < textstring.length; i++) {