From 69032e683d2d96d5b841737077134e1f17f1808d Mon Sep 17 00:00:00 2001 From: Ben Hughes Date: Sat, 9 Mar 2024 09:41:21 -0700 Subject: [PATCH] Add DateTime.buildFormatParser and DateTime.fromFormatParser (#1582) This allows constructing a parser for a locale/format and reusing it when parsing dates. Without this, DateTime.fromFormat constructs a new parser on every call. When parsing large amounts of date strings, this gets rather slow. In benchmarks, this speeds up parsing by 4.4x --- benchmarks/datetime.js | 10 +++++ src/datetime.js | 69 ++++++++++++++++++++++++++++++++ src/impl/locale.js | 4 ++ src/impl/tokenParser.js | 68 ++++++++++++++++++++++--------- test/datetime/tokenParse.test.js | 25 ++++++++++++ 5 files changed, 158 insertions(+), 18 deletions(-) diff --git a/benchmarks/datetime.js b/benchmarks/datetime.js index 39d6bf07c..32839ad2a 100644 --- a/benchmarks/datetime.js +++ b/benchmarks/datetime.js @@ -8,6 +8,8 @@ function runDateTimeSuite() { const dt = DateTime.now(); + const formatParser = DateTime.buildFormatParser("yyyy/MM/dd HH:mm:ss.SSS"); + suite .add("DateTime.local", () => { DateTime.now(); @@ -32,6 +34,14 @@ function runDateTimeSuite() { zone: "America/Los_Angeles", }); }) + .add("DateTime.fromFormatParser", () => { + DateTime.fromFormatParser("1982/05/25 09:10:11.445", formatParser); + }) + .add("DateTime.fromFormatParser with zone", () => { + DateTime.fromFormatParser("1982/05/25 09:10:11.445", formatParser, { + zone: "America/Los_Angeles", + }); + }) .add("DateTime#setZone", () => { dt.setZone("America/Los_Angeles"); }) diff --git a/src/datetime.js b/src/datetime.js index 85533dda0..a3dde9525 100644 --- a/src/datetime.js +++ b/src/datetime.js @@ -28,6 +28,7 @@ import { explainFromTokens, formatOptsToTokens, expandMacroTokens, + TokenParser, } from "./impl/tokenParser.js"; import { gregorianToWeek, @@ -2233,6 +2234,74 @@ export default class DateTime { return DateTime.fromFormatExplain(text, fmt, options); } + /** + * Build a parser for `fmt` 
using the given locale. This parser can be passed + * to {@link DateTime.fromFormatParser} to parse a date in this format. This + * can be used to optimize cases where many dates need to be parsed in a + * specific format. + * + * @param {String} fmt - the format the string is expected to be in (see + * description) + * @param {Object} options - options used to set locale and numberingSystem + * for parser + * @returns {TokenParser} - opaque object to be passed to {@link DateTime.fromFormatParser} + */ + static buildFormatParser(fmt, options = {}) { + const { locale = null, numberingSystem = null } = options, + localeToUse = Locale.fromOpts({ + locale, + numberingSystem, + defaultToEN: true, + }); + return new TokenParser(localeToUse, fmt); + } + + /** + * Create a DateTime from an input string and format parser. + * + * The format parser must have been created with the same locale as this call. + * + * @param {String} text - the string to parse + * @param {TokenParser} formatParser - parser from {@link DateTime.buildFormatParser} + * @param {Object} opts - options taken by fromFormat() + * @returns {DateTime} + */ + static fromFormatParser(text, formatParser, opts = {}) { + if (isUndefined(text) || isUndefined(formatParser)) { + throw new InvalidArgumentError( + "fromFormatParser requires an input string and a format parser" + ); + } + const { locale = null, numberingSystem = null } = opts, + localeToUse = Locale.fromOpts({ + locale, + numberingSystem, + defaultToEN: true, + }); + + if (!localeToUse.equals(formatParser.locale)) { + throw new InvalidArgumentError( + `fromFormatParser called with a locale of ${localeToUse}, ` + + `but the format parser was created for ${formatParser.locale}` + ); + } + + const { result, zone, specificOffset, invalidReason } = formatParser.explainFromTokens(text); + + if (invalidReason) { + return DateTime.invalid(invalidReason); + } else { + return parseDataToDateTime( + result, + zone, + opts, + `format ${formatParser.format}`, + text, + specificOffset + ); + } + } + 
// FORMAT PRESETS /** diff --git a/src/impl/locale.js b/src/impl/locale.js index f1caf1495..cd55b3bfc 100644 --- a/src/impl/locale.js +++ b/src/impl/locale.js @@ -539,4 +539,8 @@ export default class Locale { this.outputCalendar === other.outputCalendar ); } + + toString() { + return `Locale(${this.locale}, ${this.numberingSystem}, ${this.outputCalendar})`; + } } diff --git a/src/impl/tokenParser.js b/src/impl/tokenParser.js index 8dd38f37f..48a7595ed 100644 --- a/src/impl/tokenParser.js +++ b/src/impl/tokenParser.js @@ -432,27 +432,59 @@ export function expandMacroTokens(tokens, locale) { * @private */ -export function explainFromTokens(locale, input, format) { - const tokens = expandMacroTokens(Formatter.parseFormat(format), locale), - units = tokens.map((t) => unitForToken(t, locale)), - disqualifyingUnit = units.find((t) => t.invalidReason); +export class TokenParser { + constructor(locale, format) { + this.locale = locale; + this.format = format; + this.tokens = expandMacroTokens(Formatter.parseFormat(format), locale); + this.units = this.tokens.map((t) => unitForToken(t, locale)); + this.disqualifyingUnit = this.units.find((t) => t.invalidReason); + + if (!this.disqualifyingUnit) { + const [regexString, handlers] = buildRegex(this.units); + this.regex = RegExp(regexString, "i"); + this.handlers = handlers; + } + } - if (disqualifyingUnit) { - return { input, tokens, invalidReason: disqualifyingUnit.invalidReason }; - } else { - const [regexString, handlers] = buildRegex(units), - regex = RegExp(regexString, "i"), - [rawMatches, matches] = match(input, regex, handlers), - [result, zone, specificOffset] = matches - ? 
dateTimeFromMatches(matches) - : [null, null, undefined]; - if (hasOwnProperty(matches, "a") && hasOwnProperty(matches, "H")) { - throw new ConflictingSpecificationError( - "Can't include meridiem when specifying 24-hour format" - ); + explainFromTokens(input) { + if (!this.isValid) { + return { input, tokens: this.tokens, invalidReason: this.invalidReason }; + } else { + const [rawMatches, matches] = match(input, this.regex, this.handlers), + [result, zone, specificOffset] = matches + ? dateTimeFromMatches(matches) + : [null, null, undefined]; + if (hasOwnProperty(matches, "a") && hasOwnProperty(matches, "H")) { + throw new ConflictingSpecificationError( + "Can't include meridiem when specifying 24-hour format" + ); + } + return { + input, + tokens: this.tokens, + regex: this.regex, + rawMatches, + matches, + result, + zone, + specificOffset, + }; } - return { input, tokens, regex, rawMatches, matches, result, zone, specificOffset }; } + + get isValid() { + return !this.disqualifyingUnit; + } + + get invalidReason() { + return this.disqualifyingUnit ? 
this.disqualifyingUnit.invalidReason : null; + } +} + +export function explainFromTokens(locale, input, format) { + const parser = new TokenParser(locale, format); + return parser.explainFromTokens(input); } export function parseFromTokens(locale, input, format) { diff --git a/test/datetime/tokenParse.test.js b/test/datetime/tokenParse.test.js index 4025821d8..8b5c6a8d7 100644 --- a/test/datetime/tokenParse.test.js +++ b/test/datetime/tokenParse.test.js @@ -1224,3 +1224,28 @@ test("DateTime.expandFormat respects the hour cycle when forced by the macro tok const format = DateTime.expandFormat("T", { locale: "en-US" }); expect(format).toBe("H:m"); }); + +//------ +// .fromFormatParser +//------- + +test("DateTime.fromFormatParser behaves equivalently to DateTime.fromFormat", () => { + const dateTimeStr = "1982/05/25 09:10:11.445"; + const format = "yyyy/MM/dd HH:mm:ss.SSS"; + const formatParser = DateTime.buildFormatParser(format); + const ff1 = DateTime.fromFormat(dateTimeStr, format), + ffP1 = DateTime.fromFormatParser(dateTimeStr, formatParser); + + expect(ffP1).toEqual(ff1); + expect(ffP1.isValid).toBe(true); +}); + +test("DateTime.fromFormatParser throws error when used with a different locale than it was created with", () => { + const format = "yyyy/MM/dd HH:mm:ss.SSS"; + const formatParser = DateTime.buildFormatParser(format, { locale: "es-ES" }); + expect(() => + DateTime.fromFormatParser("1982/05/25 09:10:11.445", formatParser, { locale: "es-MX" }) + ).toThrowError( + "fromFormatParser called with a locale of Locale(es-MX, null, null), but the format parser was created for Locale(es-ES, null, null)" + ); +});