From 69032e683d2d96d5b841737077134e1f17f1808d Mon Sep 17 00:00:00 2001
From: Ben Hughes <benjamin@makenotion.com>
Date: Sat, 9 Mar 2024 09:41:21 -0700
Subject: [PATCH] Add DateTime.buildFormatParser and DateTime.fromFormatParser
 (#1582)

This allows constructing a parser for a locale/format and reusing it
when parsing dates. Without this, DateTime.fromFormat constructs a new
parser on every call, which gets rather slow when parsing large numbers
of date strings.

In benchmarks, this speeds up parsing by 4.4x.
---
 benchmarks/datetime.js           | 10 +++++
 src/datetime.js                  | 69 ++++++++++++++++++++++++++++++++
 src/impl/locale.js               |  4 ++
 src/impl/tokenParser.js          | 68 ++++++++++++++++++++++---------
 test/datetime/tokenParse.test.js | 25 ++++++++++++
 5 files changed, 158 insertions(+), 18 deletions(-)

diff --git a/benchmarks/datetime.js b/benchmarks/datetime.js
index 39d6bf07c..32839ad2a 100644
--- a/benchmarks/datetime.js
+++ b/benchmarks/datetime.js
@@ -8,6 +8,8 @@ function runDateTimeSuite() {
 
     const dt = DateTime.now();
 
+    const formatParser = DateTime.buildFormatParser("yyyy/MM/dd HH:mm:ss.SSS");
+
     suite
       .add("DateTime.local", () => {
         DateTime.now();
@@ -32,6 +34,14 @@ function runDateTimeSuite() {
           zone: "America/Los_Angeles",
         });
       })
+      .add("DateTime.fromFormatParser", () => {
+        DateTime.fromFormatParser("1982/05/25 09:10:11.445", formatParser);
+      })
+      .add("DateTime.fromFormatParser with zone", () => {
+        DateTime.fromFormatParser("1982/05/25 09:10:11.445", formatParser, {
+          zone: "America/Los_Angeles",
+        });
+      })
       .add("DateTime#setZone", () => {
         dt.setZone("America/Los_Angeles");
       })
diff --git a/src/datetime.js b/src/datetime.js
index 85533dda0..a3dde9525 100644
--- a/src/datetime.js
+++ b/src/datetime.js
@@ -28,6 +28,7 @@ import {
   explainFromTokens,
   formatOptsToTokens,
   expandMacroTokens,
+  TokenParser,
 } from "./impl/tokenParser.js";
 import {
   gregorianToWeek,
@@ -2233,6 +2234,74 @@ export default class DateTime {
     return DateTime.fromFormatExplain(text, fmt, options);
   }
 
+  /**
+   * Build a parser for `fmt` using the given locale. This parser can be passed
+   * to {@link DateTime.fromFormatParser} to parse a date in this format. This
+   * can be used to optimize cases where many dates need to be parsed in a
+   * specific format.
+   *
+   * @param {String} fmt - the format the string is expected to be in (see
+   * {@link DateTime.fromFormat})
+   * @param {Object} options - options used to set locale and numberingSystem
+   * for parser
+   * @returns {TokenParser} - opaque object to pass to DateTime.fromFormatParser
+   */
+  static buildFormatParser(fmt, options = {}) {
+    const { locale = null, numberingSystem = null } = options,
+      localeToUse = Locale.fromOpts({
+        locale,
+        numberingSystem,
+        defaultToEN: true,
+      });
+    return new TokenParser(localeToUse, fmt);
+  }
+
+  /**
+   * Create a DateTime from an input string and format parser.
+   *
+   * The format parser must have been created with the same locale as this call.
+   *
+   * @param {String} text - the string to parse
+   * @param {TokenParser} formatParser - parser from {@link DateTime.buildFormatParser}
+   * @param {Object} opts - options taken by fromFormat()
+   * @returns {DateTime}
+   */
+  static fromFormatParser(text, formatParser, opts = {}) {
+    if (isUndefined(text) || isUndefined(formatParser)) {
+      throw new InvalidArgumentError(
+        "fromFormatParser requires an input string and a format parser"
+      );
+    }
+    const { locale = null, numberingSystem = null } = opts,
+      localeToUse = Locale.fromOpts({
+        locale,
+        numberingSystem,
+        defaultToEN: true,
+      });
+
+    if (!localeToUse.equals(formatParser.locale)) {
+      throw new InvalidArgumentError(
+        `fromFormatParser called with a locale of ${localeToUse}, ` +
+          `but the format parser was created for ${formatParser.locale}`
+      );
+    }
+
+    const { result, zone, specificOffset, invalidReason } = formatParser.explainFromTokens(text);
+
+    if (invalidReason) {
+      return DateTime.invalid(invalidReason);
+    } else {
+      return parseDataToDateTime(
+        result,
+        zone,
+        opts,
+        `format ${formatParser.format}`,
+        text,
+        specificOffset
+      );
+    }
+  }
+
   // FORMAT PRESETS
 
   /**
diff --git a/src/impl/locale.js b/src/impl/locale.js
index f1caf1495..cd55b3bfc 100644
--- a/src/impl/locale.js
+++ b/src/impl/locale.js
@@ -539,4 +539,8 @@ export default class Locale {
       this.outputCalendar === other.outputCalendar
     );
   }
+
+  toString() {
+    return `Locale(${this.locale}, ${this.numberingSystem}, ${this.outputCalendar})`;
+  }
 }
diff --git a/src/impl/tokenParser.js b/src/impl/tokenParser.js
index 8dd38f37f..48a7595ed 100644
--- a/src/impl/tokenParser.js
+++ b/src/impl/tokenParser.js
@@ -432,27 +432,59 @@ export function expandMacroTokens(tokens, locale) {
  * @private
  */
 
-export function explainFromTokens(locale, input, format) {
-  const tokens = expandMacroTokens(Formatter.parseFormat(format), locale),
-    units = tokens.map((t) => unitForToken(t, locale)),
-    disqualifyingUnit = units.find((t) => t.invalidReason);
+export class TokenParser {
+  constructor(locale, format) {
+    this.locale = locale;
+    this.format = format;
+    this.tokens = expandMacroTokens(Formatter.parseFormat(format), locale);
+    this.units = this.tokens.map((t) => unitForToken(t, locale));
+    this.disqualifyingUnit = this.units.find((t) => t.invalidReason);
+
+    if (!this.disqualifyingUnit) {
+      const [regexString, handlers] = buildRegex(this.units);
+      this.regex = RegExp(regexString, "i");
+      this.handlers = handlers;
+    }
+  }
 
-  if (disqualifyingUnit) {
-    return { input, tokens, invalidReason: disqualifyingUnit.invalidReason };
-  } else {
-    const [regexString, handlers] = buildRegex(units),
-      regex = RegExp(regexString, "i"),
-      [rawMatches, matches] = match(input, regex, handlers),
-      [result, zone, specificOffset] = matches
-        ? dateTimeFromMatches(matches)
-        : [null, null, undefined];
-    if (hasOwnProperty(matches, "a") && hasOwnProperty(matches, "H")) {
-      throw new ConflictingSpecificationError(
-        "Can't include meridiem when specifying 24-hour format"
-      );
+  explainFromTokens(input) {
+    if (!this.isValid) {
+      return { input, tokens: this.tokens, invalidReason: this.invalidReason };
+    } else {
+      const [rawMatches, matches] = match(input, this.regex, this.handlers),
+        [result, zone, specificOffset] = matches
+          ? dateTimeFromMatches(matches)
+          : [null, null, undefined];
+      if (hasOwnProperty(matches, "a") && hasOwnProperty(matches, "H")) {
+        throw new ConflictingSpecificationError(
+          "Can't include meridiem when specifying 24-hour format"
+        );
+      }
+      return {
+        input,
+        tokens: this.tokens,
+        regex: this.regex,
+        rawMatches,
+        matches,
+        result,
+        zone,
+        specificOffset,
+      };
     }
-    return { input, tokens, regex, rawMatches, matches, result, zone, specificOffset };
   }
+
+  get isValid() {
+    return !this.disqualifyingUnit;
+  }
+
+  get invalidReason() {
+    return this.disqualifyingUnit ? this.disqualifyingUnit.invalidReason : null;
+  }
+}
+
+export function explainFromTokens(locale, input, format) {
+  const parser = new TokenParser(locale, format);
+  return parser.explainFromTokens(input);
 }
 
 export function parseFromTokens(locale, input, format) {
diff --git a/test/datetime/tokenParse.test.js b/test/datetime/tokenParse.test.js
index 4025821d8..8b5c6a8d7 100644
--- a/test/datetime/tokenParse.test.js
+++ b/test/datetime/tokenParse.test.js
@@ -1224,3 +1224,28 @@ test("DateTime.expandFormat respects the hour cycle when forced by the macro tok
   const format = DateTime.expandFormat("T", { locale: "en-US" });
   expect(format).toBe("H:m");
 });
+
+//------
+// .fromFormatParser
+//-------
+
+test("DateTime.fromFormatParser behaves equivalently to DateTime.fromFormat", () => {
+  const dateTimeStr = "1982/05/25 09:10:11.445";
+  const format = "yyyy/MM/dd HH:mm:ss.SSS";
+  const formatParser = DateTime.buildFormatParser(format);
+  const ff1 = DateTime.fromFormat(dateTimeStr, format),
+    ffP1 = DateTime.fromFormatParser(dateTimeStr, formatParser);
+
+  expect(ffP1).toEqual(ff1);
+  expect(ffP1.isValid).toBe(true);
+});
+
+test("DateTime.fromFormatParser throws error when used with a different locale than it was created with", () => {
+  const format = "yyyy/MM/dd HH:mm:ss.SSS";
+  const formatParser = DateTime.buildFormatParser(format, { locale: "es-ES" });
+  expect(() =>
+    DateTime.fromFormatParser("1982/05/25 09:10:11.445", formatParser, { locale: "es-MX" })
+  ).toThrowError(
+    "fromFormatParser called with a locale of Locale(es-MX, null, null), but the format parser was created for Locale(es-ES, null, null)"
+  );
+});