Skip to content

perf(toml): add Scanner match() method, capturing numbers and bare key patterns using sticky regexp #6538

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Apr 18, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 70 additions & 134 deletions toml/_parser.ts
Original file line number Diff line number Diff line change
@@ -115,6 +115,14 @@
startsWith(searchString: string) {
return this.#source.startsWith(searchString, this.#position);
}

/**
 * Matches `regExp` against the source, anchored at the current position.
 *
 * @param regExp A regular expression that must have the sticky (`y`) flag.
 * @returns The result of `String.prototype.match` on the source: a match
 * array, or `null` when the pattern does not match at the current position.
 * @throws {Error} If `regExp` does not have the sticky flag.
 */
match(regExp: RegExp) {
if (!regExp.sticky) {
throw new Error(`RegExp ${regExp} does not have a sticky 'y' flag`);
}

Check warning on line 122 in toml/_parser.ts

Codecov / codecov/patch

toml/_parser.ts#L121-L122

Added lines #L121 - L122 were not covered by tests
// A sticky RegExp only matches starting exactly at `lastIndex`, so this
// anchors the match at the scanner's current position.
regExp.lastIndex = this.#position;
return this.#source.match(regExp);
}
}

// -----------------------
@@ -313,21 +321,12 @@
// Parser components
// -----------------------

const BARE_KEY_REGEXP = /[A-Za-z0-9_-]/;
const FLOAT_REGEXP = /[0-9_\.e+\-]/i;
const END_OF_VALUE_REGEXP = /[ \t\r\n#,}\]]/;

// Sticky pattern for a run of bare-key characters: ASCII letters, digits,
// underscores and dashes.
const BARE_KEY_REGEXP = /[A-Za-z0-9_-]+/y;
/**
 * Parses a bare key (a non-empty run of `A-Za-z0-9_-` characters) after
 * skipping leading whitespace.
 *
 * NOTE(review): this span comes from a rendered diff, so the removed
 * character-by-character loop and the added `scanner.match()` version appear
 * interleaved below.
 *
 * @param scanner The scanner to read from.
 * @returns A success result holding the key text, or a failure result when
 * no bare-key character is found.
 */
export function bareKey(scanner: Scanner): ParseResult<string> {
scanner.skipWhitespaces();
if (!scanner.char() || !BARE_KEY_REGEXP.test(scanner.char())) {
return failure();
}
const acc: string[] = [];
while (scanner.char() && BARE_KEY_REGEXP.test(scanner.char())) {
acc.push(scanner.char());
scanner.next();
}
const key = acc.join("");
const key = scanner.match(BARE_KEY_REGEXP)?.[0];
if (!key) return failure();
// Advance past the matched key.
scanner.next(key.length);
return success(key);
}

@@ -355,10 +354,7 @@
case "U": {
// Unicode character
const codePointLen = scanner.char() === "u" ? 4 : 6;
const codePoint = parseInt(
"0x" + scanner.slice(1, 1 + codePointLen),
16,
);
const codePoint = parseInt("0x" + scanner.slice(1, 1 + codePointLen), 16);
const str = String.fromCodePoint(codePoint);
scanner.next(codePointLen + 1);
return success(str);
@@ -524,148 +520,85 @@

// Key parser composed from bareKey, basicString and literalString via `or`,
// combined with `join` on ".". Presumably this parses dotted keys such as
// `a.b."c"` — `join` and `or` are defined elsewhere; confirm their semantics.
export const dottedKey = join(or([bareKey, basicString, literalString]), ".");

export function integer(scanner: Scanner): ParseResult<number | string> {
// Sticky pattern for a `0b`-prefixed run of binary digits and underscores.
const BINARY_REGEXP = /0b[01_]+/y;
/**
 * Parses a binary integer literal (`0b` prefix) at the scanner's current
 * position, after skipping leading whitespace.
 *
 * Underscore separators are stripped before conversion. The scanner is only
 * advanced once the literal is known to be valid, so a rejected literal (for
 * example `0b_`, which contains no digits) is not consumed.
 *
 * @param scanner The scanner to read from.
 * @returns A success result holding the numeric value, or a failure result
 * when no valid binary literal is present.
 */
export function binary(scanner: Scanner): ParseResult<number | string> {
  scanner.skipWhitespaces();
  const match = scanner.match(BINARY_REGEXP)?.[0];
  if (!match) return failure();
  // Drop the "0b" prefix and any underscore separators.
  const digits = match.slice(2).replaceAll("_", "");
  const number = parseInt(digits, 2);
  // `digits` is empty when the literal contained only underscores.
  if (isNaN(number)) return failure();
  scanner.next(match.length);
  return success(number);
}

// Handle binary, octal, or hex numbers
const first2 = scanner.slice(0, 2);
if (first2.length === 2 && /0(?:x|o|b)/i.test(first2)) {
scanner.next(2);
const prefix = first2.toLowerCase();

// Determine allowed characters and base in one switch
let allowedChars: RegExp;
let base: number;
switch (prefix) {
case "0b":
allowedChars = /[01_]/; // Binary
base = 2;
break;
case "0o":
allowedChars = /[0-7_]/; // Octal
base = 8;
break;
case "0x":
allowedChars = /[0-9a-f_]/i; // Hex
base = 16;
break;
default:
return failure(); // Unreachable due to regex check
}

const acc = [];
// Collect valid characters
while (!scanner.eof()) {
const char = scanner.char();
if (!allowedChars.test(char)) break;
if (char === "_") {
scanner.next();
continue;
}
acc.push(char);
scanner.next();
}

if (!acc.length) return failure();

const numberStr = acc.join("");
const number = parseInt(numberStr, base);
return isNaN(number) ? failure() : success(number);
}

// Handle regular integers
const acc = [];
if (/[+-]/.test(scanner.char())) {
acc.push(scanner.char());
scanner.next();
}

while (!scanner.eof() && /[0-9_]/.test(scanner.char())) {
acc.push(scanner.char());
scanner.next();
}
// Sticky pattern for a `0o`-prefixed run of octal digits and underscores.
const OCTAL_REGEXP = /0o[0-7_]+/y;
/**
 * Parses an octal integer literal (`0o` prefix) at the scanner's current
 * position, after skipping leading whitespace.
 *
 * Underscore separators are stripped before conversion. The scanner is only
 * advanced once the literal is known to be valid, so a rejected literal (for
 * example `0o_`, which contains no digits) is not consumed.
 *
 * @param scanner The scanner to read from.
 * @returns A success result holding the numeric value, or a failure result
 * when no valid octal literal is present.
 */
export function octal(scanner: Scanner): ParseResult<number | string> {
  scanner.skipWhitespaces();
  const match = scanner.match(OCTAL_REGEXP)?.[0];
  if (!match) return failure();
  // Drop the "0o" prefix and any underscore separators.
  const digits = match.slice(2).replaceAll("_", "");
  const number = parseInt(digits, 8);
  // `digits` is empty when the literal contained only underscores.
  if (isNaN(number)) return failure();
  scanner.next(match.length);
  return success(number);
}

if (acc.length === 0 || (acc.length === 1 && /[+-]/.test(acc[0]!))) {
return failure();
}
// Sticky, case-insensitive pattern for a `0x`-prefixed run of hexadecimal
// digits and underscores.
const HEX_REGEXP = /0x[0-9a-f_]+/yi;
/**
 * Parses a hexadecimal integer literal (`0x` prefix) at the scanner's current
 * position, after skipping leading whitespace.
 *
 * Underscore separators are stripped before conversion. The scanner is only
 * advanced once the literal is known to be valid, so a rejected literal (for
 * example `0x_`, which contains no digits) is not consumed.
 *
 * @param scanner The scanner to read from.
 * @returns A success result holding the numeric value, or a failure result
 * when no valid hexadecimal literal is present.
 */
export function hex(scanner: Scanner): ParseResult<number | string> {
  scanner.skipWhitespaces();
  const match = scanner.match(HEX_REGEXP)?.[0];
  if (!match) return failure();
  // Drop the "0x" prefix and any underscore separators.
  const digits = match.slice(2).replaceAll("_", "");
  const number = parseInt(digits, 16);
  // `digits` is empty when the literal contained only underscores.
  if (isNaN(number)) return failure();
  scanner.next(match.length);
  return success(number);
}

const intStr = acc.filter((c) => c !== "_").join("");
const int = parseInt(intStr, 10);
// Sticky pattern for an optionally signed run of decimal digits and
// underscores.
const INTEGER_REGEXP = /[+-]?[0-9_]+/y;
/**
 * Parses a decimal integer at the scanner's current position, after skipping
 * leading whitespace.
 *
 * Underscore separators are stripped before conversion. Text that matches the
 * pattern but contains no digits (for example `_` or `+_`) is rejected with a
 * failure result rather than reported as a successful `NaN`, and in that case
 * the scanner is not advanced past it.
 *
 * @param scanner The scanner to read from.
 * @returns A success result holding the numeric value, or a failure result
 * when no valid integer is present.
 */
export function integer(scanner: Scanner): ParseResult<number | string> {
  scanner.skipWhitespaces();
  const match = scanner.match(INTEGER_REGEXP)?.[0];
  if (!match) return failure();
  const digits = match.replaceAll("_", "");
  const int = parseInt(digits, 10);
  // parseInt yields NaN when only a sign and/or underscores were matched.
  if (isNaN(int)) return failure();
  scanner.next(match.length);
  return success(int);
}

// Sticky, case-insensitive pattern: optional sign, digits/underscores, an
// optional fractional part, and an optional exponent.
const FLOAT_REGEXP = /[+-]?[0-9_]+(?:\.[0-9_]+)?(?:e[+-]?[0-9_]+)?/yi;
/**
 * Parses a floating-point number after skipping leading whitespace.
 *
 * Underscores are removed before the text is passed to `parseFloat`; a `NaN`
 * result is reported as a failure.
 *
 * NOTE(review): this span comes from a rendered diff, so the removed
 * character-by-character implementation and the added `scanner.match()`
 * version appear interleaved below.
 *
 * @param scanner The scanner to read from.
 * @returns A success result holding the parsed number, or a failure result.
 */
export function float(scanner: Scanner): ParseResult<number> {
scanner.skipWhitespaces();

// lookahead validation is needed for integer value is similar to float
let position = 0;
while (
scanner.char(position) &&
!END_OF_VALUE_REGEXP.test(scanner.char(position))
) {
if (!FLOAT_REGEXP.test(scanner.char(position))) return failure();
position++;
}

const acc = [];
if (/[+-]/.test(scanner.char())) {
acc.push(scanner.char());
scanner.next();
}
while (FLOAT_REGEXP.test(scanner.char()) && !scanner.eof()) {
acc.push(scanner.char());
scanner.next();
}

if (acc.length === 0) return failure();
const float = parseFloat(acc.filter((char) => char !== "_").join(""));
const match = scanner.match(FLOAT_REGEXP)?.[0];
if (!match) return failure();
scanner.next(match.length);
const value = match.replaceAll("_", "");
const float = parseFloat(value);
if (isNaN(float)) return failure();

return success(float);
}

// Sticky pattern: a `YYYY-MM-DD` date optionally followed by a run of
// characters drawn from space, digits, `T`, `Z`, `.`, `:`, `+` and `-`.
const DATE_TIME_REGEXP = /\d{4}-\d{2}-\d{2}(?:[ 0-9TZ.:+-]+)?/y;
/**
 * Parses a date or date-time value after skipping leading whitespace and
 * converts the trimmed text with the `Date` constructor.
 *
 * NOTE(review): this span comes from a rendered diff, so the removed
 * slice-and-loop implementation and the added `scanner.match()` version
 * appear interleaved below.
 *
 * @param scanner The scanner to read from.
 * @returns A success result holding the `Date`, or a failure result when the
 * input does not start with a `YYYY-MM-DD` date.
 * @throws {SyntaxError} If the text is consumed but `Date` cannot parse it.
 */
export function dateTime(scanner: Scanner): ParseResult<Date> {
scanner.skipWhitespaces();

let dateStr = scanner.slice(0, 10);
// example: 1979-05-27
if (!/^\d{4}-\d{2}-\d{2}/.test(dateStr)) return failure();
scanner.next(10);

const acc = [];
// example: 1979-05-27T00:32:00Z
while (/[ 0-9TZ.:+-]/.test(scanner.char()) && !scanner.eof()) {
acc.push(scanner.char());
scanner.next();
}
dateStr += acc.join("");
const date = new Date(dateStr.trim());
const match = scanner.match(DATE_TIME_REGEXP)?.[0];
if (!match) return failure();
scanner.next(match.length);
const date = new Date(match.trim());
// invalid date
if (isNaN(date.getTime())) {
throw new SyntaxError(`Invalid date string "${dateStr}"`);
throw new SyntaxError(`Invalid date string "${match}"`);
}

return success(date);
}

// Sticky pattern: `HH:MM:SS` with an optional `.` followed by fractional
// digits.
const LOCAL_TIME_REGEXP = /(\d{2}):(\d{2}):(\d{2})(?:\.[0-9]+)?/y;
/**
 * Parses a local time (`HH:MM:SS`, optionally with fractional seconds) after
 * skipping leading whitespace, and returns the matched text unchanged.
 *
 * NOTE(review): this span comes from a rendered diff, so the removed
 * slice-and-loop implementation and the added `scanner.match()` version
 * appear interleaved below.
 *
 * @param scanner The scanner to read from.
 * @returns A success result holding the time string, or a failure result
 * when the input does not start with `HH:MM:SS`.
 */
export function localTime(scanner: Scanner): ParseResult<string> {
scanner.skipWhitespaces();

let timeStr = scanner.slice(0, 8);
if (!/^(\d{2}):(\d{2}):(\d{2})/.test(timeStr)) return failure();
scanner.next(8);

const acc = [];
if (scanner.char() !== ".") return success(timeStr);
acc.push(scanner.char());
scanner.next();

while (/[0-9]/.test(scanner.char()) && !scanner.eof()) {
acc.push(scanner.char());
scanner.next();
}
timeStr += acc.join("");
return success(timeStr);
const match = scanner.match(LOCAL_TIME_REGEXP)?.[0];
if (!match) return failure();
scanner.next(match.length);
return success(match);
}

export function arrayValue(scanner: Scanner): ParseResult<unknown[]> {
@@ -722,6 +655,9 @@
symbols,
dateTime,
localTime,
binary,
octal,
hex,
float,
integer,
arrayValue,
43 changes: 37 additions & 6 deletions toml/parse_test.ts
Original file line number Diff line number Diff line change
@@ -4,16 +4,19 @@ import {
arrayValue,
bareKey,
basicString,
binary,
dateTime,
deepAssignWithTable,
dottedKey,
float,
hex,
inlineTable,
integer,
literalString,
localTime,
multilineBasicString,
multilineLiteralString,
octal,
pair,
parserFactory,
Scanner,
@@ -233,6 +236,40 @@ fizz.buzz = true
},
});

// Checks the `binary` parser: a valid `0b` literal yields its numeric value,
// while empty input, a non-numeric token, and a bare `0x` prefix all throw.
Deno.test({
name: "parse() handles binary",
fn() {
const parse = parserFactory(binary);
assertEquals(parse("0b11010110"), 0b11010110); // 0b11010110 = 214
assertThrows(() => parse(""));
assertThrows(() => parse("+Z"));
assertThrows(() => parse("0x"));
},
});
// Checks the `octal` parser: valid `0o` literals yield their numeric values,
// while empty input, a non-numeric token, and a bare `0x` prefix all throw.
Deno.test({
name: "parse() handles octal",
fn() {
const parse = parserFactory(octal);
assertEquals(parse("0o01234567"), 0o01234567); // 0o01234567 = 342391
assertEquals(parse("0o755"), 0o755); // 0o755 = 493
assertThrows(() => parse(""));
assertThrows(() => parse("+Z"));
assertThrows(() => parse("0x"));
},
});
// Checks the `hex` parser: upper-case, lower-case and underscore-separated
// `0x` literals yield their numeric values, while empty input, a non-numeric
// token, and a `0x` prefix with no digits all throw.
Deno.test({
name: "parse() handles hex",
fn() {
const parse = parserFactory(hex);

assertEquals(parse("0xDEADBEEF"), 0xDEADBEEF); // 0xDEADBEEF = 3735928559
assertEquals(parse("0xdeadbeef"), 0xdeadbeef); // 0xdeadbeef = 3735928559
assertEquals(parse("0xdead_beef"), 0xdead_beef); // 0xdead_beef = 3735928559
assertThrows(() => parse(""));
assertThrows(() => parse("+Z"));
assertThrows(() => parse("0x"));
},
});
Deno.test({
name: "parse() handles integer",
fn() {
@@ -241,12 +278,6 @@ Deno.test({
assertEquals(parse("+123"), 123);
assertEquals(parse("-123"), -123);
assertEquals(parse("123_456"), 123456);
assertEquals(parse("0xDEADBEEF"), 0xDEADBEEF); // 0xDEADBEEF = 3735928559
assertEquals(parse("0xdeadbeef"), 0xdeadbeef); // 0xdeadbeef = 3735928559
assertEquals(parse("0xdead_beef"), 0xdead_beef); // 0xdead_beef = 3735928559
assertEquals(parse("0o01234567"), 0o01234567); // 0o01234567 = 342391
assertEquals(parse("0o755"), 0o755); // 0o755 = 493
assertEquals(parse("0b11010110"), 0b11010110); // 0b11010110 = 214
assertThrows(() => parse(""));
assertThrows(() => parse("+Z"));
assertThrows(() => parse("0x"));

Unchanged files with check annotations Beta

try {
const snapshotFileUrl = this.#snapshotFileUrl.toString();
const { snapshot } = await import(snapshotFileUrl);

Check warning on line 401 in testing/snapshot.ts

GitHub Actions / test (canary, ubuntu-latest)

unable to analyze dynamic import
this.#currentSnapshots = typeof snapshot === "undefined"
? new Map()
: new Map(
});
`);
const { snapshot } = await import(toFileUrl(snapshotFilePath).toString());

Check warning on line 329 in testing/snapshot_test.ts

GitHub Actions / test (canary, ubuntu-latest)

unable to analyze dynamic import
await assertSnapshot(t, snapshot[`${snapshotName} 1`]);
await assertSnapshot(t, formatTestOutput(result.output));