Skip to content

Commit 5d4feea

Browse files
committed
[Yarr] Improve processing of an alternation of strings
https://bugs.webkit.org/show_bug.cgi?id=288102 rdar://145222010 Reviewed by Yusuke Suzuki. Added the notion of a string list to a parsed RegExp that is in the form of /^(?:break|case|which|do|for)/ with an optional trailing $. Such a RegExp will not backtrack and therefore we can streamline the code we emit for such a pattern. This change involves recognizing beginning of string anchored alternations of strings while parsing and then treating the generation of JIT code differently for these patterns. This includes changing how conditional branching works, specifically switching from "fall through on match" for each term to "jump on match" for the whole alternation. Fixed a bug in the original version where we weren't properly checking the nested alternatives to see if they only contain fixed single count PatternCharacter terms. The current code generated for the "case" alternative is: 8:Term PatternCharacter checked-offset:(3) 'c' <156> 0x11381430c: add w1, w1, #2 <160> 0x113814310: cmp w1, w2 <164> 0x113814314: b.hi 0x113814444 -> <468> 10:Term PatternCharacter checked-offset:(4) 'c' <168> 0x113814318: sub x17, x0, #4 <172> 0x11381431c: ldr w17, [x17, x1] <176> 0x113814320: movz w16, #0x6163 <180> 0x113814324: movk w16, #0x6573, lsl #16 -> 0x65736163 <184> 0x113814328: cmp w17, w16 <188> 0x11381432c: b.ne 0x113814444 -> <468> 11:Term PatternCharacter checked-offset:(4) 'a' already handled 12:Term PatternCharacter checked-offset:(4) 's' already handled 13:Term PatternCharacter checked-offset:(4) 'e' already handled 14:NestedAlternativeNext minimum-size:(5),checked-offset:(5) <192> 0x113814330: movz x16, #0x4444 <196> 0x113814334: movk x16, #0x1381, lsl #16 <200> 0x113814338: movk x16, #0x8001, lsl #32 <204> 0x11381433c: movk x16, #0xc973, lsl #48 -> 0x113814444 JIT PC <208> 0x113814340: stur x16, [sp, #8] <212> 0x113814344: b 0x113814404 -> <404> With some additional backtracking code: 9:NestedAlternativeNext minimum-size:(4),checked-offset:(4) <468> 
0x113814444: sub w1, w1, #2 <472> 0x113814448: b 0x113814348 -> <216> With this change, the processing of "case" becomes: 9:StringListAlternativeNext minimum-size:(4),checked-offset:(4) <132> 0x12a8285c4: sub w1, w1, #1 <136> 0x12a8285c8: cmp w1, w2 <140> 0x12a8285cc: b.hi 0x12a8285e8 -> <168> 10:Term PatternCharacter checked-offset:(4) 'c' <144> 0x12a8285d0: sub x17, x0, #4 <148> 0x12a8285d4: ldr w17, [x17, x1] <152> 0x12a8285d8: movz w16, #0x6163 <156> 0x12a8285dc: movk w16, #0x6573, lsl #16 -> 0x65736163 <160> 0x12a8285e0: cmp w17, w16 <164> 0x12a8285e4: b.eq 0x12a82866c -> <300> 11:Term PatternCharacter checked-offset:(4) 'a' already handled 12:Term PatternCharacter checked-offset:(4) 's' already handled 13:Term PatternCharacter checked-offset:(4) 'e' already handled 14:StringListAlternativeNext minimum-size:(5),checked-offset:(5) With no backtracking code. We are able to eliminate one branch and the saving of the continuation PC for backtracking. The code size to process these string list RegExps is reduced. For the example RegExp above, the prior version created 1940 bytes (485 instructions) of code while this change creates 1392 bytes (345 instructions) of code, a nearly 30% reduction in code. This change is a ~18% progression on the new regexp-keyword-parsing microbenchmark: Baseline YarrStringList regexp-keyword-parsing 136.7065+-0.9807 ^ 116.0161+-1.1791 ^ definitely 1.1783x faster <geometric> 136.7065+-0.9807 ^ 116.0161+-1.1791 ^ definitely 1.1783x faster * JSTests/microbenchmarks/regexp-keyword-parsing.js: Added. (arrayToString): (objectToString): (dumpValue): (compareArray): (compareGroups): (testRegExp): (testRegExpSyntaxError): (let.re.break.case.catch.continue.debugger.default.else.finally.if): (let.re1.break.case.catch.continue.debugger.default.else.finally.if): * JSTests/stress/regexp-parsing-tokens.js: Added. 
(arrayToString): (objectToString): (dumpValue): (compareArray): (compareGroups): (testRegExp): (testRegExpSyntaxError): * Source/JavaScriptCore/yarr/YarrJIT.cpp: * Source/JavaScriptCore/yarr/YarrPattern.cpp: (JSC::Yarr::YarrPatternConstructor::atomParenthesesEnd): (JSC::Yarr::YarrPatternConstructor::checkForTerminalParentheses): (JSC::Yarr::PatternAlternative::dump): (JSC::Yarr::PatternTerm::dump): * Source/JavaScriptCore/yarr/YarrPattern.h: (JSC::Yarr::PatternTerm::PatternTerm): (JSC::Yarr::PatternAlternative::PatternAlternative): Canonical link: https://commits.webkit.org/290982@main
1 parent 4688eeb commit 5d4feea

File tree

5 files changed

+732
-84
lines changed

5 files changed

+732
-84
lines changed
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
// With verbose set to false, this test is successful if there is no output. Set verbose to true to see expected matches.
let verbose = false;
// Formats the elements of arr as a comma-separated list with no surrounding brackets.
// String elements are wrapped in double quotes; every other element is appended using
// ordinary string concatenation (so undefined prints as "undefined" and null as "null").
// arr must provide forEach() and length. Returns the formatted string ('' for an empty array).
function arrayToString(arr)
{
    let str = '';

    arr.forEach((v, index) => {
        if (typeof v === "string")
            str += "\"" + v + "\"";
        else
            str += v;

        // Separate elements with a comma, but do not emit one after the final index.
        if (index !== (arr.length - 1))
            str += ',';
    });

    return str;
}
// Formats the own enumerable string-keyed properties of obj as "{ key: value, ... }".
// Each value is rendered with dumpValue(). An object with no such properties yields "{  }".
function objectToString(obj)
{
    // Build each "key: value" piece and join them, rather than tracking a "first entry"
    // flag; the previous flag was assigned without a declaration and leaked into the
    // global scope (a ReferenceError in strict mode).
    const str = Object.entries(obj)
        .map(([key, value]) => key + ": " + dumpValue(value))
        .join(", ");

    return "{ " + str + " }";
}
// Produces a printable description of v for test diagnostics.
//   null       -> "<null>"
//   undefined  -> "<undefined>"
//   a string   -> the string wrapped in double quotes
//   otherwise  -> "[ ... ]", where the contents are arrayToString(v) when v.length is truthy,
//                 followed by objectToString(v.groups) when v.groups is truthy.
// A value with neither a truthy length nor truthy groups yields "[  ]".
function dumpValue(v)
{
    if (v === null)
        return "<null>";

    if (v === undefined)
        return "<undefined>";

    if (typeof v === "string")
        return "\"" + v + "\"";

    let str = "";

    if (v.length)
        str += arrayToString(v);

    if (v.groups) {
        // Declared locally; this was previously an undeclared assignment that created a global.
        const groupStr = objectToString(v.groups);

        if (str.length) {
            if (groupStr.length)
                str += ", " + groupStr;
        } else
            str = groupStr;
    }

    return "[ " + str + " ]";
}
// Compares an expected match array against an actual one.
// Returns true when both are null, or when both have the same length and every element is
// strictly equal. On the first difference found, prints a "### ..." diagnostic via print()
// and returns false.
function compareArray(expected, actual)
{
    if (expected === null && actual === null)
        return true;

    if (expected === null) {
        print("### expected is null, actual is not null");
        return false;
    }

    if (actual === null) {
        print("### expected is not null, actual is null");
        return false;
    }

    if (expected.length !== actual.length) {
        print("### expected.length: " + expected.length + ", actual.length: " + actual.length);
        return false;
    }

    // Block-scoped counter; only the first mismatching index is reported.
    for (let i = 0; i < expected.length; i++) {
        if (expected[i] !== actual[i]) {
            print("### expected[" + i + "]: \"" + expected[i] + "\" !== actual[" + i + "]: \"" + actual[i] + "\"");
            return false;
        }
    }

    return true;
}
// Compares an expected named-groups object against the actual one.
// Returns true when both are null, or when every enumerable key of expected has a strictly
// equal value in actual (extra keys in actual are ignored). On the first difference found,
// prints a "### ..." diagnostic via print() and returns false.
function compareGroups(expected, actual)
{
    if (expected === null && actual === null)
        return true;

    if (expected === null) {
        print("### expected group is null, actual group is not null");
        return false;
    }

    // `== null` also matches undefined: a match result's `groups` is undefined when the
    // RegExp has no named groups, and indexing it below would throw instead of reporting.
    if (actual == null) {
        print("### expected group is not null, actual group is null");
        return false;
    }

    for (const key in expected) {
        if (expected[key] !== actual[key]) {
            print("### expected." + key + ": " + dumpValue(expected[key]) + " !== actual." + key + ": " + dumpValue(actual[key]));
            return false;
        }
    }

    return true;
}
// Running count of tests executed; used to label failures.
let testNumber = 0;

// Runs re.exec(str) and checks the result against exp (an array of expected matches, or
// null when no match is expected). When groups is supplied it is attached to exp as
// exp.groups and additionally compared against the actual result's named groups.
// Prints a FAILED line on mismatch; prints a "passed" line only when verbose is set.
function testRegExp(re, str, exp, groups)
{
    testNumber++;

    // Expected groups are meaningless without an expected match; skip rather than throw on null.
    if (groups && exp)
        exp.groups = groups;

    const actual = re.exec(str);

    let result = compareArray(exp, actual);

    // Only inspect actual.groups when there was a match. A null actual has already been
    // reported by compareArray, and dereferencing it here would throw instead of failing.
    if (exp && exp.groups && actual !== null) {
        // A RegExp without named groups yields groups === undefined; normalize it to null so
        // the comparison reports a mismatch rather than indexing undefined.
        const actualGroups = actual.groups === undefined ? null : actual.groups;
        if (!compareGroups(exp.groups, actualGroups))
            result = false;
    }

    if (result) {
        if (verbose)
            print(re.toString() + ".exec(" + dumpValue(str) + "), passed ", dumpValue(exp));
    } else
        print(re.toString() + ".exec(" + dumpValue(str) + "), FAILED test #" + testNumber + ", Expected ", dumpValue(exp), " got ", dumpValue(actual));
}
// Checks that constructing a RegExp from reString and flags throws, and that the string form
// of the thrown value equals expError. Prints a FAILED line when nothing is thrown or the
// error differs; prints a "passed" line only when verbose is set.
function testRegExpSyntaxError(reString, flags, expError)
{
    testNumber++;

    try {
        // Only the throw matters here; the constructed RegExp itself is not needed.
        new RegExp(reString, flags);
    } catch (e) {
        // Compare string forms explicitly: e is an Error object while expError is expected
        // to be text such as "SyntaxError: ...".
        if (String(e) !== String(expError))
            print("FAILED test #" + testNumber + ", Expected /" + reString + "/" + flags + " to throw \"" + expError + "\" got \"" + e + "\"");
        else if (verbose)
            print("/" + reString + "/" + flags + " passed, it threw \"" + expError + "\" as expected");
        return;
    }

    print("FAILED test #" + testNumber + ", Expected /" + reString + "/" + flags + " to throw \"" + expError + "\", but it didn't");
}
// Benchmark driver: repeatedly matches a start-anchored alternation of keyword strings.
// `re` has no trailing `$`; `re1` is the same alternation with a trailing `$`.
const re = /^(?:break|case|catch|continue|debugger|default|do|else|finally|for|function|if|return|switch|throw|try|var|while|with|null|true|false|instanceof|typeof|void|delete|new|in|this)/;

// Loop counters are declared with `let` so they do not leak as implicit globals.
for (let i = 0; i < 1000000; i++) {
    testRegExp(re, "function", ["function"]);
    testRegExp(re, "return", ["return"]);
    testRegExp(re, "let", null);
}

const re1 = /^(?:break|case|catch|continue|debugger|default|do|else|finally|for|function|if|return|switch|throw|try|var|while|with|null|true|false|instanceof|typeof|void|delete|new|in|this)$/;

for (let i = 0; i < 1000000; i++) {
    testRegExp(re1, "throw", ["throw"]);
    testRegExp(re1, "while", ["while"]);
}

0 commit comments

Comments
 (0)