Skip to content

Commit b1c5adf

Browse files
authored
Merge pull request #269 from skoppe/better-error
keep track of the longest failed match to improve error messages
2 parents af79adb + c6d1a03 commit b1c5adf

File tree

1 file changed

+179
-20
lines changed

1 file changed

+179
-20
lines changed

pegged/peg.d

+179-20
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Writing tests the long way is preferred here, as it will avoid the circular
1818
dependency.
1919
*/
2020

21-
import std.algorithm: equal, map, startsWith;
21+
import std.algorithm: equal, map, startsWith, max, countUntil, maxElement, filter;
2222
import std.uni : isAlpha, icmp;
2323
import std.array;
2424
import std.conv;
@@ -248,6 +248,9 @@ struct ParseTree
248248
249249
ParseTree[] children; /// The sub-trees created by sub-rules parsing.
250250
251+
size_t failEnd; /// The furthest input position this tree could match (including !successful rules).
252+
ParseTree[] failedChild; /// The !successful child that could still be partially parsed.
253+
251254
/**
252255
Basic toString for easy pretty-printing.
253256
*/
@@ -343,6 +346,8 @@ struct ParseTree
343346
result.input = input;
344347
result.begin = begin;
345348
result.end = end;
349+
result.failEnd = failEnd;
350+
result.failedChild = map!(p => p.dup)(failedChild).array();
346351
result.children = map!(p => p.dup)(children).array();
347352
return result;
348353
}
@@ -716,11 +721,16 @@ template literal(string s)
716721
ParseTree literal(ParseTree p)
717722
{
718723
enum lit = "\"" ~ s ~ "\"";
724+
719725
if (p.end+s.length <= p.input.length && p.input[p.end..p.end+s.length] == s)
720726
return ParseTree(name, true, [s], p.input, p.end, p.end+s.length);
721-
else
722-
return ParseTree(name, false, [lit], p.input, p.end, p.end);
723-
}
727+
else {
728+
import std.algorithm : commonPrefix;
729+
import std.utf : byCodeUnit;
730+
auto prefix = p.input[p.end..$].byCodeUnit.commonPrefix(s.byCodeUnit);
731+
return ParseTree(name, false, [lit], p.input, p.end, p.end, null, p.end + prefix.length);
732+
}
733+
}
724734
725735
ParseTree literal(string input)
726736
{
@@ -1239,7 +1249,6 @@ and that the second subrule ('[a-z]') failed at position 3 (so, on '1').
12391249
*/
12401250
template and(rules...) if (rules.length > 0)
12411251
{
1242-
12431252
string ctfeGetNameAnd()
12441253
{
12451254
string name = "and!(";
@@ -1262,7 +1271,8 @@ template and(rules...) if (rules.length > 0)
12621271
//&& !node.name.startsWith("drop!(")
12631272
&& node.matches !is null
12641273
//&& node.begin != node.end
1265-
);
1274+
)
1275+
|| (node.failEnd >= node.end);
12661276
}
12671277
12681278
version (tracer)
@@ -1281,6 +1291,7 @@ template and(rules...) if (rules.length > 0)
12811291
}
12821292
ParseTree temp = r(result);
12831293
result.end = temp.end;
1294+
result.failEnd = max(result.failEnd, temp.failEnd);
12841295
if (temp.successful)
12851296
{
12861297
if (keepNode(temp))
@@ -1296,9 +1307,21 @@ template and(rules...) if (rules.length > 0)
12961307
}
12971308
else
12981309
{
1299-
result.children ~= temp;// add the failed node, to indicate which failed
1300-
if (temp.matches.length > 0)
1301-
result.matches ~= temp.matches[$-1];
1310+
auto firstLongestFailedMatch = result.children.countUntil!(c => c.failEnd > temp.end);
1311+
if (firstLongestFailedMatch == -1) {
1312+
result.children ~= temp;// add the failed node, to indicate which failed
1313+
if (temp.matches.length > 0)
1314+
result.matches ~= temp.matches[$-1];
1315+
} else {
1316+
// don't add the failed node because an earlier child already has a failed match that reaches further into the input
1317+
result.children = result.children[0 .. firstLongestFailedMatch+1]; // discard any intermediate correct nodes
1318+
// This current 'and' rule has failed parsing and there is a successful child
1319+
// that had a longer failing match. We now want to revisit that child and modify it
1320+
// so that it is no longer successful and we want to move its failedChild into its children.
1321+
failedChildFixup(result.children[firstLongestFailedMatch], result.children[firstLongestFailedMatch].failEnd);
1322+
}
1323+
result.end = result.children.map!(c => c.end).maxElement;
1324+
result.failEnd = result.children.map!(c => c.failEnd).maxElement;
13021325
version (tracer)
13031326
{
13041327
if (shouldTrace(getName!(r)(), p))
@@ -1331,6 +1354,35 @@ template and(rules...) if (rules.length > 0)
13311354
{
13321355
return name;
13331356
}
1357+
1358+
// Rewrites a tree that recorded a longer, failed alternative (in failedChild) so that the tree
1359+
// itself reflects that failure. Used when the enclosing 'and' rule fails: the first failedChild
1360+
// entry is appended to the node's children, successful is set to false, end is set to its failEnd,
1361+
// and failEnd is recomputed as the maximum failEnd of its children. The change is propagated
1362+
// upwards so intermediate nodes reflect the proper state. Returns true if any node was rewritten.
1363+
bool failedChildFixup(ref ParseTree p, size_t failEnd) {
1364+
if (p.failedChild.length > 0) { // this node itself holds the recorded failed alternative
1365+
p.children ~= p.failedChild[0]; // only the first failedChild entry becomes a child
1366+
p.failedChild = [];
1367+
p.successful = false;
1368+
p.end = p.failEnd; // uses the node's own failEnd, not the failEnd parameter
1369+
p.failEnd = p.children.map!(c => c.failEnd).maxElement();
1370+
return true;
1371+
} else {
1372+
bool result = false;
1373+
foreach(ref c; p.children) { // by ref: matching children are rewritten in place
1374+
if (c.failEnd != failEnd) // only descend into children whose failEnd equals the target value
1375+
continue;
1376+
if (failedChildFixup(c, failEnd)) {
1377+
p.end = c.end; // a descendant was rewritten: take over its new end and mark this node failed
1378+
p.successful = false;
1379+
p.failEnd = p.children.map!(c => c.failEnd).maxElement();
1380+
result = true;
1381+
}
1382+
}
1383+
return result;
1384+
}
1385+
}
13341386
}
13351387
13361388
unittest // 'and' unit test
@@ -1403,6 +1455,62 @@ unittest // 'and' unit test
14031455
, "'abc' 'de' 'f' has two child on 'abc_efghi', the one from 'abc' (success) and the one from 'de' (failure).");
14041456
}
14051457
1458+
version (unittest) {
1459+
// Test helper: follows the last child at every level and returns the first node
// reached that has no children.
static ParseTree getError(ref ParseTree p) {
1460+
if (p.children.length > 0)
1461+
return getError(p.children[$-1]); // descend into the last child only
1462+
return p; // no children: this is the node the tests inspect
1463+
}
1464+
}
1465+
1466+
unittest // 'and' unit test with zeroOrMore and longest failing match
1467+
{
1468+
alias literal!"abc" A;
1469+
alias literal!"def" B;
1470+
alias literal!"ghi" C;
1471+
1472+
// Zero or more "abc" "def" pairs, followed by "ghi".
alias and!(zeroOrMore!(and!(A,B)), C) Thing;
1473+
1474+
// Input text is "abc": enough for A, but nothing is left for B or C.
ParseTree input = ParseTree("",false,[], "abc");
1475+
ParseTree result = Thing(input);
1476+
1477+
assert(!result.successful); // the overall parse must fail
1478+
// The node reached by following the last children must report the quoted literal "def"
// as its last match, i.e. the error names the literal of the partially matched pair.
assert(getError(result).matches[$-1] == "\"def\"", "and!(zeroOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\"");
1479+
assert(result.matches == []); // the top-level result carries no matches
1480+
}
1481+
1482+
unittest // 'and' unit test with option and longest failing match
1483+
{
1484+
alias literal!"abc" A;
1485+
alias literal!"def" B;
1486+
alias literal!"ghi" C;
1487+
1488+
// An optional "abc" "def" pair, followed by "ghi".
alias and!(option!(and!(A,B)), C) Thing;
1489+
1490+
// Input text is "abc": enough for A, but nothing is left for B or C.
ParseTree input = ParseTree("",false,[], "abc");
1491+
ParseTree result = Thing(input);
1492+
1493+
assert(!result.successful); // the overall parse must fail
1494+
// The node reached by following the last children must report the quoted literal "def"
// as its last match, i.e. the error names the literal of the partially matched pair.
assert(getError(result).matches[$-1] == "\"def\"", "and!(option!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\"");
1495+
assert(result.matches == []); // the top-level result carries no matches
1496+
}
1497+
1498+
unittest // 'and' unit test with oneOrMore and longest failing match
1499+
{
1500+
alias literal!"abc" A;
1501+
alias literal!"def" B;
1502+
alias literal!"ghi" C;
1503+
1504+
// One or more "abc" "def" pairs, followed by "ghi".
alias and!(oneOrMore!(and!(A,B)), C) Thing;
1505+
1506+
// Input text holds one complete "abc" "def" pair followed by a lone "abc".
ParseTree input = ParseTree("",false,[], "abcdefabc");
1507+
ParseTree result = Thing(input);
1508+
1509+
assert(!result.successful); // the overall parse must fail
1510+
// The node reached by following the last children must report the quoted literal "def"
// as its last match, i.e. the error names the literal of the partially matched second pair.
assert(getError(result).matches[$-1] == "\"def\"", "and!(oneOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abcdefabc\"");
1511+
assert(result.matches == ["abc", "def"]); // the matches of the complete first pair are still reported
1512+
}
1513+
14061514
template wrapAround(alias before, alias target, alias after)
14071515
{
14081516
ParseTree wrapAround(ParseTree p)
@@ -1524,6 +1632,11 @@ template or(rules...) if (rules.length > 0)
15241632
{
15251633
temp.children = [temp];
15261634
temp.name = name;
1635+
// if there is a child that failed but parsed more
1636+
if (longestFail.failEnd > temp.end) {
1637+
temp.failEnd = longestFail.failEnd;
1638+
temp.failedChild = [longestFail];
1639+
}
15271640
version (tracer)
15281641
{
15291642
if (shouldTrace(getName!(r)(), p))
@@ -1543,15 +1656,15 @@ template or(rules...) if (rules.length > 0)
15431656
failedLength[i] = temp.end;
15441657
if (temp.end >= longestFail.end)
15451658
{
1659+
if (temp.end == longestFail.end)
1660+
errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
1661+
else
1662+
errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
15461663
maxFailedLength = temp.end;
15471664
longestFail = temp;
15481665
names[i] = errName;
15491666
results[i] = temp;
15501667
1551-
if (temp.end == longestFail.end)
1552-
errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
1553-
else
1554-
errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
15551668
}
15561669
// Else, this error parsed less input than another one: we discard it.
15571670
}
@@ -1587,9 +1700,8 @@ template or(rules...) if (rules.length > 0)
15871700
longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] :
15881701
longestFail.matches[0..$-1] // discarding longestFail error message
15891702
~ [orErrorString]; // and replacing it by the new, concatenated one.
1590-
longestFail.name = name;
1591-
longestFail.begin = p.end;
1592-
return longestFail;
1703+
auto children = results[].filter!(r => max(r.end, r.failEnd) >= maxFailedLength).array();
1704+
return ParseTree(name, false, longestFail.matches, p.input, p.end, longestFail.end, children, children.map!(c => c.failEnd).maxElement);
15931705
}
15941706
15951707
ParseTree or(string input)
@@ -2166,13 +2278,19 @@ template zeroOrMore(alias r)
21662278
result.matches ~= temp.matches;
21672279
result.children ~= temp;
21682280
result.end = temp.end;
2281+
result.failEnd = max(result.failEnd, temp.failEnd);
21692282
version (tracer)
21702283
{
21712284
if (shouldTrace(getName!(r)(), p))
21722285
trace(traceMsg(result, name, getName!(r)()));
21732286
}
21742287
temp = r(result);
21752288
}
2289+
auto maxFail = max(temp.failEnd, temp.end);
2290+
if (maxFail > result.failEnd && maxFail > result.end) {
2291+
result.failedChild = [temp];
2292+
result.failEnd = maxFail;
2293+
}
21762294
result.successful = true;
21772295
version (tracer)
21782296
{
@@ -2328,13 +2446,19 @@ template oneOrMore(alias r)
23282446
result.matches ~= temp.matches;
23292447
result.children ~= temp;
23302448
result.end = temp.end;
2449+
result.failEnd = max(result.failEnd, temp.failEnd);
23312450
version (tracer)
23322451
{
23332452
if (shouldTrace(getName!(r)(), p))
23342453
trace(traceMsg(result, name, getName!(r)()));
23352454
}
23362455
temp = r(result);
23372456
}
2457+
auto maxFail = max(temp.failEnd, temp.end);
2458+
if (maxFail > result.failEnd && maxFail > result.end) {
2459+
result.failedChild = [temp];
2460+
result.failEnd = maxFail;
2461+
}
23382462
result.successful = true;
23392463
}
23402464
version (tracer)
@@ -2451,9 +2575,9 @@ template option(alias r)
24512575
}
24522576
ParseTree result = r(p);
24532577
if (result.successful)
2454-
return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result]);
2578+
return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result], result.failEnd);
24552579
else
2456-
return ParseTree(name, true, [], p.input, p.end, p.end, null);
2580+
return ParseTree(name, true, [], p.input, p.end, p.end, null, max(result.end,result.failEnd), [result]);
24572581
}
24582582
24592583
ParseTree option(string input)
@@ -3474,15 +3598,19 @@ mixin template decimateTree()
34743598
{
34753599
if(p.children.length == 0) return p;
34763600
3601+
bool parseFailed = !p.successful;
3602+
34773603
ParseTree[] filterChildren(ParseTree pt)
34783604
{
34793605
ParseTree[] result;
34803606
foreach(child; pt.children)
34813607
{
34823608
import std.algorithm : startsWith;
34833609
3484-
if ( (isRule(child.name) && child.matches.length != 0)
3485-
|| !child.successful && child.children.length == 0)
3610+
if ( (isRule(child.name) && (child.matches.length != 0 || parseFailed))
3611+
|| (!child.successful && child.children.length == 0)
3612+
|| (!child.successful && child.name.startsWith("or!") && child.children.length > 1)
3613+
|| (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0))
34863614
{
34873615
child.children = filterChildren(child);
34883616
result ~= child;
@@ -3499,6 +3627,37 @@ mixin template decimateTree()
34993627
}
35003628
return result;
35013629
}
3630+
// Walks pt's children (each child is processed recursively before it is tested) and moves
// failedChild entries up to pt.failedChild from children that meet neither the keep
// condition below nor the "keep!(" prefix test. Afterwards every entry of pt.failedChild is
// processed the same way and its children are replaced by the result of filterChildren.
void filterFailedChildren(ref ParseTree pt)
3631+
{
3632+
foreach(ref child; pt.children)
3633+
{
3634+
filterFailedChildren(child); // handle the subtree first
3635+
import std.algorithm : startsWith;
3636+
3637+
// Same condition filterChildren uses to keep a child; nothing is done for such children.
if ( (isRule(child.name) && (child.matches.length != 0 || parseFailed))
3638+
|| (!child.successful && child.children.length == 0)
3639+
|| (!child.successful && child.name.startsWith("or!") && child.children.length > 1)
3640+
|| (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0))
3641+
{
3642+
}
3643+
else if (child.name.startsWith("keep!(")) // 'keep' nodes are never discarded.
3644+
// They have only one child, the node to keep
3645+
{
3646+
}
3647+
else if (child.failedChild.length > 0)// this child is not kept as-is: move its failedChild entries up to pt
3648+
{
3649+
pt.failedChild ~= child.failedChild;
3650+
child.failedChild = []; // the entries now belong to pt only
3651+
}
3652+
}
3653+
foreach(ref child; pt.failedChild) // includes entries moved up by the loop above
3654+
{
3655+
filterFailedChildren(child);
3656+
child.children = filterChildren(child);
3657+
}
3658+
}
3659+
if (!p.successful)
3660+
filterFailedChildren(p);
35023661
p.children = filterChildren(p);
35033662
return p;
35043663
}

0 commit comments

Comments
 (0)