@@ -18,7 +18,7 @@ Writing tests the long way is preferred here, as it will avoid the circular
18
18
dependency.
19
19
*/
20
20
21
- import std.algorithm : equal, map, startsWith;
21
+ import std.algorithm : equal, map, startsWith, max, countUntil, maxElement, filter ;
22
22
import std.uni : isAlpha, icmp;
23
23
import std.array ;
24
24
import std.conv ;
@@ -248,6 +248,9 @@ struct ParseTree
248
248
249
249
ParseTree[] children; /// The sub-trees created by sub-rules parsing.
250
250
251
+ size_t failEnd; // The furthest this tree could match the input (including !successful rules).
252
+ ParseTree[] failedChild; /// The !successful child that could still be partially parsed.
253
+
251
254
/**
252
255
Basic toString for easy pretty-printing.
253
256
*/
@@ -343,6 +346,8 @@ struct ParseTree
343
346
result.input = input;
344
347
result.begin = begin;
345
348
result.end = end;
349
+ result.failEnd = failEnd;
350
+ result.failedChild = map!(p => p.dup)(failedChild).array();
346
351
result.children = map!(p => p.dup)(children).array();
347
352
return result;
348
353
}
@@ -716,11 +721,16 @@ template literal(string s)
716
721
/**
Tries to match the literal `s` in `p.input`, starting at offset `p.end`.

On success the returned node spans exactly the literal and carries `s` as its
only match. On failure the node is empty (begin == end == `p.end`), its only
match is the quoted literal (used as the "expected" text), it has no children,
and its `failEnd` records how many leading code units of `s` did agree with the
input before the mismatch.
*/
ParseTree literal(ParseTree p)
{
    enum lit = "\"" ~ s ~ "\"";

    const size_t stop = p.end + s.length;
    const bool matched = stop <= p.input.length && p.input[p.end .. stop] == s;

    if (matched)
        return ParseTree(name, true, [s], p.input, p.end, stop);

    // Partial match: measure the shared prefix in code units so that the
    // resulting offset is directly comparable with `p.end`.
    import std.algorithm : commonPrefix;
    import std.utf : byCodeUnit;

    auto remaining = p.input[p.end .. $].byCodeUnit;
    auto agreed = remaining.commonPrefix(s.byCodeUnit);
    return ParseTree(name, false, [lit], p.input, p.end, p.end, null, p.end + agreed.length);
}
724
734
725
735
ParseTree literal(string input)
726
736
{
@@ -1239,7 +1249,6 @@ and that the second subrule ('[a-z]') failed at position 3 (so, on '1').
1239
1249
*/
1240
1250
template and(rules...) if (rules.length > 0)
1241
1251
{
1242
-
1243
1252
string ctfeGetNameAnd()
1244
1253
{
1245
1254
string name = "and!(";
@@ -1262,7 +1271,8 @@ template and(rules...) if (rules.length > 0)
1262
1271
//&& !node.name.startsWith("drop!(")
1263
1272
&& node.matches !is null
1264
1273
//&& node.begin != node.end
1265
- );
1274
+ )
1275
+ || (node.failEnd >= node.end);
1266
1276
}
1267
1277
1268
1278
version (tracer)
@@ -1281,6 +1291,7 @@ template and(rules...) if (rules.length > 0)
1281
1291
}
1282
1292
ParseTree temp = r(result);
1283
1293
result.end = temp.end;
1294
+ result.failEnd = max(result.failEnd, temp.failEnd);
1284
1295
if (temp.successful)
1285
1296
{
1286
1297
if (keepNode(temp))
@@ -1296,9 +1307,21 @@ template and(rules...) if (rules.length > 0)
1296
1307
}
1297
1308
else
1298
1309
{
1299
- result.children ~= temp;// add the failed node, to indicate which failed
1300
- if (temp.matches.length > 0)
1301
- result.matches ~= temp.matches[$-1];
1310
+ auto firstLongestFailedMatch = result.children.countUntil!(c => c.failEnd > temp.end);
1311
+ if (firstLongestFailedMatch == -1) {
1312
+ result.children ~= temp;// add the failed node, to indicate which failed
1313
+ if (temp.matches.length > 0)
1314
+ result.matches ~= temp.matches[$-1];
1315
+ } else {
1316
+ // don't add the failed node because a previous one already failed further back
1317
+ result.children = result.children[0 .. firstLongestFailedMatch+1]; // discard any intermediate correct nodes
1318
+ // This current 'and' rule has failed parsing and there is a successful child
1319
+ // that had a longer failing match. We now want to revisit that child and modify it
1320
+ // so that it is no longer successful and we want to move its failedChild into its children.
1321
+ failedChildFixup(result.children[firstLongestFailedMatch], result.children[firstLongestFailedMatch].failEnd);
1322
+ }
1323
+ result.end = result.children.map!(c => c.end).maxElement;
1324
+ result.failEnd = result.children.map!(c => c.failEnd).maxElement;
1302
1325
version (tracer)
1303
1326
{
1304
1327
if (shouldTrace(getName!(r)(), p))
@@ -1331,6 +1354,35 @@ template and(rules...) if (rules.length > 0)
1331
1354
{
1332
1355
return name;
1333
1356
}
1357
+
1358
/**
Turns a successful node that was hiding a longer failed alternative into a
failed node exposing that alternative.

If `p` itself holds a `failedChild`, the first such entry is appended to
`p.children`, `failedChild` is emptied, `p` is marked unsuccessful, `p.end` is
set to its previous `failEnd`, and `p.failEnd` is recomputed as the largest
`failEnd` among its children.

Otherwise every child whose `failEnd` equals the requested `failEnd` is visited
recursively; for each child that was rewritten, `p` takes that child's `end`,
is marked unsuccessful and has its `failEnd` recomputed, so the intermediate
nodes reflect the new state.

Params:
    p       = node to rewrite in place
    failEnd = only children whose `failEnd` equals this value are descended into

Returns: `true` if `p` or any node below it was rewritten, `false` otherwise.
*/
bool failedChildFixup(ref ParseTree p, size_t failEnd)
{
    if (p.failedChild.length == 0)
    {
        // Nothing stored on this node: look for it further down.
        bool anyRewritten = false;
        foreach (ref kid; p.children)
        {
            if (kid.failEnd == failEnd && failedChildFixup(kid, failEnd))
            {
                p.end = kid.end;
                p.successful = false;
                p.failEnd = p.children.map!(n => n.failEnd).maxElement();
                anyRewritten = true;
            }
        }
        return anyRewritten;
    }

    // This node owns the longer failed alternative: promote it to a child.
    p.children ~= p.failedChild[0];
    p.failedChild = [];
    p.successful = false;
    p.end = p.failEnd;
    p.failEnd = p.children.map!(n => n.failEnd).maxElement();
    return true;
}
1334
1386
}
1335
1387
1336
1388
unittest // 'and' unit test
@@ -1403,6 +1455,62 @@ unittest // 'and' unit test
1403
1455
, "'abc' 'de' 'f' has two child on 'abc_efghi', the one from 'abc' (success) and the one from 'de' (failure).");
1404
1456
}
1405
1457
1458
version (unittest)
{
    /// Test helper: follows the last child at every level, starting from `p`,
    /// and returns a copy of the first node reached that has no children.
    static ParseTree getError(ref ParseTree p)
    {
        ParseTree* node = &p;
        while (node.children.length > 0)
            node = &node.children[$ - 1];
        return *node;
    }
}
1465
+
1466
unittest // 'and' unit test with zeroOrMore and longest failing match
{
    // Grammar under test: ("abc" "def")* "ghi"
    alias Abc = literal!"abc";
    alias Def = literal!"def";
    alias Ghi = literal!"ghi";
    alias Grammar = and!(zeroOrMore!(and!(Abc, Def)), Ghi);

    // The input ends right after "abc".
    auto start = ParseTree("", false, [], "abc");
    auto outcome = Grammar(start);

    assert(!outcome.successful);

    // The node reached by following last children must report "def" as expected.
    auto deepest = getError(outcome);
    assert(deepest.matches[$-1] == "\"def\"", "and!(zeroOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\"");

    // Nothing is reported as matched at the top level.
    assert(outcome.matches.length == 0);
}
1481
+
1482
unittest // 'and' unit test with option and longest failing match
{
    // Grammar under test: ("abc" "def")? "ghi"
    alias Abc = literal!"abc";
    alias Def = literal!"def";
    alias Ghi = literal!"ghi";
    alias Grammar = and!(option!(and!(Abc, Def)), Ghi);

    // The input ends right after "abc".
    auto start = ParseTree("", false, [], "abc");
    auto outcome = Grammar(start);

    assert(!outcome.successful);

    // The node reached by following last children must report "def" as expected.
    auto deepest = getError(outcome);
    assert(deepest.matches[$-1] == "\"def\"", "and!(option!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abc\"");

    // Nothing is reported as matched at the top level.
    assert(outcome.matches.length == 0);
}
1497
+
1498
unittest // 'and' unit test with oneOrMore and longest failing match
{
    // Grammar under test: ("abc" "def")+ "ghi"
    alias Abc = literal!"abc";
    alias Def = literal!"def";
    alias Ghi = literal!"ghi";
    alias Grammar = and!(oneOrMore!(and!(Abc, Def)), Ghi);

    // One full "abcdef" repetition followed by a dangling "abc".
    auto start = ParseTree("", false, [], "abcdefabc");
    auto outcome = Grammar(start);

    assert(!outcome.successful);

    // The node reached by following last children must report "def" as expected.
    auto deepest = getError(outcome);
    assert(deepest.matches[$-1] == "\"def\"", "and!(oneOrMore!(and!(literal!\"abc\", literal!\"def\")), literal!\"ghi\") should expected def when input is \"abcdefabc\"");

    // The matches of the first, complete repetition are still reported.
    assert(outcome.matches == ["abc", "def"]);
}
1513
+
1406
1514
template wrapAround(alias before, alias target, alias after)
1407
1515
{
1408
1516
ParseTree wrapAround(ParseTree p)
@@ -1524,6 +1632,11 @@ template or(rules...) if (rules.length > 0)
1524
1632
{
1525
1633
temp.children = [temp];
1526
1634
temp.name = name;
1635
+ // if there is a child that failed but parsed more
1636
+ if (longestFail.failEnd > temp.end) {
1637
+ temp.failEnd = longestFail.failEnd;
1638
+ temp.failedChild = [longestFail];
1639
+ }
1527
1640
version (tracer)
1528
1641
{
1529
1642
if (shouldTrace(getName!(r)(), p))
@@ -1543,15 +1656,15 @@ template or(rules...) if (rules.length > 0)
1543
1656
failedLength[i] = temp.end;
1544
1657
if (temp.end >= longestFail.end)
1545
1658
{
1659
+ if (temp.end == longestFail.end)
1660
+ errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
1661
+ else
1662
+ errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
1546
1663
maxFailedLength = temp.end;
1547
1664
longestFail = temp;
1548
1665
names[i] = errName;
1549
1666
results[i] = temp;
1550
1667
1551
- if (temp.end == longestFail.end)
1552
- errorStringChars += (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
1553
- else
1554
- errorStringChars = (temp.matches.length > 0 ? temp.matches[$-1].length : 0) + errName.length + 4;
1555
1668
}
1556
1669
// Else, this error parsed less input than another one: we discard it.
1557
1670
}
@@ -1587,9 +1700,8 @@ template or(rules...) if (rules.length > 0)
1587
1700
longestFail.matches = longestFail.matches.length == 0 ? [orErrorString] :
1588
1701
longestFail.matches[0..$-1] // discarding longestFail error message
1589
1702
~ [orErrorString]; // and replacing it by the new, concatenated one.
1590
- longestFail.name = name;
1591
- longestFail.begin = p.end;
1592
- return longestFail;
1703
+ auto children = results[].filter!(r => max(r.end, r.failEnd) >= maxFailedLength).array();
1704
+ return ParseTree(name, false, longestFail.matches, p.input, p.end, longestFail.end, children, children.map!(c => c.failEnd).maxElement);
1593
1705
}
1594
1706
1595
1707
ParseTree or(string input)
@@ -2166,13 +2278,19 @@ template zeroOrMore(alias r)
2166
2278
result.matches ~= temp.matches;
2167
2279
result.children ~= temp;
2168
2280
result.end = temp.end;
2281
+ result.failEnd = max(result.failEnd, temp.failEnd);
2169
2282
version (tracer)
2170
2283
{
2171
2284
if (shouldTrace(getName!(r)(), p))
2172
2285
trace(traceMsg(result, name, getName!(r)()));
2173
2286
}
2174
2287
temp = r(result);
2175
2288
}
2289
+ auto maxFail = max(temp.failEnd, temp.end);
2290
+ if (maxFail > result.failEnd && maxFail > result.end) {
2291
+ result.failedChild = [temp];
2292
+ result.failEnd = maxFail;
2293
+ }
2176
2294
result.successful = true;
2177
2295
version (tracer)
2178
2296
{
@@ -2328,13 +2446,19 @@ template oneOrMore(alias r)
2328
2446
result.matches ~= temp.matches;
2329
2447
result.children ~= temp;
2330
2448
result.end = temp.end;
2449
+ result.failEnd = max(result.failEnd, temp.failEnd);
2331
2450
version (tracer)
2332
2451
{
2333
2452
if (shouldTrace(getName!(r)(), p))
2334
2453
trace(traceMsg(result, name, getName!(r)()));
2335
2454
}
2336
2455
temp = r(result);
2337
2456
}
2457
+ auto maxFail = max(temp.failEnd, temp.end);
2458
+ if (maxFail > result.failEnd && maxFail > result.end) {
2459
+ result.failedChild = [temp];
2460
+ result.failEnd = maxFail;
2461
+ }
2338
2462
result.successful = true;
2339
2463
}
2340
2464
version (tracer)
@@ -2451,9 +2575,9 @@ template option(alias r)
2451
2575
}
2452
2576
ParseTree result = r(p);
2453
2577
if (result.successful)
2454
- return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result]);
2578
+ return ParseTree(name, true, result.matches, result.input, result.begin, result.end, [result], result.failEnd );
2455
2579
else
2456
- return ParseTree(name, true, [], p.input, p.end, p.end, null);
2580
+ return ParseTree(name, true, [], p.input, p.end, p.end, null, max(result.end,result.failEnd), [result] );
2457
2581
}
2458
2582
2459
2583
ParseTree option(string input)
@@ -3474,15 +3598,19 @@ mixin template decimateTree()
3474
3598
{
3475
3599
if(p.children.length == 0) return p;
3476
3600
3601
+ bool parseFailed = !p.successful;
3602
+
3477
3603
ParseTree[] filterChildren(ParseTree pt)
3478
3604
{
3479
3605
ParseTree[] result;
3480
3606
foreach(child; pt.children)
3481
3607
{
3482
3608
import std.algorithm : startsWith;
3483
3609
3484
- if ( (isRule(child.name) && child.matches.length != 0)
3485
- || !child.successful && child.children.length == 0)
3610
+ if ( (isRule(child.name) && (child.matches.length != 0 || parseFailed))
3611
+ || (!child.successful && child.children.length == 0)
3612
+ || (!child.successful && child.name.startsWith(" or! " ) && child.children.length > 1)
3613
+ || (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0))
3486
3614
{
3487
3615
child.children = filterChildren(child);
3488
3616
result ~= child;
@@ -3499,6 +3627,37 @@ mixin template decimateTree()
3499
3627
}
3500
3628
return result;
3501
3629
}
3630
/**
Moves `failedChild` entries up from children that do not pass the retention
test onto their parent `pt`, then filters the hoisted nodes.

Children are processed depth-first. A child keeps its own `failedChild` when it
passes the retention test below or is a 'keep' node; otherwise its
`failedChild` entries are appended to `pt.failedChild` and cleared on the child.
Finally every entry of `pt.failedChild` is processed recursively and its
children are replaced by `filterChildren` of that entry.

The combinator-name prefixes are written without embedded spaces ("or!",
"keep!("), matching the naming style used for combinator names elsewhere in
this file (e.g. "and!(", "drop!("); a prefix containing spaces would never
match such a name.

Params:
    pt = node whose subtree is rewritten in place
*/
void filterFailedChildren(ref ParseTree pt)
{
    import std.algorithm : startsWith;

    foreach (ref child; pt.children)
    {
        // Recurse first, so that anything hoisted from grandchildren is
        // already on `child.failedChild` when `child` is examined.
        filterFailedChildren(child);

        const bool retained =
               (isRule(child.name) && (child.matches.length != 0 || parseFailed))
            || (!child.successful && child.children.length == 0)
            || (!child.successful && child.name.startsWith("or!") && child.children.length > 1)
            || (!pt.successful && child.successful && child.children.length == 0 && child.failedChild.length > 0)
            || child.name.startsWith("keep!("); // 'keep' nodes are never discarded.

        // This node is not retained, but the failed alternatives it carries
        // are handed over to the parent.
        if (!retained && child.failedChild.length > 0)
        {
            pt.failedChild ~= child.failedChild;
            child.failedChild = [];
        }
    }

    foreach (ref child; pt.failedChild)
    {
        filterFailedChildren(child);
        child.children = filterChildren(child);
    }
}
3659
+ if (!p.successful)
3660
+ filterFailedChildren(p);
3502
3661
p.children = filterChildren(p);
3503
3662
return p;
3504
3663
}
0 commit comments