Skip to content

Commit 1dc27c5

Browse files
Merge pull request #226 from veelo/LeftRecursion
Left-recursion improvements.
2 parents ade2aa5 + cfa8eb7 commit 1dc27c5

File tree

1 file changed

+54
-185
lines changed

1 file changed

+54
-185
lines changed

pegged/grammar.d

+54-185
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,28 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
113113
import pegged.introspection;
114114
import std.algorithm : canFind;
115115
GrammarInfo grammarInfo = grammarInfo(defAsParseTree.children[0]);
116-
string[][string] stoppers; // Keys are the rules that stop left-recursion and the
117-
// values are arrays of strings containing the corresponding
118-
// rules for which memoization needs to be blocked.
116+
string[] stoppers; // The rules that stop left-recursion (the first rule of every
117+
// left-recursive cycle). While a stopper is recursing, memoization
118+
// is blocked for all left-recursive rules, not per stopper.
119+
120+
/*
121+
I once considered that if two left-recursive cycles intersect, unbounded left-recursion
122+
would be prevented in both cycles if only the intersection rule would be a stopper. Although
123+
true, it causes other problems, as documented in the "Mutual left-recursion" unittest below.
124+
Therefore, we simply make the first rule in every left-recursive cycle a stopper.
125+
Also, one might think that it suffices to prevent ordinary memoization in just the rules
126+
that are part of the cycle. However, some larger input files for pegged/examples/extended_pascal
127+
would fail to parse. So memoization for all left-recursive rules is disabled during
128+
left-recursion.
129+
*/
130+
string[] allLeftRecursiveRules;
131+
foreach (cycle; grammarInfo.leftRecursiveCycles)
132+
foreach (rule; cycle)
133+
if (!canFind(allLeftRecursiveRules, rule))
134+
allLeftRecursiveRules ~= rule;
135+
foreach (cycle; grammarInfo.leftRecursiveCycles)
136+
if (!stoppers.canFind(cycle[0]))
137+
stoppers ~= cycle[0];
119138

120139
// Prints comment showing detected left-recursive cycles.
121140
string printLeftRecursiveCycles()
@@ -136,163 +155,14 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
136155
{
137156
import std.array: join;
138157
string result;
139-
foreach (stopper, rules; stoppers)
140-
{
141-
result ~= stopper ~ ": " ~ rules.join(", ") ~ "\n";
142-
/*if (rules.length > 0)
143-
result ~= rules[0];
144-
foreach (rule; rules[1..$])
145-
result ~= ", " ~ rule;
146-
result ~= "\n";*/
147-
}
158+
foreach (stopper; stoppers)
159+
result ~= stopper ~ ": " ~ allLeftRecursiveRules.join(", ") ~ "\n";
148160
return result.length > 0 ?
149161
"/** Rules that stop left-recursive cycles, followed by rules for which\n"
150162
~ " * memoization is blocked during recursion:\n" ~ result ~ "*/\n\n" : "";
151163
}
152-
size_t[] handledCycleIndices;
153-
// Detect interlocking cycles. Each cycle needs a different stopper.
154-
foreach (i, cycle; grammarInfo.leftRecursiveCycles)
155-
{
156-
foreach (j, otherCycle; grammarInfo.leftRecursiveCycles[i+1 .. $])
157-
{
158-
foreach (rule; cycle)
159-
{
160-
if (otherCycle.canFind(rule))
161-
{
162-
// cycle and otherCycle intersect at rule.
163-
// If a cycle has one single rule (direct left-recursion) then it needs to be a stopper.
164-
if (cycle.length == 1)
165-
{
166-
if (!handledCycleIndices.canFind(i))
167-
{
168-
if (!(rule in stoppers))
169-
stoppers[rule] = [];
170-
handledCycleIndices ~= i;
171-
}
172-
// The other cycle needs a different stopper.
173-
assert(otherCycle.length > 1);
174-
if (!handledCycleIndices.canFind(j + i + 1))
175-
{
176-
foreach (r; otherCycle)
177-
if (!(r in stoppers))
178-
{
179-
stoppers[r] = [];
180-
foreach (rr; otherCycle)
181-
if (rr != r)
182-
stoppers[r] ~= rr;
183-
handledCycleIndices ~= j + i + 1;
184-
break;
185-
}
186-
assert(handledCycleIndices.canFind(j + i + 1));
187-
}
188-
}
189-
if (otherCycle.length == 1)
190-
{
191-
if (!handledCycleIndices.canFind(j + i + 1))
192-
{
193-
if (!(rule in stoppers))
194-
stoppers[rule] = [];
195-
handledCycleIndices ~= j + i + 1;
196-
}
197-
// The other cycle needs a different stopper.
198-
assert(cycle.length > 1);
199-
if (!handledCycleIndices.canFind(i))
200-
{
201-
foreach (r; cycle)
202-
if (!(r in stoppers))
203-
{
204-
stoppers[r] = [];
205-
foreach (rr; cycle)
206-
if (rr != r)
207-
stoppers[r] ~= rr;
208-
handledCycleIndices ~= i;
209-
break;
210-
}
211-
assert(handledCycleIndices.canFind(i));
212-
}
213-
}
214-
// At this point, if a cycle has not been handled yet, it has more than one rule.
215-
if (!handledCycleIndices.canFind(i))
216-
{
217-
foreach (r; cycle)
218-
if (!(r in stoppers))
219-
{
220-
stoppers[r] = [];
221-
foreach (rr; cycle)
222-
if (rr != r)
223-
stoppers[r] ~= rr;
224-
handledCycleIndices ~= i;
225-
break;
226-
}
227-
assert(handledCycleIndices.canFind(i));
228-
}
229-
if (!handledCycleIndices.canFind(j + i + 1))
230-
{
231-
foreach (r; otherCycle)
232-
if (!(r in stoppers))
233-
{
234-
stoppers[r] = [];
235-
foreach (rr; otherCycle)
236-
if (rr != r)
237-
stoppers[r] ~= rr;
238-
handledCycleIndices ~= j + i + 1;
239-
break;
240-
}
241-
assert(handledCycleIndices.canFind(j + i + 1));
242-
}
243-
}
244-
}
245-
}
246-
}
247-
// Take the first node in remaining cycles as the stopper.
248-
foreach (i, cycle; grammarInfo.leftRecursiveCycles)
249-
{
250-
if (handledCycleIndices.canFind(i))
251-
continue;
252-
stoppers[cycle[0]] = cycle[1..$].dup;
253-
}
254164
// Analysis completed.
255165

256-
/// Returns code to prevent memoization of incomplete matches during left-recursion through this rule.
257-
string blockMemoForLeftRecursion(string stopper)
258-
{
259-
string result;
260-
foreach (rule; stoppers[stopper] ~ stopper)
261-
result ~= " blockMemo_" ~ rule ~ "_atPos ~= p.end;\n";
262-
return result;
263-
}
264-
265-
/// Returns code that enables memoization when left-recursion has completed.
266-
string unblockMemoForLeftRecursion(string stopper)
267-
{
268-
string result;
269-
foreach (rule; stoppers[stopper] ~ stopper)
270-
// TODO investigate if p.end is always the last element.
271-
result ~= " assert(blockMemo_" ~ rule ~ "_atPos.canFind(p.end));\n"
272-
~ " remove(blockMemo_" ~ rule ~ "_atPos, countUntil(blockMemo_" ~ rule ~ "_atPos, p.end));\n";
273-
return result;
274-
}
275-
276-
/// If $(D_PARAM name) is part of a left-recursive cycle and not a stopping rule, code is
277-
// inserted to test for blocking and if blocked return with "$(D_PARAM code)(p)".
278-
string maybeBlockedMemo(string name, string code)
279-
{
280-
assert(!stoppers.keys.canFind(name));
281-
foreach (cycle; stoppers)
282-
foreach (rule; cycle)
283-
if (rule == name)
284-
return
285-
" if (blockMemo_" ~ name ~ "_atPos.canFind(p.end))\n"
286-
~ " return " ~ code ~ "(p);\n";
287-
return "";
288-
}
289-
290-
/// Returns a Boolean expression whether $(D_PARAM rule) is not blocked.
291-
string shouldMemoLeftRecursion(string rule)
292-
{
293-
return "!blockMemo_" ~ rule ~ "_atPos.canFind(p.end)";
294-
}
295-
296166
string generateForgetMemo()
297167
{
298168
string result;
@@ -318,25 +188,6 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
318188
{
319189
string result;
320190

321-
// Variables holding the block-state.
322-
string generateBlockers()
323-
{
324-
string result;
325-
string[] visited = [];
326-
foreach (cycle; grammarInfo.leftRecursiveCycles)
327-
foreach (rule; cycle)
328-
if (!visited.canFind(rule))
329-
{
330-
visited ~= rule;
331-
result ~= "
332-
static size_t[] blockMemo_" ~ rule ~ "_atPos;";
333-
}
334-
if (result.length > 0)
335-
return "
336-
import std.algorithm: canFind, countUntil, remove;" ~ result;
337-
return result;
338-
}
339-
340191
switch (p.name)
341192
{
342193
case "Pegged":
@@ -351,9 +202,9 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
351202
result =
352203
"struct Generic" ~ shortGrammarName ~ "(TParseTree)
353204
{
354-
import std.functional : toDelegate;
205+
import std.functional : toDelegate;
355206
import pegged.dynamic.grammar;
356-
static import pegged.peg;
207+
static import pegged.peg;
357208
struct " ~ grammarName ~ "\n {
358209
enum name = \"" ~ shortGrammarName ~ "\";
359210
static ParseTree delegate(ParseTree)[string] before;
@@ -364,7 +215,10 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
364215
result ~= "
365216
import std.typecons:Tuple, tuple;
366217
static TParseTree[Tuple!(string, size_t)] memo;";
367-
result ~= generateBlockers();
218+
if (grammarInfo.leftRecursiveCycles.length > 0)
219+
result ~= "
220+
import std.algorithm: canFind, countUntil, remove;
221+
static size_t[] blockMemoAtPos;";
368222
}
369223

370224
result ~= "
@@ -603,13 +457,12 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
603457
string ctfeCode = " pegged.peg.defined!(" ~ code ~ ", \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\")";
604458
code = "hooked!(pegged.peg.defined!(" ~ code ~ ", \"" ~ propagatedName ~ "." ~ innerName[1..$-1] ~ "\"), \"" ~ hookedName ~ "\")";
605459

606-
import std.algorithm.searching: canFind;
607460
if (withMemo == Memoization.no)
608461
result ~= " static TParseTree " ~ shortName ~ "(TParseTree p)\n"
609462
~ " {\n"
610463
~ " if(__ctfe)\n"
611464
~ " {\n"
612-
~ (stoppers.keys.canFind(shortName) ?
465+
~ (stoppers.canFind(shortName) ?
613466
" assert(false, \"" ~ shortName ~ " is left-recursive, which is not supported "
614467
~ "at compile-time. Consider using asModule().\");\n"
615468
:
@@ -618,7 +471,7 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
618471
~ " }\n"
619472
~ " else\n"
620473
~ " {\n"
621-
~ (stoppers.keys.canFind(shortName) ?
474+
~ (stoppers.canFind(shortName) ?
622475
// This rule needs to prevent infinite left-recursion.
623476
" static TParseTree[size_t /*position*/] seed;\n"
624477
~ " if (auto s = p.end in seed)\n"
@@ -663,7 +516,7 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
663516
~ " {\n"
664517
~ " if(__ctfe)\n"
665518
~ " {\n"
666-
~ (stoppers.keys.canFind(shortName) ?
519+
~ (stoppers.canFind(shortName) ?
667520
" assert(false, \"" ~ shortName ~ " is left-recursive, which is not supported "
668521
~ "at compile-time. Consider using asModule().\");\n"
669522
:
@@ -672,17 +525,17 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
672525
~ " }\n"
673526
~ " else\n"
674527
~ " {\n"
675-
~ (stoppers.keys.canFind(shortName) ?
528+
~ (stoppers.canFind(shortName) ?
676529
// This rule needs to prevent infinite left-recursion.
677530
" static TParseTree[size_t /*position*/] seed;\n"
678531
~ " if (auto s = p.end in seed)\n"
679532
~ " return *s;\n"
680-
~ " if (" ~ shouldMemoLeftRecursion(shortName) ~ ")\n"
533+
~ " if (!blockMemoAtPos.canFind(p.end))\n"
681534
~ " if (auto m = tuple(" ~ innerName ~ ", p.end) in memo)\n"
682535
~ " return *m;\n"
683536
~ " auto current = fail(p);\n"
684537
~ " seed[p.end] = current;\n"
685-
~ blockMemoForLeftRecursion(shortName)
538+
~ " blockMemoAtPos ~= p.end;\n"
686539
~ " while (true)\n"
687540
~ " {\n"
688541
~ " auto result = " ~ code ~ "(p);\n"
@@ -704,14 +557,20 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
704557
// care of by memo. Note that p.end remains constant for the course of recursion,
705558
// and the length of seed only grows when nested recursion occurs.
706559
~ " seed.remove(p.end);\n"
707-
~ unblockMemoForLeftRecursion(shortName)
560+
// TODO investigate if p.end is always the last element of blockMemoAtPos.
561+
~ " assert(blockMemoAtPos.canFind(p.end));\n"
562+
~ " blockMemoAtPos = blockMemoAtPos.remove(countUntil(blockMemoAtPos, p.end));\n"
708563
~ " memo[tuple(" ~ innerName ~ ", p.end)] = current;\n"
709564
~ " return current;\n"
710565
~ " }\n"
711566
~ " }\n"
712567
:
713568
// Possibly left-recursive rule, but infinite recursion is already prevented by another rule in the same cycle.
714-
maybeBlockedMemo(shortName, code)
569+
(allLeftRecursiveRules.canFind(shortName) ?
570+
" if (blockMemoAtPos.canFind(p.end))\n"
571+
~ " return " ~ code ~ "(p);\n"
572+
: ""
573+
)
715574
~ " if (auto m = tuple(" ~ innerName ~ ", p.end) in memo)\n"
716575
~ " return *m;\n"
717576
~ " else\n"
@@ -3016,6 +2875,16 @@ unittest // Proper blocking of memoization
30162875
// Example from http://www.inf.puc-rio.br/~roberto/docs/sblp2012.pdf
30172876
unittest // Mutual left-recursion
30182877
{
2878+
/* A thing about stoppers:
2879+
Because P is at the intersection of left-recursive cycles P -> P and L -> P -> L, it should
2880+
suffice to make only P a stopper to stop unbounded left-recursion. And so it does. But,
2881+
stoppers parse greedily: they always consume the maximum of input. So below, if only P is a stopper,
2882+
at some point P parses the complete input. Then L fails because it cannot append ".x", then M fails.
2883+
If both are made a stopper then M succeeds. That is because P will try L when P '(n)' no longer
2884+
consumes input, which will appear as a left-recursion to L if it is a stopper and because of that
2885+
it will have a chance to succeed on the full input which it recorded in its seed for the previous
2886+
recursion.
2887+
*/
30192888
enum LeftGrammar = `
30202889
Left:
30212890
M <- L eoi

0 commit comments

Comments
 (0)