@@ -113,9 +113,28 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
113
113
import pegged.introspection;
114
114
import std.algorithm : canFind;
115
115
GrammarInfo grammarInfo = grammarInfo(defAsParseTree.children[0 ]);
116
- string [][string ] stoppers; // Keys are the rules that stop left-recursion and the
117
- // values are arrays of strings containing the corresponding
118
- // rules for which memoization needs to be blocked.
116
+ string [] stoppers; // The rules that stop left-recursion: the first rule of every
117
+ // left-recursive cycle, each listed once. While a stopper recurses,
118
+ // memoization is blocked for all left-recursive rules.
119
+
120
+ /*
121
+ I once considered that if two left-recursive cycles intersect, unbounded left-recursion
122
+ would be prevented in both cycles if only the rule at their intersection were made a stopper.
123
+ Although true, this causes other problems, as documented in the "Mutual left-recursion" unittest below.
124
+ Therefore, we simply make the first rule in every left-recursive cycle a stopper.
125
+ Also, one might think that it suffices to prevent ordinary memoization in just the rules
126
+ that are part of the cycle. However, some larger input files for pegged/examples/extended_pascal
127
+ would fail to parse. So memoization for all left-recursive rules is disabled during
128
+ left-recursion.
129
+ */
130
+ string [] allLeftRecursiveRules;
131
+ foreach (cycle; grammarInfo.leftRecursiveCycles)
132
+ foreach (rule; cycle)
133
+ if (! canFind(allLeftRecursiveRules, rule))
134
+ allLeftRecursiveRules ~= rule;
135
+ foreach (cycle; grammarInfo.leftRecursiveCycles)
136
+ if (! stoppers.canFind(cycle[0 ]))
137
+ stoppers ~= cycle[0 ];
119
138
120
139
// Prints comment showing detected left-recursive cycles.
121
140
string printLeftRecursiveCycles ()
@@ -136,163 +155,14 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
136
155
{
137
156
import std.array : join;
138
157
string result;
139
- foreach (stopper, rules; stoppers)
140
- {
141
- result ~= stopper ~ " : " ~ rules.join(" , " ) ~ " \n " ;
142
- /* if (rules.length > 0)
143
- result ~= rules[0];
144
- foreach (rule; rules[1..$])
145
- result ~= ", " ~ rule;
146
- result ~= "\n";*/
147
- }
158
+ foreach (stopper; stoppers)
159
+ result ~= stopper ~ " : " ~ allLeftRecursiveRules.join(" , " ) ~ " \n " ;
148
160
return result.length > 0 ?
149
161
" /** Rules that stop left-recursive cycles, followed by rules for which\n "
150
162
~ " * memoization is blocked during recursion:\n " ~ result ~ " */\n\n " : " " ;
151
163
}
152
- size_t [] handledCycleIndices;
153
- // Detect interlocking cycles. Each cycle needs a different stopper.
154
- foreach (i, cycle; grammarInfo.leftRecursiveCycles)
155
- {
156
- foreach (j, otherCycle; grammarInfo.leftRecursiveCycles[i+ 1 .. $])
157
- {
158
- foreach (rule; cycle)
159
- {
160
- if (otherCycle.canFind(rule))
161
- {
162
- // cycle and otherCycle intersect at rule.
163
- // If a cycle has one single rule (direct left-recursion) then it needs to be a stopper.
164
- if (cycle.length == 1 )
165
- {
166
- if (! handledCycleIndices.canFind(i))
167
- {
168
- if (! (rule in stoppers))
169
- stoppers[rule] = [];
170
- handledCycleIndices ~= i;
171
- }
172
- // The other cycle needs a different stopper.
173
- assert (otherCycle.length > 1 );
174
- if (! handledCycleIndices.canFind(j + i + 1 ))
175
- {
176
- foreach (r; otherCycle)
177
- if (! (r in stoppers))
178
- {
179
- stoppers[r] = [];
180
- foreach (rr; otherCycle)
181
- if (rr != r)
182
- stoppers[r] ~= rr;
183
- handledCycleIndices ~= j + i + 1 ;
184
- break ;
185
- }
186
- assert (handledCycleIndices.canFind(j + i + 1 ));
187
- }
188
- }
189
- if (otherCycle.length == 1 )
190
- {
191
- if (! handledCycleIndices.canFind(j + i + 1 ))
192
- {
193
- if (! (rule in stoppers))
194
- stoppers[rule] = [];
195
- handledCycleIndices ~= j + i + 1 ;
196
- }
197
- // The other cycle needs a different stopper.
198
- assert (cycle.length > 1 );
199
- if (! handledCycleIndices.canFind(i))
200
- {
201
- foreach (r; cycle)
202
- if (! (r in stoppers))
203
- {
204
- stoppers[r] = [];
205
- foreach (rr; cycle)
206
- if (rr != r)
207
- stoppers[r] ~= rr;
208
- handledCycleIndices ~= i;
209
- break ;
210
- }
211
- assert (handledCycleIndices.canFind(i));
212
- }
213
- }
214
- // At this point, if a cycle has not been handled yet, it has more than one rule.
215
- if (! handledCycleIndices.canFind(i))
216
- {
217
- foreach (r; cycle)
218
- if (! (r in stoppers))
219
- {
220
- stoppers[r] = [];
221
- foreach (rr; cycle)
222
- if (rr != r)
223
- stoppers[r] ~= rr;
224
- handledCycleIndices ~= i;
225
- break ;
226
- }
227
- assert (handledCycleIndices.canFind(i));
228
- }
229
- if (! handledCycleIndices.canFind(j + i + 1 ))
230
- {
231
- foreach (r; otherCycle)
232
- if (! (r in stoppers))
233
- {
234
- stoppers[r] = [];
235
- foreach (rr; otherCycle)
236
- if (rr != r)
237
- stoppers[r] ~= rr;
238
- handledCycleIndices ~= j + i + 1 ;
239
- break ;
240
- }
241
- assert (handledCycleIndices.canFind(j + i + 1 ));
242
- }
243
- }
244
- }
245
- }
246
- }
247
- // Take the first node in remaining cycles as the stopper.
248
- foreach (i, cycle; grammarInfo.leftRecursiveCycles)
249
- {
250
- if (handledCycleIndices.canFind(i))
251
- continue ;
252
- stoppers[cycle[0 ]] = cycle[1 .. $].dup ;
253
- }
254
164
// Analysis completed.
255
165
256
- // / Returns code to prevent memoization of incomplete matches during left-recursion through this rule.
257
- string blockMemoForLeftRecursion (string stopper)
258
- {
259
- string result;
260
- foreach (rule; stoppers[stopper] ~ stopper)
261
- result ~= " blockMemo_" ~ rule ~ " _atPos ~= p.end;\n " ;
262
- return result;
263
- }
264
-
265
- // / Returns code that enables memoization when left-recursion has completed.
266
- string unblockMemoForLeftRecursion (string stopper)
267
- {
268
- string result;
269
- foreach (rule; stoppers[stopper] ~ stopper)
270
- // TODO investigate if p.end is always the last element.
271
- result ~= " assert(blockMemo_" ~ rule ~ " _atPos.canFind(p.end));\n "
272
- ~ " remove(blockMemo_" ~ rule ~ " _atPos, countUntil(blockMemo_" ~ rule ~ " _atPos, p.end));\n " ;
273
- return result;
274
- }
275
-
276
- // / If $(D_PARAM name) is part of a left-recursive cycle and not a stopping rule, code is
277
- // inserted to test for blocking and if blocked return with "$(D_PARAM code)(p)".
278
- string maybeBlockedMemo (string name, string code)
279
- {
280
- assert (! stoppers.keys .canFind(name));
281
- foreach (cycle; stoppers)
282
- foreach (rule; cycle)
283
- if (rule == name)
284
- return
285
- " if (blockMemo_" ~ name ~ " _atPos.canFind(p.end))\n "
286
- ~ " return " ~ code ~ " (p);\n " ;
287
- return " " ;
288
- }
289
-
290
- // / Returns a Boolean expression whether $(D_PARAM rule) is not blocked.
291
- string shouldMemoLeftRecursion (string rule)
292
- {
293
- return " !blockMemo_" ~ rule ~ " _atPos.canFind(p.end)" ;
294
- }
295
-
296
166
string generateForgetMemo ()
297
167
{
298
168
string result;
@@ -318,25 +188,6 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
318
188
{
319
189
string result;
320
190
321
- // Variables holding the block-state.
322
- string generateBlockers ()
323
- {
324
- string result;
325
- string [] visited = [];
326
- foreach (cycle; grammarInfo.leftRecursiveCycles)
327
- foreach (rule; cycle)
328
- if (! visited.canFind(rule))
329
- {
330
- visited ~= rule;
331
- result ~= "
332
- static size_t[] blockMemo_" ~ rule ~ " _atPos;" ;
333
- }
334
- if (result.length > 0 )
335
- return "
336
- import std.algorithm: canFind, countUntil, remove;" ~ result;
337
- return result;
338
- }
339
-
340
191
switch (p.name)
341
192
{
342
193
case " Pegged" :
@@ -351,9 +202,9 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
351
202
result =
352
203
" struct Generic" ~ shortGrammarName ~ " (TParseTree)
353
204
{
354
- import std.functional : toDelegate;
205
+ import std.functional : toDelegate;
355
206
import pegged.dynamic.grammar;
356
- static import pegged.peg;
207
+ static import pegged.peg;
357
208
struct " ~ grammarName ~ " \n {
358
209
enum name = \" " ~ shortGrammarName ~ " \" ;
359
210
static ParseTree delegate(ParseTree)[string] before;
@@ -364,7 +215,10 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
364
215
result ~= "
365
216
import std.typecons:Tuple, tuple;
366
217
static TParseTree[Tuple!(string, size_t)] memo;" ;
367
- result ~= generateBlockers();
218
+ if (grammarInfo.leftRecursiveCycles.length > 0 )
219
+ result ~= "
220
+ import std.algorithm: canFind, countUntil, remove;
221
+ static size_t[] blockMemoAtPos;" ;
368
222
}
369
223
370
224
result ~= "
@@ -603,13 +457,12 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
603
457
string ctfeCode = " pegged.peg.defined!(" ~ code ~ " , \" " ~ propagatedName ~ " ." ~ innerName[1 .. $- 1 ] ~ " \" )" ;
604
458
code = " hooked!(pegged.peg.defined!(" ~ code ~ " , \" " ~ propagatedName ~ " ." ~ innerName[1 .. $- 1 ] ~ " \" ), \" " ~ hookedName ~ " \" )" ;
605
459
606
- import std.algorithm.searching : canFind;
607
460
if (withMemo == Memoization.no)
608
461
result ~= " static TParseTree " ~ shortName ~ " (TParseTree p)\n "
609
462
~ " {\n "
610
463
~ " if(__ctfe)\n "
611
464
~ " {\n "
612
- ~ (stoppers.keys . canFind(shortName) ?
465
+ ~ (stoppers.canFind(shortName) ?
613
466
" assert(false, \" " ~ shortName ~ " is left-recursive, which is not supported "
614
467
~ " at compile-time. Consider using asModule().\" );\n "
615
468
:
@@ -618,7 +471,7 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
618
471
~ " }\n "
619
472
~ " else\n "
620
473
~ " {\n "
621
- ~ (stoppers.keys . canFind(shortName) ?
474
+ ~ (stoppers.canFind(shortName) ?
622
475
// This rule needs to prevent infinite left-recursion.
623
476
" static TParseTree[size_t /*position*/] seed;\n "
624
477
~ " if (auto s = p.end in seed)\n "
@@ -663,7 +516,7 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
663
516
~ " {\n "
664
517
~ " if(__ctfe)\n "
665
518
~ " {\n "
666
- ~ (stoppers.keys . canFind(shortName) ?
519
+ ~ (stoppers.canFind(shortName) ?
667
520
" assert(false, \" " ~ shortName ~ " is left-recursive, which is not supported "
668
521
~ " at compile-time. Consider using asModule().\" );\n "
669
522
:
@@ -672,17 +525,17 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
672
525
~ " }\n "
673
526
~ " else\n "
674
527
~ " {\n "
675
- ~ (stoppers.keys . canFind(shortName) ?
528
+ ~ (stoppers.canFind(shortName) ?
676
529
// This rule needs to prevent infinite left-recursion.
677
530
" static TParseTree[size_t /*position*/] seed;\n "
678
531
~ " if (auto s = p.end in seed)\n "
679
532
~ " return *s;\n "
680
- ~ " if (" ~ shouldMemoLeftRecursion(shortName) ~ " )\n "
533
+ ~ " if (!blockMemoAtPos.canFind(p.end) )\n "
681
534
~ " if (auto m = tuple(" ~ innerName ~ " , p.end) in memo)\n "
682
535
~ " return *m;\n "
683
536
~ " auto current = fail(p);\n "
684
537
~ " seed[p.end] = current;\n "
685
- ~ blockMemoForLeftRecursion(shortName)
538
+ ~ " blockMemoAtPos ~= p.end; \n "
686
539
~ " while (true)\n "
687
540
~ " {\n "
688
541
~ " auto result = " ~ code ~ " (p);\n "
@@ -704,14 +557,20 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
704
557
// care of by memo. Note that p.end remains constant for the course of recursion,
705
558
// and the length of seed only grows when nested recursion occurs.
706
559
~ " seed.remove(p.end);\n "
707
- ~ unblockMemoForLeftRecursion(shortName)
560
+ // TODO investigate if p.end is always the last element of blockMemoAtPos.
561
+ ~ " assert(blockMemoAtPos.canFind(p.end));\n "
562
+ ~ " blockMemoAtPos = blockMemoAtPos.remove(countUntil(blockMemoAtPos, p.end));\n "
708
563
~ " memo[tuple(" ~ innerName ~ " , p.end)] = current;\n "
709
564
~ " return current;\n "
710
565
~ " }\n "
711
566
~ " }\n "
712
567
:
713
568
// Possibly left-recursive rule, but infinite recursion is already prevented by another rule in the same cycle.
714
- maybeBlockedMemo(shortName, code)
569
+ (allLeftRecursiveRules.canFind(shortName) ?
570
+ " if (blockMemoAtPos.canFind(p.end))\n "
571
+ ~ " return " ~ code ~ " (p);\n "
572
+ : " "
573
+ )
715
574
~ " if (auto m = tuple(" ~ innerName ~ " , p.end) in memo)\n "
716
575
~ " return *m;\n "
717
576
~ " else\n "
@@ -3016,6 +2875,16 @@ unittest // Proper blocking of memoization
3016
2875
// Example from http://www.inf.puc-rio.br/~roberto/docs/sblp2012.pdf
3017
2876
unittest // Mutual left-recursion
3018
2877
{
2878
+ /* A thing about stoppers:
2879
+ Because P is at the intersection of left-recursive cycles P -> P and L -> P -> L, it should
2880
+ suffice to make only P a stopper to stop unbounded left-recursion. And so it does. But,
2881
+ stoppers parse greedily: they always consume the maximum of input. So below, if only P is a stopper,
2882
+ at some point P parses the complete input. Then L fails because it cannot append ".x", then M fails.
2883
+ If both are made a stopper then M succeeds. That is because P will try L when P '(n)' no longer
2884
+ consumes input, which will appear as a left-recursion to L if it is a stopper and because of that
2885
+ it will have a chance to succeed on the full input which it recorded in its seed for the previous
2886
+ recursion.
2887
+ */
3019
2888
enum LeftGrammar = `
3020
2889
Left:
3021
2890
M <- L eoi
0 commit comments