4
4
#include < algorithm>
5
5
#include < cstring>
6
6
#include < map>
7
+ #include < set>
7
8
#include < vector>
8
9
namespace RE
9
10
{
@@ -17,7 +18,6 @@ class dfaRE : protected RE::nfaRE
17
18
std::vector<State*> n;
18
19
bool searched;
19
20
} * DStart;
20
- size_t stateNum;
21
21
std::unordered_set<int > charset;
22
22
23
23
bool useNfa;
@@ -77,7 +77,6 @@ class dfaRE : protected RE::nfaRE
77
77
if (pos == allDState.end ())
78
78
{
79
79
DState* ndsta = new DState ({{}, std::move (arr), false });
80
- stateNum++;
81
80
allDState.insert ({&(ndsta->n ), ndsta});
82
81
dsta->m [i->c ] = ndsta;
83
82
}
@@ -93,7 +92,101 @@ class dfaRE : protected RE::nfaRE
93
92
if (not i.second ->searched )
94
93
buildDfa (i.second );
95
94
}
96
- dsta->searched = false ;
95
+ }
96
+ void minimizeDfa ()
97
+ {
98
+ if (false )
99
+ return ;
100
+ using namespace std ;
101
+ unordered_map<DState*, DState*> _m;
102
+
103
+ unordered_map<DState*, int > unionid;
104
+
105
+ set<vector<DState*>> _set;
106
+
107
+ vector<DState*> ac, nac;
108
+ for (auto && i : allDState)
109
+ {
110
+ if (binary_search (begin (i.second ->n ), end (i.second ->n ), &Accept))
111
+ ac.push_back (i.second ), unionid[i.second ] = 0 ;
112
+ else
113
+ nac.push_back (i.second ), unionid[i.second ] = 1 ;
114
+ }
115
+ sort (begin (ac), end (ac));
116
+ sort (begin (nac), end (nac));
117
+ _set.insert (move (ac));
118
+ _set.insert (move (nac));
119
+ unordered_map<int , vector<DState*>> spgroup;
120
+ int lastUnionId = 1 ;
121
+ while (true )
122
+ {
123
+ bool flag = false ;
124
+ auto _next = _set.begin ();
125
+ for (auto fir = _set.begin (); fir != _set.end (); fir = _next)
126
+ {
127
+ _next = ++fir;
128
+ --fir;
129
+ auto && subset = *fir;
130
+ spgroup.clear ();
131
+ for (auto && ch : charset)
132
+ {
133
+ for (auto && st : subset)
134
+ {
135
+ auto cpos = st->m .find (ch);
136
+ if (cpos == st->m .end ())
137
+ {
138
+ spgroup[-1 ].push_back (st);
139
+ }
140
+ else
141
+ {
142
+ spgroup[unionid[cpos->second ]].push_back (st);
143
+ }
144
+ }
145
+ if (spgroup.size () == 1 )
146
+ {
147
+ spgroup.clear ();
148
+ }
149
+ else
150
+ {
151
+ flag = true ;
152
+ for (auto && i : spgroup)
153
+ {
154
+ lastUnionId++;
155
+ for (auto && j : i.second )
156
+ {
157
+ unionid[j] = lastUnionId;
158
+ }
159
+ _set.insert (move (i.second ));
160
+ }
161
+ _set.erase (fir);
162
+ break ;
163
+ }
164
+ }
165
+ }
166
+ if (not flag)
167
+ break ;
168
+ }
169
+ for (auto && subset : _set)
170
+ {
171
+ if (subset.size () == 1 )
172
+ _m[subset.front ()] = subset.front ();
173
+ else
174
+ {
175
+ auto startpos = lower_bound (begin (subset), end (subset), DStart);
176
+ if (not (startpos != subset.end () and *startpos == DStart))
177
+ startpos = subset.begin ();
178
+ for (auto && i : subset)
179
+ {
180
+ _m[i] = *startpos;
181
+ if (i != *startpos)
182
+ {
183
+ allDState.erase (&(i->n ));
184
+ delete i;
185
+ }
186
+ }
187
+ }
188
+ }
189
+ dfsRebuild (DStart, _m);
97
190
}
98
191
99
192
void minilizeDfa ()
@@ -122,7 +215,7 @@ class dfaRE : protected RE::nfaRE
122
215
int lastUnionId = 1 ;
123
216
stack<int > _stack;
124
217
_stack.push (0 );
125
- _stack.push (1 );
218
+ _stack.push (1 ); // ! bug
126
219
while (not _stack.empty ())
127
220
{
128
221
auto nowid = _stack.top ();
@@ -168,27 +261,22 @@ class dfaRE : protected RE::nfaRE
168
261
group.insert ({lastUnionId, move (i.second )});
169
262
_stack.push (lastUnionId);
170
263
}
264
+ group.erase (nowid);
171
265
break ;
172
266
}
173
267
}
174
268
if (not flag)
175
269
{
176
270
auto startpos = lower_bound (begin (pos->second ), end (pos->second ), DStart);
177
- if (startpos != pos->second .end () and *startpos == DStart)
178
- {
179
- ;
180
- }
181
- else
182
- {
271
+ if (not (startpos != pos->second .end () and *startpos == DStart))
183
272
startpos = pos->second .begin ();
184
- }
185
273
for (auto && i : pos->second )
186
274
{
275
+ _m[i] = *startpos;
187
276
if (i != *startpos)
188
277
{
189
- _m[i] = *startpos ;
278
+ allDState. erase (&(i-> n )) ;
190
279
delete i;
191
- stateNum--;
192
280
}
193
281
}
194
282
}
@@ -197,17 +285,13 @@ class dfaRE : protected RE::nfaRE
197
285
}
198
286
/**
 * Depth-first walk from `now` that redirects every outgoing transition to the
 * replacement state recorded in `rep`.
 *
 * The `searched` flag is used with inverted polarity here: a state whose flag
 * is still true has not been rewired yet, and the flag is cleared on entry so
 * each state is processed once.
 * NOTE(review): this relies on every reachable state having `searched == true`
 * when the walk starts, presumably left that way by buildDfa - confirm.
 *
 * @param now  state to rewire; a null pointer is ignored.
 * @param rep  maps each pre-minimisation state to its surviving state. Keys
 *             may be pointers to already-deleted states; they are only
 *             compared, never dereferenced.
 */
void dfsRebuild(DState* now, std::unordered_map<DState*, DState*>& rep)
{
    if (now == nullptr)
        return;
    now->searched = false;
    for (auto&& edge : now->m)
    {
        // find() instead of operator[]: the latter would insert a null
        // mapping for an unknown target, which would then be dereferenced.
        const auto found = rep.find(edge.second);
        if (found == rep.end() || found->second == nullptr)
            continue; // no replacement known: leave the edge alone and do not follow it
        edge.second = found->second;
        if (edge.second->searched)
            dfsRebuild(edge.second, rep);
    }
}
212
296
213
297
std::vector<std::pair<size_t , size_t >> nonGreadySearch (const std::string& str)
@@ -304,22 +388,19 @@ class dfaRE : protected RE::nfaRE
304
388
public:
305
389
dfaRE (const size_t maxdstate = 128 )
306
390
{
307
- stateNum = 0 ;
308
391
useNfa = false ;
309
392
MAXDSTATELIMIT = maxdstate;
310
393
DStart = nullptr ;
311
394
}
312
395
dfaRE (const std::string& rex, const size_t maxdstate = 128 )
313
396
: nfaRE(rex)
314
397
{
315
- stateNum = 0 ;
316
398
useNfa = false ;
317
399
MAXDSTATELIMIT = maxdstate;
318
400
std::vector<State*> arr;
319
401
addState2 (Start, arr);
320
402
std::sort (begin (arr), end (arr));
321
403
DStart = new DState ({{}, std::move (arr), false });
322
- stateNum++;
323
404
allDState.insert ({&(DStart->n ), DStart});
324
405
buildDfa (DStart);
325
406
if (useNfa)
@@ -330,19 +411,20 @@ class dfaRE : protected RE::nfaRE
330
411
}
331
412
else
332
413
{
333
- minilizeDfa ();
414
+ auto before = allDState.size ();
415
+ // std::cout << "Befor minilize: " << allDState.size() << std::endl;
416
+ // minilizeDfa();
417
+ minimizeDfa ();
418
+ std::cout << " After minilize: " << before - allDState.size () << std::endl;
334
419
}
335
420
}
336
421
/**
 * Frees every DFA state registered in allDState. The map's keys point into
 * the states themselves, so only the mapped values are deleted.
 */
~dfaRE()
{
    for (auto it = allDState.begin(); it != allDState.end(); ++it)
        delete it->second;
}
342
426
void assign (const std::string& rex)
343
427
{
344
- std::cout << " dfa:" << stateNum << std::endl;
345
- stateNum = 0 ;
346
428
for (auto && i : allDState)
347
429
delete i.second ;
348
430
std::map<std::vector<State*>*, DState*, mcmp>().swap (allDState);
@@ -354,7 +436,6 @@ class dfaRE : protected RE::nfaRE
354
436
addState2 (Start, arr);
355
437
std::sort (begin (arr), end (arr));
356
438
DStart = new DState ({{}, std::move (arr), false });
357
- stateNum++;
358
439
allDState.insert ({&(DStart->n ), DStart});
359
440
buildDfa (DStart);
360
441
if (useNfa)
@@ -365,7 +446,11 @@ class dfaRE : protected RE::nfaRE
365
446
}
366
447
else
367
448
{
368
- minilizeDfa ();
449
+ auto before = allDState.size ();
450
+ // std::cout << "Befor minilize: " << allDState.size() << std::endl;
451
+ // minilizeDfa();
452
+ minimizeDfa ();
453
+ std::cout << " After minilize: " << before - allDState.size () << std::endl;
369
454
}
370
455
}
371
456
bool match (const std::string& str)
0 commit comments