Skip to content

Commit 59fc008

Browse files
author
Delta-in-hub
committed
Fixed all bugs in minimizeDfa
1 parent fd5dbf0 commit 59fc008

File tree

8 files changed

+124
-35
lines changed

8 files changed

+124
-35
lines changed

Regex/test/RegexTest.cc

+4-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ int main(void)
5252
// searchTest("aaa", "abcaaaaaaa");
5353
// searchTest(".*", "abcaaaaaaa");
5454
// searchTest("[\\d]{2}", "abc123123aaa21aa1aa");
55-
RE::Regex re("(ab)*(a*|b*)(ba)*");
55+
RE::Regex re;
56+
re.assign("(a*)(b|abc)");
57+
assert(re.match("abc"));
58+
cout << 1 << endl;
5659
return 0;
5760
}

Regex/test/log.txt

66 Bytes
Binary file not shown.

dfaRE--/dfaRE--.hpp

+113-28
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <algorithm>
55
#include <cstring>
66
#include <map>
7+
#include <set>
78
#include <vector>
89
namespace RE
910
{
@@ -17,7 +18,6 @@ class dfaRE : protected RE::nfaRE
1718
std::vector<State*> n;
1819
bool searched;
1920
} * DStart;
20-
size_t stateNum;
2121
std::unordered_set<int> charset;
2222

2323
bool useNfa;
@@ -77,7 +77,6 @@ class dfaRE : protected RE::nfaRE
7777
if (pos == allDState.end())
7878
{
7979
DState* ndsta = new DState({{}, std::move(arr), false});
80-
stateNum++;
8180
allDState.insert({&(ndsta->n), ndsta});
8281
dsta->m[i->c] = ndsta;
8382
}
@@ -93,7 +92,101 @@ class dfaRE : protected RE::nfaRE
9392
if (not i.second->searched)
9493
buildDfa(i.second);
9594
}
96-
dsta->searched = false;
95+
}
96+
// Merges equivalent DFA states by partition refinement.
// The states in allDState are first split into two groups (those whose NFA-state
// list contains &Accept, and the rest). Groups are then split repeatedly until,
// for every character in charset, all members of a group move into the same
// group (or all have no transition). Finally each group is collapsed onto one
// representative, the other members are removed from allDState and deleted, and
// dfsRebuild rewires the surviving transitions starting from DStart.
void minimizeDfa()
97+
{
98+
// Constant-false guard: the early return never fires, so minimization always runs.
if (false)
99+
return;
100+
using namespace std;
101+
// state -> representative of its final group; filled after refinement and handed to dfsRebuild.
unordered_map<DState*, DState*> _m;
102+
103+
// state -> id of the group it currently belongs to.
unordered_map<DState*, int> unionid;
104+
105+
// The current partition: each element is one group, held as a vector of states.
set<vector<DState*>> _set;
106+
107+
// ac collects states containing &Accept (group id 0), nac all others (group id 1).
vector<DState*> ac, nac;
108+
for (auto&& i : allDState)
109+
{
110+
// binary_search presumes i.second->n is sorted (the constructor sorts it for DStart;
// NOTE(review): confirm buildDfa does the same for every other state).
if (binary_search(begin(i.second->n), end(i.second->n), &Accept))
111+
ac.push_back(i.second), unionid[i.second] = 0;
112+
else
113+
nac.push_back(i.second), unionid[i.second] = 1;
114+
}
115+
// Groups are kept sorted by pointer value so lower_bound can be used on them below.
sort(begin(ac), end(ac));
116+
sort(begin(nac), end(nac));
117+
_set.insert(move(ac));
118+
_set.insert(move(nac));
119+
// Scratch buckets for one (group, character) pair: target group id -> members that
// go there. Key -1 holds members with no transition on the character.
unordered_map<int, vector<DState*>> spgroup;
120+
// Ids 0 and 1 are taken by the initial groups; it is incremented before each use,
// so freshly split groups get ids starting at 2.
int lastUnionId = 1;
121+
// One iteration = one full pass over the partition; repeat until a pass splits nothing.
while (true)
122+
{
123+
// Set to true as soon as any group is split (or erased) during this pass.
bool flag = false;
124+
auto _next = _set.begin();
125+
for (auto fir = _set.begin(); fir != _set.end(); fir = _next)
126+
{
127+
// Remember the successor first: fir may be erased below, which would invalidate it.
_next = ++fir;
128+
--fir;
129+
auto&& subset = *fir;
130+
spgroup.clear();
131+
for (auto&& ch : charset)
132+
{
133+
// Bucket every member of this group by the group its ch-transition leads to.
for (auto&& st : subset)
134+
{
135+
auto cpos = st->m.find(ch);
136+
if (cpos == st->m.end())
137+
{
138+
spgroup[-1].push_back(st);
139+
}
140+
else
141+
{
142+
spgroup[unionid[cpos->second]].push_back(st);
143+
}
144+
}
145+
// Exactly one bucket: all members agree on ch, so try the next character.
if (spgroup.size() == 1)
146+
{
147+
spgroup.clear();
148+
}
149+
else
150+
{
151+
// More than one bucket: split the group. An empty group also lands here
// (zero buckets), in which case nothing is inserted and it is simply erased.
flag = true;
152+
for (auto&& i : spgroup)
153+
{
154+
// Each bucket becomes a new group with a fresh id. Buckets were filled by
// iterating subset in order, so they keep subset's ordering.
lastUnionId++;
155+
for (auto&& j : i.second)
156+
{
157+
unionid[j] = lastUnionId;
158+
}
159+
_set.insert(move(i.second));
160+
}
161+
// Drop the group that was just split; subset now refers to an erased element,
// so leave the character loop immediately.
_set.erase(fir);
162+
break;
163+
}
164+
}
165+
}
166+
// A full pass without any split means the partition is stable.
if (not flag)
167+
break;
168+
}
169+
// Collapse every group onto a single representative state.
for (auto&& subset : _set)
170+
{
171+
if (subset.size() == 1)
172+
_m[subset.front()] = subset.front();
173+
else
174+
{
175+
// Use DStart as the representative when it is in this group, so that it is never
// deleted; otherwise use the group's first element.
auto startpos = lower_bound(begin(subset), end(subset), DStart);
176+
if (not(startpos != subset.end() and *startpos == DStart))
177+
startpos = subset.begin();
178+
for (auto&& i : subset)
179+
{
180+
_m[i] = *startpos;
181+
if (i != *startpos)
182+
{
183+
// Erase from allDState before deleting: the key passed to erase is a pointer
// to the state's own member n.
allDState.erase(&(i->n));
184+
delete i;
185+
}
186+
}
187+
}
188+
}
189+
// Redirect the transitions reachable from DStart to the representatives recorded in _m.
dfsRebuild(DStart, _m);
97190
}
98191

99192
void minilizeDfa()
@@ -122,7 +215,7 @@ class dfaRE : protected RE::nfaRE
122215
int lastUnionId = 1;
123216
stack<int> _stack;
124217
_stack.push(0);
125-
_stack.push(1);
218+
_stack.push(1); // ! bug
126219
while (not _stack.empty())
127220
{
128221
auto nowid = _stack.top();
@@ -168,27 +261,22 @@ class dfaRE : protected RE::nfaRE
168261
group.insert({lastUnionId, move(i.second)});
169262
_stack.push(lastUnionId);
170263
}
264+
group.erase(nowid);
171265
break;
172266
}
173267
}
174268
if (not flag)
175269
{
176270
auto startpos = lower_bound(begin(pos->second), end(pos->second), DStart);
177-
if (startpos != pos->second.end() and *startpos == DStart)
178-
{
179-
;
180-
}
181-
else
182-
{
271+
if (not(startpos != pos->second.end() and *startpos == DStart))
183272
startpos = pos->second.begin();
184-
}
185273
for (auto&& i : pos->second)
186274
{
275+
_m[i] = *startpos;
187276
if (i != *startpos)
188277
{
189-
_m[i] = *startpos;
278+
allDState.erase(&(i->n));
190279
delete i;
191-
stateNum--;
192280
}
193281
}
194282
}
@@ -197,17 +285,13 @@ class dfaRE : protected RE::nfaRE
197285
}
198286
void dfsRebuild(DState* now, std::unordered_map<DState*, DState*>& rep)
199287
{
200-
using namespace std;
201-
if (now == nullptr)
202-
return;
203-
now->searched = true;
288+
now->searched = false;
204289
for (auto&& i : now->m)
205290
{
206291
i.second = rep[i.second];
207-
if (not i.second->searched)
292+
if (i.second->searched)
208293
dfsRebuild(i.second, rep);
209294
}
210-
now->searched = false;
211295
}
212296

213297
std::vector<std::pair<size_t, size_t>> nonGreadySearch(const std::string& str)
@@ -304,22 +388,19 @@ class dfaRE : protected RE::nfaRE
304388
public:
305389
dfaRE(const size_t maxdstate = 128)
306390
{
307-
stateNum = 0;
308391
useNfa = false;
309392
MAXDSTATELIMIT = maxdstate;
310393
DStart = nullptr;
311394
}
312395
dfaRE(const std::string& rex, const size_t maxdstate = 128)
313396
: nfaRE(rex)
314397
{
315-
stateNum = 0;
316398
useNfa = false;
317399
MAXDSTATELIMIT = maxdstate;
318400
std::vector<State*> arr;
319401
addState2(Start, arr);
320402
std::sort(begin(arr), end(arr));
321403
DStart = new DState({{}, std::move(arr), false});
322-
stateNum++;
323404
allDState.insert({&(DStart->n), DStart});
324405
buildDfa(DStart);
325406
if (useNfa)
@@ -330,19 +411,20 @@ class dfaRE : protected RE::nfaRE
330411
}
331412
else
332413
{
333-
minilizeDfa();
414+
auto before = allDState.size();
415+
// std::cout << "Befor minilize: " << allDState.size() << std::endl;
416+
// minilizeDfa();
417+
minimizeDfa();
418+
std::cout << "After minilize: " << before - allDState.size() << std::endl;
334419
}
335420
}
336421
~dfaRE()
337422
{
338-
std::cout << "dfa:" << stateNum << std::endl;
339423
for (auto&& i : allDState)
340424
delete i.second;
341425
}
342426
void assign(const std::string& rex)
343427
{
344-
std::cout << "dfa:" << stateNum << std::endl;
345-
stateNum = 0;
346428
for (auto&& i : allDState)
347429
delete i.second;
348430
std::map<std::vector<State*>*, DState*, mcmp>().swap(allDState);
@@ -354,7 +436,6 @@ class dfaRE : protected RE::nfaRE
354436
addState2(Start, arr);
355437
std::sort(begin(arr), end(arr));
356438
DStart = new DState({{}, std::move(arr), false});
357-
stateNum++;
358439
allDState.insert({&(DStart->n), DStart});
359440
buildDfa(DStart);
360441
if (useNfa)
@@ -365,7 +446,11 @@ class dfaRE : protected RE::nfaRE
365446
}
366447
else
367448
{
368-
minilizeDfa();
449+
auto before = allDState.size();
450+
// std::cout << "Befor minilize: " << allDState.size() << std::endl;
451+
// minilizeDfa();
452+
minimizeDfa();
453+
std::cout << "After minilize: " << before - allDState.size() << std::endl;
369454
}
370455
}
371456
bool match(const std::string& str)

dfaRE--/test/dfaRETest.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ signed main(void)
3737
// searchTest("ab|cd", "abcdhfcd");
3838
// searchTest("aa*", "abcaaaaaaa");
3939
// searchTest("aaa", "abcaaaaaaa");
40-
RE::dfaRE re("(ab)*(a*|b*)(ba)*");
41-
cout << re.match("abaacaaaaba");
40+
RE::dfaRE re("[a-z]+@com");
41+
cout << re.match("asd@com");
4242
return 0;
4343
}

example.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//example.cc
2-
#include "Regex.h"
2+
#include "./Regex/Regex.hpp"
3+
// #include "Regex.h"
34
#include <cassert>
45
#include <iostream>
56

log.txt

16.1 KB
Binary file not shown.

performance.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// #include "./Regex/Regex.hpp"
2-
#include "Regex.h"
1+
#include "./Regex/Regex.hpp"
2+
// #include "Regex.h"
33
#include <cassert>
44
#include <chrono>
55
#include <iostream>

test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ void rexTest(const string& s)
4747
cnt++;
4848
}
4949
fs.close();
50-
cout << s << ' ' << cnt << '/' << linenum << " cases Done and No error!" << endl;
50+
cout << s << ' ' << cnt << '/' << linenum << " cases Done!" << endl;
5151
}
5252

5353
void searchTest(const std::string& rex, const std::string& source, bool flag = true)

0 commit comments

Comments
 (0)