Skip to content

Commit ad3fa14

Browse files
author
Delta-in-hub
committed
clean code
1 parent 49ff278 commit ad3fa14

File tree

7 files changed

+136
-120
lines changed

7 files changed

+136
-120
lines changed

Regex.cc

+116
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,8 @@ void __BASERE__::dfaRE::buildDfa(DState* dsta)
407407
if (vis[i->c])
408408
continue;
409409
vis[i->c] = true;
410+
if (i->c != Match)
411+
charset.insert(i->c);
410412
std::vector<State*> arr;
411413
for (auto&& j : dsta->n)
412414
{
@@ -437,6 +439,112 @@ void __BASERE__::dfaRE::buildDfa(DState* dsta)
437439
buildDfa(i.second);
438440
}
439441
}
442+
void __BASERE__::dfaRE::minimizeDfa()
443+
{
444+
if (false)
445+
return;
446+
using namespace std;
447+
unordered_map<DState*, DState*> _m;
448+
449+
unordered_map<DState*, int> unionid;
450+
451+
set<vector<DState*>> _set;
452+
453+
vector<DState*> ac, nac;
454+
for (auto&& i : allDState)
455+
{
456+
if (binary_search(begin(i.second->n), end(i.second->n), &Accept))
457+
ac.push_back(i.second), unionid[i.second] = 0;
458+
else
459+
nac.push_back(i.second), unionid[i.second] = 1;
460+
}
461+
sort(begin(ac), end(ac));
462+
sort(begin(nac), end(nac));
463+
_set.insert(move(ac));
464+
_set.insert(move(nac));
465+
unordered_map<int, vector<DState*>> spgroup;
466+
int lastUnionId = 1;
467+
while (true)
468+
{
469+
bool flag = false;
470+
auto _next = _set.begin();
471+
for (auto fir = _set.begin(); fir != _set.end(); fir = _next)
472+
{
473+
_next = ++fir;
474+
--fir;
475+
auto&& subset = *fir;
476+
spgroup.clear();
477+
for (auto&& ch : charset)
478+
{
479+
for (auto&& st : subset)
480+
{
481+
auto cpos = st->m.find(ch);
482+
if (cpos == st->m.end())
483+
{
484+
spgroup[-1].push_back(st);
485+
}
486+
else
487+
{
488+
spgroup[unionid[cpos->second]].push_back(st);
489+
}
490+
}
491+
if (spgroup.size() == 1)
492+
{
493+
spgroup.clear();
494+
}
495+
else
496+
{
497+
flag = true;
498+
for (auto&& i : spgroup)
499+
{
500+
lastUnionId++;
501+
for (auto&& j : i.second)
502+
{
503+
unionid[j] = lastUnionId;
504+
}
505+
_set.insert(move(i.second));
506+
}
507+
_next = _set.erase(fir);
508+
break;
509+
}
510+
}
511+
}
512+
if (not flag)
513+
break;
514+
}
515+
for (auto&& subset : _set)
516+
{
517+
if (subset.size() == 1)
518+
_m[subset.front()] = subset.front();
519+
else
520+
{
521+
auto startpos = lower_bound(begin(subset), end(subset), DStart);
522+
if (not(startpos != subset.end() and *startpos == DStart))
523+
startpos = subset.begin();
524+
for (auto&& i : subset)
525+
{
526+
_m[i] = *startpos;
527+
if (i != *startpos)
528+
{
529+
allDState.erase(&(i->n));
530+
delete i;
531+
}
532+
}
533+
}
534+
}
535+
dfsRebuild(DStart, _m);
536+
}
537+
538+
// Walks the transition graph starting at `now` and redirects every outgoing
// edge to the state that `rep` maps its current target to.
//
// A state's `searched` flag is cleared when it is first reached and is what
// stops it from being expanded twice, so states are expected to arrive here
// with `searched == true` (presumably left that way by DFA construction --
// TODO confirm).
//
// `rep` is indexed with operator[], so a target without an entry would be
// inserted as a null mapping and then dereferenced; callers must supply a
// mapping for every reachable target.
void __BASERE__::dfaRE::dfsRebuild(DState* now, std::unordered_map<DState*, DState*>& rep)
{
    // Explicit work stack instead of recursion; a state is flagged before it
    // is pushed so it can never be queued twice.
    std::vector<DState*> pending;
    now->searched = false;
    pending.push_back(now);

    while (!pending.empty())
    {
        DState* current = pending.back();
        pending.pop_back();

        for (auto&& edge : current->m)
        {
            auto target = rep[edge.second];
            edge.second = target;
            if (target->searched)
            {
                target->searched = false;
                pending.push_back(target);
            }
        }
    }
}
440548

441549
__BASERE__::dfaRE::dfaRE(const size_t maxdstate)
442550
{
@@ -462,6 +570,10 @@ __BASERE__::dfaRE::dfaRE(const std::string& rex, const size_t maxdstate)
462570
delete i.second;
463571
std::map<std::vector<State*>*, DState*, mcmp>().swap(allDState);
464572
}
573+
else
574+
{
575+
minimizeDfa();
576+
}
465577
}
466578

467579
__BASERE__::dfaRE::~dfaRE()
@@ -489,6 +601,10 @@ void __BASERE__::dfaRE::assign(const std::string& rex)
489601
delete i.second;
490602
std::map<std::vector<State*>*, DState*, mcmp>().swap(allDState);
491603
}
604+
else
605+
{
606+
minimizeDfa();
607+
}
492608
}
493609

494610
bool __BASERE__::dfaRE::match(const std::string& str)

Regex.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
* Head File
44
*
55
* @author:delta-in-hub
6-
* @date:2021/04/07
76
* https://github.com/Delta-in-hub/RE--
87
*/
98
#ifndef __REGEX
109
#define __REGEX
10+
#include <ciso646>
1111
#include <map>
12+
#include <set>
1213
#include <string>
1314
#include <unordered_map>
1415
#include <unordered_set>
@@ -75,12 +76,15 @@ class dfaRE : protected nfaRE
7576
bool operator()(const std::vector<State*>* a, const std::vector<State*>* b) const;
7677
};
7778
bool useNfa;
79+
std::unordered_set<int> charset;
7880

7981
std::map<std::vector<State*>*, DState*, mcmp> allDState;
8082
void addState2(State* s, std::vector<State*>& stateSet);
8183
void buildDfa(DState* dsta);
8284
std::vector<std::pair<size_t, size_t>> greadySearch(const std::string& target);
8385
std::vector<std::pair<size_t, size_t>> nonGreadySearch(const std::string& target);
86+
void dfsRebuild(DState* now, std::unordered_map<DState*, DState*>& rep);
87+
void minimizeDfa();
8488

8589
public:
8690
dfaRE(const size_t maxdstate = 64);

Regex/Regex.hpp

+3-13
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,7 @@ namespace RE
1212
{
1313
class Regex : protected RE::dfaRE
1414
{
15-
/*
16-
* [xyz] (x|y|z)
17-
* [0-3] (0|1|2|3)
18-
* [0-2a-c] (0|1|2|a|b|c)
19-
* \w [A-Za-z0-9_]
20-
* a{3} aaa
21-
* a{3,5} aaa|aaaa|aaaaa
22-
* ||转义...
23-
* \d [0-9]
24-
* \n char(10)
25-
*/
15+
2616
protected:
2717
std::string parse(const std::string& source);
2818
std::string parse2(const std::string& src);
@@ -45,9 +35,9 @@ class Regex : protected RE::dfaRE
4535
{
4636
return dfaRE::match(tar);
4737
}
48-
std::vector<std::pair<size_t, size_t>> search(const std::string& str,bool isGreadySearch = true)
38+
std::vector<std::pair<size_t, size_t>> search(const std::string& str, bool isGreadySearch = true)
4939
{
50-
return dfaRE::search(str,isGreadySearch);
40+
return dfaRE::search(str, isGreadySearch);
5141
}
5242
};
5343

dfaRE--/dfaRE--.hpp

+5-99
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ class dfaRE : protected RE::nfaRE
158158
}
159159
_set.insert(move(i.second));
160160
}
161-
_set.erase(fir);
161+
_next = _set.erase(fir);
162162
break;
163163
}
164164
}
@@ -189,100 +189,6 @@ class dfaRE : protected RE::nfaRE
189189
dfsRebuild(DStart, _m);
190190
}
191191

192-
void minilizeDfa()
193-
{
194-
if (false)
195-
return;
196-
using namespace std;
197-
unordered_map<DState*, DState*> _m;
198-
199-
unordered_map<DState*, int> unionid;
200-
201-
unordered_map<int, vector<DState*>> group;
202-
203-
vector<DState*> ac, nac;
204-
for (auto&& i : allDState)
205-
{
206-
if (binary_search(begin(i.second->n), end(i.second->n), &Accept))
207-
ac.push_back(i.second), unionid[i.second] = 0;
208-
else
209-
nac.push_back(i.second), unionid[i.second] = 1;
210-
}
211-
sort(begin(ac), end(ac));
212-
sort(begin(nac), end(nac));
213-
group.insert({0, move(ac)});
214-
group.insert({1, move(nac)});
215-
int lastUnionId = 1;
216-
stack<int> _stack;
217-
_stack.push(0);
218-
_stack.push(1); // ! bug
219-
while (not _stack.empty())
220-
{
221-
auto nowid = _stack.top();
222-
_stack.pop();
223-
auto pos = group.find(nowid);
224-
if (pos == group.end())
225-
continue;
226-
if (pos->second.size() == 1)
227-
{
228-
_m[pos->second.front()] = pos->second.front();
229-
continue;
230-
}
231-
unordered_map<int, vector<DState*>> spgroup;
232-
bool flag = false;
233-
for (auto&& ch : charset)
234-
{
235-
for (auto&& st : pos->second)
236-
{
237-
auto cpos = st->m.find(ch);
238-
if (cpos == st->m.end())
239-
{
240-
spgroup[-1].push_back(st);
241-
}
242-
else
243-
{
244-
spgroup[unionid[cpos->second]].push_back(st);
245-
}
246-
}
247-
if (spgroup.size() == 1)
248-
{
249-
spgroup.clear();
250-
}
251-
else
252-
{
253-
flag = true;
254-
for (auto&& i : spgroup)
255-
{
256-
lastUnionId++;
257-
for (auto&& j : i.second)
258-
{
259-
unionid[j] = lastUnionId;
260-
}
261-
group.insert({lastUnionId, move(i.second)});
262-
_stack.push(lastUnionId);
263-
}
264-
group.erase(nowid);
265-
break;
266-
}
267-
}
268-
if (not flag)
269-
{
270-
auto startpos = lower_bound(begin(pos->second), end(pos->second), DStart);
271-
if (not(startpos != pos->second.end() and *startpos == DStart))
272-
startpos = pos->second.begin();
273-
for (auto&& i : pos->second)
274-
{
275-
_m[i] = *startpos;
276-
if (i != *startpos)
277-
{
278-
allDState.erase(&(i->n));
279-
delete i;
280-
}
281-
}
282-
}
283-
}
284-
dfsRebuild(DStart, _m);
285-
}
286192
void dfsRebuild(DState* now, std::unordered_map<DState*, DState*>& rep)
287193
{
288194
now->searched = false;
@@ -411,11 +317,11 @@ class dfaRE : protected RE::nfaRE
411317
}
412318
else
413319
{
414-
auto before = allDState.size();
320+
// auto before = allDState.size();
415321
// std::cout << "Befor minilize: " << allDState.size() << std::endl;
416322
// minilizeDfa();
417323
minimizeDfa();
418-
std::cout << "After minilize: " << before - allDState.size() << std::endl;
324+
// std::cout << "After minilize: " << before - allDState.size() << std::endl;
419325
}
420326
}
421327
~dfaRE()
@@ -446,11 +352,11 @@ class dfaRE : protected RE::nfaRE
446352
}
447353
else
448354
{
449-
auto before = allDState.size();
355+
// auto before = allDState.size();
450356
// std::cout << "Befor minilize: " << allDState.size() << std::endl;
451357
// minilizeDfa();
452358
minimizeDfa();
453-
std::cout << "After minilize: " << before - allDState.size() << std::endl;
359+
// std::cout << "After minilize: " << before - allDState.size() << std::endl;
454360
}
455361
}
456362
bool match(const std::string& str)

example.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//example.cc
2-
#include "./Regex/Regex.hpp"
3-
// #include "Regex.h"
2+
// #include "./Regex/Regex.hpp"
3+
#include "Regex.h"
44
#include <cassert>
55
#include <iostream>
66

performance.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#include "./Regex/Regex.hpp"
2-
// #include "Regex.h"
1+
// #include "./Regex/Regex.hpp"
2+
#include "Regex.h"
33
#include <cassert>
44
#include <chrono>
55
#include <iostream>
@@ -37,7 +37,7 @@ signed main(void)
3737
cout << endl;
3838
rex = "[a-zA-Z0-9._]+@([a-zA-Z0-9]+.)+com";
3939
rex1 = "[a-zA-Z0-9._]+@(?:[a-zA-Z0-9]+.)+com";
40-
tar = "power.overwhelming@aaaaaaaaaaaaaaaaaaaaaaaaaa";
40+
tar = "power.overwhelming@aaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
4141
cout << "std::regex use " << execTime([&rex1]() { stdre.assign(rex1); }) << "us to construct " << rex1 << endl;
4242
cout << "RE::Regex use " << execTime([&rex]() { myre.assign(rex); }) << "us to construct " << rex << endl;
4343
cout << "std::regex use " << execTime([&tar]() { f1 = (regex_match(tar, stdre)); }) << "us to match\t" << tar << endl;

test.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#include "Regex/Regex.hpp"
2-
// #include "Regex.h"
1+
// #include "Regex/Regex.hpp"
2+
#include "Regex.h"
33
#include <fstream>
44
#include <iostream>
55
#include <stdexcept>

0 commit comments

Comments
 (0)