Skip to content

Commit e178c88

Browse files
authored
Update automaton's conversion to regex algorithm (#1)
* WIP: Rework convert automaton to regex * WIP: Remove prints * WIP: Some good progress * Update convert to regex algorithm * Update version * Fix
1 parent 691ccaf commit e178c88

File tree

10 files changed

+888
-466
lines changed

10 files changed

+888
-466
lines changed

Cargo.toml

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "regexsolver"
3-
version = "0.2.0"
3+
version = "0.2.1"
44
edition = "2021"
55
authors = ["Alexandre van Beurden"]
66
repository = "https://github.com/RegexSolver/regexsolver"
@@ -21,7 +21,6 @@ sha2 = "0.10.8"
2121
nohash-hasher = "0.2"
2222
ahash = "0.8.11"
2323
regex-syntax = "0.8.5"
24-
petgraph = "0.6.4"
2524
log = "0.4.21"
2625
rand = "0.8.5"
2726
lazy_static = "1.4.0"

src/fast_automaton/convert/mod.rs

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
use super::*;
2-
use crate::regex::RegularExpression;
32

43
mod to_regex;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
use super::*;
2+
3+
mod scc;
4+
5+
impl StateEliminationAutomaton<Range> {
6+
pub fn new(automaton: &FastAutomaton) -> Result<Option<Self>, EngineError> {
7+
if automaton.is_empty() {
8+
return Ok(None);
9+
}
10+
11+
let mut state_elimination_automaton = StateEliminationAutomaton {
12+
start_state: 0, // start_state is not set yet
13+
accept_state: 0, // accept_state is not set yet
14+
transitions: Vec::with_capacity(automaton.get_number_of_states()),
15+
transitions_in: IntMap::with_capacity(automaton.get_number_of_states()),
16+
removed_states: IntSet::new(),
17+
cyclic: false,
18+
};
19+
20+
let mut states_map = IntMap::with_capacity(automaton.get_number_of_states());
21+
22+
for from_state in automaton.transitions_iter() {
23+
let new_from_state = *states_map
24+
.entry(from_state)
25+
.or_insert_with(|| state_elimination_automaton.new_state());
26+
for (to_state, condition) in
27+
automaton.transitions_from_state_enumerate_into_iter(&from_state)
28+
{
29+
let new_to_state = *states_map
30+
.entry(to_state)
31+
.or_insert_with(|| state_elimination_automaton.new_state());
32+
33+
state_elimination_automaton.add_transition_to(
34+
new_from_state,
35+
new_to_state,
36+
GraphTransition::Weight(condition.to_range(automaton.get_spanning_set())?),
37+
);
38+
}
39+
}
40+
41+
state_elimination_automaton.start_state =
42+
*states_map.get(&automaton.get_start_state()).unwrap(); // We finally set start_state
43+
44+
if automaton.get_accept_states().len() == 1 {
45+
// If there is only one accept state with just set it
46+
state_elimination_automaton.accept_state = *states_map
47+
.get(automaton.get_accept_states().iter().next().unwrap())
48+
.unwrap();
49+
} else {
50+
// If not we create a new state that will be the new accept state
51+
state_elimination_automaton.accept_state = state_elimination_automaton.new_state();
52+
for accept_state in automaton.get_accept_states() {
53+
let accept_state = *states_map.get(accept_state).unwrap();
54+
// We add an empty string transition to the new accept state
55+
state_elimination_automaton.add_transition_to(
56+
accept_state,
57+
state_elimination_automaton.accept_state,
58+
GraphTransition::Epsilon,
59+
);
60+
}
61+
}
62+
state_elimination_automaton.identify_and_apply_components()?;
63+
//state_elimination_automaton.to_dot();
64+
Ok(Some(state_elimination_automaton))
65+
}
66+
67+
pub fn new_state(&mut self) -> usize {
68+
if let Some(new_state) = self.removed_states.clone().iter().next() {
69+
self.removed_states.remove(new_state);
70+
self.transitions_in.insert(*new_state, IntSet::new());
71+
*new_state
72+
} else {
73+
self.transitions.push(IntMap::default());
74+
self.transitions_in
75+
.insert(self.transitions.len() - 1, IntSet::new());
76+
self.transitions.len() - 1
77+
}
78+
}
79+
80+
#[inline]
81+
pub fn has_state(&self, state: State) -> bool {
82+
!(state >= self.transitions.len() || self.removed_states.contains(&state))
83+
}
84+
85+
#[inline]
86+
fn assert_state_exists(&self, state: State) {
87+
if !self.has_state(state) {
88+
panic!("The state {} does not exist", state);
89+
}
90+
}
91+
92+
pub fn add_transition_to(
93+
&mut self,
94+
from_state: State,
95+
to_state: State,
96+
transition: GraphTransition<Range>,
97+
) {
98+
self.assert_state_exists(from_state);
99+
if from_state != to_state {
100+
self.assert_state_exists(to_state);
101+
}
102+
103+
self.transitions_in
104+
.entry(to_state)
105+
.or_default()
106+
.insert(from_state);
107+
match self.transitions[from_state].entry(to_state) {
108+
Entry::Occupied(mut o) => {
109+
if let (GraphTransition::Weight(current_regex), GraphTransition::Weight(regex)) =
110+
(o.get(), transition)
111+
{
112+
o.insert(GraphTransition::Weight(current_regex.union(&regex)));
113+
} else {
114+
panic!("Cannot add transition");
115+
}
116+
}
117+
Entry::Vacant(v) => {
118+
v.insert(transition);
119+
}
120+
};
121+
}
122+
123+
/// Removes `state` together with every transition entering or leaving it.
///
/// A state that is not the last slot of `transitions` is only tombstoned: its
/// outgoing map is cleared and its id is recorded in `removed_states`, so the
/// ids of the other states stay stable. Removing the last slot shrinks
/// `transitions` and also drops any tombstoned slots that end up at the tail.
///
/// # Panics
/// Panics if `state` does not exist, or if it is the current start state or
/// accept state.
pub fn remove_state(&mut self, state: State) {
    self.assert_state_exists(state);
    if self.start_state == state || self.accept_state == state {
        panic!(
            "Can not remove the state {}, it is still used as start state or accept state.",
            state
        );
    }

    self.transitions_in.remove(&state);

    if state + 1 == self.transitions.len() {
        self.transitions.pop();

        // Tombstones that are now trailing can be dropped too: any id at or
        // beyond the new length is already reported as missing by `has_state`.
        // `checked_sub` stops the scan cleanly if the vector becomes empty.
        while let Some(last) = self.transitions.len().checked_sub(1) {
            if !self.removed_states.contains(&last) {
                break;
            }
            self.removed_states.remove(&last);
            self.transitions.pop();
        }
    } else {
        self.transitions[state].clear();
        self.removed_states.insert(state);
    }

    // Purge every remaining reference to the removed state.
    for outgoing in self.transitions.iter_mut() {
        outgoing.remove(&state);
    }
    for (_, sources) in self.transitions_in.iter_mut() {
        sources.remove(&state);
    }
}
153+
154+
/// Deletes the edge `from_state -> to_state`, if present, from both the
/// outgoing map of `from_state` and the predecessor set of `to_state`.
///
/// # Panics
/// Panics if either state does not exist.
pub fn remove_transition(&mut self, from_state: State, to_state: State) {
    self.assert_state_exists(from_state);
    if to_state != from_state {
        self.assert_state_exists(to_state);
    }

    self.transitions[from_state].remove(&to_state);

    if let Some(sources) = self.transitions_in.get_mut(&to_state) {
        sources.remove(&from_state);
    }
}
166+
167+
/// Returns the transition stored on the edge `from_state -> to_state`, or
/// `None` when `from_state` is out of range or the edge does not exist.
pub fn get_transition(
    &self,
    from_state: State,
    to_state: State,
) -> Option<&GraphTransition<Range>> {
    self.transitions
        .get(from_state)
        .and_then(|outgoing| outgoing.get(&to_state))
}
170+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
use super::*;
2+
3+
impl StateEliminationAutomaton<Range> {
4+
pub fn identify_and_apply_components(&mut self) -> Result<(), EngineError> {
5+
let mut index = 0;
6+
let mut stack = Vec::new();
7+
let mut indices = vec![-1; self.transitions.len()];
8+
let mut lowlink = vec![-1; self.transitions.len()];
9+
let mut on_stack = vec![false; self.transitions.len()];
10+
let mut scc = Vec::new();
11+
12+
for state in self.states_iter() {
13+
if self.removed_states.contains(&state) {
14+
continue;
15+
}
16+
if indices[state] == -1 {
17+
self.strongconnect(
18+
state,
19+
&mut index,
20+
&mut stack,
21+
&mut indices,
22+
&mut lowlink,
23+
&mut on_stack,
24+
&mut scc,
25+
);
26+
}
27+
}
28+
29+
let scc = scc
30+
.into_iter()
31+
.filter(|states| {
32+
let first_state = states.iter().next().unwrap();
33+
let self_loop = if let Some(transitions_in) = self.transitions_in.get(first_state) {
34+
transitions_in.contains(first_state)
35+
} else {
36+
false
37+
};
38+
states.len() != 1 || self_loop
39+
})
40+
.collect::<Vec<_>>();
41+
42+
for component in scc {
43+
self.build_component(&component)?;
44+
}
45+
46+
self.cyclic = false;
47+
48+
Ok(())
49+
}
50+
51+
#[allow(clippy::too_many_arguments)]
52+
fn strongconnect(
53+
&self,
54+
v: usize,
55+
index: &mut usize,
56+
stack: &mut Vec<usize>,
57+
indices: &mut Vec<i32>,
58+
lowlink: &mut Vec<i32>,
59+
on_stack: &mut Vec<bool>,
60+
scc: &mut Vec<Vec<usize>>,
61+
) {
62+
indices[v] = *index as i32;
63+
lowlink[v] = *index as i32;
64+
*index += 1;
65+
stack.push(v);
66+
on_stack[v] = true;
67+
68+
if let Some(neighbors) = self.transitions.get(v) {
69+
for &w in neighbors.keys() {
70+
if indices[w] == -1 {
71+
self.strongconnect(w, index, stack, indices, lowlink, on_stack, scc);
72+
lowlink[v] = lowlink[v].min(lowlink[w]);
73+
} else if on_stack[w] {
74+
lowlink[v] = lowlink[v].min(indices[w]);
75+
}
76+
}
77+
}
78+
79+
if lowlink[v] == indices[v] {
80+
let mut component = Vec::new();
81+
while let Some(w) = stack.pop() {
82+
on_stack[w] = false;
83+
component.push(w);
84+
if w == v {
85+
break;
86+
}
87+
}
88+
scc.push(component);
89+
}
90+
}
91+
92+
/// Collapses the strongly connected component `states` into nested sub-automata.
///
/// The component's states and the transitions between them are copied into a
/// separate `StateEliminationAutomaton` (created with `cyclic: true`), then the
/// states are removed from `self`. For every transition that entered the
/// component and every outside state the component led to, a clone of that
/// sub-automaton is attached to `self` as a `GraphTransition::Graph` edge.
///
/// The body contains no `?` and no `Err`, so it currently always returns `Ok(())`.
///
/// NOTE(review): `add_transition_to` panics when an edge already exists between
/// the same pair of states unless both transitions are `Weight`s; confirm that
/// the `Graph` edges added here can never collide.
fn build_component(&mut self, states: &[usize]) -> Result<(), EngineError> {
93+
// Membership set used to tell transitions inside the component from those crossing it.
let state_set = states.iter().copied().collect::<IntSet<usize>>();
94+
// Entry state (sub-automaton id) -> (outside parent state, transition) pairs entering it.
let mut start_states = IntMap::new();
95+
// Outside target state -> (exit state as sub-automaton id, transition) pairs reaching it.
let mut accept_states = IntMap::new();
96+
97+
let mut state_elimination_automaton = StateEliminationAutomaton {
98+
start_state: 0, // start_state is not set yet
99+
accept_state: 0, // accept_state is not set yet
100+
transitions: Vec::with_capacity(states.len()),
101+
transitions_in: IntMap::with_capacity(states.len()),
102+
removed_states: IntSet::new(),
103+
cyclic: true,
104+
};
105+
106+
// Component state id in `self` -> id inside the sub-automaton.
let mut states_map = IntMap::with_capacity(states.len());
107+
for from_state in states {
108+
// A component state cannot remain the accept state of `self` because it is removed
// below: a fresh accept state is created and reached through an Epsilon transition.
if *from_state == self.accept_state {
109+
self.accept_state = self.new_state();
110+
self.add_transition_to(*from_state, self.accept_state, GraphTransition::Epsilon);
111+
}
112+
// Same treatment for the start state: a fresh start state leads into the component.
if *from_state == self.start_state {
113+
self.start_state = self.new_state();
114+
self.add_transition_to(self.start_state, *from_state, GraphTransition::Epsilon);
115+
}
116+
let from_state_new = *states_map
117+
.entry(*from_state)
118+
.or_insert_with(|| state_elimination_automaton.new_state());
119+
for (to_state, transition) in self.transitions_from_state_enumerate_iter(from_state) {
120+
// A transition leaving the component is recorded as an exit instead of being copied.
if !state_set.contains(to_state) {
121+
accept_states
122+
.entry(*to_state)
123+
.or_insert_with(Vec::new)
124+
.push((from_state_new, transition.clone()));
125+
continue;
126+
}
127+
128+
let to_state_new = *states_map
129+
.entry(*to_state)
130+
.or_insert_with(|| state_elimination_automaton.new_state());
131+
132+
// Transition between two component states: copy it into the sub-automaton.
state_elimination_automaton.add_transition_to(
133+
from_state_new,
134+
to_state_new,
135+
transition.clone(),
136+
);
137+
}
138+
139+
// `in_transitions_vec` is defined outside this view; presumably it yields the
// (parent state, transition) pairs entering `from_state` - TODO confirm.
for (parent_state, transition) in self.in_transitions_vec(*from_state) {
140+
// A transition coming from outside the component is recorded as an entry.
if !state_set.contains(&parent_state) {
141+
start_states
142+
.entry(from_state_new)
143+
.or_insert_with(Vec::new)
144+
.push((parent_state, transition.clone()));
145+
}
146+
}
147+
}
148+
149+
// The component now lives in the sub-automaton; drop its states from `self`.
for state in states {
150+
self.remove_state(*state);
151+
}
152+
153+
// Reconnect every entry of the component to every outside state it used to reach.
for (start_state, parent_states) in &start_states {
154+
for (parent_state, transition) in parent_states {
155+
// An entering transition that is not the empty string is kept on an edge to a new
// intermediate state, from which the sub-automaton edge then starts.
let new_parent_state = if !transition.is_empty_string() {
156+
let new_parent_state = self.new_state();
157+
158+
self.add_transition_to(*parent_state, new_parent_state, transition.clone());
159+
new_parent_state
160+
} else {
161+
*parent_state
162+
};
163+
for (target_state, accept_states_transition) in &accept_states {
164+
// Each (entry, exit target) pair gets its own copy of the sub-automaton.
let mut new_automaton = state_elimination_automaton.clone();
165+
166+
let target_state = if accept_states_transition.len() > 1 {
167+
// Several exits reach this target: they are merged inside the copy through a new
// accept state that receives every exiting transition.
new_automaton.accept_state = new_automaton.new_state();
168+
for (accept_state, transition) in accept_states_transition {
169+
new_automaton.add_transition_to(
170+
*accept_state,
171+
new_automaton.accept_state,
172+
transition.clone(),
173+
);
174+
}
175+
*target_state
176+
} else {
177+
let (accept_state, transition) =
178+
accept_states_transition.iter().next().unwrap();
179+
180+
// Single exit: that state is the copy's accept state. An exiting transition that is
// not the empty string is kept in `self` on an edge from a new intermediate state
// to the target.
new_automaton.accept_state = *accept_state;
181+
if !transition.is_empty_string() {
182+
let new_target_state = self.new_state();
183+
self.add_transition_to(
184+
new_target_state,
185+
*target_state,
186+
transition.clone(),
187+
);
188+
new_target_state
189+
} else {
190+
*target_state
191+
}
192+
};
193+
194+
new_automaton.start_state = *start_state;
195+
196+
// The whole component is represented by one edge carrying the sub-automaton.
self.add_transition_to(
197+
new_parent_state,
198+
target_state,
199+
GraphTransition::Graph(new_automaton),
200+
);
201+
}
202+
}
203+
}
204+
205+
Ok(())
206+
}
207+
}

0 commit comments

Comments
 (0)