Skip to content

Commit 8e465c7

Browse files
authored
Fix performance issue on intersection & execution timeout not respected on intersection and generate_strings (#7)
- Fix #5: Performance issue on intersection (#7)
- Fix #6: Execution timeout not respected on intersection and generate_strings
1 parent 32d4201 commit 8e465c7

File tree

11 files changed

+309
-155
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "regexsolver"
3-
version = "0.2.2"
3+
version = "0.3.0"
44
edition = "2021"
55
authors = ["Alexandre van Beurden"]
66
repository = "https://github.com/RegexSolver/regexsolver"

src/execution_profile.rs

+37-2
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,10 @@ mod tests {
218218
};
219219
ThreadLocalParams::init_profile(&execution_profile);
220220

221-
assert_eq!(EngineError::OperationTimeOutError, term.generate_strings(100).unwrap_err());
221+
assert_eq!(
222+
EngineError::OperationTimeOutError,
223+
term.generate_strings(100).unwrap_err()
224+
);
222225

223226
let run_duration = SystemTime::now()
224227
.duration_since(start_time)
@@ -244,7 +247,39 @@ mod tests {
244247
};
245248
ThreadLocalParams::init_profile(&execution_profile);
246249

247-
assert_eq!(EngineError::OperationTimeOutError, term1.difference(&term2).unwrap_err());
250+
assert_eq!(
251+
EngineError::OperationTimeOutError,
252+
term1.difference(&term2).unwrap_err()
253+
);
254+
255+
let run_duration = SystemTime::now()
256+
.duration_since(start_time)
257+
.expect("Time went backwards")
258+
.as_millis();
259+
260+
println!("{run_duration}");
261+
assert!(run_duration <= execution_profile.execution_timeout + 50);
262+
Ok(())
263+
}
264+
265+
#[test]
266+
fn test_execution_timeout_intersection() -> Result<(), String> {
267+
let term1 = Term::from_regex(".*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz").unwrap();
268+
let term2 = Term::from_regex(".*abc.*def.*qdsqd.*sqdsqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdsqd.*sqdsqd.*qsdsqdsqdz.*abc.*def.*qdsqd.*sqdsqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz.*abc.*def.*qdqd.*qsdsqdsqdz").unwrap();
269+
270+
let start_time = SystemTime::now();
271+
let execution_profile = ExecutionProfile {
272+
max_number_of_states: 8192,
273+
start_execution_time: Some(start_time),
274+
execution_timeout: 100,
275+
max_number_of_terms: 50,
276+
};
277+
ThreadLocalParams::init_profile(&execution_profile);
278+
279+
assert_eq!(
280+
EngineError::OperationTimeOutError,
281+
term1.intersection(&[term2]).unwrap_err()
282+
);
248283

249284
let run_duration = SystemTime::now()
250285
.duration_since(start_time)

src/fast_automaton/builder.rs

+5-9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use condition::converter::ConditionConverter;
2+
13
use crate::error::EngineError;
24

35
use super::*;
@@ -29,11 +31,7 @@ impl FastAutomaton {
2931
let mut automaton: FastAutomaton = Self::new_empty();
3032
automaton.spanning_set = SpanningSet::new_total();
3133
automaton.accept(automaton.start_state);
32-
automaton.add_transition_to(
33-
0,
34-
0,
35-
&Condition::total(&automaton.spanning_set),
36-
);
34+
automaton.add_transition_to(0, 0, &Condition::total(&automaton.spanning_set));
3735
automaton
3836
}
3937

@@ -69,14 +67,12 @@ impl FastAutomaton {
6967
if new_spanning_set == &self.spanning_set {
7068
return Ok(());
7169
}
70+
let condition_converter = ConditionConverter::new(&self.spanning_set, new_spanning_set)?;
7271
for from_state in &self.transitions_vec() {
7372
for to_state in self.transitions_from_state(from_state) {
7473
match self.transitions[*from_state].entry(to_state) {
7574
Entry::Occupied(mut o) => {
76-
o.insert(
77-
o.get()
78-
.project_to(&self.spanning_set, new_spanning_set)?,
79-
);
75+
o.insert(condition_converter.convert(o.get())?);
8076
}
8177
Entry::Vacant(_) => {}
8278
};

src/fast_automaton/condition/converter.rs

+168
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
use ahash::HashMapExt;
2+
use nohash_hasher::IntMap;
3+
4+
use crate::{error::EngineError, fast_automaton::spanning_set::SpanningSet};
5+
6+
use super::Condition;
7+
8+
/// Converter to project [`Condition`] on a [`SpanningSet`].
9+
pub struct ConditionConverter<'a, 'b> {
10+
from_spanning_set: &'a SpanningSet,
11+
to_spanning_set: &'b SpanningSet,
12+
equivalence_map: Vec<Vec<usize>>,
13+
}
14+
15+
impl<'a, 'b> ConditionConverter<'a, 'b> {
16+
/// Build a converter to project [`Condition`] from `from_spanning_set` to `to_spanning_set`.
17+
///
18+
/// Currently this method does not check that the provided [`SpanningSet`] are actually convertible.
19+
pub fn new(
20+
from_spanning_set: &'a SpanningSet,
21+
to_spanning_set: &'b SpanningSet,
22+
) -> Result<Self, EngineError> {
23+
let mut to_base_map =
24+
IntMap::with_capacity(to_spanning_set.spanning_ranges_with_rest_len());
25+
for (i, base) in to_spanning_set
26+
.get_spanning_ranges_with_rest()
27+
.into_iter()
28+
.enumerate()
29+
{
30+
to_base_map.insert(i, base);
31+
}
32+
33+
let mut equivalence_map: Vec<Vec<usize>> =
34+
Vec::with_capacity(from_spanning_set.get_number_of_spanning_ranges() + 1);
35+
for from_base in from_spanning_set.get_spanning_ranges_with_rest().iter() {
36+
let mut index = Vec::with_capacity(1);
37+
for (i, to_base) in &to_base_map {
38+
if from_base == to_base || from_base.has_intersection(to_base) {
39+
index.push(*i);
40+
}
41+
}
42+
index.iter().for_each(|i| {
43+
to_base_map.remove(i);
44+
});
45+
equivalence_map.push(index);
46+
}
47+
48+
Ok(ConditionConverter {
49+
from_spanning_set,
50+
to_spanning_set,
51+
equivalence_map,
52+
})
53+
}
54+
55+
/// Project the given [`Condition`] from `from_spanning_set` to `to_spanning_set`.
56+
///
57+
/// If `from_spanning_set` is not convertible to `to_spanning_set` or if the given [`Condition`] is not based on `from_spanning_set`,
58+
/// the resulting [`Condition`] will not have any relevance.
59+
pub fn convert(&self, condition: &Condition) -> Result<Condition, EngineError> {
60+
let mut new_condition = Condition::empty(self.to_spanning_set);
61+
for (from_index, to_indexes) in self.equivalence_map.iter().enumerate() {
62+
if let Some(has) = condition.0.get(from_index) {
63+
if has && !to_indexes.is_empty() {
64+
to_indexes.iter().for_each(|&to_index| {
65+
new_condition.0.set(to_index, true);
66+
});
67+
}
68+
} else {
69+
return Err(EngineError::ConditionIndexOutOfBound);
70+
}
71+
}
72+
73+
Ok(new_condition)
74+
}
75+
76+
/// Returns `from_spanning_set`.
77+
pub fn get_from_spanning_set(&self) -> &'a SpanningSet {
78+
self.from_spanning_set
79+
}
80+
81+
/// Returns `to_spanning_set`.
82+
pub fn get_to_spanning_set(&self) -> &'b SpanningSet {
83+
self.to_spanning_set
84+
}
85+
}
86+
87+
#[cfg(test)]
88+
mod tests {
89+
use regex_charclass::{char::Char, irange::range::AnyRange};
90+
91+
use crate::Range;
92+
93+
use super::*;
94+
95+
fn get_from_spanning_set() -> SpanningSet {
96+
let ranges = vec![
97+
Range::new_from_range(Char::new('\0')..=Char::new('\u{2}')),
98+
Range::new_from_range(Char::new('\u{4}')..=Char::new('\u{6}')),
99+
Range::new_from_range(Char::new('\u{9}')..=Char::new('\u{9}')),
100+
];
101+
102+
SpanningSet::compute_spanning_set(&ranges)
103+
}
104+
105+
fn get_to_spanning_set() -> SpanningSet {
106+
let ranges = vec![
107+
Range::new_from_range(Char::new('\0')..=Char::new('\u{1}')),
108+
Range::new_from_range(Char::new('\u{2}')..=Char::new('\u{2}')),
109+
Range::new_from_range(Char::new('\u{4}')..=Char::new('\u{6}')),
110+
Range::new_from_range(Char::new('\u{9}')..=Char::new('\u{9}')),
111+
Range::new_from_range(Char::new('\u{20}')..=Char::new('\u{22}')),
112+
];
113+
114+
SpanningSet::compute_spanning_set(&ranges)
115+
}
116+
117+
#[test]
118+
fn test_convert() -> Result<(), String> {
119+
let from_spanning_set = get_from_spanning_set();
120+
let to_spanning_set = get_to_spanning_set();
121+
122+
let converter = ConditionConverter::new(&from_spanning_set, &to_spanning_set).unwrap();
123+
124+
let empty = Condition::empty(&from_spanning_set);
125+
assert!(converter.convert(&empty).unwrap().is_empty());
126+
127+
let total = Condition::total(&from_spanning_set);
128+
assert!(converter.convert(&total).unwrap().is_total());
129+
130+
let range = Range::new_from_range(Char::new('\0')..=Char::new('\u{2}'));
131+
let condition = Condition::from_range(&range, &from_spanning_set).unwrap();
132+
assert_eq!(
133+
range,
134+
converter
135+
.convert(&condition)
136+
.unwrap()
137+
.to_range(&to_spanning_set)
138+
.unwrap()
139+
);
140+
141+
let range = Range::new_from_range(Char::new('\u{4}')..=Char::new('\u{6}'));
142+
let condition = Condition::from_range(&range, &from_spanning_set).unwrap();
143+
assert_eq!(
144+
range,
145+
converter
146+
.convert(&condition)
147+
.unwrap()
148+
.to_range(&to_spanning_set)
149+
.unwrap()
150+
);
151+
152+
let range = Range::new_from_ranges(&[
153+
AnyRange::from(Char::new('\u{4}')..=Char::new('\u{6}')),
154+
AnyRange::from(Char::new('\u{9}')..=Char::new('\u{9}')),
155+
]);
156+
let condition = Condition::from_range(&range, &from_spanning_set).unwrap();
157+
assert_eq!(
158+
range,
159+
converter
160+
.convert(&condition)
161+
.unwrap()
162+
.to_range(&to_spanning_set)
163+
.unwrap()
164+
);
165+
166+
Ok(())
167+
}
168+
}

src/fast_automaton/condition/fast_bit_vec/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ impl FastBitVec {
123123
(!0) >> ((64 - bits % 64) % 64)
124124
}
125125

126-
pub fn get_hot_bits(&self) -> Vec<bool> {
126+
pub fn get_bits(&self) -> Vec<bool> {
127127
let mut hot_bits = Vec::with_capacity(self.n);
128128
for i in 0..self.n {
129129
hot_bits.push(self.get(i).unwrap());

0 commit comments

Comments
 (0)