Skip to content

Commit 1bd84bb

Browse files
committed
Refactoring and improvement of documentation
1 parent 24798f5 commit 1bd84bb

File tree

26 files changed

+737
-458
lines changed

26 files changed

+737
-458
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "regexsolver"
3-
version = "0.1.1"
3+
version = "0.2.0"
44
edition = "2021"
55
authors = ["Alexandre van Beurden"]
66
repository = "https://github.com/RegexSolver/regexsolver"

README.md

+49-11
Original file line numberDiff line numberDiff line change
@@ -2,34 +2,72 @@
22

33
[![Crates.io Version](https://img.shields.io/crates/v/regexsolver)](https://crates.io/crates/regexsolver)
44

5-
This repository contains the code of RegexSolver engine.
5+
This repository contains the code of the [RegexSolver](https://regexsolver.com/) engine.
6+
7+
For more information, you can check the library's [documentation](https://docs.rs/regexsolver/latest/regexsolver/).
8+
9+
If you want to use this library with other programming languages, we provide a wide range of wrappers:
10+
11+
- [regexsolver-java](https://github.com/RegexSolver/regexsolver-java)
12+
- [regexsolver-js](https://github.com/RegexSolver/regexsolver-js)
13+
- [regexsolver-python](https://github.com/RegexSolver/regexsolver-python)
14+
15+
For more information about how to use the wrappers, you can refer to our [getting started guide](https://docs.regexsolver.com/getting-started.html).
616

717
## Installation
818

919
Add the following line in your `Cargo.toml`:
1020

1121
```toml
1222
[dependencies]
13-
regexsolver = "0.1"
23+
regexsolver = "0.2"
1424
```
1525

1626
## Examples
1727

28+
### Union
29+
30+
```rust
31+
use regexsolver::Term;
32+
33+
let term1 = Term::from_regex("abc").unwrap();
34+
let term2 = Term::from_regex("de").unwrap();
35+
let term3 = Term::from_regex("fghi").unwrap();
36+
37+
let union = term1.union(&[term2, term3]).unwrap();
38+
39+
if let Term::RegularExpression(regex) = union {
40+
println!("{}", regex.to_string()); // (abc|de|fghi)
41+
}
42+
```
43+
44+
### Intersection
45+
1846
```rust
19-
use regexsolver::{intersection, regex::RegularExpression, subtraction, Term};
47+
use regexsolver::Term;
2048

49+
let term1 = Term::from_regex("(abc|de){2}").unwrap();
50+
let term2 = Term::from_regex("de.*").unwrap();
51+
let term3 = Term::from_regex(".*abc").unwrap();
2152

22-
let term1 = Term::RegularExpression(RegularExpression::new("(abc|de|fg){2,}").unwrap());
23-
let term2 = Term::RegularExpression(RegularExpression::new("de.*").unwrap());
24-
let term3 = Term::RegularExpression(RegularExpression::new(".*abc").unwrap());
53+
let intersection = term1.intersection(&[term2, term3]).unwrap();
2554

26-
let term4 = Term::RegularExpression(RegularExpression::new(".+(abc|de).+").unwrap());
55+
if let Term::RegularExpression(regex) = intersection {
56+
println!("{}", regex.to_string()); // deabc
57+
}
58+
```
59+
60+
### Difference/Subtraction
61+
62+
```rust
63+
use regexsolver::Term;
2764

28-
let intersection = intersection(&[term1, term2, term3]).unwrap();
65+
let term1 = Term::from_regex("(abc|de)").unwrap();
66+
let term2 = Term::from_regex("de").unwrap();
2967

30-
let result = subtraction(&intersection, &term4).unwrap();
68+
let subtraction = term1.subtraction(&term2).unwrap();
3169

32-
if let Term::RegularExpression(regex) = result {
33-
println!("result={}", regex); // result=de(fg)*abc
70+
if let Term::RegularExpression(regex) = subtraction {
71+
assert_eq!("abc", regex.to_string());
3472
}
3573
```

src/cardinality/mod.rs

+5-6
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
use serde_derive::{Deserialize, Serialize};
22

3-
pub trait IntegerTrait {}
4-
5-
impl IntegerTrait for u128 {}
6-
impl IntegerTrait for u32 {}
7-
3+
/// Represent a number.
84
#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize)]
95
#[serde(tag = "type", content = "value")]
10-
pub enum Cardinality<U: IntegerTrait> {
6+
pub enum Cardinality<U> {
7+
/// An infinite number.
118
Infinite,
9+
/// A finite number.
1210
Integer(U),
11+
/// A finite number too big to be represented.
1312
BigInteger,
1413
}

src/error/mod.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,26 @@ use std::fmt::{self};
22

33
use crate::tokenizer::token::TokenError;
44

5+
/// An error thrown by the engine.
56
#[derive(Debug, PartialEq, Eq)]
67
pub enum EngineError {
8+
/// Invalid character used in regex.
79
InvalidCharacterInRegex,
10+
/// The operation took too much time.
811
OperationTimeOutError,
12+
/// The given automaton should be deterministic.
913
AutomatonShouldBeDeterministic,
14+
/// The automaton has too many states.
1015
AutomatonHasTooManyStates,
16+
/// The regular expression can not be parsed.
1117
RegexSyntaxError(String),
18+
/// Too many terms are used in the operation.
1219
TooMuchTerms(usize, usize),
20+
/// The provided range can not be built from the spanning set.
1321
ConditionInvalidRange,
22+
/// The provided index is out of bound of the condition.
1423
ConditionIndexOutOfBound,
24+
/// There is an error with one of the tokens.
1525
TokenError(TokenError),
1626
}
1727

@@ -25,7 +35,7 @@ impl fmt::Display for EngineError {
2535
EngineError::RegexSyntaxError(err) => write!(f, "{err}."),
2636
EngineError::TooMuchTerms(max, got) => write!(f, "Too many terms are used in this operation, the maximum allowed for your plan is {max} and you used {got}."),
2737
EngineError::TokenError(err) => write!(f, "{err}."),
28-
EngineError::ConditionInvalidRange => write!(f, "The provided range can not be built from bases."),
38+
EngineError::ConditionInvalidRange => write!(f, "The provided range can not be built from the spanning set."),
2939
EngineError::ConditionIndexOutOfBound => write!(f, "The provided index is out of bound of the condition."),
3040
}
3141
}

src/execution_profile.rs

+99-7
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,97 @@ use std::{cell::RefCell, time::SystemTime};
22

33
use crate::error::EngineError;
44

5-
/// Hold information about limitations and constraints of operations execution:
6-
/// - max_number_of_states: the maximum number of states that a non-determinitic finite automaton can hold.
7-
/// - start_execution_time: timestamp of when the execution has started.
8-
/// - execution_timeout: the longest time in milliseconds that an operation execution can last.
9-
/// - max_number_of_terms: the maximum number of terms that an operation can have.
5+
/// Hold settings about limitations and constraints of operations execution within the engine.
6+
///
7+
/// To apply the settings on the current thread you need to call the following function:
8+
/// ```
9+
/// use regexsolver::execution_profile::{ExecutionProfile, ThreadLocalParams};
10+
///
11+
/// let execution_profile = ExecutionProfile {
12+
/// max_number_of_states: 1,
13+
/// start_execution_time: None,
14+
/// execution_timeout: 1000,
15+
/// max_number_of_terms: 10,
16+
/// };
17+
///
18+
/// // Store the settings on the current thread.
19+
/// ThreadLocalParams::init_profile(&execution_profile);
20+
/// ```
21+
///
22+
/// # Examples
23+
///
24+
/// ## Limiting the number of states
25+
/// ```
26+
/// use regexsolver::{Term, execution_profile::{ExecutionProfile, ThreadLocalParams}, error::EngineError};
27+
///
28+
/// let term1 = Term::from_regex(".*abc.*").unwrap();
29+
/// let term2 = Term::from_regex(".*def.*").unwrap();
30+
///
31+
/// let execution_profile = ExecutionProfile {
32+
/// max_number_of_states: 1,
33+
/// start_execution_time: None,
34+
/// execution_timeout: 1000,
35+
/// max_number_of_terms: 10,
36+
/// };
37+
/// ThreadLocalParams::init_profile(&execution_profile);
38+
///
39+
/// assert_eq!(EngineError::AutomatonHasTooManyStates, term1.intersection(&[term2]).unwrap_err());
40+
/// ```
41+
///
42+
/// ## Limiting the number of terms
43+
/// ```
44+
/// use regexsolver::{Term, execution_profile::{ExecutionProfile, ThreadLocalParams}, error::EngineError};
45+
///
46+
/// let term1 = Term::from_regex(".*abc.*").unwrap();
47+
/// let term2 = Term::from_regex(".*def.*").unwrap();
48+
/// let term3 = Term::from_regex(".*hij.*").unwrap();
49+
///
50+
/// let execution_profile = ExecutionProfile {
51+
/// max_number_of_states: 8192,
52+
/// start_execution_time: None,
53+
/// execution_timeout: 1000,
54+
/// max_number_of_terms: 2,
55+
/// };
56+
/// ThreadLocalParams::init_profile(&execution_profile);
57+
///
58+
/// assert_eq!(EngineError::TooMuchTerms(2,3), term1.intersection(&[term2, term3]).unwrap_err());
59+
/// ```
60+
///
61+
/// ## Limiting the execution time
62+
/// ```
63+
/// use regexsolver::{Term, execution_profile::{ExecutionProfile, ThreadLocalParams}, error::EngineError};
64+
/// use std::time::SystemTime;
65+
///
66+
/// let term = Term::from_regex(".*abc.*cdef.*sqdsqf.*").unwrap();
67+
///
68+
/// let execution_profile = ExecutionProfile {
69+
/// max_number_of_states: 8192,
70+
/// start_execution_time: Some(SystemTime::now()),
71+
/// execution_timeout: 1,
72+
/// max_number_of_terms: 50,
73+
/// };
74+
/// ThreadLocalParams::init_profile(&execution_profile);
75+
///
76+
/// assert_eq!(EngineError::OperationTimeOutError, term.generate_strings(100).unwrap_err());
77+
/// ```
1078
pub struct ExecutionProfile {
79+
/// The maximum number of states that a non-deterministic finite automaton can hold; this is checked during the conversion of a regular expression to an automaton.
1180
pub max_number_of_states: usize,
81+
/// Timestamp of when the execution started; if this value is not set, operations will never time out.
1282
pub start_execution_time: Option<SystemTime>,
83+
/// The longest time in milliseconds that an operation execution can last; there is no guarantee that the exact time will be respected.
1384
pub execution_timeout: u128,
85+
/// The maximum number of terms that an operation can have.
1486
pub max_number_of_terms: usize,
1587
}
1688

1789
impl ExecutionProfile {
18-
pub fn is_timed_out(&self) -> Result<(), EngineError> {
90+
/// Assert that `execution_timeout` is not exceeded.
91+
///
92+
/// Returns `Ok(())` if `execution_timeout` is not exceeded or if `start_execution_time` is not set.
93+
///
94+
/// Return [`EngineError::OperationTimeOutError`] otherwise.
95+
pub fn assert_not_timed_out(&self) -> Result<(), EngineError> {
1996
if let Some(start) = self.start_execution_time {
2097
let run_duration = SystemTime::now()
2198
.duration_since(start)
@@ -33,6 +110,20 @@ impl ExecutionProfile {
33110
}
34111
}
35112

113+
114+
/// Hold [`ExecutionProfile`] on the current thread.
115+
///
116+
/// The default [`ExecutionProfile`] is the following:
117+
/// ```
118+
/// use regexsolver::execution_profile::ExecutionProfile;
119+
///
120+
/// ExecutionProfile {
121+
/// max_number_of_states: 8192,
122+
/// start_execution_time: None,
123+
/// execution_timeout: 1500,
124+
/// max_number_of_terms: 50,
125+
/// };
126+
/// ```
36127
pub struct ThreadLocalParams;
37128
impl ThreadLocalParams {
38129
thread_local! {
@@ -42,7 +133,7 @@ impl ThreadLocalParams {
42133
static MAX_NUMBER_OF_TERMS: RefCell<usize> = const { RefCell::new(50) };
43134
}
44135

45-
/// Initialize the thread local holding the ExecutionProfile.
136+
/// Stores the given [`ExecutionProfile`] on the current thread.
46137
pub fn init_profile(profile: &ExecutionProfile) {
47138
ThreadLocalParams::MAX_NUMBER_OF_STATES.with(|cell| {
48139
*cell.borrow_mut() = profile.max_number_of_states;
@@ -77,6 +168,7 @@ impl ThreadLocalParams {
77168
ThreadLocalParams::MAX_NUMBER_OF_TERMS.with(|cell| *cell.borrow())
78169
}
79170

171+
/// Return the [`ExecutionProfile`] stored on the current thread.
80172
pub fn get_execution_profile() -> ExecutionProfile {
81173
ExecutionProfile {
82174
max_number_of_states: Self::get_max_number_of_states(),

src/fast_automaton/analyze/cardinality.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ impl FastAutomaton {
2929
for (to_state, condition) in to_states {
3030
if let Some(distance) = current_distance.checked_mul(
3131
condition
32-
.get_cardinality(&self.used_bases)
32+
.get_cardinality(&self.spanning_set)
3333
.expect("It should be possible to get the cardinality of a condition."),
3434
) {
3535
if let Some(new_distance) =

src/fast_automaton/analyze/mod.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ impl FastAutomaton {
6161
live
6262
}
6363

64-
pub fn get_bases(&self) -> Result<Vec<Condition>, EngineError> {
65-
self.used_bases.get_bases().map(|range| {
66-
Condition::from_range(range, &self.used_bases)
64+
pub fn get_ranges(&self) -> Result<Vec<Condition>, EngineError> {
65+
self.spanning_set.get_spanning_ranges().map(|range| {
66+
Condition::from_range(range, &self.spanning_set)
6767
}).collect()
6868
}
6969
}

src/fast_automaton/builder.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ impl FastAutomaton {
1111
start_state: 0,
1212
accept_states: IntSet::default(),
1313
removed_states: IntSet::default(),
14-
used_bases: UsedBases::new_empty(),
14+
spanning_set: SpanningSet::new_empty(),
1515
deterministic: true,
1616
cyclic: false,
1717
}
@@ -27,12 +27,12 @@ impl FastAutomaton {
2727
#[inline]
2828
pub fn new_total() -> Self {
2929
let mut automaton: FastAutomaton = Self::new_empty();
30-
automaton.used_bases = UsedBases::new_total();
30+
automaton.spanning_set = SpanningSet::new_total();
3131
automaton.accept(automaton.start_state);
3232
automaton.add_transition_to(
3333
0,
3434
0,
35-
&Condition::total(&automaton.used_bases),
35+
&Condition::total(&automaton.spanning_set),
3636
);
3737
automaton
3838
}
@@ -54,19 +54,19 @@ impl FastAutomaton {
5454
}
5555
let new_state = automaton.new_state();
5656

57-
let used_bases = UsedBases::compute_used_bases(&[range.clone()]);
58-
let condition = Condition::from_range(range, &used_bases)?;
59-
automaton.used_bases = used_bases;
57+
let spanning_set = SpanningSet::compute_spanning_set(&[range.clone()]);
58+
let condition = Condition::from_range(range, &spanning_set)?;
59+
automaton.spanning_set = spanning_set;
6060
automaton.add_transition_to(0, new_state, &condition);
6161
automaton.accept(new_state);
6262
Ok(automaton)
6363
}
6464

65-
pub fn apply_newly_used_bases(
65+
pub fn apply_new_spanning_set(
6666
&mut self,
67-
newly_used_bases: &UsedBases,
67+
new_spanning_set: &SpanningSet,
6868
) -> Result<(), EngineError> {
69-
if newly_used_bases == &self.used_bases {
69+
if new_spanning_set == &self.spanning_set {
7070
return Ok(());
7171
}
7272
for from_state in &self.transitions_vec() {
@@ -75,14 +75,14 @@ impl FastAutomaton {
7575
Entry::Occupied(mut o) => {
7676
o.insert(
7777
o.get()
78-
.project_to(&self.used_bases, newly_used_bases)?,
78+
.project_to(&self.spanning_set, new_spanning_set)?,
7979
);
8080
}
8181
Entry::Vacant(_) => {}
8282
};
8383
}
8484
}
85-
self.used_bases = newly_used_bases.clone();
85+
self.spanning_set = new_spanning_set.clone();
8686
Ok(())
8787
}
8888

@@ -92,7 +92,7 @@ impl FastAutomaton {
9292
self.start_state = model.start_state;
9393
self.accept_states = model.accept_states.clone();
9494
self.removed_states = model.removed_states.clone();
95-
self.used_bases = model.used_bases.clone();
95+
self.spanning_set = model.spanning_set.clone();
9696
self.deterministic = model.deterministic;
9797
self.cyclic = model.cyclic;
9898
}

0 commit comments

Comments
 (0)