Skip to content

Commit b162330

Browse files
committed
Add code
1 parent b4c4a63 commit b162330

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+8167
-0
lines changed

Cargo.toml

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
[package]
2+
name = "regexsolver"
3+
version = "0.1.0"
4+
edition = "2021"
5+
authors = ["Alexandre van Beurden"]
6+
repository = "https://github.com/RegexSolver/regexsolver"
7+
license = "MIT"
8+
keywords = ["automaton", "intersection", "union", "difference", "regex"]
9+
description = "Manipulate regular expressions and automata as if they were sets."
10+
readme = "README.md"
11+
12+
[dependencies]
13+
env_logger = "0.11.3"
14+
serde = "1.0.197"
15+
serde_derive = "1.0.197"
16+
serde_json = "1.0.114"
17+
ciborium = "0.2.2"
18+
z85 = "3.0.5"
19+
aes-gcm-siv = "0.11.1"
20+
sha2 = "0.10.8"
21+
nohash-hasher = "0.2"
22+
ahash = "0.8.11"
23+
regex-syntax = "0.8.5"
24+
petgraph = "0.6.4"
25+
log = "0.4.21"
26+
rand = "0.8.5"
27+
lazy_static = "1.4.0"
28+
flate2 = { version = "1.0.30", features = [
29+
"zlib-ng",
30+
], default-features = false }
31+
regex = "1.10.3"
32+
regex-charclass = { version = "1.0.3", features = ["serde"] }
33+
34+
[dev-dependencies]
35+
criterion = { version = "0.5", features = ["html_reports"] }
36+
37+
[[bench]]
38+
name = "my_benchmark"
39+
harness = false

README.md

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# RegexSolver
2+
3+
[Homepage](https://regexsolver.com) | [Online Demo](https://regexsolver.com/demo) | [Documentation](https://docs.regexsolver.com) | [Developer Console](https://console.regexsolver.com)
4+
5+
RegexSolver is a comprehensive toolkit designed for working with regular expressions, enabling you to manipulate them as sets.
6+
7+
## Features
8+
9+
- **Intersection**: Find the common patterns between multiple regular expressions.
10+
- **Union**: Combine multiple regular expressions into a single, unified expression.
11+
- **Subtraction/Difference**: Subtract one regular expression from another.
12+
- **Equivalence**: Determine if two regular expressions match the same set of strings.
13+
- **Subset**: Check if one regular expression is a subset of another.

benches/my_benchmark.rs

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
use ahash::AHashSet;
2+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
3+
use regexsolver::{fast_automaton::FastAutomaton, regex::RegularExpression};
4+
5+
fn parse_regex(regex: &str) -> RegularExpression {
6+
RegularExpression::new(regex).unwrap()
7+
}
8+
9+
/// Benchmark target: converts `automaton` back into a [`RegularExpression`].
///
/// # Panics
///
/// Panics if `FastAutomaton::to_regex` does not yield a value.
fn to_regex(automaton: &FastAutomaton) -> RegularExpression {
    let regex = automaton.to_regex();
    regex.unwrap()
}
12+
13+
/// Benchmark target: returns the result of `FastAutomaton::determinize` on
/// `automaton`.
///
/// # Panics
///
/// Panics if the determinization does not yield a value.
fn determinize(automaton: &FastAutomaton) -> FastAutomaton {
    let deterministic = automaton.determinize();
    deterministic.unwrap()
}
16+
17+
/// Benchmark target: intersects `automaton_1` with `automaton_2`.
///
/// # Panics
///
/// Panics if `FastAutomaton::intersection` does not yield a value.
fn intersection(automaton_1: &FastAutomaton, automaton_2: &FastAutomaton) -> FastAutomaton {
    let common = automaton_1.intersection(automaton_2);
    common.unwrap()
}
20+
21+
/// Benchmark target: calls `FastAutomaton::generate_strings` on `automaton`
/// with the fixed argument `2000` and returns the resulting set.
///
/// NOTE(review): `2000` is presumably the number of strings requested —
/// confirm against `FastAutomaton::generate_strings`.
///
/// # Panics
///
/// Panics if the generation does not yield a value.
fn generate_strings(automaton: &FastAutomaton) -> AHashSet<String> {
    let generated = automaton.generate_strings(2000);
    generated.unwrap()
}
24+
25+
fn criterion_benchmark(c: &mut Criterion) {
26+
{
27+
c.bench_function("parse_regex", |b| {
28+
b.iter(|| parse_regex(black_box("a(bcfe|bcdg|mkv)*(abc){2,3}(abc){2}")))
29+
});
30+
}
31+
32+
{
33+
let input_regex = RegularExpression::new("a(bcfe|bcdg|mkv)*(abc){2,3}").unwrap();
34+
let input_automaton = input_regex.to_automaton().unwrap();
35+
36+
c.bench_function("to_regex", |b| {
37+
b.iter(|| to_regex(black_box(&input_automaton)))
38+
});
39+
}
40+
41+
{
42+
let input_regex = RegularExpression::new(
43+
"((aad|ads|a)*abc.*def.*uif(aad|ads|x)*abc.*oxs.*def(aad|ads|ax)*abc.*def.*ksd|q)",
44+
)
45+
.unwrap();
46+
let input_automaton = input_regex.to_automaton().unwrap();
47+
48+
c.bench_function("determinize", |b| {
49+
b.iter(|| determinize(black_box(&input_automaton)))
50+
});
51+
}
52+
53+
/*{
54+
let input_regex = RegularExpression::new("((aad|ads|a)*abc.*def.*uif(aad|ads|x)*abc.*oxs.*def(aad|ads|ax)*abc.*def.*ksd|q){1,5}").unwrap();
55+
let input_automaton = input_regex.to_automaton().unwrap();
56+
57+
c.bench_function("test_determinize", |b| {
58+
b.iter(|| determinize(black_box(&input_automaton)))
59+
});
60+
}*/
61+
62+
{
63+
let automaton1 = RegularExpression::new("a(bcfe|bcdg|mkv)*(abc){1,3}")
64+
.unwrap()
65+
.to_automaton().unwrap();
66+
let automaton2 = RegularExpression::new("a(bcfe|mkv|opr)*(abc){2,4}")
67+
.unwrap()
68+
.to_automaton().unwrap();
69+
70+
c.bench_function("intersection", |b| {
71+
b.iter(|| intersection(black_box(&automaton1), black_box(&automaton2)))
72+
});
73+
}
74+
75+
{
76+
let automaton = RegularExpression::new("a(bcfe|bcdg|mkv)*(abc){1,3}")
77+
.unwrap()
78+
.to_automaton().unwrap();
79+
80+
c.bench_function("generate_strings", |b| {
81+
b.iter(|| generate_strings(black_box(&automaton)))
82+
});
83+
}
84+
}
85+
86+
criterion_group!(benches, criterion_benchmark);
87+
criterion_main!(benches);

src/cardinality/mod.rs

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
use serde_derive::{Deserialize, Serialize};
2+
3+
/// Marker trait (it declares no methods) for the integer types that may be
/// used as the type parameter of `Cardinality`.
pub trait IntegerTrait {}

// Only these two unsigned widths opt in.
impl IntegerTrait for u32 {}
impl IntegerTrait for u128 {}
7+
8+
#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize)]
9+
#[serde(tag = "type", content = "value")]
10+
pub enum Cardinality<U: IntegerTrait> {
11+
Infinite,
12+
Integer(U),
13+
BigInteger,
14+
}

src/condition/fast_bit_vec/mod.rs

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/// A fixed-length bit vector stored in 64-bit blocks.
///
/// Bit `i` lives in block `i / 64` at bit position `i % 64`, least
/// significant bit first. The bits of the last block at or beyond `n`
/// (the padding) are always kept at zero, so that `empty`, `total` and the
/// derived `PartialEq`/`Hash` depend only on the `n` logical bits.
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
pub struct FastBitVec {
    /// Backing storage: `ceil(n / 64)` blocks.
    bits: Vec<u64>,
    /// Number of logical bits.
    n: usize,
}

impl std::fmt::Display for FastBitVec {
    /// Writes every backing block, in storage order, with the `{:b}` format.
    ///
    /// Blocks are not zero-padded: leading zero bits of a block are omitted
    /// and block boundaries are not visible in the output.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.bits
            .iter()
            .try_for_each(|block| write!(f, "{:b}", block))
    }
}

impl FastBitVec {
    /// Number of bits held by one backing block.
    const BLOCK_BITS: usize = 64;

    /// Creates a vector of `n` bits, all initialised to `bit`.
    #[inline]
    pub fn from_elem(n: usize, bit: bool) -> Self {
        // Round up without risking the overflow of `n + 63`.
        let nblocks = n / Self::BLOCK_BITS + usize::from(n % Self::BLOCK_BITS != 0);
        let fill = if bit { !0_u64 } else { 0_u64 };
        let mut bit_vec = FastBitVec {
            bits: vec![fill; nblocks],
            n,
        };
        // An all-ones fill also sets the padding; clear it again.
        bit_vec.fix_last_block();
        bit_vec
    }

    /// Clears the padding bits of the last block, if it has any.
    fn fix_last_block(&mut self) {
        if let Some((last_block, used_bits)) = self.last_block_mut_with_mask() {
            *last_block &= used_bits;
        }
    }

    /// Returns the last block together with the mask of its used bits, or
    /// `None` when the length is a multiple of 64 (no padding to care about).
    #[inline]
    fn last_block_mut_with_mask(&mut self) -> Option<(&mut u64, u64)> {
        let extra_bits = self.n % Self::BLOCK_BITS;
        if extra_bits == 0 {
            return None;
        }
        let mask = (1_u64 << extra_bits) - 1;
        self.bits.last_mut().map(|last_block| (last_block, mask))
    }

    /// Returns the number of logical bits.
    #[inline]
    pub fn len(&self) -> usize {
        self.n
    }

    /// Returns bit `i`, or `None` when `i` is not below `len()`.
    #[inline]
    pub fn get(&self, i: usize) -> Option<bool> {
        if i >= self.n {
            return None;
        }
        let (w, b) = (i / Self::BLOCK_BITS, i % Self::BLOCK_BITS);
        self.bits.get(w).map(|&block| (block & (1_u64 << b)) != 0)
    }

    /// Sets bit `i` to `x`.
    ///
    /// # Panics
    ///
    /// Panics if `i >= len()`. Without this check an index that falls in the
    /// padding of the last block would silently set a bit outside the logical
    /// length and corrupt `empty`, `total`, equality and hashing.
    #[inline]
    pub fn set(&mut self, i: usize, x: bool) {
        assert!(
            i < self.n,
            "index out of bounds: {:?} >= {:?}",
            i,
            self.n
        );
        let flag = 1_u64 << (i % Self::BLOCK_BITS);
        let block = &mut self.bits[i / Self::BLOCK_BITS];
        if x {
            *block |= flag;
        } else {
            *block &= !flag;
        }
    }

    /// Flips every logical bit in place.
    #[inline]
    pub fn complement(&mut self) {
        for w in &mut self.bits {
            *w = !*w;
        }
        // Inverting turned the zero padding into ones; clear it again.
        self.fix_last_block();
    }

    /// Replaces `self` with the bitwise OR of `self` and `other`.
    ///
    /// Only the blocks present in both vectors are combined.
    #[inline]
    pub fn union(&mut self, other: &Self) {
        for (a, b) in self.bits.iter_mut().zip(&other.bits) {
            *a |= *b;
        }
        // A longer `other` may carry set bits where `self` only has padding.
        self.fix_last_block();
    }

    /// Replaces `self` with the bitwise AND of `self` and `other`.
    ///
    /// Only the blocks present in both vectors are combined; any further
    /// block of `self` is left untouched.
    #[inline]
    pub fn intersection(&mut self, other: &Self) {
        for (a, b) in self.bits.iter_mut().zip(&other.bits) {
            *a &= *b;
        }
    }

    /// Returns `true` when at least one bit is set in both `self` and `other`.
    #[inline]
    pub fn has_intersection(&self, other: &Self) -> bool {
        self.bits
            .iter()
            .zip(&other.bits)
            .any(|(a, b)| (a & b) != 0)
    }

    /// Returns `true` when no bit is set.
    #[inline]
    pub fn empty(&self) -> bool {
        self.bits.iter().all(|&w| w == 0)
    }

    /// Returns `true` when every logical bit is set.
    ///
    /// A vector of length zero is considered total.
    #[inline]
    pub fn total(&self) -> bool {
        match self.bits.split_last() {
            // No storage means no bits at all.
            None => true,
            Some((last, full_blocks)) => {
                full_blocks.iter().all(|&w| w == !0)
                    && *last == Self::mask_for_bits(self.n)
            }
        }
    }

    /// Returns the value of the last block of an all-ones vector of `bits`
    /// bits: all ones when `bits` is a multiple of 64, otherwise the low
    /// `bits % 64` bits set.
    fn mask_for_bits(bits: usize) -> u64 {
        (!0_u64) >> ((Self::BLOCK_BITS - bits % Self::BLOCK_BITS) % Self::BLOCK_BITS)
    }
}

0 commit comments

Comments
 (0)