Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Build
run: cargo build --verbose
run: cargo build --all-features --verbose
- name: Test & Lint
run: |
cargo test
cargo clippy
cargo test --all-features
cargo clippy --all-features
40 changes: 26 additions & 14 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "regexsolver"
version = "0.3.0"
version = "0.3.1"
edition = "2021"
authors = ["Alexandre van Beurden"]
repository = "https://github.com/RegexSolver/regexsolver"
Expand All @@ -10,28 +10,40 @@ description = "Manipulate regex and automaton as if they were sets."
readme = "README.md"

[dependencies]
env_logger = "0.11.3"
serde = "1.0.197"
serde_derive = "1.0.197"
serde_json = "1.0.114"
ciborium = "0.2.2"
z85 = "3.0.5"
aes-gcm-siv = "0.11.1"
sha2 = "0.10.8"
serde = { version = "1.0", features = ["derive"], optional = true }
ciborium = { version = "0.2.2", optional = true }
z85 = { version = "3.0.5", optional = true }
aes-gcm-siv = { version = "0.11.1", optional = true }
sha2 = { version = "0.10.8", optional = true }
flate2 = { version = "1.0.30", features = [
"zlib-ng",
], default-features = false, optional = true }
nohash-hasher = "0.2"
ahash = "0.8.11"
regex-syntax = "0.8.5"
log = "0.4.21"
rand = "0.8.5"
lazy_static = "1.4.0"
flate2 = { version = "1.0.30", features = [
"zlib-ng",
], default-features = false }
regex = "1.10.3"
regex-charclass = { version = "1.0.3", features = ["serde"] }
regex-syntax = "0.8.5"
regex-charclass = { version = "1.0.3" }

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
env_logger = "0.11.3"
serde_json = "1.0.114"


[features]
default = ["serde"]
serde = [
"regex-charclass/serde",
"dep:serde",
"dep:ciborium",
"dep:z85",
"dep:aes-gcm-siv",
"dep:sha2",
"dep:flate2",
]

[[bench]]
name = "my_benchmark"
Expand Down
8 changes: 5 additions & 3 deletions src/cardinality/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use serde_derive::{Deserialize, Serialize};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// Represent a number.
#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(PartialEq, Eq, Debug, Clone)]
#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))]
pub enum Cardinality<U> {
/// An infinite number.
Infinite,
Expand Down
46 changes: 32 additions & 14 deletions src/fast_automaton/convert/to_regex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::{
};

use ahash::{HashMapExt, HashSetExt};
use log::warn;
use nohash_hasher::IntMap;

use crate::{error::EngineError, execution_profile::ThreadLocalParams, regex::RegularExpression};
Expand Down Expand Up @@ -258,28 +259,20 @@ impl FastAutomaton {
Ok(automaton) => match self.is_equivalent_of(&automaton) {
Ok(result) => {
if !result {
/*println!(
"The automaton is not equivalent to the generated regex; automaton={} regex={}",
serde_json::to_string(self).unwrap(),
regex
);*/
warn!("The automaton is not equivalent to the generated regex; automaton={}, regex={}", self, regex);
None
} else {
Some(regex)
}
}
Err(_) => {
//println!("{err}");
Err(err) => {
warn!("Engine error while checking for equivalence ({}); automaton={}, regex={}", err, self, regex);
None
}
},
Err(err) => {
if let crate::error::EngineError::RegexSyntaxError(_) = err {
/*error!(
"The generated regex can not be converted to automaton to be checked for equivalence (Syntax Error); automaton={} regex={}",
serde_json::to_string(self).unwrap(),
regex
);*/
if let crate::error::EngineError::RegexSyntaxError(err) = err {
warn!("The generated regex cannot be converted to automaton to be checked for equivalence ({}); automaton={}, regex={}", err, self, regex);
}
None
}
Expand Down Expand Up @@ -422,8 +415,33 @@ mod tests {
Ok(())
}

/*#[test]
/// Intersects the automata built from `.*abc.*` and `.*def.*`, converts the
/// result back to a regular expression, and checks its textual form.
#[test]
fn test_convert_after_operation_4() -> Result<(), String> {
    // Parses `pattern` and converts it to an automaton; either step panics on failure.
    let automaton_of = |pattern: &str| {
        RegularExpression::new(pattern)
            .unwrap()
            .to_automaton()
            .unwrap()
    };

    let contains_abc = automaton_of(".*abc.*");
    let contains_def = automaton_of(".*def.*");

    // Intersect the two automata, then turn the result back into a regex.
    let regex = contains_abc
        .intersection(&contains_def)
        .unwrap()
        .to_regex()
        .unwrap();

    assert_eq!(".*(abc.*def|def.*abc).*", regex.to_string());

    Ok(())
}

/*#[test]
fn test_convert_after_operation_5() -> Result<(), String> {
if std::env::var_os("RUST_LOG").is_none() {
std::env::set_var("RUST_LOG", "regexsolver=debug");
}
env_logger::init();

let automaton1 = RegularExpression::new(".*abc.*")
.unwrap()
.to_automaton()
Expand Down
2 changes: 1 addition & 1 deletion src/fast_automaton/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use std::collections::hash_map::Entry;
use std::collections::VecDeque;
use std::fmt::Display;

use crate::tokenizer::Tokenizer;
use crate::{IntMap, IntSet};

pub(crate) type State = usize;
Expand All @@ -19,6 +18,7 @@ pub mod condition;
mod convert;
mod generate;
mod operation;
#[cfg(feature = "serde")]
mod serializer;
pub mod spanning_set;

Expand Down
3 changes: 2 additions & 1 deletion src/fast_automaton/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ use super::*;
use lazy_static::lazy_static;
use rand::Rng;
use serde::{de, ser, Deserializer, Serializer};
use serde_derive::{Deserialize, Serialize};
use serde::{Deserialize, Serialize};
use std::env;
use z85::{decode, encode};
use crate::tokenizer::Tokenizer;

use sha2::{Digest, Sha256};

Expand Down
4 changes: 3 additions & 1 deletion src/fast_automaton/spanning_set/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ use std::slice::Iter;

use ahash::AHashSet;
use regex_charclass::{char::Char, irange::RangeSet};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// Contains a set of [`RangeSet<Char>`] that spans all the transitions of a [`crate::FastAutomaton`].
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SpanningSet(Vec<RangeSet<Char>>, RangeSet<Char>);

impl SpanningSet {
Expand Down
15 changes: 9 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use fast_automaton::FastAutomaton;
use nohash_hasher::NoHashHasher;
use regex::RegularExpression;
use regex_charclass::{char::Char, irange::RangeSet};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

pub mod cardinality;
Expand All @@ -27,12 +28,13 @@ type Range = RangeSet<Char>;
/// Represents a term that can be either a regular expression or a finite automaton. This term can be manipulated with a wide range of operations.
///
/// To put constraints and limits on the execution of operations, please refer to [`execution_profile::ExecutionProfile`].
#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Eq, Debug)]
#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))]
pub enum Term {
#[serde(rename = "regex")]
#[cfg_attr(feature = "serde", serde(rename = "regex"))]
RegularExpression(RegularExpression),
#[serde(rename = "fair")]
#[cfg_attr(feature = "serde", serde(rename = "fair"))]
Automaton(FastAutomaton),
}

Expand Down Expand Up @@ -319,8 +321,9 @@ impl Term {
}

/// Represents details about a [Term].
#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename = "details")]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Eq, Debug)]
#[cfg_attr(feature = "serde", serde(tag = "type", rename = "details"))]
pub struct Details {
cardinality: Option<Cardinality<u32>>,
length: (Option<u32>, Option<u32>),
Expand Down
1 change: 1 addition & 0 deletions src/regex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use super::*;
mod analyze;
mod builder;
mod operation;
#[cfg(feature = "serde")]
mod serializer;

/// Represent a regular expression.
Expand Down
Loading