Skip to content

Commit 725f1f7

Browse files
authored
refactor: parameterize the regex (#31)
1 parent 67173fe commit 725f1f7

File tree

8 files changed

+132
-92
lines changed

8 files changed

+132
-92
lines changed

README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ fn main() {
2323
pathname: Some("/users/:id".to_owned()),
2424
..Default::default()
2525
};
26-
let pattern = UrlPattern::parse(init).unwrap();
26+
let pattern = <UrlPattern>::parse(init).unwrap();
2727

2828
// Match the pattern against a URL.
2929
let url = "https://example.com/users/123".parse().unwrap();

src/component.rs

+18-28
Original file line number | Diff line number | Diff line change
@@ -5,19 +5,19 @@ use crate::parser::Part;
55
use crate::parser::PartModifier;
66
use crate::parser::PartType;
77
use crate::parser::FULL_WILDCARD_REGEXP_VALUE;
8+
use crate::regexp::RegExp;
89
use crate::tokenizer::is_valid_name_codepoint;
910
use crate::Error;
1011

1112
// Ref: https://wicg.github.io/urlpattern/#component
1213
#[derive(Debug)]
13-
pub(crate) struct Component {
14+
pub(crate) struct Component<R: RegExp> {
1415
pub pattern_string: String,
15-
pub rust_regexp: Result<regex::Regex, Error>,
16-
pub ecma_regexp_string: String,
16+
pub regexp: Result<R, Error>,
1717
pub group_name_list: Vec<String>,
1818
}
1919

20-
impl Component {
20+
impl<R: RegExp> Component<R> {
2121
// Ref: https://wicg.github.io/urlpattern/#compile-a-component
2222
pub(crate) fn compile<F>(
2323
input: Option<&str>,
@@ -32,18 +32,13 @@ impl Component {
3232
&options,
3333
encoding_callback,
3434
)?;
35-
let (rust_regexp_string, _) =
36-
generate_regular_expression_and_name_list(&part_list, &options);
37-
let rust_regexp =
38-
regex::Regex::new(&rust_regexp_string).map_err(Error::RegEx);
39-
let options = options.with_syntax(crate::parser::RegexSyntax::EcmaScript);
40-
let (ecma_regexp_string, name_list) =
35+
let (regexp_string, name_list) =
4136
generate_regular_expression_and_name_list(&part_list, &options);
37+
let regexp = R::parse(&regexp_string).map_err(Error::RegExp);
4238
let pattern_string = generate_pattern_string(part_list, &options);
4339
Ok(Component {
4440
pattern_string,
45-
rust_regexp,
46-
ecma_regexp_string,
41+
regexp,
4742
group_name_list: name_list,
4843
})
4944
}
@@ -52,9 +47,9 @@ impl Component {
5247
pub(crate) fn protocol_component_matches_special_scheme(&self) -> bool {
5348
const SPECIAL_SCHEMES: [&str; 6] =
5449
["ftp", "file", "http", "https", "ws", "wss"];
55-
if let Ok(regex) = &self.rust_regexp {
50+
if let Ok(regex) = &self.regexp {
5651
for scheme in SPECIAL_SCHEMES {
57-
if regex.captures(scheme).is_some() {
52+
if regex.matches(scheme).is_some() {
5853
return true;
5954
}
6055
}
@@ -66,28 +61,23 @@ impl Component {
6661
pub(crate) fn create_match_result(
6762
&self,
6863
input: String,
69-
exec_result: regex::Captures,
64+
exec_result: Vec<&str>,
7065
) -> crate::UrlPatternComponentResult {
71-
let mut iter = exec_result.iter();
72-
iter.next(); // first match is entire string
73-
crate::UrlPatternComponentResult {
74-
input,
75-
groups: self
76-
.group_name_list
77-
.clone()
78-
.into_iter()
79-
.zip(iter.map(|e| e.map(|e| e.as_str().to_string())))
80-
.map(|(name, key)| (name, key.unwrap_or_default()))
81-
.collect(),
82-
}
66+
let groups = self
67+
.group_name_list
68+
.clone()
69+
.into_iter()
70+
.zip(exec_result.into_iter().map(str::to_owned))
71+
.collect();
72+
crate::UrlPatternComponentResult { input, groups }
8373
}
8474

8575
pub(crate) fn optionally_transpose_regex_error(
8676
mut self,
8777
do_transpose: bool,
8878
) -> Result<Self, Error> {
8979
if do_transpose {
90-
self.rust_regexp = Ok(self.rust_regexp?);
80+
self.regexp = Ok(self.regexp?);
9181
}
9282
Ok(self)
9383
}

src/constructor_parser.rs

+7-4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
22

33
use crate::error::Error;
4+
use crate::regexp::RegExp;
45
use crate::tokenizer::Token;
56
use crate::tokenizer::TokenType;
67
use crate::UrlPatternInit;
@@ -193,9 +194,11 @@ impl<'a> ConstructorStringParser<'a> {
193194
}
194195

195196
// Ref: https://wicg.github.io/urlpattern/#compute-protocol-matches-a-special-scheme-flag
196-
fn compute_protocol_matches_special_scheme(&mut self) -> Result<(), Error> {
197+
fn compute_protocol_matches_special_scheme<R: RegExp>(
198+
&mut self,
199+
) -> Result<(), Error> {
197200
let protocol_string = self.make_component_string();
198-
let protocol_component = crate::component::Component::compile(
201+
let protocol_component = crate::component::Component::<R>::compile(
199202
Some(&protocol_string),
200203
crate::canonicalize_and_process::canonicalize_protocol,
201204
Default::default(),
@@ -230,7 +233,7 @@ impl<'a> ConstructorStringParser<'a> {
230233
}
231234

232235
// Ref: https://wicg.github.io/urlpattern/#parse-a-constructor-string
233-
pub(crate) fn parse_constructor_string(
236+
pub(crate) fn parse_constructor_string<R: RegExp>(
234237
input: &str,
235238
) -> Result<UrlPatternInit, Error> {
236239
let token_list = crate::tokenizer::tokenize(
@@ -315,7 +318,7 @@ pub(crate) fn parse_constructor_string(
315318
}
316319
ConstructorStringParserState::Protocol => {
317320
if parser.is_protocol_suffix() {
318-
parser.compute_protocol_matches_special_scheme()?;
321+
parser.compute_protocol_matches_special_scheme::<R>()?;
319322
if parser.protocol_matches_special_scheme {
320323
parser.result.pathname = Some(String::from("/"));
321324
}

src/error.rs

+3-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,9 @@ pub enum Error {
2020
Parser(ParserError),
2121

2222
Url(url::ParseError),
23-
RegEx(regex::Error),
23+
24+
#[display(fmt = "regexp error")]
25+
RegExp(()),
2426
}
2527

2628
impl std::error::Error for Error {}

src/lib.rs

+33-43
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ mod constructor_parser;
1111
mod error;
1212
mod parser;
1313
pub mod quirks;
14+
mod regexp;
1415
mod tokenizer;
1516

1617
pub use error::Error;
@@ -19,6 +20,7 @@ use url::Url;
1920
use crate::canonicalize_and_process::is_special_scheme;
2021
use crate::canonicalize_and_process::special_scheme_default_port;
2122
use crate::component::Component;
23+
use crate::regexp::RegExp;
2224

2325
/// The structured input used to create a URL pattern.
2426
#[derive(Debug, Default, Clone, Eq, PartialEq)]
@@ -35,11 +37,11 @@ pub struct UrlPatternInit {
3537
}
3638

3739
impl UrlPatternInit {
38-
pub fn parse_constructor_string(
40+
pub fn parse_constructor_string<R: RegExp>(
3941
pattern: &str,
4042
base_url: Option<Url>,
4143
) -> Result<UrlPatternInit, Error> {
42-
let mut init = constructor_parser::parse_constructor_string(pattern)?;
44+
let mut init = constructor_parser::parse_constructor_string::<R>(pattern)?;
4345
if base_url.is_none() && init.protocol.is_none() {
4446
return Err(Error::BaseUrlRequired);
4547
}
@@ -194,7 +196,7 @@ fn is_absolute_pathname(
194196
/// pathname: Some("/users/:id".to_owned()),
195197
/// ..Default::default()
196198
/// };
197-
/// let pattern = UrlPattern::parse(init).unwrap();
199+
/// let pattern = <UrlPattern>::parse(init).unwrap();
198200
///
199201
/// // Match the pattern against a URL.
200202
/// let url = "https://example.com/users/123".parse().unwrap();
@@ -203,15 +205,15 @@ fn is_absolute_pathname(
203205
///# }
204206
/// ```
205207
#[derive(Debug)]
206-
pub struct UrlPattern {
207-
protocol: Component,
208-
username: Component,
209-
password: Component,
210-
hostname: Component,
211-
port: Component,
212-
pathname: Component,
213-
search: Component,
214-
hash: Component,
208+
pub struct UrlPattern<R: RegExp = regex::Regex> {
209+
protocol: Component<R>,
210+
username: Component<R>,
211+
password: Component<R>,
212+
hostname: Component<R>,
213+
port: Component<R>,
214+
pathname: Component<R>,
215+
search: Component<R>,
216+
hash: Component<R>,
215217
}
216218

217219
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -220,7 +222,7 @@ pub enum UrlPatternMatchInput {
220222
Url(Url),
221223
}
222224

223-
impl UrlPattern {
225+
impl<R: RegExp> UrlPattern<R> {
224226
// Ref: https://wicg.github.io/urlpattern/#dom-urlpattern-urlpattern
225227
/// Parse a [UrlPatternInit] into a [UrlPattern].
226228
pub fn parse(init: UrlPatternInit) -> Result<Self, Error> {
@@ -405,52 +407,40 @@ impl UrlPattern {
405407

406408
let protocol_exec_result = self
407409
.protocol
408-
.rust_regexp
410+
.regexp
409411
.as_ref()
410412
.unwrap()
411-
.captures(&input.protocol);
413+
.matches(&input.protocol);
412414
let username_exec_result = self
413415
.username
414-
.rust_regexp
416+
.regexp
415417
.as_ref()
416418
.unwrap()
417-
.captures(&input.username);
419+
.matches(&input.username);
418420
let password_exec_result = self
419421
.password
420-
.rust_regexp
422+
.regexp
421423
.as_ref()
422424
.unwrap()
423-
.captures(&input.password);
425+
.matches(&input.password);
424426
let hostname_exec_result = self
425427
.hostname
426-
.rust_regexp
428+
.regexp
427429
.as_ref()
428430
.unwrap()
429-
.captures(&input.hostname);
430-
let port_exec_result = self
431-
.port
432-
.rust_regexp
433-
.as_ref()
434-
.unwrap()
435-
.captures(&input.port);
431+
.matches(&input.hostname);
432+
let port_exec_result =
433+
self.port.regexp.as_ref().unwrap().matches(&input.port);
436434
let pathname_exec_result = self
437435
.pathname
438-
.rust_regexp
439-
.as_ref()
440-
.unwrap()
441-
.captures(&input.pathname);
442-
let search_exec_result = self
443-
.search
444-
.rust_regexp
445-
.as_ref()
446-
.unwrap()
447-
.captures(&input.search);
448-
let hash_exec_result = self
449-
.hash
450-
.rust_regexp
436+
.regexp
451437
.as_ref()
452438
.unwrap()
453-
.captures(&input.hash);
439+
.matches(&input.pathname);
440+
let search_exec_result =
441+
self.search.regexp.as_ref().unwrap().matches(&input.search);
442+
let hash_exec_result =
443+
self.hash.regexp.as_ref().unwrap().matches(&input.hash);
454444

455445
match (
456446
protocol_exec_result,
@@ -640,7 +630,7 @@ mod tests {
640630
base_url.as_deref(),
641631
);
642632

643-
let res = init_res.and_then(UrlPattern::parse);
633+
let res = init_res.and_then(<UrlPattern>::parse);
644634
let expected_obj = match case.expected_obj {
645635
Some(StringOrInit::String(s)) if s == "error" => {
646636
assert!(res.is_err());
@@ -864,7 +854,7 @@ mod tests {
864854

865855
#[test]
866856
fn issue26() {
867-
UrlPattern::parse(UrlPatternInit {
857+
<UrlPattern>::parse(UrlPatternInit {
868858
pathname: Some("/:foo.".to_owned()),
869859
..Default::default()
870860
})

src/parser.rs

-5
Original file line number | Diff line number | Diff line change
@@ -62,11 +62,6 @@ impl Options {
6262
}
6363
}
6464

65-
pub fn with_syntax(mut self, syntax: RegexSyntax) -> Self {
66-
self.regex_syntax = syntax;
67-
self
68-
}
69-
7065
// Ref: https://wicg.github.io/urlpattern/#escape-a-regexp-string
7166
pub fn escape_regexp_string(&self, input: &str) -> String {
7267
assert!(input.is_ascii());

0 commit comments

Comments
 (0)