From 1e6b64831705ae7f852b85558c106c232b919e5c Mon Sep 17 00:00:00 2001 From: edgul Date: Wed, 30 Jul 2025 15:50:53 -0400 Subject: [PATCH 1/2] fix hostname tokenizing of newline, carriage return and tab --- src/testdata/urlpatterntestdata.json | 24 +++++++++++++++++++++--- src/tokenizer.rs | 8 ++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index 5fcda0e..7cc5d71 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -2494,15 +2494,33 @@ }, { "pattern": [{ "hostname": "bad\nhostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "badhostname" }], + "expected_obj": { + "hostname": "badhostname" + }, + "expected_match": { + "hostname": { "input": "badhostname", "groups": {} } + } }, { "pattern": [{ "hostname": "bad\rhostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "badhostname" }], + "expected_obj": { + "hostname": "badhostname" + }, + "expected_match": { + "hostname": { "input": "badhostname", "groups": {} } + } }, { "pattern": [{ "hostname": "bad\thostname" }], - "expected_obj": "error" + "inputs": [{ "hostname": "badhostname" }], + "expected_obj": { + "hostname": "badhostname" + }, + "expected_match": { + "hostname": { "input": "badhostname", "groups": {} } + } }, { "pattern": [{}], diff --git a/src/tokenizer.rs b/src/tokenizer.rs index dd25cfb..b250d9c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -160,6 +160,14 @@ pub fn tokenize( ); continue; } + if tokenizer.code_point == Some('\n') + || tokenizer.code_point == Some('\r') + || tokenizer.code_point == Some('\t') + { + // ignore newline, carriage return and tab + tokenizer.index = tokenizer.next_index; + continue; + } if tokenizer.code_point == Some('{') { tokenizer.add_token_with_default_pos_and_len(TokenType::Open); continue; From 9f5a8ae8b67ca884457e4bc79400cca70d8336bf Mon Sep 17 00:00:00 2001 From: edgul Date: Wed, 6 Aug 2025 11:34:54 -0400 Subject: [PATCH 2/2] url version bump to get bad\:hostname fix --- Cargo.toml | 2 +- src/testdata/urlpatterntestdata.json | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8295022..39cf4c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ repository = "https://github.com/denoland/rust-urlpattern" license = "MIT" [dependencies] -url = "2.4.1" +url = "2.5.6" regex = "1.10.5" serde = { version = "1.0.127", features = ["derive"] } unic-ucd-ident = { version = "0.9.0", features = ["id"] } diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index 41a400d..202c290 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -2474,7 +2474,6 @@ } }, { - "skip": "likely a bug in rust-url", "pattern": [{ "hostname": "bad\\:hostname" }], "expected_obj": "error" },