Skip to content

Commit 544e8f1

Browse files
committed
Align IPv4 parsing to spec
This commit aligns the IPv4 parsing steps to spec and adds the relevant WPT tests.
1 parent fc76310 commit 544e8f1

File tree

3 files changed

+216
-49
lines changed

3 files changed

+216
-49
lines changed

url/src/host.rs

+54-47
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
use std::cmp;
1010
use std::fmt::{self, Formatter};
1111
use std::net::{Ipv4Addr, Ipv6Addr};
12+
use std::num::IntErrorKind;
1213

1314
use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
1415
#[cfg(feature = "serde")]
@@ -90,29 +91,26 @@ impl Host<String> {
9091
}
9192

9293
let is_invalid_domain_char = |c| {
93-
matches!(
94-
c,
95-
| '\0'..='\u{001F}'
96-
| ' '
97-
| '#'
98-
| '%'
99-
| '/'
100-
| ':'
101-
| '<'
102-
| '>'
103-
| '?'
104-
| '@'
105-
| '['
106-
| '\\'
107-
| ']'
108-
| '^'
109-
| '\u{007F}'
110-
)
94+
matches!(c, |'\0'..='\u{001F}'| ' '
95+
| '#'
96+
| '%'
97+
| '/'
98+
| ':'
99+
| '<'
100+
| '>'
101+
| '?'
102+
| '@'
103+
| '['
104+
| '\\'
105+
| ']'
106+
| '^'
107+
| '\u{007F}')
111108
};
112109

113110
if domain.find(is_invalid_domain_char).is_some() {
114111
Err(ParseError::InvalidDomainCharacter)
115-
} else if let Some(address) = parse_ipv4addr(&domain)? {
112+
} else if ends_in_a_number(&domain) {
113+
let address = parse_ipv4addr(&domain)?;
116114
Ok(Host::Ipv4(address))
117115
} else {
118116
Ok(Host::Domain(domain))
@@ -266,8 +264,33 @@ fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
266264
}
267265
}
268266

267+
/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
268+
fn ends_in_a_number(input: &str) -> bool {
269+
let mut parts = input.rsplit('.');
270+
let last = parts.next().unwrap();
271+
let last = if last.is_empty() {
272+
if let Some(last) = parts.next() {
273+
last
274+
} else {
275+
return false;
276+
}
277+
} else {
278+
last
279+
};
280+
if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) {
281+
return true;
282+
}
283+
284+
parse_ipv4number(last).is_ok()
285+
}
286+
269287
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
288+
/// Ok(None) means the input is a valid number, but it overflows a `u32`.
270289
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
290+
if input.is_empty() {
291+
return Err(());
292+
}
293+
271294
let mut r = 10;
272295
if input.starts_with("0x") || input.starts_with("0X") {
273296
input = &input[2..];
@@ -277,10 +300,10 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
277300
r = 8;
278301
}
279302

280-
// At the moment we can't know the reason why from_str_radix fails
281-
// https://github.com/rust-lang/rust/issues/22639
282-
// So instead we check if the input looks like a real number and only return
283-
// an error when it's an overflow.
303+
if input.is_empty() {
304+
return Ok(Some(0));
305+
}
306+
284307
let valid_number = match r {
285308
8 => input.chars().all(|c| ('0'..='7').contains(&c)),
286309
10 => input.chars().all(|c| ('0'..='9').contains(&c)),
@@ -289,50 +312,34 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
289312
}),
290313
_ => false,
291314
};
292-
293315
if !valid_number {
294-
return Ok(None);
316+
return Err(());
295317
}
296318

297-
if input.is_empty() {
298-
return Ok(Some(0));
299-
}
300-
if input.starts_with('+') {
301-
return Ok(None);
302-
}
303319
match u32::from_str_radix(input, r) {
304-
Ok(number) => Ok(Some(number)),
320+
Ok(num) => Ok(Some(num)),
321+
Err(err) if *err.kind() == IntErrorKind::PosOverflow => Ok(None),
305322
Err(_) => Err(()),
306323
}
307324
}
308325

309326
/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
310-
fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
311-
if input.is_empty() {
312-
return Ok(None);
313-
}
327+
fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
314328
let mut parts: Vec<&str> = input.split('.').collect();
315329
if parts.last() == Some(&"") {
316330
parts.pop();
317331
}
318332
if parts.len() > 4 {
319-
return Ok(None);
333+
return Err(ParseError::InvalidIpv4Address);
320334
}
321335
let mut numbers: Vec<u32> = Vec::new();
322-
let mut overflow = false;
323336
for part in parts {
324-
if part.is_empty() {
325-
return Ok(None);
326-
}
327337
match parse_ipv4number(part) {
328338
Ok(Some(n)) => numbers.push(n),
329-
Ok(None) => return Ok(None),
330-
Err(()) => overflow = true,
339+
Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
340+
Err(()) => return Err(ParseError::InvalidIpv4Address),
331341
};
332342
}
333-
if overflow {
334-
return Err(ParseError::InvalidIpv4Address);
335-
}
336343
let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
337344
// Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
338345
if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
@@ -344,7 +351,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
344351
for (counter, n) in numbers.iter().enumerate() {
345352
ipv4 += n << (8 * (3 - counter as u32))
346353
}
347-
Ok(Some(Ipv4Addr::from(ipv4)))
354+
Ok(Ipv4Addr::from(ipv4))
348355
}
349356

350357
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>

url/tests/unit.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ fn host() {
256256
0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344,
257257
)),
258258
);
259-
assert_host("http://1.35.+33.49", Host::Domain("1.35.+33.49"));
260259
assert_host(
261260
"http://[::]",
262261
Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)),
@@ -271,7 +270,8 @@ fn host() {
271270
);
272271
assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
273272
assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111)));
274-
assert_host("http://2..2.3", Host::Domain("2..2.3"));
273+
assert!(Url::parse("http://1.35.+33.49").is_err());
274+
assert!(Url::parse("http://2..2.3").is_err());
275275
assert!(Url::parse("http://42.0x1232131").is_err());
276276
assert!(Url::parse("http://192.168.0.257").is_err());
277277

url/tests/urltestdata.json

+160
Original file line numberDiff line numberDiff line change
@@ -7688,5 +7688,165 @@
76887688
"protocol": "abc:",
76897689
"search": "",
76907690
"username": ""
7691+
},
7692+
"Last component looks like a number, but not valid IPv4",
7693+
{
7694+
"input": "http://1.2.3.4.5",
7695+
"base": "http://other.com/",
7696+
"failure": true
7697+
},
7698+
{
7699+
"input": "http://1.2.3.4.5.",
7700+
"base": "http://other.com/",
7701+
"failure": true
7702+
},
7703+
{
7704+
"input": "http://0..0x300/",
7705+
"base": "about:blank",
7706+
"failure": true
7707+
},
7708+
{
7709+
"input": "http://0..0x300./",
7710+
"base": "about:blank",
7711+
"failure": true
7712+
},
7713+
{
7714+
"input": "http://256.256.256.256.256",
7715+
"base": "http://other.com/",
7716+
"failure": true
7717+
},
7718+
{
7719+
"input": "http://256.256.256.256.256.",
7720+
"base": "http://other.com/",
7721+
"failure": true
7722+
},
7723+
{
7724+
"input": "http://1.2.3.08",
7725+
"base": "about:blank",
7726+
"failure": true
7727+
},
7728+
{
7729+
"input": "http://1.2.3.08.",
7730+
"base": "about:blank",
7731+
"failure": true
7732+
},
7733+
{
7734+
"input": "http://1.2.3.09",
7735+
"base": "about:blank",
7736+
"failure": true
7737+
},
7738+
{
7739+
"input": "http://09.2.3.4",
7740+
"base": "about:blank",
7741+
"failure": true
7742+
},
7743+
{
7744+
"input": "http://09.2.3.4.",
7745+
"base": "about:blank",
7746+
"failure": true
7747+
},
7748+
{
7749+
"input": "http://01.2.3.4.5",
7750+
"base": "about:blank",
7751+
"failure": true
7752+
},
7753+
{
7754+
"input": "http://01.2.3.4.5.",
7755+
"base": "about:blank",
7756+
"failure": true
7757+
},
7758+
{
7759+
"input": "http://0x100.2.3.4",
7760+
"base": "about:blank",
7761+
"failure": true
7762+
},
7763+
{
7764+
"input": "http://0x100.2.3.4.",
7765+
"base": "about:blank",
7766+
"failure": true
7767+
},
7768+
{
7769+
"input": "http://0x1.2.3.4.5",
7770+
"base": "about:blank",
7771+
"failure": true
7772+
},
7773+
{
7774+
"input": "http://0x1.2.3.4.5.",
7775+
"base": "about:blank",
7776+
"failure": true
7777+
},
7778+
{
7779+
"input": "http://foo.1.2.3.4",
7780+
"base": "about:blank",
7781+
"failure": true
7782+
},
7783+
{
7784+
"input": "http://foo.1.2.3.4.",
7785+
"base": "about:blank",
7786+
"failure": true
7787+
},
7788+
{
7789+
"input": "http://foo.2.3.4",
7790+
"base": "about:blank",
7791+
"failure": true
7792+
},
7793+
{
7794+
"input": "http://foo.2.3.4.",
7795+
"base": "about:blank",
7796+
"failure": true
7797+
},
7798+
{
7799+
"input": "http://foo.09",
7800+
"base": "about:blank",
7801+
"failure": true
7802+
},
7803+
{
7804+
"input": "http://foo.09.",
7805+
"base": "about:blank",
7806+
"failure": true
7807+
},
7808+
{
7809+
"input": "http://foo.0x4",
7810+
"base": "about:blank",
7811+
"failure": true
7812+
},
7813+
{
7814+
"input": "http://foo.0x4.",
7815+
"base": "about:blank",
7816+
"failure": true
7817+
},
7818+
{
7819+
"input": "http://foo.09..",
7820+
"base": "about:blank",
7821+
"hash": "",
7822+
"host": "foo.09..",
7823+
"hostname": "foo.09..",
7824+
"href":"http://foo.09../",
7825+
"password": "",
7826+
"pathname": "/",
7827+
"port":"",
7828+
"protocol": "http:",
7829+
"search": "",
7830+
"username": ""
7831+
},
7832+
{
7833+
"input": "http://0999999999999999999/",
7834+
"base": "about:blank",
7835+
"failure": true
7836+
},
7837+
{
7838+
"input": "http://foo.0x",
7839+
"base": "about:blank",
7840+
"failure": true
7841+
},
7842+
{
7843+
"input": "http://foo.0XFfFfFfFfFfFfFfFfFfAcE123",
7844+
"base": "about:blank",
7845+
"failure": true
7846+
},
7847+
{
7848+
"input": "http://💩.123/",
7849+
"base": "about:blank",
7850+
"failure": true
76917851
}
76927852
]

0 commit comments

Comments
 (0)