Skip to content

Commit 7889641

Browse files
committed
Align IPv4 parsing to spec
This commit aligns the IPv4 parsing steps to spec and adds the relevant WPT tests.
1 parent c34de27 commit 7889641

File tree

3 files changed

+202
-32
lines changed

3 files changed

+202
-32
lines changed

url/src/host.rs

+40-30
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ impl Host<String> {
109109

110110
if domain.find(is_invalid_domain_char).is_some() {
111111
Err(ParseError::InvalidDomainCharacter)
112-
} else if let Some(address) = parse_ipv4addr(&domain)? {
112+
} else if ends_in_a_number(&domain) {
113+
let address = parse_ipv4addr(&domain)?;
113114
Ok(Host::Ipv4(address))
114115
} else {
115116
Ok(Host::Domain(domain))
@@ -264,8 +265,33 @@ fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
264265
}
265266
}
266267

268+
/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
269+
fn ends_in_a_number(input: &str) -> bool {
270+
let mut parts = input.rsplit('.');
271+
let last = parts.next().unwrap();
272+
let last = if last.is_empty() {
273+
if let Some(last) = parts.next() {
274+
last
275+
} else {
276+
return false;
277+
}
278+
} else {
279+
last
280+
};
281+
if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) {
282+
return true;
283+
}
284+
285+
parse_ipv4number(last).is_ok()
286+
}
287+
267288
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
289+
/// Ok(None) means the input is a valid number, but it overflows a `u32`.
268290
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
291+
if input.is_empty() {
292+
return Err(());
293+
}
294+
269295
let mut r = 10;
270296
if input.starts_with("0x") || input.starts_with("0X") {
271297
input = &input[2..];
@@ -275,10 +301,10 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
275301
r = 8;
276302
}
277303

278-
// At the moment we can't know the reason why from_str_radix fails
279-
// https://github.com/rust-lang/rust/issues/22639
280-
// So instead we check if the input looks like a real number and only return
281-
// an error when it's an overflow.
304+
if input.is_empty() {
305+
return Ok(Some(0));
306+
}
307+
282308
let valid_number = match r {
283309
8 => input.chars().all(|c| ('0'..='7').contains(&c)),
284310
10 => input.chars().all(|c| ('0'..='9').contains(&c)),
@@ -287,50 +313,34 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
287313
}),
288314
_ => false,
289315
};
290-
291316
if !valid_number {
292-
return Ok(None);
317+
return Err(());
293318
}
294319

295-
if input.is_empty() {
296-
return Ok(Some(0));
297-
}
298-
if input.starts_with('+') {
299-
return Ok(None);
300-
}
301320
match u32::from_str_radix(input, r) {
302-
Ok(number) => Ok(Some(number)),
303-
Err(_) => Err(()),
321+
Ok(num) => Ok(Some(num)),
322+
Err(_) => Ok(None), // The only possible error kind here is an integer overflow.
323+
// The validity of the chars in the input is checked above.
304324
}
305325
}
306326

307327
/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
308-
fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
309-
if input.is_empty() {
310-
return Ok(None);
311-
}
328+
fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
312329
let mut parts: Vec<&str> = input.split('.').collect();
313330
if parts.last() == Some(&"") {
314331
parts.pop();
315332
}
316333
if parts.len() > 4 {
317-
return Ok(None);
334+
return Err(ParseError::InvalidIpv4Address);
318335
}
319336
let mut numbers: Vec<u32> = Vec::new();
320-
let mut overflow = false;
321337
for part in parts {
322-
if part.is_empty() {
323-
return Ok(None);
324-
}
325338
match parse_ipv4number(part) {
326339
Ok(Some(n)) => numbers.push(n),
327-
Ok(None) => return Ok(None),
328-
Err(()) => overflow = true,
340+
Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
341+
Err(()) => return Err(ParseError::InvalidIpv4Address),
329342
};
330343
}
331-
if overflow {
332-
return Err(ParseError::InvalidIpv4Address);
333-
}
334344
let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
335345
// Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
336346
if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
@@ -342,7 +352,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
342352
for (counter, n) in numbers.iter().enumerate() {
343353
ipv4 += n << (8 * (3 - counter as u32))
344354
}
345-
Ok(Some(Ipv4Addr::from(ipv4)))
355+
Ok(Ipv4Addr::from(ipv4))
346356
}
347357

348358
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>

url/tests/unit.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ fn host() {
256256
0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344,
257257
)),
258258
);
259-
assert_host("http://1.35.+33.49", Host::Domain("1.35.+33.49"));
260259
assert_host(
261260
"http://[::]",
262261
Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)),
@@ -271,7 +270,8 @@ fn host() {
271270
);
272271
assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
273272
assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111)));
274-
assert_host("http://2..2.3", Host::Domain("2..2.3"));
273+
assert!(Url::parse("http://1.35.+33.49").is_err());
274+
assert!(Url::parse("http://2..2.3").is_err());
275275
assert!(Url::parse("http://42.0x1232131").is_err());
276276
assert!(Url::parse("http://192.168.0.257").is_err());
277277

url/tests/urltestdata.json

+160
Original file line numberDiff line numberDiff line change
@@ -7733,5 +7733,165 @@
77337733
"protocol": "abc:",
77347734
"search": "",
77357735
"username": ""
7736+
},
7737+
"Last component looks like a number, but not valid IPv4",
7738+
{
7739+
"input": "http://1.2.3.4.5",
7740+
"base": "http://other.com/",
7741+
"failure": true
7742+
},
7743+
{
7744+
"input": "http://1.2.3.4.5.",
7745+
"base": "http://other.com/",
7746+
"failure": true
7747+
},
7748+
{
7749+
"input": "http://0..0x300/",
7750+
"base": "about:blank",
7751+
"failure": true
7752+
},
7753+
{
7754+
"input": "http://0..0x300./",
7755+
"base": "about:blank",
7756+
"failure": true
7757+
},
7758+
{
7759+
"input": "http://256.256.256.256.256",
7760+
"base": "http://other.com/",
7761+
"failure": true
7762+
},
7763+
{
7764+
"input": "http://256.256.256.256.256.",
7765+
"base": "http://other.com/",
7766+
"failure": true
7767+
},
7768+
{
7769+
"input": "http://1.2.3.08",
7770+
"base": "about:blank",
7771+
"failure": true
7772+
},
7773+
{
7774+
"input": "http://1.2.3.08.",
7775+
"base": "about:blank",
7776+
"failure": true
7777+
},
7778+
{
7779+
"input": "http://1.2.3.09",
7780+
"base": "about:blank",
7781+
"failure": true
7782+
},
7783+
{
7784+
"input": "http://09.2.3.4",
7785+
"base": "about:blank",
7786+
"failure": true
7787+
},
7788+
{
7789+
"input": "http://09.2.3.4.",
7790+
"base": "about:blank",
7791+
"failure": true
7792+
},
7793+
{
7794+
"input": "http://01.2.3.4.5",
7795+
"base": "about:blank",
7796+
"failure": true
7797+
},
7798+
{
7799+
"input": "http://01.2.3.4.5.",
7800+
"base": "about:blank",
7801+
"failure": true
7802+
},
7803+
{
7804+
"input": "http://0x100.2.3.4",
7805+
"base": "about:blank",
7806+
"failure": true
7807+
},
7808+
{
7809+
"input": "http://0x100.2.3.4.",
7810+
"base": "about:blank",
7811+
"failure": true
7812+
},
7813+
{
7814+
"input": "http://0x1.2.3.4.5",
7815+
"base": "about:blank",
7816+
"failure": true
7817+
},
7818+
{
7819+
"input": "http://0x1.2.3.4.5.",
7820+
"base": "about:blank",
7821+
"failure": true
7822+
},
7823+
{
7824+
"input": "http://foo.1.2.3.4",
7825+
"base": "about:blank",
7826+
"failure": true
7827+
},
7828+
{
7829+
"input": "http://foo.1.2.3.4.",
7830+
"base": "about:blank",
7831+
"failure": true
7832+
},
7833+
{
7834+
"input": "http://foo.2.3.4",
7835+
"base": "about:blank",
7836+
"failure": true
7837+
},
7838+
{
7839+
"input": "http://foo.2.3.4.",
7840+
"base": "about:blank",
7841+
"failure": true
7842+
},
7843+
{
7844+
"input": "http://foo.09",
7845+
"base": "about:blank",
7846+
"failure": true
7847+
},
7848+
{
7849+
"input": "http://foo.09.",
7850+
"base": "about:blank",
7851+
"failure": true
7852+
},
7853+
{
7854+
"input": "http://foo.0x4",
7855+
"base": "about:blank",
7856+
"failure": true
7857+
},
7858+
{
7859+
"input": "http://foo.0x4.",
7860+
"base": "about:blank",
7861+
"failure": true
7862+
},
7863+
{
7864+
"input": "http://foo.09..",
7865+
"base": "about:blank",
7866+
"hash": "",
7867+
"host": "foo.09..",
7868+
"hostname": "foo.09..",
7869+
"href":"http://foo.09../",
7870+
"password": "",
7871+
"pathname": "/",
7872+
"port":"",
7873+
"protocol": "http:",
7874+
"search": "",
7875+
"username": ""
7876+
},
7877+
{
7878+
"input": "http://0999999999999999999/",
7879+
"base": "about:blank",
7880+
"failure": true
7881+
},
7882+
{
7883+
"input": "http://foo.0x",
7884+
"base": "about:blank",
7885+
"failure": true
7886+
},
7887+
{
7888+
"input": "http://foo.0XFfFfFfFfFfFfFfFfFfAcE123",
7889+
"base": "about:blank",
7890+
"failure": true
7891+
},
7892+
{
7893+
"input": "http://💩.123/",
7894+
"base": "about:blank",
7895+
"failure": true
77367896
}
77377897
]

0 commit comments

Comments
 (0)