Skip to content

Commit f69e6c3

Browse files
authored
fix node issue 51593 (#583)
* fix node issue 51593 * linting
1 parent 7ed703c commit f69e6c3

File tree

4 files changed

+45
-18
lines changed

4 files changed

+45
-18
lines changed

include/ada/url_aggregator.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,10 +232,12 @@ struct url_aggregator : url_base {
232232
}
233233

234234
/**
235-
* Return true on success.
235+
* Return true on success. The 'in_place' parameter indicates whether the
236+
* the string_view input is pointing in the buffer. When in_place is false,
237+
* we must nearly always update the buffer.
236238
* @see https://url.spec.whatwg.org/#concept-ipv4-parser
237239
*/
238-
[[nodiscard]] bool parse_ipv4(std::string_view input);
240+
[[nodiscard]] bool parse_ipv4(std::string_view input, bool in_place);
239241

240242
/**
241243
* Return true on success.

src/url.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
namespace ada {
1010

1111
bool url::parse_opaque_host(std::string_view input) {
12-
ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]");
12+
ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]");
1313
if (std::any_of(input.begin(), input.end(),
1414
ada::unicode::is_forbidden_host_code_point)) {
1515
return is_valid = false;
@@ -23,7 +23,7 @@ bool url::parse_opaque_host(std::string_view input) {
2323
}
2424

2525
bool url::parse_ipv4(std::string_view input) {
26-
ada_log("parse_ipv4 ", input, "[", input.size(), " bytes]");
26+
ada_log("parse_ipv4 ", input, " [", input.size(), " bytes]");
2727
if (input.back() == '.') {
2828
input.remove_suffix(1);
2929
}
@@ -98,7 +98,7 @@ bool url::parse_ipv4(std::string_view input) {
9898
}
9999

100100
bool url::parse_ipv6(std::string_view input) {
101-
ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]");
101+
ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]");
102102

103103
if (input.empty()) {
104104
return is_valid = false;
@@ -422,7 +422,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) {
422422
}
423423

424424
ada_really_inline bool url::parse_host(std::string_view input) {
425-
ada_log("parse_host ", input, "[", input.size(), " bytes]");
425+
ada_log("parse_host ", input, " [", input.size(), " bytes]");
426426
if (input.empty()) {
427427
return is_valid = false;
428428
} // technically unnecessary.
@@ -474,6 +474,8 @@ ada_really_inline bool url::parse_host(std::string_view input) {
474474
ada_log("parse_host to_ascii returns false");
475475
return is_valid = false;
476476
}
477+
ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(),
478+
" bytes]");
477479

478480
if (std::any_of(host.value().begin(), host.value().end(),
479481
ada::unicode::is_forbidden_domain_code_point)) {
@@ -484,7 +486,7 @@ ada_really_inline bool url::parse_host(std::string_view input) {
484486
// If asciiDomain ends in a number, then return the result of IPv4 parsing
485487
// asciiDomain.
486488
if (checkers::is_ipv4(host.value())) {
487-
ada_log("parse_host got ipv4", *host);
489+
ada_log("parse_host got ipv4 ", *host);
488490
return parse_ipv4(host.value());
489491
}
490492

src/url_aggregator.cpp

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ void url_aggregator::set_hash(const std::string_view input) {
411411

412412
bool url_aggregator::set_href(const std::string_view input) {
413413
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
414-
ada_log("url_aggregator::set_href ", input, "[", input.size(), " bytes]");
414+
ada_log("url_aggregator::set_href ", input, " [", input.size(), " bytes]");
415415
ada::result<url_aggregator> out = ada::parse<url_aggregator>(input);
416416
ada_log("url_aggregator::set_href, success :", out.has_value());
417417

@@ -425,7 +425,8 @@ bool url_aggregator::set_href(const std::string_view input) {
425425
}
426426

427427
ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
428-
ada_log("url_aggregator:parse_host ", input, "[", input.size(), " bytes]");
428+
ada_log("url_aggregator:parse_host \"", input, "\" [", input.size(),
429+
" bytes]");
429430
ADA_ASSERT_TRUE(validate());
430431
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
431432
if (input.empty()) {
@@ -475,7 +476,7 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
475476
update_base_hostname(input);
476477
if (checkers::is_ipv4(get_hostname())) {
477478
ada_log("parse_host fast path ipv4");
478-
return parse_ipv4(get_hostname());
479+
return parse_ipv4(get_hostname(), true);
479480
}
480481
ada_log("parse_host fast path ", get_hostname());
481482
return true;
@@ -491,6 +492,8 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
491492
ada_log("parse_host to_ascii returns false");
492493
return is_valid = false;
493494
}
495+
ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(),
496+
" bytes]");
494497

495498
if (std::any_of(host.value().begin(), host.value().end(),
496499
ada::unicode::is_forbidden_domain_code_point)) {
@@ -500,8 +503,8 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
500503
// If asciiDomain ends in a number, then return the result of IPv4 parsing
501504
// asciiDomain.
502505
if (checkers::is_ipv4(host.value())) {
503-
ada_log("parse_host got ipv4", *host);
504-
return parse_ipv4(host.value());
506+
ada_log("parse_host got ipv4 ", *host);
507+
return parse_ipv4(host.value(), false);
505508
}
506509

507510
update_base_hostname(host.value());
@@ -754,7 +757,7 @@ bool url_aggregator::set_hostname(const std::string_view input) {
754757
}
755758

756759
[[nodiscard]] std::string ada::url_aggregator::to_string() const {
757-
ada_log("url_aggregator::to_string buffer:", buffer, "[", buffer.size(),
760+
ada_log("url_aggregator::to_string buffer:", buffer, " [", buffer.size(),
758761
" bytes]");
759762
if (!is_valid) {
760763
return "null";
@@ -853,8 +856,8 @@ bool url_aggregator::set_hostname(const std::string_view input) {
853856
return checkers::verify_dns_length(get_hostname());
854857
}
855858

856-
bool url_aggregator::parse_ipv4(std::string_view input) {
857-
ada_log("parse_ipv4 ", input, "[", input.size(),
859+
bool url_aggregator::parse_ipv4(std::string_view input, bool in_place) {
860+
ada_log("parse_ipv4 ", input, " [", input.size(),
858861
" bytes], overlaps with buffer: ",
859862
helpers::overlaps(input, buffer) ? "yes" : "no");
860863
ADA_ASSERT_TRUE(validate());
@@ -878,20 +881,25 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
878881
} else {
879882
std::from_chars_result r;
880883
if (is_hex) {
884+
ada_log("parse_ipv4 trying to parse hex number");
881885
r = std::from_chars(input.data() + 2, input.data() + input.size(),
882886
segment_result, 16);
883887
} else if ((input.length() >= 2) && input[0] == '0' &&
884888
checkers::is_digit(input[1])) {
889+
ada_log("parse_ipv4 trying to parse octal number");
885890
r = std::from_chars(input.data() + 1, input.data() + input.size(),
886891
segment_result, 8);
887892
} else {
893+
ada_log("parse_ipv4 trying to parse decimal number");
888894
pure_decimal_count++;
889895
r = std::from_chars(input.data(), input.data() + input.size(),
890896
segment_result, 10);
891897
}
892898
if (r.ec != std::errc()) {
899+
ada_log("parse_ipv4 parsing failed");
893900
return is_valid = false;
894901
}
902+
ada_log("parse_ipv4 parsed ", segment_result);
895903
input.remove_prefix(r.ptr - input.data());
896904
}
897905
if (input.empty()) {
@@ -916,17 +924,22 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
916924
}
917925
}
918926
if ((digit_count != 4) || (!input.empty())) {
927+
ada_log("parse_ipv4 found invalid (more than 4 numbers or empty) ");
919928
return is_valid = false;
920929
}
921930
final:
922931
ada_log("url_aggregator::parse_ipv4 completed ", get_href(),
923932
" host: ", get_host());
924933

925934
// We could also check r.ptr to see where the parsing ended.
926-
if (pure_decimal_count == 4 && !trailing_dot) {
935+
if (in_place && pure_decimal_count == 4 && !trailing_dot) {
936+
ada_log(
937+
"url_aggregator::parse_ipv4 completed and was already correct in the "
938+
"buffer");
927939
// The original input was already all decimal and we validated it. So we
928940
// don't need to do anything.
929941
} else {
942+
ada_log("url_aggregator::parse_ipv4 completed and we need to update it");
930943
// Optimization opportunity: Get rid of unnecessary string return in ipv4
931944
// serializer.
932945
// TODO: This is likely a bug because it goes back update_base_hostname, not
@@ -940,8 +953,11 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
940953
}
941954

942955
bool url_aggregator::parse_ipv6(std::string_view input) {
956+
// TODO: Implement in_place optimization: we know that input points
957+
// in the buffer, so we can just check whether the buffer is already
958+
// well formatted.
943959
// TODO: Find a way to merge parse_ipv6 with url.cpp implementation.
944-
ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]");
960+
ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]");
945961
ADA_ASSERT_TRUE(validate());
946962
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
947963
if (input.empty()) {
@@ -1175,7 +1191,7 @@ bool url_aggregator::parse_ipv6(std::string_view input) {
11751191
}
11761192

11771193
bool url_aggregator::parse_opaque_host(std::string_view input) {
1178-
ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]");
1194+
ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]");
11791195
ADA_ASSERT_TRUE(validate());
11801196
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
11811197
if (std::any_of(input.begin(), input.end(),

tests/basic_tests.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,10 @@ TYPED_TEST(basic_tests, nodejs_51514) {
402402
auto out = ada::parse<TypeParam>("http://1.1.1.256");
403403
ASSERT_FALSE(out);
404404
}
405+
// https://github.com/nodejs/node/issues/51593
406+
TYPED_TEST(basic_tests, nodejs_51593) {
407+
auto out = ada::parse<TypeParam>("http://\u200b123.123.123.123");
408+
ASSERT_TRUE(out);
409+
ASSERT_EQ(out->get_href(), "http://123.123.123.123/");
410+
SUCCEED();
411+
}

0 commit comments

Comments
 (0)