Skip to content

Commit 6c0578d

Browse files
committed
Improve performance of urldecode() and rawurldecode()
There are two hot spots on my machines: 1. We copy the string because the internal PHP API works in-place. 2. The conversion of hex characters is slow due to going through the C locale handling. This patch resolves the first hot spot by introducing two new internal APIs that avoid the redundant copy and allocate an empty string upfront. The second hot spot is resolved by having a specialised htoi handler. For the following benchmark: ```php $encoded = "Hello%20World%21+This%20is%20a%20test%3A%20%40%23%24%25%5E%26*%28%29"; for ($i=0;$i<2000000;$i++) { rawurldecode($encoded); urldecode($encoded); } ``` On an i7-4790: ``` Benchmark 1: ./sapi/cli/php x.php Time (mean ± σ): 364.8 ms ± 3.7 ms [User: 359.9 ms, System: 3.3 ms] Range (min … max): 359.9 ms … 372.0 ms 10 runs Benchmark 2: ./sapi/cli/php_old x.php Time (mean ± σ): 565.5 ms ± 4.9 ms [User: 561.8 ms, System: 2.5 ms] Range (min … max): 560.7 ms … 578.2 ms 10 runs Summary ./sapi/cli/php x.php ran 1.55 ± 0.02 times faster than ./sapi/cli/php_old x.php ``` On an i7-1185G7: ``` Benchmark 1: ./sapi/cli/php x.php Time (mean ± σ): 708.8 ms ± 6.1 ms [User: 701.4 ms, System: 6.3 ms] Range (min … max): 701.9 ms … 722.3 ms 10 runs Benchmark 2: ./sapi/cli/php_old x.php Time (mean ± σ): 1.311 s ± 0.019 s [User: 1.300 s, System: 0.008 s] Range (min … max): 1.281 s … 1.348 s 10 runs Summary ./sapi/cli/php x.php ran 1.85 ± 0.03 times faster than ./sapi/cli/php_old x.php ``` Closes GH-18378.
1 parent c0da1bc commit 6c0578d

File tree

4 files changed

+48
-28
lines changed

4 files changed

+48
-28
lines changed

UPGRADING

+1
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,7 @@ PHP 8.5 UPGRADE NOTES
476476
- Standard:
477477
. Improved performance of array functions with callbacks
478478
(array_find, array_filter, array_map, usort, ...).
479+
. Improved performance of urldecode() and rawurldecode().
479480

480481
- XMLReader:
481482
. Improved property access performance.

UPGRADING.INTERNALS

+4
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ PHP 8.5 INTERNALS UPGRADE NOTES
6161
is still valid. This is useful when a GC cycle is collected and the
6262
database object can be destroyed prior to destroying the statement.
6363

64+
- ext/standard
65+
. Added php_url_decode_ex() and php_raw_url_decode_ex() that unlike their
66+
non-ex counterparts do not work in-place.
67+
6468
========================
6569
4. OpCode changes
6670
========================

ext/standard/url.c

+41-28
Original file line numberDiff line numberDiff line change
@@ -411,21 +411,24 @@ PHP_FUNCTION(parse_url)
411411
}
412412
/* }}} */
413413

414+
/* https://stackoverflow.com/questions/34365746/whats-the-fastest-way-to-convert-hex-to-integer-in-c */
415+
static unsigned int php_htoi_single(unsigned char x)
416+
{
417+
ZEND_ASSERT((x >= 'a' && x <= 'f') || (x >= 'A' && x <= 'F') || (x >= '0' && x <= '9'));
418+
return 9 * (x >> 6) + (x & 0xf);
419+
}
420+
414421
/* {{{ php_htoi */
415-
static int php_htoi(char *s)
422+
static int php_htoi(const char *s)
416423
{
417424
int value;
418-
int c;
425+
unsigned char c;
419426

420427
c = ((unsigned char *)s)[0];
421-
if (isupper(c))
422-
c = tolower(c);
423-
value = (c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10) * 16;
428+
value = php_htoi_single(c) * 16;
424429

425430
c = ((unsigned char *)s)[1];
426-
if (isupper(c))
427-
c = tolower(c);
428-
value += c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10;
431+
value += php_htoi_single(c);
429432

430433
return (value);
431434
}
@@ -572,36 +575,41 @@ PHP_FUNCTION(urldecode)
572575
Z_PARAM_STR(in_str)
573576
ZEND_PARSE_PARAMETERS_END();
574577

575-
out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
576-
ZSTR_LEN(out_str) = php_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
578+
out_str = zend_string_alloc(ZSTR_LEN(in_str), false);
579+
ZSTR_LEN(out_str) = php_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str));
577580

578581
RETURN_NEW_STR(out_str);
579582
}
580583
/* }}} */
581584

582-
/* {{{ php_url_decode */
583-
PHPAPI size_t php_url_decode(char *str, size_t len)
585+
PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len)
584586
{
585-
char *dest = str;
586-
char *data = str;
587+
char *dest_start = dest;
588+
const char *data = src;
587589

588-
while (len--) {
590+
while (src_len--) {
589591
if (*data == '+') {
590592
*dest = ' ';
591593
}
592-
else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
594+
else if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1))
593595
&& isxdigit((int) *(data + 2))) {
594596
*dest = (char) php_htoi(data + 1);
595597
data += 2;
596-
len -= 2;
598+
src_len -= 2;
597599
} else {
598600
*dest = *data;
599601
}
600602
data++;
601603
dest++;
602604
}
603605
*dest = '\0';
604-
return dest - str;
606+
return dest - dest_start;
607+
}
608+
609+
/* {{{ php_url_decode */
610+
PHPAPI size_t php_url_decode(char *str, size_t len)
611+
{
612+
return php_url_decode_ex(str, str, len);
605613
}
606614
/* }}} */
607615

@@ -634,33 +642,38 @@ PHP_FUNCTION(rawurldecode)
634642
Z_PARAM_STR(in_str)
635643
ZEND_PARSE_PARAMETERS_END();
636644

637-
out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
638-
ZSTR_LEN(out_str) = php_raw_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
645+
out_str = zend_string_alloc(ZSTR_LEN(in_str), false);
646+
ZSTR_LEN(out_str) = php_raw_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str));
639647

640648
RETURN_NEW_STR(out_str);
641649
}
642650
/* }}} */
643651

644-
/* {{{ php_raw_url_decode */
645-
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
652+
PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len)
646653
{
647-
char *dest = str;
648-
char *data = str;
654+
char *dest_start = dest;
655+
const char *data = src;
649656

650-
while (len--) {
651-
if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
657+
while (src_len--) {
658+
if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1))
652659
&& isxdigit((int) *(data + 2))) {
653660
*dest = (char) php_htoi(data + 1);
654661
data += 2;
655-
len -= 2;
662+
src_len -= 2;
656663
} else {
657664
*dest = *data;
658665
}
659666
data++;
660667
dest++;
661668
}
662669
*dest = '\0';
663-
return dest - str;
670+
return dest - dest_start;
671+
}
672+
673+
/* {{{ php_raw_url_decode */
674+
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
675+
{
676+
return php_raw_url_decode_ex(str, str, len);
664677
}
665678
/* }}} */
666679

ext/standard/url.h

+2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ PHPAPI php_url *php_url_parse(char const *str);
3333
PHPAPI php_url *php_url_parse_ex(char const *str, size_t length);
3434
PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port);
3535
PHPAPI size_t php_url_decode(char *str, size_t len); /* return value: length of decoded string */
36+
PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len);
3637
PHPAPI size_t php_raw_url_decode(char *str, size_t len); /* return value: length of decoded string */
38+
PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len);
3739
PHPAPI zend_string *php_url_encode(char const *s, size_t len);
3840
PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len);
3941

0 commit comments

Comments
 (0)