Skip to content

Commit c7c04b1

Browse files
authored
Speed up day/month parsing (#12592)
* Speed up day/month parsing. The "DFA" class is quite slow for parsing dates in headers because it uses regex matching. Instead, I pack the first three characters into an integer and do a simple linear search. I benchmarked this approach against other methods such as a trie, a DFA, SIMD, and tree search, and found it to be among the fastest for this task while remaining portable and readable. It is at least 10x faster than the previous approach on the x64 and ARM CPUs I tested.
1 parent cca8fcb commit c7c04b1

File tree

1 file changed

+55
-15
lines changed

1 file changed

+55
-15
lines changed

src/proxy/hdrs/MIME.cc

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,12 @@
2525
#include "tscore/ink_platform.h"
2626
#include "tscore/ink_memory.h"
2727
#include <cassert>
28+
#include <cctype>
2829
#include <cstdio>
2930
#include <cstring>
3031
#include <cctype>
3132
#include <algorithm>
33+
#include <string_view>
3234
#include "proxy/hdrs/MIME.h"
3335
#include "proxy/hdrs/HdrHeap.h"
3436
#include "proxy/hdrs/HdrToken.h"
@@ -51,17 +53,61 @@ using swoc::TextView;
5153
* C O N S T A N T S *
5254
* *
5355
***********************************************************************/
54-
static DFA *day_names_dfa = nullptr;
55-
static DFA *month_names_dfa = nullptr;
5656

57-
static constexpr const char *day_names[] = {
57+
namespace
58+
{
59+
constexpr std::array<std::string_view, 7> day_names = {
5860
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
5961
};
6062

61-
static constexpr const char *month_names[] = {
63+
constexpr std::array<std::string_view, 12> month_names = {
6264
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
6365
};
6466

67+
template <size_t count>
68+
consteval std::array<uint32_t, count>
69+
make_packed(const std::array<std::string_view, count> &names)
70+
{
71+
std::array<uint32_t, count> packed{};
72+
73+
auto tl = [](char c) -> char { return (c >= 'A' && c <= 'Z') ? (c + 32) : c; };
74+
75+
for (size_t i = 0; i < count; ++i) {
76+
const auto &sv = names[i];
77+
const uint32_t c0 = tl(static_cast<unsigned char>(sv[0]));
78+
const uint32_t c1 = tl(static_cast<unsigned char>(sv[1]));
79+
const uint32_t c2 = tl(static_cast<unsigned char>(sv[2]));
80+
packed[i] = (c0 << 16) | (c1 << 8) | c2;
81+
}
82+
return packed;
83+
}
84+
85+
constexpr std::array<uint32_t, day_names.size()> day_names_packed = make_packed(day_names);
86+
constexpr std::array<uint32_t, month_names.size()> month_names_packed = make_packed(month_names);
87+
88+
// Case-insensitive (ASCII) match of the first 3 characters of the input string against
// a table of packed names.
//
// Each entry of @a names_packed is expected to hold three lower-case characters packed
// as (c0 << 16) | (c1 << 8) | c2. The input's first three characters are lower-cased and
// packed the same way, then compared against every entry in order.
//
// Longer strings will match if their first 3 characters match - this is intentional for
// matching non-standard day/month names like "Thursday" or "September".
//
// @param s            Input text; only the first three characters are examined.
// @param names_packed Table of packed names to search.
// @return Index of the first matching entry, or -1 if @a s is shorter than three
//         characters or no entry matches.
template <size_t count>
__attribute__((always_inline)) constexpr int
match_3char_ci(const std::string_view s, const std::array<uint32_t, count> &names_packed)
{
  if (s.size() < 3) {
    return -1;
  }

  // Lower-case on the unsigned byte value and widen directly to uint32_t. Shifting a
  // plain (possibly signed) char would shift a negative int for bytes >= 0x80 and
  // smear sign bits across the packed key.
  auto tl = [](char c) -> uint32_t {
    const auto u = static_cast<unsigned char>(c);
    return (u >= 'A' && u <= 'Z') ? (u + 32u) : u;
  };
  const uint32_t packed = (tl(s[0]) << 16) | (tl(s[1]) << 8) | tl(s[2]);

  for (size_t i = 0; i < count; ++i) {
    if (packed == names_packed[i]) {
      return static_cast<int>(i);
    }
  }
  return -1;
}
109+
} // namespace
110+
65111
struct MDY {
66112
uint8_t m;
67113
uint8_t d;
@@ -594,11 +640,6 @@ mime_init()
594640
init = 0;
595641

596642
hdrtoken_init();
597-
day_names_dfa = new DFA;
598-
day_names_dfa->compile(day_names, SIZEOF(day_names), RE_CASE_INSENSITIVE);
599-
600-
month_names_dfa = new DFA;
601-
month_names_dfa->compile(month_names, SIZEOF(month_names), RE_CASE_INSENSITIVE);
602643

603644
MIME_FIELD_ACCEPT = hdrtoken_string_to_wks_sv("Accept");
604645
MIME_FIELD_ACCEPT_CHARSET = hdrtoken_string_to_wks_sv("Accept-Charset");
@@ -3123,8 +3164,7 @@ mime_parse_int64(const char *buf, const char *end)
31233164
int
31243165
mime_parse_rfc822_date_fastcase(const char *buf, int length, struct tm *tp)
31253166
{
3126-
unsigned int three_char_wday, three_char_mon;
3127-
std::string_view view{buf, size_t(length)};
3167+
unsigned int three_char_wday, three_char_mon;
31283168

31293169
ink_assert(length >= 29);
31303170
ink_assert(!is_ws(buf[0]));
@@ -3155,7 +3195,7 @@ mime_parse_rfc822_date_fastcase(const char *buf, int length, struct tm *tp)
31553195
}
31563196
}
31573197
if (tp->tm_wday < 0) {
3158-
tp->tm_wday = day_names_dfa->match(view);
3198+
tp->tm_wday = match_3char_ci({buf, 3}, day_names_packed);
31593199
if (tp->tm_wday < 0) {
31603200
return 0;
31613201
}
@@ -3208,7 +3248,7 @@ mime_parse_rfc822_date_fastcase(const char *buf, int length, struct tm *tp)
32083248
}
32093249
}
32103250
if (tp->tm_mon < 0) {
3211-
tp->tm_mon = month_names_dfa->match(view);
3251+
tp->tm_mon = match_3char_ci({buf + 8, 3}, month_names_packed);
32123252
if (tp->tm_mon < 0) {
32133253
return 0;
32143254
}
@@ -3353,7 +3393,7 @@ mime_parse_day(const char *&buf, const char *end, int *day)
33533393
e += 1;
33543394
}
33553395

3356-
*day = day_names_dfa->match({buf, size_t(e - buf)});
3396+
*day = match_3char_ci({buf, static_cast<size_t>(e - buf)}, day_names_packed);
33573397
if (*day < 0) {
33583398
return false;
33593399
} else {
@@ -3376,7 +3416,7 @@ mime_parse_month(const char *&buf, const char *end, int *month)
33763416
e += 1;
33773417
}
33783418

3379-
*month = month_names_dfa->match({buf, size_t(e - buf)});
3419+
*month = match_3char_ci({buf, static_cast<size_t>(e - buf)}, month_names_packed);
33803420
if (*month < 0) {
33813421
return false;
33823422
} else {

0 commit comments

Comments
 (0)