Skip to content

Commit 46816fc

Browse files
committed
refactor Regex::captureCount(), add unit tests
1 parent a45e444 commit 46816fc

File tree

6 files changed

+117
-38
lines changed

6 files changed

+117
-38
lines changed

include/tsutil/Regex.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,6 @@ class RegexMatchContext
116116
*/
117117
void setMatchLimit(uint32_t limit);
118118

119-
/** Limits how far an unanchored search can advance in the subject string.
120-
*/
121-
void setOffsetLimit(uint32_t limit);
122-
123119
private:
124120
/// @internal This wraps a void* so to avoid requiring a pcre2 include.
125121
struct _MatchContext;
@@ -226,8 +222,11 @@ class Regex
226222
int exec(std::string_view subject, RegexMatches &matches, uint32_t flags,
227223
RegexMatchContext const *const matchContext = nullptr) const;
228224

229-
/// @return The number of capture groups in the compiled pattern.
230-
int get_capture_count();
225+
/// @return The number of capture groups in the compiled pattern, -1 for fail.
226+
int32_t captureCount() const;
227+
228+
/// @return number of highest back references, -1 for fail.
229+
int32_t backrefMax() const;
231230

232231
/// @return Is the compiled pattern empty?
233232
bool empty() const;

plugins/experimental/cookie_remap/cookie_remap.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ class subop
385385
return false;
386386
}
387387

388-
regex_ccount = regex->get_capture_count();
388+
regex_ccount = regex->captureCount();
389389
if (regex_ccount < 0) {
390390
delete regex;
391391
regex = nullptr;
@@ -445,7 +445,7 @@ class subop
445445

446446
Regex *regex = nullptr;
447447
std::string regex_string;
448-
int regex_ccount = 0;
448+
int32_t regex_ccount = 0;
449449

450450
std::string bucket;
451451
unsigned int how_many = 0;

plugins/regex_remap/regex_remap.cc

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ class RemapRegex
165165
}
166166

167167
inline void
168-
set_match_context(RegexMatchContext *const ctx)
168+
set_match_context(RegexMatchContext const *const ctx)
169169
{
170170
_match_context = ctx;
171171
}
@@ -260,10 +260,10 @@ class RemapRegex
260260

261261
bool _lowercase_substitutions = false;
262262

263-
Regex _rex;
264-
RegexMatchContext *_match_context = nullptr; // owned by RemapInstance
265-
RemapRegex *_next = nullptr;
266-
TSHttpStatus _status = static_cast<TSHttpStatus>(0);
263+
Regex _rex;
264+
RegexMatchContext const *_match_context = nullptr; // owned by RemapInstance
265+
RemapRegex *_next = nullptr;
266+
TSHttpStatus _status = static_cast<TSHttpStatus>(0);
267267

268268
int _active_timeout = -1;
269269
int _no_activity_timeout = -1;
@@ -395,6 +395,12 @@ RemapRegex::compile(std::string &error, int &erroffset)
395395
return -1;
396396
}
397397

398+
int32_t const ccount = _rex.captureCount();
399+
if (ccount < 0) {
400+
error = "Failure to get capture count for Regex";
401+
return -1;
402+
}
403+
398404
// Get some info for the string substitutions
399405
char *str = _subst;
400406
_num_subs = 0;
@@ -440,12 +446,10 @@ RemapRegex::compile(std::string &error, int &erroffset)
440446
}
441447

442448
if (ix > -1) {
443-
/*
444-
if ((ix < 10) && (ix > matches.size())) {
445-
error = "using unavailable captured substring ($n) in substitution";
446-
return -1;
447-
}
448-
*/
449+
if ((ix < 10) && (ix > ccount)) {
450+
error = "using unavailable captured substring ($n) in substitution";
451+
return -1;
452+
}
449453

450454
_sub_ix[_num_subs] = ix;
451455
_sub_pos[_num_subs] = (str - _subst);
@@ -781,7 +785,7 @@ TSRemapNewInstance(int argc, char *argv[], void **ih, char * /* errbuf ATS_UNUSE
781785
} else {
782786
Dbg(dbg_ctl, "Added regex=%s with subs=%s and options `%s'", regex.c_str(), subst.c_str(), options.c_str());
783787
cur->set_order(++count);
784-
cur->set_match_context(&ri->match_context);
788+
cur->set_match_context(&(ri->match_context));
785789
auto tmp = cur.get();
786790
if (ri->first == nullptr) {
787791
ri->first = cur.release();

src/proxy/http/remap/RemapConfig.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,7 @@ process_regex_mapping_config(const char *from_host_lower, url_mapping *new_mappi
974974
std::string_view to_host{};
975975
int to_host_len;
976976
int substitution_id;
977-
int captures;
977+
int32_t captures;
978978

979979
reg_map->to_url_host_template = nullptr;
980980
reg_map->to_url_host_template_len = 0;
@@ -989,7 +989,7 @@ process_regex_mapping_config(const char *from_host_lower, url_mapping *new_mappi
989989
goto lFail;
990990
}
991991

992-
captures = reg_map->regular_expression.get_capture_count();
992+
captures = reg_map->regular_expression.captureCount();
993993
if (captures == -1) {
994994
Warning("pcre_fullinfo failed!");
995995
goto lFail;

src/tsutil/Regex.cc

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -275,16 +275,6 @@ RegexMatchContext::setMatchLimit(uint32_t limit)
275275
}
276276
}
277277

278-
//----------------------------------------------------------------------------
279-
void
280-
RegexMatchContext::setOffsetLimit(uint32_t limit)
281-
{
282-
auto ptr = _MatchContext::get(_match_context);
283-
if (ptr != nullptr) {
284-
pcre2_set_offset_limit(ptr, limit);
285-
}
286-
}
287-
288278
//----------------------------------------------------------------------------
289279
struct Regex::_Code {
290280
static pcre2_code *
@@ -470,13 +460,24 @@ Regex::exec(std::string_view subject, RegexMatches &matches, uint32_t flags, Reg
470460

471461
//----------------------------------------------------------------------------
472462
int32_t
473-
Regex::get_capture_count()
463+
Regex::captureCount() const
474464
{
475-
int captures = -1;
465+
uint32_t captures = 0;
476466
if (pcre2_pattern_info(_Code::get(_code), PCRE2_INFO_CAPTURECOUNT, &captures) != 0) {
477467
return -1;
478468
}
479-
return captures;
469+
return static_cast<int32_t>(captures);
470+
}
471+
472+
//----------------------------------------------------------------------------
473+
int32_t
474+
Regex::backrefMax() const
475+
{
476+
uint32_t refs = 0;
477+
if (pcre2_pattern_info(_Code::get(_code), PCRE2_INFO_BACKREFMAX, &refs) != 0) {
478+
return -1;
479+
}
480+
return static_cast<int32_t>(refs);
480481
}
481482

482483
//----------------------------------------------------------------------------

src/tsutil/unit_tests/test_Regex.cc

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ struct submatch_t {
8181

8282
struct submatch_test_t {
8383
std::string_view regex;
84-
int capture_count;
84+
int32_t capture_count;
8585
std::vector<submatch_t> tests;
8686
};
8787

@@ -129,7 +129,7 @@ TEST_CASE("Regex", "[libts][Regex]")
129129
for (auto &item : submatch_test_data) {
130130
Regex r;
131131
REQUIRE(r.compile(item.regex.data()) == true);
132-
REQUIRE(r.get_capture_count() == item.capture_count);
132+
REQUIRE(r.captureCount() == item.capture_count);
133133

134134
for (auto &test : item.tests) {
135135
RegexMatches matches;
@@ -146,7 +146,7 @@ TEST_CASE("Regex", "[libts][Regex]")
146146
for (auto &item : submatch_test_data) {
147147
Regex r;
148148
REQUIRE(r.compile(item.regex.data()) == true);
149-
REQUIRE(r.get_capture_count() == item.capture_count);
149+
REQUIRE(r.captureCount() == item.capture_count);
150150

151151
for (auto &test : item.tests) {
152152
RegexMatches matches;
@@ -489,3 +489,78 @@ TEST_CASE("Regex copy with RE_NOTEMPTY flag", "[libts][Regex][copy][flags]")
489489
CHECK(copy.exec(std::string_view(""), RE_NOTEMPTY) == false);
490490
}
491491
}
492+
493+
struct backref_test_t {
494+
std::string_view regex;
495+
bool valid;
496+
int32_t backref_max;
497+
};
498+
499+
std::vector<backref_test_t> backref_test_data{
500+
{
501+
{""},
502+
true, 0,
503+
},
504+
{
505+
{R"(\b(\w+)\s+\1\b)"},
506+
true, 1,
507+
},
508+
{
509+
{R"((.)\1)"},
510+
true,1,
511+
},
512+
{
513+
{R"((.)(.).\2\1)"},
514+
true, 2,
515+
},
516+
{
517+
{R"((.\2\1)"},
518+
false, -1,
519+
},
520+
};
521+
522+
TEST_CASE("Regex back reference counting", "[libts][Regex][backrefMax]")
523+
{
524+
// case sensitive test
525+
for (auto &item : backref_test_data) {
526+
Regex r;
527+
REQUIRE(r.compile(item.regex) == item.valid);
528+
REQUIRE(r.backrefMax() == item.backref_max);
529+
}
530+
}
531+
532+
struct match_context_test_t {
533+
std::string_view regex;
534+
std::string_view str;
535+
bool valid;
536+
int32_t rcode;
537+
};
538+
539+
std::vector<match_context_test_t> match_context_test_data{
540+
{
541+
{"abc"},
542+
{"abc"},
543+
true, 1,
544+
},
545+
{{"a+b"}, {"aaaaaab"}, true, 1},
546+
{{"(a+)+b"}, {"aaaaab"}, true, -47}, // PCRE2_ERROR_MATCHLIMIT
547+
{
548+
{"(."},
549+
{"a"},
550+
false, -1,
551+
},
552+
};
553+
554+
TEST_CASE("RegexMatchContext", "[libts][Regex][RegexMatchContext]")
555+
{
556+
RegexMatchContext match_context;
557+
match_context.setMatchLimit(5);
558+
559+
// case sensitive test
560+
for (auto &item : match_context_test_data) {
561+
Regex r;
562+
REQUIRE(r.compile(item.regex) == item.valid);
563+
RegexMatches matches;
564+
REQUIRE(r.exec(item.str, matches, 0, &match_context) == item.rcode);
565+
}
566+
}

0 commit comments

Comments
 (0)