diff --git a/include/tsutil/Regex.h b/include/tsutil/Regex.h index 8f2fc012055..a041fe7c98d 100644 --- a/include/tsutil/Regex.h +++ b/include/tsutil/Regex.h @@ -36,6 +36,7 @@ enum REFlags { RE_CASE_INSENSITIVE = 0x00000008u, ///< Ignore case (default: case sensitive). RE_UNANCHORED = 0x00000400u, ///< Unanchored (DFA defaults to anchored). RE_ANCHORED = 0x80000000u, ///< Anchored (Regex defaults to unanchored). + RE_NOTEMPTY = 0x00000004u ///< Reject empty matches (default: the empty string may match). }; /// @brief Wrapper for PCRE2 match data. @@ -124,7 +125,9 @@ class Regex * * It is safe to call this method concurrently on the same instance of @a this. + * + * @a flags is forwarded to the RegexMatches overload of exec() as match-time options (e.g. @c RE_NOTEMPTY); the default of 0 adds none. */ - bool exec(std::string_view subject) const; + bool exec(std::string_view subject, uint32_t flags = 0) const; /** Execute the regular expression. * @@ -137,7 +140,9 @@ class Regex * Each capture group takes 3 elements of @a ovector, therefore @a ovecsize must * be a multiple of 3 and at least three times the number of desired capture groups. + * + * @a flags is passed to pcre2_match() as its match-time options (e.g. @c RE_NOTEMPTY); the default of 0 adds none. */ - int exec(std::string_view subject, RegexMatches &matches) const; + int exec(std::string_view subject, RegexMatches &matches, uint32_t flags = 0) const; /// @return The number of capture groups in the compiled pattern. 
int get_capture_count(); diff --git a/src/tsutil/Regex.cc b/src/tsutil/Regex.cc index a0d548ad850..aa3f74cebd1 100644 --- a/src/tsutil/Regex.cc +++ b/src/tsutil/Regex.cc @@ -31,9 +31,10 @@ #include #include -static_assert(RE_CASE_INSENSITIVE == PCRE2_CASELESS, "Update RE_CASE_INSERSITIVE for current PCRE2 version."); -static_assert(RE_UNANCHORED == PCRE2_MULTILINE, "Update RE_MULTILINE for current PCRE2 version."); +static_assert(RE_CASE_INSENSITIVE == PCRE2_CASELESS, "Update RE_CASE_INSENSITIVE for current PCRE2 version."); +static_assert(RE_UNANCHORED == PCRE2_MULTILINE, "Update RE_UNANCHORED for current PCRE2 version."); static_assert(RE_ANCHORED == PCRE2_ANCHORED, "Update RE_ANCHORED for current PCRE2 version."); +static_assert(RE_NOTEMPTY == PCRE2_NOTEMPTY, "Update RE_NOTEMPTY for current PCRE2 version."); //---------------------------------------------------------------------------- namespace @@ -296,20 +297,20 @@ Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, u //---------------------------------------------------------------------------- bool -Regex::exec(std::string_view subject) const +Regex::exec(std::string_view subject, uint32_t flags) const { if (_Code::get(_code) == nullptr) { return false; } RegexMatches matches; - int count = this->exec(subject, matches); + int count = this->exec(subject, matches, flags); return count > 0; } //---------------------------------------------------------------------------- int32_t -Regex::exec(std::string_view subject, RegexMatches &matches) const +Regex::exec(std::string_view subject, RegexMatches &matches, uint32_t flags) const { auto code = _Code::get(_code); @@ -317,7 +318,7 @@ Regex::exec(std::string_view subject, RegexMatches &matches) const if (code == nullptr) { return 0; } - int count = pcre2_match(code, reinterpret_cast(subject.data()), subject.size(), 0, 0, + int count = pcre2_match(code, reinterpret_cast(subject.data()), subject.size(), 0, flags, 
RegexMatches::_MatchData::get(matches._match_data), RegexContext::get_instance()->get_match_context()); matches._size = count; diff --git a/src/tsutil/unit_tests/test_Regex.cc b/src/tsutil/unit_tests/test_Regex.cc index 734b37ec2a3..ebc0f1a7641 100644 --- a/src/tsutil/unit_tests/test_Regex.cc +++ b/src/tsutil/unit_tests/test_Regex.cc @@ -190,3 +190,40 @@ TEST_CASE("Regex", "[libts][Regex]") } #endif } + +TEST_CASE("Regex RE_NOTEMPTY flag behavior", "[libts][Regex][flags][RE_NOTEMPTY]") +{ + // Pattern that only matches empty string + Regex r; + REQUIRE(r.compile("^$") == true); + + SECTION("default exec matches empty subject") + { + // boolean overload + CHECK(r.exec(std::string_view("")) == true); + + // matches overload should return 1 (one match - the whole subject) + RegexMatches matches; + CHECK(r.exec(std::string_view(""), matches) == 1); + CHECK(matches.size() == 1); + CHECK(matches[0] == std::string_view("")); + } + + SECTION("RE_NOTEMPTY prevents empty matches") + { + // boolean overload with RE_NOTEMPTY should not match + CHECK(r.exec(std::string_view(""), RE_NOTEMPTY) == false); + + // matches overload should return a negative value (PCRE2_ERROR_NOMATCH) + RegexMatches matches; + int rc = r.exec(std::string_view(""), matches, RE_NOTEMPTY); + CHECK(rc < 0); + } + + SECTION("non-empty subject unaffected by RE_NOTEMPTY for this pattern") + { + // '^$' should not match 'a' in any case + CHECK(r.exec(std::string_view("a")) == false); + CHECK(r.exec(std::string_view("a"), RE_NOTEMPTY) == false); + } +}