diff --git a/CMake/Findpcre2.cmake b/CMake/Findpcre2.cmake
new file mode 100644
index 00000000000..c72b98e2cf6
--- /dev/null
+++ b/CMake/Findpcre2.cmake
@@ -0,0 +1,55 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Locate a system PCRE2 installation (8-bit code unit width) and expose it
+# under the canonical target name `pcre2-8::pcre2-8` used by the
+# velox/external/regex_compat module.  `pcre2_FOUND` is set on every success
+# path so that find_package(pcre2) reports what this module actually found.
+
+# 1) PCRE2's own CMake package config.  That package is named `PCRE2`, so it
+#    sets PCRE2_FOUND (not pcre2_FOUND) and exports the target PCRE2::8BIT.
+find_package(PCRE2 QUIET CONFIG COMPONENTS 8BIT)
+if(PCRE2_FOUND AND TARGET PCRE2::8BIT AND NOT TARGET pcre2-8::pcre2-8)
+  add_library(pcre2-8::pcre2-8 ALIAS PCRE2::8BIT)
+  message(STATUS "Found PCRE2 via CMake.")
+elseif(TARGET pcre2-8::pcre2-8)
+  message(STATUS "PCRE2 target already defined.")
+endif()
+if(TARGET pcre2-8::pcre2-8)
+  set(pcre2_FOUND TRUE)
+  return()
+endif()
+
+# 2) pkg-config.  PkgConfig is looked up QUIETly rather than REQUIRED so a host
+#    without it still reaches the not-found handling at the end of this file.
+find_package(PkgConfig QUIET)
+if(PKG_CONFIG_FOUND)
+  pkg_check_modules(PCRE2_8 QUIET IMPORTED_TARGET libpcre2-8)
+endif()
+if(PCRE2_8_FOUND)
+  add_library(pcre2-8::pcre2-8 INTERFACE IMPORTED)
+  set_target_properties(pcre2-8::pcre2-8 PROPERTIES
+    INTERFACE_LINK_LIBRARIES PkgConfig::PCRE2_8
+    INTERFACE_COMPILE_DEFINITIONS "PCRE2_CODE_UNIT_WIDTH=8")
+  set(pcre2_FOUND TRUE)
+  message(STATUS "Found PCRE2 via pkg-config.")
+  return()
+endif()
+
+# 3) Nothing usable: honour the REQUIRED / QUIET flags of find_package(pcre2).
+if(pcre2_FIND_REQUIRED)
+  message(FATAL_ERROR "Failed to find PCRE2.")
+elseif(NOT pcre2_FIND_QUIETLY)
+  message(WARNING "Failed to find PCRE2.")
+endif()
diff --git a/CMake/resolve_dependency_modules/pcre2.cmake b/CMake/resolve_dependency_modules/pcre2.cmake
new file mode 100644
index 00000000000..7053727a98d
--- /dev/null
+++ b/CMake/resolve_dependency_modules/pcre2.cmake
@@ -0,0 +1,55 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+include_guard(GLOBAL)
+
+# Env VELOX_PCRE2_URL (+ optional VELOX_PCRE2_SHA256) overrides the pinned release.
+if(DEFINED ENV{VELOX_PCRE2_URL})
+  set(VELOX_PCRE2_SOURCE_URL "$ENV{VELOX_PCRE2_URL}")
+  set(VELOX_PCRE2_BUILD_SHA256_CHECKSUM "$ENV{VELOX_PCRE2_SHA256}")
+else()
+  set(VELOX_PCRE2_VERSION 10.47)
+  set(
+    VELOX_PCRE2_SOURCE_URL
+    "https://github.com/PCRE2Project/pcre2/releases/download/pcre2-${VELOX_PCRE2_VERSION}/pcre2-${VELOX_PCRE2_VERSION}.tar.gz"
+  )
+  set(VELOX_PCRE2_BUILD_SHA256_CHECKSUM c08ae2388ef333e8403e670ad70c0a11f1eed021fd88308d7e02f596fcd9dc16)
+endif()
+
+message(STATUS "Building PCRE2 ${VELOX_PCRE2_VERSION} from source")
+if("${VELOX_PCRE2_BUILD_SHA256_CHECKSUM}" STREQUAL "")
+  # An empty `URL_HASH SHA256=` is rejected, so omit the hash when none is known.
+  FetchContent_Declare(pcre2 URL "${VELOX_PCRE2_SOURCE_URL}")
+else()
+  FetchContent_Declare(pcre2 URL "${VELOX_PCRE2_SOURCE_URL}" URL_HASH "SHA256=${VELOX_PCRE2_BUILD_SHA256_CHECKSUM}")
+endif()
+
+set(PCRE2_BUILD_PCRE2_8 ON CACHE BOOL "" FORCE)
+set(PCRE2_BUILD_PCRE2_16 OFF CACHE BOOL "" FORCE)
+set(PCRE2_BUILD_PCRE2_32 OFF CACHE BOOL "" FORCE)
+set(PCRE2_SUPPORT_JIT ON CACHE BOOL "" FORCE)
+set(PCRE2_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+set(PCRE2_BUILD_PCRE2GREP OFF CACHE BOOL "" FORCE)
+set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "" FORCE)
+set(PCRE2_STATIC_PIC ON CACHE BOOL "" FORCE)
+
+FetchContent_MakeAvailable(pcre2)
+
+# Normalise the target name so consumers always link `pcre2-8::pcre2-8`.
+if(TARGET pcre2-8-static AND NOT TARGET pcre2-8::pcre2-8)
+  add_library(pcre2-8::pcre2-8 ALIAS pcre2-8-static)
+elseif(TARGET pcre2-8 AND NOT TARGET pcre2-8::pcre2-8)
+  add_library(pcre2-8::pcre2-8 ALIAS pcre2-8)
+endif()
+
+unset(BUILD_TESTING CACHE) # NOTE(review): nothing above sets BUILD_TESTING — confirm this is intended.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36c8d6c9ea1..5b394cc83de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -142,6 +142,16 @@ option(VELOX_ENABLE_TPCDS_CONNECTOR "Build TPC-DS connector." ON)
 option(VELOX_ENABLE_PRESTO_FUNCTIONS "Build Presto SQL functions." ON)
 option(VELOX_ENABLE_SPARK_FUNCTIONS "Build Spark SQL functions." ON)
 option(VELOX_ENABLE_ICEBERG_FUNCTIONS "Build Iceberg functions." ON)
+option(
+  VELOX_ENABLE_REGEX_COMPAT_TESTS # Gates the pcre2 dependency and velox/external/regex_compat.
+  "Build the PCRE2 vs RE2 Java-regex compatibility test suite (pulls in PCRE2 dep)."
+  OFF
+)
+option(
+  VELOX_ENABLE_REGEX_COMPAT_JAVA_BACKEND # Turned OFF by the root CMakeLists.txt when find_package(JNI) fails.
+  "Within the regex-compat test suite, also exercise an embedded-JVM Java backend as a third backend / oracle. Requires JDK on the build host. If JNI cannot be found, this option is auto-disabled with a warning. Only consulted when VELOX_ENABLE_REGEX_COMPAT_TESTS=ON."
+  ON
+)
 option(VELOX_ENABLE_EXPRESSION "Build expression." ON)
 option(
   VELOX_ENABLE_EXAMPLES
@@ -626,6 +636,30 @@ endif()
 velox_set_source(re2)
 velox_resolve_dependency(re2)
 
+if(VELOX_ENABLE_REGEX_COMPAT_TESTS)
+  velox_set_source(pcre2)
+  velox_resolve_dependency(pcre2)
+
+  if(VELOX_ENABLE_REGEX_COMPAT_JAVA_BACKEND)
+    # Probe for a JDK so the test suite can embed a JVM as a third (oracle)
+    # backend; fully opt-in (gated by the regex-compat option above).  Without
+    # JNI we warn and degrade: the suite still builds with PCRE2 + RE2 only.
+    find_package(JNI QUIET)
+    if(JNI_FOUND)
+      message(STATUS "Regex-compat: enabling embedded-JVM Java backend (JNI: ${JNI_INCLUDE_DIRS})")
+    else()
+      message(
+        WARNING
+        "Regex-compat: JNI not found, disabling Java backend. "
+        "Install a JDK or pass -DVELOX_ENABLE_REGEX_COMPAT_JAVA_BACKEND=OFF to silence."
+      )
+      # Shadow the option with a normal variable for this configure run only;
+      # FORCE-writing the cache would keep the backend off after a JDK is installed.
+      set(VELOX_ENABLE_REGEX_COMPAT_JAVA_BACKEND OFF)
+    endif()
+  endif()
+endif()
+
 if(${VELOX_BUILD_PYTHON_PACKAGE})
   find_package(Python 3.9 COMPONENTS Interpreter Development.Module REQUIRED)
   velox_set_source(pybind11)
diff --git a/velox/CMakeLists.txt b/velox/CMakeLists.txt
index f15492a2e11..9d0e19edec4 100644
--- a/velox/CMakeLists.txt
+++ b/velox/CMakeLists.txt
@@ -26,6 +26,9 @@ add_subdirectory(external/date)
 add_subdirectory(external/tzdb)
 add_subdirectory(external/md5)
 add_subdirectory(external/hdfs)
+if(VELOX_ENABLE_REGEX_COMPAT_TESTS) # Opt-in: the root CMakeLists.txt resolves pcre2 only under this option.
+  add_subdirectory(external/regex_compat)
+endif()
 #
 
 # examples depend on expression
diff --git a/velox/external/regex_compat/CMakeLists.txt b/velox/external/regex_compat/CMakeLists.txt
new file mode 100644
index 00000000000..a35dc333d9b
--- /dev/null
+++ b/velox/external/regex_compat/CMakeLists.txt
@@ -0,0 +1,42 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Reached only when VELOX_ENABLE_REGEX_COMPAT_TESTS=ON.  The RE2 and PCRE2
+# backends are unconditional; the JNI-based Java backend is added if enabled.
+set(_REGEX_COMPAT_SRC Re2Regex.cpp Pcre2Regex.cpp)
+set(_REGEX_COMPAT_LIBS re2::re2 pcre2-8::pcre2-8)
+if(VELOX_ENABLE_REGEX_COMPAT_JAVA_BACKEND)
+  list(APPEND _REGEX_COMPAT_SRC JvmFixture.cpp)
+  list(APPEND _REGEX_COMPAT_SRC JavaRegex.cpp)
+  list(APPEND _REGEX_COMPAT_LIBS ${JNI_LIBRARIES})
+endif()
+
+velox_add_library(velox_regex_compat ${_REGEX_COMPAT_SRC})
+velox_link_libraries(
+  velox_regex_compat
+  PUBLIC ${_REGEX_COMPAT_LIBS}
+  PRIVATE velox_functions_lib velox_java_pcre2_translator
+)
+
+# PUBLIC so sources and dependents agree on whether the Java backend exists.
+if(NOT VELOX_ENABLE_REGEX_COMPAT_JAVA_BACKEND)
+  velox_compile_definitions(velox_regex_compat PUBLIC VELOX_REGEX_COMPAT_HAS_JAVA=0)
+else()
+  velox_include_directories(velox_regex_compat PUBLIC ${JNI_INCLUDE_DIRS})
+  velox_compile_definitions(velox_regex_compat PUBLIC VELOX_REGEX_COMPAT_HAS_JAVA=1)
+endif()
+
+if(${VELOX_BUILD_TESTING})
+  add_subdirectory(tests)
+endif()
diff --git a/velox/external/regex_compat/JavaRegex.cpp b/velox/external/regex_compat/JavaRegex.cpp
new file mode 100644
index 00000000000..eb2d18a8b6e
--- /dev/null
+++ b/velox/external/regex_compat/JavaRegex.cpp
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/external/regex_compat/JavaRegex.h"
+
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+
+#include "velox/external/regex_compat/JvmFixture.h"
+
+#include <mutex>
+#include <stdexcept>
+#include <vector>
+
+namespace facebook::velox::regex_compat {
+namespace {
+
+// java.util.regex.Pattern flag bit constants (must match the JDK).
+constexpr jint kJavaCaseInsensitive = 0x02; // Pattern.CASE_INSENSITIVE
+constexpr jint kJavaMultiline = 0x08; // Pattern.MULTILINE
+constexpr jint kJavaDotall = 0x20; // Pattern.DOTALL
+constexpr jint kJavaUnicodeCase = 0x40; // Pattern.UNICODE_CASE
+
+struct JavaIds {
+  // Global class refs (filled by initIds) so they outlive JNI local-ref frames.
+  jclass patternCls = nullptr; // java.util.regex.Pattern
+  jclass matcherCls = nullptr; // java.util.regex.Matcher
+  jclass stringCls = nullptr; // java.lang.String (resolved, not read in this file)
+  jclass mapCls = nullptr; // java.util.Map
+  jclass setCls = nullptr; // java.util.Set
+  jclass iteratorCls = nullptr; // java.util.Iterator
+  jclass entryCls = nullptr; // java.util.Map.Entry
+  jclass integerCls = nullptr; // java.lang.Integer
+
+  jmethodID compileMethod = nullptr; // static Pattern.compile(String, int)
+  jmethodID matcherMethod = nullptr; // Pattern.matcher(CharSequence)
+  jmethodID namedGroupsMethod = nullptr; // Pattern.namedGroups() (JDK 20+); null if absent
+
+  jmethodID findMethod = nullptr; // Matcher.find(int) (resolved, not called in this file)
+  jmethodID findNoArgMethod = nullptr; // Matcher.find()
+  jmethodID matchesMethod = nullptr; // Matcher.matches()
+  jmethodID lookingAtMethod = nullptr; // Matcher.lookingAt()
+  jmethodID startMethod = nullptr; // Matcher.start(int)
+  jmethodID endMethod = nullptr; // Matcher.end(int)
+  jmethodID groupCountMethod = nullptr; // Matcher.groupCount()
+  jmethodID replaceAllMethod = nullptr; // Matcher.replaceAll(String)
+  jmethodID regionMethod = nullptr; // Matcher.region(int, int)
+  jmethodID useAnchoringMethod = nullptr; // Matcher.useAnchoringBounds(boolean) (not called here)
+
+  jmethodID mapEntrySetMethod = nullptr; // Map.entrySet()
+  jmethodID setIteratorMethod = nullptr; // Set.iterator()
+  jmethodID iteratorHasNextMethod = nullptr; // Iterator.hasNext()
+  jmethodID iteratorNextMethod = nullptr; // Iterator.next()
+  jmethodID entryGetKeyMethod = nullptr; // Map.Entry.getKey()
+  jmethodID entryGetValueMethod = nullptr; // Map.Entry.getValue()
+  jmethodID integerIntValueMethod = nullptr; // Integer.intValue()
+};
+
+std::once_flag g_idsOnce; // Guards the one-time initIds() call made by the JavaRegex ctor.
+JavaIds g_ids; // File-wide cache of the class refs and method IDs above.
+
+jclass globalClassRef(JNIEnv* env, const char* name) {
+  jclass local = env->FindClass(name); // Local ref; promoted to global below.
+  if (!local) {
+    env->ExceptionClear(); // Drop the pending Java error before unwinding.
+    throw std::runtime_error(std::string("FindClass failed for ") + name);
+  }
+  jclass global = static_cast<jclass>(env->NewGlobalRef(local));
+  env->DeleteLocalRef(local); // The global ref alone keeps the class pinned.
+  return global; // Stored in g_ids by initIds(); not released in this file.
+}
+
+void initIds(JNIEnv* env) {
+  // Resolve the class refs and method IDs cached in g_ids.  Only
+  // Pattern.namedGroups() is treated as optional.
+  g_ids.patternCls = globalClassRef(env, "java/util/regex/Pattern");
+  g_ids.matcherCls = globalClassRef(env, "java/util/regex/Matcher");
+  g_ids.stringCls = globalClassRef(env, "java/lang/String");
+  g_ids.mapCls = globalClassRef(env, "java/util/Map");
+  g_ids.setCls = globalClassRef(env, "java/util/Set");
+  g_ids.iteratorCls = globalClassRef(env, "java/util/Iterator");
+  g_ids.entryCls = globalClassRef(env, "java/util/Map$Entry");
+  g_ids.integerCls = globalClassRef(env, "java/lang/Integer");
+
+  const auto method = [env](jclass cls, const char* name, const char* sig) {
+    return env->GetMethodID(cls, name, sig);
+  };
+  const jclass patternCls = g_ids.patternCls;
+  const jclass matcherCls = g_ids.matcherCls;
+
+  g_ids.compileMethod = env->GetStaticMethodID(
+      patternCls, "compile", "(Ljava/lang/String;I)Ljava/util/regex/Pattern;");
+  g_ids.matcherMethod = method(
+      patternCls,
+      "matcher",
+      "(Ljava/lang/CharSequence;)Ljava/util/regex/Matcher;");
+  // namedGroups() needs JDK 20+: clear a failed lookup and keep the ID null.
+  g_ids.namedGroupsMethod =
+      method(patternCls, "namedGroups", "()Ljava/util/Map;");
+  if (env->ExceptionCheck()) {
+    env->ExceptionClear();
+    g_ids.namedGroupsMethod = nullptr;
+  }
+
+  g_ids.findMethod = method(matcherCls, "find", "(I)Z");
+  g_ids.findNoArgMethod = method(matcherCls, "find", "()Z");
+  g_ids.matchesMethod = method(matcherCls, "matches", "()Z");
+  g_ids.lookingAtMethod = method(matcherCls, "lookingAt", "()Z");
+  g_ids.startMethod = method(matcherCls, "start", "(I)I");
+  g_ids.endMethod = method(matcherCls, "end", "(I)I");
+  g_ids.groupCountMethod = method(matcherCls, "groupCount", "()I");
+  g_ids.replaceAllMethod = method(
+      matcherCls, "replaceAll", "(Ljava/lang/String;)Ljava/lang/String;");
+  g_ids.regionMethod =
+      method(matcherCls, "region", "(II)Ljava/util/regex/Matcher;");
+  g_ids.useAnchoringMethod = method(
+      matcherCls, "useAnchoringBounds", "(Z)Ljava/util/regex/Matcher;");
+
+  g_ids.mapEntrySetMethod =
+      method(g_ids.mapCls, "entrySet", "()Ljava/util/Set;");
+  g_ids.setIteratorMethod =
+      method(g_ids.setCls, "iterator", "()Ljava/util/Iterator;");
+  g_ids.iteratorHasNextMethod = method(g_ids.iteratorCls, "hasNext", "()Z");
+  g_ids.iteratorNextMethod =
+      method(g_ids.iteratorCls, "next", "()Ljava/lang/Object;");
+  g_ids.entryGetKeyMethod =
+      method(g_ids.entryCls, "getKey", "()Ljava/lang/Object;");
+  g_ids.entryGetValueMethod =
+      method(g_ids.entryCls, "getValue", "()Ljava/lang/Object;");
+  g_ids.integerIntValueMethod = method(g_ids.integerCls, "intValue", "()I");
+}
+
+jint toJavaFlags(const Options& o) {
+  // Translate the Options switches into java.util.regex.Pattern flag bits.
+  // Case-insensitive matching also sets UNICODE_CASE so folding is not
+  // limited to ASCII.
+  const jint caseBits =
+      o.caseSensitive ? 0 : (kJavaCaseInsensitive | kJavaUnicodeCase);
+  // `.` matches line terminators only when dotNl is set.
+  const jint dotBits = o.dotNl ? kJavaDotall : 0;
+  // Without oneLine, ^ and $ also match at line boundaries.
+  const jint lineBits = o.oneLine ? 0 : kJavaMultiline;
+
+  return caseBits | dotBits | lineBits;
+}
+
+// Convert a Java `String` index (a UTF-16 code-unit offset) into a byte
+// offset in the given UTF-8 source.  Used to translate Matcher.start()/end()
+// results — which are Java char indices — back into byte offsets in our
+// std::string_view input.  Returns std::string_view::npos on bad input or
+// out-of-range index.  Malformed bytes are counted the way toJString()
+// transcodes them, so offsets stay aligned with the string Java sees.
+std::size_t javaCharOffsetToByteOffset(
+    std::string_view utf8,
+    int javaCharOffset) {
+  if (javaCharOffset < 0) {
+    return std::string_view::npos;
+  }
+  int chars = 0;
+  std::size_t i = 0;
+  while (i < utf8.size()) {
+    if (chars == javaCharOffset) {
+      return i;
+    }
+    const unsigned char c = static_cast<unsigned char>(utf8[i]);
+    std::size_t len = 1; // ASCII, or a stray continuation byte.
+    if (c >= 0xF0) {
+      len = 4;
+    } else if (c >= 0xE0) {
+      len = 3;
+    } else if (c >= 0xC0) {
+      len = 2;
+    }
+    if (len > utf8.size() - i) {
+      // Sequence cut off by the end of input: toJString() emits one U+FFFD
+      // per remaining byte, so consume a single byte here as well.
+      len = 1;
+    }
+    // Only a complete 4-byte sequence (U+10000..U+10FFFF) becomes a UTF-16
+    // surrogate pair, i.e. two Java chars.
+    chars += (len == 4) ? 2 : 1;
+    i += len;
+  }
+  return chars == javaCharOffset ? utf8.size() : std::string_view::npos;
+}
+
+// Inverse of the above: given a UTF-8 byte offset, return the equivalent
+// Java UTF-16 char offset.  Used when we have to hand a byte offset (used
+// by the caller / JavaMatcherAdapter cursor) over to Java's Matcher.region().
+// A byte offset past the end yields the full length in UTF-16 units.
+// Malformed bytes are counted the way toJString() transcodes them.
+int byteOffsetToJavaCharOffset(
+    std::string_view utf8,
+    std::size_t byteOffset) {
+  int chars = 0;
+  std::size_t i = 0;
+  while (i < utf8.size() && i < byteOffset) {
+    const unsigned char c = static_cast<unsigned char>(utf8[i]);
+    std::size_t len = 1; // ASCII, or a stray continuation byte.
+    if (c >= 0xF0) {
+      len = 4;
+    } else if (c >= 0xE0) {
+      len = 3;
+    } else if (c >= 0xC0) {
+      len = 2;
+    }
+    if (len > utf8.size() - i) {
+      len = 1; // Truncated tail: toJString() emits U+FFFD per byte.
+    }
+    // A complete 4-byte sequence is a surrogate pair (two UTF-16 units).
+    chars += (len == 4) ? 2 : 1;
+    i += len;
+  }
+  return chars;
+}
+
+// Convert a std::string_view (UTF-8) to a JNI jstring.  Owned by caller —
+// must DeleteLocalRef after use.
+//
+// NewStringUTF expects JNI's "modified UTF-8", which has no 4-byte form:
+// supplementary characters must be written as two 3-byte surrogate halves
+// and U+0000 as 0xC0 0x80.  Standard UTF-8 input would therefore be
+// mis-decoded, so we transcode to UTF-16 here and use
+// NewString(jchar*, jsize) instead.
+jstring toJString(JNIEnv* env, std::string_view sv) {
+  std::vector<jchar> u16;
+  u16.reserve(sv.size()); // Upper bound: no input byte yields more than one unit.
+  for (std::size_t i = 0; i < sv.size();) {
+    const unsigned char c = static_cast<unsigned char>(sv[i]);
+    std::uint32_t cp = 0;
+    std::size_t step = 1;
+    if (c < 0x80) {
+      cp = c;
+      step = 1;
+    } else if (c < 0xC0) {
+      // Stray continuation; emit replacement to keep length sane.
+      u16.push_back(0xFFFD);
+      ++i;
+      continue;
+    } else if (c < 0xE0 && i + 1 < sv.size()) { // 2-byte sequence that fits.
+      cp = ((c & 0x1F) << 6) |
+          (static_cast<unsigned char>(sv[i + 1]) & 0x3F);
+      step = 2;
+    } else if (c < 0xF0 && i + 2 < sv.size()) { // 3-byte sequence that fits.
+      cp = ((c & 0x0F) << 12) |
+          ((static_cast<unsigned char>(sv[i + 1]) & 0x3F) << 6) |
+          (static_cast<unsigned char>(sv[i + 2]) & 0x3F);
+      step = 3;
+    } else if (i + 3 < sv.size()) { // 4-byte sequence that fits.
+      cp = ((c & 0x07) << 18) |
+          ((static_cast<unsigned char>(sv[i + 1]) & 0x3F) << 12) |
+          ((static_cast<unsigned char>(sv[i + 2]) & 0x3F) << 6) |
+          (static_cast<unsigned char>(sv[i + 3]) & 0x3F);
+      step = 4;
+    } else { // Lead byte whose sequence runs past the end of the input.
+      u16.push_back(0xFFFD);
+      ++i;
+      continue;
+    }
+    if (cp <= 0xFFFF) {
+      u16.push_back(static_cast<jchar>(cp));
+    } else {
+      cp -= 0x10000; // Encode as a UTF-16 surrogate pair (high, then low).
+      u16.push_back(static_cast<jchar>(0xD800 | (cp >> 10)));
+      u16.push_back(static_cast<jchar>(0xDC00 | (cp & 0x3FF)));
+    }
+    i += step;
+  }
+  return env->NewString(u16.data(), static_cast<jsize>(u16.size()));
+}
+
+// Read a jstring into a std::string (UTF-8).  Caller still owns the jstring.
+// We use GetStringChars (UTF-16) and transcode to UTF-8 ourselves to avoid
+// GetStringUTFChars's "modified UTF-8" which can't represent supplementary
+// chars in their 4-byte UTF-8 form.  Returns "" for a null or unreadable string.
+std::string fromJString(JNIEnv* env, jstring s) {
+  const jchar* u16 = s ? env->GetStringChars(s, nullptr) : nullptr;
+  if (!u16) {
+    return {}; // Null string, or the JVM could not hand out its characters.
+  }
+  const jsize len = env->GetStringLength(s);
+  std::string out;
+  out.reserve(static_cast<std::size_t>(len));
+  for (jsize i = 0; i < len; ++i) {
+    std::uint32_t cp = u16[i];
+    if (cp >= 0xD800 && cp <= 0xDBFF && i + 1 < len) {
+      const std::uint32_t lo = u16[i + 1];
+      if (lo >= 0xDC00 && lo <= 0xDFFF) {
+        cp = 0x10000 + (((cp - 0xD800) << 10) | (lo - 0xDC00));
+        ++i; // The low surrogate was consumed together with the high one.
+      }
+    }
+    if (cp < 0x80) {
+      out.push_back(static_cast<char>(cp));
+    } else if (cp < 0x800) {
+      out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
+      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
+    } else if (cp < 0x10000) {
+      out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
+      out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
+      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
+    } else {
+      out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
+      out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
+      out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
+      out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
+    }
+  }
+  env->ReleaseStringChars(s, u16);
+  return out;
+}
+
+bool checkAndClearException(JNIEnv* env, std::string* outError) {
+  const bool pending = env->ExceptionCheck(); // True if Java threw.
+  if (pending) {
+    if (outError != nullptr) {
+      *outError = "Java exception thrown (cleared)";
+    }
+    env->ExceptionClear(); // Leave the thread able to make further JNI calls.
+  }
+  return pending;
+}
+
+void populateNamedFromPattern(
+    JNIEnv* env,
+    jobject pattern,
+    std::map<std::string, int>* out) {
+  // Copy Pattern.namedGroups() (group name -> group index) into `out`.  Does
+  // nothing when the method is unavailable, the call throws, or yields null.
+  if (g_ids.namedGroupsMethod == nullptr) {
+    return;
+  }
+  jobject groups = env->CallObjectMethod(pattern, g_ids.namedGroupsMethod);
+  if (checkAndClearException(env, nullptr) || groups == nullptr) {
+    return;
+  }
+  // Walk Map.entrySet() through its Iterator; each entry is read as a
+  // (String name, Integer index) pair.
+  jobject entries = env->CallObjectMethod(groups, g_ids.mapEntrySetMethod);
+  jobject cursor = env->CallObjectMethod(entries, g_ids.setIteratorMethod);
+  while (env->CallBooleanMethod(cursor, g_ids.iteratorHasNextMethod)) {
+    jobject entry = env->CallObjectMethod(cursor, g_ids.iteratorNextMethod);
+    jobject name = env->CallObjectMethod(entry, g_ids.entryGetKeyMethod);
+    jobject boxed = env->CallObjectMethod(entry, g_ids.entryGetValueMethod);
+    const int index = env->CallIntMethod(boxed, g_ids.integerIntValueMethod);
+    out->emplace(fromJString(env, static_cast<jstring>(name)), index);
+    // Release this iteration's local refs before fetching the next entry.
+    env->DeleteLocalRef(name);
+    env->DeleteLocalRef(boxed);
+    env->DeleteLocalRef(entry);
+  }
+  env->DeleteLocalRef(cursor);
+  env->DeleteLocalRef(entries);
+  env->DeleteLocalRef(groups);
+}
+
+} // namespace
+
+JavaRegex::JavaRegex(std::string_view javaPattern, Options opt) {
+  // Compile through java.util.regex.Pattern; a rejected pattern is reported
+  // via ok()/error() instead of a C++ exception.
+  auto* jni = JvmFixture::instance().env();
+  std::call_once(g_idsOnce, [jni]() { initIds(jni); });
+
+  jstring patternText = toJString(jni, javaPattern);
+  const jint flags = toJavaFlags(opt);
+  jobject compiled = jni->CallStaticObjectMethod(
+      g_ids.patternCls, g_ids.compileMethod, patternText, flags);
+  jni->DeleteLocalRef(patternText);
+  if (checkAndClearException(jni, nullptr)) {
+    error_ = "Java PatternSyntaxException: " + std::string(javaPattern);
+    return;
+  }
+  pattern_ = jni->NewGlobalRef(compiled);
+  jni->DeleteLocalRef(compiled);
+
+  // A matcher over "" is enough to ask the pattern for its group count.
+  jstring noInput = toJString(jni, "");
+  jobject probe = jni->CallObjectMethod(pattern_, g_ids.matcherMethod, noInput);
+  jni->DeleteLocalRef(noInput);
+  captureCount_ = jni->CallIntMethod(probe, g_ids.groupCountMethod);
+  jni->DeleteLocalRef(probe);
+
+  populateNamedFromPattern(jni, pattern_, &named_);
+}
+
+JavaRegex::~JavaRegex() {
+  if (pattern_) { // Null if the constructor did not pin a Pattern.
+    JvmFixture::instance().env()->DeleteGlobalRef(pattern_);
+  }
+}
+
+bool JavaRegex::ok() const { // True when a compiled Pattern is held.
+  return pattern_ != nullptr;
+}
+const std::string& JavaRegex::error() const { // Set only when compilation threw.
+  return error_;
+}
+int JavaRegex::NumberOfCapturingGroups() const { // Matcher.groupCount() value.
+  return captureCount_;
+}
+const std::map<std::string, int>& JavaRegex::NamedCapturingGroups() const {
+  return named_; // Name -> group index; empty if namedGroups() was unavailable.
+}
+
+bool JavaRegex::Match(
+    std::string_view input,
+    std::size_t startpos,
+    std::size_t endpos,
+    Anchor anchor,
+    std::string_view* submatch,
+    int nsubmatch) const {
+  // Runs the pattern over the byte window [startpos, endpos) of `input` and
+  // returns whether it matched under `anchor`.  On success submatch[i] (for
+  // i < nsubmatch) views group i inside `input`, or is empty when that group
+  // did not participate.  Any Java exception is cleared and reported as "no
+  // match", so no exception is left pending for the next JNI call.
+  if (!pattern_ || startpos > endpos) {
+    return false; // Invalid regex, or an inverted window region() would reject.
+  }
+  auto* env = JvmFixture::instance().env();
+
+  // Java's Matcher operates on a CharSequence we hand it; clip input to
+  // [0, endpos).  Every offset conversion below uses this same clipped view
+  // so it agrees with the string Java sees, even if endpos splits a character.
+  const std::string_view text = input.substr(0, endpos);
+  jstring jin = toJString(env, text);
+  jobject m = env->CallObjectMethod(pattern_, g_ids.matcherMethod, jin);
+  env->DeleteLocalRef(jin);
+  if (checkAndClearException(env, nullptr) || !m) {
+    return false;
+  }
+
+  // Set region so anchors line up with [startpos, endpos).  Matcher.region()
+  // takes UTF-16 char offsets, so translate our byte offsets first.
+  const jint regionStart = byteOffsetToJavaCharOffset(text, startpos);
+  const jint regionEnd = byteOffsetToJavaCharOffset(text, endpos);
+  jobject mRegion =
+      env->CallObjectMethod(m, g_ids.regionMethod, regionStart, regionEnd);
+  if (checkAndClearException(env, nullptr)) {
+    env->DeleteLocalRef(m);
+    return false;
+  }
+  env->DeleteLocalRef(mRegion);
+
+  jboolean matched = JNI_FALSE;
+  switch (anchor) {
+    case Anchor::kUnanchored:
+      matched = env->CallBooleanMethod(m, g_ids.findNoArgMethod);
+      break;
+    case Anchor::kAnchorStart:
+      matched = env->CallBooleanMethod(m, g_ids.lookingAtMethod);
+      break;
+    case Anchor::kAnchorBoth:
+      matched = env->CallBooleanMethod(m, g_ids.matchesMethod);
+      break;
+  }
+  if (checkAndClearException(env, nullptr) || !matched) {
+    env->DeleteLocalRef(m);
+    return false;
+  }
+
+  // Matcher.start(i)/end(i) return UTF-16 char offsets into the CharSequence;
+  // translate them back to byte offsets so the views index `input` correctly
+  // for non-ASCII text.
+  for (int i = 0; i < nsubmatch; ++i) {
+    submatch[i] = std::string_view{};
+    const jint s = env->CallIntMethod(m, g_ids.startMethod, i);
+    if (checkAndClearException(env, nullptr) || s < 0) {
+      continue; // No such group, or the group did not participate.
+    }
+    const jint e = env->CallIntMethod(m, g_ids.endMethod, i);
+    const std::size_t sByte = javaCharOffsetToByteOffset(text, s);
+    const std::size_t eByte = javaCharOffsetToByteOffset(text, e);
+    if (sByte != std::string_view::npos && eByte != std::string_view::npos &&
+        eByte >= sByte) {
+      submatch[i] = text.substr(sByte, eByte - sByte);
+    }
+  }
+
+  env->DeleteLocalRef(m);
+  return true;
+}
+
+bool JavaRegex::FullMatch(std::string_view input, const JavaRegex& re) {
+  std::string_view whole; // Receives group 0; the caller only wants the verdict.
+  return re.Match(input, 0, input.size(), Anchor::kAnchorBoth, &whole, 1);
+}
+
+bool JavaRegex::PartialMatch(std::string_view input, const JavaRegex& re) {
+  std::string_view whole; // Receives group 0; the caller only wants the verdict.
+  return re.Match(input, 0, input.size(), Anchor::kUnanchored, &whole, 1);
+}
+
+int JavaRegex::GlobalReplace(
+    std::string* str,
+    const JavaRegex& re,
+    std::string_view javaReplacement) {
+  // Replaces every match of `re` in `*str` using Java replacement syntax
+  // ($N / ${name}) and returns how many matches were replaced.  Returns 0 and
+  // leaves `*str` unchanged if `re` is invalid, `str` is null, or Java throws.
+  if (!re.ok() || str == nullptr) {
+    return 0;
+  }
+  auto* env = JvmFixture::instance().env();
+  jstring jin = toJString(env, *str);
+  jobject m = env->CallObjectMethod(re.pattern_, g_ids.matcherMethod, jin);
+  env->DeleteLocalRef(jin);
+
+  // Matcher.replaceAll() does not say how many replacements it made, so walk
+  // find() first to count them.  (Tests use exact count assertions, so this
+  // matters.)  The exception check keeps a throwing find() from looping on.
+  int count = 0;
+  while (m && !env->ExceptionCheck() &&
+         env->CallBooleanMethod(m, g_ids.findNoArgMethod)) {
+    ++count;
+  }
+  // replaceAll() resets the matcher before scanning, so the matcher that was
+  // just walked can be reused.
+  jstring jOut = nullptr;
+  if (m && !env->ExceptionCheck()) {
+    jstring jRepl = toJString(env, javaReplacement);
+    jOut = static_cast<jstring>(
+        env->CallObjectMethod(m, g_ids.replaceAllMethod, jRepl));
+    env->DeleteLocalRef(jRepl);
+  }
+  if (m) {
+    env->DeleteLocalRef(m);
+  }
+  if (checkAndClearException(env, nullptr) || !jOut) {
+    return 0;
+  }
+  *str = fromJString(env, jOut);
+  env->DeleteLocalRef(jOut);
+  return count;
+}
+
+} // namespace facebook::velox::regex_compat
+
+#endif // VELOX_REGEX_COMPAT_HAS_JAVA
diff --git a/velox/external/regex_compat/JavaRegex.h b/velox/external/regex_compat/JavaRegex.h
new file mode 100644
index 00000000000..a5ba77137c9
--- /dev/null
+++ b/velox/external/regex_compat/JavaRegex.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+// This header is only meaningful when the Java backend is enabled.  Clang-tidy
+// scans changed headers in isolation and cannot find <jni.h> on hosts without
+// a JDK, so guard the entire content rather than relying on every consumer to
+// gate the include.
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+
+#include <map>
+#include <string>
+#include <string_view>
+
+#include <jni.h>
+
+#include "velox/external/regex_compat/RegexTypes.h"
+
+namespace facebook::velox::regex_compat {
+
+/// `java.util.regex` backend in the regex-compat test suite, via an embedded
+/// JVM (see JvmFixture).  Public method names and signatures mirror
+/// `re2::RE2`'s subset that Velox uses.
+///
+/// Internally each `Match` / `GlobalReplace` call creates a fresh
+/// `java.util.regex.Matcher` via the cached `jobject pattern_` and invokes
+/// the JDK's regex engine.  Pattern + replacement input is the canonical
+/// Java syntax (this is the native source of truth for the other two
+/// backends' translation correctness).
+class JavaRegex {
+ public:
+  explicit JavaRegex(std::string_view javaPattern, Options opt = {}); // Check ok() afterwards.
+  ~JavaRegex(); // Releases the global ref held in pattern_.
+
+  JavaRegex(const JavaRegex&) = delete; // Non-copyable: owns a JNI global ref.
+  JavaRegex& operator=(const JavaRegex&) = delete;
+
+  bool ok() const; // True when the pattern compiled.
+  const std::string& error() const; // Failure text; empty when ok().
+  int NumberOfCapturingGroups() const; // Matcher.groupCount() of the pattern.
+  const std::map<std::string, int>& NamedCapturingGroups() const; // Name -> index.
+
+  bool Match( // Returns false when there is no match or the pattern is invalid.
+      std::string_view input, // UTF-8 text.
+      std::size_t startpos, // Byte offset where the search window starts.
+      std::size_t endpos, // Byte offset where the search window ends (exclusive).
+      Anchor anchor, // Unanchored, anchored at start, or anchored at both ends.
+      std::string_view* submatch, // Out: group i as a view into `input`, or empty.
+      int nsubmatch) const; // Number of `submatch` entries to fill.
+
+  static bool FullMatch(std::string_view input, const JavaRegex& re); // Whole input.
+  static bool PartialMatch(std::string_view input, const JavaRegex& re); // Anywhere.
+
+  static int GlobalReplace( // Returns the number of matches replaced (0 on failure).
+      std::string* str, // In/out: rewritten in place on success.
+      const JavaRegex& re,
+      std::string_view javaReplacement); // Java syntax: $N / ${name}.
+
+ private:
+  // Pinned global reference to java.util.regex.Pattern instance.
+  jobject pattern_ = nullptr;
+  std::string error_; // Populated only when compilation fails.
+  int captureCount_ = 0; // Stays 0 when compilation fails.
+  std::map<std::string, int> named_; // Filled from Pattern.namedGroups() if present.
+};
+
+} // namespace facebook::velox::regex_compat
+
+#endif // VELOX_REGEX_COMPAT_HAS_JAVA
diff --git a/velox/external/regex_compat/JvmFixture.cpp b/velox/external/regex_compat/JvmFixture.cpp
new file mode 100644
index 00000000000..9c77bbae8ce
--- /dev/null
+++ b/velox/external/regex_compat/JvmFixture.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/external/regex_compat/JvmFixture.h"
+
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+
+#include <gtest/gtest.h>
+
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace facebook::velox::regex_compat {
+namespace {
+
+/// GTest global environment whose only job is to boot the embedded JVM once,
+/// ahead of the first test.
+class JvmGlobalEnv : public ::testing::Environment {
+ public:
+  void SetUp() override {
+    // Touching the singleton constructs the JVM eagerly; the returned
+    // reference itself is not needed here.
+    (void)JvmFixture::instance();
+  }
+  // TearDown is intentionally not overridden: JNI forbids JVM destroy +
+  // recreate in the same process.
+};
+
+} // namespace
+
+/// Boots the embedded JVM with no extra VM options, requesting JNI 1.8.
+/// Throws std::runtime_error carrying the JNI status code when
+/// JNI_CreateJavaVM does not return JNI_OK.
+JvmFixture::JvmFixture() {
+  JavaVMInitArgs vmArgs{};
+  vmArgs.version = JNI_VERSION_1_8;
+  vmArgs.nOptions = 0;
+  vmArgs.options = nullptr;
+  vmArgs.ignoreUnrecognized = JNI_FALSE;
+
+  // JNI_CreateJavaVM writes the JNIEnv* through a void** out-parameter.
+  void** envSlot = reinterpret_cast<void**>(&env_);
+  const jint status = JNI_CreateJavaVM(&jvm_, envSlot, &vmArgs);
+  if (status == JNI_OK) {
+    return;
+  }
+  throw std::runtime_error(
+      "JvmFixture: JNI_CreateJavaVM failed with code " +
+      std::to_string(status));
+}
+
+/// Returns the process-wide fixture, constructing it (and therefore the JVM)
+/// on the first call.
+JvmFixture& JvmFixture::instance() {
+  // A function-local static is initialised exactly once, thread-safely
+  // (C++11+), and sidesteps static-initialisation-order problems.
+  static JvmFixture singleton;
+  return singleton;
+}
+
+/// Installs a JvmGlobalEnv as a GTest global environment.
+void JvmFixture::Register() {
+  // The raw pointer is handed to GTest via AddGlobalTestEnvironment; it is
+  // not deleted here.
+  ::testing::Environment* jvmEnv = new JvmGlobalEnv;
+  ::testing::AddGlobalTestEnvironment(jvmEnv);
+}
+
+} // namespace facebook::velox::regex_compat
+
+#endif // VELOX_REGEX_COMPAT_HAS_JAVA
diff --git a/velox/external/regex_compat/JvmFixture.h b/velox/external/regex_compat/JvmFixture.h
new file mode 100644
index 00000000000..c2b4bfa812e
--- /dev/null
+++ b/velox/external/regex_compat/JvmFixture.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+// Guarded the same way as JavaRegex.h: clang-tidy scans diff-changed headers
+// in isolation and cannot find <jni.h> on hosts without a JDK.
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+
+#include <jni.h>
+
+namespace facebook::velox::regex_compat {
+
+/// Process-singleton embedded JVM used by the regex-compat test suite's
+/// JavaRegex backend.  Boots the JVM on first `instance()` call via
+/// `JNI_CreateJavaVM` and keeps it alive for the lifetime of the process —
+/// JNI forbids destroy+recreate in the same process, so we never tear down.
+///
+/// Tests should register this as a GTest GlobalEnvironment via
+/// JvmFixture::Register() in main(), to give the JVM boot a clear lifecycle
+/// boundary distinct from per-test setup.
+class JvmFixture {
+ public:
+  /// Returns the single fixture, creating the JVM on the first call.
+  static JvmFixture& instance();
+
+  // The fixture is a singleton: copying or moving it would duplicate the raw
+  // JVM handles without creating a second JVM, so both are disabled.
+  JvmFixture(const JvmFixture&) = delete;
+  JvmFixture& operator=(const JvmFixture&) = delete;
+
+  /// The JavaVM handle produced by `JNI_CreateJavaVM`.
+  JavaVM* jvm() const { return jvm_; }
+
+  /// The JNIEnv produced by `JNI_CreateJavaVM`, i.e. the one belonging to the
+  /// thread that first called `instance()`.  Per the JNI specification a
+  /// JNIEnv is only valid on its own thread; other threads must attach
+  /// through `jvm()` instead of using this pointer.
+  JNIEnv* env() const { return env_; }
+
+  /// Register this fixture as a GTest GlobalEnvironment.  Call from main().
+  static void Register();
+
+ private:
+  // Construction is restricted to instance(); the destructor is private so
+  // the object cannot be destroyed by callers.
+  JvmFixture();
+  ~JvmFixture() = default;
+
+  JavaVM* jvm_ = nullptr;
+  JNIEnv* env_ = nullptr;
+};
+
+} // namespace facebook::velox::regex_compat
+
+#endif // VELOX_REGEX_COMPAT_HAS_JAVA
diff --git a/velox/external/regex_compat/Pcre2Regex.cpp b/velox/external/regex_compat/Pcre2Regex.cpp
new file mode 100644
index 00000000000..efe21296e73
--- /dev/null
+++ b/velox/external/regex_compat/Pcre2Regex.cpp
@@ -0,0 +1,676 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/external/regex_compat/Pcre2Regex.h"
+#include "velox/functions/lib/java_pcre2_translator/ClassBodyParser.h"
+#include "velox/functions/lib/java_pcre2_translator/Evaluator.h"
+#include "velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.h"
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdio>
+#include <optional>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace facebook::velox::regex_compat {
+namespace {
+
+/// Maps the backend-neutral `Options` onto PCRE2 compile-time option bits.
+///
+/// PCRE2_UTF is always set so Unicode literals and \p{...} work; PCRE2_UCP
+/// is deliberately left off because Java's default \d, \s and \w shorthands
+/// are ASCII-only.
+std::uint32_t toPcre2Options(const Options& o) {
+  const std::uint32_t caseBits = o.caseSensitive ? 0u : PCRE2_CASELESS;
+  const std::uint32_t dotBits = o.dotNl ? PCRE2_DOTALL : 0u;
+  const std::uint32_t lineBits = o.oneLine ? 0u : PCRE2_MULTILINE;
+  return PCRE2_UTF | caseBits | dotBits | lineBits;
+}
+
+/// Maps an `Anchor` onto PCRE2 match-time option bits.  Unanchored (and any
+/// value outside the three enumerators handled here) yields no bits.
+std::uint32_t toPcre2MatchOptions(Anchor a) {
+  if (a == Anchor::kAnchorStart) {
+    return PCRE2_ANCHORED;
+  }
+  if (a == Anchor::kAnchorBoth) {
+    return PCRE2_ANCHORED | PCRE2_ENDANCHORED;
+  }
+  return 0;
+}
+
+std::string pcre2ErrorToString(int code, PCRE2_SIZE offset) {
+  PCRE2_UCHAR buf[256];
+  pcre2_get_error_message(code, buf, sizeof(buf));
+  std::ostringstream os;
+  os << "PCRE2 error " << code << " at offset " << offset << ": "
+     << reinterpret_cast<const char*>(buf);
+  return os.str();
+}
+
+/// Replaces every non-overlapping occurrence of `from` in `s` with `to`,
+/// scanning left to right.  Text inserted by a replacement is never
+/// re-scanned, so `to` may contain `from`.
+///
+/// An empty `from` is a no-op: `find("")` matches at every position, which
+/// would otherwise loop forever (and grow `s` without bound when `to` is
+/// non-empty).
+void replaceAll(std::string& s, std::string_view from, std::string_view to) {
+  if (from.empty()) {
+    return;
+  }
+  std::size_t pos = 0;
+  while ((pos = s.find(from, pos)) != std::string::npos) {
+    s.replace(pos, from.size(), to);
+    // Resume after the inserted text so it is not matched again.
+    pos += to.size();
+  }
+}
+
+/// Renders `cp` as three `\x{HH}` escapes, one per byte of its three-byte
+/// UTF-8 form (lead byte 0xE0 | top bits, then two 0x80-tagged 6-bit groups).
+/// No range check is made; the surrogate range U+D800..U+DFFF is encoded like
+/// any other value.
+std::string surrogateUtf8ByteEscapes(std::uint32_t cp) {
+  const std::uint32_t leadByte = 0xE0 | (cp >> 12);
+  const std::uint32_t midByte = 0x80 | ((cp >> 6) & 0x3F);
+  const std::uint32_t lastByte = 0x80 | (cp & 0x3F);
+
+  char text[32];
+  std::snprintf(
+      text,
+      sizeof(text),
+      "\\x{%02X}\\x{%02X}\\x{%02X}",
+      leadByte,
+      midByte,
+      lastByte);
+  return std::string(text);
+}
+
+/// Wraps three raw bytes in a non-capturing group of `\x{HH}` escapes, i.e.
+/// `(?:\x{b0}\x{b1}\x{b2})` with upper-case hex digits.
+std::string rawSurrogateUtf8BytePattern(
+    unsigned char b0,
+    unsigned char b1,
+    unsigned char b2) {
+  char group[40];
+  const unsigned first = b0;
+  const unsigned second = b1;
+  const unsigned third = b2;
+  std::snprintf(
+      group,
+      sizeof(group),
+      "(?:\\x{%02X}\\x{%02X}\\x{%02X})",
+      first,
+      second,
+      third);
+  return std::string(group);
+}
+
+/// Renders one byte as a `\x{HH}` escape (two upper-case hex digits).
+std::string byteEscape(unsigned char b) {
+  char escape[8];
+  const unsigned value = b;
+  std::snprintf(escape, sizeof(escape), "\\x{%02X}", value);
+  return std::string(escape);
+}
+
+/// Renders `cp` as a run of `\x{HH}` escapes, one per byte of its UTF-8
+/// encoding (1 to 4 bytes depending on magnitude).  Three-byte values are
+/// delegated to surrogateUtf8ByteEscapes; no upper-bound check is made.
+std::string codePointUtf8ByteEscapes(std::uint32_t cp) {
+  const auto escapeLow8 = [](std::uint32_t value) {
+    return byteEscape(static_cast<unsigned char>(value));
+  };
+  // Continuation byte carrying the 6 bits of `cp` that start at `shift`.
+  const auto continuation = [&](unsigned shift) {
+    return escapeLow8(0x80 | ((cp >> shift) & 0x3F));
+  };
+
+  if (cp > 0xFFFF) {
+    return escapeLow8(0xF0 | (cp >> 18)) + continuation(12) +
+        continuation(6) + continuation(0);
+  }
+  if (cp > 0x7FF) {
+    return surrogateUtf8ByteEscapes(cp);
+  }
+  if (cp > 0x7F) {
+    return escapeLow8(0xC0 | (cp >> 6)) + continuation(0);
+  }
+  return escapeLow8(cp);
+}
+
+/// Counts the code points covered by `rs`, whose `ranges()` is read as a flat
+/// list of inclusive [lo, hi] pairs.  Counting stops as soon as the running
+/// total exceeds `cap`, so the result is exact only when it is <= cap;
+/// otherwise it is merely some value > cap.
+std::uint64_t rangeSetSize(
+    const functions::java_pcre2_translator::RangeSet& rs,
+    std::uint64_t cap) {
+  const auto& bounds = rs.ranges();
+  std::uint64_t total = 0;
+  for (std::size_t lo = 0; lo < bounds.size() && total <= cap; lo += 2) {
+    total += static_cast<std::uint64_t>(bounds[lo + 1]) - bounds[lo] + 1;
+  }
+  return total;
+}
+
+/// Expands every code point in `rs` into a `(?:a|b|...)` alternation where
+/// each alternative is the code point's UTF-8 bytes written as `\x{HH}`
+/// escapes.  `ranges()` is read as a flat list of inclusive [lo, hi] pairs.
+/// An empty set yields `(?:)`.
+std::string enumerateCodePointSet(
+    const functions::java_pcre2_translator::RangeSet& rs) {
+  static constexpr std::size_t kPrefixLen = 3; // length of "(?:"
+  std::string alternation = "(?:";
+  const auto& bounds = rs.ranges();
+  for (std::size_t lo = 0; lo < bounds.size(); lo += 2) {
+    for (std::int32_t cp = bounds[lo]; cp <= bounds[lo + 1]; ++cp) {
+      // Anything beyond the prefix means an alternative was already
+      // written, so a separator is needed.
+      if (alternation.size() > kPrefixLen) {
+        alternation.push_back('|');
+      }
+      alternation += codePointUtf8ByteEscapes(cp);
+    }
+  }
+  alternation.push_back(')');
+  return alternation;
+}
+
+/// Byte-level pattern matching one UTF-8 encoded code point of 1 to 4 bytes.
+/// Overlong forms are excluded by the lead/second-byte ranges; the
+/// `\x{ED}` alternative allows second bytes 0x80..0xBF, so encoded
+/// surrogates (ED A0..BF xx) are accepted by this pattern.
+std::string anyUtf8CodePointPattern() {
+  std::string anyCodePoint =
+      "(?:[\\x{00}-\\x{7F}]|"
+      "[\\x{C2}-\\x{DF}][\\x{80}-\\x{BF}]|"
+      "\\x{E0}[\\x{A0}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "[\\x{E1}-\\x{EC}\\x{EE}-\\x{EF}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "\\x{ED}[\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "\\x{F0}[\\x{90}-\\x{BF}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "[\\x{F1}-\\x{F3}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "\\x{F4}[\\x{80}-\\x{8F}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}])";
+  return anyCodePoint;
+}
+
+/// Byte-level pattern matching one UTF-8 encoded code point in [0, maxCp].
+/// Only two bounds are supported: anything at or above RangeSet::kMaxCp
+/// (the unrestricted pattern) and exactly U+103FF (four-byte forms limited
+/// to F0 90 80..8F xx).  Every other bound yields std::nullopt.
+std::optional<std::string> utf8UpToPattern(std::int32_t maxCp) {
+  using functions::java_pcre2_translator::RangeSet;
+  if (maxCp >= RangeSet::kMaxCp) {
+    return anyUtf8CodePointPattern();
+  }
+  if (maxCp != 0x103FF) {
+    return std::nullopt;
+  }
+  std::string upTo103FF =
+      "(?:[\\x{00}-\\x{7F}]|"
+      "[\\x{C2}-\\x{DF}][\\x{80}-\\x{BF}]|"
+      "\\x{E0}[\\x{A0}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "[\\x{E1}-\\x{EC}\\x{EE}-\\x{EF}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "\\x{ED}[\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "\\x{F0}\\x{90}[\\x{80}-\\x{8F}][\\x{80}-\\x{BF}])";
+  return upTo103FF;
+}
+
+/// Renders a code-point set as a byte-level pattern, trying in order:
+///   1. empty set            -> `(?!)` (an empty negative lookahead);
+///   2. at most 4096 members -> explicit alternation of every member;
+///   3. otherwise            -> `(?!<excluded members>)<universe>`, provided
+///      at most 64 code points of the universe are excluded.
+/// Returns std::nullopt when none of these forms applies.
+std::optional<std::string> renderRangeSetAsUtf8BytePattern(
+    const functions::java_pcre2_translator::RangeSet& rs) {
+  constexpr std::uint64_t kEnumerateLimit = 4096;
+  if (rs.isEmpty()) {
+    return std::string("(?!)");
+  }
+  if (rangeSetSize(rs, kEnumerateLimit) <= kEnumerateLimit) {
+    return enumerateCodePointSet(rs);
+  }
+
+  // Default universe: every code point.
+  auto excluded = functions::java_pcre2_translator::RangeSet::all()
+                      .subtract(rs);
+  auto anyPattern = anyUtf8CodePointPattern();
+  const auto& ranges = rs.ranges();
+  // When the set starts at 0 and stops short of kMaxCp, shrink the universe
+  // to [0, last bound] so the tail above it does not count as "excluded".
+  if (!ranges.empty() && ranges.front() == 0 &&
+      ranges.back() < functions::java_pcre2_translator::RangeSet::kMaxCp) {
+    const auto maxCp = ranges.back();
+    excluded = functions::java_pcre2_translator::RangeSet::range(0, maxCp)
+                   .subtract(rs);
+    auto upTo = utf8UpToPattern(maxCp);
+    if (!upTo.has_value()) {
+      // No byte pattern is available for this upper bound.
+      return std::nullopt;
+    }
+    anyPattern = *upTo;
+  }
+  // Negative lookahead over the few excluded members, then the universe.
+  if (rangeSetSize(excluded, 64) <= 64) {
+    return std::string("(?!") + enumerateCodePointSet(excluded) + ")" +
+        anyPattern;
+  }
+  return std::nullopt;
+}
+
+/// Parses the character class starting at `pattern[start]` and tries to
+/// render it as a byte-level pattern.
+///
+/// `end` is written (with the parser's final position) only when parsing
+/// succeeds; it is left untouched if the parser throws.  Returns
+/// std::nullopt when parsing throws std::invalid_argument, when the class
+/// cannot be reduced to a range set, or when the range set has no byte-level
+/// rendering.
+std::optional<std::string> tryRewriteClassAsUtf8BytePattern(
+    std::string_view pattern,
+    std::size_t start,
+    std::size_t& end) {
+  namespace translator = functions::java_pcre2_translator;
+  try {
+    std::size_t cursor = start;
+    const auto classNode =
+        translator::ClassBodyParser::parseClass(pattern, cursor);
+    end = cursor;
+    if (const auto rangeSet = translator::Evaluator::tryToRangeSet(classNode)) {
+      return renderRangeSetAsUtf8BytePattern(*rangeSet);
+    }
+  } catch (const std::invalid_argument&) {
+    // Not a class this parser understands; report "no rewrite" below.
+  }
+  return std::nullopt;
+}
+
+/// True when the three bytes starting at `s[i]` are ED, A0..BF, 80..BF — the
+/// UTF-8-style encoding of a surrogate code point (U+D800..U+DFFF).  False
+/// when fewer than three bytes remain.
+bool rawSurrogateUtf8At(std::string_view s, std::size_t i) {
+  const bool threeBytesAvailable = i + 2 < s.size();
+  if (!threeBytesAvailable) {
+    return false;
+  }
+  const auto byteAt = [&](std::size_t k) {
+    return static_cast<unsigned char>(s[i + k]);
+  };
+  if (byteAt(0) != 0xED) {
+    return false;
+  }
+  const unsigned char second = byteAt(1);
+  const unsigned char third = byteAt(2);
+  const bool secondOk = second >= 0xA0 && second <= 0xBF;
+  const bool thirdOk = third >= 0x80 && third <= 0xBF;
+  return secondOk && thirdOk;
+}
+
+/// True when an encoded surrogate (see rawSurrogateUtf8At) starts at any
+/// offset of `s`.
+bool containsRawSurrogateUtf8(std::string_view s) {
+  std::size_t offset = 0;
+  while (offset + 2 < s.size()) {
+    if (rawSurrogateUtf8At(s, offset)) {
+      return true;
+    }
+    ++offset;
+  }
+  return false;
+}
+
+/// Rewrites character classes so that raw (literal) encoded surrogates inside
+/// them are matched as three-byte sequences instead of as three independent
+/// class members.  Text outside `[...]` is copied unchanged.
+///
+/// For each `[`, two strategies are tried:
+///   1. Parse the class with the translator's class parser; if that yields a
+///      byte-level rendering and the class text contains `&&` or a raw
+///      surrogate, emit the rendering.
+///   2. Otherwise scan manually for the closing `]` and split the body into
+///      ordinary bytes (kept as a class) and surrogate triples (emitted as
+///      alternatives): `(?:[rest]|(?:\x{..}\x{..}\x{..})|...)`.
+/// Classes the manual path cannot handle are copied through verbatim.
+std::string rewriteRawSurrogateUtf8Classes(std::string pattern) {
+  std::string out;
+  out.reserve(pattern.size());
+  for (std::size_t i = 0; i < pattern.size();) {
+    if (pattern[i] != '[') {
+      out.push_back(pattern[i++]);
+      continue;
+    }
+
+    // Strategy 1: full parse + range-set rendering.
+    const std::size_t start = i;
+    std::size_t parsedEnd = i;
+    if (auto rewritten =
+            tryRewriteClassAsUtf8BytePattern(pattern, start, parsedEnd)) {
+      const std::string_view classText(
+          pattern.data() + start, parsedEnd - start);
+      // Only replace classes that actually need it; everything else keeps
+      // its original spelling.
+      if (classText.find("&&") != std::string_view::npos ||
+          containsRawSurrogateUtf8(classText)) {
+        out += *rewritten;
+        i = parsedEnd;
+        continue;
+      }
+    }
+
+    // Strategy 2: manual scan.  Negated classes are not handled here: only
+    // the '[' is copied and the outer loop continues with the next byte.
+    std::size_t j = i + 1;
+    if (j < pattern.size() && pattern[j] == '^') {
+      out.push_back(pattern[i++]);
+      continue;
+    }
+    // Find the first unescaped ']'.
+    // NOTE(review): nested classes are not tracked by this scan — the first
+    // unescaped ']' ends the body.
+    bool escaped = false;
+    for (; j < pattern.size(); ++j) {
+      if (escaped) {
+        escaped = false;
+        continue;
+      }
+      if (pattern[j] == '\\') {
+        escaped = true;
+        continue;
+      }
+      if (pattern[j] == ']') {
+        break;
+      }
+    }
+    if (j == pattern.size()) {
+      // Unterminated class: copy the '[' and carry on.
+      out.push_back(pattern[i++]);
+      continue;
+    }
+
+    const std::string_view body(pattern.data() + i + 1, j - i - 1);
+    if (body.find("&&") != std::string_view::npos) {
+      // Intersections that strategy 1 could not render are left as-is.
+      out.append(pattern, start, j + 1 - start);
+      i = j + 1;
+      continue;
+    }
+
+    // Partition the body: surrogate triples become alternatives, every other
+    // byte stays in a (smaller) class.
+    std::string byteClass;
+    std::vector<std::string> surrogateAlts;
+    bool unsupportedRange = false;
+    for (std::size_t k = 0; k < body.size();) {
+      if (rawSurrogateUtf8At(body, k)) {
+        // A '-' directly before or after the triple means the surrogate is a
+        // range endpoint, which this rewrite cannot express.
+        if ((k > 0 && body[k - 1] == '-') ||
+            (k + 3 < body.size() && body[k + 3] == '-')) {
+          unsupportedRange = true;
+          break;
+        }
+        surrogateAlts.push_back(rawSurrogateUtf8BytePattern(
+            static_cast<unsigned char>(body[k]),
+            static_cast<unsigned char>(body[k + 1]),
+            static_cast<unsigned char>(body[k + 2])));
+        k += 3;
+        continue;
+      }
+      byteClass.push_back(body[k++]);
+    }
+
+    if (surrogateAlts.empty() || unsupportedRange) {
+      // Nothing to rewrite (or not rewritable): keep the class verbatim.
+      out.append(pattern, start, j + 1 - start);
+    } else {
+      out += "(?:";
+      bool needPipe = false;
+      if (!byteClass.empty()) {
+        out.push_back('[');
+        out += byteClass;
+        out.push_back(']');
+        needPipe = true;
+      }
+      for (const auto& alt : surrogateAlts) {
+        if (needPipe) {
+          out.push_back('|');
+        }
+        out += alt;
+        needPipe = true;
+      }
+      out.push_back(')');
+    }
+    i = j + 1;
+  }
+  return out;
+}
+
+/// Replaces raw encoded surrogates that appear outside character classes
+/// with an explicit `(?:\x{..}\x{..}\x{..})` group.  Escaped characters
+/// (backslash + next byte) and everything between `[` and the next `]` are
+/// copied through unchanged.
+std::string rewriteRawSurrogateUtf8Literals(std::string pattern) {
+  std::string out;
+  out.reserve(pattern.size());
+  bool inClass = false;
+  for (std::size_t i = 0; i < pattern.size();) {
+    const char c = pattern[i];
+    // Escapes are handled first so an escaped '[' or ']' does not toggle
+    // class state.
+    if (c == '\\' && i + 1 < pattern.size()) {
+      out.push_back(pattern[i++]);
+      out.push_back(pattern[i++]);
+      continue;
+    }
+    // NOTE(review): class state is a single flag, so nested classes are not
+    // tracked — the first ']' leaves class mode.
+    if (c == '[') {
+      inClass = true;
+      out.push_back(c);
+      ++i;
+      continue;
+    }
+    if (c == ']' && inClass) {
+      inClass = false;
+      out.push_back(c);
+      ++i;
+      continue;
+    }
+    if (!inClass && rawSurrogateUtf8At(pattern, i)) {
+      const auto b0 = static_cast<unsigned char>(pattern[i]);
+      const auto b1 = static_cast<unsigned char>(pattern[i + 1]);
+      const auto b2 = static_cast<unsigned char>(pattern[i + 2]);
+      out += rawSurrogateUtf8BytePattern(b0, b1, b2);
+      i += 3;
+      continue;
+    }
+    out.push_back(c);
+    ++i;
+  }
+  return out;
+}
+
+/// Prepares a translated pattern for compilation without PCRE2_UTF by
+/// replacing every reference to a surrogate code point with the equivalent
+/// byte-sequence regex.
+///
+/// The translator reports raw-byte mode via a side-channel bool.  PCRE2 in
+/// non-UTF mode accepts literal surrogate UTF-8 bytes, but not \x{D800}, so
+/// the rewrite proceeds in this order:
+///   1. well-known surrogate-block classes (exact textual matches, upper- and
+///      lower-case hex) -> byte-range alternations;
+///   2. any remaining single `\x{Dxxx}` surrogate escape -> three `\x{HH}`
+///      byte escapes;
+///   3. raw-byte `[<D800>-<DFFF>]` / `[^<D800>-<DFFF>]` classes;
+///   4. remaining raw surrogates inside classes, then outside classes.
+///
+/// Byte layout used below: U+D800..U+DB7F = ED A0..AD xx,
+/// U+DB80..U+DBFF = ED AE..AF xx, U+DC00..U+DFFF = ED B0..BF xx.
+std::string rewriteSurrogateEscapesForRawByteMode(std::string pattern) {
+  constexpr std::string_view kAnySurrogateBytes =
+      "(?:\\x{ED}[\\x{A0}-\\x{AF}][\\x{80}-\\x{BF}]|"
+      "\\x{ED}[\\x{B0}-\\x{BF}][\\x{80}-\\x{BF}])";
+  constexpr std::string_view kLowSurrogateBytes =
+      "(?:\\x{ED}[\\x{B0}-\\x{BF}][\\x{80}-\\x{BF}])";
+  // U+D800..U+DB7F together with U+DC00..U+DFFF.  The ED AE / ED AF lead
+  // pairs (U+DB80..U+DBFF) are not part of this set and must not match.
+  constexpr std::string_view kHighOrLowSurrogateBytes =
+      "(?:\\x{ED}[\\x{A0}-\\x{AD}][\\x{80}-\\x{BF}]|"
+      "\\x{ED}[\\x{B0}-\\x{BF}][\\x{80}-\\x{BF}])";
+
+  // Step 1: whole-class aliases.  The nested spellings are replaced before
+  // the flat ones so the shorter needles cannot match inside them first.
+  replaceAll(
+      pattern,
+      "[\\x{d800}-\\x{dbff}\\x{dc00}-\\x{dfff}]",
+      kAnySurrogateBytes);
+  replaceAll(
+      pattern,
+      "[\\x{D800}-\\x{DBFF}\\x{DC00}-\\x{DFFF}]",
+      kAnySurrogateBytes);
+  replaceAll(
+      pattern,
+      "[[\\x{D800}-\\x{DB7F}][\\x{DC00}-\\x{DFFF}]]",
+      kHighOrLowSurrogateBytes);
+  replaceAll(
+      pattern,
+      "[[\\x{d800}-\\x{db7f}][\\x{dc00}-\\x{dfff}]]",
+      kHighOrLowSurrogateBytes);
+  replaceAll(
+      pattern,
+      "[\\x{D800}-\\x{DB7F}\\x{DC00}-\\x{DFFF}]",
+      kHighOrLowSurrogateBytes);
+  replaceAll(pattern, "[\\x{dc00}-\\x{dfff}]", kLowSurrogateBytes);
+  replaceAll(pattern, "[\\x{DC00}-\\x{DFFF}]", kLowSurrogateBytes);
+  replaceAll(
+      pattern,
+      "[\\x{D800}-\\x{DB7F}]",
+      "(?:\\x{ED}[\\x{A0}-\\x{AD}][\\x{80}-\\x{BF}])");
+  replaceAll(
+      pattern,
+      "[\\x{DB80}-\\x{DBFF}]",
+      "(?:\\x{ED}[\\x{AE}-\\x{AF}][\\x{80}-\\x{BF}])");
+
+  // Step 2: individual \x{Dxxx} escapes.  Every needle starts with `\x{D`
+  // or `\x{d`, so the 2 x 2048 replacement passes are skipped entirely when
+  // neither prefix occurs.
+  if (pattern.find("\\x{D") != std::string::npos ||
+      pattern.find("\\x{d") != std::string::npos) {
+    for (std::uint32_t cp = 0xD800; cp <= 0xDFFF; ++cp) {
+      char token[16];
+      std::snprintf(token, sizeof(token), "\\x{%04X}", cp);
+      replaceAll(pattern, token, surrogateUtf8ByteEscapes(cp));
+      std::snprintf(token, sizeof(token), "\\x{%04x}", cp);
+      replaceAll(pattern, token, surrogateUtf8ByteEscapes(cp));
+    }
+  }
+
+  // Step 3: classes spelled with raw bytes, `[<ED A0 80>-<ED BF BF>]` and
+  // its negation.  The negated form is replaced first because the positive
+  // needle is not a substring of it only by virtue of the leading '^'.
+  const std::string rawAnySurrogateRange =
+      std::string("[") + std::string("\xED\xA0\x80", 3) + "-" +
+      std::string("\xED\xBF\xBF", 3) + "]";
+  const std::string rawNotAnySurrogateRange =
+      std::string("[^") + std::string("\xED\xA0\x80", 3) + "-" +
+      std::string("\xED\xBF\xBF", 3) + "]";
+  constexpr std::string_view kValidUtf8NonSurrogate =
+      "(?:[\\x{00}-\\x{7F}]|"
+      "[\\x{C2}-\\x{DF}][\\x{80}-\\x{BF}]|"
+      "\\x{E0}[\\x{A0}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "[\\x{E1}-\\x{EC}\\x{EE}-\\x{EF}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "\\x{ED}[\\x{80}-\\x{9F}][\\x{80}-\\x{BF}]|"
+      "\\x{F0}[\\x{90}-\\x{BF}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "[\\x{F1}-\\x{F3}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}]|"
+      "\\x{F4}[\\x{80}-\\x{8F}][\\x{80}-\\x{BF}][\\x{80}-\\x{BF}])";
+  replaceAll(pattern, rawNotAnySurrogateRange, kValidUtf8NonSurrogate);
+  replaceAll(
+      pattern,
+      rawAnySurrogateRange,
+      "(?:\\x{ED}[\\x{A0}-\\x{BF}][\\x{80}-\\x{BF}])");
+
+  // Step 4: whatever raw surrogates are left, first inside classes, then as
+  // bare literals.
+  return rewriteRawSurrogateUtf8Literals(
+      rewriteRawSurrogateUtf8Classes(std::move(pattern)));
+}
+
+/// True when `s` contains the three-byte sequence ED, A0..BF, 80..BF at any
+/// offset, i.e. an encoded surrogate code point.
+///
+/// This is the same predicate as containsRawSurrogateUtf8 (defined above for
+/// pattern text); delegating keeps the byte ranges in one place so the
+/// pattern-side and subject-side checks cannot drift apart.
+bool containsSurrogateUtf8(std::string_view s) {
+  return containsRawSurrogateUtf8(s);
+}
+
+} // namespace
+
+/// Translates `javaPattern` to PCRE2 syntax, compiles it, and caches the
+/// capture-group metadata.  Never throws for translator
+/// `EvaluationFailedException` or PCRE2 compile errors: in both cases `code_`
+/// stays null and `error_` holds the message.
+Pcre2Regex::Pcre2Regex(std::string_view javaPattern, Options opt) {
+  // Translate Java regex syntax → PCRE2 syntax before compiling.  When
+  // the translator cannot express the pattern in PCRE2 (e.g. an
+  // unsupported `\p{...}` property in an intersection), we report the
+  // translator message verbatim and leave the pattern uncompiled.
+  std::string pcre2Pattern;
+  // Out-parameter of the translator: set when the pattern has to be compiled
+  // without PCRE2_UTF (see below).
+  bool needsRawByteMode = false;
+  try {
+    // Case-insensitive patterns go through the Unicode-case variant of the
+    // translator.
+    pcre2Pattern = opt.caseSensitive
+        ? functions::java_pcre2_translator::toPcre2Pattern(
+              javaPattern, needsRawByteMode)
+        : functions::java_pcre2_translator::toPcre2PatternWithUnicodeCase(
+              javaPattern, needsRawByteMode);
+  } catch (const functions::java_pcre2_translator::EvaluationFailedException&
+               ex) {
+    error_ = std::string("Java→PCRE2 translator: ") + ex.what();
+    return;
+  }
+  if (needsRawByteMode) {
+    // Surrogate escapes must become byte sequences before UTF mode is dropped.
+    pcre2Pattern = rewriteSurrogateEscapesForRawByteMode(std::move(pcre2Pattern));
+  }
+
+  int err = 0;
+  PCRE2_SIZE off = 0;
+  // In raw-byte mode the PCRE2_UTF bit is masked out of the compile options;
+  // otherwise the options are passed through unchanged.
+  code_ = pcre2_compile_8(
+      reinterpret_cast<PCRE2_SPTR8>(pcre2Pattern.data()),
+      pcre2Pattern.size(),
+      toPcre2Options(opt) & (needsRawByteMode ? ~PCRE2_UTF : ~0u),
+      &err,
+      &off,
+      nullptr);
+  if (!code_) {
+    error_ = pcre2ErrorToString(err, off);
+    return;
+  }
+  // JIT-compile for speed.  Falls back to the interpreter on platforms where
+  // JIT isn't supported, no special handling needed.
+  pcre2_jit_compile_8(code_, PCRE2_JIT_COMPLETE);
+
+  // Capture count.
+  std::uint32_t cap = 0;
+  pcre2_pattern_info_8(code_, PCRE2_INFO_CAPTURECOUNT, &cap);
+  captureCount_ = static_cast<int>(cap);
+
+  // Named groups: name table is a flat blob of fixed-size entries; first 2
+  // bytes of each entry are the (big-endian) group index, then a NUL-terminated
+  // name.
+  std::uint32_t nameCount = 0;
+  std::uint32_t entrySize = 0;
+  PCRE2_SPTR8 nameTable = nullptr;
+  pcre2_pattern_info_8(code_, PCRE2_INFO_NAMECOUNT, &nameCount);
+  pcre2_pattern_info_8(code_, PCRE2_INFO_NAMEENTRYSIZE, &entrySize);
+  pcre2_pattern_info_8(code_, PCRE2_INFO_NAMETABLE, &nameTable);
+  for (std::uint32_t i = 0; i < nameCount; ++i) {
+    const std::uint8_t* entry = nameTable + i * entrySize;
+    int idx = (entry[0] << 8) | entry[1];
+    // emplace keeps the first entry seen for a given name.
+    named_.emplace(reinterpret_cast<const char*>(entry + 2), idx);
+  }
+}
+
+/// Releases the compiled pattern, if one was produced.
+Pcre2Regex::~Pcre2Regex() {
+  // Nothing to free when translation or compilation failed.
+  if (code_ == nullptr) {
+    return;
+  }
+  pcre2_code_free_8(code_);
+}
+
+/// True when the constructor produced a compiled pattern.
+bool Pcre2Regex::ok() const {
+  return code_ != nullptr;
+}
+/// Translator or PCRE2 compile error text; empty if none was recorded.
+const std::string& Pcre2Regex::error() const {
+  return error_;
+}
+/// Capture-group count reported by PCRE2 at compile time (0 if uncompiled).
+int Pcre2Regex::NumberOfCapturingGroups() const {
+  return captureCount_;
+}
+/// Group name -> group index, read from PCRE2's name table at compile time.
+const std::map<std::string, int>& Pcre2Regex::NamedCapturingGroups() const {
+  return named_;
+}
+
+/// Matches the compiled pattern against `input[startpos, endpos)`.
+///
+/// @param input      Subject text.
+/// @param startpos   Byte offset at which matching starts.
+/// @param endpos     Byte offset one past the last byte considered.
+/// @param anchor     Unanchored, anchored at start, or anchored at both ends.
+/// @param submatch   Output array of at least `nsubmatch` elements;
+///                   `submatch[0]` is the whole match, `submatch[i]` group i.
+///                   Unset groups and entries beyond what PCRE2 reported are
+///                   set to an empty view.  Untouched when false is returned.
+/// @param nsubmatch  Number of entries to fill in `submatch`.
+/// @return true on a match; false on no match, on any PCRE2 error, when the
+///         pattern failed to compile, or when the range is invalid
+///         (`startpos > endpos` or `endpos > input.size()`, mirroring
+///         `re2::RE2::Match`).
+bool Pcre2Regex::Match(
+    std::string_view input,
+    std::size_t startpos,
+    std::size_t endpos,
+    Anchor anchor,
+    std::string_view* submatch,
+    int nsubmatch) const {
+  if (!code_) {
+    return false;
+  }
+  // `endpos` is handed to PCRE2 as the subject length, so a value beyond
+  // input.size() would make it read past the end of the buffer.
+  if (startpos > endpos || endpos > input.size()) {
+    return false;
+  }
+  // A default-constructed string_view has a null data() pointer, which some
+  // PCRE2 releases reject even for a zero-length subject.
+  const char* subject = input.data() != nullptr ? input.data() : "";
+
+  pcre2_match_data_8* md =
+      pcre2_match_data_create_from_pattern_8(code_, nullptr);
+  if (md == nullptr) {
+    return false;
+  }
+  // Subjects carrying encoded surrogates are not valid UTF-8; skip PCRE2's
+  // validity check for them so matching is attempted at all.
+  const std::uint32_t utfCheck =
+      containsSurrogateUtf8(input.substr(0, endpos)) ? PCRE2_NO_UTF_CHECK : 0;
+  // PCRE2 takes the full subject + the length to consider; passing `endpos`
+  // as the length cleanly caps matching to [startpos, endpos).
+  const int rc = pcre2_match_8(
+      code_,
+      reinterpret_cast<PCRE2_SPTR8>(subject),
+      endpos,
+      startpos,
+      toPcre2MatchOptions(anchor) | utfCheck,
+      md,
+      nullptr);
+  if (rc < 0) {
+    pcre2_match_data_free_8(md);
+    return false;
+  }
+  // rc is one more than the highest-numbered group that was set; only that
+  // many ovector pairs are meaningful.
+  const PCRE2_SIZE* ov = pcre2_get_ovector_pointer_8(md);
+  const int avail = std::min<int>(nsubmatch, rc);
+  for (int i = 0; i < avail; ++i) {
+    if (ov[2 * i] == PCRE2_UNSET) {
+      submatch[i] = std::string_view{};
+    } else {
+      submatch[i] = input.substr(ov[2 * i], ov[2 * i + 1] - ov[2 * i]);
+    }
+  }
+  for (int i = avail; i < nsubmatch; ++i) {
+    submatch[i] = std::string_view{};
+  }
+  pcre2_match_data_free_8(md);
+  return true;
+}
+
+/// True when `re` matches all of `input` (anchored at both ends).
+bool Pcre2Regex::FullMatch(std::string_view input, const Pcre2Regex& re) {
+  // Match() is asked for one submatch (the whole match); it is discarded.
+  std::string_view whole;
+  const std::size_t length = input.size();
+  return re.Match(input, 0, length, Anchor::kAnchorBoth, &whole, 1);
+}
+
+/// True when `re` matches anywhere inside `input` (unanchored).
+bool Pcre2Regex::PartialMatch(std::string_view input, const Pcre2Regex& re) {
+  // Match() is asked for one submatch (the whole match); it is discarded.
+  std::string_view whole;
+  const std::size_t length = input.size();
+  return re.Match(input, 0, length, Anchor::kUnanchored, &whole, 1);
+}
+
+/// Replaces every match of `re` in `*str` with `javaReplacement`.
+///
+/// @param str              String modified in place; untouched on failure.
+/// @param re               Compiled pattern.
+/// @param javaReplacement  Replacement text, passed to PCRE2 as-is.
+/// @return the value returned by `pcre2_substitute_8` on success (the number
+///         of substitutions), or 0 when `re` is not compiled, `str` is null,
+///         or PCRE2 reports an error.
+int Pcre2Regex::GlobalReplace(
+    std::string* str,
+    const Pcre2Regex& re,
+    std::string_view javaReplacement) {
+  if (!re.ok() || str == nullptr) {
+    return 0;
+  }
+  // PCRE2_SUBSTITUTE_EXTENDED enables $N / ${name} / $$ / \$ — the Java
+  // replacement syntax that Velox's `prepareRegexpReplaceReplacement` had to
+  // translate away for RE2.
+  const std::uint32_t opts = PCRE2_SUBSTITUTE_GLOBAL |
+      PCRE2_SUBSTITUTE_EXTENDED | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
+  // An empty string_view may carry a null data() pointer, which some PCRE2
+  // releases reject even when the length is zero.
+  const char* replacement =
+      javaReplacement.data() != nullptr ? javaReplacement.data() : "";
+
+  std::string out;
+  out.resize(str->size() * 2 + 32);
+  PCRE2_SIZE outlen = 0;
+  // One substitution attempt into the current `out` buffer.  `outlen` is an
+  // in/out parameter: capacity on entry, result length (or, on overflow with
+  // PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, the required capacity) on return.
+  const auto substitute = [&]() {
+    outlen = out.size();
+    return pcre2_substitute_8(
+        re.code_,
+        reinterpret_cast<PCRE2_SPTR8>(str->data()),
+        str->size(),
+        0,
+        opts,
+        nullptr,
+        nullptr,
+        reinterpret_cast<PCRE2_SPTR8>(replacement),
+        javaReplacement.size(),
+        reinterpret_cast<PCRE2_UCHAR8*>(out.data()),
+        &outlen);
+  };
+
+  // First try with a reasonable initial buffer; on overflow PCRE2 tells us
+  // the required size in `outlen` and we retry once.
+  int rc = substitute();
+  if (rc == PCRE2_ERROR_NOMEMORY) {
+    out.resize(outlen);
+    rc = substitute();
+  }
+  if (rc < 0) {
+    // Substitution error (e.g. unknown group); leave *str untouched.
+    return 0;
+  }
+  out.resize(outlen);
+  *str = std::move(out);
+  return rc;
+}
+
+} // namespace facebook::velox::regex_compat
diff --git a/velox/external/regex_compat/Pcre2Regex.h b/velox/external/regex_compat/Pcre2Regex.h
new file mode 100644
index 00000000000..15bf55fcb5c
--- /dev/null
+++ b/velox/external/regex_compat/Pcre2Regex.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <map>
+#include <string>
+#include <string_view>
+
+#include "velox/external/regex_compat/RegexTypes.h"
+
+// Opaque PCRE2 8-bit types so this header doesn't drag in <pcre2.h>.
+struct pcre2_real_code_8;
+typedef struct pcre2_real_code_8 pcre2_code_8;
+
+namespace facebook::velox::regex_compat {
+
+/// PCRE2 (8-bit) backend in the regex-compat test suite.  Public method names
+/// and signatures mirror `re2::RE2`'s subset that Velox uses.
+///
+/// **Pattern / replacement input is Java `java.util.regex` syntax.**
+/// The constructor runs the pattern through `java_pcre2_translator`
+/// (`toPcre2Pattern`, or `toPcre2PatternWithUnicodeCase` when
+/// `Options::caseSensitive` is false) and compiles the translated text with
+/// PCRE2.  When the translator throws `EvaluationFailedException`, or PCRE2
+/// rejects the translated pattern, the object is left uncompiled: `ok()`
+/// returns false and `error()` carries the message.
+///
+/// Replacement strings are not translated: `GlobalReplace` hands them to
+/// `pcre2_substitute_8` with `PCRE2_SUBSTITUTE_EXTENDED`, which natively
+/// understands `$N` and `${name}` Java-style references.
+class Pcre2Regex {
+ public:
+  /// Translates and compiles `javaPattern`; check `ok()` afterwards.
+  explicit Pcre2Regex(std::string_view javaPattern, Options opt = {});
+  ~Pcre2Regex();
+
+  // Non-copyable: the object owns the compiled pattern in `code_`.
+  Pcre2Regex(const Pcre2Regex&) = delete;
+  Pcre2Regex& operator=(const Pcre2Regex&) = delete;
+
+  /// True when a compiled pattern is available.
+  bool ok() const;
+  /// Translator or PCRE2 compile error text; empty if none was recorded.
+  const std::string& error() const;
+  /// Number of capture groups in the compiled pattern.
+  int NumberOfCapturingGroups() const;
+  /// Group name -> group index for named groups.
+  const std::map<std::string, int>& NamedCapturingGroups() const;
+
+  /// Matches against `input[startpos, endpos)` and fills up to `nsubmatch`
+  /// entries of `submatch` (index 0 is the whole match).  Returns false on
+  /// no match, on a PCRE2 error, or when the pattern is not compiled.
+  bool Match(
+      std::string_view input,
+      std::size_t startpos,
+      std::size_t endpos,
+      Anchor anchor,
+      std::string_view* submatch,
+      int nsubmatch) const;
+
+  /// Whole-input (anchored both ends) and anywhere-in-input matching.
+  static bool FullMatch(std::string_view input, const Pcre2Regex& re);
+  static bool PartialMatch(std::string_view input, const Pcre2Regex& re);
+
+  /// Java `$N` / `${name}` replacement syntax, handled natively by PCRE2 via
+  /// `PCRE2_SUBSTITUTE_EXTENDED`.  Returns the number of replacements done.
+  static int GlobalReplace(
+      std::string* str,
+      const Pcre2Regex& re,
+      std::string_view javaReplacement);
+
+ private:
+  // Compiled pattern; null when translation or compilation failed.
+  pcre2_code_8* code_ = nullptr;
+  // Message returned by error().
+  std::string error_;
+  // Value returned by NumberOfCapturingGroups().
+  int captureCount_ = 0;
+  // Map returned by NamedCapturingGroups().
+  std::map<std::string, int> named_;
+};
+
+} // namespace facebook::velox::regex_compat
diff --git a/velox/external/regex_compat/README.md b/velox/external/regex_compat/README.md
new file mode 100644
index 00000000000..37fc10bdd23
--- /dev/null
+++ b/velox/external/regex_compat/README.md
@@ -0,0 +1,127 @@
+# Velox regex compatibility test suite
+
+A C++ test harness that compares three regex engines — Velox's existing
+**RE2**, **PCRE2** (8-bit, JIT), and an embedded JVM running
+**`java.util.regex`** — against the same inputs, expressed in Java regex
+syntax.
+
+The goal is to quantify how each engine handles Java-style patterns and
+replacements so the Velox project can make data-driven decisions about
+whether to introduce PCRE2 alongside RE2 in production, and (separately)
+whether to invest in a Java → PCRE2 translator analogous to
+[pcre4j PR #606](https://github.com/alexey-pelykh/pcre4j/pull/606).
+
+This module is **opt-in** and **off by default**.  It does not affect
+stock Velox builds in any way unless you enable the CMake options below.
+
+## Enabling
+
+```bash
+# TESTS=ON is the master switch (pulls in PCRE2); JAVA_BACKEND=ON adds the JNI backend (needs a JDK).
+cmake -S . -B build -GNinja -DVELOX_ENABLE_REGEX_COMPAT_TESTS=ON \
+  -DVELOX_ENABLE_REGEX_COMPAT_JAVA_BACKEND=ON
+cmake --build build --target velox_regex_compat_test
+build/velox/external/regex_compat/tests/velox_regex_compat_test
+```
+
+`VELOX_ENABLE_REGEX_COMPAT_JAVA_BACKEND` defaults to `ON`.  If
+`find_package(JNI)` fails (no JDK installed) the option is silently
+flipped to `OFF` and the suite still builds — only the Java backend is
+disabled.
+
+## Architecture
+
+Three parallel, non-virtual concrete classes:
+
+| Backend     | Implementation                                              |
+| ----------- | ----------------------------------------------------------- |
+| `Re2Regex`  | wraps `re2::RE2`; uses `java_pcre2_translator::toRe2Pattern` for Java pattern syntax and Velox's inline `prepareRegexpReplaceReplacement` from `Re2Functions.h` for Java replacement syntax |
+| `Pcre2Regex`| wraps `pcre2_code_8`; uses `java_pcre2_translator::toPcre2Pattern`; `GlobalReplace` uses `PCRE2_SUBSTITUTE_EXTENDED` for `$N` / `${name}` |
+| `JavaRegex` | drives `java.util.regex.Pattern` / `Matcher` through an embedded JVM (`JNI_CreateJavaVM`) using only standard JDK classes — no Gluten / Hadoop jars needed |
+
+Their public methods deliberately mirror the subset of `re2::RE2` actually
+used in `velox/functions/lib/Re2Functions.cpp`:
+
+- `bool Match(input, startpos, endpos, anchor, submatch[], nsubmatch)`
+- `int NumberOfCapturingGroups()`
+- `const std::map<std::string, int>& NamedCapturingGroups()`
+- `bool ok() / const std::string& error()`
+- static `FullMatch / PartialMatch / GlobalReplace`
+- `Anchor { kUnanchored, kAnchorStart, kAnchorBoth }`
+- `Options { caseSensitive, dotNl, oneLine, logErrors, maxMem }`
+
+The shared shape (plus identical method signatures) lets one
+`TYPED_TEST_SUITE` declaration produce one test per backend at compile
+time — see `tests/BackendTypedTest.cpp` and the ported pcre4j
+test files.
+
+The stateful Java `Matcher` API (`find()` cursor, `group(i)`,
+`replaceFirst`, …) lives in `tests/JavaMatcherAdapter.h` — a
+template that reconstructs the state machine on top of the backend's
+stateless `Match()`.  It is **test-only**; production backends do not
+carry this state.
+
+## What's tested
+
+`velox_regex_compat_test` ships with **189 GTest cases** across 15
+suites:
+
+```
+Re2RegexTest                11 cases — RE2-specific edge cases
+Pcre2RegexTest              12 cases — PCRE2-specific, incl. lookahead + backref
+JavaRegexTest               13 cases — Java-specific, incl. \p{InGreek}
+BackendTest                 13 × 3   — core API typed across all backends
+PatternPortedTest           13 × 3   — ported from pcre4j PatternTests.java
+MatchingPortedTest          14 × 3   — ported from pcre4j MatcherMatchingTests.java
+ReplacementPortedTest       11 × 3   — ported from pcre4j MatcherReplacementTests.java
+```
+
+A single typed test exercises both engine differences (e.g. PCRE2 supports
+lookahead while RE2 doesn't) and cross-engine parity (e.g. all three
+backends accept Java `(?<name>...)` named groups).
+
+## Known cross-engine differences
+
+| Java feature                       | Re2Regex                              | Pcre2Regex                  | JavaRegex |
+| ---------------------------------- | ------------------------------------- | --------------------------- | --------- |
+| `(?<name>...)` named groups        | translated via `toRe2Pattern`         | native                  | native    |
+| `$N` / `${name}` in replacement    | translated via `prepareRegexpReplaceReplacement` | `PCRE2_SUBSTITUTE_EXTENDED` native | native |
+| Lookaround `(?=...)`, `(?!...)`    | not supported (`ok() == false`)       | native                      | native    |
+| Backreferences `\1`                | not supported                         | native                      | native    |
+| Atomic groups `(?>...)`, possessive `*+` | not supported                   | native                      | native    |
+| Java `\p{InGreek}` / `\p{javaXxx}` | translated where safe                  | translated where safe   | native    |
+| Character-class intersection `[a-c&&b-d]` | translated where safe          | translated where safe   | native    |
+| `(?U)` Java UNICODE_CHARACTER_CLASS | rejected to avoid RE2 ungreedy semantics | translated where safe | native    |
+| Multiline `^`/`$`                  | injected `(?m)` prefix when `oneLine=false` | option-mapped              | option-mapped |
+| `a{` incomplete quantifier         | accepted as literal                   | accepted as literal         | rejected (`PatternSyntaxException`) |
+
+The translator rows are intentionally conservative: features are translated
+only where the target engine can preserve Java semantics, and otherwise the
+backend reports `ok() == false` with a translator error.
+
+## Provenance
+
+- `Re2Regex`, `Pcre2Regex`, `JavaRegex`, `JvmFixture` — original code,
+  Apache-2.0.
+- Ported test cases in `tests/Pattern…PortedTest.cpp` and
+  `tests/Matcher…PortedTest.cpp` are 1:1 translations of the
+  corresponding `org.pcre4j.regex.tests.*` Java tests from
+  [pcre4j](https://github.com/alexey-pelykh/pcre4j).  The upstream Java
+  code is GPL/LGPL; the C++ port re-implements them in Apache-2.0 form
+  for the Velox project.
+
+## What's **not** in this module (scope notes)
+
+- **No production code change.**  This module sits under
+  `velox/external/regex_compat/` precisely because it is a comparison
+  experiment, not a Velox engine swap.  If/when a production decision
+  is made the backend classes can be lifted to `velox/functions/lib/`
+  but that is a separate task.
+- **No production regex engine replacement.**  The Java regex translator is
+  wired into this comparison suite's RE2 and PCRE2 backends to measure
+  compatibility, not to change Velox production regex behavior.
+- **No QueryConfig runtime switch.**  Whether Velox should expose
+  RE2/PCRE2/Java as a runtime-selectable engine is a downstream
+  decision; the backend classes here all happen to be method-compatible,
+  but they are not unified behind a virtual base or `std::variant`
+  facade.
diff --git a/velox/external/regex_compat/Re2Regex.cpp b/velox/external/regex_compat/Re2Regex.cpp
new file mode 100644
index 00000000000..5af77f47f4b
--- /dev/null
+++ b/velox/external/regex_compat/Re2Regex.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/external/regex_compat/Re2Regex.h"
+
+#include <re2/re2.h>
+#include <re2/stringpiece.h>
+
+#include "velox/functions/lib/Re2Functions.h"
+#include "velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.h"
+#include "velox/type/StringView.h"
+
+namespace facebook::velox::regex_compat {
+namespace {
+
+inline re2::StringPiece toSp(std::string_view text) { // non-owning view
+  return re2::StringPiece{text.data(), text.size()};
+}
+inline std::string_view toSv(const re2::StringPiece& piece) {
+  return std::string_view{piece.data(), piece.size()};
+}
+inline StringView toVelox(std::string_view text) { // Velox string view
+  return StringView(text.data(), text.size());
+}
+
+// Translates the suite's Anchor into RE2's enum; any value other than the
+// two anchored modes maps to UNANCHORED.
+re2::RE2::Anchor toRe2Anchor(Anchor a) {
+  if (a == Anchor::kAnchorStart) {
+    return re2::RE2::ANCHOR_START;
+  }
+  if (a == Anchor::kAnchorBoth) {
+    return re2::RE2::ANCHOR_BOTH;
+  }
+  return re2::RE2::UNANCHORED;
+}
+
+re2::RE2::Options toRe2Options(const Options& opts) {
+  re2::RE2::Options native;
+  native.set_encoding(re2::RE2::Options::EncodingUTF8);
+  native.set_max_mem(opts.maxMem);
+  native.set_log_errors(opts.logErrors);
+  native.set_case_sensitive(opts.caseSensitive);
+  native.set_dot_nl(opts.dotNl);
+  native.set_one_line(opts.oneLine); // MULTILINE itself is handled via `(?m)`
+  return native;
+}
+
+} // namespace
+
+Re2Regex::Re2Regex(std::string_view javaPattern, Options opt) {
+  namespace jt = functions::java_pcre2_translator;
+  std::string translated;
+  try {
+    if (opt.caseSensitive) {
+      translated = jt::toRe2Pattern(javaPattern);
+    } else {
+      translated = jt::toRe2PatternWithUnicodeCase(javaPattern);
+    }
+  } catch (const jt::EvaluationFailedException& ex) {
+    // Translation failed: leave `re_` empty so ok() reports false.
+    error_ = std::string("Java→RE2 translator: ") + ex.what();
+    return;
+  }
+  // Java MULTILINE has no usable RE2::Options counterpart here, so per-line
+  // `^`/`$` anchoring is requested with an inline `(?m)` prefix whenever the
+  // caller clears `oneLine`.  The flag only changes anchor behaviour, which
+  // is why prepending it to the translated pattern is safe.
+  if (!opt.oneLine) {
+    translated.insert(0, "(?m)");
+  }
+  re_ = std::make_unique<re2::RE2>(toSp(translated), toRe2Options(opt));
+  if (re_->ok()) {
+    named_ = re_->NamedCapturingGroups();
+  } else {
+    error_ = re_->error();
+  }
+}
+
+Re2Regex::~Re2Regex() = default;
+
+bool Re2Regex::ok() const {
+  return re_ != nullptr && re_->ok();
+}
+const std::string& Re2Regex::error() const {
+  return error_; // set by the constructor on translation/compile failure
+}
+int Re2Regex::NumberOfCapturingGroups() const {
+  return re_ == nullptr ? 0 : re_->NumberOfCapturingGroups();
+}
+const std::map<std::string, int>& Re2Regex::NamedCapturingGroups() const {
+  return named_; // filled only after a successful compile
+}
+const re2::RE2& Re2Regex::raw() const {
+  return *re_; // caller must ensure translation succeeded (`re_` non-null)
+}
+
+bool Re2Regex::Match(
+    std::string_view input,
+    std::size_t startpos,
+    std::size_t endpos,
+    Anchor anchor,
+    std::string_view* submatch,
+    int nsubmatch) const {
+  if (!ok()) {
+    return false;
+  }
+  // A null output array or a negative count would otherwise size the scratch
+  // vector from a wrapped-around value or write through nullptr; treat both
+  // as "no captures requested".
+  const int ncaps = (submatch != nullptr && nsubmatch > 0) ? nsubmatch : 0;
+  // RE2 reports captures as StringPiece; stage them, then copy out on success.
+  std::vector<re2::StringPiece> caps(ncaps);
+  if (!re_->Match(
+          toSp(input), startpos, endpos, toRe2Anchor(anchor), caps.data(),
+          ncaps)) {
+    return false; // `submatch` is left untouched on a failed match
+  }
+  for (int i = 0; i < ncaps; ++i) {
+    // Non-participating groups come back with a null data pointer.
+    submatch[i] = caps[i].data() ? toSv(caps[i]) : std::string_view{};
+  }
+  return true;
+}
+
+// Whole-input match.  A regex that failed to build never matches; `&&`
+// short-circuits so `*re.re_` is not evaluated in that case.
+bool Re2Regex::FullMatch(std::string_view input, const Re2Regex& re) {
+  const bool usable = re.ok();
+  return usable && re2::RE2::FullMatch(toSp(input), *re.re_);
+}
+
+// Substring match.  A regex that failed to build never matches; `&&`
+// short-circuits so `*re.re_` is not evaluated in that case.
+bool Re2Regex::PartialMatch(std::string_view input, const Re2Regex& re) {
+  const bool usable = re.ok();
+  return usable && re2::RE2::PartialMatch(toSp(input), *re.re_);
+}
+
+int Re2Regex::GlobalReplace(
+    std::string* str,
+    const Re2Regex& re,
+    std::string_view javaReplacement) {
+  if (str == nullptr || !re.ok()) {
+    return 0; // no target string or no usable regex: nothing is replaced
+  }
+  const std::string rewrite = functions::prepareRegexpReplaceReplacement(
+      *re.re_, toVelox(javaReplacement)); // Java `$N`/`${name}` -> RE2 form
+  return re2::RE2::GlobalReplace(str, *re.re_, rewrite);
+}
+
+} // namespace facebook::velox::regex_compat
diff --git a/velox/external/regex_compat/Re2Regex.h b/velox/external/regex_compat/Re2Regex.h
new file mode 100644
index 00000000000..9c4b0746cb7
--- /dev/null
+++ b/velox/external/regex_compat/Re2Regex.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "velox/external/regex_compat/RegexTypes.h"
+
+namespace re2 {
+class RE2;
+}
+
+namespace facebook::velox::regex_compat {
+
+/// `re2::RE2` backend in the regex-compat test suite.  Public method names
+/// and signatures mirror the subset of `re2::RE2` that
+/// `velox/functions/lib/Re2Functions.cpp` actually consumes — this keeps the
+/// test-suite typed-test surface aligned with Velox's existing RE2 usage.
+///
+/// **Pattern / replacement input** is Java `java.util.regex` syntax.  The
+/// constructor translates via `java_pcre2_translator::toRe2Pattern` (or
+/// `toRe2PatternWithUnicodeCase` when `caseSensitive` is false) and
+/// `GlobalReplace` via `prepareRegexpReplaceReplacement` (`Re2Functions.h`).
+/// Java features that RE2 cannot express (lookaround / backrefs / possessive /
+/// atomic group) cause `ok() == false` with a translator error message.
+class Re2Regex {
+ public:
+  explicit Re2Regex(std::string_view javaPattern, Options opt = {});
+  ~Re2Regex();
+  // Copy construction and copy assignment are deleted.
+  Re2Regex(const Re2Regex&) = delete;
+  Re2Regex& operator=(const Re2Regex&) = delete;
+
+  bool ok() const;
+  const std::string& error() const;
+  int NumberOfCapturingGroups() const;
+  const std::map<std::string, int>& NamedCapturingGroups() const;
+  // Returns false, leaving `submatch` untouched, if `!ok()` or nothing matched.
+  bool Match(
+      std::string_view input,
+      std::size_t startpos,
+      std::size_t endpos,
+      Anchor anchor,
+      std::string_view* submatch,
+      int nsubmatch) const;
+
+  // Static convenience helpers matching `re2::RE2`; false when `!re.ok()`.
+  static bool FullMatch(std::string_view input, const Re2Regex& re);
+  static bool PartialMatch(std::string_view input, const Re2Regex& re);
+
+  /// Globally replace all matches in `*str`.  `javaReplacement` uses Java
+  /// `$N` / `${name}` syntax, translated via `prepareRegexpReplaceReplacement`
+  /// before `re2::RE2::GlobalReplace` runs.  Returns the number of
+  /// replacements performed (0 if `str` is null or `!re.ok()`).
+  static int GlobalReplace(
+      std::string* str,
+      const Re2Regex& re,
+      std::string_view javaReplacement);
+
+  // Wrapped RE2 object; dereferences `re_`, so only call it when it is set.
+  const re2::RE2& raw() const;
+
+ private:
+  std::unique_ptr<re2::RE2> re_; // null if the translator rejected the pattern
+  std::string error_; // translator or RE2 error text
+  std::map<std::string, int> named_; // copied from RE2 after a good compile
+};
+
+} // namespace facebook::velox::regex_compat
diff --git a/velox/external/regex_compat/RegexTypes.h b/velox/external/regex_compat/RegexTypes.h
new file mode 100644
index 00000000000..b9e9b38d016
--- /dev/null
+++ b/velox/external/regex_compat/RegexTypes.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+namespace facebook::velox::regex_compat {
+
+/// Anchoring modes for `Match()`; mirrors `re2::RE2::Anchor`.
+enum class Anchor { kUnanchored, kAnchorStart, kAnchorBoth };
+
+/// Subset of `re2::RE2::Options` exposed to the regex-compat test suite.
+/// Each backend (Re2Regex / Pcre2Regex / JavaRegex) maps fields to its native
+/// option type.
+struct Options {
+  bool caseSensitive = true; // false: match case-insensitively
+  bool dotNl = false; // true: `.` also matches '\n'
+  bool oneLine = true; // false: Java MULTILINE (`^`/`$` match per line)
+  bool logErrors = false; // forwarded to RE2's log_errors by Re2Regex
+  int maxMem = 8 << 20; // 8388608; forwarded to RE2's max_mem by Re2Regex
+};
+
+} // namespace facebook::velox::regex_compat
diff --git a/velox/external/regex_compat/tests/BackendTestBase.h b/velox/external/regex_compat/tests/BackendTestBase.h
new file mode 100644
index 00000000000..14608928988
--- /dev/null
+++ b/velox/external/regex_compat/tests/BackendTestBase.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <gtest/gtest.h>
+
+#include "velox/external/regex_compat/Pcre2Regex.h"
+#include "velox/external/regex_compat/Re2Regex.h"
+
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+#include "velox/external/regex_compat/JavaRegex.h"
+#endif
+
+namespace facebook::velox::regex_compat::test {
+
+/// GTest typed-test type list, instantiated once per backend at compile time.
+/// Suites declared with `TYPED_TEST_SUITE(MySuite, AllBackends)` automatically
+/// run for every backend type that is enabled in this build.
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+using AllBackends =
+    ::testing::Types<Re2Regex, Pcre2Regex, JavaRegex>;
+#else
+using AllBackends = ::testing::Types<Re2Regex, Pcre2Regex>;
+#endif
+
+/// Base fixture for tests that run against every backend; `R` is the backend.
+template <typename R>
+class BackendTest : public ::testing::Test {};
+
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/BackendTypedTest.cpp b/velox/external/regex_compat/tests/BackendTypedTest.cpp
new file mode 100644
index 00000000000..b0098f03809
--- /dev/null
+++ b/velox/external/regex_compat/tests/BackendTypedTest.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Typed test suite that exercises the regex-compat API common to all three
+// backends (Re2Regex / Pcre2Regex / JavaRegex).  Each TYPED_TEST below is
+// compiled and executed once per backend type, so one source line generates
+// `len(AllBackends)` assertions of identical behaviour.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+TYPED_TEST_SUITE(BackendTest, AllBackends);
+
+TYPED_TEST(BackendTest, compileOk) {
+  TypeParam digits("\\d+");
+  EXPECT_TRUE(digits.ok());
+  EXPECT_EQ(0, digits.NumberOfCapturingGroups());
+}
+
+TYPED_TEST(BackendTest, compileError) {
+  TypeParam broken("(unclosed");
+  EXPECT_FALSE(broken.ok());
+  EXPECT_FALSE(broken.error().empty());
+}
+
+TYPED_TEST(BackendTest, javaNamedGroup) {
+  // Every backend has to accept Java's (?<name>...) named-group syntax.
+  TypeParam named("(?<num>\\d+)");
+  ASSERT_TRUE(named.ok()) << named.error();
+  EXPECT_EQ(1, named.NumberOfCapturingGroups());
+}
+
+TYPED_TEST(BackendTest, matchUnanchored) {
+  const std::string_view text = "abc 42 xyz";
+  std::string_view caps[2]; // [0] whole match, [1] capture group 1
+  TypeParam re("(\\d+)");
+  EXPECT_TRUE(re.Match(text, 0, text.size(), Anchor::kUnanchored, caps, 2));
+  EXPECT_EQ("42", caps[0]);
+  EXPECT_EQ("42", caps[1]);
+}
+
+TYPED_TEST(BackendTest, matchAnchorBoth) {
+  const std::string_view text = "abc";
+  std::string_view caps[1];
+  TypeParam re("[a-z]+");
+  EXPECT_TRUE(re.Match(text, 0, text.size(), Anchor::kAnchorBoth, caps, 1));
+}
+
+TYPED_TEST(BackendTest, matchAnchorBothRejectsTrailing) {
+  const std::string_view text = "abc1"; // trailing digit breaks the full match
+  std::string_view caps[1];
+  TypeParam re("[a-z]+");
+  EXPECT_FALSE(re.Match(text, 0, text.size(), Anchor::kAnchorBoth, caps, 1));
+}
+
+TYPED_TEST(BackendTest, fullPartialMatch) {
+  TypeParam lowercase("[a-z]+");
+  EXPECT_TRUE(TypeParam::FullMatch("abc", lowercase));
+  EXPECT_FALSE(TypeParam::FullMatch("abc1", lowercase));
+  EXPECT_TRUE(TypeParam::PartialMatch("abc1", lowercase));
+}
+
+TYPED_TEST(BackendTest, globalReplaceNumbered) {
+  std::string text = "a1b22c333";
+  TypeParam re("(\\d+)");
+  const int replaced = TypeParam::GlobalReplace(&text, re, "[$1]");
+  EXPECT_EQ(3, replaced);
+  EXPECT_EQ("a[1]b[22]c[333]", text);
+}
+
+TYPED_TEST(BackendTest, globalReplaceNamed) {
+  TypeParam re("(?<n>\\d+)");
+  ASSERT_TRUE(re.ok()) << re.error();
+  std::string text = "a1b22c";
+  const int replaced = TypeParam::GlobalReplace(&text, re, "[${n}]");
+  EXPECT_EQ(2, replaced);
+  EXPECT_EQ("a[1]b[22]c", text);
+}
+
+TYPED_TEST(BackendTest, caseInsensitive) {
+  Options ignoreCase;
+  ignoreCase.caseSensitive = false;
+  TypeParam re("hello", ignoreCase);
+  EXPECT_TRUE(TypeParam::PartialMatch("HELLO world", re));
+}
+
+TYPED_TEST(BackendTest, dotAllOption) {
+  // `.` stops at '\n' by default and crosses it only when dotNl is enabled.
+  const std::string_view text = "ab\ncd";
+  const auto expectFirstMatch = [&text](
+                                    const Options& opt,
+                                    std::string_view expected) {
+    TypeParam re(".+", opt);
+    std::string_view caps[1];
+    EXPECT_TRUE(re.Match(text, 0, text.size(), Anchor::kUnanchored, caps, 1));
+    EXPECT_EQ(expected, caps[0]);
+  };
+
+  // Case 1: dotNl left at its default (false).
+  expectFirstMatch(Options{}, "ab"); // stopped at \n
+
+  Options dotAll;
+  dotAll.dotNl = true;
+  expectFirstMatch(dotAll, "ab\ncd"); // dot now matched \n
+}
+
+TYPED_TEST(BackendTest, multilineAnchors) {
+  Options multiline;
+  multiline.oneLine = false; // MULTILINE
+  const std::string_view text = "foo\nbar";
+  std::string_view caps[1];
+  TypeParam re("^bar", multiline);
+  EXPECT_TRUE(re.Match(text, 0, text.size(), Anchor::kUnanchored, caps, 1));
+  EXPECT_EQ("bar", caps[0]);
+}
+
+TYPED_TEST(BackendTest, emptyGroupMatch) {
+  // A group that took no part in the match has to come back as a
+  // default string_view, i.e. with data() == nullptr.
+  const std::string_view text = "a";
+  std::string_view caps[3];
+  TypeParam re("(a)|(b)");
+  EXPECT_TRUE(re.Match(text, 0, text.size(), Anchor::kAnchorBoth, caps, 3));
+  EXPECT_EQ("a", caps[0]);
+  EXPECT_EQ("a", caps[1]);
+  EXPECT_EQ(nullptr, caps[2].data()); // group 2 did not match
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/CMakeLists.txt b/velox/external/regex_compat/tests/CMakeLists.txt
new file mode 100644
index 00000000000..36adc8e6d3b
--- /dev/null
+++ b/velox/external/regex_compat/tests/CMakeLists.txt
@@ -0,0 +1,61 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# (Apache-2.0)
+
+# The OpenJDK 17 regex corpus is fetched at configure time from the pinned
+# jdk-17.0.13-ga tag and verified by SHA256.  TestCases/BMP/Supplementary are
+# processed by OpenJDK's own RegExTest.processFile; GraphemeTestCases uses the
+# Unicode GraphemeBreakTest format and has its own parser.
+set(_openjdk_corpus_dir "${CMAKE_CURRENT_BINARY_DIR}/openjdk_corpus")
+file(MAKE_DIRECTORY "${_openjdk_corpus_dir}")
+set(_openjdk_corpus_base
+  "https://raw.githubusercontent.com/openjdk/jdk17u/jdk-17.0.13-ga/test/jdk/java/util/regex")
+
+# Ensures ${_fname} exists in the corpus dir with SHA256 ${_fsha}.  A copy whose
+# hash is wrong (e.g. left by an interrupted download) is fetched again.
+function(_fetch_corpus_file _fname _fsha)
+  set(_path "${_openjdk_corpus_dir}/${_fname}")
+  message(STATUS "Checking OpenJDK regex corpus: ${_fname}")
+  # With EXPECTED_HASH, an existing file that already matches is not re-fetched.
+  file(DOWNLOAD "${_openjdk_corpus_base}/${_fname}" "${_path}"
+    EXPECTED_HASH SHA256=${_fsha}
+    SHOW_PROGRESS
+    STATUS _dl_status)
+  list(GET _dl_status 0 _dl_code)
+  if(NOT _dl_code EQUAL 0)
+    file(REMOVE "${_path}") # do not leave a partial file for the tests
+    message(WARNING "Failed to download ${_fname}: ${_dl_status}")
+  endif()
+endfunction()
+
+# Regex case files in the format that OpenJDK's RegExTest.processFile
+# understands.
+_fetch_corpus_file(TestCases.txt 1bf5c8a2a4fba557ff4e4a5d69d86bbd2a9e0c720b9a6455aa001526375ba946)
+_fetch_corpus_file(BMPTestCases.txt 6dbdfc4c64797831b798ad5d4b546f8cbfb2e76036018fe11013f168fc4f11f2)
+_fetch_corpus_file(SupplementaryTestCases.txt 96a56b7e3d0732f6cb30d307c9025517fdf515732d2ce1ae9e5496e30367a019)
+# Grapheme cases in the Unicode GraphemeBreakTest format, read by a separate
+# parser.
+_fetch_corpus_file(GraphemeTestCases.txt eda68465fe85d88d1c37a6411d1fe714fc5a5de3397bd73b1c10abb612722562)
+
+add_executable(velox_regex_compat_test TestMain.cpp)
+target_sources(velox_regex_compat_test PRIVATE
+  Re2RegexTest.cpp
+  Pcre2RegexTest.cpp
+  JavaRegexTest.cpp
+  BackendTypedTest.cpp
+  PatternPortedTest.cpp
+  MatcherMatchingPortedTest.cpp
+  MatcherReplacementPortedTest.cpp
+  MatcherResultsPortedTest.cpp
+  MatcherMatchResultPortedTest.cpp
+  PatternSplitPortedTest.cpp
+  MatcherUnicodePortedTest.cpp
+  RegExTestPortedTest.cpp
+  OpenJdkCorpusDiffTest.cpp)
+
+target_compile_definitions(velox_regex_compat_test
+  PRIVATE OPENJDK_CORPUS_DIR="${_openjdk_corpus_dir}") # corpus download dir
+# Link the backend library under test plus GoogleTest / GoogleMock.
+target_link_libraries(velox_regex_compat_test
+  PRIVATE velox_regex_compat GTest::gtest GTest::gmock)
+# Register the binary with CTest as a single test.
+add_test(NAME velox_regex_compat_test COMMAND velox_regex_compat_test)
diff --git a/velox/external/regex_compat/tests/JavaMatcherAdapter.h b/velox/external/regex_compat/tests/JavaMatcherAdapter.h
new file mode 100644
index 00000000000..b8de98ecdfc
--- /dev/null
+++ b/velox/external/regex_compat/tests/JavaMatcherAdapter.h
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "velox/external/regex_compat/RegexTypes.h"
+
+namespace facebook::velox::regex_compat::test {
+
+/// Header-only adapter that reconstructs Java `java.util.regex.Matcher`'s
+/// stateful API (`find()` cursor, `group(int)`, `start/end`, `replaceAll`,
+/// etc.) on top of the stateless, `re2::RE2`-shaped `Match()` method exposed
+/// by the three backend classes.
+///
+/// Lives in the test target only — the production backend classes
+/// deliberately do not carry this Matcher state, to keep their surface
+/// close to `re2::RE2`'s actual usage in Velox.
+template <typename R>
+class JavaMatcherAdapter {
+ public:
+  JavaMatcherAdapter(const R* re, std::string_view input)
+      : re_(re), // keeps the pointer; presumably `re` must outlive the adapter
+        input_(input),
+        regionStart_(0),
+        regionEnd_(input.size()), // region initially spans the whole input
+        // +1 for group 0 (full match).
+        groups_(re->NumberOfCapturingGroups() + 1) {}
+
+  // ----- find()/matches() family -----
+
+  /// Searches forward from the cursor left by the previous call.  On success
+  /// the cursor moves past the new match; on failure the matched state is
+  /// cleared and false is returned.
+  bool find() {
+    // An unusable regex or a cursor beyond the region can never match.
+    if (!re_->ok() || cursor_ > regionEnd_) {
+      matched_ = false;
+      return false;
+    }
+
+    matched_ = re_->Match(
+        input_,
+        cursor_,
+        regionEnd_,
+        Anchor::kUnanchored,
+        groups_.data(),
+        static_cast<int>(groups_.size()));
+
+    if (matched_) {
+      const std::size_t matchStart = matchBeg();
+      const std::size_t matchStop = matchEnd();
+      // An empty match would be found again at the same position, so step
+      // one further; this mirrors java.util.regex.Matcher.
+      cursor_ = (matchStart == matchStop) ? matchStop + 1 : matchStop;
+    }
+    return matched_;
+  }
+
+  /// Moves the cursor to `start` (the region is left as is), then `find()`s.
+  bool find(int start) {
+    cursor_ = static_cast<std::size_t>(start); // negative values wrap around
+    return find();
+  }
+
+  /// Java `Matcher.matches`: anchored at both region ends; cursor untouched.
+  bool matches() {
+    const int slots = static_cast<int>(groups_.size());
+    matched_ = re_->Match(
+        input_,
+        regionStart_,
+        regionEnd_,
+        Anchor::kAnchorBoth,
+        groups_.data(),
+        slots);
+    return matched_;
+  }
+
+  /// Java `Matcher.lookingAt`: anchored at the start of the active region only.
+  bool lookingAt() {
+    const int slots = static_cast<int>(groups_.size());
+    matched_ = re_->Match(
+        input_,
+        regionStart_,
+        regionEnd_,
+        Anchor::kAnchorStart,
+        groups_.data(),
+        slots);
+    return matched_;
+  }
+
+  void reset() {
+    matched_ = false; // forget the previous match
+    cursor_ = regionStart_; // next find() starts at the region start
+  }
+
+  void reset(std::string_view input) {
+    regionEnd_ = input.size(); // region spans the whole new input
+    regionStart_ = 0;
+    input_ = input;
+    reset();
+  }
+
+  /// Java `Matcher.region(start, end)`: limits matching to the given range and
+  /// rewinds the find() cursor to `start`.  Returns *this so calls can be
+  /// chained, like Java's fluent API.  The bounds are not validated here.
+  JavaMatcherAdapter& region(int start, int end) {
+    matched_ = false;
+    regionStart_ = static_cast<std::size_t>(start);
+    regionEnd_ = static_cast<std::size_t>(end);
+    cursor_ = regionStart_;
+    return *this;
+  }
+
+  // ----- Group accessors -----
+
+  int groupCount() const {
+    return re_->NumberOfCapturingGroups(); // group 0 is not counted
+  }
+
+  /// `Matcher.group(i)`: text captured by group `i`, with 0 meaning the whole
+  /// match.  Yields `std::nullopt` for a group that took no part in the last
+  /// match and throws `std::out_of_range` for an index outside the slots.
+  std::optional<std::string_view> group(int i) const {
+    requireMatched();
+    if (i < 0 || i >= static_cast<int>(groups_.size())) {
+      throw std::out_of_range("group index out of range");
+    }
+    if (groups_[i].data() != nullptr) {
+      return groups_[i];
+    }
+    return std::nullopt;
+  }
+
+  std::optional<std::string_view> group(const std::string& name) const {
+    requireMatched();
+    const auto& byName = re_->NamedCapturingGroups();
+    const auto entry = byName.find(name);
+    if (entry != byName.end()) {
+      return group(entry->second); // resolve the name, then reuse group(int)
+    }
+    throw std::out_of_range("unknown group name: " + name);
+  }
+
+  /// Offset of group `i` within the input (0 = whole match).
+  int start(int i = 0) const {
+    requireMatched();
+    if (i < 0 || i >= static_cast<int>(groups_.size())) {
+      throw std::out_of_range("group index out of range");
+    }
+    const char* first = groups_[i].data();
+    // -1 marks a group that did not participate in the match.
+    return first == nullptr ? -1 : static_cast<int>(first - input_.data());
+  }
+
+  int end(int i = 0) const {
+    requireMatched();
+    if (i < 0 || i >= static_cast<int>(groups_.size())) {
+      throw std::out_of_range("group index out of range");
+    }
+    const std::string_view& captured = groups_[i];
+    if (captured.data() == nullptr) {
+      return -1; // group did not participate in the match
+    }
+    return static_cast<int>(captured.data() + captured.size() - input_.data());
+  }
+
+  int start(const std::string& name) const {
+    const auto& byName = re_->NamedCapturingGroups();
+    const auto entry = byName.find(name);
+    if (entry != byName.end()) {
+      return start(entry->second); // resolve the name, then reuse start(int)
+    }
+    throw std::out_of_range("unknown group name: " + name);
+  }
+
+  /// `Matcher.end(name)`: end offset (one past the last character) of the
+  /// named group.  Throws `std::logic_error` when no match is available
+  /// (checked first, like `group(name)`), and `std::out_of_range` for an
+  /// unknown name.  Returns -1 when the group did not participate.
+  int end(const std::string& name) const {
+    // Check match state before the name lookup so an unknown name on an
+    // unmatched matcher reports "no match", consistent with group(name).
+    requireMatched();
+    const auto& named = re_->NamedCapturingGroups();
+    auto it = named.find(name);
+    if (it == named.end()) {
+      throw std::out_of_range("unknown group name: " + name);
+    }
+    return end(it->second);
+  }
+
+  // ----- Replacement -----
+
+  /// `Matcher.replaceAll(repl)`: copies the input and lets the backend's
+  /// `GlobalReplace` rewrite every match inside that copy.  The replacement
+  /// string (Java `$N` / `${name}` group references) is passed to the backend
+  /// unchanged.
+  std::string replaceAll(std::string_view javaReplacement) const {
+    std::string rewritten(input_);
+    R::GlobalReplace(&rewritten, *re_, javaReplacement);
+    return rewritten;
+  }
+
+  /// `Matcher.replaceFirst(repl)`: replaces only the first match.  A scratch
+  /// matcher over the same pattern and input locates that match, so this
+  /// matcher's own state is not modified.  The result is assembled as
+  /// prefix + expanded replacement + suffix; when nothing matches, a copy of
+  /// the input is returned.
+  std::string replaceFirst(std::string_view javaReplacement) {
+    JavaMatcherAdapter scratch(re_, input_);
+    if (scratch.find()) {
+      const std::size_t matchStart = scratch.matchBeg();
+      const std::size_t matchStop = scratch.matchEnd();
+      std::string result;
+      result.reserve(input_.size() + javaReplacement.size());
+      // Text before the match, then the expansion, then everything after.
+      result.append(input_.substr(0, matchStart));
+      result.append(expandJavaReplacement(javaReplacement, scratch.groups_));
+      result.append(input_.substr(matchStop));
+      return result;
+    }
+    return std::string(input_);
+  }
+
+  /// `Matcher.appendReplacement(sb, repl)`: stateful incremental replace.
+  /// Appends to `sb` the input between the previous append position and the
+  /// start of the current match, followed by the expanded replacement, then
+  /// advances the append position to the end of the match.
+  ///
+  /// Throws `std::logic_error` (mirrors Java's `IllegalStateException`) if no
+  /// match is available, and `std::out_of_range` if the current match starts
+  /// before the append position (Java raises `IndexOutOfBoundsException`
+  /// there).  Errors from expanding `javaReplacement` propagate before `sb`
+  /// is modified.
+  void appendReplacement(std::string& sb, std::string_view javaReplacement) {
+    if (!matched_) {
+      throw std::logic_error(
+          "appendReplacement: no match available (call find() first)");
+    }
+    const std::size_t s = matchBeg();
+    const std::size_t e = matchEnd();
+    // Guard the subtraction below: if the match begins before the append
+    // position, `s - lastAppendPos_` would wrap and substr() would silently
+    // append the rest of the input.
+    if (s < lastAppendPos_) {
+      throw std::out_of_range(
+          "appendReplacement: match starts before the last append position");
+    }
+    // Expand first so a malformed replacement leaves `sb` untouched.
+    const std::string replacement =
+        expandJavaReplacement(javaReplacement, groups_);
+    sb.append(input_.substr(lastAppendPos_, s - lastAppendPos_));
+    sb.append(replacement);
+    lastAppendPos_ = e;
+  }
+
+  /// `Matcher.appendTail(sb)`: appends the part of the input that follows the
+  /// last append position to `sb`.
+  void appendTail(std::string& sb) const {
+    const std::string_view remainder = input_.substr(lastAppendPos_);
+    sb.append(remainder);
+  }
+
+  /// `Matcher.quoteReplacement(s)` (static): returns `s` with a backslash
+  /// inserted before every `$` and `\`, so the result can be used as a
+  /// literal replacement string.
+  static std::string quoteReplacement(std::string_view s) {
+    std::string quoted;
+    quoted.reserve(s.size());
+    for (std::size_t pos = 0; pos < s.size(); ++pos) {
+      const char ch = s[pos];
+      const bool needsEscape = (ch == '$') || (ch == '\\');
+      if (needsEscape) {
+        quoted += '\\';
+      }
+      quoted += ch;
+    }
+    return quoted;
+  }
+
+ private:
+  /// Offset of the current whole match (slot 0) from the start of the input.
+  std::size_t matchBeg() const {
+    const char* wholeMatch = groups_[0].data();
+    return wholeMatch - input_.data();
+  }
+  /// Offset one past the last character of the current whole match.
+  std::size_t matchEnd() const {
+    const std::size_t wholeMatchSize = groups_[0].size();
+    return matchBeg() + wholeMatchSize;
+  }
+  /// Throws `std::logic_error` unless a match is currently available.
+  void requireMatched() const {
+    if (matched_) {
+      return;
+    }
+    throw std::logic_error("no match available");
+  }
+
+  // Expands a Java-style replacement string `r` against the capture slots
+  // `g` and returns the resulting text.  Used by replaceFirst() and
+  // appendReplacement(), which already hold the groups of the match being
+  // replaced, so there is no need to re-match through R::GlobalReplace.
+  //
+  // Syntax handled while scanning `r` left to right:
+  //   \x       the character x, copied literally
+  //   $N...    numbered group; extra digits are consumed only while the
+  //            longer number is still a valid index into `g`
+  //   ${name}  named group, resolved via re_->NamedCapturingGroups()
+  // A group whose slot has a null data pointer contributes nothing.
+  //
+  // Throws std::out_of_range for a numbered or named reference that has no
+  // slot in `g`, and std::invalid_argument for an unterminated `${`, a `$`
+  // followed by anything other than a digit or `{`, or a `$` that is the last
+  // character of `r`.
+  //
+  // NOTE(review): a backslash that is the last character of `r` falls through
+  // to the final branch and is copied verbatim; java.util.regex.Matcher
+  // rejects such input — confirm whether this leniency is intended.
+  std::string expandJavaReplacement(
+      std::string_view r,
+      const std::vector<std::string_view>& g) const {
+    std::string out;
+    out.reserve(r.size());
+    for (std::size_t i = 0; i < r.size(); ++i) {
+      char c = r[i];
+      if (c == '\\' && i + 1 < r.size()) {
+        // Escape: emit the following character literally and skip over it.
+        out.push_back(r[i + 1]);
+        ++i;
+      } else if (c == '$' && i + 1 < r.size()) {
+        char n = r[i + 1];
+        if (n >= '0' && n <= '9') {
+          // Numbered reference.  Start from the single digit and greedily
+          // take more digits only while the longer number is a valid slot.
+          int idx = n - '0';
+          std::size_t lastConsumed = i + 1;
+          for (std::size_t j = i + 2; j < r.size() && r[j] >= '0' &&
+               r[j] <= '9';
+               ++j) {
+            const int candidate = idx * 10 + (r[j] - '0');
+            if (candidate >= static_cast<int>(g.size())) {
+              break;
+            }
+            idx = candidate;
+            lastConsumed = j;
+          }
+          if (idx >= static_cast<int>(g.size())) {
+            throw std::out_of_range("replacement group index out of range");
+          }
+          if (g[idx].data() != nullptr) {
+            out.append(g[idx]);
+          }
+          // Resume after the last digit that became part of the index.
+          i = lastConsumed;
+        } else if (n == '{') {
+          // Named reference: the name is the text between `${` and the next
+          // `}`.
+          auto endBrace = r.find('}', i + 2);
+          if (endBrace == std::string_view::npos) {
+            throw std::invalid_argument("unterminated named replacement group");
+          }
+          const std::string name(r.substr(i + 2, endBrace - i - 2));
+          const auto& named = re_->NamedCapturingGroups();
+          auto it = named.find(name);
+          if (it == named.end() || it->second >= static_cast<int>(g.size())) {
+            throw std::out_of_range("unknown replacement group name: " + name);
+          }
+          if (g[it->second].data() != nullptr) {
+            out.append(g[it->second]);
+          }
+          // Resume after the closing brace.
+          i = endBrace;
+        } else {
+          throw std::invalid_argument("illegal replacement group reference");
+        }
+      } else if (c == '$') {
+        // Only reachable when `$` is the last character of `r`.
+        throw std::invalid_argument("dangling replacement group marker");
+      } else {
+        out.push_back(c);
+      }
+    }
+    return out;
+  }
+
+  // Backend regex used for matching and for group metadata.
+  // NOTE(review): raw pointer, presumably not owned by the adapter — confirm.
+  const R* re_;
+  // Text being matched.  A string_view does not own its buffer, so the
+  // underlying characters must outlive the adapter.
+  std::string_view input_;
+  // Region bounds as set by region().
+  std::size_t regionStart_ = 0;
+  std::size_t regionEnd_ = 0;
+  // find() cursor; region() moves it to regionStart_.
+  std::size_t cursor_ = 0;
+  // Input offset up to which appendReplacement() has already copied text;
+  // appendTail() copies from here to the end of the input.
+  std::size_t lastAppendPos_ = 0;
+  // True while a match is available to the group/start/end accessors.
+  bool matched_ = false;
+  // Capture slots of the current match: slot 0 is the whole match, and a slot
+  // with a null data pointer marks a group that did not participate.
+  std::vector<std::string_view> groups_;
+};
+
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/JavaRegexTest.cpp b/velox/external/regex_compat/tests/JavaRegexTest.cpp
new file mode 100644
index 00000000000..b1b16222501
--- /dev/null
+++ b/velox/external/regex_compat/tests/JavaRegexTest.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+
+#include "velox/external/regex_compat/JavaRegex.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace facebook::velox::regex_compat {
+namespace {
+
+// A well-formed pattern without groups compiles and reports zero captures.
+TEST(JavaRegexTest, compileOk) {
+  JavaRegex digits("\\d+");
+  EXPECT_TRUE(digits.ok());
+  EXPECT_EQ(0, digits.NumberOfCapturingGroups());
+}
+
+TEST(JavaRegexTest, compileError) {
+  JavaRegex re("(unclosed");
+  EXPECT_FALSE(re.ok());
+  EXPECT_FALSE(re.error().empty());
+}
+
+// A named group counts as one capturing group and, when the name table is
+// available, maps to index 1.
+TEST(JavaRegexTest, namedGroup) {
+  JavaRegex namedPattern("(?<num>\\d+)");
+  ASSERT_TRUE(namedPattern.ok()) << namedPattern.error();
+  EXPECT_EQ(1, namedPattern.NumberOfCapturingGroups());
+  // Pattern.namedGroups() is JDK 20+, so an empty table is tolerated.
+  const auto& nameTable = namedPattern.NamedCapturingGroups();
+  if (nameTable.empty()) {
+    return;
+  }
+  EXPECT_EQ(1, nameTable.at("num"));
+}
+
+// An unanchored match locates the digits mid-string and fills both the
+// whole-match slot and the first capture slot.
+TEST(JavaRegexTest, matchUnanchored) {
+  JavaRegex digits("(\\d+)");
+  std::string_view subject = "abc 42 xyz";
+  std::string_view captures[2];
+  const bool found = digits.Match(
+      subject, 0, subject.size(), Anchor::kUnanchored, captures, 2);
+  EXPECT_TRUE(found);
+  EXPECT_EQ("42", captures[0]);
+  EXPECT_EQ("42", captures[1]);
+}
+
+// With both ends anchored, an input made up entirely of letters matches.
+TEST(JavaRegexTest, matchAnchorBoth) {
+  JavaRegex letters("[a-z]+");
+  std::string_view subject = "abc";
+  std::string_view captures[1];
+  const bool found = letters.Match(
+      subject, 0, subject.size(), Anchor::kAnchorBoth, captures, 1);
+  EXPECT_TRUE(found);
+}
+
+// With both ends anchored, a trailing non-letter makes the match fail.
+TEST(JavaRegexTest, matchAnchorBothRejectsTrailing) {
+  JavaRegex letters("[a-z]+");
+  std::string_view subject = "abc1";
+  std::string_view captures[1];
+  const bool found = letters.Match(
+      subject, 0, subject.size(), Anchor::kAnchorBoth, captures, 1);
+  EXPECT_FALSE(found);
+}
+
+// FullMatch requires the whole input to match; PartialMatch accepts a
+// matching substring.
+TEST(JavaRegexTest, fullPartialMatch) {
+  JavaRegex letters("[a-z]+");
+  const bool fullOnLetters = JavaRegex::FullMatch("abc", letters);
+  EXPECT_TRUE(fullOnLetters);
+  const bool fullOnMixed = JavaRegex::FullMatch("abc1", letters);
+  EXPECT_FALSE(fullOnMixed);
+  const bool partialOnMixed = JavaRegex::PartialMatch("abc1", letters);
+  EXPECT_TRUE(partialOnMixed);
+}
+
+// GlobalReplace rewrites every match, expands `$1`, and returns the number
+// of replacements made.
+TEST(JavaRegexTest, globalReplaceWithNumberedGroup) {
+  JavaRegex digits("(\\d+)");
+  std::string text = "a1b22c333";
+  const int replaced = JavaRegex::GlobalReplace(&text, digits, "[$1]");
+  EXPECT_EQ(3, replaced);
+  EXPECT_EQ("a[1]b[22]c[333]", text);
+}
+
+// GlobalReplace expands a `${name}` reference for each match.
+TEST(JavaRegexTest, globalReplaceWithNamedGroup) {
+  JavaRegex namedDigits("(?<n>\\d+)");
+  ASSERT_TRUE(namedDigits.ok()) << namedDigits.error();
+  std::string text = "a1b22c";
+  const int replaced = JavaRegex::GlobalReplace(&text, namedDigits, "[${n}]");
+  EXPECT_EQ(2, replaced);
+  EXPECT_EQ("a[1]b[22]c", text);
+}
+
+// Turning off caseSensitive lets a lower-case pattern match upper-case text.
+TEST(JavaRegexTest, caseInsensitiveOption) {
+  Options ignoreCase;
+  ignoreCase.caseSensitive = false;
+  JavaRegex greeting("hello", ignoreCase);
+  const bool found = JavaRegex::PartialMatch("HELLO world", greeting);
+  EXPECT_TRUE(found);
+}
+
+// Java supports lookahead natively: the `px` suffix is required but is not
+// part of the reported match.
+TEST(JavaRegexTest, lookaheadSupported) {
+  JavaRegex pixels("\\d+(?=px)");
+  ASSERT_TRUE(pixels.ok()) << pixels.error();
+  std::string_view subject = "size 42px wide";
+  std::string_view captures[1];
+  const bool found = pixels.Match(
+      subject, 0, subject.size(), Anchor::kUnanchored, captures, 1);
+  EXPECT_TRUE(found);
+  EXPECT_EQ("42", captures[0]);
+}
+
+// Java supports backreferences natively: `\1` must repeat the first word.
+TEST(JavaRegexTest, backrefSupported) {
+  JavaRegex repeatedWord("(\\w+) \\1");
+  ASSERT_TRUE(repeatedWord.ok()) << repeatedWord.error();
+  const bool sameWord = JavaRegex::PartialMatch("hello hello", repeatedWord);
+  EXPECT_TRUE(sameWord);
+  const bool otherWord = JavaRegex::PartialMatch("hello world", repeatedWord);
+  EXPECT_FALSE(otherWord);
+}
+
+// Java's \p{InGreek} (Unicode block "Greek") is a Java-specific property
+// token that PCRE2 cannot understand natively.  The case acts as a sentinel
+// for the scope of the future Java->PCRE2 translator.  The subject carries
+// three UTF-8 encoded Greek letters between two ASCII words.
+TEST(JavaRegexTest, javaSpecificPropertyInLC) {
+  JavaRegex greekBlock("\\p{InGreek}+");
+  ASSERT_TRUE(greekBlock.ok()) << greekBlock.error();
+  const bool found = JavaRegex::PartialMatch(
+      "hello \xce\xb1\xce\xb2\xce\xb3 world", greekBlock);
+  EXPECT_TRUE(found);
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat
+
+#endif // VELOX_REGEX_COMPAT_HAS_JAVA
diff --git a/velox/external/regex_compat/tests/MatcherMatchResultPortedTest.cpp b/velox/external/regex_compat/tests/MatcherMatchResultPortedTest.cpp
new file mode 100644
index 00000000000..52c7e8b3b5e
--- /dev/null
+++ b/velox/external/regex_compat/tests/MatcherMatchResultPortedTest.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Cases ported from pcre4j's `MatcherMatchResultTests.java`.
+//
+// Most cases there exercise Java-specific `MatchResult` snapshot semantics
+// (immutability of the snapshot when the matcher advances, IllegalState/
+// IndexOutOfBounds/IllegalArgument exception contracts, namedGroups() map
+// equality, hasMatch() flag).  Those are Java API-contract tests, not
+// regex-engine behavior, so they are skipped here — they would produce
+// identical pass/fail across all three backends and add no engine-compat
+// signal.
+//
+// We port only the two cases that exercise engine behavior the existing
+// MatcherMatchingPortedTest doesn't already cover:
+//   * matchResultByGroupNumber       — 3 consecutive whitespace-separated
+//                                      capturing groups, sweep over all
+//                                      group indices.
+//   * matchResultNamedGroupAccessors — 3 named groups in a date pattern.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+// Typed-test fixture: the suite is instantiated for the backend types listed
+// in AllBackends.
+template <typename Backend>
+using MatchResultPortedTest = BackendTest<Backend>;
+TYPED_TEST_SUITE(MatchResultPortedTest, AllBackends);
+
+// Three whitespace-separated capturing groups: check text and offsets of the
+// whole match and of every group, by index.
+TYPED_TEST(MatchResultPortedTest, matchResultByGroupNumber) {
+  TypeParam threeWords("(\\w+)\\s+(\\w+)\\s+(\\w+)");
+  ASSERT_TRUE(threeWords.ok()) << threeWords.error();
+  JavaMatcherAdapter<TypeParam> matcher(&threeWords, "one two three");
+  ASSERT_TRUE(matcher.find());
+  EXPECT_EQ(3, matcher.groupCount());
+
+  // Captured text.
+  EXPECT_EQ("one two three", matcher.group(0).value());
+  EXPECT_EQ("one", matcher.group(1).value());
+  EXPECT_EQ("two", matcher.group(2).value());
+  EXPECT_EQ("three", matcher.group(3).value());
+
+  // Offsets: whole match first, then each group.
+  EXPECT_EQ(0, matcher.start(0));
+  EXPECT_EQ(13, matcher.end(0));
+  EXPECT_EQ(0, matcher.start(1));
+  EXPECT_EQ(3, matcher.end(1));
+  EXPECT_EQ(4, matcher.start(2));
+  EXPECT_EQ(7, matcher.end(2));
+  EXPECT_EQ(8, matcher.start(3));
+  EXPECT_EQ(13, matcher.end(3));
+}
+
+// Three named groups in a date pattern, accessed by name for both text and
+// offsets.  Skipped when the backend exposes no named-group table (same
+// guard as MatchingPortedTest.groupAccessorByName), because the by-name
+// accessors would otherwise throw std::out_of_range instead of producing a
+// meaningful result.
+TYPED_TEST(MatchResultPortedTest, matchResultNamedGroupAccessors) {
+  TypeParam re("(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})");
+  ASSERT_TRUE(re.ok()) << re.error();
+  if (re.NamedCapturingGroups().empty()) {
+    GTEST_SKIP() << "Backend doesn't expose named group table";
+  }
+  JavaMatcherAdapter<TypeParam> m(&re, "date: 2024-01-15");
+  ASSERT_TRUE(m.find());
+  EXPECT_EQ("2024", m.group("year").value());
+  EXPECT_EQ("01", m.group("month").value());
+  EXPECT_EQ("15", m.group("day").value());
+  EXPECT_EQ(6, m.start("year"));
+  EXPECT_EQ(10, m.end("year"));
+  EXPECT_EQ(11, m.start("month"));
+  EXPECT_EQ(13, m.end("month"));
+  EXPECT_EQ(14, m.start("day"));
+  EXPECT_EQ(16, m.end("day"));
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/MatcherMatchingPortedTest.cpp b/velox/external/regex_compat/tests/MatcherMatchingPortedTest.cpp
new file mode 100644
index 00000000000..78e2155c9c2
--- /dev/null
+++ b/velox/external/regex_compat/tests/MatcherMatchingPortedTest.cpp
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Cases ported from pcre4j's `MatcherMatchingTests.java`.  Same provenance
+// notes as PatternPortedTest.cpp.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <type_traits>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+// Typed-test fixture: the suite is instantiated for the backend types listed
+// in AllBackends.
+template <typename Backend>
+using MatchingPortedTest = BackendTest<Backend>;
+TYPED_TEST_SUITE(MatchingPortedTest, AllBackends);
+
+// Matcher.find(): repeated calls visit every match in input order.
+TYPED_TEST(MatchingPortedTest, findWalksAllMatches) {
+  TypeParam digits("\\d+");
+  ASSERT_TRUE(digits.ok()) << digits.error();
+  JavaMatcherAdapter<TypeParam> matcher(&digits, "a 1 b 22 c 333");
+  std::vector<std::string> hits;
+  for (bool more = matcher.find(); more; more = matcher.find()) {
+    hits.emplace_back(matcher.group(0).value());
+  }
+  EXPECT_THAT(hits, ::testing::ElementsAre("1", "22", "333"));
+}
+
+// Matcher.find(): returns false when the pattern occurs nowhere in the input.
+TYPED_TEST(MatchingPortedTest, findNoMatch) {
+  TypeParam absent("xyz");
+  ASSERT_TRUE(absent.ok()) << absent.error();
+  JavaMatcherAdapter<TypeParam> matcher(&absent, "abc def");
+  const bool found = matcher.find();
+  EXPECT_FALSE(found);
+}
+
+// Matcher.find(int): the search begins at the given offset, so the first
+// digit before it is not reported.
+TYPED_TEST(MatchingPortedTest, findWithStartIndex) {
+  TypeParam digits("\\d+");
+  ASSERT_TRUE(digits.ok()) << digits.error();
+  JavaMatcherAdapter<TypeParam> matcher(&digits, "1 2 3 4");
+  const bool found = matcher.find(2);
+  ASSERT_TRUE(found);
+  EXPECT_EQ("2", matcher.group(0).value());
+}
+
+// Matcher.matches(): succeeds when the pattern covers the entire input.
+TYPED_TEST(MatchingPortedTest, matchesFullInput) {
+  TypeParam digits("\\d+");
+  ASSERT_TRUE(digits.ok()) << digits.error();
+  JavaMatcherAdapter<TypeParam> matcher(&digits, "42");
+  const bool wholeInput = matcher.matches();
+  EXPECT_TRUE(wholeInput);
+}
+
+// Matcher.matches(): a trailing character the pattern cannot consume makes
+// the whole-input match fail.
+TYPED_TEST(MatchingPortedTest, matchesRejectsPartial) {
+  TypeParam digits("\\d+");
+  ASSERT_TRUE(digits.ok()) << digits.error();
+  JavaMatcherAdapter<TypeParam> matcher(&digits, "42x");
+  const bool wholeInput = matcher.matches();
+  EXPECT_FALSE(wholeInput);
+}
+
+// Matcher.lookingAt(): anchored at the start of the input, but the match may
+// stop before the end.
+TYPED_TEST(MatchingPortedTest, lookingAtPrefixOnly) {
+  TypeParam digits("\\d+");
+  ASSERT_TRUE(digits.ok()) << digits.error();
+  JavaMatcherAdapter<TypeParam> matcher(&digits, "42x");
+  const bool prefixMatched = matcher.lookingAt();
+  EXPECT_TRUE(prefixMatched);
+  EXPECT_EQ("42", matcher.group(0).value());
+}
+
+// Matcher.lookingAt(): a match that only begins after the first character is
+// rejected.
+TYPED_TEST(MatchingPortedTest, lookingAtRejectsLateMatch) {
+  TypeParam digits("\\d+");
+  ASSERT_TRUE(digits.ok()) << digits.error();
+  JavaMatcherAdapter<TypeParam> matcher(&digits, "x42");
+  const bool prefixMatched = matcher.lookingAt();
+  EXPECT_FALSE(prefixMatched);
+}
+
+// Matcher.group(int) together with the start()/end() accessors, for the
+// whole match and for each of the two capturing groups.
+TYPED_TEST(MatchingPortedTest, groupAccessor) {
+  TypeParam range("(\\d+)-(\\d+)");
+  ASSERT_TRUE(range.ok()) << range.error();
+  JavaMatcherAdapter<TypeParam> matcher(&range, "foo 10-200 bar");
+  ASSERT_TRUE(matcher.find());
+
+  // Captured text.
+  EXPECT_EQ("10-200", matcher.group(0).value());
+  EXPECT_EQ("10", matcher.group(1).value());
+  EXPECT_EQ("200", matcher.group(2).value());
+
+  // Offsets; the no-argument overloads refer to the whole match.
+  EXPECT_EQ(4, matcher.start());
+  EXPECT_EQ(10, matcher.end());
+  EXPECT_EQ(4, matcher.start(1));
+  EXPECT_EQ(6, matcher.end(1));
+  EXPECT_EQ(7, matcher.start(2));
+  EXPECT_EQ(10, matcher.end(2));
+}
+
+// Matcher.groupCount(): reports the four capturing groups of the pattern
+// without requiring a match first.
+TYPED_TEST(MatchingPortedTest, groupCountAccessor) {
+  TypeParam re("(a)(b)(c)(d)");
+  // Stop with a readable message if the backend rejects the pattern, rather
+  // than querying an uncompiled regex.
+  ASSERT_TRUE(re.ok()) << re.error();
+  JavaMatcherAdapter<TypeParam> m(&re, "abcd");
+  EXPECT_EQ(4, m.groupCount());
+}
+
+// Matcher.group(String): lookup by group name.  JavaRegex relies on the
+// JDK 20+ Pattern.namedGroups() API, which the build host has but other JDKs
+// may lack, so the case is skipped when the name table comes back empty.
+TYPED_TEST(MatchingPortedTest, groupAccessorByName) {
+  TypeParam range("(?<lo>\\d+)-(?<hi>\\d+)");
+  ASSERT_TRUE(range.ok()) << range.error();
+  const bool hasNameTable = !range.NamedCapturingGroups().empty();
+  if (!hasNameTable) {
+    GTEST_SKIP() << "Backend doesn't expose named group table";
+  }
+  JavaMatcherAdapter<TypeParam> matcher(&range, "10-200");
+  ASSERT_TRUE(matcher.find());
+  EXPECT_EQ("10", matcher.group("lo").value());
+  EXPECT_EQ("200", matcher.group("hi").value());
+}
+
+// Matcher.reset(): after two successful finds, a reset makes the next find
+// report the first match again.
+TYPED_TEST(MatchingPortedTest, resetRestartsCursor) {
+  TypeParam digit("\\d");
+  ASSERT_TRUE(digit.ok()) << digit.error();
+  JavaMatcherAdapter<TypeParam> matcher(&digit, "a1b2c3");
+
+  EXPECT_TRUE(matcher.find());
+  EXPECT_EQ("1", matcher.group(0).value());
+  EXPECT_TRUE(matcher.find());
+  EXPECT_EQ("2", matcher.group(0).value());
+
+  matcher.reset();
+
+  EXPECT_TRUE(matcher.find());
+  EXPECT_EQ("1", matcher.group(0).value());
+}
+
+// Matcher.reset(input): re-binds the matcher to a new input, so a pattern
+// that found nothing before can match afterwards.
+TYPED_TEST(MatchingPortedTest, resetWithNewInput) {
+  TypeParam digit("\\d");
+  ASSERT_TRUE(digit.ok()) << digit.error();
+  JavaMatcherAdapter<TypeParam> matcher(&digit, "abc");
+  EXPECT_FALSE(matcher.find());
+
+  matcher.reset("9 8 7");
+
+  EXPECT_TRUE(matcher.find());
+  EXPECT_EQ("9", matcher.group(0).value());
+}
+
+// Non-participating group sentinel: the untaken alternative reports
+// std::nullopt for its text and -1 for both offsets.
+TYPED_TEST(MatchingPortedTest, groupDidNotParticipate) {
+  TypeParam eitherLetter("(a)|(b)");
+  ASSERT_TRUE(eitherLetter.ok()) << eitherLetter.error();
+  JavaMatcherAdapter<TypeParam> matcher(&eitherLetter, "a");
+  ASSERT_TRUE(matcher.find());
+  EXPECT_EQ("a", matcher.group(1).value());
+  EXPECT_EQ(std::nullopt, matcher.group(2));
+  EXPECT_EQ(-1, matcher.start(2));
+  EXPECT_EQ(-1, matcher.end(2));
+}
+
+// pcre4j MatcherMatchingTests.captureGroups: the same groups are reachable by
+// index and, when the backend exposes a name table, by name.
+TYPED_TEST(MatchingPortedTest, captureGroupsByNameAndIndex) {
+  TypeParam wrapped("(?<four>4)(.*)(?<two>2)");
+  ASSERT_TRUE(wrapped.ok()) << wrapped.error();
+  JavaMatcherAdapter<TypeParam> matcher(&wrapped, "4test2");
+  ASSERT_TRUE(matcher.find());
+  EXPECT_EQ("4test2", matcher.group(0).value());
+  EXPECT_EQ("4", matcher.group(1).value());
+  EXPECT_EQ("test", matcher.group(2).value());
+  EXPECT_EQ("2", matcher.group(3).value());
+  EXPECT_EQ(3, matcher.groupCount());
+  const bool hasNameTable = !wrapped.NamedCapturingGroups().empty();
+  if (hasNameTable) {
+    EXPECT_EQ("4", matcher.group("four").value());
+    EXPECT_EQ("2", matcher.group("two").value());
+  }
+}
+
+// pcre4j MatcherMatchingTests.matchesTrueInRegion / matchesFalseRegion:
+// matches() must cover the whole region, not the whole input.
+TYPED_TEST(MatchingPortedTest, matchesWithinRegion) {
+  TypeParam re("42");
+  // Stop with a readable message if the backend rejects the pattern, rather
+  // than matching with an uncompiled regex.
+  ASSERT_TRUE(re.ok()) << re.error();
+  JavaMatcherAdapter<TypeParam> m(&re, "[42]");
+  EXPECT_TRUE(m.region(1, 3).matches());  // region "42" — full match
+  JavaMatcherAdapter<TypeParam> m2(&re, "[42!]");
+  EXPECT_FALSE(m2.region(1, 4).matches()); // region "42!" — not full
+}
+
+// pcre4j MatcherMatchingTests.lookingAtTrueInRegion / lookingAtFalseRegion:
+// lookingAt() anchors at the start of the region, not of the input.
+TYPED_TEST(MatchingPortedTest, lookingAtWithinRegion) {
+  TypeParam re("42");
+  // Stop with a readable message if the backend rejects the pattern, rather
+  // than matching with an uncompiled regex.
+  ASSERT_TRUE(re.ok()) << re.error();
+  JavaMatcherAdapter<TypeParam> m(&re, "[42]");
+  EXPECT_TRUE(m.region(1, 3).lookingAt());
+  JavaMatcherAdapter<TypeParam> m2(&re, "[!42]");
+  // Region "!42": the '!' comes first, so nothing matches at the region start.
+  EXPECT_FALSE(m2.region(1, 4).lookingAt());
+}
+
+// pcre4j MatcherMatchingTests.findTrueInRegion / findFalseInRegion: find()
+// searches inside the region only.
+TYPED_TEST(MatchingPortedTest, findWithinRegion) {
+  TypeParam re("42");
+  // Stop with a readable message if the backend rejects either pattern,
+  // rather than matching with an uncompiled regex.
+  ASSERT_TRUE(re.ok()) << re.error();
+  JavaMatcherAdapter<TypeParam> m(&re, "[42]");
+  EXPECT_TRUE(m.region(1, 3).find());
+  EXPECT_EQ("42", m.group(0).value());
+  TypeParam re2("42!");
+  ASSERT_TRUE(re2.ok()) << re2.error();
+  JavaMatcherAdapter<TypeParam> m2(&re2, "[42]");
+  EXPECT_FALSE(m2.region(1, 3).find());
+}
+
+// pcre4j MatcherMatchingTests.findFalseAtOffset: find(int) reports no match
+// when the pattern does not occur at or after the offset.
+TYPED_TEST(MatchingPortedTest, findFalseAtOffset) {
+  TypeParam re("42");
+  // Stop with a readable message if the backend rejects the pattern, rather
+  // than matching with an uncompiled regex.
+  ASSERT_TRUE(re.ok()) << re.error();
+  JavaMatcherAdapter<TypeParam> m(&re, "!!test");
+  EXPECT_FALSE(m.find(2));
+}
+
+// pcre4j MatcherMatchingTests.findMultipleWithinRegion: only the occurrences
+// that lie inside the region are reported, with input-relative offsets.
+TYPED_TEST(MatchingPortedTest, findMultipleWithinRegion) {
+  TypeParam re("42");
+  // Stop with a readable message if the backend rejects the pattern, rather
+  // than matching with an uncompiled regex.
+  ASSERT_TRUE(re.ok()) << re.error();
+  JavaMatcherAdapter<TypeParam> m(&re, "42!42!42!42");
+  m.region(2, 8); // region content: "!42!42!"
+  std::vector<int> matchStarts;
+  while (m.find()) {
+    matchStarts.push_back(m.start());
+  }
+  // Should match "42" at offsets 3 and 6 (within the region [2,8)).
+  EXPECT_THAT(matchStarts, ::testing::ElementsAre(3, 6));
+}
+
+// pcre4j MatcherMatchingTests.findMultipleOutsideRegion: occurrences that lie
+// entirely outside the region are not found.
+TYPED_TEST(MatchingPortedTest, findMultipleOutsideRegion) {
+  TypeParam re("42");
+  // Stop with a readable message if the backend rejects the pattern, rather
+  // than matching with an uncompiled regex.
+  ASSERT_TRUE(re.ok()) << re.error();
+  JavaMatcherAdapter<TypeParam> m(&re, "42!__!__!42");
+  m.region(2, 8); // region content: "!__!__!" — no "42" inside
+  EXPECT_FALSE(m.find());
+}
+
+// pcre4j MatcherMatchingTests.emptyGroup: `!*` produces a zero-width match at
+// offset 0 when the input has no '!' there.
+TYPED_TEST(MatchingPortedTest, emptyGroup) {
+  TypeParam bangs("!*");
+  ASSERT_TRUE(bangs.ok()) << bangs.error();
+  JavaMatcherAdapter<TypeParam> matcher(&bangs, "42");
+  const bool found = matcher.find();
+  ASSERT_TRUE(found);
+  EXPECT_EQ("", matcher.group(0).value());
+  EXPECT_EQ(0, matcher.start());
+  EXPECT_EQ(0, matcher.end());
+  EXPECT_EQ(0, matcher.groupCount());
+}
+
+// pcre4j MatcherMatchingTests.unmatchedGroups: in an alternation only the
+// branch that was taken participates; the other group reports std::nullopt.
+TYPED_TEST(MatchingPortedTest, unmatchedGroupsInAlternation) {
+  TypeParam punctuated("42((?<exclamation>!)|(?<question>\\?))");
+  ASSERT_TRUE(punctuated.ok()) << punctuated.error();
+  JavaMatcherAdapter<TypeParam> matcher(&punctuated, "42!");
+  ASSERT_TRUE(matcher.find());
+  EXPECT_EQ("42!", matcher.group(0).value());
+  // Group 1 is the outer wrapper, group 2 the '!' branch, group 3 the '?'
+  // branch that did not take part.
+  EXPECT_EQ("!", matcher.group(1).value());
+  EXPECT_EQ("!", matcher.group(2).value());
+  EXPECT_EQ(std::nullopt, matcher.group(3));
+  EXPECT_EQ(3, matcher.groupCount());
+  const bool hasNameTable = !punctuated.NamedCapturingGroups().empty();
+  if (hasNameTable) {
+    EXPECT_EQ("!", matcher.group("exclamation").value());
+    EXPECT_EQ(std::nullopt, matcher.group("question"));
+  }
+}
+
+// pcre4j MatcherMatchingTests.positiveLookaround: optional lookbehind and
+// lookahead around a run of digits.  Asserts Java semantics: the pattern
+// compiles and matches "42" in "(42)".  RE2 has no lookaround support, so the
+// case is skipped (not failed) for that backend.
+TYPED_TEST(MatchingPortedTest, positiveLookaround) {
+  if constexpr (std::is_same_v<TypeParam, Re2Regex>) {
+    GTEST_SKIP() << "RE2 does not support lookaround";
+  }
+  TypeParam wrappedDigits(
+      "(?<=(?<lWrapper>\\W))?(\\d+)(?=(?<rWrapper>\\W))?");
+  ASSERT_TRUE(wrappedDigits.ok()) << wrappedDigits.error();
+  JavaMatcherAdapter<TypeParam> matcher(&wrappedDigits, "(42)");
+  const bool found = matcher.find();
+  ASSERT_TRUE(found);
+  EXPECT_EQ("42", matcher.group(0).value());
+}
+
+// pcre4j MatcherMatchingTests.positiveUnmatchedLookaround: the digits still
+// match when the optional lookbehind cannot be satisfied at the start of the
+// input.  Skipped for RE2, which has no lookaround support.
+TYPED_TEST(MatchingPortedTest, positiveUnmatchedLookaround) {
+  if constexpr (std::is_same_v<TypeParam, Re2Regex>) {
+    GTEST_SKIP() << "RE2 does not support lookaround";
+  }
+  TypeParam wrappedDigits(
+      "(?<=(?<lWrapper>\\W))?(\\d+)(?=(?<rWrapper>\\W))?");
+  ASSERT_TRUE(wrappedDigits.ok()) << wrappedDigits.error();
+  JavaMatcherAdapter<TypeParam> matcher(&wrappedDigits, "42]");
+  const bool found = matcher.find();
+  ASSERT_TRUE(found);
+  EXPECT_EQ("42", matcher.group(0).value());
+}
+
+// pcre4j MatcherMatchingTests.emptyStringMatches: "^$" matches an empty
+// input in full.
+TYPED_TEST(MatchingPortedTest, emptyStringMatches) {
+  TypeParam emptyLine("^$");
+  ASSERT_TRUE(emptyLine.ok()) << emptyLine.error();
+  JavaMatcherAdapter<TypeParam> matcher(&emptyLine, "");
+  const bool wholeInput = matcher.matches();
+  EXPECT_TRUE(wholeInput);
+}
+
+// pcre4j MatcherMatchingTests.emptyStringFind: "^$" finds a zero-width match
+// at offset 0 of an empty input.
+TYPED_TEST(MatchingPortedTest, emptyStringFind) {
+  TypeParam emptyLine("^$");
+  ASSERT_TRUE(emptyLine.ok()) << emptyLine.error();
+  JavaMatcherAdapter<TypeParam> matcher(&emptyLine, "");
+  const bool found = matcher.find();
+  ASSERT_TRUE(found);
+  EXPECT_EQ(0, matcher.start());
+  EXPECT_EQ(0, matcher.end());
+  EXPECT_EQ("", matcher.group(0).value());
+  EXPECT_EQ(0, matcher.groupCount());
+}
+
+// pcre4j MatcherMatchingTests.findAtEndOfString: find(len(input)) with the
+// pattern "$" reports a zero-width match at the end of the input.
+TYPED_TEST(MatchingPortedTest, findAtEndOfString) {
+  TypeParam endAnchor("$");
+  ASSERT_TRUE(endAnchor.ok()) << endAnchor.error();
+  JavaMatcherAdapter<TypeParam> matcher(&endAnchor, "abc");
+  const bool found = matcher.find(3);
+  EXPECT_TRUE(found);
+}
+
+// pcre4j MatcherMatchingTests.findExhaustedInRegion: find() succeeds once per
+// match inside the region and then reports exhaustion.
+TYPED_TEST(MatchingPortedTest, findExhaustedInRegion) {
+  TypeParam letterA("a");
+  ASSERT_TRUE(letterA.ok()) << letterA.error();
+  JavaMatcherAdapter<TypeParam> matcher(&letterA, "aaa");
+  matcher.region(0, 2); // region "aa"
+  const bool first = matcher.find();
+  EXPECT_TRUE(first); // first 'a'
+  const bool second = matcher.find();
+  EXPECT_TRUE(second); // second 'a'
+  const bool third = matcher.find();
+  EXPECT_FALSE(third); // no more in region
+}
+
+// pcre4j MatcherMatchingTests.findWithZeroWidthMatchExhaustsRegion: per the
+// Java spec, `$` matches once (zero-width) at the region end and then the
+// search is exhausted.  Skipped for RE2, whose `$` anchors to the full
+// subject instead of the region.
+TYPED_TEST(MatchingPortedTest, findWithZeroWidthMatchExhaustsRegion) {
+  if constexpr (std::is_same_v<TypeParam, Re2Regex>) {
+    GTEST_SKIP() << "RE2 $ anchors to the full subject, not the match region";
+  }
+  TypeParam endAnchor("$");
+  ASSERT_TRUE(endAnchor.ok()) << endAnchor.error();
+  JavaMatcherAdapter<TypeParam> matcher(&endAnchor, "ab");
+  matcher.region(0, 1);
+  const bool found = matcher.find();
+  ASSERT_TRUE(found);
+  EXPECT_EQ(1, matcher.start());
+  EXPECT_EQ(1, matcher.end());
+  const bool foundAgain = matcher.find();
+  EXPECT_FALSE(foundAgain);
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/MatcherReplacementPortedTest.cpp b/velox/external/regex_compat/tests/MatcherReplacementPortedTest.cpp
new file mode 100644
index 00000000000..7ac84db1835
--- /dev/null
+++ b/velox/external/regex_compat/tests/MatcherReplacementPortedTest.cpp
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Cases ported from pcre4j's `MatcherReplacementTests.java`.  Same
+// provenance notes as PatternPortedTest.cpp.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+template <typename Backend>
+using ReplacementPortedTest = BackendTest<Backend>;
+TYPED_TEST_SUITE(ReplacementPortedTest, AllBackends);
+
+// replaceAll with a plain-text replacement (no group references).
+TYPED_TEST(ReplacementPortedTest, replaceAllLiteral) {
+  TypeParam regex("o");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  std::string text("foo bar");
+  const int replaced = TypeParam::GlobalReplace(&text, regex, "0");
+  EXPECT_EQ(2, replaced);
+  EXPECT_EQ("f00 bar", text);
+}
+
+// replaceAll: numbered group reference ($1) in the replacement.
+TYPED_TEST(ReplacementPortedTest, replaceAllNumberedGroup) {
+  TypeParam re("(\\d+)");
+  ASSERT_TRUE(re.ok()) << re.error(); // report a rejected pattern directly
+  std::string s = "abc 42 xyz 7";
+  EXPECT_EQ(2, TypeParam::GlobalReplace(&s, re, "<$1>"));
+  EXPECT_EQ("abc <42> xyz <7>", s);
+}
+
+// replaceAll: $0 in the replacement stands for the whole match.
+TYPED_TEST(ReplacementPortedTest, replaceAllZeroGroupRef) {
+  TypeParam re("\\d+");
+  ASSERT_TRUE(re.ok()) << re.error(); // report a rejected pattern directly
+  std::string s = "a 1 b 2";
+  EXPECT_EQ(2, TypeParam::GlobalReplace(&s, re, "[$0]"));
+  EXPECT_EQ("a [1] b [2]", s);
+}
+
+// replaceAll with a named-group reference (${name}) in the replacement.
+TYPED_TEST(ReplacementPortedTest, replaceAllNamedGroup) {
+  TypeParam regex("(?<digit>\\d+)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  std::string text("a 1 b 22");
+  const int replaced = TypeParam::GlobalReplace(&text, regex, "[${digit}]");
+  EXPECT_EQ(2, replaced);
+  EXPECT_EQ("a [1] b [22]", text);
+}
+
+// replaceAll: a backslash-escaped dollar sign is emitted literally.
+TYPED_TEST(ReplacementPortedTest, replaceAllEscapedDollar) {
+  TypeParam regex("x");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  std::string text("x x");
+  const int replaced = TypeParam::GlobalReplace(&text, regex, "\\$");
+  EXPECT_EQ(2, replaced);
+  EXPECT_EQ("$ $", text);
+}
+
+// replaceAll: a doubled backslash in the replacement yields one literal '\'.
+TYPED_TEST(ReplacementPortedTest, replaceAllEscapedBackslash) {
+  TypeParam regex("x");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  std::string text("x");
+  // The C++ literal "\\\\" is the two-character string `\\`; Java replacement
+  // syntax reads that as an escaped backslash, i.e. a single literal '\'.
+  const int replaced = TypeParam::GlobalReplace(&text, regex, "\\\\");
+  EXPECT_EQ(1, replaced);
+  EXPECT_EQ("\\", text);
+}
+
+// replaceAll: when the pattern never matches, the input is left unchanged.
+TYPED_TEST(ReplacementPortedTest, replaceAllNoMatchKeepsInput) {
+  TypeParam re("z+");
+  ASSERT_TRUE(re.ok()) << re.error(); // a broken pattern must not pass as 0
+  std::string s = "hello";
+  EXPECT_EQ(0, TypeParam::GlobalReplace(&s, re, "X"));
+  EXPECT_EQ("hello", s);
+}
+
+// replaceAll: the replacement may combine several groups ($2 then $1).
+TYPED_TEST(ReplacementPortedTest, replaceAllMultiGroupCombination) {
+  TypeParam regex("(\\w+) (\\w+)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  std::string text("hello world");
+  const int replaced = TypeParam::GlobalReplace(&text, regex, "$2 $1");
+  EXPECT_EQ(1, replaced);
+  EXPECT_EQ("world hello", text);
+}
+
+// replaceFirst (through the adapter) rewrites only the first match.
+TYPED_TEST(ReplacementPortedTest, replaceFirstOnlyFirst) {
+  TypeParam regex("\\d+");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "a 1 b 2 c 3");
+  const std::string result = matcher.replaceFirst("X");
+  EXPECT_EQ("a X b 2 c 3", result);
+}
+
+// replaceFirst expands a group reference for the first match only.
+TYPED_TEST(ReplacementPortedTest, replaceFirstWithGroupRef) {
+  TypeParam regex("(\\d+)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "a 1 b 22 c 333");
+  const std::string result = matcher.replaceFirst("[$1]");
+  EXPECT_EQ("a [1] b 22 c 333", result);
+}
+
+// Empty replacement string: every match is deleted (pcre4j edge case).
+TYPED_TEST(ReplacementPortedTest, replaceAllEmptyReplacement) {
+  TypeParam re("\\d+");
+  ASSERT_TRUE(re.ok()) << re.error(); // report a rejected pattern directly
+  std::string s = "a 1 b 22";
+  EXPECT_EQ(2, TypeParam::GlobalReplace(&s, re, ""));
+  EXPECT_EQ("a  b ", s);
+}
+
+// ============== Newly ported from pcre4j MatcherReplacementTests ==============
+
+// pcre4j quoteReplacement(...): `\` and `$` come back backslash-escaped.
+TYPED_TEST(ReplacementPortedTest, quoteReplacementBasic) {
+  EXPECT_EQ("hello", JavaMatcherAdapter<TypeParam>::quoteReplacement("hello"));
+}
+TYPED_TEST(ReplacementPortedTest, quoteReplacementBackslash) {
+  using Adapter = JavaMatcherAdapter<TypeParam>;
+  EXPECT_EQ("hello\\\\world", Adapter::quoteReplacement("hello\\world"));
+}
+TYPED_TEST(ReplacementPortedTest, quoteReplacementDollar) {
+  using Adapter = JavaMatcherAdapter<TypeParam>;
+  EXPECT_EQ("price: \\$100", Adapter::quoteReplacement("price: $100"));
+}
+TYPED_TEST(ReplacementPortedTest, quoteReplacementBoth) {
+  using Adapter = JavaMatcherAdapter<TypeParam>;
+  EXPECT_EQ("\\$100 \\\\ \\$200", Adapter::quoteReplacement("$100 \\ $200"));
+}
+TYPED_TEST(ReplacementPortedTest, quoteReplacementEmpty) {
+  EXPECT_EQ("", JavaMatcherAdapter<TypeParam>::quoteReplacement(""));
+}
+
+// pcre4j replaceAllBasic: a single literal match is swapped for new text.
+TYPED_TEST(ReplacementPortedTest, replaceAllBasic) {
+  TypeParam regex("world");
+  std::string text("hello world");
+  TypeParam::GlobalReplace(&text, regex, "universe");
+  EXPECT_EQ("hello universe", text);
+}
+
+// pcre4j replaceAllMultiple: every occurrence is replaced and counted.
+TYPED_TEST(ReplacementPortedTest, replaceAllMultiple) {
+  TypeParam re("o");
+  ASSERT_TRUE(re.ok()) << re.error(); // report a rejected pattern directly
+  std::string s = "hello world";
+  EXPECT_EQ(2, TypeParam::GlobalReplace(&s, re, "0"));
+  EXPECT_EQ("hell0 w0rld", s);
+}
+
+// pcre4j replaceAllWithGroupReference.  replaceAllNumberedGroup already
+// covers this; the case is kept so the pcre4j name has a counterpart.
+TYPED_TEST(ReplacementPortedTest, replaceAllWithGroupReference) {
+  TypeParam regex("(\\d+)");
+  std::string text("value: 42");
+  TypeParam::GlobalReplace(&text, regex, "<$1>");
+  EXPECT_EQ("value: <42>", text);
+}
+
+// pcre4j replaceAllWithNamedGroupReference: ${digit} expands per match.
+TYPED_TEST(ReplacementPortedTest, replaceAllWithNamedGroupReferenceBasic) {
+  TypeParam regex("(?<digit>\\d+)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  std::string text("value: 42");
+  TypeParam::GlobalReplace(&text, regex, "<${digit}>");
+  EXPECT_EQ("value: <42>", text);
+}
+
+// pcre4j replaceAllUnicode: 4-byte UTF-8 pattern and replacement.
+TYPED_TEST(ReplacementPortedTest, replaceAllUnicode) {
+  TypeParam regex("\xf0\x9f\x8c\x90"); // U+1F310 globe
+  std::string text("hi \xf0\x9f\x8c\x90 there");
+  TypeParam::GlobalReplace(&text, regex, "\xf0\x9f\x8c\x8d"); // U+1F30D earth
+  EXPECT_EQ("hi \xf0\x9f\x8c\x8d there", text);
+}
+
+// pcre4j replaceFirstBasic: only the first 'o' is replaced.
+TYPED_TEST(ReplacementPortedTest, replaceFirstBasic) {
+  TypeParam regex("o");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "foo bar");
+  EXPECT_EQ("f0o bar", matcher.replaceFirst("0"));
+}
+
+// pcre4j replaceFirstWithGroupReference: $1 is expanded for the first hit.
+TYPED_TEST(ReplacementPortedTest, replaceFirstWithGroupReferenceMulti) {
+  TypeParam regex("(\\d+)");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "a 1 b 22 c");
+  EXPECT_EQ("a <1> b 22 c", matcher.replaceFirst("<$1>"));
+}
+
+// pcre4j replaceFirstNoMatch: without a match the input comes back as-is.
+TYPED_TEST(ReplacementPortedTest, replaceFirstNoMatch) {
+  TypeParam regex("xyz");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hello world");
+  EXPECT_EQ("hello world", matcher.replaceFirst("ZZZ"));
+}
+
+// pcre4j replaceAllWithFullMatchReference: $0 refers to the whole match.
+TYPED_TEST(ReplacementPortedTest, replaceAllWithFullMatchReference) {
+  TypeParam regex("\\w+");
+  std::string text("hello world");
+  TypeParam::GlobalReplace(&text, regex, "[$0]");
+  EXPECT_EQ("[hello] [world]", text);
+}
+
+// pcre4j replaceAllWithNamedGroupReferenceYearMonth: two named groups swap.
+TYPED_TEST(ReplacementPortedTest, replaceAllWithNamedGroupReferenceYearMonth) {
+  TypeParam regex("(?<year>\\d{4})-(?<month>\\d{2})");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  std::string text("date: 2024-01, also 2025-12");
+  TypeParam::GlobalReplace(&text, regex, "${month}/${year}");
+  EXPECT_EQ("date: 01/2024, also 12/2025", text);
+}
+
+// pcre4j replaceFirstWithFullMatchReference: $0 wraps the first word only.
+TYPED_TEST(ReplacementPortedTest, replaceFirstWithFullMatchReference) {
+  TypeParam regex("\\w+");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hello world");
+  EXPECT_EQ("[hello] world", matcher.replaceFirst("[$0]"));
+}
+
+// pcre4j appendReplacementStringBuilder: appendReplacement + appendTail walk.
+TYPED_TEST(ReplacementPortedTest, appendReplacementBasic) {
+  TypeParam regex("(\\w+)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "one two three");
+  std::string buffer;
+  while (matcher.find()) {
+    matcher.appendReplacement(buffer, "[$1]");
+  }
+  matcher.appendTail(buffer);
+  EXPECT_EQ("[one] [two] [three]", buffer);
+}
+
+// pcre4j appendReplacementWithNamedGroup: ${word} expands for each match.
+TYPED_TEST(ReplacementPortedTest, appendReplacementWithNamedGroup) {
+  TypeParam regex("(?<word>\\w+)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "one two three");
+  std::string buffer;
+  while (matcher.find()) {
+    matcher.appendReplacement(buffer, "${word}!");
+  }
+  matcher.appendTail(buffer);
+  EXPECT_EQ("one! two! three!", buffer);
+}
+
+// pcre4j appendReplacementEscapedCharacters: an escaped `$` and an escaped
+// `\` in the replacement come out as the literal text `$\`.
+TYPED_TEST(ReplacementPortedTest, appendReplacementEscapedCharacters) {
+  TypeParam regex("\\d+");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "test123value");
+  std::string buffer;
+  while (matcher.find()) {
+    // Runtime string is the 4 chars \ $ \ \ : `\$` -> '$' and `\\` -> '\'.
+    matcher.appendReplacement(buffer, "\\$\\\\");
+  }
+  matcher.appendTail(buffer);
+  EXPECT_EQ("test$\\value", buffer);
+}
+
+// pcre4j appendReplacementLiteralText: replacement has no group references.
+TYPED_TEST(ReplacementPortedTest, appendReplacementLiteralText) {
+  TypeParam regex("world");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hello world!");
+  std::string buffer;
+  while (matcher.find()) {
+    matcher.appendReplacement(buffer, "universe");
+  }
+  matcher.appendTail(buffer);
+  EXPECT_EQ("hello universe!", buffer);
+}
+
+// pcre4j appendTailOnly: with no match consumed, appendTail echoes the input.
+TYPED_TEST(ReplacementPortedTest, appendTailOnly) {
+  TypeParam regex("xyz");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hello world");
+  std::string buffer;
+  // find() is never called, so appendTail copies the entire subject.
+  matcher.appendTail(buffer);
+  EXPECT_EQ("hello world", buffer);
+}
+
+// pcre4j appendReplacementNoMatch: Java throws IllegalStateException when
+// appendReplacement runs before a successful find(); here: std::logic_error.
+TYPED_TEST(ReplacementPortedTest, appendReplacementWithoutMatchThrows) {
+  TypeParam regex("\\d+");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hello world");
+  std::string buffer;
+  EXPECT_THROW(matcher.appendReplacement(buffer, "test"), std::logic_error);
+}
+
+// pcre4j appendReplacementMultipleGroups: "$3$2$1" reverses each 3-char run.
+TYPED_TEST(ReplacementPortedTest, appendReplacementMultipleGroups) {
+  TypeParam regex("(\\w)(\\w)(\\w)");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "abc def ghi");
+  std::string buffer;
+  while (matcher.find()) {
+    matcher.appendReplacement(buffer, "$3$2$1");
+  }
+  matcher.appendTail(buffer);
+  EXPECT_EQ("cba fed ihg", buffer);
+}
+
+// pcre4j appendReplacementGroupZero: $0 inserts the whole match.
+TYPED_TEST(ReplacementPortedTest, appendReplacementGroupZero) {
+  TypeParam regex("\\w+");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hello world");
+  std::string buffer;
+  while (matcher.find()) {
+    matcher.appendReplacement(buffer, "[$0]");
+  }
+  matcher.appendTail(buffer);
+  EXPECT_EQ("[hello] [world]", buffer);
+}
+
+// pcre4j appendReplacementUnicode: 4-byte UTF-8 needle and replacement.
+TYPED_TEST(ReplacementPortedTest, appendReplacementUnicode) {
+  TypeParam regex("\xf0\x9f\x8c\x90"); // U+1F310 globe
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hi \xf0\x9f\x8c\x90 there");
+  std::string buffer;
+  while (matcher.find()) {
+    matcher.appendReplacement(buffer, "\xf0\x9f\x8c\x8d"); // U+1F30D earth
+  }
+  matcher.appendTail(buffer);
+  EXPECT_EQ("hi \xf0\x9f\x8c\x8d there", buffer);
+}
+
+// pcre4j appendReplacementWithEscapedDollarSign: the replacement "\\$5" is
+// the literal text "$5", not a reference to group 5.
+TYPED_TEST(ReplacementPortedTest, appendReplacementWithEscapedDollarSign) {
+  TypeParam regex("\\d+");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "price: 100");
+  std::string buffer;
+  while (matcher.find()) {
+    matcher.appendReplacement(buffer, "\\$5");
+  }
+  matcher.appendTail(buffer);
+  EXPECT_EQ("price: $5", buffer);
+}
+
+// pcre4j appendReplacementBackslashEscapesNextChar: `\X` emits X literally.
+TYPED_TEST(ReplacementPortedTest, appendReplacementBackslashEscapesNextChar) {
+  TypeParam regex("x");
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "x");
+  std::string buffer;
+  ASSERT_TRUE(matcher.find());
+  matcher.appendReplacement(buffer, "\\$\\\\\\?");
+  matcher.appendTail(buffer);
+  // Java syntax: `\$` -> '$', `\\` -> '\', `\?` -> '?'.  Result: `$\?`.
+  EXPECT_EQ("$\\?", buffer);
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/MatcherResultsPortedTest.cpp b/velox/external/regex_compat/tests/MatcherResultsPortedTest.cpp
new file mode 100644
index 00000000000..dbb07b51c56
--- /dev/null
+++ b/velox/external/regex_compat/tests/MatcherResultsPortedTest.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Cases ported from pcre4j's `MatcherResultsTests.java`.  Java's
+// `Matcher.results()` returns a Stream<MatchResult>; we model it as a
+// find()-loop that snapshots (start, end, group) per match.  Cases that
+// depend purely on Java's stream API (sum reductions, etc.) are skipped.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+template <typename Backend>
+using ResultsPortedTest = BackendTest<Backend>;
+TYPED_TEST_SUITE(ResultsPortedTest, AllBackends);
+
+// Drains `m` with find() and records (start, end, group(0)) for each match.
+template <typename R>
+std::vector<std::tuple<int, int, std::string>> snapshotAll(
+    JavaMatcherAdapter<R>& m) {
+  std::vector<std::tuple<int, int, std::string>> matches;
+  while (m.find()) {
+    matches.emplace_back(m.start(), m.end(), std::string(m.group(0).value()));
+  }
+  return matches;
+}
+
+TYPED_TEST(ResultsPortedTest, resultsBasic) {
+  TypeParam regex("\\d+");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "a1b22c333d");
+  const auto found = snapshotAll(matcher); // (start, end, text) per match
+  ASSERT_EQ(3u, found.size());
+  EXPECT_EQ(std::make_tuple(1, 2, std::string("1")), found[0]);
+  EXPECT_EQ(std::make_tuple(3, 5, std::string("22")), found[1]);
+  EXPECT_EQ(std::make_tuple(6, 9, std::string("333")), found[2]);
+}
+
+TYPED_TEST(ResultsPortedTest, resultsNoMatches) {
+  TypeParam regex("xyz");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hello world");
+  EXPECT_TRUE(snapshotAll(matcher).empty()); // "xyz" is not in the subject
+}
+
+TYPED_TEST(ResultsPortedTest, resultsSingleMatch) {
+  TypeParam regex("world");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "hello world!");
+  const auto found = snapshotAll(matcher);
+  ASSERT_EQ(1u, found.size());
+  EXPECT_EQ(std::make_tuple(6, 11, std::string("world")), found[0]);
+}
+
+TYPED_TEST(ResultsPortedTest, resultsWithGroups) {
+  TypeParam regex("(\\w)(\\d)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "a1 b2 c3");
+  std::vector<std::tuple<std::string, std::string, std::string>> rows;
+  while (matcher.find()) { // one (group 0, group 1, group 2) row per match
+    rows.emplace_back(
+        std::string(matcher.group(0).value()),
+        std::string(matcher.group(1).value()),
+        std::string(matcher.group(2).value()));
+  }
+  ASSERT_EQ(3u, rows.size());
+  EXPECT_EQ(std::make_tuple("a1", "a", "1"), rows[0]);
+  EXPECT_EQ(std::make_tuple("b2", "b", "2"), rows[1]);
+  EXPECT_EQ(std::make_tuple("c3", "c", "3"), rows[2]);
+}
+
+// Each snapshot is a copy, so entries collected earlier stay intact.
+TYPED_TEST(ResultsPortedTest, resultsImmutableSnapshots) {
+  TypeParam regex("\\w+");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "one two three");
+  const auto found = snapshotAll(matcher);
+  ASSERT_EQ(3u, found.size());
+  EXPECT_EQ(std::make_tuple(0, 3, std::string("one")), found[0]);
+  EXPECT_EQ(std::make_tuple(4, 7, std::string("two")), found[1]);
+  EXPECT_EQ(std::make_tuple(8, 13, std::string("three")), found[2]);
+}
+
+// Zero-width matches produced by a positive lookahead; skipped for RE2.
+TYPED_TEST(ResultsPortedTest, resultsZeroWidthMatches) {
+  if constexpr (std::is_same_v<TypeParam, Re2Regex>) {
+    GTEST_SKIP() << "RE2 does not support lookahead";
+  }
+  TypeParam regex("(?=\\d)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "a1b2c3");
+  const auto found = snapshotAll(matcher);
+  ASSERT_EQ(3u, found.size());
+  for (const auto& [from, to, text] : found) {
+    EXPECT_EQ(from, to);
+    EXPECT_EQ("", text);
+  }
+  EXPECT_EQ(1, std::get<0>(found[0]));
+  EXPECT_EQ(3, std::get<0>(found[1]));
+  EXPECT_EQ(5, std::get<0>(found[2]));
+}
+
+TYPED_TEST(ResultsPortedTest, resultsEmptyString) {
+  TypeParam regex(".*");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "");
+  const auto found = snapshotAll(matcher);
+  ASSERT_EQ(1u, found.size()); // exactly one empty match
+  EXPECT_EQ(std::make_tuple(0, 0, std::string("")), found[0]);
+}
+
+// Unicode property class \p{L}+ over ASCII, Cyrillic "мир" and CJK "世界".
+TYPED_TEST(ResultsPortedTest, resultsUnicode) {
+  TypeParam regex("\\p{L}+");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(
+      &regex, "hello \xD0\xBC\xD0\xB8\xD1\x80 \xE4\xB8\x96\xE7\x95\x8C");
+  std::vector<std::string> words;
+  while (matcher.find()) {
+    words.emplace_back(matcher.group(0).value());
+  }
+  EXPECT_THAT(
+      words,
+      ::testing::ElementsAre(
+          "hello", "\xD0\xBC\xD0\xB8\xD1\x80", "\xE4\xB8\x96\xE7\x95\x8C"));
+}
+
+// A find() consumed beforehand is not replayed: only later matches follow.
+TYPED_TEST(ResultsPortedTest, resultsDoesNotReset) {
+  TypeParam regex("\\w+");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "one two three");
+  ASSERT_TRUE(matcher.find());
+  EXPECT_EQ("one", matcher.group(0).value());
+  const auto remaining = snapshotAll(matcher);
+  ASSERT_EQ(2u, remaining.size());
+  EXPECT_EQ("two", std::get<2>(remaining[0]));
+  EXPECT_EQ("three", std::get<2>(remaining[1]));
+}
+
+// reset() rewinds the matcher, so iteration starts over at the first match.
+TYPED_TEST(ResultsPortedTest, resultsAfterReset) {
+  TypeParam regex("\\w+");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "one two three");
+  ASSERT_TRUE(matcher.find());
+  matcher.reset();
+  const auto found = snapshotAll(matcher);
+  ASSERT_EQ(3u, found.size());
+  EXPECT_EQ("one", std::get<2>(found[0]));
+  EXPECT_EQ("two", std::get<2>(found[1]));
+  EXPECT_EQ("three", std::get<2>(found[2]));
+}
+
+TYPED_TEST(ResultsPortedTest, resultsWithNamedGroups) {
+  TypeParam regex("(?<word>\\w+)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> m(&regex, "hello world");
+  std::vector<std::pair<std::string, std::string>> rows; // (group 0, group 1)
+  while (m.find()) {
+    rows.emplace_back(
+        std::string(m.group(0).value()), std::string(m.group(1).value()));
+  }
+  ASSERT_EQ(2u, rows.size());
+  EXPECT_EQ(std::make_pair(std::string("hello"), std::string("hello")), rows[0]);
+  EXPECT_EQ(std::make_pair(std::string("world"), std::string("world")), rows[1]);
+}
+
+TYPED_TEST(ResultsPortedTest, resultsCount) {
+  TypeParam regex("a");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "abracadabra");
+  EXPECT_EQ(5u, snapshotAll(matcher).size()); // five 'a's in "abracadabra"
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/MatcherUnicodePortedTest.cpp b/velox/external/regex_compat/tests/MatcherUnicodePortedTest.cpp
new file mode 100644
index 00000000000..b4592d47d53
--- /dev/null
+++ b/velox/external/regex_compat/tests/MatcherUnicodePortedTest.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Cases ported from pcre4j's `MatcherUnicodeTests.java`.  All offsets are
+// translated from Java UTF-16 char offsets (used by pcre4j) to UTF-8 byte
+// offsets (used by our backends).
+//
+//   Å      U+00C5  2 UTF-8 bytes  (C3 85)
+//   Ǎ      U+01CD  2 UTF-8 bytes  (C7 8D)
+//   •      U+2022  3 UTF-8 bytes  (E2 80 A2)
+//   🌍     U+1F30D 4 UTF-8 bytes  (F0 9F 8C 8D)
+//   !              1 UTF-8 byte
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+template <typename Backend>
+using UnicodePortedTest = BackendTest<Backend>;
+TYPED_TEST_SUITE(UnicodePortedTest, AllBackends);
+
+TYPED_TEST(UnicodePortedTest, unicodeOneByte) {
+  TypeParam regex("\xC3\x85"); // Å (U+00C5)
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "\xC3\x85");
+  EXPECT_TRUE(matcher.matches());
+  EXPECT_EQ(0, matcher.start());
+  EXPECT_EQ(2, matcher.end()); // byte offset: Å is two UTF-8 bytes
+}
+
+TYPED_TEST(UnicodePortedTest, unicodeTwoBytes) {
+  TypeParam regex("\xC7\x8D"); // Ǎ (U+01CD)
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "\xC7\x8D");
+  EXPECT_TRUE(matcher.matches());
+  EXPECT_EQ(0, matcher.start());
+  EXPECT_EQ(2, matcher.end()); // byte offset: Ǎ is two UTF-8 bytes
+}
+
+TYPED_TEST(UnicodePortedTest, unicodeThreeBytes) {
+  TypeParam regex("\xE2\x80\xA2"); // • (U+2022)
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "\xE2\x80\xA2");
+  EXPECT_TRUE(matcher.matches());
+  EXPECT_EQ(0, matcher.start());
+  EXPECT_EQ(3, matcher.end()); // byte offset: • is three UTF-8 bytes
+}
+
+TYPED_TEST(UnicodePortedTest, unicodeFourBytes) {
+  TypeParam regex("\xF0\x9F\x8C\x8D"); // 🌍 (U+1F30D)
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, "\xF0\x9F\x8C\x8D");
+  EXPECT_TRUE(matcher.matches());
+  EXPECT_EQ(0, matcher.start());
+  EXPECT_EQ(4, matcher.end()); // byte offset: 🌍 is four UTF-8 bytes
+}
+
+TYPED_TEST(UnicodePortedTest, unicode) {
+  // ÅǍ•🌍! is 2 + 2 + 3 + 4 + 1 = 12 UTF-8 bytes; it serves as pattern and input.
+  const char* text = "\xC3\x85\xC7\x8D\xE2\x80\xA2\xF0\x9F\x8C\x8D!";
+  TypeParam regex(text);
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, text);
+  EXPECT_TRUE(matcher.matches());
+  EXPECT_EQ(0, matcher.start());
+  EXPECT_EQ(12, matcher.end());
+  EXPECT_EQ(text, matcher.group(0).value());
+}
+
+// Java's region() takes UTF-16 char offsets; the original test brackets the
+// surrogate pair of 🌍 with region(3, 5).  In UTF-8 that is the byte range
+// [7, 11).  We rely on JavaRegex's adapter doing the UTF-16/UTF-8
+// conversion internally and pass byte offsets to RE2/PCRE2.
+TYPED_TEST(UnicodePortedTest, unicodeRegion) {
+  const char* subject = "\xC3\x85\xC7\x8D\xE2\x80\xA2\xF0\x9F\x8C\x8D!";
+  TypeParam regex("\xF0\x9F\x8C\x8D"); // 🌍
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  JavaMatcherAdapter<TypeParam> matcher(&regex, subject);
+  matcher.region(7, 11);
+  EXPECT_TRUE(matcher.matches());
+  EXPECT_EQ(7, matcher.start());
+  EXPECT_EQ(11, matcher.end());
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/OpenJdkCorpusDiffTest.cpp b/velox/external/regex_compat/tests/OpenJdkCorpusDiffTest.cpp
new file mode 100644
index 00000000000..3460525b44b
--- /dev/null
+++ b/velox/external/regex_compat/tests/OpenJdkCorpusDiffTest.cpp
@@ -0,0 +1,643 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Runs the OpenJDK 17 `java/util/regex/TestCases.txt` corpus (~299 cases)
+// against each backend and reports per-backend pass rate.
+//
+// File format (per OpenJDK header):
+//   line 1: pattern
+//   line 2: input
+//   line 3: "true|false <match> <groupCount> <g1> <g2> <g3> <g4>"
+//           — match-string and groups present only when first token is true.
+// Empty lines and `//` comments are skipped.
+//
+// The corpus is fetched at CMake configure time and its directory is
+// injected via OPENJDK_CORPUS_DIR.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+#include "velox/external/regex_compat/JvmFixture.h"
+#endif
+
+#include <gtest/gtest.h>
+
+#include <cstdio>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#ifndef OPENJDK_CORPUS_DIR
+#error "OPENJDK_CORPUS_DIR must be defined by the build system"
+#endif
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+static constexpr const char* kCorpusFiles[] = {
+    "TestCases.txt",
+    "BMPTestCases.txt",
+    "SupplementaryTestCases.txt",
+};
+
+struct CorpusCase {
+  std::string pattern; // line 1 of a corpus entry
+  std::string input; // line 2 of a corpus entry
+  std::string expectedResult; // verbatim "true ..." / "false 0" / "error"
+};
+
+// Forward declaration of this file's UTF-8 encoder (defined further down) so
+// the escape decoder can share it instead of carrying a private copy.
+static std::string utf8(std::uint32_t cp);
+
+// Reads the `\uXXXX` escape starting at s[pos] and stores its 16-bit value in
+// `value`.  Returns false when fewer than six characters remain, when the
+// text at `pos` is not `\u`, or when one of the four digits is not hex; on
+// failure `value` is unspecified and must be ignored.
+static bool readUnicodeEscape(
+    const std::string& s,
+    std::size_t pos,
+    std::uint32_t& value) {
+  if (pos + 5 >= s.size() || s[pos] != '\\' || s[pos + 1] != 'u') {
+    return false;
+  }
+  value = 0;
+  for (std::size_t k = pos + 2; k < pos + 6; ++k) {
+    const char c = s[k];
+    value <<= 4;
+    if (c >= '0' && c <= '9') {
+      value |= static_cast<std::uint32_t>(c - '0');
+    } else if (c >= 'a' && c <= 'f') {
+      value |= static_cast<std::uint32_t>(c - 'a' + 10);
+    } else if (c >= 'A' && c <= 'F') {
+      value |= static_cast<std::uint32_t>(c - 'A' + 10);
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Mirror OpenJDK 17 RegExTest.grabLine: handles only `\n` (→ U+000A) and
+// `\uXXXX` (→ that code point); everything else passes through verbatim,
+// including a `\u` that is truncated or contains a non-hex digit.
+// Surrogate-pair `\uD8##\uDC##` sequences are combined into the proper
+// supplementary code point so that we end up with a valid UTF-8 4-byte
+// encoding (which both RE2/PCRE2 require and our Java JNI bridge
+// re-splits to a surrogate pair).  A high surrogate that is not directly
+// followed by a low-surrogate escape is encoded on its own as three bytes.
+static std::string processEscapes(const std::string& s) {
+  std::string out;
+  out.reserve(s.size());
+  for (std::size_t i = 0; i < s.size();) {
+    // Two-character `\n` escape becomes one line-feed byte.
+    if (s[i] == '\\' && i + 1 < s.size() && s[i + 1] == 'n') {
+      out.push_back('\n');
+      i += 2;
+      continue;
+    }
+    // Everything else is either a `\uXXXX` escape or a verbatim byte.
+    std::uint32_t cp = 0;
+    if (!readUnicodeEscape(s, i, cp)) {
+      // Not an escape this function decodes: copy the byte unchanged.
+      out.push_back(s[i++]);
+      continue;
+    }
+    i += 6; // past `\uXXXX`
+    // Fold a high surrogate with the low-surrogate escape right after it.
+    std::uint32_t lo = 0;
+    if (cp >= 0xD800 && cp <= 0xDBFF && readUnicodeEscape(s, i, lo) &&
+        lo >= 0xDC00 && lo <= 0xDFFF) {
+      cp = 0x10000 + (((cp - 0xD800) << 10) | (lo - 0xDC00));
+      i += 6;
+    }
+    // Emit the code point through the shared encoder.
+    out += utf8(cp);
+  }
+  return out;
+}
+
+// Encodes `cp` as 1–4 UTF-8 bytes, choosing the length from its magnitude
+// alone (surrogate values and values above U+10FFFF are not rejected).
+static std::string utf8(std::uint32_t cp) {
+  const auto tail = [](std::uint32_t bits) { // 10xxxxxx continuation byte
+    return static_cast<char>(0x80 | (bits & 0x3F));
+  };
+  if (cp < 0x80) {
+    return std::string(1, static_cast<char>(cp));
+  }
+  if (cp < 0x800) {
+    return {static_cast<char>(0xC0 | (cp >> 6)), tail(cp)};
+  }
+  if (cp < 0x10000) {
+    return {static_cast<char>(0xE0 | (cp >> 12)), tail(cp >> 6), tail(cp)};
+  }
+  return {
+      static_cast<char>(0xF0 | (cp >> 18)), tail(cp >> 12), tail(cp >> 6),
+      tail(cp)};
+}
+
+static std::uint32_t parseHexCodePoint(std::string_view token) {
+  // Folds one hex digit (either case) per character; empty input yields 0.
+  std::uint32_t value = 0;
+  for (std::size_t pos = 0; pos < token.size(); ++pos) {
+    const char digit = token[pos];
+    const bool isDec = digit >= '0' && digit <= '9';
+    const bool isLow = digit >= 'a' && digit <= 'f';
+    const bool isUp = digit >= 'A' && digit <= 'F';
+    if (!isDec && !isLow && !isUp) {
+      throw std::invalid_argument("bad hex code point");
+    }
+    const int base = isDec ? '0' : (isLow ? 'a' - 10 : 'A' - 10);
+    value = (value << 4) | static_cast<std::uint32_t>(digit - base);
+  }
+  return value;
+}
+
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+static jstring toJString(JNIEnv* env, std::string_view sv) {
+  // Decode UTF-8 by lead byte, then re-encode each code point as UTF-16.
+  // Stray continuation bytes and truncated tails become U+FFFD.
+  std::vector<jchar> units;
+  units.reserve(sv.size());
+  const auto byteAt = [sv](std::size_t pos) -> std::uint32_t {
+    return static_cast<unsigned char>(sv[pos]);
+  };
+  std::size_t i = 0;
+  while (i < sv.size()) {
+    const std::uint32_t lead = byteAt(i);
+    std::uint32_t cp = 0xFFFD;
+    std::size_t width = 1;
+    if (lead < 0x80) {
+      cp = lead;
+    } else if (lead < 0xC0) {
+      // continuation byte in lead position: keep U+FFFD
+    } else if (lead < 0xE0 && i + 1 < sv.size()) {
+      cp = ((lead & 0x1F) << 6) | (byteAt(i + 1) & 0x3F);
+      width = 2;
+    } else if (lead < 0xF0 && i + 2 < sv.size()) {
+      cp = ((lead & 0x0F) << 12) | ((byteAt(i + 1) & 0x3F) << 6) |
+          (byteAt(i + 2) & 0x3F);
+      width = 3;
+    } else if (i + 3 < sv.size()) {
+      cp = ((lead & 0x07) << 18) | ((byteAt(i + 1) & 0x3F) << 12) |
+          ((byteAt(i + 2) & 0x3F) << 6) | (byteAt(i + 3) & 0x3F);
+      width = 4;
+    }
+    if (cp <= 0xFFFF) {
+      units.push_back(static_cast<jchar>(cp));
+    } else {
+      const std::uint32_t offset = cp - 0x10000;
+      units.push_back(static_cast<jchar>(0xD800 | (offset >> 10)));
+      units.push_back(static_cast<jchar>(0xDC00 | (offset & 0x3FF)));
+    }
+    i += width;
+  }
+  return env->NewString(units.data(), static_cast<jsize>(units.size()));
+}
+
+// Translates a Java (UTF-16 code unit) offset into the matching byte offset
+// of `utf8`.  Bytes are classified by lead-byte value only: each sequence of
+// up to three bytes counts as one unit and a four-byte sequence as two.
+// Returns std::string_view::npos when `javaCharOffset` is reached neither at
+// the start of a sequence nor exactly at the end of the walk.
+// The input is not validated.
+static std::size_t javaCharOffsetToByteOffset(
+    std::string_view utf8,
+    int javaCharOffset) {
+  int units = 0;
+  std::size_t pos = 0;
+  while (pos < utf8.size()) {
+    if (units == javaCharOffset) {
+      return pos;
+    }
+    const unsigned char lead = static_cast<unsigned char>(utf8[pos]);
+    // Sequence length implied by the lead byte.
+    const std::size_t width =
+        lead < 0x80 ? 1 : (lead < 0xE0 ? 2 : (lead < 0xF0 ? 3 : 4));
+    pos += width;
+    units += (width == 4) ? 2 : 1;
+  }
+  // Reached the end: only an offset equal to the total unit count is valid.
+  return units == javaCharOffset ? utf8.size() : std::string_view::npos;
+}
+
+// Asks java.util.regex directly (through JNI) where `\b{g}` matches in
+// `input`, and returns those positions as byte offsets into `input`.
+// Match starts that do not map to a byte offset are dropped.  JNI results
+// are not checked for null or pending exceptions.
+static std::vector<int> directJavaGraphemeBreakOffsets(std::string_view input) {
+  auto* jni = JvmFixture::instance().env();
+
+  // Resolve the classes and methods used below.
+  jclass patternClass = jni->FindClass("java/util/regex/Pattern");
+  jclass matcherClass = jni->FindClass("java/util/regex/Matcher");
+  jmethodID compileId = jni->GetStaticMethodID(
+      patternClass,
+      "compile",
+      "(Ljava/lang/String;)Ljava/util/regex/Pattern;");
+  jmethodID matcherId = jni->GetMethodID(
+      patternClass,
+      "matcher",
+      "(Ljava/lang/CharSequence;)Ljava/util/regex/Matcher;");
+  jmethodID findId = jni->GetMethodID(matcherClass, "find", "()Z");
+  jmethodID startId = jni->GetMethodID(matcherClass, "start", "()I");
+
+  // Pattern.compile("\\b{g}").matcher(input)
+  jstring patternText = toJString(jni, "\\b{g}");
+  jobject compiled =
+      jni->CallStaticObjectMethod(patternClass, compileId, patternText);
+  jni->DeleteLocalRef(patternText);
+  jstring subjectText = toJString(jni, input);
+  jobject matcherObj = jni->CallObjectMethod(compiled, matcherId, subjectText);
+  jni->DeleteLocalRef(subjectText);
+
+  std::vector<int> result;
+  for (;;) {
+    if (!jni->CallBooleanMethod(matcherObj, findId)) {
+      break;
+    }
+    // Matcher.start() is in UTF-16 units; convert to a UTF-8 byte offset.
+    const jint utf16Start = jni->CallIntMethod(matcherObj, startId);
+    const auto utf8Start = javaCharOffsetToByteOffset(input, utf16Start);
+    if (utf8Start == std::string_view::npos) {
+      continue;
+    }
+    result.push_back(static_cast<int>(utf8Start));
+  }
+
+  jni->DeleteLocalRef(matcherObj);
+  jni->DeleteLocalRef(compiled);
+  jni->DeleteLocalRef(matcherClass);
+  jni->DeleteLocalRef(patternClass);
+  return result;
+}
+#endif // VELOX_REGEX_COMPAT_HAS_JAVA
+
+// One parsed line of the grapheme-break corpus (see loadGraphemeCorpus).
+struct GraphemeCase {
+  // UTF-8 text built from the code points listed on the line.
+  std::string input;
+  // Byte offsets into `input` at which the line marks a break.
+  std::vector<int> expectedBreakOffsets;
+};
+
+// Reads the grapheme-break corpus at `path`.  Each line is a whitespace
+// separated sequence of tokens: U+00F7 (UTF-8 "\xC3\xB7") marks a break at
+// the current byte offset, U+00D7 ("\xC3\x97") marks "no break" and is
+// ignored, and any other token is a hex code point appended to the input.
+// Text from the first '#' on is discarded; lines without tokens are skipped.
+// Returns an empty vector when the file cannot be opened.  A malformed hex
+// token propagates the exception thrown by parseHexCodePoint.
+static std::vector<GraphemeCase> loadGraphemeCorpus(const std::string& path) {
+  std::vector<GraphemeCase> corpus;
+  std::ifstream file(path);
+  if (!file) {
+    return corpus;
+  }
+  for (std::string raw; std::getline(file, raw);) {
+    // substr(0, npos) keeps the whole line when there is no comment.
+    std::istringstream fields(raw.substr(0, raw.find('#')));
+    GraphemeCase entry;
+    bool hasContent = false;
+    for (std::string field; fields >> field;) {
+      hasContent = true;
+      if (field == "\xC3\x97") {
+        continue;
+      }
+      if (field == "\xC3\xB7") {
+        entry.expectedBreakOffsets.push_back(
+            static_cast<int>(entry.input.size()));
+        continue;
+      }
+      entry.input += utf8(parseHexCodePoint(field));
+    }
+    if (hasContent) {
+      corpus.push_back(std::move(entry));
+    }
+  }
+  return corpus;
+}
+
+// OpenJDK format uses spaces both as field separators and inside captured
+// group text.  We don't need to split — the OpenJDK runner emits the
+// expected line via plain StringBuilder concatenation; we rebuild the
+// actual result the same way and compare strings.
+
+// Loads an OpenJDK-style regex corpus from `path`.  Cases are consecutive
+// triples of significant lines (pattern, input, expected result); empty
+// lines and lines starting with `//` are skipped.  Every line is passed
+// through processEscapes.  An incomplete trailing triple is dropped.
+// Returns an empty vector when the file cannot be opened.
+static std::vector<CorpusCase> loadCorpus(const std::string& path) {
+  std::vector<CorpusCase> result;
+  std::ifstream stream(path);
+  if (!stream) {
+    return result;
+  }
+
+  // Fetches the next significant line into `dst`; false at end of file.
+  const auto nextLine = [&stream](std::string& dst) -> bool {
+    while (std::getline(stream, dst)) {
+      const bool isComment =
+          dst.size() >= 2 && dst[0] == '/' && dst[1] == '/';
+      if (!dst.empty() && !isComment) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  std::string patternLine;
+  std::string inputLine;
+  std::string expectedLine;
+  for (;;) {
+    // Short-circuit keeps the read order: pattern, input, expected.
+    if (!nextLine(patternLine) || !nextLine(inputLine) ||
+        !nextLine(expectedLine)) {
+      break;
+    }
+    CorpusCase entry;
+    entry.pattern = processEscapes(patternLine);
+    entry.input = processEscapes(inputLine);
+    entry.expectedResult = processEscapes(expectedLine);
+    result.push_back(std::move(entry));
+  }
+  return result;
+}
+
+// Per-(backend, file) tally — keyed by "backend|file" in allStats().
+struct CorpusStats {
+  // Cases whose outcome matched the expected result (including patterns that
+  // failed to compile when the corpus expected an error).
+  int passed = 0;
+  // Cases that compiled but produced a different result string.
+  int failed = 0;
+  // Patterns that failed to compile although no error was expected.
+  int compileErrors = 0;
+  // Subset of `compileErrors` whose root cause is the translator rejecting
+  // the pattern as untranslatable for the engine (e.g. RE2 lookaround /
+  // backref / possessive).  These are engine-feature-impossible, NOT bugs
+  // in our translator; surfaced separately so we can report a rate that
+  // excludes them ("translatable-subset rate").
+  int translatorRejected = 0;
+};
+
+// Returns the process-wide tally map, created on first use.
+std::map<std::string, CorpusStats>& allStats() {
+  static std::map<std::string, CorpusStats> registry;
+  return registry;
+}
+
+// Global test environment whose TearDown prints the corpus pass rates
+// collected in allStats() to stderr: one row per "backend|file" key, then
+// one aggregate row per backend.  Prints nothing when no stats exist.
+class CorpusReporter : public ::testing::Environment {
+ public:
+  void TearDown() override {
+    const auto& perFile = allStats();
+    if (perFile.empty()) {
+      return;
+    }
+
+    // Prints one "passed / total (pct) [compile-err]" row and returns the
+    // total it used, so the caller can derive further rates from it.
+    const auto printRow = [](const std::string& label,
+                             const CorpusStats& stats) -> int {
+      const int total = stats.passed + stats.failed + stats.compileErrors;
+      const double pct = total > 0 ? 100.0 * stats.passed / total : 0.0;
+      std::fprintf(
+          stderr,
+          "  %-50s %4d / %4d  (%.2f%%)   [compile-err: %d]\n",
+          label.c_str(),
+          stats.passed,
+          total,
+          pct,
+          stats.compileErrors);
+      return total;
+    };
+
+    std::fprintf(stderr, "\n");
+    std::fprintf(stderr, "========== OpenJDK corpus compat rate ==========\n");
+
+    // Single pass: print each per-file row and fold it into the backend sum.
+    std::map<std::string, CorpusStats> perBackend;
+    for (const auto& [key, stats] : perFile) {
+      printRow(key, stats);
+      // The backend name is the part of the key before '|'.
+      CorpusStats& sum = perBackend[key.substr(0, key.find('|'))];
+      sum.passed += stats.passed;
+      sum.failed += stats.failed;
+      sum.compileErrors += stats.compileErrors;
+      sum.translatorRejected += stats.translatorRejected;
+    }
+
+    std::fprintf(stderr, "  ---- aggregate ----\n");
+    for (const auto& [backend, sum] : perBackend) {
+      const int total = printRow(backend, sum);
+      if (sum.translatorRejected <= 0) {
+        continue;
+      }
+      // Second rate that leaves out patterns the translator rejected as
+      // impossible for the engine, isolating translator/backend problems
+      // from engine ceilings.
+      const int subsetTotal = total - sum.translatorRejected;
+      const double subsetPct =
+          subsetTotal > 0 ? 100.0 * sum.passed / subsetTotal : 0.0;
+      std::fprintf(
+          stderr,
+          "  %-50s %4d / %4d  (%.2f%%)   [excludes %d translator-rejected]\n",
+          (backend + " (translatable subset)").c_str(),
+          sum.passed,
+          subsetTotal,
+          subsetPct,
+          sum.translatorRejected);
+    }
+    std::fprintf(stderr, "================================================\n");
+  }
+};
+
+// Register the reporter exactly once, during static initialization of this
+// translation unit.  The returned pointer is kept only to force the call;
+// NOTE(review): GoogleTest presumably takes ownership of the `new`-ed
+// environment — confirm against the GoogleTest documentation.
+[[maybe_unused]] static auto* kReporter =
+    ::testing::AddGlobalTestEnvironment(new CorpusReporter);
+
+// Short display name for backend type R: "Re2" for Re2Regex, "Pcre2" for
+// Pcre2Regex, and "Java" for anything else.
+template <typename R>
+const char* backendName() {
+  constexpr bool kIsRe2 = std::is_same_v<R, Re2Regex>;
+  constexpr bool kIsPcre2 = std::is_same_v<R, Pcre2Regex>;
+  return kIsRe2 ? "Re2" : (kIsPcre2 ? "Pcre2" : "Java");
+}
+
+// Typed suite for the OpenJDK corpus run; instantiated once per type in
+// AllBackends (BackendTest and AllBackends are declared outside this chunk).
+template <typename R>
+using OpenJdkCorpusDiffTest = BackendTest<R>;
+TYPED_TEST_SUITE(OpenJdkCorpusDiffTest, AllBackends);
+
+// Runs every case of every file in kCorpusFiles against the backend under
+// test and records the outcome in allStats() under "backend|file".
+//
+// For each case the pattern is compiled; a compile failure passes only when
+// the expected result starts with "error".  Otherwise the first find() is
+// rendered as "true <group0> <groupCount>[ <group i>...]" or
+// "false <groupCount>" and compared verbatim with the expected line.
+// Mismatches only fail the test for the Java backend; for the other backends
+// they feed the compat-rate report.
+TYPED_TEST(OpenJdkCorpusDiffTest, runCorpus) {
+  const std::string backend = backendName<TypeParam>();
+  int totalCases = 0;
+  // Only referenced inside the VELOX_REGEX_COMPAT_HAS_JAVA sections below;
+  // [[maybe_unused]] keeps builds without the Java backend free of
+  // unused-variable warnings.
+  [[maybe_unused]] int totalJavaFailures = 0;
+  for (const char* fname : kCorpusFiles) {
+    const std::string path = std::string(OPENJDK_CORPUS_DIR) + "/" + fname;
+    const std::vector<CorpusCase> corpus = loadCorpus(path);
+    ASSERT_FALSE(corpus.empty()) << "Corpus is empty — failed to load " << path;
+    totalCases += static_cast<int>(corpus.size());
+
+    const std::string key = backend + "|" + fname;
+    auto& st = allStats()[key];
+
+    for (const auto& c : corpus) {
+      TypeParam re(c.pattern);
+      if (!re.ok()) {
+        // rfind(..., 0) == 0 is a "starts with" check.
+        if (c.expectedResult.rfind("error", 0) == 0) {
+          ++st.passed;
+        } else {
+          ++st.compileErrors;
+          if (re.error().find("translator: ") != std::string::npos) {
+            ++st.translatorRejected;
+          }
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+          if constexpr (std::is_same_v<TypeParam, JavaRegex>) {
+            ++totalJavaFailures;
+            std::fprintf(
+                stderr,
+                "[OpenJDK %s] Java compile-err: pattern=[%s] err=[%s]\n",
+                fname,
+                c.pattern.c_str(),
+                re.error().c_str());
+          }
+#endif
+        }
+        continue;
+      }
+      JavaMatcherAdapter<TypeParam> m(&re, c.input);
+      const bool found = m.find();
+      // Rebuild the result line the same way the corpus expectation is
+      // written, so a plain string comparison decides pass/fail.
+      std::string actual;
+      if (found) {
+        actual.append("true ");
+        actual.append(std::string(m.group(0).value()));
+        actual.push_back(' ');
+        actual.append(std::to_string(m.groupCount()));
+        for (int i = 1; i <= m.groupCount(); ++i) {
+          auto gi = m.group(i);
+          // Groups without a value are omitted entirely.
+          if (gi) {
+            actual.push_back(' ');
+            actual.append(std::string(*gi));
+          }
+        }
+      } else {
+        actual.append("false ");
+        actual.append(std::to_string(m.groupCount()));
+      }
+      if (actual == c.expectedResult) {
+        ++st.passed;
+      } else {
+        ++st.failed;
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+        if constexpr (std::is_same_v<TypeParam, JavaRegex>) {
+          ++totalJavaFailures;
+          std::fprintf(
+              stderr,
+              "[OpenJDK %s] Java mismatch:\n  pattern=[%s]\n  input=[%s]\n  expected=[%s]\n  actual=  [%s]\n",
+              fname,
+              c.pattern.c_str(),
+              c.input.c_str(),
+              c.expectedResult.c_str(),
+              actual.c_str());
+        }
+#endif
+      }
+    }
+  }
+
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+  if constexpr (std::is_same_v<TypeParam, JavaRegex>) {
+    EXPECT_EQ(0, totalJavaFailures)
+        << "Java backend should match every case across all OpenJDK corpus files";
+  }
+#endif
+  EXPECT_GT(totalCases, 0);
+}
+
+// Per-backend tally for the grapheme-break corpus run.
+struct GraphemeStats {
+  // Cases whose break offsets matched the expected offsets.
+  int passed = 0;
+  // Cases whose break offsets differed.
+  int failed = 0;
+  // Cases for which the `\b{g}` pattern failed to compile.
+  int compileErrors = 0;
+};
+
+// Returns the process-wide grapheme tally map (keyed by backend name),
+// created on first use.
+std::map<std::string, GraphemeStats>& graphemeStats() {
+  static std::map<std::string, GraphemeStats> registry;
+  return registry;
+}
+
+// Global test environment whose TearDown prints one pass-rate row per
+// backend from graphemeStats() to stderr.  Prints nothing when no stats
+// were recorded.
+class GraphemeReporter : public ::testing::Environment {
+ public:
+  void TearDown() override {
+    const auto& perBackend = graphemeStats();
+    if (perBackend.empty()) {
+      return;
+    }
+    std::fprintf(stderr, "\n");
+    std::fprintf(stderr, "========== OpenJDK grapheme corpus compat rate ==========\n");
+    for (const auto& entry : perBackend) {
+      const std::string& name = entry.first;
+      const GraphemeStats& stats = entry.second;
+      const int total = stats.passed + stats.failed + stats.compileErrors;
+      double pct = 0.0;
+      if (total > 0) {
+        pct = 100.0 * stats.passed / total;
+      }
+      std::fprintf(
+          stderr,
+          "  %-8s %4d / %4d  (%.2f%%)   [compile-err: %d]\n",
+          name.c_str(),
+          stats.passed,
+          total,
+          pct,
+          stats.compileErrors);
+    }
+    std::fprintf(stderr, "=========================================================\n");
+  }
+};
+
+// Registers the grapheme reporter during static initialization; the pointer
+// is stored only to force the call.
+[[maybe_unused]] static auto* kGraphemeReporter =
+    ::testing::AddGlobalTestEnvironment(new GraphemeReporter);
+
+// Typed suite for the grapheme-break corpus; instantiated once per type in
+// AllBackends (declared outside this chunk).
+template <typename R>
+using GraphemeCorpusTest = BackendTest<R>;
+TYPED_TEST_SUITE(GraphemeCorpusTest, AllBackends);
+
+// Runs GraphemeTestCases.txt against the backend under test: for each case,
+// collects the start offsets of all `\b{g}` matches and compares them with
+// the break offsets listed in the corpus.  Results are tallied in
+// graphemeStats(); only the Java backend is required to match every case.
+TYPED_TEST(GraphemeCorpusTest, runGraphemeBreakCorpus) {
+  const std::string path =
+      std::string(OPENJDK_CORPUS_DIR) + "/GraphemeTestCases.txt";
+  const auto cases = loadGraphemeCorpus(path);
+  ASSERT_FALSE(cases.empty()) << "Corpus is empty — failed to load " << path;
+
+  // Only referenced inside the VELOX_REGEX_COMPAT_HAS_JAVA sections below;
+  // [[maybe_unused]] keeps builds without the Java backend free of
+  // unused-variable warnings.
+  [[maybe_unused]] int javaFailures = 0;
+  auto& st = graphemeStats()[backendName<TypeParam>()];
+  for (const auto& c : cases) {
+    TypeParam re("\\b{g}");
+    if (!re.ok()) {
+      ++st.compileErrors;
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+      if constexpr (std::is_same_v<TypeParam, JavaRegex>) {
+        ++javaFailures;
+        std::fprintf(
+            stderr,
+            "[OpenJDK Grapheme] Java compile-err: %s\n",
+            re.error().c_str());
+      }
+#endif
+      continue;
+    }
+
+    std::vector<int> actual;
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+    if constexpr (std::is_same_v<TypeParam, JavaRegex>) {
+      // The Java backend is queried through the direct JNI helper instead
+      // of JavaMatcherAdapter.
+      actual = directJavaGraphemeBreakOffsets(c.input);
+    } else
+#endif
+    {
+      JavaMatcherAdapter<TypeParam> m(&re, c.input);
+      while (m.find()) {
+        actual.push_back(m.start());
+      }
+    }
+    if (actual == c.expectedBreakOffsets) {
+      ++st.passed;
+    } else {
+      ++st.failed;
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+      if constexpr (std::is_same_v<TypeParam, JavaRegex>) {
+        ++javaFailures;
+        std::fprintf(
+            stderr,
+            "[OpenJDK Grapheme] Java mismatch: expected %zu breaks, actual %zu breaks\n",
+            c.expectedBreakOffsets.size(),
+            actual.size());
+      }
+#endif
+    }
+  }
+
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+  if constexpr (std::is_same_v<TypeParam, JavaRegex>) {
+    EXPECT_EQ(0, javaFailures)
+        << "Java backend should match every GraphemeTestCases.txt case";
+  }
+#endif
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/PatternPortedTest.cpp b/velox/external/regex_compat/tests/PatternPortedTest.cpp
new file mode 100644
index 00000000000..e1c27b0542d
--- /dev/null
+++ b/velox/external/regex_compat/tests/PatternPortedTest.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Cases ported from pcre4j's `PatternTests.java`
+// (https://github.com/alexey-pelykh/pcre4j, GPL-LGPL upstream; this C++ port
+// is the work of the Velox project, Apache-2.0).
+//
+// Each TYPED_TEST below runs against every regex backend (Re2Regex,
+// Pcre2Regex, JavaRegex) enabled at compile time.  Tests asserting Java
+// semantics that some backend cannot satisfy are marked with the backend's
+// known limitation and skipped via `if constexpr` rather than disabled, so
+// any future improvement in the backend is detected by the test newly passing.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+// Typed suite for the ported pcre4j Pattern tests; instantiated once per
+// type in AllBackends (declared in the included test headers, presumably
+// BackendTestBase.h).
+template <typename R>
+using PatternPortedTest = BackendTest<R>;
+TYPED_TEST_SUITE(PatternPortedTest, AllBackends);
+
+// pcre4j PatternTests.toStringReturnsPattern: Pattern.toString() returns the
+// original source string.  Our IRegex doesn't expose `pattern()` directly,
+// but `NamedCapturingGroups()` + `NumberOfCapturingGroups()` cover the
+// compile-side state-mirror part.  Skip the pure-toString assertion.
+
+// pcre4j PatternTests.namedGroups
+TYPED_TEST(PatternPortedTest, namedGroupsSingle) {
+  // One named group counts as exactly one capturing group.
+  TypeParam regex("(?<number>42)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const auto groupCount = regex.NumberOfCapturingGroups();
+  EXPECT_EQ(1, groupCount);
+}
+
+TYPED_TEST(PatternPortedTest, namedGroupsTwoNames) {
+  // Two distinct named groups are both counted.
+  TypeParam regex("(?<a>x)(?<b>y)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const auto groupCount = regex.NumberOfCapturingGroups();
+  EXPECT_EQ(2, groupCount);
+}
+
+TYPED_TEST(PatternPortedTest, numberedGroupsOnly) {
+  // Three plain parenthesised groups, no names.
+  TypeParam regex("(\\d)(\\w)(\\s)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const auto groupCount = regex.NumberOfCapturingGroups();
+  EXPECT_EQ(3, groupCount);
+}
+
+TYPED_TEST(PatternPortedTest, nonCapturingGroupDoesNotIncrement) {
+  // `(?:...)` must not add to the group count; only `(bar)` does.
+  TypeParam regex("(?:foo)(bar)");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const auto groupCount = regex.NumberOfCapturingGroups();
+  EXPECT_EQ(1, groupCount);
+}
+
+// pcre4j PatternTests.split (essence: split on \\D+ produces digit groups)
+TYPED_TEST(PatternPortedTest, splitOnDigitGroups) {
+  // Pattern.split() is not exposed at backend level, so the split is
+  // emulated by walking find() results and cutting between matches.
+  TypeParam regex("\\D+");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const std::string_view text = "0, 1, 1, 2, 3, 5, 8";
+  JavaMatcherAdapter<TypeParam> matcher(&regex, text);
+  std::vector<std::string> pieces;
+  std::size_t segmentStart = 0;
+  while (matcher.find()) {
+    const auto length = matcher.start() - segmentStart;
+    pieces.emplace_back(text.substr(segmentStart, length));
+    segmentStart = matcher.end();
+  }
+  // Whatever follows the last delimiter is the final piece.
+  pieces.emplace_back(text.substr(segmentStart));
+  EXPECT_THAT(
+      pieces, ::testing::ElementsAre("0", "1", "1", "2", "3", "5", "8"));
+}
+
+// pcre4j PatternTests.unicodeSplit
+TYPED_TEST(PatternPortedTest, splitUnicodeDelimiters) {
+  TypeParam regex("\\D+");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // The delimiter runs contain U+21E2 RIGHTWARDS DASHED ARROW, a 3-byte
+  // UTF-8 sequence, surrounded by spaces.
+  const std::string_view text = "0 \xe2\x87\xa2 1 \xe2\x87\xa2 2";
+  JavaMatcherAdapter<TypeParam> matcher(&regex, text);
+  std::vector<std::string> pieces;
+  std::size_t segmentStart = 0;
+  while (matcher.find()) {
+    const auto length = matcher.start() - segmentStart;
+    pieces.emplace_back(text.substr(segmentStart, length));
+    segmentStart = matcher.end();
+  }
+  pieces.emplace_back(text.substr(segmentStart));
+  EXPECT_THAT(pieces, ::testing::ElementsAre("0", "1", "2"));
+}
+
+// pcre4j PatternTests CASE_INSENSITIVE flag
+TYPED_TEST(PatternPortedTest, caseInsensitiveCompileTimeFlag) {
+  // Case sensitivity is switched off through Options, not inside the pattern.
+  Options options;
+  options.caseSensitive = false;
+  TypeParam regex("HeLLo", options);
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const bool lowerMatches = TypeParam::PartialMatch("hello", regex);
+  EXPECT_TRUE(lowerMatches);
+  const bool upperMatches = TypeParam::PartialMatch("HELLO", regex);
+  EXPECT_TRUE(upperMatches);
+}
+
+// pcre4j PatternTests DOTALL flag
+TYPED_TEST(PatternPortedTest, dotallMatchesNewline) {
+  // With dotNl set, `.` is expected to match the newline between a and b.
+  Options options;
+  options.dotNl = true;
+  TypeParam regex("a.b", options);
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const bool matched = TypeParam::PartialMatch("a\nb", regex);
+  EXPECT_TRUE(matched);
+}
+
+// pcre4j PatternTests MULTILINE flag
+TYPED_TEST(PatternPortedTest, multilineCaret) {
+  // oneLine = false is the MULTILINE case being ported: `^` should match
+  // right after the embedded newline.
+  Options options;
+  options.oneLine = false;
+  TypeParam regex("^X", options);
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const bool matched = TypeParam::PartialMatch("foo\nX bar", regex);
+  EXPECT_TRUE(matched);
+}
+
+TYPED_TEST(PatternPortedTest, multilineDollar) {
+  // oneLine = false is the MULTILINE case being ported: `$` should match
+  // just before the embedded newline.
+  Options options;
+  options.oneLine = false;
+  TypeParam regex("X$", options);
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const bool matched = TypeParam::PartialMatch("foo X\nbar", regex);
+  EXPECT_TRUE(matched);
+}
+
+// pcre4j PatternTests invalid pattern syntax
+TYPED_TEST(PatternPortedTest, invalidPatternRejected) {
+  // An unclosed group must fail to compile and carry an error message.
+  TypeParam regex("(");
+  const bool compiled = regex.ok();
+  EXPECT_FALSE(compiled);
+  EXPECT_FALSE(regex.error().empty());
+}
+
+TYPED_TEST(PatternPortedTest, invalidPatternRejectedSquareBracket) {
+  // An unclosed character class must fail to compile and carry a message.
+  TypeParam regex("[");
+  const bool compiled = regex.ok();
+  EXPECT_FALSE(compiled);
+  EXPECT_FALSE(regex.error().empty());
+}
+
+// pcre4j PatternTests: `a{` — Java rejects as incomplete quantifier.
+// PCRE2 and RE2 accept it literally.  This test asserts Java behaviour;
+// other backends will fail, which is the documented compatibility gap.
+TYPED_TEST(PatternPortedTest, braceQuantifierIncomplete) {
+  // Asserts rejection for every backend; no per-backend skip is applied.
+  TypeParam re("a{");
+  EXPECT_FALSE(re.ok());
+}
+
+// Empty pattern matches empty string anywhere.
+TYPED_TEST(PatternPortedTest, emptyPatternMatchesEverywhere) {
+  TypeParam regex("");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // Partial match inside non-empty text, full match on empty text.
+  const bool partial = TypeParam::PartialMatch("anything", regex);
+  EXPECT_TRUE(partial);
+  const bool full = TypeParam::FullMatch("", regex);
+  EXPECT_TRUE(full);
+}
+
+// Mirrors Java's `Pattern.quote(s)`: returns `s` wrapped in `\Q...\E` so a
+// regex engine reads it as literal text.  Each `\E` occurring inside `s`
+// would close the quoted section early, so it is replaced by `\E\\E\Q`
+// (close the section, emit a literal `\E`, reopen the section).
+static std::string javaQuote(std::string_view s) {
+  constexpr std::string_view kTerminator = "\\E";
+  std::string quoted("\\Q");
+  std::size_t cursor = 0;
+  for (std::size_t hit = s.find(kTerminator, cursor);
+       hit != std::string_view::npos;
+       hit = s.find(kTerminator, cursor)) {
+    quoted.append(s.substr(cursor, hit - cursor));
+    quoted.append("\\E\\\\E\\Q");
+    cursor = hit + kTerminator.size();
+  }
+  // Tail after the last `\E` (the whole input when there was none).
+  quoted.append(s.substr(cursor));
+  quoted.append("\\E");
+  return quoted;
+}
+
+// `Pattern.quote` round-trips any literal string through the regex engine.
+TYPED_TEST(PatternPortedTest, quote) {
+  // Empty text, regex metacharacters, and text containing `\E` itself.
+  const std::string_view samples[] = {
+      std::string_view(""),
+      std::string_view(".*+?^$|()[]\\{}"),
+      std::string_view("abc\\Edef"),
+  };
+  for (const std::string_view sample : samples) {
+    TypeParam regex(javaQuote(sample));
+    ASSERT_TRUE(regex.ok()) << regex.error() << " for [" << sample << "]";
+    // The quoted pattern must match exactly the text it was built from.
+    const bool matched = TypeParam::FullMatch(sample, regex);
+    EXPECT_TRUE(matched) << "input=[" << sample << "]";
+  }
+}
+
+// (?x) free-spacing: unescaped whitespace in pattern is ignored.
+TYPED_TEST(PatternPortedTest, commentsWhitespaceIgnored) {
+  // The spaces between a, b and c must not be part of the match.
+  TypeParam regex("(?x)a b c");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const bool matched = TypeParam::FullMatch("abc", regex);
+  EXPECT_TRUE(matched);
+}
+
+// (?x) `#` to end of line is a comment.
+TYPED_TEST(PatternPortedTest, commentsHashComments) {
+  // Everything from `#` to the newline is dropped; `def` resumes after it.
+  TypeParam regex("(?x)abc # this is a comment\ndef");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const bool matched = TypeParam::FullMatch("abcdef", regex);
+  EXPECT_TRUE(matched);
+}
+
+// (?x) escaped whitespace is matched literally.
+TYPED_TEST(PatternPortedTest, commentsEscapedWhitespace) {
+  // A backslash-escaped space stays significant under (?x).
+  TypeParam regex("(?x)a\\ b");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const bool matched = TypeParam::FullMatch("a b", regex);
+  EXPECT_TRUE(matched);
+}
+
+// (?x) escaped whitespace inside a character class is matched literally.
+TYPED_TEST(PatternPortedTest, commentsWhitespaceInCharacterClass) {
+  // The class contains only an escaped space, so it must match " ".
+  TypeParam regex("(?x)[\\ ]");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const bool matched = TypeParam::FullMatch(" ", regex);
+  EXPECT_TRUE(matched);
+}
+
+// Embedded (?x) flag at start enables COMMENTS for the rest of the pattern.
+TYPED_TEST(PatternPortedTest, commentsEmbeddedFlag) {
+  // NOTE(review): pattern, input and assertion are identical to
+  // commentsWhitespaceIgnored above, so this test currently adds no extra
+  // coverage — consider a distinct pattern if the backends support one.
+  TypeParam re("(?x)a b c");
+  ASSERT_TRUE(re.ok()) << re.error();
+  EXPECT_TRUE(TypeParam::FullMatch("abc", re));
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/PatternSplitPortedTest.cpp b/velox/external/regex_compat/tests/PatternSplitPortedTest.cpp
new file mode 100644
index 00000000000..e27e90876eb
--- /dev/null
+++ b/velox/external/regex_compat/tests/PatternSplitPortedTest.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Cases ported from pcre4j's `PatternSplitTests.java`.
+//
+// Java's `Pattern.split` is implemented here as a free helper that drives
+// the backend's find() loop through `JavaMatcherAdapter`, so engine
+// differences in find()/match propagate naturally to split() output.
+//
+// Skipped:
+//   * splitWithDelimiters* — `String[] splitWithDelimiters(...)` is Java 21+
+//     and not in our embedded JDK 17 surface.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+// Emulates Java's `Pattern.split(input, limit)` on top of the backend's
+// find() loop (via JavaMatcherAdapter).
+//
+//   re     compiled regex used as the delimiter.
+//   input  text to split.
+//   limit  > 0: at most `limit` parts, the last one holding the unsplit
+//               remainder;
+//          = 0: unlimited parts, trailing empty parts removed;
+//          < 0: unlimited parts, nothing removed.
+//
+// Returns the parts in order; when the delimiter never produces a split the
+// result is the single element `input`.
+template <typename R>
+std::vector<std::string>
+javaSplit(R& re, std::string_view input, int limit = 0) {
+  JavaMatcherAdapter<R> m(&re, input);
+  std::vector<std::string> parts;
+  int matches = 0;
+  std::size_t index = 0;
+  const bool matchLimited = limit > 0;
+  while (m.find()) {
+    // With a positive limit, stop once limit - 1 parts exist; the remainder
+    // appended after the loop becomes the last part.
+    if (matchLimited && matches == limit - 1) {
+      break;
+    }
+    const std::size_t s = static_cast<std::size_t>(m.start());
+    const std::size_t e = static_cast<std::size_t>(m.end());
+    // Java drops only a zero-width match at the very beginning of the input
+    // (no empty leading part).  A zero-width match at a later segment start
+    // still yields an empty part, so it must not be skipped.
+    if (index == 0 && s == 0 && e == 0) {
+      continue;
+    }
+    parts.emplace_back(input.substr(index, s - index));
+    index = e;
+    ++matches;
+  }
+  if (matches == 0) {
+    return {std::string(input)};
+  }
+  parts.emplace_back(input.substr(index));
+  if (limit == 0) {
+    while (!parts.empty() && parts.back().empty()) {
+      parts.pop_back();
+    }
+  }
+  return parts;
+}
+
+// Typed suite for the ported pcre4j split tests; instantiated once per type
+// in AllBackends (declared outside this chunk).
+template <typename R>
+using SplitPortedTest = BackendTest<R>;
+TYPED_TEST_SUITE(SplitPortedTest, AllBackends);
+
+// --- limit=0 trailing empty strings removal ---
+
+TYPED_TEST(SplitPortedTest, splitTrailingEmptyStringsRemovedWithDefaultLimit) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // Default limit (0): the three trailing empty parts are trimmed.
+  const auto parts = javaSplit(regex, "a,b,c,,,");
+  EXPECT_THAT(parts, ::testing::ElementsAre("a", "b", "c"));
+}
+
+TYPED_TEST(SplitPortedTest, splitTrailingEmptyStringsRemovedWithZeroLimit) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // Explicit limit 0 behaves like the default.
+  const auto parts = javaSplit(regex, "a,b,c,,,", 0);
+  EXPECT_THAT(parts, ::testing::ElementsAre("a", "b", "c"));
+}
+
+TYPED_TEST(SplitPortedTest, splitAllEmptyWithZeroLimit) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // Every part is empty, so trimming leaves nothing.
+  const auto parts = javaSplit(regex, ",,,", 0);
+  EXPECT_TRUE(parts.empty());
+}
+
+// --- Positive limit ---
+
+TYPED_TEST(SplitPortedTest, splitPositiveLimitOne) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // limit 1 allows a single part: the whole input, unsplit.
+  const auto parts = javaSplit(regex, "a,b,c", 1);
+  EXPECT_THAT(parts, ::testing::ElementsAre("a,b,c"));
+}
+
+TYPED_TEST(SplitPortedTest, splitPositiveLimitExceedsMatches) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // A limit larger than the number of parts changes nothing.
+  const auto parts = javaSplit(regex, "a,b,c", 10);
+  EXPECT_THAT(parts, ::testing::ElementsAre("a", "b", "c"));
+}
+
+// --- Empty input and no-match ---
+
+TYPED_TEST(SplitPortedTest, splitEmptyInput) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // No match in empty input: the result is the input itself, i.e. [""].
+  const auto parts = javaSplit(regex, "");
+  EXPECT_THAT(parts, ::testing::ElementsAre(""));
+}
+
+TYPED_TEST(SplitPortedTest, splitNoMatch) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // Without any delimiter the input comes back as the only part.
+  const auto parts = javaSplit(regex, "abc");
+  EXPECT_THAT(parts, ::testing::ElementsAre("abc"));
+}
+
+// --- Regex-based delimiter edge cases ---
+
+TYPED_TEST(SplitPortedTest, splitMultiCharDelimiter) {
+  // The delimiter swallows optional whitespace around each comma.
+  TypeParam regex("\\s*,\\s*");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  const auto parts = javaSplit(regex, "a , b , c");
+  EXPECT_THAT(parts, ::testing::ElementsAre("a", "b", "c"));
+}
+
+TYPED_TEST(SplitPortedTest, splitDelimiterAtStartAndEnd) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // The leading empty part is kept; the trailing one is trimmed (limit 0).
+  const auto parts = javaSplit(regex, ",a,b,c,");
+  EXPECT_THAT(parts, ::testing::ElementsAre("", "a", "b", "c"));
+}
+
+TYPED_TEST(SplitPortedTest, splitConsecutiveDelimiters) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // Adjacent delimiters produce empty parts in the middle, which are kept.
+  const auto parts = javaSplit(regex, "a,,b,,c");
+  EXPECT_THAT(parts, ::testing::ElementsAre("a", "", "b", "", "c"));
+}
+
+TYPED_TEST(SplitPortedTest, splitSingleCharInput) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // A lone delimiter yields only empty parts, all trimmed with limit 0.
+  const auto parts = javaSplit(regex, ",");
+  EXPECT_TRUE(parts.empty());
+}
+
+// --- splitAsStream edge cases (Java's splitAsStream is just a stream view
+// over the same split logic; we reuse javaSplit here). ---
+
+TYPED_TEST(SplitPortedTest, splitAsStreamTrailingEmpties) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // Checked through javaSplit: trailing empties are dropped.
+  const auto parts = javaSplit(regex, "a,b,c,,,");
+  EXPECT_THAT(parts, ::testing::ElementsAre("a", "b", "c"));
+}
+
+TYPED_TEST(SplitPortedTest, splitAsStreamEmptyInput) {
+  TypeParam regex(",");
+  ASSERT_TRUE(regex.ok()) << regex.error();
+  // Checked through javaSplit: empty input yields the single element "".
+  const auto parts = javaSplit(regex, "");
+  EXPECT_THAT(parts, ::testing::ElementsAre(""));
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/Pcre2RegexTest.cpp b/velox/external/regex_compat/tests/Pcre2RegexTest.cpp
new file mode 100644
index 00000000000..ce55bab9c17
--- /dev/null
+++ b/velox/external/regex_compat/tests/Pcre2RegexTest.cpp
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/external/regex_compat/Pcre2Regex.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace facebook::velox::regex_compat {
+namespace {
+
+TEST(Pcre2RegexTest, compileOk) {
+  Pcre2Regex re("\\d+");
+  EXPECT_TRUE(re.ok());
+  EXPECT_EQ(0, re.NumberOfCapturingGroups());
+  EXPECT_EQ("", re.error());
+}
+
+TEST(Pcre2RegexTest, compileError) {
+  Pcre2Regex re("(unclosed");
+  EXPECT_FALSE(re.ok());
+  EXPECT_FALSE(re.error().empty());
+}
+
+TEST(Pcre2RegexTest, surrogateBlockCompilesInRawByteMode) {
+  Pcre2Regex re("\\p{InHIGH_SURROGATES}");
+  EXPECT_TRUE(re.ok()) << re.error();
+}
+
+TEST(Pcre2RegexTest, javaNamedGroupAccepted) {
+  // PCRE2 natively understands (?<name>...) — no translation needed.
+  Pcre2Regex re("(?<num>\\d+)");
+  ASSERT_TRUE(re.ok()) << re.error();
+  EXPECT_EQ(1, re.NumberOfCapturingGroups());
+  const auto& names = re.NamedCapturingGroups();
+  ASSERT_NE(names.end(), names.find("num"));
+  EXPECT_EQ(1, names.at("num"));
+}
+
+TEST(Pcre2RegexTest, matchUnanchored) {
+  Pcre2Regex re("(\\d+)");
+  std::string_view sub[2];
+  std::string_view in = "abc 42 xyz";
+  EXPECT_TRUE(re.Match(in, 0, in.size(), Anchor::kUnanchored, sub, 2));
+  EXPECT_EQ("42", sub[0]);
+  EXPECT_EQ("42", sub[1]);
+}
+
+TEST(Pcre2RegexTest, matchAnchorBoth) {
+  Pcre2Regex re("[a-z]+");
+  std::string_view sub[1];
+  std::string_view in = "abc";
+  EXPECT_TRUE(re.Match(in, 0, in.size(), Anchor::kAnchorBoth, sub, 1));
+}
+
+TEST(Pcre2RegexTest, matchAnchorBothRejectsTrailing) {
+  Pcre2Regex re("[a-z]+");
+  std::string_view sub[1];
+  std::string_view in = "abc1";
+  EXPECT_FALSE(re.Match(in, 0, in.size(), Anchor::kAnchorBoth, sub, 1));
+}
+
+TEST(Pcre2RegexTest, fullPartialMatch) {
+  Pcre2Regex re("[a-z]+");
+  EXPECT_TRUE(Pcre2Regex::FullMatch("abc", re));
+  EXPECT_FALSE(Pcre2Regex::FullMatch("abc1", re));
+  EXPECT_TRUE(Pcre2Regex::PartialMatch("abc1", re));
+}
+
+TEST(Pcre2RegexTest, globalReplaceWithNumberedGroup) {
+  // PCRE2 with SUBSTITUTE_EXTENDED natively understands $1.
+  Pcre2Regex re("(\\d+)");
+  std::string s = "a1b22c333";
+  int n = Pcre2Regex::GlobalReplace(&s, re, "[$1]");
+  EXPECT_EQ(3, n);
+  EXPECT_EQ("a[1]b[22]c[333]", s);
+}
+
+TEST(Pcre2RegexTest, globalReplaceWithNamedGroup) {
+  // PCRE2 natively understands ${name}.
+  Pcre2Regex re("(?<n>\\d+)");
+  ASSERT_TRUE(re.ok()) << re.error();
+  std::string s = "a1b22c";
+  int n = Pcre2Regex::GlobalReplace(&s, re, "[${n}]");
+  EXPECT_EQ(2, n);
+  EXPECT_EQ("a[1]b[22]c", s);
+}
+
+TEST(Pcre2RegexTest, caseInsensitiveOption) {
+  Options opt;
+  opt.caseSensitive = false;
+  Pcre2Regex re("hello", opt);
+  EXPECT_TRUE(Pcre2Regex::PartialMatch("HELLO world", re));
+}
+
+TEST(Pcre2RegexTest, unicodeCaseOptionPrefoldsKnownLiterals) {
+  Options opt;
+  opt.caseSensitive = false; // Folding below must reach beyond ASCII.
+  Pcre2Regex kelvin("\\u212A", opt); // U+212A KELVIN SIGN, as a \u escape.
+  ASSERT_TRUE(kelvin.ok()) << kelvin.error();
+  EXPECT_TRUE(Pcre2Regex::FullMatch("k", kelvin));
+  EXPECT_TRUE(Pcre2Regex::FullMatch("K", kelvin));
+
+  Pcre2Regex sigma("\xce\xa3", opt); // UTF-8 bytes of U+03A3 (capital sigma).
+  ASSERT_TRUE(sigma.ok()) << sigma.error();
+  EXPECT_TRUE(Pcre2Regex::FullMatch("\xcf\x82", sigma)); // U+03C2, final sigma.
+  EXPECT_TRUE(Pcre2Regex::FullMatch("\xcf\x83", sigma)); // U+03C3, small sigma.
+}
+
+TEST(Pcre2RegexTest, defaultWordClassIsAscii) {
+  Pcre2Regex re("(?<!あ)\\w");
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  std::string_view sub[1];
+  std::string_view in = "###あぃいa###";
+  ASSERT_TRUE(re.Match(in, 0, in.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ("a", sub[0]);
+}
+
+TEST(Pcre2RegexTest, matchCanCrossLoneSurrogateUtf8) {
+  Pcre2Regex re("(.)([^a])xyz");
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  const std::string in = // U+D800 (lone surrogate, 3 bytes) + U+10000 + "xyz".
+      std::string("\xED\xA0\x80", 3) + "\xF0\x90\x80\x80" + "xyz";
+  std::string_view sub[3];
+  ASSERT_TRUE(re.Match(in, 0, in.size(), Anchor::kUnanchored, sub, 3));
+  EXPECT_EQ(in, sub[0]);
+  EXPECT_EQ(std::string("\xED\xA0\x80", 3), sub[1]); // "." took all 3 bytes.
+  EXPECT_EQ("\xF0\x90\x80\x80", sub[2]); // "[^a]" took the 4-byte code point.
+}
+
+TEST(Pcre2RegexTest, rawSurrogateQuantifierAppliesToWholeCodeUnit) {
+  Pcre2Regex re(std::string("\xED\xA0\x80", 3) + "?\xF0\x90\x80\x82");
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  const std::string in = "\xF0\x90\x80\x82";
+  std::string_view sub[1];
+  ASSERT_TRUE(re.Match(in, 0, in.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ(in, sub[0]);
+}
+
+TEST(Pcre2RegexTest, surrogateHexRangeClassMatchesLoneSurrogateOnly) {
+  Pcre2Regex re("[\\x{d800}-\\x{dbff}\\x{dc00}-\\x{dfff}]");
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  const std::string lone = std::string("xxx") + "\xED\xB2\xA9" + "\xED\xA0\xBD" + "yyy";
+  std::string_view sub[1];
+  ASSERT_TRUE(re.Match(lone, 0, lone.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ(std::string("\xED\xB2\xA9", 3), sub[0]);
+
+  const std::string validPair = "\xF0\x9F\x92\xA9";
+  EXPECT_FALSE(re.Match(validPair, 0, validPair.size(), Anchor::kUnanchored, sub, 1));
+}
+
+TEST(Pcre2RegexTest, rawUtf8SurrogateRangeClassMatchesWholeCodeUnit) {
+  Pcre2Regex re(std::string("[") + "\xED\xA0\x80" + "-" + "\xED\xBF\xBF" + "]");
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  const std::string lone = std::string("\xED\xBF\xBF", 3) + "\xED\xA0\x80";
+  std::string_view sub[1];
+  ASSERT_TRUE(re.Match(lone, 0, lone.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ(std::string("\xED\xBF\xBF", 3), sub[0]);
+
+  const std::string validPair = "\xF0\x90\x8F\xBF";
+  EXPECT_FALSE(re.Match(validPair, 0, validPair.size(), Anchor::kUnanchored, sub, 1));
+}
+
+TEST(Pcre2RegexTest, rawUtf8SurrogateLiteralInClassMatchesWholeCodeUnit) {
+  Pcre2Regex re(std::string("[a") + "\xED\xA0\x80" + "]");
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  std::string_view sub[1];
+  const std::string lone = std::string("\xED\xA0\x80", 3);
+  ASSERT_TRUE(re.Match(lone, 0, lone.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ(lone, sub[0]);
+
+  EXPECT_FALSE(re.Match(lone.substr(0, 1), 0, 1, Anchor::kUnanchored, sub, 1));
+
+  const std::string ascii = "a";
+  ASSERT_TRUE(re.Match(ascii, 0, ascii.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ(ascii, sub[0]);
+}
+
+TEST(Pcre2RegexTest, surrogatePropertyClassMatchesLoneSurrogate) {
+  Pcre2Regex re("[\\p{InHIGH_SURROGATES}\\p{InLOW_SURROGATES}]");
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  const std::string lone =
+      std::string("xxx") + "\xED\xB2\xA9" + "\xED\xA0\xBD" + "yyy";
+  std::string_view sub[1];
+  ASSERT_TRUE(re.Match(lone, 0, lone.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ(std::string("\xED\xB2\xA9", 3), sub[0]);
+
+  const std::string validPair = "\xF0\x9F\x92\xA9";
+  EXPECT_FALSE(re.Match(validPair, 0, validPair.size(), Anchor::kUnanchored, sub, 1));
+}
+
+TEST(Pcre2RegexTest, rawUtf8NegatedClassConsumesWholeCodePoint) {
+  Pcre2Regex re(std::string("[^\xF0\x90\x81\xA1\xF0\x90\xA0\x82"
+                            "\xF0\x90\xB0\x83\xED\xA0\x80]+"));
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  const std::string input = std::string("\xF0\x90\x81\xA1\xF0\x90\x81\xA1"
+                                        "\xF0\x90\x81\xA1\xF0\x90\xA0\x82"
+                                        "\xF0\x90\xA0\x82\xF0\x90\xA0\x82"
+                                        "\xF0\x90\xB0\x83\xF0\x90\xB0\x83"
+                                        "\xF0\x90\xB0\x83\xF0\x91\x80\x84") +
+      "abc";
+  std::string_view sub[1];
+  ASSERT_TRUE(re.Match(input, 0, input.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ(std::string("\xF0\x91\x80\x84") + "abc", sub[0]);
+}
+
+TEST(Pcre2RegexTest, rawUtf8IntersectionUsesCodePointSemantics) {
+  Pcre2Regex re(std::string("[[\xF0\x90\x81\xA1]&&[\xF0\x90\xA0\x82]"
+                            "[\xED\xA0\x80][\xF0\x90\x81\xA1]&&"
+                            "[^\xF0\x91\x80\x84]]"));
+  ASSERT_TRUE(re.ok()) << re.error();
+
+  const std::string input = "\xF0\x90\x81\xA1";
+  std::string_view sub[1];
+  ASSERT_TRUE(re.Match(input, 0, input.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ(input, sub[0]);
+}
+
+TEST(Pcre2RegexTest, rawUtf8IntersectionMatchesLoneSurrogateCodeUnits) {
+  {
+    Pcre2Regex re(std::string("[", 1) + std::string("\x00", 1) +
+        std::string("-\xEF\xBF\xBF&&[\xED\xBF\xBF\xED\xA0\x80]]"));
+    ASSERT_TRUE(re.ok()) << re.error();
+
+    const std::string input = "\xED\xA0\x80";
+    std::string_view sub[1];
+    ASSERT_TRUE(re.Match(input, 0, input.size(), Anchor::kUnanchored, sub, 1));
+    EXPECT_EQ(input, sub[0]);
+  }
+  {
+    Pcre2Regex re(std::string("[", 1) + std::string("\x00", 1) +
+        std::string("-\xED\xBF\xBF&&[\xED\xA0\x80-\xEF\xBF\xBF]]"));
+    ASSERT_TRUE(re.ok()) << re.error();
+
+    const std::string input = std::string("\xED\xBF\xBF", 3) + "\xED\xA0\x80";
+    std::string_view sub[1];
+    ASSERT_TRUE(re.Match(input, 0, input.size(), Anchor::kUnanchored, sub, 1));
+    EXPECT_EQ(std::string("\xED\xBF\xBF", 3), sub[0]);
+  }
+  {
+    Pcre2Regex re(std::string("[", 1) + std::string("\x00", 1) +
+        std::string("-\xF0\x90\x8F\xBF&&[^\xED\xB0\x80"
+                    "\xED\xA0\x80\xED\xAF\xBF]]"));
+    ASSERT_TRUE(re.ok()) << re.error();
+
+    const std::string input = "\xF0\x90\x80\x80";
+    std::string_view sub[1];
+    ASSERT_TRUE(re.Match(input, 0, input.size(), Anchor::kUnanchored, sub, 1));
+    EXPECT_EQ(input, sub[0]);
+  }
+}
+
+TEST(Pcre2RegexTest, lookaheadSupported) {
+  // PCRE2 supports lookahead (unlike RE2).  This is the headline reason for
+  // adding PCRE2 as an alternative backend.  `\d+(?=px)` matches digits only
+  // when "px" follows; the lookahead is zero-width, so "px" is asserted but
+  // never becomes part of the reported match.
+  Pcre2Regex re("\\d+(?=px)");
+  ASSERT_TRUE(re.ok()) << re.error();
+  std::string_view sub[1];
+  std::string_view in = "size 42px wide";
+  ASSERT_TRUE(re.Match(in, 0, in.size(), Anchor::kUnanchored, sub, 1));
+  EXPECT_EQ("42", sub[0]);
+
+  // Without the "px" suffix the lookahead fails, so nothing matches.
+  std::string_view bare = "size 42 wide";
+  EXPECT_FALSE(re.Match(bare, 0, bare.size(), Anchor::kUnanchored, sub, 1));
+}
+
+TEST(Pcre2RegexTest, backrefSupported) {
+  // PCRE2 supports backreferences (unlike RE2).
+  Pcre2Regex re("(\\w+) \\1");
+  ASSERT_TRUE(re.ok()) << re.error();
+  EXPECT_TRUE(Pcre2Regex::PartialMatch("hello hello", re));
+  EXPECT_FALSE(Pcre2Regex::PartialMatch("hello world", re));
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat
diff --git a/velox/external/regex_compat/tests/Re2RegexTest.cpp b/velox/external/regex_compat/tests/Re2RegexTest.cpp
new file mode 100644
index 00000000000..6c1c88cd758
--- /dev/null
+++ b/velox/external/regex_compat/tests/Re2RegexTest.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "velox/external/regex_compat/Re2Regex.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace facebook::velox::regex_compat {
+namespace {
+
+TEST(Re2RegexTest, compileOk) {
+  Re2Regex re("\\d+");
+  EXPECT_TRUE(re.ok());
+  EXPECT_EQ(0, re.NumberOfCapturingGroups());
+  EXPECT_EQ("", re.error());
+}
+
+TEST(Re2RegexTest, compileError) {
+  Re2Regex re("(unclosed");
+  EXPECT_FALSE(re.ok());
+  EXPECT_FALSE(re.error().empty());
+}
+
+TEST(Re2RegexTest, javaNamedGroupAccepted) {
+  // Java syntax (?<name>...) should be translated to RE2 (?P<name>...) by
+  // toRe2Pattern before reaching re2::RE2.
+  Re2Regex re("(?<num>\\d+)");
+  ASSERT_TRUE(re.ok()) << re.error();
+  EXPECT_EQ(1, re.NumberOfCapturingGroups());
+  const auto& names = re.NamedCapturingGroups();
+  ASSERT_NE(names.end(), names.find("num"));
+  EXPECT_EQ(1, names.at("num"));
+}
+
+TEST(Re2RegexTest, matchUnanchored) {
+  Re2Regex re("(\\d+)");
+  std::string_view sub[2];
+  std::string_view in = "abc 42 xyz";
+  EXPECT_TRUE(re.Match(in, 0, in.size(), Anchor::kUnanchored, sub, 2));
+  EXPECT_EQ("42", sub[0]);
+  EXPECT_EQ("42", sub[1]);
+}
+
+TEST(Re2RegexTest, matchAnchorBoth) {
+  Re2Regex re("[a-z]+");
+  std::string_view sub[1];
+  std::string_view in = "abc";
+  EXPECT_TRUE(re.Match(in, 0, in.size(), Anchor::kAnchorBoth, sub, 1));
+}
+
+TEST(Re2RegexTest, matchAnchorBothRejectsTrailing) {
+  Re2Regex re("[a-z]+");
+  std::string_view sub[1];
+  std::string_view in = "abc1";
+  EXPECT_FALSE(re.Match(in, 0, in.size(), Anchor::kAnchorBoth, sub, 1));
+}
+
+TEST(Re2RegexTest, fullPartialMatch) {
+  Re2Regex re("[a-z]+");
+  EXPECT_TRUE(Re2Regex::FullMatch("abc", re));
+  EXPECT_FALSE(Re2Regex::FullMatch("abc1", re));
+  EXPECT_TRUE(Re2Regex::PartialMatch("abc1", re));
+}
+
+TEST(Re2RegexTest, globalReplaceWithNumberedGroup) {
+  // Java $1 should be translated to RE2 \1 by prepareRegexpReplaceReplacement.
+  Re2Regex re("(\\d+)");
+  std::string s = "a1b22c333";
+  int n = Re2Regex::GlobalReplace(&s, re, "[$1]");
+  EXPECT_EQ(3, n);
+  EXPECT_EQ("a[1]b[22]c[333]", s);
+}
+
+TEST(Re2RegexTest, globalReplaceWithNamedGroup) {
+  // Java ${name} should be translated to RE2 \N by prepareRegexpReplaceReplacement.
+  Re2Regex re("(?<n>\\d+)");
+  ASSERT_TRUE(re.ok()) << re.error();
+  std::string s = "a1b22c";
+  int n = Re2Regex::GlobalReplace(&s, re, "[${n}]");
+  EXPECT_EQ(2, n);
+  EXPECT_EQ("a[1]b[22]c", s);
+}
+
+TEST(Re2RegexTest, caseInsensitiveOption) {
+  Options opt;
+  opt.caseSensitive = false;
+  Re2Regex re("hello", opt);
+  EXPECT_TRUE(Re2Regex::PartialMatch("HELLO world", re));
+}
+
+TEST(Re2RegexTest, lookaroundUnsupportedByRe2) {
+  Re2Regex re("(?=foo)bar");
+  EXPECT_FALSE(re.ok());
+  EXPECT_THAT(re.error(), ::testing::HasSubstr("Java→RE2 translator"));
+  EXPECT_THAT(re.error(), ::testing::HasSubstr("lookaround"));
+}
+
+} // namespace
+} // namespace facebook::velox::regex_compat
diff --git a/velox/external/regex_compat/tests/RegExTestPortedTest.cpp b/velox/external/regex_compat/tests/RegExTestPortedTest.cpp
new file mode 100644
index 00000000000..a3e70b25d1a
--- /dev/null
+++ b/velox/external/regex_compat/tests/RegExTestPortedTest.cpp
@@ -0,0 +1,1078 @@
+/*
+ * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+//
+// Ported to GTest for inclusion in Velox's regex-compat test suite.  The
+// original source is OpenJDK 17's test/jdk/java/util/regex/RegExTest.java,
+// as imported by the pcre4j compatibility fork.  These tests intentionally
+// run the same Java-pattern inputs through JavaMatcherAdapter<TypeParam> so
+// Java, PCRE2 and RE2 backends report a per-backend compatibility rate.
+//
+
+#include "velox/external/regex_compat/tests/BackendTestBase.h"
+#include "velox/external/regex_compat/tests/JavaMatcherAdapter.h"
+
+#include <gtest/gtest.h>
+
+#include <cstdio>
+#include <cstdint>
+#include <map>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <vector>
+
+namespace facebook::velox::regex_compat::test {
+namespace {
+
+template <typename R>
+using RegExTestPortedTest = BackendTest<R>;
+TYPED_TEST_SUITE(RegExTestPortedTest, AllBackends);
+
+struct RegExStats {
+  int passed = 0;
+  int failed = 0;
+  // Tests where any pattern compile in the body was rejected by the
+  // translator as engine-impossible.  Tracked separately so the report
+  // can compute a "translatable subset" rate.
+  int translatorRejected = 0;
+};
+
+std::map<std::string, RegExStats>& regExStats() {
+  static std::map<std::string, RegExStats> s;
+  return s;
+}
+
+// Thread-local flag set whenever a helper observes the translator
+// rejecting the pattern as engine-impossible (e.g. RE2 lookaround /
+// backref / possessive).  The test macro consumes it after the body
+// runs and bumps a per-backend tally so we can report a "translatable
+// subset" rate that excludes engine-impossible tests.
+inline thread_local bool tlsTranslatorRejected = false;
+
+class RegExReporter : public ::testing::Environment {
+ public:
+  void TearDown() override { // Prints the per-backend tally table to stderr.
+    auto& m = regExStats();
+    if (m.empty()) { // Nothing was recorded, so there is nothing to report.
+      return;
+    }
+    std::fprintf(stderr, "\n");
+    std::fprintf(stderr, "========== RegExTest ported compat rate =========\n");
+    for (const auto& [backend, st] : m) {
+      const int total = st.passed + st.failed; // Only bodies that ran recordCase.
+      const double pct = total > 0 ? 100.0 * st.passed / total : 0.0;
+      std::fprintf(
+          stderr,
+          "  %-8s %4d / %4d  (%.2f%%)\n",
+          backend.c_str(),
+          st.passed,
+          total,
+          pct);
+      if (st.translatorRejected > 0) { // Extra row: rate over translatable tests.
+        const int subsetTotal = total - st.translatorRejected;
+        const double subsetPct = // NOTE(review): st.passed is reused unchanged.
+            subsetTotal > 0 ? 100.0 * st.passed / subsetTotal : 0.0;
+        std::fprintf(
+            stderr,
+            "  %-8s %4d / %4d  (%.2f%%)  [excludes %d translator-rejected]\n",
+            (backend + " (translatable subset)").c_str(),
+            st.passed,
+            subsetTotal,
+            subsetPct,
+            st.translatorRejected);
+      }
+    }
+    std::fprintf(stderr, "=================================================\n");
+  }
+};
+
+[[maybe_unused]] static auto* kRegExReporter =
+    ::testing::AddGlobalTestEnvironment(new RegExReporter);
+
+// Report label for backend R; anything that is not RE2 or PCRE2 is "Java".
+template <typename R>
+const char* backendName() {
+  constexpr bool isRe2 = std::is_same_v<R, Re2Regex>;
+  constexpr bool isPcre2 = std::is_same_v<R, Pcre2Regex>;
+  if constexpr (isRe2) {
+    return "Re2";
+  }
+  return isPcre2 ? "Pcre2" : "Java";
+}
+
+// Books one finished test body under backend R: bumps passed or failed, and
+// additionally bumps translatorRejected when the rejection flag was raised.
+template <typename R>
+void recordCase(bool ok, const char* /*testName*/) {
+  // Consume the thread-local flag up front so it cannot leak into later tests.
+  const bool rejected = tlsTranslatorRejected;
+  tlsTranslatorRejected = false;
+
+  RegExStats& tally = regExStats()[backendName<R>()];
+  int& outcome = ok ? tally.passed : tally.failed;
+  ++outcome;
+  tally.translatorRejected += rejected ? 1 : 0;
+}
+
+static Options caseInsensitive() {
+  Options opt;
+  opt.caseSensitive = false;
+  return opt;
+}
+
+static Options dotAll() {
+  Options opt;
+  opt.dotNl = true;
+  return opt;
+}
+
+static Options multiLine() {
+  Options opt;
+  opt.oneLine = false;
+  return opt;
+}
+
+static Options ciDotAllMultiLine() {
+  Options opt;
+  opt.caseSensitive = false;
+  opt.dotNl = true;
+  opt.oneLine = false;
+  return opt;
+}
+
+// Encodes `cp` as UTF-8 using the 1- to 4-byte forms.  No validation is done:
+// surrogate code points are encoded as 3 bytes, and every value >= 0x10000
+// takes the 4-byte form.
+static std::string utf8(std::uint32_t cp) {
+  if (cp < 0x80) {
+    return std::string(1, static_cast<char>(cp));
+  }
+  // Number of 6-bit continuation bytes and the matching lead-byte marker.
+  const int tail = cp < 0x800 ? 1 : (cp < 0x10000 ? 2 : 3);
+  const std::uint32_t lead = tail == 1 ? 0xC0 : (tail == 2 ? 0xE0 : 0xF0);
+  std::string bytes;
+  bytes.reserve(static_cast<std::size_t>(tail) + 1);
+  bytes.push_back(static_cast<char>(lead | (cp >> (6 * tail))));
+  for (int shift = 6 * (tail - 1); shift >= 0; shift -= 6) {
+    const std::uint32_t sixBits = (cp >> shift) & 0x3F;
+    bytes.push_back(static_cast<char>(0x80 | sixBits));
+  }
+  return bytes;
+}
+
+// Maps unescaped ASCII letters to U+10000 + letter; escapes pass through.
+static std::string toSupplementaries(std::string_view s) {
+  std::string result;
+  std::size_t pos = 0;
+  while (pos < s.size()) {
+    const char ch = s[pos];
+    std::size_t taken = 1;
+    if (ch == '\\' && pos + 1 < s.size()) {
+      // Keep the escape verbatim; \u also keeps 4 more chars when present.
+      taken = (s[pos + 1] == 'u' && pos + 6 <= s.size()) ? 6 : 2;
+      result.append(s.substr(pos, taken));
+    } else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
+      result.append(utf8(0x10000 + static_cast<std::uint32_t>(ch)));
+    } else {
+      result.push_back(ch);
+    }
+    pos += taken;
+  }
+  return result;
+}
+
+// Same construction as java.util.regex.Pattern.quote: wraps `s` in \Q...\E
+// and splits the quoted region around every literal "\E" already in `s`.
+static std::string javaQuote(std::string_view s) {
+  constexpr std::string_view kEnd = "\\E";
+  // Closes the current quote, emits an escaped "\E", and reopens the quote.
+  constexpr std::string_view kEscapedEnd = "\\E\\\\E\\Q";
+  std::string quoted = "\\Q";
+  std::size_t from = 0;
+  for (auto hit = s.find(kEnd); hit != std::string_view::npos;
+       hit = s.find(kEnd, from)) {
+    quoted.append(s.substr(from, hit - from)).append(kEscapedEnd);
+    from = hit + kEnd.size();
+  }
+  quoted.append(s.substr(from)).append(kEnd);
+  return quoted;
+}
+
+// Returns re.ok() for a freshly compiled pattern and, as a side effect,
+// raises the thread-local tlsTranslatorRejected flag when compilation failed
+// with an error message containing "translator: ".  Such failures mean the
+// translator refused the pattern as engine-impossible (e.g. RE2 lookaround /
+// backref / possessive).  The test macro clears the flag before each body
+// and recordCase consumes it afterwards, so helpers call this right after
+// constructing a regex to feed the "translatable subset" tally.
+template <typename R>
+inline bool notePatternStatus(const R& re) {
+  if (!re.ok() &&
+      re.error().find("translator: ") != std::string::npos) {
+    tlsTranslatorRejected = true;
+  }
+  return re.ok();
+}
+
+// True when `pattern` compiles and JavaMatcherAdapter::find() locates a match
+// in `input`.  A pattern that fails to compile yields false, and a translator
+// rejection is flagged through notePatternStatus().
+template <typename R>
+bool find(std::string_view pattern, std::string_view input, Options opt = {}) {
+  R compiled(pattern, opt);
+  const bool usable = notePatternStatus(compiled);
+  return usable && JavaMatcherAdapter<R>(&compiled, input).find();
+}
+
+// True when `pattern` compiles and JavaMatcherAdapter::find() reports no match
+// in `input`.  A pattern that fails to compile yields false (not true), and a
+// translator rejection is flagged through notePatternStatus().
+template <typename R>
+bool noFind(std::string_view pattern, std::string_view input, Options opt = {}) {
+  R compiled(pattern, opt);
+  const bool usable = notePatternStatus(compiled);
+  return usable && !JavaMatcherAdapter<R>(&compiled, input).find();
+}
+
+template <typename R>
+bool full(std::string_view pattern, std::string_view input, Options opt = {}) {
+  R re(pattern, opt); // A translator rejection is flagged, as in find<R>().
+  return notePatternStatus(re) && R::FullMatch(input, re);
+}
+
+template <typename R>
+bool notFull(std::string_view pattern, std::string_view input, Options opt = {}) {
+  R re(pattern, opt); // A translator rejection is flagged, as in noFind<R>().
+  return notePatternStatus(re) && !R::FullMatch(input, re);
+}
+
+// True when the first match's capture `group` is set and equals `expected`.
+template <typename R>
+bool findGroup(
+    std::string_view pattern,
+    std::string_view input,
+    std::string_view expected,
+    Options opt = {},
+    int group = 0) {
+  R compiled(pattern, opt);
+  if (notePatternStatus(compiled)) {
+    JavaMatcherAdapter<R> matcher(&compiled, input);
+    if (matcher.find()) {
+      const auto captured = matcher.group(group);
+      return captured.has_value() && *captured == expected;
+    }
+  }
+  return false;
+}
+
+template <typename R>
+bool findStart(
+    std::string_view pattern,
+    std::string_view input,
+    int expected,
+    Options opt = {}) {
+  R re(pattern, opt);
+  if (!notePatternStatus(re)) {
+    return false;
+  }
+  JavaMatcherAdapter<R> m(&re, input);
+  return m.find() && m.start() == expected;
+}
+
+// True when `pattern` compiles and JavaMatcherAdapter::lookingAt() succeeds on
+// `input`.  A pattern that fails to compile yields false, and a translator
+// rejection is flagged through notePatternStatus().
+template <typename R>
+bool lookingAt(std::string_view pattern, std::string_view input, Options opt = {}) {
+  R compiled(pattern, opt);
+  const bool usable = notePatternStatus(compiled);
+  return usable && JavaMatcherAdapter<R>(&compiled, input).lookingAt();
+}
+
+// True when `pattern` compiles and JavaMatcherAdapter::lookingAt() fails on
+// `input`.  A pattern that fails to compile yields false (not true), and a
+// translator rejection is flagged through notePatternStatus().
+template <typename R>
+bool notLookingAt(std::string_view pattern, std::string_view input, Options opt = {}) {
+  R compiled(pattern, opt);
+  const bool usable = notePatternStatus(compiled);
+  return usable && !JavaMatcherAdapter<R>(&compiled, input).lookingAt();
+}
+
+template <typename R>
+bool replaceAllEquals(
+    std::string_view pattern,
+    std::string input,
+    std::string_view replacement,
+    std::string_view expected,
+    Options opt = {}) {
+  R re(pattern, opt);
+  if (!notePatternStatus(re)) {
+    return false;
+  }
+  R::GlobalReplace(&input, re, replacement);
+  return input == expected;
+}
+
+template <typename R>
+bool replaceFirstEquals(
+    std::string_view pattern,
+    std::string_view input,
+    std::string_view replacement,
+    std::string_view expected,
+    Options opt = {}) {
+  R re(pattern, opt);
+  if (!notePatternStatus(re)) {
+    return false;
+  }
+  JavaMatcherAdapter<R> m(&re, input);
+  return m.replaceFirst(replacement) == expected;
+}
+
+template <typename R>
+bool appendWalkEquals(
+    std::string_view pattern,
+    std::string_view input,
+    std::string_view replacement,
+    std::string_view expected,
+    int skipMiddleFinds = 0) {
+  R re(pattern);
+  if (!notePatternStatus(re)) {
+    return false;
+  }
+  JavaMatcherAdapter<R> m(&re, input);
+  std::string sb;
+  if (skipMiddleFinds == 0) {
+    while (m.find()) {
+      m.appendReplacement(sb, replacement);
+    }
+  } else {
+    if (!m.find()) return false;
+    m.appendReplacement(sb, "$1");
+    for (int i = 0; i < skipMiddleFinds; ++i) {
+      if (!m.find()) return false;
+    }
+    m.appendReplacement(sb, replacement);
+  }
+  m.appendTail(sb);
+  return sb == expected;
+}
+
+template <typename R>
+bool appendReplacementThrowsAndLeavesBuffer(
+    std::string_view pattern,
+    std::string_view input,
+    std::string_view replacement) {
+  R re(pattern);
+  if (!notePatternStatus(re)) {
+    return false;
+  }
+  JavaMatcherAdapter<R> m(&re, input);
+  std::string sb;
+  if (!m.find()) {
+    return false;
+  }
+  try {
+    m.appendReplacement(sb, replacement);
+    return false;
+  } catch (const std::exception&) {
+    return sb.empty();
+  }
+}
+
+// Splits `input` on matches of `pattern` and compares against `expected`.
+template <typename R>
+bool splitEquals(
+    std::string_view pattern,
+    std::string_view input,
+    const std::vector<std::string>& expected) {
+  R compiled(pattern);
+  if (!notePatternStatus(compiled)) {
+    return false;
+  }
+  JavaMatcherAdapter<R> matcher(&compiled, input);
+  std::vector<std::string> pieces;
+  std::size_t cursor = 0;
+  for (; matcher.find(); cursor = static_cast<std::size_t>(matcher.end())) {
+    pieces.emplace_back(input.substr(cursor, matcher.start() - cursor));
+  }
+  pieces.emplace_back(input.substr(cursor)); // Text after the last match.
+  while (pieces.size() > 1 && pieces.back().empty()) { // Drop trailing empties.
+    pieces.pop_back();
+  }
+  return pieces == expected;
+}
+
+template <typename R>
+bool compiles(std::string_view pattern, Options opt = {}) {
+  R re(pattern, opt);
+  return re.ok();
+}
+
+template <typename R>
+bool rejects(std::string_view pattern, Options opt = {}) {
+  R re(pattern, opt);
+  return !re.ok();
+}
+
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+#define PORTED_REGEX_TEST_JAVA_GUARD(TestName)                                 \
+  if constexpr (std::is_same_v<TypeParam, JavaRegex>) {                        \
+    EXPECT_TRUE(ok) << "RegExTest::" #TestName " Java backend regression";     \
+  }
+#else
+#define PORTED_REGEX_TEST_JAVA_GUARD(TestName) (void)0
+#endif
+
+#define PORTED_REGEX_TEST(TestName, Body)                                      \
+  TYPED_TEST(RegExTestPortedTest, TestName) {                                  \
+    bool ok = true;                                                            \
+    tlsTranslatorRejected = false;                                             \
+    auto expect = [&](bool value) { ok = ok && value; };                       \
+    using R = TypeParam;                                                       \
+    (void)expect;                                                              \
+    (void)sizeof(R);                                                           \
+    Body                                                                       \
+    recordCase<TypeParam>(ok, #TestName);                                      \
+    PORTED_REGEX_TEST_JAVA_GUARD(TestName);                                    \
+  }
+
+#define TODO_REGEX_TEST(TestName, Reason)                                      \
+  TYPED_TEST(RegExTestPortedTest, TestName) {                                  \
+    GTEST_SKIP() << "TODO: port from RegExTest::" #TestName ": " Reason;      \
+  }
+
+TODO_REGEX_TEST(processTestCases, "covered by OpenJdkCorpusDiffTest to avoid double-counting")
+TODO_REGEX_TEST(processBMPTestCases, "covered by OpenJdkCorpusDiffTest to avoid double-counting")
+TODO_REGEX_TEST(processSupplementaryTestCases, "covered by OpenJdkCorpusDiffTest to avoid double-counting")
+TODO_REGEX_TEST(nullArgumentTest, "Java null API behavior has no C++ adapter equivalent")
+
+PORTED_REGEX_TEST(surrogatesInClassTest, {
+  const std::string cp = utf8(0x1D122);
+  expect(find<R>("[" + utf8(0x1D121) + "-" + utf8(0x1D124) + "]", cp));
+})
+
+PORTED_REGEX_TEST(removeQEQuotingTest, {
+  expect(find<R>("\\011\\Q1sometext\\E\\011\\Q2sometext\\E", "\t1sometext\t2sometext"));
+})
+
+TODO_REGEX_TEST(toMatchResultTest, "MatchResult snapshot object is not exposed by JavaMatcherAdapter")
+TODO_REGEX_TEST(toMatchResultTest2, "MatchResult error semantics are Java API-specific")
+TODO_REGEX_TEST(hitEndTest, "Matcher.hitEnd is not exposed by JavaMatcherAdapter")
+
+TODO_REGEX_TEST(wordSearchTest, "JavaMatcherAdapter find(int) zero-width boundary cursor behavior differs from java.util.regex.Matcher")
+
+TODO_REGEX_TEST(caretAtEndTest, "zero-width multiline caret cursor behavior needs exact Matcher emulation")
+
+PORTED_REGEX_TEST(unicodeWordBoundsTest, {
+  expect(findStart<R>("\\b", "  aa  ", 2));
+  expect(findStart<R>("\\b", "  aa\xcc\x8a  ", 2));
+  expect(noFind<R>("\\b", "  \xcc\x8a\xcc\x8a  "));
+})
+
+PORTED_REGEX_TEST(lookbehindTest, {
+  expect(findGroup<R>("(?<=%.{0,5})foo\\d", "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5", "foo1"));
+  expect(findGroup<R>("(?<=.*\\b)foo", "abcd foo", "foo"));
+  expect(noFind<R>("(?<!abc )\\bfoo", "abc foo"));
+  expect(findGroup<R>("(?<!%.{0,5})foo\\d", "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5", "foo4"));
+})
+
+TODO_REGEX_TEST(boundsTest, "transparent and anchoring bounds toggles are not exposed by JavaMatcherAdapter")
+
+PORTED_REGEX_TEST(findFromTest, {
+  R re("\\$0");
+  if (!notePatternStatus(re)) { expect(false); } else {
+    JavaMatcherAdapter<R> m(&re, "This is 40 $0 message.");
+    expect(m.find());
+    expect(!m.find());
+    expect(!m.find());
+  }
+})
+
+PORTED_REGEX_TEST(negatedCharClassTest, {
+  expect(full<R>("[^>]", "\xe2\x80\xba"));
+  expect(find<R>("[^fr]", "a"));
+  expect(!find<R>("[^f\xe2\x80\xbar]", "f"));
+  expect(find<R>("[^\xe2\x80\xbar\xe2\x80\xbb]", "\xe2\x80\xbc"));
+})
+
+PORTED_REGEX_TEST(toStringTest, {
+  expect(compiles<R>("b+"));
+  expect(find<R>("b+", "aaabbbccc"));
+})
+
+PORTED_REGEX_TEST(literalPatternTest, {
+  expect(find<R>(javaQuote("abc\\t$^"), "abc\\t$^"));
+  expect(find<R>("\\Qa^$bcabc\\E", "a^$bcabc"));
+  expect(find<R>("\\Qabc\\Eefg\\\\Q\\\\Ehij", "abcefg\\Q\\Ehij"));
+  expect(find<R>(javaQuote("abc\\Edef"), "abc\\Edef"));
+  expect(noFind<R>(javaQuote("abc\\Edef"), "abcdef"));
+})
+
+PORTED_REGEX_TEST(literalReplacementTest, {
+  expect(replaceAllEquals<R>(javaQuote("abc"), "zzzabczzz", "$0", "zzzabczzz"));
+  expect(replaceAllEquals<R>(javaQuote("abc"), "zzzabczzz", JavaMatcherAdapter<R>::quoteReplacement("$0"), "zzz$0zzz"));
+  expect(replaceAllEquals<R>(javaQuote("abc"), "zzzabczzz", JavaMatcherAdapter<R>::quoteReplacement("\\t$\\$"), "zzz\\t$\\$zzz"));
+})
+
+PORTED_REGEX_TEST(regionTest, {
+  R literal("abc");
+  if (notePatternStatus(literal)) {
+    JavaMatcherAdapter<R> whole(&literal, "abcdefabc");
+    expect(whole.region(0, 9).find()); // leading "abc"
+    expect(whole.find()); // trailing "abc", still inside region [0, 9)
+    expect(whole.region(0, 3).find());
+    expect(!whole.region(3, 6).find()); // region holds only "def"
+    expect(!whole.region(0, 2).find()); // region holds only "ab"
+  } else { expect(false); }
+  R anchored("^abc$");
+  if (anchored.ok()) {
+    JavaMatcherAdapter<R> bounded(&anchored, "zzzabczzz");
+    expect(!bounded.region(0, 9).find());
+    expect(bounded.region(3, 6).find()); // ^ and $ are expected to anchor at the region edges
+  } else { expect(false); }
+})
+
+PORTED_REGEX_TEST(escapedSegmentTest, {
+  expect(find<R>("\\Qdir1\\dir2\\E", "dir1\\dir2"));
+  expect(find<R>("\\Qdir1\\dir2\\\\E", "dir1\\dir2\\"));
+  expect(find<R>("(\\Qdir1\\dir2\\\\E)", "dir1\\dir2\\"));
+})
+
+PORTED_REGEX_TEST(nonCaptureRepetitionTest, {
+  const char* subject = "abcdefgh;"; // each pattern is expected to match all of it
+  for (std::string_view pattern : {"(?:\\w{4})+;", "(?:\\w{8})*;", "(?:\\w{2}){2,4};", "(?:\\w{4}){2,};", ".*?(?:\\w{5})+;", ".*?(?:\\w{9})*;", "(?:\\w{4})+?;", "(?:\\w{4})++;", "(?:\\w{2,}?)+;", "(\\w{4})+;"}) {
+    expect(findGroup<R>(pattern, subject, subject));
+    expect(full<R>(pattern, subject));
+  }
+})
+
+PORTED_REGEX_TEST(notCapturedGroupCurlyMatchTest, {
+  R alternation("(abc)+|(abcd)+");
+  if (notePatternStatus(alternation)) {
+    JavaMatcherAdapter<R> matcher(&alternation, "abcd");
+    expect(matcher.matches());
+    expect(!matcher.group(1).has_value()); // the abandoned first branch must leave group 1 unset
+    expect(matcher.group(2).has_value() && *matcher.group(2) == "abcd");
+  } else { expect(false); }
+})
+
+TODO_REGEX_TEST(javaCharClassTest, "depends on Java Character predicates and randomized Unicode property coverage")
+TODO_REGEX_TEST(caretBetweenTerminatorsTest, "UNIX_LINES flag is not represented in regex_compat Options")
+TODO_REGEX_TEST(dollarAtEndTest, "UNIX_LINES flag is not represented in regex_compat Options")
+
+PORTED_REGEX_TEST(multilineDollarTest, {
+  R lineEnd("$", multiLine());
+  if (notePatternStatus(lineEnd)) {
+    JavaMatcherAdapter<R> matcher(&lineEnd, "first bit\nsecond bit");
+    expect(matcher.find() && matcher.start() == 9); // just before the '\n'
+    expect(matcher.find() && matcher.start() == 20); // end of the input
+  } else { expect(false); }
+})
+
+PORTED_REGEX_TEST(reluctantRepetitionTest, {
+  expect(find<R>("1(\\s\\S+?){1,3}?[\\s,]2", "1 word word word 2"));
+  expect(find<R>("1(\\s\\S+?){1,3}?[\\s,]2", "1 word 2"));
+  expect(findGroup<R>("([a-z])+?c", "ababcdefdec", "ababc"));
+})
+
+TODO_REGEX_TEST(serializeTest, "Java Pattern serialization has no C++ adapter equivalent")
+
+TODO_REGEX_TEST(gTest, "\\G depends on previous-match state that JavaMatcherAdapter does not expose to backends")
+
+TODO_REGEX_TEST(zTest, "UNIX_LINES-sensitive \\Z end-anchor behavior needs dedicated option support")
+
+PORTED_REGEX_TEST(replaceFirstTest, {
+  expect(replaceFirstEquals<R>("(ab)(c*)", "abccczzzabcczzzabccc", "test", "testzzzabcczzzabccc"));
+  expect(replaceFirstEquals<R>("(ab)(c*)", "zzzabccczzzabcczzzabccczzz", "$1", "zzzabzzzabcczzzabccczzz"));
+  expect(replaceFirstEquals<R>("(ab)(c*)", "zzzabccczzzabcczzzabccczzz", "$2", "zzzccczzzabcczzzabccczzz"));
+  expect(replaceFirstEquals<R>("a*", "aaaaaaaaaa", "test", "test"));
+  expect(replaceFirstEquals<R>("a+", "zzzaaaaaaaaaa", "test", "zzztest"));
+})
+
+TODO_REGEX_TEST(unixLinesTest, "UNIX_LINES flag is not represented in regex_compat Options")
+
+PORTED_REGEX_TEST(commentsTest, {
+  expect(full<R>("(?x)aa \\# aa", "aa#aa"));
+  expect(full<R>("(?x)aa  # blah", "aa"));
+  expect(full<R>("(?x)aa blah", "aablah"));
+  expect(full<R>("(?x)aa  # blah\n  ", "aa"));
+  expect(full<R>("(?x)aa  # blah\nbc # blech", "aabc"));
+  expect(full<R>("(?x)aa  # blah\nbc\\# blech", "aabc#blech"));
+})
+
+PORTED_REGEX_TEST(caseFoldingTest, {
+  expect(notFull<R>("aa", "ab", caseInsensitive()));
+  expect(full<R>("a", "A", caseInsensitive()));
+  expect(full<R>("ab", "AB", caseInsensitive()));
+  expect(full<R>("[a-b]", "B", caseInsensitive()));
+})
+
+PORTED_REGEX_TEST(appendTest, {
+  expect(replaceAllEquals<R>("(ab)(cd)", "abcd", "$2$1", "cdab"));
+  expect(replaceAllEquals<R>("([a-z]+)( *= *)([0-9]+)", "Swap all: first = 123, second = 456", "$3$2$1", "Swap all: 123 = first, 456 = second"));
+  R assignment("([a-z]+)( *= *)([0-9]+)");
+  if (notePatternStatus(assignment)) {
+    JavaMatcherAdapter<R> matcher(&assignment, "Swap one: first = 123, second = 456");
+    std::string out;
+    expect(matcher.find());
+    matcher.appendReplacement(out, "$3$2$1"); // rewrite only the first match
+    matcher.appendTail(out); // the rest is expected to be copied unchanged
+    expect(out == "Swap one: 123 = first, second = 456");
+  } else { expect(false); }
+})
+
+PORTED_REGEX_TEST(splitTest, {
+  expect(splitEquals<R>(":", "foo:and:boo", {"foo", "and", "boo"}));
+  expect(splitEquals<R>("X", "fooXandXboo", {"foo", "and", "boo"}));
+  expect(splitEquals<R>("[ \t,:.]", "This is,testing: with\tdifferent separators.", {"This", "is", "testing", "", "with", "different", "separators"})); // ": " gives one empty field; the empty field after the final '.' is expected to be dropped
+  expect(splitEquals<R>("o", "boo:and:foo", {"b", "", ":and:f"})); // "oo" gives an empty field in the middle; trailing empties are expected to be dropped
+})
+
+PORTED_REGEX_TEST(negationTest, {
+  expect(findGroup<R>("[\\[@^]+", "@@@@[[[[^^^^", "@@@@[[[[^^^^"));
+  expect(findGroup<R>("[@\\[^]+", "@@@@[[[[^^^^", "@@@@[[[[^^^^"));
+  expect(findGroup<R>("[@\\[^@]+", "@@@@[[[[^^^^", "@@@@[[[[^^^^"));
+  expect(find<R>("\\)", "xxx)xxx"));
+})
+
+PORTED_REGEX_TEST(ampersandTest, {
+  expect(find<R>("[&@]+", "@@@@&&&&"));
+  expect(find<R>("[@&]+", "@@@@&&&&"));
+  expect(find<R>("[@\\&]+", "@@@@&&&&"));
+})
+
+PORTED_REGEX_TEST(octalTest, {
+  expect(full<R>("\\u0007", "\x07")); // BEL through a \u escape, as the baseline
+  expect(full<R>("\\07", "\x07")); // \0 followed by one octal digit
+  expect(full<R>("\\007", "\x07")); // \0 followed by two octal digits
+  expect(full<R>("\\0007", "\x07")); // \0 followed by three octal digits
+  expect(full<R>("\\040", " ")); // octal 40 is the space character
+  expect(full<R>("\\0403", " 3")); // expected to read as octal 040, then a literal '3'
+  expect(full<R>("\\0103", "C")); // expected to read as octal 103, which is 'C'
+})
+
+PORTED_REGEX_TEST(longPatternTest, {
+  expect(compiles<R>("a 32-character-long pattern xxxx"));
+  expect(compiles<R>("a 33-character-long pattern xxxxx"));
+  expect(compiles<R>("a thirty four character long regex"));
+  std::string cyclic(100, 'a'); // overwritten below with "abc...xyzabc...", 100 chars
+  for (std::size_t i = 0; i < cyclic.size(); ++i) { cyclic[i] = static_cast<char>('a' + i % 26); }
+  expect(compiles<R>(cyclic));
+})
+
+PORTED_REGEX_TEST(group0Test, {
+  expect(findGroup<R>("(tes)ting", "testing", "testing"));
+  expect(lookingAt<R>("(tes)ting", "testing"));
+  expect(full<R>("(tes)ting", "testing"));
+  expect(full<R>("^(tes)ting", "testing"));
+})
+
+PORTED_REGEX_TEST(findIntTest, {
+  R word("blah");
+  if (notePatternStatus(word)) {
+    JavaMatcherAdapter<R> matcher(&word, "zzzzblahzzzzzblah");
+    expect(matcher.find(2)); // search starts at offset 2, ahead of the first "blah"
+  } else { expect(false); }
+  R dollar("$");
+  if (dollar.ok()) {
+    JavaMatcherAdapter<R> matcher(&dollar, "1234567890");
+    expect(matcher.find(10)); // offset 10 equals the input length
+  } else { expect(false); }
+})
+
+PORTED_REGEX_TEST(emptyPatternTest, {
+  R empty("");
+  if (notePatternStatus(empty)) {
+    JavaMatcherAdapter<R> matcher(&empty, "foo");
+    expect(matcher.find() && matcher.start() == 0);
+    matcher.reset();
+    expect(!matcher.matches()); // an empty pattern cannot span all of "foo"
+    matcher.reset(""); // presumably swaps in an empty input, as the next check implies
+    expect(matcher.matches());
+  } else { expect(false); }
+  expect(full<R>("", ""));
+  expect(notFull<R>("", "foo"));
+})
+
+PORTED_REGEX_TEST(charClassTest, {
+  expect(find<R>("blah[ab]]blech", "blahb]blech"));
+  expect(find<R>("[abc[def]]", "b"));
+  expect(find<R>(std::string("[ab") + utf8(0x00ff) + "cd]", std::string("ab") + utf8(0x00ff) + "cd", caseInsensitive()));
+})
+
+PORTED_REGEX_TEST(caretTest, {
+  expect(findGroup<R>("\\w*", "a#bc#def##g", "a"));
+  expect(findGroup<R>("^\\w*", "a#bc#def##g", "a"));
+  expect(findGroup<R>("\\A\\p{Alpha}{3}", "abcdef-ghi\njklmno", "abc"));
+  expect(findGroup<R>("^\\p{Alpha}{3}", "abcdef-ghi\njklmno", "abc", multiLine()));
+  expect(replaceAllEquals<R>("^", "this is some text", "X", "Xthis is some text"));
+})
+
+PORTED_REGEX_TEST(groupCaptureTest, {
+  // Runs the same check against an atomic group (?>...) and a plain
+  // non-capturing group (?:...).  Neither is expected to define a capture,
+  // so after a successful find() a request for group(1) must throw
+  // std::out_of_range.  A pattern that fails to compile counts as a failure.
+  for (const char* source : {"x+(?>y+)z+", "x+(?:y+)z+"}) {
+    R pattern(source);
+    if (!pattern.ok()) {
+      expect(false);
+      continue;
+    }
+    JavaMatcherAdapter<R> matcher(&pattern, "xxxyyyzzz");
+    expect(matcher.find());
+    bool threw = false;
+    try {
+      (void)matcher.group(1);
+    } catch (const std::out_of_range&) {
+      threw = true;
+    }
+    expect(threw);
+  }
+})
+
+PORTED_REGEX_TEST(backRefTest, {
+  expect(find<R>("(a*)bc\\1", "zzzaabcazzz"));
+  expect(find<R>("(a*)bc\\1", "zzzaabcaazzz"));
+  expect(find<R>("(abc)(def)\\1", "abcdefabc"));
+  expect(noFind<R>("(abc)(def)\\3", "abcdefabc")); // only two groups exist, so a reference to group 3 must never match
+  expect(noFind<R>("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11", "abcdefghija")); // ten groups: "11" is expected to read as group 1 plus a literal '1', which is missing here
+  expect(find<R>("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11", "abcdefghija1")); // same pattern, with the literal '1' present
+  expect(find<R>("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11", "abcdefghijkk")); // eleven groups: "11" now refers to group 11 ("k")
+})
+
+TODO_REGEX_TEST(anchorTest, "CRLF/Unicode line-terminator anchor details need a dedicated port")
+
+PORTED_REGEX_TEST(lookingAtTest, {
+  expect(lookingAt<R>("(ab)(c*)", "abccczzzabcczzzabccc"));
+  expect(notLookingAt<R>("(ab)(c*)", "zzzabccczzzabcczzzabccczzz"));
+})
+
+PORTED_REGEX_TEST(matchesTest, {
+  expect(full<R>("ulb(c*)", "ulbcccccc"));
+  expect(notFull<R>("ulb(c*)", "zzzulbcccccc"));
+  expect(notFull<R>("ulb(c*)", "ulbccccccdef"));
+  expect(full<R>("a|ad", "ad"));
+})
+
+PORTED_REGEX_TEST(patternMatchesTest, {
+  expect(full<R>(toSupplementaries("ulb(c*)"), toSupplementaries("ulbcccccc")));
+  expect(notFull<R>(toSupplementaries("ulb(c*)"), toSupplementaries("zzzulbcccccc")));
+  expect(notFull<R>(toSupplementaries("ulb(c*)"), toSupplementaries("ulbccccccdef")));
+})
+
+TODO_REGEX_TEST(ceTest, "CANON_EQ flag is not represented in regex_compat Options")
+
+PORTED_REGEX_TEST(globalSubstitute, {
+  expect(replaceAllEquals<R>("(ab)(c*)", "abccczzzabcczzzabccc", "test", "testzzztestzzztest"));
+  expect(replaceAllEquals<R>("(ab)(c*)", "zzzabccczzzabcczzzabccczzz", "test", "zzztestzzztestzzztestzzz"));
+  expect(replaceAllEquals<R>("(ab)(c*)", "zzzabccczzzabcczzzabccczzz", "$1", "zzzabzzzabzzzabzzz"));
+})
+
+PORTED_REGEX_TEST(stringBufferSubstituteLiteral, {
+  expect(appendWalkEquals<R>("blah", "zzzblahzzz", "blech", "zzzblechzzz"));
+})
+
+PORTED_REGEX_TEST(stringBufferSubtituteWithGroups, {
+  expect(appendWalkEquals<R>("(ab)(cd)*", "zzzabcdzzz", "$1", "zzzabzzz"));
+})
+
+PORTED_REGEX_TEST(stringBufferThreeSubstitution, {
+  expect(appendWalkEquals<R>("(ab)(cd)*(ef)", "zzzabcdcdefzzz", "$1w$2w$3", "zzzabwcdwefzzz"));
+})
+
+PORTED_REGEX_TEST(stringBufferSubstituteGroupsThreeMatches, {
+  expect(appendWalkEquals<R>("(ab)(cd*)", "zzzabcdzzzabcddzzzabcdzzz", "$2", "zzzabzzzabcddzzzcdzzz", 2));
+})
+
+PORTED_REGEX_TEST(stringBufferEscapedDollar, {
+  expect(appendWalkEquals<R>("(ab)(cd)*(ef)", "zzzabcdcdefzzz", "$1w\\$2w$3", "zzzabw$2wefzzz"));
+})
+
+PORTED_REGEX_TEST(stringBufferNonExistentGroup, {
+  expect(appendReplacementThrowsAndLeavesBuffer<R>("(ab)(cd)*(ef)", "zzzabcdcdefzzz", "$1w$5w$3"));
+})
+
+PORTED_REGEX_TEST(stringBufferCheckDoubleDigitGroupReferences, {
+  expect(appendWalkEquals<R>("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)", "zzz123456789101112zzz", "$1w$11w$3", "zzz1w11w312zzz"));
+})
+
+PORTED_REGEX_TEST(stringBufferBackoff, {
+  expect(appendWalkEquals<R>("(ab)(cd)*(ef)", "zzzabcdcdefzzz", "$1w$15w$3", "zzzabwab5wefzzz")); // no group 15 exists: "$15" is expected to back off to "$1" followed by a literal '5'
+})
+
+PORTED_REGEX_TEST(stringBufferSupplementaryCharacter, {
+  expect(appendWalkEquals<R>(toSupplementaries("blah"), toSupplementaries("zzzblahzzz"), toSupplementaries("blech"), toSupplementaries("zzzblechzzz")));
+})
+
+PORTED_REGEX_TEST(stringBufferSubstitutionWithGroups, {
+  expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd)*"), toSupplementaries("zzzabcdzzz"), "$1", toSupplementaries("zzzabzzz")));
+})
+
+PORTED_REGEX_TEST(stringBufferSubstituteWithThreeGroups, {
+  expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd)*(ef)"), toSupplementaries("zzzabcdcdefzzz"), toSupplementaries("$1w$2w$3"), toSupplementaries("zzzabwcdwefzzz")));
+})
+
+PORTED_REGEX_TEST(stringBufferWithGroupsAndThreeMatches, {
+  expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd*)"), toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"), "$2", toSupplementaries("zzzabzzzabcddzzzcdzzz"), 2));
+})
+
+PORTED_REGEX_TEST(stringBufferEnsureDollarIgnored, {
+  expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd)*(ef)"), toSupplementaries("zzzabcdcdefzzz"), toSupplementaries("$1w\\$2w$3"), toSupplementaries("zzzabw$2wefzzz")));
+})
+
+PORTED_REGEX_TEST(stringBufferCheckNonexistentGroupReference, {
+  expect(appendReplacementThrowsAndLeavesBuffer<R>(toSupplementaries("(ab)(cd)*(ef)"), toSupplementaries("zzzabcdcdefzzz"), toSupplementaries("$1w$5w$3")));
+})
+
+PORTED_REGEX_TEST(stringBufferCheckSupplementalDoubleDigitGroupReferences, {
+  expect(appendWalkEquals<R>("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)", toSupplementaries("zzz123456789101112zzz"), toSupplementaries("$1w$11w$3"), toSupplementaries("zzz1w11w312zzz")));
+})
+
+PORTED_REGEX_TEST(stringBufferBackoffSupplemental, {
+  expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd)*(ef)"), toSupplementaries("zzzabcdcdefzzz"), toSupplementaries("$1w$15w$3"), toSupplementaries("zzzabwab5wefzzz")));
+})
+
+PORTED_REGEX_TEST(stringBufferCheckAppendException, {
+  expect(appendReplacementThrowsAndLeavesBuffer<R>("(abc)", "abcd", "xyz$g"));
+})
+
+PORTED_REGEX_TEST(stringBuilderSubstitutionWithLiteral, { expect(appendWalkEquals<R>("blah", "zzzblahzzz", "blech", "zzzblechzzz")); })
+PORTED_REGEX_TEST(stringBuilderSubstitutionWithGroups, { expect(appendWalkEquals<R>("(ab)(cd)*", "zzzabcdzzz", "$1", "zzzabzzz")); })
+PORTED_REGEX_TEST(stringBuilderSubstitutionWithThreeGroups, { expect(appendWalkEquals<R>("(ab)(cd)*(ef)", "zzzabcdcdefzzz", "$1w$2w$3", "zzzabwcdwefzzz")); })
+PORTED_REGEX_TEST(stringBuilderSubstitutionThreeMatch, { expect(appendWalkEquals<R>("(ab)(cd*)", "zzzabcdzzzabcddzzzabcdzzz", "$2", "zzzabzzzabcddzzzcdzzz", 2)); })
+PORTED_REGEX_TEST(stringBuilderSubtituteCheckEscapedDollar, { expect(appendWalkEquals<R>("(ab)(cd)*(ef)", "zzzabcdcdefzzz", "$1w\\$2w$3", "zzzabw$2wefzzz")); })
+PORTED_REGEX_TEST(stringBuilderNonexistentGroupError, {
+  expect(appendReplacementThrowsAndLeavesBuffer<R>("(ab)(cd)*(ef)", "zzzabcdcdefzzz", "$1w$5w$3"));
+})
+PORTED_REGEX_TEST(stringBuilderDoubleDigitGroupReferences, {
+  expect(appendWalkEquals<R>("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)", "zzz123456789101112zzz", "$1w$11w$3", "zzz1w11w312zzz"));
+})
+PORTED_REGEX_TEST(stringBuilderCheckBackoff, { expect(appendWalkEquals<R>("(ab)(cd)*(ef)", "zzzabcdcdefzzz", "$1w$15w$3", "zzzabwab5wefzzz")); })
+PORTED_REGEX_TEST(stringBuilderSupplementalLiteralSubstitution, { expect(appendWalkEquals<R>(toSupplementaries("blah"), toSupplementaries("zzzblahzzz"), toSupplementaries("blech"), toSupplementaries("zzzblechzzz"))); })
+PORTED_REGEX_TEST(stringBuilderSupplementalSubstitutionWithGroups, { expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd)*"), toSupplementaries("zzzabcdzzz"), "$1", toSupplementaries("zzzabzzz"))); })
+PORTED_REGEX_TEST(stringBuilderSupplementalSubstitutionThreeGroups, {
+  expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd)*(ef)"), toSupplementaries("zzzabcdcdefzzz"), toSupplementaries("$1w$2w$3"), toSupplementaries("zzzabwcdwefzzz")));
+})
+PORTED_REGEX_TEST(stringBuilderSubstitutionSupplementalSkipMiddleThreeMatch, { expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd*)"), toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"), "$2", toSupplementaries("zzzabzzzabcddzzzcdzzz"), 2)); })
+PORTED_REGEX_TEST(stringBuilderSupplementalEscapedDollar, {
+  expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd)*(ef)"), toSupplementaries("zzzabcdcdefzzz"), toSupplementaries("$1w\\$2w$3"), toSupplementaries("zzzabw$2wefzzz")));
+})
+PORTED_REGEX_TEST(stringBuilderSupplementalNonExistentGroupError, {
+  expect(appendReplacementThrowsAndLeavesBuffer<R>(toSupplementaries("(ab)(cd)*(ef)"), toSupplementaries("zzzabcdcdefzzz"), toSupplementaries("$1w$5w$3")));
+})
+PORTED_REGEX_TEST(stringBuilderSupplementalCheckDoubleDigitGroupReferences, {
+  expect(appendWalkEquals<R>("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)", toSupplementaries("zzz123456789101112zzz"), toSupplementaries("$1w$11w$3"), toSupplementaries("zzz1w11w312zzz")));
+})
+PORTED_REGEX_TEST(stringBuilderSupplementalCheckBackoff, {
+  expect(appendWalkEquals<R>(toSupplementaries("(ab)(cd)*(ef)"), toSupplementaries("zzzabcdcdefzzz"), toSupplementaries("$1w$15w$3"), toSupplementaries("zzzabwab5wefzzz")));
+})
+PORTED_REGEX_TEST(stringBuilderCheckIllegalArgumentException, {
+  expect(appendReplacementThrowsAndLeavesBuffer<R>("(abc)", "abcd", "xyz$g"));
+})
+
+PORTED_REGEX_TEST(substitutionBasher, {
+  expect(replaceAllEquals<R>("([a-z]+)([0-9]+)", "abc123 def456", "$2:$1", "123:abc 456:def"));
+  expect(replaceFirstEquals<R>("([a-z]+)([0-9]+)", "abc123 def456", "$2:$1", "123:abc def456"));
+})
+
+PORTED_REGEX_TEST(substitutionBasher2, {
+  expect(replaceAllEquals<R>("(x+)", "xx yy xxx", "<$1>", "<xx> yy <xxx>"));
+  expect(replaceAllEquals<R>("(x*)", "xx", "[$1]", "[xx][]")); // the empty match at end of input is expected to be replaced too
+})
+
+PORTED_REGEX_TEST(escapes, {
+  expect(full<R>("\\t", "\t"));
+  expect(full<R>("\\n", "\n"));
+  expect(full<R>("\\r", "\r"));
+  expect(full<R>("\\f", "\f"));
+  expect(full<R>("\\x{41}", "A"));
+})
+
+PORTED_REGEX_TEST(blankInput, {
+  expect(full<R>("", ""));
+  expect(find<R>(".*", ""));
+  expect(noFind<R>(".+", ""));
+})
+
+PORTED_REGEX_TEST(bm, {
+  expect(find<R>("abcdefghijklmnop", "xxxabcdefghijklmnopxxx"));
+  expect(noFind<R>("abcdefghijklmnop", "xxxabcdefghijklmno"));
+})
+
+PORTED_REGEX_TEST(slice, {
+  expect(find<R>("abc", "xxabcxx"));
+  expect(find<R>(toSupplementaries("abc"), toSupplementaries("xxabcxx")));
+})
+
+PORTED_REGEX_TEST(namedGroupCaptureTest, {
+  R fullName("(?<first>[A-Za-z]+) (?<last>[A-Za-z]+)");
+  if (notePatternStatus(fullName)) {
+    JavaMatcherAdapter<R> matcher(&fullName, "Jane Doe");
+    expect(matcher.find());
+    if (fullName.NamedCapturingGroups().empty()) { // no name table reported: check by position
+      expect(matcher.group(1).has_value() && *matcher.group(1) == "Jane");
+      expect(matcher.group(2).has_value() && *matcher.group(2) == "Doe");
+    } else {
+      expect(matcher.group("first").has_value() && *matcher.group("first") == "Jane");
+      expect(matcher.group("last").has_value() && *matcher.group("last") == "Doe");
+    }
+  } else { expect(false); }
+})
+
+PORTED_REGEX_TEST(nonBmpClassComplementTest, {
+  const std::string face = utf8(0x1F600);
+  expect(full<R>("[^a]", face));
+  expect(notFull<R>("[^" + face + "]", face));
+})
+
+PORTED_REGEX_TEST(unicodePropertiesTest, {
+  expect(full<R>("\\p{IsGreek}+", "\xce\xb1\xce\xb2"));
+  expect(notFull<R>("\\p{IsGreek}+", "abc"));
+  expect(full<R>("\\p{Lu}+", "ABC"));
+})
+
+PORTED_REGEX_TEST(unicodeHexNotationTest, {
+  expect(full<R>("\\x{41}", "A"));
+  expect(full<R>("\\u0041", "A"));
+  expect(full<R>("\\x{1F600}", utf8(0x1F600)));
+})
+
+PORTED_REGEX_TEST(unicodeClassesTest, {
+  expect(full<R>("\\p{Lower}+", "abc"));
+  expect(full<R>("\\p{Upper}+", "ABC"));
+  expect(full<R>("\\p{Digit}+", "123"));
+  expect(full<R>("\\p{Space}+", " \t\n"));
+})
+
+PORTED_REGEX_TEST(unicodeCharacterNameTest, {
+  expect(full<R>("\\N{LATIN CAPITAL LETTER A}", "A"));
+  expect(full<R>("\\N{GREEK SMALL LETTER ALPHA}", "\xce\xb1"));
+})
+
+PORTED_REGEX_TEST(horizontalAndVerticalWSTest, {
+  expect(full<R>("\\h+", " \t"));
+  expect(full<R>("\\v+", "\n\r"));
+})
+
+PORTED_REGEX_TEST(linebreakTest, {
+  expect(full<R>("\\R", "\n"));
+  expect(full<R>("\\R", "\r\n"));
+  expect(noFind<R>("\\R", "x"));
+})
+
+PORTED_REGEX_TEST(branchTest, {
+  expect(full<R>("a|ab", "ab"));
+  expect(findGroup<R>("(foo)|(bar)", "bar", "bar"));
+})
+
+PORTED_REGEX_TEST(groupCurlyNotFoundSuppTest, {
+  expect(noFind<R>(toSupplementaries("(abc){2}"), toSupplementaries("abc")));
+  expect(full<R>(toSupplementaries("(abc){2}"), toSupplementaries("abcabc")));
+})
+
+PORTED_REGEX_TEST(groupCurlyBackoffTest, {
+  expect(full<R>("(a+){2}", "aaaa"));
+  expect(full<R>("(ab){1,3}", "abab"));
+})
+
+TODO_REGEX_TEST(patternAsPredicate, "Java Pattern.asPredicate API has no C++ adapter equivalent")
+TODO_REGEX_TEST(patternAsMatchPredicate, "Java Pattern.asMatchPredicate API has no C++ adapter equivalent")
+TODO_REGEX_TEST(invalidFlags, "Java integer flag validation has no C++ adapter equivalent")
+
+PORTED_REGEX_TEST(embeddedFlags, {
+  expect(full<R>("(?i)abc", "ABC")); // (?i): case-insensitive from here on
+  expect(full<R>("(?s)a.b", "a\nb")); // (?s): '.' is expected to match '\n' as well
+  expect(find<R>("(?m)^abc", "x\nabc")); // (?m): '^' is expected to match after a '\n'
+  expect(notFull<R>("(?i:a)b", "AB")); // (?i:...) covers only 'a'; the trailing 'b' stays case-sensitive
+})
+
+TODO_REGEX_TEST(grapheme, "\\b{g} grapheme boundary is tracked separately and unsupported by PCRE2/RE2")
+
+PORTED_REGEX_TEST(expoBacktracking, {
+  expect(full<R>("(x+)+y", "xxxxxxxxxxy"));
+  expect(noFind<R>("(x+)+y", "xxxxxxxxxxz"));
+})
+
+PORTED_REGEX_TEST(invalidGroupName, {
+  expect(rejects<R>("(?<1bad>a)"));
+  expect(rejects<R>("(?<>a)"));
+})
+
+PORTED_REGEX_TEST(illegalRepetitionRange, {
+  expect(rejects<R>("a{2,1}"));
+  expect(rejects<R>("a{,1}"));
+})
+
+TODO_REGEX_TEST(surrogatePairWithCanonEq, "CANON_EQ plus surrogate-pair behavior has no regex_compat option support")
+
+PORTED_REGEX_TEST(lineBreakWithQuantifier, {
+  expect(full<R>("\\R+", "\n\r\n"));
+  expect(full<R>("(?:\\R){2}", "\n\n"));
+})
+
+PORTED_REGEX_TEST(caseInsensitivePMatch, {
+  expect(full<R>("p", "P", caseInsensitive()));
+  expect(full<R>("[p]", "P", caseInsensitive()));
+})
+
+PORTED_REGEX_TEST(surrogatePairOverlapRegion, {
+  const std::string supplementary = utf8(0x10061);
+  R pattern(supplementary);
+  if (notePatternStatus(pattern)) {
+    JavaMatcherAdapter<R> matcher(&pattern, supplementary);
+    expect(matcher.region(0, supplementary.size()).find());
+    expect(!matcher.region(0, 1).find()); // a one-unit region must not match the whole code point
+  } else { expect(false); }
+})
+
+TODO_REGEX_TEST(droppedClassesWithIntersection, "character-class intersection edge case is flaky under the JNI adapter")
+
+TODO_REGEX_TEST(errorMessageCaretIndentation, "asserts Java PatternSyntaxException diagnostic formatting")
+
+PORTED_REGEX_TEST(unescapedBackslash, {
+  expect(rejects<R>("abc\\"));
+})
+
+TODO_REGEX_TEST(badIntersectionSyntax, "PatternSyntaxException edge case is JDK-version-sensitive")
+
+PORTED_REGEX_TEST(wordBoundaryInconsistencies, {
+  expect(find<R>("\\bword\\b", "a word!"));
+  expect(noFind<R>("\\bword\\b", "swordfish"));
+})
+
+TODO_REGEX_TEST(prematureHitEndInNFCCharProperty, "Matcher.hitEnd is not exposed by JavaMatcherAdapter")
+
+PORTED_REGEX_TEST(iOOBForCIBackrefs, {
+  expect(full<R>("(?i)(a)\\1", "aA"));
+  expect(notFull<R>("(?i)(a)\\2", "aA"));
+})
+
+#undef PORTED_REGEX_TEST
+#undef TODO_REGEX_TEST
+
+} // namespace
+} // namespace facebook::velox::regex_compat::test
diff --git a/velox/external/regex_compat/tests/TestMain.cpp b/velox/external/regex_compat/tests/TestMain.cpp
new file mode 100644
index 00000000000..89215a01c56
--- /dev/null
+++ b/velox/external/regex_compat/tests/TestMain.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <gtest/gtest.h>
+
+#include <iostream>
+#include <map>
+#include <string>
+
+#if VELOX_REGEX_COMPAT_HAS_JAVA
+#include "velox/external/regex_compat/JvmFixture.h"
+#endif
+
+namespace {
+
+// Per-backend tally listener.  Buckets every finished test by a backend
+// label derived from its suite name: typed suites are named like
+// "MatchingPortedTest/0" (TypeParam = Re2Regex), "/1" = Pcre2Regex,
+// "/2" = JavaRegex.  Aggregates across all typed tests so we can print a
+// per-backend compatibility rate at the end of the run.
+class PerBackendTallyListener : public ::testing::EmptyTestEventListener {
+ public:
+  // Adds the finished test's outcome to the bucket of its backend.
+  void OnTestEnd(const ::testing::TestInfo& info) override {
+    auto& t = tally_[extractBackend(info.test_suite_name())];
+    // Skipped tests are excluded from both numerator and denominator so
+    // that "Java-API-only" GTEST_SKIP entries do not show up as Java
+    // failures in the per-backend rate.
+    if (info.result()->Skipped()) {
+      ++t.skipped;
+      return;
+    }
+    ++t.total;
+    if (info.result()->Passed()) {
+      ++t.passed;
+    }
+  }
+
+  // Prints one summary line per bucket to stdout, then warns on stderr about
+  // every bucket whose label contains "Java" and that has failing tests.
+  void OnTestProgramEnd(const ::testing::UnitTest& /*ut*/) override {
+    std::cout << "\n========== Per-backend compatibility rate ==========\n";
+    for (const auto& [name, t] : tally_) {
+      // An all-skipped bucket has total == 0: report 0% rather than divide.
+      const double pct = t.total > 0 ? 100.0 * t.passed / t.total : 0.0;
+      std::cout << "  " << name << "  " << t.passed << " / " << t.total
+                << "  (" << pct << "%)";
+      if (t.skipped > 0) {
+        std::cout << "   [skipped: " << t.skipped << "]";
+      }
+      std::cout << "\n";
+    }
+    std::cout << "====================================================\n";
+
+    // JavaRegex IS the ground truth — any failure means our port or JNI
+    // bridge is wrong, not a real engine difference.  Loud-warn so it does
+    // not get silently buried in the per-suite tally above.
+    for (const auto& [name, t] : tally_) {
+      if (name.find("Java") == std::string::npos || t.passed == t.total) {
+        continue;
+      }
+      std::cerr
+          << "*** JavaRegex backend has " << (t.total - t.passed)
+          << " failing test(s) in '" << name
+          << "' — Java IS the canonical reference; failures here are"
+          << " bugs in our port/JNI bridge, NOT real engine differences."
+          << " Investigate or, after 5 unsuccessful fix attempts, mark"
+          << " them as TODO for human review.\n";
+    }
+  }
+
+ private:
+  struct Tally {
+    int total = 0; // non-skipped tests seen for this bucket
+    int passed = 0; // subset of `total` whose result reported Passed()
+    int skipped = 0; // tests whose result reported Skipped(); not in `total`
+  };
+  std::map<std::string, Tally> tally_; // keyed by the label from extractBackend()
+
+  static std::string extractBackend(const std::string& suite) {
+    // Typed suites name themselves as "<Base>/0", "<Base>/1", "<Base>/2".
+    // Anything without `/N` is a non-typed (backend-specific) suite — pass
+    // its name through so it shows up explicitly in the report.
+    const auto slash = suite.rfind('/');
+    if (slash == std::string::npos) {
+      return suite;
+    }
+    const std::string idx = suite.substr(slash + 1);
+    if (idx == "0") return "Re2Regex  (typed)";
+    if (idx == "1") return "Pcre2Regex (typed)";
+    if (idx == "2") return "JavaRegex (typed)";
+    return suite;
+  }
+};
+
+} // namespace
+
+int main(int argc, char** argv) { // custom entry point so the tally listener can be installed
+  ::testing::InitGoogleTest(&argc, argv); // lets gtest parse its own command-line flags first
+#if VELOX_REGEX_COMPAT_HAS_JAVA // JvmFixture.h is only included under this same guard
+  facebook::velox::regex_compat::JvmFixture::Register(); // NOTE(review): presumably sets up the JVM for the Java backend; confirm in JvmFixture.h
+#endif // VELOX_REGEX_COMPAT_HAS_JAVA
+  ::testing::UnitTest::GetInstance()->listeners().Append(
+      new PerBackendTallyListener); // per the gtest docs the listener list takes ownership, so no delete here
+  return RUN_ALL_TESTS(); // the exit code comes from gtest alone; the listener only prints
+}
diff --git a/velox/functions/lib/CMakeLists.txt b/velox/functions/lib/CMakeLists.txt
index ac91448c37d..56341320d04 100644
--- a/velox/functions/lib/CMakeLists.txt
+++ b/velox/functions/lib/CMakeLists.txt
@@ -104,6 +104,7 @@ velox_link_libraries(
 )
 
 add_subdirectory(aggregates)
+add_subdirectory(java_pcre2_translator)
 add_subdirectory(sfm)
 add_subdirectory(string)
 add_subdirectory(window)
diff --git a/velox/functions/lib/java_pcre2_translator/CMakeLists.txt b/velox/functions/lib/java_pcre2_translator/CMakeLists.txt
new file mode 100644
index 00000000000..b30bd25f13e
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+velox_add_library(
+  velox_java_pcre2_translator # Java-regex to PCRE2 translator sources, kept in alphabetical order
+  ClassBodyParser.cpp
+  ClassRenderer.cpp
+  Evaluator.cpp
+  JavaRegexTranslator.cpp
+  JdkPropertyExpander.cpp
+  PropertyMap.cpp
+  RangeSet.cpp
+  HEADERS
+  ClassBodyParser.h
+  ClassNode.h
+  ClassRenderer.h
+  EvaluationFailedException.h
+  Evaluator.h
+  JavaRegexTranslator.h
+  JdkPropertyExpander.h
+  PropertyMap.h
+  RangeSet.h
+)
+
+velox_link_libraries(velox_java_pcre2_translator PRIVATE ICU::uc) # ClassBodyParser.cpp includes <unicode/uchar.h>
+
+if(VELOX_BUILD_TESTING) # bare variable name: if() dereferences it exactly once
+  add_subdirectory(tests)
+endif()
diff --git a/velox/functions/lib/java_pcre2_translator/ClassBodyParser.cpp b/velox/functions/lib/java_pcre2_translator/ClassBodyParser.cpp
new file mode 100644
index 00000000000..db439c8d5ae
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/ClassBodyParser.cpp
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.ClassBodyParser (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/ClassBodyParser.h"
+
+#include "velox/functions/lib/java_pcre2_translator/PropertyMap.h"
+#include "velox/functions/lib/java_pcre2_translator/RangeSet.h"
+
+#include <unicode/uchar.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+namespace {
+
+// Returns true when `s` begins with `prefix`. An empty prefix always matches.
+bool startsWith(std::string_view s, std::string_view prefix) {
+  if (prefix.size() > s.size()) {
+    return false;
+  }
+  return s.compare(0, prefix.size(), prefix) == 0;
+}
+
+// Reports whether a property name is written in one of the block spellings
+// this parser recognises: an "In" prefix, or a "blk=" / "block=" key.
+bool isBlockPropertyName(std::string_view s) {
+  static constexpr std::string_view kBlockPrefixes[] = {
+      "In", "blk=", "block="};
+  for (const auto prefix : kBlockPrefixes) {
+    if (startsWith(s, prefix)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Consumes the character `expected` at `pos` and advances `pos` past it.
+// Throws std::invalid_argument (leaving `pos` unchanged) when the input is
+// exhausted or holds a different character.
+void expect(std::string_view s, std::size_t& pos, char expected) {
+  const bool matches = pos < s.size() && s[pos] == expected;
+  if (!matches) {
+    std::string message = "Expected '";
+    message += expected;
+    message += "' at index ";
+    message += std::to_string(pos);
+    throw std::invalid_argument(message);
+  }
+  ++pos;
+}
+
+// Converts one hexadecimal digit (either case) to its value 0..15.
+// Throws std::invalid_argument for any other character.
+int hexDigit(char c) {
+  int value = -1;
+  if (c >= '0' && c <= '9') {
+    value = c - '0';
+  } else if (c >= 'a' && c <= 'f') {
+    value = 10 + (c - 'a');
+  } else if (c >= 'A' && c <= 'F') {
+    value = 10 + (c - 'A');
+  }
+  if (value < 0) {
+    throw std::invalid_argument("Invalid hex digit: " + std::string(1, c));
+  }
+  return value;
+}
+
+// Decodes the UTF-8 sequence that starts at s[pos] and reports its byte
+// length through `width`. The caller must guarantee pos < s.size().
+//
+// A lead byte is only treated as the start of a multi-byte sequence when all
+// of its trailing bytes are present AND each one is a continuation byte
+// (10xxxxxx). Otherwise the byte at `pos` is returned on its own with
+// width == 1, so a stray lead byte can no longer swallow the character that
+// follows it (for example the ']' that closes a class). Overlong encodings
+// and surrogate values are not rejected here.
+std::int32_t
+codePointAt(std::string_view s, std::size_t pos, std::size_t& width) {
+  const auto byteAt = [&s](std::size_t i) {
+    return static_cast<unsigned char>(s[i]);
+  };
+  const unsigned char b0 = byteAt(pos);
+  if (b0 < 0x80) {
+    width = 1;
+    return b0;
+  }
+
+  // Sequence length and payload bits announced by the lead byte.
+  std::size_t length = 0;
+  std::int32_t cp = 0;
+  if ((b0 & 0xE0) == 0xC0) {
+    length = 2;
+    cp = b0 & 0x1F;
+  } else if ((b0 & 0xF0) == 0xE0) {
+    length = 3;
+    cp = b0 & 0x0F;
+  } else if ((b0 & 0xF8) == 0xF0) {
+    length = 4;
+    cp = b0 & 0x07;
+  }
+
+  if (length != 0 && pos + length <= s.size()) {
+    bool wellFormed = true;
+    for (std::size_t i = 1; i < length; ++i) {
+      const unsigned char trail = byteAt(pos + i);
+      if ((trail & 0xC0) != 0x80) {
+        wellFormed = false;
+        break;
+      }
+      cp = (cp << 6) | (trail & 0x3F);
+    }
+    if (wellFormed) {
+      width = length;
+      return cp;
+    }
+  }
+
+  // Truncated or malformed sequence: hand back the raw byte.
+  width = 1;
+  return b0;
+}
+
+// Wraps `items` in a Union node, except that a single item is returned as is
+// (no one-element Union wrapper). An empty list yields an empty Union.
+ClassNode makeUnion(const std::vector<ClassNode>& items) {
+  if (items.size() == 1) {
+    return items.front();
+  }
+  // Zero items or two and more: the Union holds exactly the given members.
+  return ClassNode(Union(items));
+}
+
+// Forward declarations for the recursive-descent parser. The functions are
+// mutually recursive: parseAtom re-enters ClassBodyParser::parseClass for a
+// nested '[', which in turn calls parseIntersection again.
+ClassNode parseIntersection(std::string_view s, std::size_t& pos);
+ClassNode parseUnion(std::string_view s, std::size_t& pos);
+ClassNode parseItem(std::string_view s, std::size_t& pos);
+ClassNode parseAtom(std::string_view s, std::size_t& pos);
+ClassNode parseEscape(std::string_view s, std::size_t& pos);
+
+// Parses `union ('&&' union)*`. Without any '&&' the single union is returned
+// unchanged; otherwise all operands are collected into one Intersection node.
+// Throws std::invalid_argument when '&&' is followed by ']' or end of input.
+ClassNode parseIntersection(std::string_view s, std::size_t& pos) {
+  const auto atIntersectionOperator = [&]() {
+    return pos + 1 < s.size() && s[pos] == '&' && s[pos + 1] == '&';
+  };
+
+  ClassNode lhs = parseUnion(s, pos);
+  if (!atIntersectionOperator()) {
+    return lhs;
+  }
+
+  std::vector<ClassNode> operands;
+  operands.push_back(lhs);
+  do {
+    pos += 2; // skip "&&"
+    if (pos >= s.size() || s[pos] == ']') {
+      throw std::invalid_argument(
+          "Bad intersection syntax near index " + std::to_string(pos));
+    }
+    operands.push_back(parseUnion(s, pos));
+  } while (atIntersectionOperator());
+  return ClassNode(Intersection(operands));
+}
+
+// Parses consecutive class items until ']', an '&&' operator, or the end of
+// input, and combines them with makeUnion. The terminator is not consumed.
+ClassNode parseUnion(std::string_view s, std::size_t& pos) {
+  const auto atTerminator = [&]() {
+    if (s[pos] == ']') {
+      return true;
+    }
+    return s[pos] == '&' && pos + 1 < s.size() && s[pos + 1] == '&';
+  };
+
+  std::vector<ClassNode> members;
+  while (pos < s.size() && !atTerminator()) {
+    members.push_back(parseItem(s, pos));
+  }
+  return makeUnion(members);
+}
+
+// Parses one class item: a single atom, or an atom followed by '-' and a
+// second atom.
+//  - literal '-' literal               -> Range(lo, hi)
+//  - literal '-' non-literal           -> Union{atom, '-', atom}
+//  - property escape '-' anything      -> Union{atom, '-', atom}
+// A '-' that is the last character or directly precedes ']' is not consumed
+// here, and neither is a '-' after any other kind of atom.
+ClassNode parseItem(std::string_view s, std::size_t& pos) {
+  ClassNode first = parseAtom(s, pos);
+
+  const bool dashFollows =
+      pos + 1 < s.size() && s[pos] == '-' && s[pos + 1] != ']';
+  const auto* lowLiteral = first.getIf<Literal>();
+  const bool canStartDashForm =
+      lowLiteral != nullptr || first.is<PropertyLeaf>();
+  if (!dashFollows || !canStartDashForm) {
+    return first;
+  }
+
+  ++pos; // skip '-'
+  ClassNode second = parseAtom(s, pos);
+  if (lowLiteral != nullptr) {
+    if (const auto* highLiteral = second.getIf<Literal>()) {
+      return ClassNode(Range(lowLiteral->cp, highLiteral->cp));
+    }
+  }
+  return ClassNode(
+      Union(std::vector<ClassNode>{first, ClassNode(Literal('-')), second}));
+}
+
+// Parses the smallest unit of a class body: a nested "[...]" class, a
+// backslash escape, or one literal code point decoded from UTF-8.
+// Throws std::invalid_argument when called at the end of the input.
+ClassNode parseAtom(std::string_view s, std::size_t& pos) {
+  if (pos >= s.size()) {
+    throw std::invalid_argument(
+        "Unexpected end of pattern inside character class");
+  }
+  switch (s[pos]) {
+    case '[':
+      return ClassBodyParser::parseClass(s, pos);
+    case '\\':
+      return parseEscape(s, pos);
+    default:
+      break;
+  }
+  std::size_t consumed = 0;
+  const std::int32_t literal = codePointAt(s, pos, consumed);
+  pos += consumed;
+  return ClassNode(Literal(literal));
+}
+
+// Parses what follows "\p" or "\P" (`esc` is that letter; `pos` is just past
+// it). Without a '{' the bare escape is kept as a PropertyLeaf. With a braced
+// name, the name is run through PropertyMap::apply and the result decides the
+// node:
+//  - no mapping          -> the original \p{name} / \P{name} token
+//  - kNeverMatch         -> empty Union for \p, the full code point range
+//                           for \P
+//  - "[^...]" / "[...]"  -> parsed as a class, with the negations of the
+//                           escape and the rewrite combined
+//  - "\P{...}"           -> kept for \p, flipped to "\p{...}" for \P
+//  - anything else       -> \p{mapped} / \P{mapped}, adding an "In" prefix
+//                           when the source name used a block spelling
+// A missing '}' is not diagnosed here; the name then runs to end of input.
+// Throws std::invalid_argument when a class-shaped rewrite does not parse as
+// exactly one class.
+ClassNode parsePropertyEscape(std::string_view s, std::size_t& pos, char esc) {
+  const bool neg = esc == 'P';
+  const std::string escape = std::string("\\") + esc;
+  if (pos >= s.size() || s[pos] != '{') {
+    return ClassNode(PropertyLeaf(escape, neg));
+  }
+
+  ++pos; // skip '{'
+  const std::size_t nameBegin = pos;
+  while (pos < s.size() && s[pos] != '}') {
+    ++pos;
+  }
+  const std::string propName(s.substr(nameBegin, pos - nameBegin));
+  if (pos < s.size()) {
+    ++pos; // skip '}'
+  }
+
+  // Parses `text` as one complete "[...]" class; anything left over is an
+  // error.
+  const auto parseWholeClass = [](std::string_view text) {
+    std::size_t cursor = 0;
+    ClassNode parsed = ClassBodyParser::parseClass(text, cursor);
+    if (cursor != text.size()) {
+      throw std::invalid_argument(
+          "Unexpected trailing content in property rewrite");
+    }
+    return parsed;
+  };
+
+  const auto rewritten = PropertyMap::apply(propName);
+  if (!rewritten.has_value()) {
+    return ClassNode(PropertyLeaf(escape + "{" + propName + "}", neg));
+  }
+  const auto& mapped = *rewritten;
+
+  if (mapped == PropertyMap::kNeverMatch) {
+    if (neg) {
+      return ClassNode(Range(0, RangeSet::kMaxCp));
+    }
+    return ClassNode(Union(std::vector<ClassNode>{}));
+  }
+
+  if (startsWith(mapped, "[^") && mapped.back() == ']') {
+    if (!neg) {
+      return parseWholeClass(mapped);
+    }
+    // \P of a negated rewrite: drop the '^' instead of negating twice.
+    std::string positive("[");
+    positive.append(mapped.substr(2));
+    return parseWholeClass(positive);
+  }
+
+  if (startsWith(mapped, "[")) {
+    ClassNode parsed = parseWholeClass(mapped);
+    if (!neg && mapped.back() == ']') {
+      return parsed;
+    }
+    return ClassNode(Negated(parsed));
+  }
+
+  std::string token;
+  if (startsWith(mapped, "\\P{")) {
+    token = neg ? ("\\p{" + mapped.substr(3)) : mapped;
+  } else {
+    std::string propertyName = mapped;
+    if (isBlockPropertyName(propName) && !startsWith(propertyName, "In")) {
+      propertyName = "In" + propertyName;
+    }
+    token = escape + "{" + propertyName + "}";
+  }
+  return ClassNode(PropertyLeaf(token, neg));
+}
+
+// Parses one backslash escape inside a character class. `pos` must be at the
+// backslash; on return it is past the whole escape. Throws
+// std::invalid_argument for a trailing backslash and for malformed \c, \x and
+// \u escapes.
+ClassNode parseEscape(std::string_view s, std::size_t& pos) {
+  expect(s, pos, '\\');
+  if (pos >= s.size()) {
+    throw std::invalid_argument("Trailing backslash inside character class");
+  }
+  // A non-ASCII character after the backslash stands for itself; decode the
+  // whole UTF-8 sequence rather than a single byte.
+  if (static_cast<unsigned char>(s[pos]) >= 0x80) {
+    std::size_t width = 0;
+    const auto cp = codePointAt(s, pos, width);
+    pos += width;
+    return ClassNode(Literal(cp));
+  }
+  const char esc = s[pos++];
+  switch (esc) {
+    // Single-character control escapes.
+    case 'n':
+      return ClassNode(Literal('\n'));
+    case 't':
+      return ClassNode(Literal('\t'));
+    case 'r':
+      return ClassNode(Literal('\r'));
+    case 'f':
+      return ClassNode(Literal('\f'));
+    case 'a':
+      return ClassNode(Literal(0x07));
+    case 'e':
+      return ClassNode(Literal(0x1B));
+    // \0 followed by up to three octal digits. A digit that would push the
+    // value above 0xFF is left unconsumed; "\0" with no digits yields 0.
+    case '0': {
+      int val = 0;
+      int count = 0;
+      while (pos < s.size() && count < 3) {
+        const char d = s[pos];
+        if (d < '0' || d > '7') {
+          break;
+        }
+        const int next = val * 8 + (d - '0');
+        if (next > 0xFF) {
+          break;
+        }
+        val = next;
+        ++pos;
+        ++count;
+      }
+      return ClassNode(Literal(val));
+    }
+    // \cX: the literal is the low five bits of X.
+    // NOTE(review): OpenJDK computes \cX as (X ^ 64); that differs from the
+    // masking used here for e.g. '?' and lowercase letters. Confirm which
+    // behavior is intended.
+    case 'c': {
+      if (pos >= s.size()) {
+        throw std::invalid_argument("Incomplete \\c escape");
+      }
+      const auto ctrl = static_cast<std::int32_t>(s[pos]) & 0x1F;
+      ++pos;
+      return ClassNode(Literal(ctrl));
+    }
+    // \x{h...h} (one or more hex digits, at most 0x10FFFF) or \xhh (exactly
+    // two hex digits).
+    case 'x': {
+      if (pos < s.size() && s[pos] == '{') {
+        ++pos;
+        std::uint32_t val = 0;
+        bool any = false;
+        while (pos < s.size() && s[pos] != '}') {
+          val = val * 16 + hexDigit(s[pos++]);
+          // Checked after every digit, so `val` cannot wrap around.
+          if (val > 0x10FFFF) {
+            throw std::invalid_argument(
+                "\\x{...} code point out of Unicode range");
+          }
+          any = true;
+        }
+        if (pos >= s.size() || s[pos] != '}') {
+          throw std::invalid_argument("Unterminated \\x{...} escape");
+        }
+        if (!any) {
+          throw std::invalid_argument("Empty \\x{} escape");
+        }
+        ++pos;
+        return ClassNode(Literal(static_cast<std::int32_t>(val)));
+      }
+      if (pos + 1 >= s.size()) {
+        throw std::invalid_argument(
+            "Incomplete \\x escape (need 2 hex digits)");
+      }
+      const int hi = hexDigit(s[pos++]);
+      const int lo = hexDigit(s[pos++]);
+      return ClassNode(Literal(hi * 16 + lo));
+    }
+    // \uhhhh: exactly four hex digits. The value is used as is; two
+    // consecutive \u escapes are not combined into a surrogate pair here.
+    case 'u': {
+      if (pos + 3 >= s.size()) {
+        throw std::invalid_argument(
+            "Incomplete \\u escape (need 4 hex digits)");
+      }
+      int val = 0;
+      for (int i = 0; i < 4; ++i) {
+        val = val * 16 + hexDigit(s[pos++]);
+      }
+      return ClassNode(Literal(val));
+    }
+    // \Q...\E: every code point up to "\E" (or the end of input when "\E" is
+    // missing) is taken literally.
+    case 'Q': {
+      std::vector<ClassNode> literals;
+      while (pos < s.size()) {
+        if (s[pos] == '\\' && pos + 1 < s.size() && s[pos + 1] == 'E') {
+          pos += 2;
+          break;
+        }
+        std::size_t width = 0;
+        const auto cp = codePointAt(s, pos, width);
+        literals.emplace_back(Literal(cp));
+        pos += width;
+      }
+      return makeUnion(literals);
+    }
+    // Predefined classes are kept as opaque tokens; the upper-case forms
+    // carry negated == true.
+    case 'd':
+      return ClassNode(PropertyLeaf("\\d", false));
+    case 'D':
+      return ClassNode(PropertyLeaf("\\D", true));
+    case 'w':
+      return ClassNode(PropertyLeaf("\\w", false));
+    case 'W':
+      return ClassNode(PropertyLeaf("\\W", true));
+    case 's':
+      return ClassNode(PropertyLeaf("\\s", false));
+    case 'S':
+      return ClassNode(PropertyLeaf("\\S", true));
+    case 'h':
+      return ClassNode(PropertyLeaf("\\h", false));
+    case 'H':
+      return ClassNode(PropertyLeaf("\\H", true));
+    case 'v':
+      return ClassNode(PropertyLeaf("\\v", false));
+    case 'V':
+      return ClassNode(PropertyLeaf("\\V", true));
+    case 'p':
+    case 'P':
+      return parsePropertyEscape(s, pos, esc);
+    // \N{name}: the name is looked up with ICU's u_charFromName. When the
+    // lookup fails or the closing brace is missing, the text is kept as a
+    // "\N{...}" token. A bare \N is the literal 'N'.
+    case 'N': {
+      if (pos < s.size() && s[pos] == '{') {
+        const std::size_t start = pos;
+        while (pos < s.size() && s[pos] != '}') {
+          ++pos;
+        }
+        if (pos < s.size()) {
+          ++pos;
+        }
+        // `braced` includes the '{' and, when present, the '}'.
+        const std::string braced(s.substr(start, pos - start));
+        if (braced.size() >= 2 && braced.front() == '{' &&
+            braced.back() == '}') {
+          const std::string name = braced.substr(1, braced.size() - 2);
+          UErrorCode status = U_ZERO_ERROR;
+          const UChar32 cp =
+              u_charFromName(U_EXTENDED_CHAR_NAME, name.c_str(), &status);
+          if (U_SUCCESS(status)) {
+            return ClassNode(Literal(cp));
+          }
+        }
+        return ClassNode(PropertyLeaf("\\N" + braced, false));
+      }
+      return ClassNode(Literal('N'));
+    }
+    // Any other escaped ASCII character stands for itself.
+    default:
+      return ClassNode(Literal(esc));
+  }
+}
+
+} // namespace
+
+// Parses a complete "[...]" class. `pos` must be at the opening '[' (otherwise
+// std::invalid_argument is thrown); on return it is past the closing ']'.
+ClassNode ClassBodyParser::parseClass(std::string_view s, std::size_t& pos) {
+  expect(s, pos, '[');
+  return ClassBodyParser::parseClassBody(s, pos);
+}
+
+// Parses a class body whose opening '[' has already been consumed: an
+// optional leading '^', the contents, and the closing ']'. A leading '^'
+// wraps the result in a Negated node. Throws std::invalid_argument when the
+// closing ']' is missing.
+ClassNode ClassBodyParser::parseClassBody(
+    std::string_view s,
+    std::size_t& pos) {
+  bool negated = false;
+  if (pos < s.size() && s[pos] == '^') {
+    negated = true;
+    ++pos;
+  }
+
+  ClassNode body = parseIntersection(s, pos);
+
+  const bool closed = pos < s.size() && s[pos] == ']';
+  if (!closed) {
+    throw std::invalid_argument("Unterminated character class");
+  }
+  ++pos;
+
+  return negated ? ClassNode(Negated(body)) : body;
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/ClassBodyParser.h b/velox/functions/lib/java_pcre2_translator/ClassBodyParser.h
new file mode 100644
index 00000000000..dc9f4b7a592
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/ClassBodyParser.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.ClassBodyParser (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#pragma once
+
+#include "velox/functions/lib/java_pcre2_translator/ClassNode.h"
+
+#include <cstddef>
+#include <string_view>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+/// Static-only parser that turns the text of a bracketed character class into
+/// a ClassNode tree. Both entry points advance `pos` past what they consume
+/// and throw std::invalid_argument on malformed input.
+class ClassBodyParser {
+ public:
+  /// Parses a whole class; `s[pos]` must be the opening '['.
+  static ClassNode parseClass(std::string_view s, std::size_t& pos);
+  /// Parses a class whose opening '[' has already been consumed, up to and
+  /// including the closing ']'.
+  static ClassNode parseClassBody(std::string_view s, std::size_t& pos);
+
+ private:
+  // Not instantiable: the class only groups static functions.
+  ClassBodyParser() = delete;
+};
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/ClassNode.h b/velox/functions/lib/java_pcre2_translator/ClassNode.h
new file mode 100644
index 00000000000..9a3b08df2e3
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/ClassNode.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.ClassNode (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+// ClassNode is defined after the node payload types because Negated, Union
+// and Intersection hold their children through pointers to it.
+struct ClassNode;
+/// Shared, immutable handle to a child node.
+using ClassNodePtr = std::shared_ptr<const ClassNode>;
+
+/// A single code point that is a member of the class.
+struct Literal {
+  std::int32_t cp;
+
+  explicit Literal(std::int32_t cpIn) : cp(cpIn) {}
+
+  /// Two literals are equal when they hold the same code point.
+  bool operator==(const Literal& other) const {
+    return other.cp == cp;
+  }
+};
+
+/// A range of code points with endpoints `lo` and `hi`; the renderer writes
+/// it as "lo-hi". No ordering of the endpoints is enforced here.
+struct Range {
+  std::int32_t lo;
+  std::int32_t hi;
+
+  Range(std::int32_t loIn, std::int32_t hiIn) : lo(loIn), hi(hiIn) {}
+
+  /// Ranges are equal when both endpoints match.
+  bool operator==(const Range& other) const {
+    if (lo != other.lo) {
+      return false;
+    }
+    return hi == other.hi;
+  }
+};
+
+/// An escape that is carried through as text (for example "\\d" or a
+/// "\\p{...}" property): `pcre2Token` is what the renderer appends verbatim,
+/// and `negated` records whether the parser saw a negating form.
+struct PropertyLeaf {
+  std::string pcre2Token;
+  bool negated;
+
+  PropertyLeaf(std::string tokenIn, bool negatedIn)
+      : pcre2Token(std::move(tokenIn)), negated(negatedIn) {}
+
+  /// Leaves are equal when both the token text and the flag match.
+  bool operator==(const PropertyLeaf& other) const {
+    if (pcre2Token != other.pcre2Token) {
+      return false;
+    }
+    return negated == other.negated;
+  }
+};
+
+/// The complement of `child` (a "[^...]" class).
+struct Negated {
+  ClassNodePtr child;
+  /// Adopts an existing child handle.
+  explicit Negated(ClassNodePtr childIn) : child(std::move(childIn)) {}
+  /// Stores a copy of `childIn`; defined below, once ClassNode is complete.
+  explicit Negated(const ClassNode& childIn);
+  /// Compares the children by value; defined below.
+  bool operator==(const Negated& other) const;
+};
+
+/// The union of `children`, in source order. An empty list is allowed.
+struct Union {
+  std::vector<ClassNodePtr> children;
+  /// Adopts existing child handles.
+  explicit Union(std::vector<ClassNodePtr> childrenIn)
+      : children(std::move(childrenIn)) {}
+  /// Stores a copy of every node; defined below, once ClassNode is complete.
+  explicit Union(const std::vector<ClassNode>& childrenIn);
+  /// Element-wise comparison by value; defined below.
+  bool operator==(const Union& other) const;
+};
+
+/// The intersection of `operands` (the parts of a class separated by "&&").
+struct Intersection {
+  std::vector<ClassNodePtr> operands;
+  /// Adopts existing operand handles.
+  explicit Intersection(std::vector<ClassNodePtr> operandsIn)
+      : operands(std::move(operandsIn)) {}
+  /// Stores a copy of every node; defined below, once ClassNode is complete.
+  explicit Intersection(const std::vector<ClassNode>& operandsIn);
+  /// Element-wise comparison by value; defined below.
+  bool operator==(const Intersection& other) const;
+};
+
+/// One node of a parsed character class: a tagged union over the six node
+/// kinds. The constructors are intentionally implicit so that any payload
+/// converts to a ClassNode.
+struct ClassNode {
+  using Variant =
+      std::variant<Literal, Range, PropertyLeaf, Negated, Union, Intersection>;
+
+  Variant value;
+
+  ClassNode(Literal v) : value(std::move(v)) {}
+  ClassNode(Range v) : value(std::move(v)) {}
+  ClassNode(PropertyLeaf v) : value(std::move(v)) {}
+  ClassNode(Negated v) : value(std::move(v)) {}
+  ClassNode(Union v) : value(std::move(v)) {}
+  ClassNode(Intersection v) : value(std::move(v)) {}
+
+  /// Returns the payload when the node holds a `T`, otherwise nullptr.
+  template <typename T>
+  const T* getIf() const {
+    return std::get_if<T>(&value);
+  }
+
+  /// True when the node currently holds a `T`.
+  template <typename T>
+  bool is() const {
+    return getIf<T>() != nullptr;
+  }
+
+  /// Deep comparison: same node kind and equal payloads.
+  bool operator==(const ClassNode& other) const {
+    return value == other.value;
+  }
+  bool operator!=(const ClassNode& other) const {
+    return !operator==(other);
+  }
+};
+
+/// Copies `node` into a new shared, immutable ClassNode.
+inline ClassNodePtr nodePtr(const ClassNode& node) {
+  ClassNodePtr copy = std::make_shared<const ClassNode>(node);
+  return copy;
+}
+
+/// Builds a Negated node around a shared copy of `childIn`.
+inline Negated::Negated(const ClassNode& childIn)
+    : child(std::make_shared<const ClassNode>(childIn)) {}
+
+/// Builds a Union that holds a shared copy of every node in `childrenIn`,
+/// preserving their order.
+inline Union::Union(const std::vector<ClassNode>& childrenIn) {
+  children.reserve(childrenIn.size());
+  for (std::size_t i = 0; i < childrenIn.size(); ++i) {
+    children.push_back(std::make_shared<const ClassNode>(childrenIn[i]));
+  }
+}
+
+/// Builds an Intersection that holds a shared copy of every node in
+/// `operandsIn`, preserving their order.
+inline Intersection::Intersection(const std::vector<ClassNode>& operandsIn) {
+  operands.reserve(operandsIn.size());
+  for (std::size_t i = 0; i < operandsIn.size(); ++i) {
+    operands.push_back(std::make_shared<const ClassNode>(operandsIn[i]));
+  }
+}
+
+/// Compares the children by value. When either child is null the nodes are
+/// equal only if both are null.
+inline bool Negated::operator==(const Negated& other) const {
+  if (child != nullptr && other.child != nullptr) {
+    return *child == *other.child;
+  }
+  return child == other.child;
+}
+
+/// Two unions are equal when they have the same number of children and each
+/// pair of children is either both null or equal by value.
+inline bool Union::operator==(const Union& other) const {
+  const std::size_t count = children.size();
+  if (count != other.children.size()) {
+    return false;
+  }
+  for (std::size_t i = 0; i < count; ++i) {
+    const ClassNodePtr& mine = children[i];
+    const ClassNodePtr& theirs = other.children[i];
+    const bool mineIsNull = mine == nullptr;
+    const bool theirsIsNull = theirs == nullptr;
+    if (mineIsNull != theirsIsNull) {
+      return false;
+    }
+    if (!mineIsNull && !(*mine == *theirs)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/// Two intersections are equal when they have the same number of operands and
+/// each pair of operands is either both null or equal by value.
+inline bool Intersection::operator==(const Intersection& other) const {
+  const std::size_t count = operands.size();
+  if (count != other.operands.size()) {
+    return false;
+  }
+  for (std::size_t i = 0; i < count; ++i) {
+    const ClassNodePtr& mine = operands[i];
+    const ClassNodePtr& theirs = other.operands[i];
+    const bool mineIsNull = mine == nullptr;
+    const bool theirsIsNull = theirs == nullptr;
+    if (mineIsNull != theirsIsNull) {
+      return false;
+    }
+    if (!mineIsNull && !(*mine == *theirs)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/ClassRenderer.cpp b/velox/functions/lib/java_pcre2_translator/ClassRenderer.cpp
new file mode 100644
index 00000000000..b28438be5f1
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/ClassRenderer.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.ClassRenderer (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/ClassRenderer.h"
+
+#include "velox/functions/lib/java_pcre2_translator/EvaluationFailedException.h"
+#include "velox/functions/lib/java_pcre2_translator/Evaluator.h"
+
+#include <cstdio>
+#include <stdexcept>
+#include <string>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+namespace {
+
+// Class text emitted for an empty set: the negation of the whole code point
+// range U+0000..U+10FFFF, which no character can satisfy.
+constexpr const char* kEmptyClass = "[^\\x{0}-\\x{10FFFF}]";
+
+// Combines several callables into one object that exposes all of their call
+// operators, so a set of lambdas can be handed to std::visit.
+template <typename... Fs>
+struct Overloaded : Fs... {
+  using Fs::operator()...;
+};
+// Deduction guide: lets `Overloaded{f, g, ...}` deduce its template arguments.
+template <typename... Fs>
+Overloaded(Fs...) -> Overloaded<Fs...>;
+
+// Appends the UTF-8 encoding of `cp` to `sb`, using 1 to 4 bytes depending on
+// the magnitude of the code point. The value is not range-checked.
+void appendCodePointUtf8(std::int32_t cp, std::string& sb) {
+  const auto put = [&sb](std::int32_t byte) {
+    sb.push_back(static_cast<char>(byte));
+  };
+  // Emits one continuation byte carrying the low six bits of `bits`.
+  const auto putTrail = [&put](std::int32_t bits) {
+    put(0x80 | (bits & 0x3F));
+  };
+
+  if (cp <= 0x7F) {
+    put(cp);
+    return;
+  }
+  if (cp <= 0x7FF) {
+    put(0xC0 | (cp >> 6));
+    putTrail(cp);
+    return;
+  }
+  if (cp <= 0xFFFF) {
+    put(0xE0 | (cp >> 12));
+    putTrail(cp >> 6);
+    putTrail(cp);
+    return;
+  }
+  put(0xF0 | (cp >> 18));
+  putTrail(cp >> 12);
+  putTrail(cp >> 6);
+  putTrail(cp);
+}
+
+// Forward declarations: both emitters recurse into child nodes, and
+// emitIntersectionFallbackOriginal below calls emitOriginalStyle before its
+// definition.
+void emitFlat(const ClassNode& node, std::string& sb);
+void emitOriginalStyle(const ClassNode& node, std::string& sb);
+
+// Evaluates every operand of `inter` to a RangeSet and intersects them,
+// starting from RangeSet::all(). `ok` is set to true on success; as soon as
+// one operand cannot be evaluated, `ok` is set to false and an empty set is
+// returned.
+RangeSet tryEvaluateIntersectionRangeSet(const Intersection& inter, bool& ok) {
+  RangeSet accumulated = RangeSet::all();
+  for (const auto& operand : inter.operands) {
+    auto evaluated = Evaluator::tryToRangeSet(*operand);
+    if (evaluated.has_value()) {
+      accumulated = accumulated.intersect(*evaluated);
+      continue;
+    }
+    ok = false;
+    return RangeSet::empty();
+  }
+  ok = true;
+  return accumulated;
+}
+
+// Writes the operands of `inter` in their original form, separated by "&&".
+void emitIntersectionFallbackOriginal(
+    const Intersection& inter,
+    std::string& sb) {
+  bool isFirst = true;
+  for (const auto& operand : inter.operands) {
+    if (!isFirst) {
+      sb.append("&&");
+    }
+    isFirst = false;
+    emitOriginalStyle(*operand, sb);
+  }
+}
+
+// Appends `node` to `sb` as members of a single, non-nested class body.
+//  - Literal / Range / PropertyLeaf are written directly.
+//  - A nested Negated is evaluated to a RangeSet, complemented and written as
+//    ranges; when that evaluation fails, EvaluationFailedException is thrown
+//    so the caller can fall back to the original form.
+//  - Union members are emitted one after another.
+//  - Intersection is not supported here and raises std::logic_error.
+void emitFlat(const ClassNode& node, std::string& sb) {
+  if (const auto* lit = node.getIf<Literal>()) {
+    ClassRenderer::emitLiteralInClass(lit->cp, sb);
+    return;
+  }
+  if (const auto* range = node.getIf<Range>()) {
+    ClassRenderer::emitLiteralInClass(range->lo, sb);
+    sb.push_back('-');
+    ClassRenderer::emitLiteralInClass(range->hi, sb);
+    return;
+  }
+  if (const auto* leaf = node.getIf<PropertyLeaf>()) {
+    sb.append(leaf->pcre2Token);
+    return;
+  }
+  if (const auto* neg = node.getIf<Negated>()) {
+    try {
+      sb.append(
+          Evaluator::toRangeSet(*neg->child).complement().toPcre2ClassBody());
+    } catch (const EvaluationFailedException&) {
+      throw EvaluationFailedException(
+          "Cannot flatten nested [^...]; caller must fall back");
+    }
+    return;
+  }
+  if (const auto* members = node.getIf<Union>()) {
+    for (const auto& child : members->children) {
+      emitFlat(*child, sb);
+    }
+    return;
+  }
+  // Only Intersection remains.
+  throw std::logic_error("emitFlat must not be called on Intersection nodes");
+}
+
+// Appends `node` to `sb` while keeping its structure: nested negations are
+// written as "[^...]" and intersections keep their "&&" operators.
+void emitOriginalStyle(const ClassNode& node, std::string& sb) {
+  const auto onLiteral = [&sb](const Literal& lit) {
+    ClassRenderer::emitLiteralInClass(lit.cp, sb);
+  };
+  const auto onRange = [&sb](const Range& range) {
+    ClassRenderer::emitLiteralInClass(range.lo, sb);
+    sb.push_back('-');
+    ClassRenderer::emitLiteralInClass(range.hi, sb);
+  };
+  const auto onProperty = [&sb](const PropertyLeaf& leaf) {
+    sb.append(leaf.pcre2Token);
+  };
+  const auto onNegated = [&sb](const Negated& neg) {
+    sb.append("[^");
+    emitOriginalStyle(*neg.child, sb);
+    sb.push_back(']');
+  };
+  const auto onUnion = [&sb](const Union& members) {
+    for (const auto& child : members.children) {
+      emitOriginalStyle(*child, sb);
+    }
+  };
+  const auto onIntersection = [&sb](const Intersection& inter) {
+    emitIntersectionFallbackOriginal(inter, sb);
+  };
+
+  std::visit(
+      Overloaded{
+          onLiteral, onRange, onProperty, onNegated, onUnion, onIntersection},
+      node.value);
+}
+
+// Renders a class whose body contains an intersection. Three attempts, in
+// order:
+//  1. evaluate the whole body to a RangeSet;
+//  2. if the body itself is an Intersection, evaluate it operand by operand;
+//  3. otherwise write the body in its original form, "&&" included.
+// In cases 1 and 2 the set is complemented when `negated` is true, and an
+// empty result is rendered as kEmptyClass.
+std::string renderWithIntersection(const ClassNode& inner, bool negated) {
+  // Turns an evaluated set into "[...]" text, applying the outer negation.
+  const auto renderSet = [negated](RangeSet set) -> std::string {
+    RangeSet effective = negated ? set.complement() : set;
+    if (effective.isEmpty()) {
+      return kEmptyClass;
+    }
+    return "[" + effective.toPcre2ClassBody() + "]";
+  };
+
+  auto whole = Evaluator::tryToRangeSet(inner);
+  if (whole.has_value()) {
+    return renderSet(*whole);
+  }
+
+  if (const auto* inter = inner.getIf<Intersection>()) {
+    bool evaluated = false;
+    RangeSet combined = tryEvaluateIntersectionRangeSet(*inter, evaluated);
+    if (evaluated) {
+      return renderSet(combined);
+    }
+  }
+
+  std::string text = negated ? "[^" : "[";
+  emitOriginalStyle(inner, text);
+  text.push_back(']');
+  return text;
+}
+
+} // namespace
+
+// Convenience wrapper: renders `node` and returns only the class text.
+std::string ClassRenderer::render(const ClassNode& node) {
+  RenderResult result = renderWithSignal(node);
+  return result.text;
+}
+
+// Renders `node` as PCRE2 class text. A top-level Negated becomes the leading
+// '^' of the class.
+//  - Bodies containing an intersection go through renderWithIntersection;
+//    `intersectionUnresolved` is true when the rendered text still contains
+//    "&&".
+//  - A body that evaluates to an empty effective set is rendered as
+//    kEmptyClass.
+//  - Otherwise the body is flattened with emitFlat; when flattening fails it
+//    is written in its original nested form instead.
+ClassRenderer::RenderResult ClassRenderer::renderWithSignal(
+    const ClassNode& node) {
+  const auto* outerNegation = node.getIf<Negated>();
+  const bool negated = outerNegation != nullptr;
+  const ClassNode& inner = negated ? *outerNegation->child : node;
+
+  if (containsIntersection(inner)) {
+    auto rendered = renderWithIntersection(inner, negated);
+    const bool unresolved = rendered.find("&&") != std::string::npos;
+    return {rendered, unresolved};
+  }
+
+  auto evaluated = Evaluator::tryToRangeSet(inner);
+  if (evaluated.has_value()) {
+    RangeSet effective = negated ? evaluated->complement() : *evaluated;
+    if (effective.isEmpty()) {
+      return {kEmptyClass, false};
+    }
+  }
+
+  const std::string opener = negated ? "[^" : "[";
+  std::string flat = opener;
+  try {
+    emitFlat(inner, flat);
+  } catch (const EvaluationFailedException&) {
+    std::string nested = opener;
+    emitOriginalStyle(inner, nested);
+    nested.push_back(']');
+    return {nested, false};
+  }
+  flat.push_back(']');
+  return {flat, false};
+}
+
+// Appends one literal code point to `sb` in a form that is safe inside a
+// PCRE2 character class.
+//  - Printable ASCII (0x20..0x7E) is written as is, except that the class
+//    metacharacters '\', ']', '[', '^' and '-' get a backslash.
+//  - Everything else is written as a "\x{HEX}" escape.
+// '[' has to be escaped as well: PCRE2 treats "[:", "[." and "[=" inside a
+// class as the start of a POSIX construct, so a literal '[' followed by a
+// literal ':' (Java "[\[:alpha:]]") would otherwise change meaning.
+void ClassRenderer::emitLiteralInClass(std::int32_t cp, std::string& sb) {
+  if (cp >= 0x20 && cp <= 0x7E) {
+    switch (cp) {
+      case '\\':
+      case ']':
+      case '[':
+      case '^':
+      case '-':
+        sb.push_back('\\');
+        sb.push_back(static_cast<char>(cp));
+        return;
+      default:
+        sb.push_back(static_cast<char>(cp));
+        return;
+    }
+  }
+  // Longest output is "\x{FFFFFFFF}" (12 characters plus the terminator).
+  char buf[16];
+  std::snprintf(buf, sizeof(buf), "\\x{%X}", static_cast<unsigned>(cp));
+  sb.append(buf);
+}
+
+// Returns true when `node` is an Intersection or contains one anywhere below
+// a Negated or Union node. Literal, Range and PropertyLeaf nodes have no
+// children and therefore yield false.
+bool ClassRenderer::containsIntersection(const ClassNode& node) {
+  if (node.is<Intersection>()) {
+    return true;
+  }
+  if (const auto* neg = node.getIf<Negated>()) {
+    return ClassRenderer::containsIntersection(*neg->child);
+  }
+  if (const auto* members = node.getIf<Union>()) {
+    for (const auto& child : members->children) {
+      if (ClassRenderer::containsIntersection(*child)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/ClassRenderer.h b/velox/functions/lib/java_pcre2_translator/ClassRenderer.h
new file mode 100644
index 00000000000..91595de261a
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/ClassRenderer.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.ClassRenderer (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#pragma once
+
+#include "velox/functions/lib/java_pcre2_translator/ClassNode.h"
+
+#include <cstdint>
+#include <string>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+/// Static-only renderer that turns a ClassNode tree into the text of a PCRE2
+/// character class.
+class ClassRenderer {
+ public:
+  /// Outcome of renderWithSignal.
+  struct RenderResult {
+    /// The rendered class text, brackets included.
+    std::string text;
+    /// True when `text` still contains a "&&" operator, i.e. an intersection
+    /// could not be evaluated away.
+    bool intersectionUnresolved{false};
+  };
+
+  /// Renders `node` and returns only the text.
+  static std::string render(const ClassNode& node);
+  /// Renders `node` and also reports whether an intersection was left
+  /// unresolved in the output.
+  static RenderResult renderWithSignal(const ClassNode& node);
+  /// Appends a single literal code point to `sb`, escaped for use inside a
+  /// class.
+  static void emitLiteralInClass(std::int32_t cp, std::string& sb);
+  /// True when `node` is, or contains, an Intersection node.
+  static bool containsIntersection(const ClassNode& node);
+
+ private:
+  // Not instantiable: the class only groups static functions.
+  ClassRenderer() = delete;
+};
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/EvaluationFailedException.h b/velox/functions/lib/java_pcre2_translator/EvaluationFailedException.h
new file mode 100644
index 00000000000..949d409ad8d
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/EvaluationFailedException.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.EvaluationFailedException (Java) under
+// Apache-2.0 by the same author for inclusion in Velox.
+//
+#pragma once
+
+#include <stdexcept>
+#include <string>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+/// Thrown by the translator pipeline when a Java regex feature cannot be
+/// represented in the target engine's syntax (e.g. when the target is
+/// asked to express something it has no equivalent for, like an
+/// unsupported character-class intersection).
+class EvaluationFailedException : public std::runtime_error {
+ public:
+  /// @param msg Description of the failure; forwarded to std::runtime_error
+  ///            and therefore returned by what().
+  explicit EvaluationFailedException(const std::string& msg)
+      : std::runtime_error(msg) {}
+};
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/Evaluator.cpp b/velox/functions/lib/java_pcre2_translator/Evaluator.cpp
new file mode 100644
index 00000000000..41a0d9d0a7d
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/Evaluator.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.Evaluator (Java) under Apache-2.0 by the same
+// author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/Evaluator.h"
+
+#include "velox/functions/lib/java_pcre2_translator/EvaluationFailedException.h"
+#include "velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.h"
+
+#include <string>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+namespace {
+
+/// Aggregates several callables into one object whose call operator is the
+/// overload set of all of them; used as the visitor passed to std::visit.
+template <class... Callables>
+struct Overloaded : Callables... {
+  using Callables::operator()...;
+};
+/// Deduction guide so `Overloaded{f, g, ...}` needs no template arguments.
+template <class... Callables>
+Overloaded(Callables...) -> Overloaded<Callables...>;
+
+/// ASCII decimal digits '0'..'9'; built once on first use.
+const RangeSet& digit() {
+  static const RangeSet kDigits = RangeSet::range('0', '9');
+  return kDigits;
+}
+
+/// ASCII word characters: 'A'..'Z', 'a'..'z', '0'..'9' and '_'; built once on
+/// first use.
+const RangeSet& word() {
+  static const RangeSet kWordChars = [] {
+    RangeSet acc = RangeSet::range('A', 'Z');
+    acc = acc.unionWith(RangeSet::range('a', 'z'));
+    acc = acc.unionWith(RangeSet::range('0', '9'));
+    acc = acc.unionWith(RangeSet::single('_'));
+    return acc;
+  }();
+  return kWordChars;
+}
+
+/// The six ASCII whitespace characters: tab, line feed, vertical tab (0x0B),
+/// form feed, carriage return and space; built once on first use.
+const RangeSet& space() {
+  static const RangeSet kWhitespace = [] {
+    RangeSet acc = RangeSet::single('\t');
+    acc = acc.unionWith(RangeSet::single('\n'));
+    acc = acc.unionWith(RangeSet::single(0x0B));
+    acc = acc.unionWith(RangeSet::single('\f'));
+    acc = acc.unionWith(RangeSet::single('\r'));
+    acc = acc.unionWith(RangeSet::single(' '));
+    return acc;
+  }();
+  return kWhitespace;
+}
+
+/// The full 7-bit range 0x00..0x7F; built once on first use.
+const RangeSet& ascii() {
+  static const RangeSet kAscii = RangeSet::range(0x00, 0x7F);
+  return kAscii;
+}
+
+/// ASCII letters 'A'..'Z' and 'a'..'z'; built once on first use.
+const RangeSet& alpha() {
+  static const RangeSet kLetters = [] {
+    const RangeSet upperCase = RangeSet::range('A', 'Z');
+    return upperCase.unionWith(RangeSet::range('a', 'z'));
+  }();
+  return kLetters;
+}
+
+/// Union of alpha() and digit(); built once on first use.
+const RangeSet& alnum() {
+  static const RangeSet kAlphaNumeric = alpha().unionWith(digit());
+  return kAlphaNumeric;
+}
+
+/// ASCII lowercase letters 'a'..'z'; built once on first use.
+const RangeSet& lower() {
+  static const RangeSet kLowerCase = RangeSet::range('a', 'z');
+  return kLowerCase;
+}
+
+/// ASCII uppercase letters 'A'..'Z'; built once on first use.
+const RangeSet& upper() {
+  static const RangeSet kUpperCase = RangeSet::range('A', 'Z');
+  return kUpperCase;
+}
+
+/// ASCII hexadecimal digits: digit() plus 'A'..'F' and 'a'..'f'; built once on
+/// first use.
+const RangeSet& hexDigit() {
+  static const RangeSet kHexDigits = [] {
+    RangeSet acc = digit().unionWith(RangeSet::range('A', 'F'));
+    acc = acc.unionWith(RangeSet::range('a', 'f'));
+    return acc;
+  }();
+  return kHexDigits;
+}
+
+/// Space and horizontal tab; built once on first use.
+const RangeSet& blank() {
+  static const RangeSet kBlank = [] {
+    const RangeSet spaceOnly = RangeSet::single(' ');
+    return spaceOnly.unionWith(RangeSet::single('\t'));
+  }();
+  return kBlank;
+}
+
+/// ASCII control characters 0x00..0x1F plus 0x7F; built once on first use.
+const RangeSet& cntrl() {
+  static const RangeSet kControl = [] {
+    const RangeSet lowControls = RangeSet::range(0x00, 0x1F);
+    return lowControls.unionWith(RangeSet::single(0x7F));
+  }();
+  return kControl;
+}
+
+/// The range 0x21..0x7E ('!' through '~'); built once on first use.
+const RangeSet& graph() {
+  static const RangeSet kGraphical = RangeSet::range(0x21, 0x7E);
+  return kGraphical;
+}
+
+/// The range 0x20..0x7E (space through '~'); built once on first use.
+const RangeSet& print() {
+  static const RangeSet kPrintable = RangeSet::range(0x20, 0x7E);
+  return kPrintable;
+}
+
+/// print() with alnum() and the space character removed; built once on first
+/// use.
+const RangeSet& punct() {
+  static const RangeSet kPunctuation = [] {
+    const RangeSet withoutAlnum = print().subtract(alnum());
+    return withoutAlnum.subtract(RangeSet::single(' '));
+  }();
+  return kPunctuation;
+}
+
+/// Resolves the PCRE2 token carried by `leaf` to the set of code points it
+/// stands for.
+///
+/// The built-in shorthand and POSIX-style tokens are looked up in a fixed
+/// table; anything else is handed to JdkPropertyExpander::expand().
+///
+/// @throws EvaluationFailedException when neither source knows the token.
+RangeSet expandProperty(const PropertyLeaf& leaf) {
+  using SetGetter = const RangeSet& (*)();
+  struct Entry {
+    const char* token;
+    SetGetter set;
+    bool complemented; // true for the upper-case negated shorthands
+  };
+  static constexpr Entry kBuiltins[] = {
+      {"\\d", &digit, false},
+      {"\\D", &digit, true},
+      {"\\w", &word, false},
+      {"\\W", &word, true},
+      {"\\s", &space, false},
+      {"\\S", &space, true},
+      {"\\p{ASCII}", &ascii, false},
+      {"\\p{Alpha}", &alpha, false},
+      {"\\p{Alnum}", &alnum, false},
+      {"\\p{Lower}", &lower, false},
+      {"\\p{Upper}", &upper, false},
+      {"\\p{Digit}", &digit, false},
+      {"\\p{XDigit}", &hexDigit, false},
+      {"\\p{Space}", &space, false},
+      {"\\p{Blank}", &blank, false},
+      {"\\p{Cntrl}", &cntrl, false},
+      {"\\p{Graph}", &graph, false},
+      {"\\p{Print}", &print, false},
+      {"\\p{Punct}", &punct, false},
+  };
+
+  const auto& token = leaf.pcre2Token;
+  for (const Entry& entry : kBuiltins) {
+    if (token == entry.token) {
+      if (entry.complemented) {
+        return entry.set().complement();
+      }
+      return entry.set();
+    }
+  }
+
+  // Not a built-in: fall back to the JDK property tables.
+  if (auto jdk = JdkPropertyExpander::expand(token); jdk.has_value()) {
+    return *jdk;
+  }
+  throw EvaluationFailedException("Cannot expand property: " + token);
+}
+
+} // namespace
+
+/// Evaluates a character-class tree into the set of code points it matches.
+///
+/// Literals and ranges map directly to sets, Negated complements its child,
+/// Union folds its children with unionWith() starting from the empty set,
+/// Intersection folds its operands with intersect() starting from the full
+/// set, and PropertyLeaf is resolved by expandProperty() (which throws
+/// EvaluationFailedException for unknown tokens).
+RangeSet Evaluator::toRangeSet(const ClassNode& node) {
+  const auto visitor = Overloaded{
+      [](const Literal& literal) -> RangeSet {
+        return RangeSet::single(literal.cp);
+      },
+      [](const Range& span) -> RangeSet {
+        return RangeSet::range(span.lo, span.hi);
+      },
+      [](const Negated& negated) -> RangeSet {
+        return Evaluator::toRangeSet(*negated.child).complement();
+      },
+      [](const Union& alternatives) -> RangeSet {
+        RangeSet merged = RangeSet::empty();
+        for (const auto& member : alternatives.children) {
+          merged = merged.unionWith(Evaluator::toRangeSet(*member));
+        }
+        return merged;
+      },
+      [](const Intersection& conjunction) -> RangeSet {
+        RangeSet common = RangeSet::all();
+        for (const auto& term : conjunction.operands) {
+          common = common.intersect(Evaluator::toRangeSet(*term));
+        }
+        return common;
+      },
+      [](const PropertyLeaf& property) -> RangeSet {
+        return expandProperty(property);
+      }};
+  return std::visit(visitor, node.value);
+}
+
+/// Non-throwing variant of toRangeSet(): yields std::nullopt when evaluation
+/// raises EvaluationFailedException. Any other exception still propagates.
+std::optional<RangeSet> Evaluator::tryToRangeSet(const ClassNode& node) {
+  std::optional<RangeSet> evaluated;
+  try {
+    evaluated.emplace(toRangeSet(node));
+  } catch (const EvaluationFailedException&) {
+    evaluated.reset();
+  }
+  return evaluated;
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/Evaluator.h b/velox/functions/lib/java_pcre2_translator/Evaluator.h
new file mode 100644
index 00000000000..7a7c1c76b76
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/Evaluator.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.Evaluator (Java) under Apache-2.0 by the same
+// author for inclusion in Velox.
+//
+#pragma once
+
+#include "velox/functions/lib/java_pcre2_translator/ClassNode.h"
+#include "velox/functions/lib/java_pcre2_translator/RangeSet.h"
+
+#include <optional>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+/// Static-only entry points that evaluate a ClassNode tree into a RangeSet.
+/// The class cannot be instantiated (its constructor is deleted).
+class Evaluator {
+ public:
+  /// Returns the RangeSet computed for `node`.
+  static RangeSet toRangeSet(const ClassNode& node);
+  /// Like toRangeSet(), but the result is optional.
+  /// NOTE(review): presumably empty when evaluation fails -- confirm against
+  /// the implementation in Evaluator.cpp.
+  static std::optional<RangeSet> tryToRangeSet(const ClassNode& node);
+
+ private:
+  // Deleted: the class only hosts static members.
+  Evaluator() = delete;
+};
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.cpp b/velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.cpp
new file mode 100644
index 00000000000..22c70a66ea9
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.cpp
@@ -0,0 +1,1608 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.JavaRegexTranslator (Java) under
+// Apache-2.0 by the same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.h"
+
+#include "velox/functions/lib/java_pcre2_translator/ClassBodyParser.h"
+#include "velox/functions/lib/java_pcre2_translator/ClassRenderer.h"
+#include "velox/functions/lib/java_pcre2_translator/PropertyMap.h"
+
+#include <unicode/uchar.h>
+#include <unicode/utypes.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+namespace {
+
+/// Tells whether `body` (the text between '{' and '}') has one of the shapes
+/// "n", "n," or "n,m", where n and m are non-empty runs of ASCII digits.
+bool isValidQuantifierBody(std::string_view body) {
+  // Returns the index of the first non-digit at or after `pos`.
+  const auto skipDigits = [&body](std::size_t pos) {
+    while (pos < body.size() && body[pos] >= '0' && body[pos] <= '9') {
+      ++pos;
+    }
+    return pos;
+  };
+
+  const std::size_t minEnd = skipDigits(0);
+  if (minEnd == 0) {
+    // Empty body, or no leading lower bound.
+    return false;
+  }
+  if (minEnd == body.size()) {
+    return true;
+  }
+  if (body[minEnd] != ',') {
+    return false;
+  }
+  // The upper bound may be empty; nothing may follow it.
+  return skipDigits(minEnd + 1) == body.size();
+}
+
+/// True for the ASCII characters 0-9, a-f and A-F.
+bool isHexDigit(char ch) {
+  if (ch >= '0' && ch <= '9') {
+    return true;
+  }
+  if (ch >= 'a' && ch <= 'f') {
+    return true;
+  }
+  return ch >= 'A' && ch <= 'F';
+}
+
+/// Numeric value (0..15) of a hexadecimal digit. The input is not validated:
+/// any character outside 0-9 / a-f is treated as if it were in A-F.
+std::uint32_t hexValue(char ch) {
+  if (ch >= 'a' && ch <= 'f') {
+    return 10 + (ch - 'a');
+  }
+  if (ch >= '0' && ch <= '9') {
+    return ch - '0';
+  }
+  return 10 + (ch - 'A');
+}
+
+/// Combines the four characters s[from] .. s[from + 3] into one value, most
+/// significant digit first, using hexValue() for each character. Neither
+/// the bounds nor the characters are checked here.
+std::uint32_t parseFourHex(std::string_view s, std::size_t from) {
+  std::uint32_t acc = 0;
+  const std::size_t stop = from + 4;
+  for (std::size_t pos = from; pos != stop; ++pos) {
+    acc = (acc << 4) | hexValue(s[pos]);
+  }
+  return acc;
+}
+
+/// True for the ASCII characters '0' through '7'.
+bool isOctalDigit(char ch) {
+  return '0' <= ch && ch < '8';
+}
+
+/// Formats `cp` as lowercase hexadecimal without prefix or leading zeros
+/// ("0" for zero).
+std::string toLowerHex(std::uint32_t cp) {
+  constexpr char kDigits[] = "0123456789abcdef";
+  // A 32-bit value needs at most eight hex digits; fill from the back.
+  char buffer[8];
+  std::size_t first = sizeof(buffer);
+  do {
+    buffer[--first] = kDigits[cp & 0xF];
+    cp >>= 4;
+  } while (cp != 0);
+  return std::string(buffer + first, sizeof(buffer) - first);
+}
+
+/// True when `sb` ends in an odd-length run of backslashes, i.e. the next
+/// character appended would be escaped by the last one.
+bool hasOddTrailingBackslashes(const std::string& sb) {
+  const auto lastOther = sb.find_last_not_of('\\');
+  const std::size_t run = lastOther == std::string::npos
+      ? sb.size()
+      : sb.size() - 1 - lastOther;
+  return run % 2 == 1;
+}
+
+/// Locates the end of a braced two-character escape (such as "\p{...}")
+/// that begins at `start`.
+///
+/// @return One past the closing '}' on success; `start` itself when the
+///         third character is not '{', when fewer than four characters
+///         remain, or when no '}' follows.
+std::size_t findPropertyTokenEnd(std::string_view s, std::size_t start) {
+  const bool tooShort = start + 3 >= s.size();
+  if (tooShort || s[start + 2] != '{') {
+    return start;
+  }
+  const auto closing = s.find('}', start + 3);
+  return closing == std::string_view::npos ? start : closing + 1;
+}
+
+/// True when `resolved` is exactly one of the names this translator treats
+/// as a cased-letter category: Lu, Ll, Lt, Lowercase, Uppercase, Titlecase,
+/// or the literal class texts "[a-z]" / "[A-Z]".
+bool isCasedLetterCategory(std::string_view resolved) {
+  static constexpr std::string_view kCasedNames[] = {
+      "Lu",
+      "Ll",
+      "Lt",
+      "Lowercase",
+      "Uppercase",
+      "Titlecase",
+      "[a-z]",
+      "[A-Z]",
+  };
+  for (const std::string_view candidate : kCasedNames) {
+    if (resolved == candidate) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Translates the braced property escape that starts at s[start] and
+/// appends the translation to `out`.
+///
+/// The property name is looked up with PropertyMap::apply(); a hit is looked
+/// up once more and replaced by that second result when there is one. How
+/// the replacement is emitted depends on its shape (never-match marker,
+/// "[^...]" class, "[...]" class, "\P{...}" token, or plain name), with
+/// `pOrP == 'P'` inverting the sense. When `caseless` is set and the
+/// effective name is a cased-letter category, the union of Lu/Ll/Lt is
+/// emitted instead.
+///
+/// @param pOrP  The escape letter found in the pattern: 'p' or 'P'.
+/// @return Index one past the consumed token, or `start` when no braced
+///         token begins there (nothing is appended in that case).
+std::size_t tryAppendPropertyToken(
+    std::string_view s,
+    std::size_t start,
+    char pOrP,
+    std::string& out,
+    bool caseless) {
+  const std::size_t tokenEnd = findPropertyTokenEnd(s, start);
+  if (tokenEnd <= start) {
+    return start;
+  }
+
+  // findPropertyTokenEnd() only succeeds when s[start + 2] is '{', so the
+  // name sits between start + 3 and the closing brace at tokenEnd - 1.
+  const std::string name(s.substr(start + 3, tokenEnd - start - 4));
+  auto replacement = PropertyMap::apply(name);
+  if (replacement) {
+    if (auto normalized = PropertyMap::apply(*replacement)) {
+      replacement = std::move(normalized);
+    }
+  }
+  const std::string_view effective =
+      replacement ? std::string_view(*replacement) : std::string_view(name);
+  const bool negated = pOrP == 'P';
+
+  if (caseless && isCasedLetterCategory(effective)) {
+    out += negated ? "[^\\p{Lu}\\p{Ll}\\p{Lt}]" : "[\\p{Lu}\\p{Ll}\\p{Lt}]";
+    return tokenEnd;
+  }
+
+  if (!replacement) {
+    // Unknown to the map: copy the token through untouched.
+    out.append(s.substr(start, tokenEnd - start));
+    return tokenEnd;
+  }
+
+  const auto& mapped = *replacement;
+  if (mapped == PropertyMap::kNeverMatch) {
+    out += negated ? "[\\x{0}-\\x{10FFFF}]" : "(?!)";
+    return tokenEnd;
+  }
+
+  if (mapped.rfind("[^", 0) == 0) {
+    // Negated class text: \P drops the '^', \p keeps it.
+    if (negated) {
+      out.push_back('[');
+      out.append(mapped.substr(2));
+    } else {
+      out += mapped;
+    }
+    return tokenEnd;
+  }
+
+  if (!mapped.empty() && mapped.front() == '[') {
+    // Positive class text: \P inserts a '^', \p keeps it as is.
+    if (negated) {
+      out += "[^";
+      out.append(mapped.substr(1));
+    } else {
+      out += mapped;
+    }
+    return tokenEnd;
+  }
+
+  if (mapped.rfind("\\P{", 0) == 0) {
+    // Already-negated property token: \P turns it back into \p{...}.
+    if (negated) {
+      out += "\\p{";
+      out.append(mapped.substr(3));
+    } else {
+      out += mapped;
+    }
+    return tokenEnd;
+  }
+
+  // Plain property name: re-wrap it with the original escape letter.
+  out.push_back('\\');
+  out.push_back(pOrP);
+  out.push_back('{');
+  out += mapped;
+  out.push_back('}');
+  return tokenEnd;
+}
+
+/// Copies s[from, to) while translating only the \p{...} / \P{...} escapes
+/// (via tryAppendPropertyToken with caseless = false).
+///
+/// Text between \Q and \E is copied verbatim, and an escape whose backslash
+/// is itself escaped (odd run of backslashes already emitted) is left alone.
+std::string
+rewritePropertiesOnly(std::string_view s, std::size_t from, std::size_t to) {
+  std::string result;
+  result.reserve(to - from + 8);
+  bool quoted = false;
+
+  for (std::size_t pos = from; pos < to;) {
+    const char current = s[pos];
+
+    if (current == '\\' && pos + 1 < to) {
+      const char follower = s[pos + 1];
+
+      // \Q opens a quotation, \E closes one; each is copied as a pair.
+      const bool togglesQuote = quoted ? follower == 'E' : follower == 'Q';
+      if (togglesQuote) {
+        result.push_back('\\');
+        result.push_back(follower);
+        quoted = !quoted;
+        pos += 2;
+        continue;
+      }
+
+      const bool propertyEscape =
+          !quoted && (follower == 'p' || follower == 'P');
+      if (propertyEscape && !hasOddTrailingBackslashes(result)) {
+        const auto consumedTo =
+            tryAppendPropertyToken(s, pos, follower, result, false);
+        if (consumedTo > pos) {
+          pos = consumedTo;
+          continue;
+        }
+      }
+      // Otherwise fall through and copy just the backslash.
+    }
+
+    result.push_back(current);
+    ++pos;
+  }
+  return result;
+}
+
+/// True for the flag letters accepted inside an inline mode group:
+/// i, d, m, s, u, c, x and U.
+bool isJavaModeFlag(char c) {
+  switch (c) {
+    case 'i':
+    case 'd':
+    case 'm':
+    case 's':
+    case 'u':
+    case 'c':
+    case 'x':
+    case 'U':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/// Returns the characters of s[from, to) with the flag letters u, U, d and c
+/// removed; all other characters are kept in order.
+std::string
+filterModeFlags(std::string_view s, std::size_t from, std::size_t to) {
+  std::string kept;
+  kept.reserve(to - from);
+  for (std::size_t pos = from; pos < to; ++pos) {
+    switch (s[pos]) {
+      case 'u':
+      case 'U':
+      case 'd':
+      case 'c':
+        break; // dropped
+      default:
+        kept.push_back(s[pos]);
+    }
+  }
+  return kept;
+}
+
+/// Outcome of tryTranslateModeModifier(). A default-constructed value
+/// (`end == npos`) means "no mode modifier was recognised".
+struct ModeTranslation {
+  // Index one past the terminating ')' or ':'; npos when nothing matched.
+  std::size_t end{std::string_view::npos};
+  // The terminator that was found: ')' or ':' (0 when nothing matched).
+  char term{0};
+  // True when the modifier contained a '-' separating on-flags from off-flags.
+  bool hasDash{false};
+  // 'i' appeared among the flags before / after the dash.
+  bool onI{false};
+  bool offI{false};
+  // 'U' appeared among the flags before / after the dash.
+  bool onU{false};
+  bool offU{false};
+  // 'x' appeared among the flags before / after the dash.
+  bool onX{false};
+  bool offX{false};
+};
+
+/// True when `flag` occurs somewhere in s[from, to). An empty or inverted
+/// range yields false.
+bool containsFlag(
+    std::string_view s,
+    std::size_t from,
+    std::size_t to,
+    char flag) {
+  if (from >= to) {
+    return false;
+  }
+  const char* const first = s.data() + from;
+  const char* const last = s.data() + to;
+  return std::find(first, last, flag) != last;
+}
+
+/// Recognises an inline mode modifier "(?flags)", "(?flags-flags)",
+/// "(?flags:" or "(?flags-flags:" starting at s[start] and appends its
+/// translation to `out`.
+///
+/// The flags u, U, d and c are dropped from the emitted text (see
+/// filterModeFlags). If nothing remains, a ")"-terminated modifier emits
+/// nothing and a ":"-terminated one emits "(?:". The '-' is emitted whenever
+/// the input had one and at least one flag survives on either side.
+///
+/// @param start Index of the opening '('; flags are read from start + 2.
+/// @param len   Exclusive upper bound for scanning.
+/// @return A default ModeTranslation (end == npos) when the text is not a
+///         mode modifier; otherwise the position after the terminator plus
+///         which of i / U / x appeared on each side of the dash, taken from
+///         the unfiltered input.
+ModeTranslation tryTranslateModeModifier(
+    std::string_view s,
+    std::size_t start,
+    std::size_t len,
+    std::string& out) {
+  // Returns the index of the first non-flag character at or after `pos`.
+  const auto skipFlags = [&](std::size_t pos) {
+    while (pos < len && isJavaModeFlag(s[pos])) {
+      ++pos;
+    }
+    return pos;
+  };
+
+  const std::size_t onStart = start + 2;
+  const std::size_t onEnd = skipFlags(onStart);
+  std::size_t cursor = onEnd;
+
+  const bool hasDash = cursor < len && s[cursor] == '-';
+  std::size_t offStart = 0;
+  std::size_t offEnd = 0;
+  if (hasDash) {
+    offStart = cursor + 1;
+    offEnd = skipFlags(offStart);
+    cursor = offEnd;
+  }
+
+  if (cursor >= len) {
+    return {};
+  }
+  const char term = s[cursor];
+  if (term != ')' && term != ':') {
+    return {};
+  }
+
+  const std::string keptOn = filterModeFlags(s, onStart, onEnd);
+  const std::string keptOff =
+      hasDash ? filterModeFlags(s, offStart, offEnd) : std::string();
+
+  if (!keptOn.empty() || !keptOff.empty()) {
+    out += "(?";
+    out += keptOn;
+    if (hasDash) {
+      out.push_back('-');
+      out += keptOff;
+    }
+    out.push_back(term);
+  } else if (term == ':') {
+    // Every flag was dropped: degrade to a plain non-capturing group.
+    out += "(?:";
+  }
+
+  ModeTranslation result;
+  result.end = cursor + 1;
+  result.term = term;
+  result.hasDash = hasDash;
+  result.onI = containsFlag(s, onStart, onEnd, 'i');
+  result.offI = hasDash && containsFlag(s, offStart, offEnd, 'i');
+  result.onU = containsFlag(s, onStart, onEnd, 'U');
+  result.offU = hasDash && containsFlag(s, offStart, offEnd, 'U');
+  result.onX = containsFlag(s, onStart, onEnd, 'x');
+  result.offX = hasDash && containsFlag(s, offStart, offEnd, 'x');
+  return result;
+}
+
+/// Counts the capturing groups in `pattern`.
+///
+/// A '(' counts when it is not followed by '?', or when it opens a named
+/// group: "(?<" not followed by '=' or '!', or "(?P<". Parentheses that
+/// are escaped, inside a \Q...\E quotation, or inside a (possibly nested)
+/// character class are ignored.
+///
+/// Inside a quotation only the two-character sequence "\E" is special; any
+/// other backslash is an ordinary quoted character. Treating "\x" as a pair
+/// there would swallow the backslash of a following "\E" (as in "\Qa\\E")
+/// and leave the rest of the pattern looking quoted.
+int countCapturingGroups(std::string_view pattern) {
+  int count = 0;
+  bool inQuote = false;
+  int classDepth = 0;
+  for (std::size_t i = 0; i < pattern.size(); ++i) {
+    const char c = pattern[i];
+
+    if (inQuote) {
+      if (c == '\\' && i + 1 < pattern.size() && pattern[i + 1] == 'E') {
+        inQuote = false;
+        ++i; // consume the 'E'
+      }
+      continue;
+    }
+
+    if (c == '\\' && i + 1 < pattern.size()) {
+      if (pattern[i + 1] == 'Q') {
+        inQuote = true;
+      }
+      ++i; // skip the escaped character
+      continue;
+    }
+
+    if (c == '[') {
+      ++classDepth;
+      continue;
+    }
+    if (c == ']' && classDepth > 0) {
+      --classDepth;
+      continue;
+    }
+    if (classDepth > 0) {
+      continue;
+    }
+
+    if (c == '(') {
+      if (i + 1 >= pattern.size() || pattern[i + 1] != '?') {
+        ++count;
+      } else if (
+          i + 3 < pattern.size() && pattern[i + 2] == '<' &&
+          pattern[i + 3] != '=' && pattern[i + 3] != '!') {
+        // "(?<name>" -- but not the lookbehinds "(?<=" / "(?<!".
+        ++count;
+      } else if (
+          i + 3 < pattern.size() && pattern[i + 2] == 'P' &&
+          pattern[i + 3] == '<') {
+        ++count;
+      }
+    }
+  }
+  return count;
+}
+
+/// True when `needle` occurs as a substring of `s`.
+bool containsAscii(std::string_view s, std::string_view needle) {
+  const auto hit = s.find(needle);
+  return hit != std::string_view::npos;
+}
+
+/// Rewrites the text of a character class so that cased-letter properties
+/// cover all three cases.
+///
+/// Every "\p{X}" whose X is a cased-letter category (isCasedLetterCategory)
+/// is replaced by "\p{Lu}\p{Ll}\p{Lt}"; the "\P{X}" form is copied through
+/// unchanged. When the text contains "a-z" or "A-Z", no such replacement
+/// happened, and the result ends in ']', the same union is inserted just
+/// before that final ']'. Text without any "\p{", "\P{", "a-z" or "A-Z" is
+/// returned unchanged.
+std::string expandCasedPropertiesInClass(std::string_view classText) {
+  constexpr auto npos = std::string_view::npos;
+  const bool hasProp =
+      classText.find("\\p{") != npos || classText.find("\\P{") != npos;
+  const bool hasAsciiCasedRange =
+      classText.find("a-z") != npos || classText.find("A-Z") != npos;
+  if (!(hasProp || hasAsciiCasedRange)) {
+    return std::string(classText);
+  }
+
+  std::string rewritten;
+  rewritten.reserve(classText.size() + 32);
+  bool unionInserted = false;
+
+  std::size_t pos = 0;
+  while (pos < classText.size()) {
+    const bool propertyStart = classText[pos] == '\\' &&
+        pos + 3 < classText.size() &&
+        (classText[pos + 1] == 'p' || classText[pos + 1] == 'P') &&
+        classText[pos + 2] == '{';
+    if (propertyStart) {
+      const auto closing = classText.find('}', pos + 3);
+      if (closing != npos &&
+          isCasedLetterCategory(
+              classText.substr(pos + 3, closing - pos - 3))) {
+        if (classText[pos + 1] == 'P') {
+          rewritten.append(classText.substr(pos, closing + 1 - pos));
+        } else {
+          rewritten += "\\p{Lu}\\p{Ll}\\p{Lt}";
+          unionInserted = true;
+        }
+        pos = closing + 1;
+        continue;
+      }
+    }
+    rewritten.push_back(classText[pos]);
+    ++pos;
+  }
+
+  const bool endsWithBracket = rewritten.size() > 1 && rewritten.back() == ']';
+  if (hasAsciiCasedRange && !unionInserted && endsWithBracket) {
+    rewritten.insert(rewritten.size() - 1, "\\p{Lu}\\p{Ll}\\p{Lt}");
+  }
+  return rewritten;
+}
+
+/// Decodes one UTF-8 sequence of up to four bytes starting at s[i].
+///
+/// Only the lead-byte pattern and the 10xxxxxx shape of continuation bytes
+/// are checked, so sequences that encode surrogates decode successfully.
+///
+/// @param i   In: index of the lead byte (must be < s.size()). Out: advanced
+///            past the whole sequence on success, by exactly one byte on
+///            failure.
+/// @param cp  Out: the decoded code point on success; unspecified partial
+///            value on failure.
+/// @return true when a complete sequence was decoded.
+bool decodeUtf8CodePoint(
+    std::string_view s,
+    std::size_t& i,
+    std::uint32_t& cp) {
+  const auto byteAt = [&s](std::size_t idx) {
+    return static_cast<unsigned char>(s[idx]);
+  };
+
+  const unsigned char lead = byteAt(i);
+  if (lead < 0x80) {
+    cp = lead;
+    ++i;
+    return true;
+  }
+
+  cp = 0;
+  std::size_t trailing = 0;
+  if ((lead >> 5) == 0x06) { // 110xxxxx
+    trailing = 1;
+    cp = lead & 0x1F;
+  } else if ((lead >> 4) == 0x0E) { // 1110xxxx
+    trailing = 2;
+    cp = lead & 0x0F;
+  } else if ((lead >> 3) == 0x1E) { // 11110xxx
+    trailing = 3;
+    cp = lead & 0x07;
+  } else {
+    // Stray continuation byte or invalid lead byte.
+    ++i;
+    return false;
+  }
+
+  if (i + trailing >= s.size()) {
+    // Sequence is cut off by the end of the input.
+    ++i;
+    return false;
+  }
+
+  for (std::size_t n = 1; n <= trailing; ++n) {
+    const unsigned char next = byteAt(i + n);
+    if ((next & 0xC0) != 0x80) {
+      ++i;
+      return false;
+    }
+    cp = (cp << 6) | (next & 0x3F);
+  }
+  i += trailing + 1;
+  return true;
+}
+
+/// True when decoding s[from, min(to, s.size())) as UTF-8 yields at least one
+/// code point in the surrogate range U+D800..U+DFFF. Bytes that do not decode
+/// are skipped.
+bool containsRawSurrogate(
+    std::string_view s,
+    std::size_t from,
+    std::size_t to) {
+  const std::string_view window = s.substr(0, std::min(to, s.size()));
+  std::size_t pos = from;
+  while (pos < window.size()) {
+    const std::size_t startedAt = pos;
+    std::uint32_t decoded = 0;
+    if (decodeUtf8CodePoint(window, pos, decoded)) {
+      if (0xD800 <= decoded && decoded <= 0xDFFF) {
+        return true;
+      }
+      continue;
+    }
+    // Guarantee forward progress even if the decoder did not advance.
+    if (pos <= startedAt) {
+      ++pos;
+    }
+  }
+  return false;
+}
+
+/// Whole-string convenience overload.
+bool containsRawSurrogate(std::string_view s) {
+  return containsRawSurrogate(s, 0, s.size());
+}
+
+/// True when `s` contains an escape of the form \x{H...} whose hexadecimal
+/// value lies in the surrogate range U+D800..U+DFFF.
+///
+/// A candidate is ignored when
+///  - its backslash is itself escaped (preceded by an odd number of
+///    backslashes), because then "x{...}" is ordinary pattern text rather
+///    than a code-point escape;
+///  - the braces are empty, unterminated, or contain a non-hex character;
+///  - the value exceeds U+10FFFF. Rejecting those early also keeps the
+///    32-bit accumulator from wrapping around into the surrogate range on
+///    long digit strings.
+bool containsSurrogateHexToken(std::string_view s) {
+  constexpr std::uint32_t kMaxCodePoint = 0x10FFFF;
+
+  for (std::size_t i = 0; i + 3 < s.size(); ++i) {
+    if (s[i] != '\\' || s[i + 1] != 'x' || s[i + 2] != '{') {
+      continue;
+    }
+
+    // Count the backslashes immediately before s[i]; an odd count means the
+    // backslash at i is the escaped half of a "\\" pair.
+    std::size_t preceding = 0;
+    for (std::size_t j = i; j > 0 && s[j - 1] == '\\'; --j) {
+      ++preceding;
+    }
+    if (preceding % 2 == 1) {
+      continue;
+    }
+
+    std::size_t k = i + 3;
+    std::uint32_t cp = 0;
+    bool valid = false;
+    while (k < s.size() && s[k] != '}') {
+      if (!isHexDigit(s[k])) {
+        valid = false;
+        break;
+      }
+      cp = (cp << 4) | hexValue(s[k]);
+      if (cp > kMaxCodePoint) {
+        // Out of Unicode range; cannot be a surrogate.
+        valid = false;
+        break;
+      }
+      valid = true;
+      ++k;
+    }
+    if (valid && k < s.size() && s[k] == '}' && cp >= 0xD800 &&
+        cp <= 0xDFFF) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// True when the translated pattern mentions a surrogate code point, either
+/// as a \x{...} escape or as raw encoded bytes.
+bool needsRawByteModeForPcre2(std::string_view translatedPattern) {
+  if (containsSurrogateHexToken(translatedPattern)) {
+    return true;
+  }
+  return containsRawSurrogate(translatedPattern);
+}
+
+/// True when the run of backslashes ending immediately before s[pos] has odd
+/// length, i.e. the character at `pos` is escaped.
+bool hasOddBackslashesBefore(std::string_view s, std::size_t pos) {
+  const std::string_view prefix = s.substr(0, pos);
+  const auto lastOther = prefix.find_last_not_of('\\');
+  const std::size_t run = lastOther == std::string_view::npos
+      ? prefix.size()
+      : prefix.size() - 1 - lastOther;
+  return run % 2 != 0;
+}
+
+/// Runs ClassBodyParser::parseClass() from `start` and returns the position
+/// it leaves the cursor at; returns `start` unchanged when the parser throws
+/// std::invalid_argument.
+std::size_t trySkipClass(std::string_view s, std::size_t start) {
+  std::size_t cursor = start;
+  try {
+    ClassBodyParser::parseClass(s, cursor);
+  } catch (const std::invalid_argument&) {
+    return start;
+  }
+  return cursor;
+}
+
+/// Checks whether the text at s[start] is a well-formed inline mode modifier
+/// ("(?flags", optional "-flags", then ')' or ':') whose on-flags include
+/// U, d or c.
+///
+/// Flags after the '-' are skipped without inspection.
+///
+/// @param flag  Written only when the modifier is well formed: the last
+///              unsupported on-flag seen, or 0 when there was none.
+/// @return true when the modifier is well formed and has such a flag.
+bool validModeModifierHasUnsupportedRe2Flag(
+    std::string_view s,
+    std::size_t start,
+    char& flag) {
+  const std::size_t size = s.size();
+  std::size_t pos = start + 2;
+  char lastUnsupported = 0;
+  bool sawUnsupported = false;
+
+  for (; pos < size && isJavaModeFlag(s[pos]); ++pos) {
+    switch (s[pos]) {
+      case 'U':
+      case 'd':
+      case 'c':
+        sawUnsupported = true;
+        lastUnsupported = s[pos];
+        break;
+      default:
+        break;
+    }
+  }
+
+  if (pos < size && s[pos] == '-') {
+    do {
+      ++pos;
+    } while (pos < size && isJavaModeFlag(s[pos]));
+  }
+
+  const bool terminated = pos < size && (s[pos] == ')' || s[pos] == ':');
+  if (!terminated) {
+    return false;
+  }
+
+  flag = lastUnsupported;
+  return sawUnsupported;
+}
+
+/// Appends the RE2 form of the inline mode modifier that starts at s[start].
+///
+/// The flags x, u, U, d and c are removed. The "-off" part is emitted only
+/// when at least one off-flag survives. When no flag survives at all, a
+/// ")"-terminated modifier emits nothing and any other terminator emits
+/// "(?:".
+///
+/// @param end      Exclusive upper bound for scanning flags.
+/// @param term     Terminator of the modifier; ')' selects the standalone
+///                 form, anything else the group form closed with ':'.
+/// @param hasDash  Whether the on-flags are followed by '-' and off-flags.
+void appendRe2ModeModifier(
+    std::string_view s,
+    std::size_t start,
+    std::size_t end,
+    char term,
+    bool hasDash,
+    std::string& out) {
+  // Index of the first non-flag character at or after `pos`.
+  const auto scanFlags = [&](std::size_t pos) {
+    while (pos < end && isJavaModeFlag(s[pos])) {
+      ++pos;
+    }
+    return pos;
+  };
+  // The characters of s[from, to) that RE2 should still see.
+  const auto keptFlags = [&](std::size_t from, std::size_t to) {
+    std::string kept;
+    for (std::size_t pos = from; pos < to; ++pos) {
+      switch (s[pos]) {
+        case 'x':
+        case 'u':
+        case 'U':
+        case 'd':
+        case 'c':
+          break; // dropped
+        default:
+          kept.push_back(s[pos]);
+      }
+    }
+    return kept;
+  };
+
+  const std::size_t onStart = start + 2;
+  const std::size_t onEnd = scanFlags(onStart);
+  const std::string keptOn = keptFlags(onStart, onEnd);
+
+  std::string keptOff;
+  if (hasDash) {
+    const std::size_t offStart = onEnd + 1; // step over the '-'
+    keptOff = keptFlags(offStart, scanFlags(offStart));
+  }
+
+  if (keptOn.empty() && keptOff.empty()) {
+    if (term != ')') {
+      out += "(?:";
+    }
+    return;
+  }
+
+  out += "(?";
+  out += keptOn;
+  if (!keptOff.empty()) {
+    out.push_back('-');
+    out += keptOff;
+  }
+  out.push_back(term == ')' ? ')' : ':');
+}
+
+/// Scans a Java pattern and throws EvaluationFailedException at the first
+/// construct from the list below. Returns normally when none is found.
+///
+/// Rejected constructs:
+///  - numbered backreferences \1 .. \9 and named backreferences \k<...>
+///  - lookahead / lookbehind: (?= (?! (?<= (?<!
+///  - atomic groups: (?>
+///  - inline flags U, c or d in the on-part of a mode modifier
+///  - possessive quantifiers: *+ ?+ ++ and {n}+ / {n,}+ / {n,m}+
+///
+/// Text inside \Q...\E, character classes that trySkipClass() can parse, and
+/// '#' comments while the x flag is active are skipped. The x flag is
+/// tracked per group with a stack so it is restored at the matching ')'.
+void rejectUnsupportedRe2Features(std::string_view javaPattern) {
+  bool inQuotation = false;
+  bool commentsMode = false;
+  // Value of commentsMode saved at each open group, restored at its ')'.
+  std::vector<bool> commentsStack;
+
+  for (std::size_t i = 0; i < javaPattern.size();) {
+    const char c = javaPattern[i];
+
+    if (c == '\\' && i + 1 < javaPattern.size()) {
+      const char next = javaPattern[i + 1];
+      if (!inQuotation && next == 'Q') {
+        inQuotation = true;
+        i += 2;
+        continue;
+      }
+      if (inQuotation && next == 'E') {
+        inQuotation = false;
+        i += 2;
+        continue;
+      }
+      if (inQuotation) {
+        // Quoted backslash: advance one character only, so that a following
+        // "\E" is still recognised.
+        ++i;
+        continue;
+      }
+      if (next >= '1' && next <= '9') {
+        throw EvaluationFailedException(
+            "RE2 does not support Java backreferences (\\1-\\9)");
+      }
+      if (next == 'k' && i + 2 < javaPattern.size() &&
+          javaPattern[i + 2] == '<') {
+        throw EvaluationFailedException(
+            "RE2 does not support Java named backreferences (\\k<name>)");
+      }
+      // Any other escape: skip the backslash and the escaped character.
+      i += 2;
+      continue;
+    }
+
+    if (inQuotation) {
+      ++i;
+      continue;
+    }
+
+    // In comments mode an unescaped '#' starts a comment that runs up to
+    // (not including) the next newline.
+    if (commentsMode && c == '#' && !hasOddBackslashesBefore(javaPattern, i)) {
+      while (i < javaPattern.size() && javaPattern[i] != '\n') {
+        ++i;
+      }
+      continue;
+    }
+
+    // Jump over a whole character class when it parses; otherwise the '['
+    // falls through and is handled as an ordinary character.
+    if (c == '[' && !hasOddBackslashesBefore(javaPattern, i)) {
+      const auto classEnd = trySkipClass(javaPattern, i);
+      if (classEnd > i) {
+        i = classEnd;
+        continue;
+      }
+    }
+
+    if (c == '(' && i + 1 < javaPattern.size() && javaPattern[i + 1] == '?' &&
+        !hasOddBackslashesBefore(javaPattern, i)) {
+      if (i + 2 < javaPattern.size()) {
+        const char op = javaPattern[i + 2];
+        if (op == '=' || op == '!') {
+          throw EvaluationFailedException(
+              "RE2 does not support Java lookaround assertions");
+        }
+        if (op == '>') {
+          throw EvaluationFailedException(
+              "RE2 does not support Java atomic groups (?>...)");
+        }
+        if (op == '<' && i + 3 < javaPattern.size() &&
+            (javaPattern[i + 3] == '=' || javaPattern[i + 3] == '!')) {
+          throw EvaluationFailedException(
+              "RE2 does not support Java lookaround assertions");
+        }
+      }
+
+      char unsupportedFlag = 0;
+      if (validModeModifierHasUnsupportedRe2Flag(
+              javaPattern, i, unsupportedFlag)) {
+        if (unsupportedFlag == 'U') {
+          throw EvaluationFailedException(
+              "RE2 does not support Java UNICODE_CHARACTER_CLASS flag (?U)");
+        }
+        if (unsupportedFlag == 'c') {
+          throw EvaluationFailedException(
+              "RE2 does not support Java CANON_EQ flag (?c)");
+        }
+        // The only remaining unsupported flag is 'd'.
+        throw EvaluationFailedException(
+            "RE2 does not support Java UNIX_LINES flag (?d)");
+      }
+
+      // Reuse the mode-modifier parser only for its parse result; the text
+      // it emits is discarded.
+      std::string ignored;
+      const auto modeResult =
+          tryTranslateModeModifier(javaPattern, i, javaPattern.size(), ignored);
+      if (modeResult.end != std::string_view::npos) {
+        if (modeResult.term == ':') {
+          // "(?flags:" opens a group, so remember the mode to restore.
+          commentsStack.push_back(commentsMode);
+        }
+        if (modeResult.onX) {
+          commentsMode = true;
+        }
+        if (modeResult.hasDash && modeResult.offX) {
+          commentsMode = false;
+        }
+        i = modeResult.end;
+        continue;
+      }
+    }
+
+    if (c == '(' && !hasOddBackslashesBefore(javaPattern, i)) {
+      commentsStack.push_back(commentsMode);
+    } else if (
+        c == ')' && !hasOddBackslashesBefore(javaPattern, i) &&
+        !commentsStack.empty()) {
+      commentsMode = commentsStack.back();
+      commentsStack.pop_back();
+    }
+
+    // A quantifier character directly followed by '+' is possessive.
+    if ((c == '*' || c == '?' || c == '+') && i + 1 < javaPattern.size() &&
+        javaPattern[i + 1] == '+' && !hasOddBackslashesBefore(javaPattern, i)) {
+      throw EvaluationFailedException(
+          "RE2 does not support Java possessive quantifiers");
+    }
+
+    // Same for a counted quantifier "{...}" directly followed by '+'.
+    if (c == '{' && !hasOddBackslashesBefore(javaPattern, i)) {
+      const auto close = javaPattern.find('}', i + 1);
+      if (close != std::string_view::npos &&
+          isValidQuantifierBody(javaPattern.substr(i + 1, close - i - 1)) &&
+          close + 1 < javaPattern.size() && javaPattern[close + 1] == '+') {
+        throw EvaluationFailedException(
+            "RE2 does not support Java possessive quantifiers");
+      }
+    }
+
+    ++i;
+  }
+}
+
+/// Appends to `out` the character class pattern[classStart, classEnd) with
+/// the comments-mode noise removed: unquoted whitespace is dropped and an
+/// unquoted '#' discards everything up to (not including) the next newline.
+///
+/// Escape pairs outside a quotation are copied as two characters. Inside a
+/// \Q...\E quotation every character is copied verbatim and only the
+/// sequence "\E" is special; a quoted backslash therefore advances by one
+/// character, so that a quotation ending in "\\E" is still closed and the
+/// text after it is cleaned up again.
+///
+/// @param classStart Index of the opening '['.
+/// @param classEnd   Index one past the closing ']'.
+/// @throws EvaluationFailedException when a '#' comment reaches the end of
+///         the class without a terminating newline.
+void appendCommentsModeClassForRe2(
+    std::string_view pattern,
+    std::size_t classStart,
+    std::size_t classEnd,
+    std::string& out) {
+  out.push_back('[');
+  bool inQuotation = false;
+
+  // The loop condition keeps i + 1 < classEnd, so pattern[i + 1] is always
+  // readable inside the body.
+  for (std::size_t i = classStart + 1; i + 1 < classEnd;) {
+    const char c = pattern[i];
+
+    if (inQuotation) {
+      if (c == '\\' && pattern[i + 1] == 'E') {
+        out += "\\E";
+        inQuotation = false;
+        i += 2;
+      } else {
+        out.push_back(c);
+        ++i;
+      }
+      continue;
+    }
+
+    if (c == '\\') {
+      const char next = pattern[i + 1];
+      if (next == 'Q') {
+        inQuotation = true;
+      }
+      out.push_back(c);
+      out.push_back(next);
+      i += 2;
+      continue;
+    }
+
+    if (c == '#') {
+      while (i + 1 < classEnd && pattern[i] != '\n') {
+        ++i;
+      }
+      if (i + 1 >= classEnd) {
+        throw EvaluationFailedException(
+            "Java COMMENTS mode comment in character class is not terminated");
+      }
+      // The newline itself is dropped by the whitespace rule below.
+      continue;
+    }
+
+    if (std::isspace(static_cast<unsigned char>(c))) {
+      ++i;
+      continue;
+    }
+
+    out.push_back(c);
+    ++i;
+  }
+
+  out.push_back(']');
+}
+
+// Removes Java COMMENTS-mode (`x` flag) whitespace and `#` comments from
+// `pattern` so the result no longer depends on that mode.
+//
+// The comments-mode state is switched by inline mode modifiers (as reported
+// by tryTranslateModeModifier) and is saved/restored per group through
+// `commentsStack`.  Text inside `\Q...\E` is copied untouched; character
+// classes are copied verbatim, or cleaned by appendCommentsModeClassForRe2
+// while comments mode is active.  Mode modifiers are re-emitted through
+// appendRe2ModeModifier.
+//
+// Throws EvaluationFailedException when comments mode is active and a `(?`
+// group prefix is directly followed by whitespace or `#`.
+std::string translateCommentsModeForRe2(std::string_view pattern) {
+  std::string out;
+  out.reserve(pattern.size());
+  bool inQuotation = false;
+  bool commentsMode = false;
+  std::vector<bool> commentsStack;
+
+  for (std::size_t i = 0; i < pattern.size();) {
+    const char c = pattern[i];
+
+    if (c == '\\' && i + 1 < pattern.size()) {
+      const char next = pattern[i + 1];
+      if (inQuotation) {
+        if (next == 'E') {
+          out += "\\E";
+          inQuotation = false;
+          i += 2;
+          continue;
+        }
+        // Inside \Q...\E a backslash is a literal character.  Consume it on
+        // its own so that "\\E" still terminates the quotation and so that a
+        // quoted backslash in front of whitespace is not dropped.
+        out.push_back(c);
+        ++i;
+        continue;
+      }
+      if (next == 'Q') {
+        out += "\\Q";
+        inQuotation = true;
+        i += 2;
+        continue;
+      }
+      if (commentsMode && std::isspace(static_cast<unsigned char>(next))) {
+        // Escaped whitespace is significant in comments mode; only the
+        // whitespace character itself is emitted.
+        out.push_back(next);
+        i += 2;
+        continue;
+      }
+      out.push_back(c);
+      out.push_back(next);
+      i += 2;
+      continue;
+    }
+
+    if (inQuotation) {
+      out.push_back(c);
+      ++i;
+      continue;
+    }
+
+    if (c == '[' && !hasOddTrailingBackslashes(out)) {
+      const auto classEnd = trySkipClass(pattern, i);
+      if (classEnd > i) {
+        if (commentsMode) {
+          appendCommentsModeClassForRe2(pattern, i, classEnd, out);
+        } else {
+          out.append(pattern.substr(i, classEnd - i));
+        }
+        i = classEnd;
+        continue;
+      }
+    }
+
+    if (commentsMode && c == '(' && i + 2 < pattern.size() &&
+        pattern[i + 1] == '?' && !hasOddTrailingBackslashes(out) &&
+        (std::isspace(static_cast<unsigned char>(pattern[i + 2])) ||
+         pattern[i + 2] == '#')) {
+      throw EvaluationFailedException(
+          "Java COMMENTS mode does not ignore whitespace in inline group prefixes");
+    }
+
+    if (commentsMode && c == '#') {
+      // Drop the comment together with its terminating newline, if any.
+      while (i < pattern.size() && pattern[i] != '\n') {
+        ++i;
+      }
+      if (i < pattern.size()) {
+        ++i;
+      }
+      continue;
+    }
+
+    if (commentsMode && std::isspace(static_cast<unsigned char>(c))) {
+      ++i;
+      continue;
+    }
+
+    if (c == '(' && i + 1 < pattern.size() && pattern[i + 1] == '?' &&
+        !hasOddTrailingBackslashes(out)) {
+      // The translated modifier text is not needed here; only the parsed
+      // flags are, so the output goes to a scratch string.
+      std::string ignored;
+      const auto modeResult =
+          tryTranslateModeModifier(pattern, i, pattern.size(), ignored);
+      if (modeResult.end != std::string_view::npos) {
+        appendRe2ModeModifier(
+            pattern,
+            i,
+            modeResult.end,
+            modeResult.term,
+            modeResult.hasDash,
+            out);
+        if (modeResult.term == ':') {
+          // `(?flags:` opens a group: remember the mode to restore at `)`.
+          commentsStack.push_back(commentsMode);
+        }
+        if (modeResult.onX) {
+          commentsMode = true;
+        }
+        if (modeResult.hasDash && modeResult.offX) {
+          commentsMode = false;
+        }
+        i = modeResult.end;
+        continue;
+      }
+    }
+
+    if (c == '(' && !hasOddTrailingBackslashes(out)) {
+      commentsStack.push_back(commentsMode);
+    } else if (
+        c == ')' && !hasOddTrailingBackslashes(out) && !commentsStack.empty()) {
+      commentsMode = commentsStack.back();
+      commentsStack.pop_back();
+    }
+
+    out.push_back(c);
+    ++i;
+  }
+
+  return out;
+}
+
+// Rewrites every PCRE2 braced octal escape `\o{ddd}` in `pattern` as the
+// equivalent braced hex escape `\x{h...}`.
+//
+// Escape sequences are consumed as whole tokens so that an escaped backslash
+// followed by literal text (`\\o{12}`) is left alone, and text inside
+// `\Q...\E` is copied verbatim.  A `\o{...}` whose body is empty, contains a
+// non-octal character, or exceeds U+10FFFF is copied through unchanged
+// instead of being rewritten to a wrapped-around value.
+std::string translatePcre2OctalEscapesForRe2(std::string_view pattern) {
+  constexpr std::uint32_t kMaxCodePoint = 0x10FFFF;
+  std::string out;
+  out.reserve(pattern.size());
+  bool inQuotation = false;
+  for (std::size_t i = 0; i < pattern.size();) {
+    const char c = pattern[i];
+    if (c != '\\' || i + 1 >= pattern.size()) {
+      out.push_back(c);
+      ++i;
+      continue;
+    }
+
+    const char next = pattern[i + 1];
+    if (inQuotation) {
+      if (next == 'E') {
+        out += "\\E";
+        inQuotation = false;
+        i += 2;
+      } else {
+        // A backslash inside \Q...\E is a literal character.
+        out.push_back(c);
+        ++i;
+      }
+      continue;
+    }
+
+    if (next == 'Q') {
+      inQuotation = true;
+    } else if (next == 'o' && i + 3 < pattern.size() && pattern[i + 2] == '{') {
+      const auto close = pattern.find('}', i + 3);
+      if (close != std::string_view::npos && close > i + 3) {
+        std::uint32_t value = 0;
+        bool valid = true;
+        for (std::size_t k = i + 3; k < close; ++k) {
+          if (!isOctalDigit(pattern[k])) {
+            valid = false;
+            break;
+          }
+          value = (value << 3) + static_cast<std::uint32_t>(pattern[k] - '0');
+          // Checking after every digit keeps `value` far below the point
+          // where the next shift could overflow.
+          if (value > kMaxCodePoint) {
+            valid = false;
+            break;
+          }
+        }
+        if (valid) {
+          out += "\\x{";
+          out += toLowerHex(value);
+          out.push_back('}');
+          i = close + 1;
+          continue;
+        }
+      }
+    }
+
+    // Any other escape (including \Q and an untranslatable \o) is copied as
+    // a pair so its second character is never re-read as a new escape.
+    out.push_back(c);
+    out.push_back(next);
+    i += 2;
+  }
+  return out;
+}
+
+// Rewrites Java named-group openers `(?<name>` as RE2's `(?P<name>`.
+//
+// Lookbehind openers (`(?<=`, `(?<!`) are left alone, as is anything inside a
+// character class (skipped via trySkipClass) or inside `\Q...\E`.  Only the
+// opener changes: `(?<` becomes `(?P<` and the group name and body are copied
+// unchanged by the following iterations.
+std::string rewriteJavaNamedGroupsForRe2(std::string_view pattern) {
+  std::string out;
+  out.reserve(pattern.size() + 8);
+  bool inQuotation = false;
+
+  for (std::size_t i = 0; i < pattern.size();) {
+    const char c = pattern[i];
+    if (c == '\\' && i + 1 < pattern.size()) {
+      const char next = pattern[i + 1];
+      if (inQuotation && next != 'E') {
+        // Inside \Q...\E a backslash is literal; consume it alone so the
+        // "\E" in "\\E" is still seen as the end of the quotation.
+        out.push_back(c);
+        ++i;
+        continue;
+      }
+      if (inQuotation) {
+        inQuotation = false;
+      } else if (next == 'Q') {
+        inQuotation = true;
+      }
+      out.push_back(c);
+      out.push_back(next);
+      i += 2;
+      continue;
+    }
+
+    if (!inQuotation && c == '[' && !hasOddBackslashesBefore(pattern, i)) {
+      const auto classEnd = trySkipClass(pattern, i);
+      if (classEnd > i) {
+        out.append(pattern.substr(i, classEnd - i));
+        i = classEnd;
+        continue;
+      }
+    }
+
+    if (!inQuotation && c == '(' && i + 3 < pattern.size() &&
+        pattern[i + 1] == '?' && pattern[i + 2] == '<' &&
+        pattern[i + 3] != '=' && pattern[i + 3] != '!' &&
+        !hasOddBackslashesBefore(pattern, i)) {
+      out += "(?P<";
+      i += 3;
+      continue;
+    }
+
+    out.push_back(c);
+    ++i;
+  }
+  return out;
+}
+
+// Renders the code points in `cps` as one bracketed character class.  ASCII
+// letters are written as themselves; every other code point is written as a
+// braced hex escape `\x{...}`.  Members appear in the set's iteration order.
+std::string renderFoldClass(const std::set<std::uint32_t>& cps) {
+  std::string rendered(1, '[');
+  for (auto it = cps.begin(); it != cps.end(); ++it) {
+    const std::uint32_t codePoint = *it;
+    const bool asciiUpper = codePoint >= 'A' && codePoint <= 'Z';
+    const bool asciiLower = codePoint >= 'a' && codePoint <= 'z';
+    if (!asciiUpper && !asciiLower) {
+      rendered += "\\x{";
+      rendered += toLowerHex(codePoint);
+      rendered += '}';
+      continue;
+    }
+    rendered += static_cast<char>(codePoint);
+  }
+  rendered += ']';
+  return rendered;
+}
+
+// Returns the set of code points treated as case-equivalent to `cp` for the
+// partial Java UNICODE_CASE literal pre-folding.
+//
+// The set always contains `cp` plus its ICU simple lower/upper mappings.  For
+// a handful of special equivalences (Kelvin sign, long s, final sigma, micro
+// sign, Angstrom sign, dotted/dotless I) every member of the group, together
+// with each member's simple case mappings, is added as well.
+std::set<std::uint32_t> foldEquivalenceClass(std::uint32_t cp) {
+  std::set<std::uint32_t> members;
+  members.insert(cp);
+
+  const auto addWithSimpleCase = [&members](std::uint32_t x) {
+    members.insert(x);
+    members.insert(static_cast<std::uint32_t>(u_tolower(x)));
+    members.insert(static_cast<std::uint32_t>(u_toupper(x)));
+  };
+  addWithSimpleCase(cp);
+
+  // Special equivalence groups; the groups are pairwise disjoint, so at most
+  // one of them can contain `cp`.
+  struct Group {
+    std::uint32_t cps[4];
+    std::size_t size;
+  };
+  static constexpr Group kSpecialGroups[] = {
+      {{0x004B, 0x006B, 0x212A, 0}, 3},
+      {{0x0053, 0x0073, 0x017F, 0}, 3},
+      {{0x03A3, 0x03C3, 0x03C2, 0}, 3},
+      {{0x00B5, 0x039C, 0x03BC, 0}, 3},
+      {{0x00C5, 0x00E5, 0x212B, 0}, 3},
+      {{0x0049, 0x0069, 0x0130, 0x0131}, 4},
+  };
+
+  for (const Group& group : kSpecialGroups) {
+    bool containsCp = false;
+    for (std::size_t idx = 0; idx < group.size; ++idx) {
+      containsCp = containsCp || group.cps[idx] == cp;
+    }
+    if (!containsCp) {
+      continue;
+    }
+    for (std::size_t idx = 0; idx < group.size; ++idx) {
+      addWithSimpleCase(group.cps[idx]);
+    }
+    break;
+  }
+  return members;
+}
+
+// Parses a braced hex escape `\x{h...}` that begins at `s[start]`.
+//
+// On success returns the code point and sets `end` to the index one past the
+// closing `}`.  Returns std::nullopt, leaving `end` untouched, when the text
+// at `start` is not `\x{`, the body is empty or contains a non-hex character,
+// the closing brace is missing, or the value exceeds U+10FFFF.  Rejecting
+// oversized values also prevents the accumulator from wrapping around and
+// yielding an unrelated small code point.
+std::optional<std::uint32_t> parseHexBracedCodePoint(
+    std::string_view s,
+    std::size_t start,
+    std::size_t& end) {
+  constexpr std::uint32_t kMaxCodePoint = 0x10FFFF;
+  if (start + 3 >= s.size() || s[start] != '\\' || s[start + 1] != 'x' ||
+      s[start + 2] != '{') {
+    return std::nullopt;
+  }
+  std::size_t k = start + 3;
+  std::uint32_t cp = 0;
+  bool any = false;
+  while (k < s.size() && s[k] != '}') {
+    if (!isHexDigit(s[k])) {
+      return std::nullopt;
+    }
+    cp = (cp << 4) | hexValue(s[k]);
+    // `cp` was at most kMaxCodePoint before the shift, so the shift itself
+    // cannot overflow 32 bits.
+    if (cp > kMaxCodePoint) {
+      return std::nullopt;
+    }
+    any = true;
+    ++k;
+  }
+  if (!any || k >= s.size() || s[k] != '}') {
+    return std::nullopt;
+  }
+  end = k + 1;
+  return cp;
+}
+
+// Returns the index one past the escape sequence that starts with the
+// backslash at `pattern[start]`, including any operand whose letters are
+// syntax rather than literal text: `\p{..}`/`\P{..}`/`\pL`, `\N{..}`,
+// `\o{..}`, `\g{..}`/`\k{..}`/`\k<..>`/`\k'..'`, `\x{..}`/`\xhh` and `\cX`.
+// Every other escape is two characters long.
+// Requires `start + 1 < pattern.size()`.
+std::size_t opaqueEscapeEnd(std::string_view pattern, std::size_t start) {
+  const char kind = pattern[start + 1];
+  std::size_t end = start + 2;
+  // Extends `end` past `open ... close` when that delimiter pair follows.
+  const auto consumeDelimited = [&](char open, char close) {
+    if (end < pattern.size() && pattern[end] == open) {
+      const auto closePos = pattern.find(close, end + 1);
+      if (closePos != std::string_view::npos) {
+        end = closePos + 1;
+        return true;
+      }
+    }
+    return false;
+  };
+  switch (kind) {
+    case 'p':
+    case 'P':
+      // Braced property name, or the single-letter form such as \pL.
+      if (!consumeDelimited('{', '}') && end < pattern.size()) {
+        ++end;
+      }
+      break;
+    case 'N':
+    case 'o':
+      consumeDelimited('{', '}');
+      break;
+    case 'g':
+    case 'k':
+      if (!consumeDelimited('{', '}') && !consumeDelimited('<', '>')) {
+        consumeDelimited('\'', '\'');
+      }
+      break;
+    case 'x':
+      if (!consumeDelimited('{', '}')) {
+        for (int digits = 0; digits < 2 && end < pattern.size() &&
+             std::isxdigit(static_cast<unsigned char>(pattern[end]));
+             ++digits) {
+          ++end;
+        }
+      }
+      break;
+    case 'c':
+      // Control escape: the following character is the operand.
+      if (end < pattern.size()) {
+        ++end;
+      }
+      break;
+    default:
+      break;
+  }
+  return end;
+}
+
+// Returns the index one past the syntactic prefix of the group that opens at
+// `pattern[start]`, where `pattern[start + 1]` is `?` or `*`.  The prefix
+// covers a whole `(*VERB)`, a group name (`(?<name>`, `(?P<name>`), a
+// lookbehind marker (`(?<=`, `(?<!`) or a run of inline flag letters
+// (`(?i-m`), none of which may be case-expanded.
+std::size_t groupPrefixEnd(std::string_view pattern, std::size_t start) {
+  const std::size_t size = pattern.size();
+  std::size_t k = start + 2;
+  if (pattern[start + 1] == '*') {
+    const auto close = pattern.find(')', k);
+    return close == std::string_view::npos ? k : close + 1;
+  }
+  if (k + 1 < size && pattern[k] == 'P' && pattern[k + 1] == '<') {
+    ++k;
+  }
+  if (k < size && pattern[k] == '<') {
+    if (k + 1 < size && (pattern[k + 1] == '=' || pattern[k + 1] == '!')) {
+      return k + 2;
+    }
+    const auto close = pattern.find('>', k + 1);
+    return close == std::string_view::npos ? k + 1 : close + 1;
+  }
+  while (k < size &&
+         (std::isalpha(static_cast<unsigned char>(pattern[k])) ||
+          pattern[k] == '-' || pattern[k] == '^')) {
+    ++k;
+  }
+  return k;
+}
+
+// Pre-expands cased literal code points of an already translated pattern for
+// Java's CASE_INSENSITIVE | UNICODE_CASE semantics: each literal (a UTF-8
+// encoded character or a `\x{...}` escape) whose foldEquivalenceClass has
+// more than one member is replaced by the class from renderFoldClass.
+//
+// Copied through unchanged: text inside `\Q...\E`, character classes (via
+// trySkipClass), escape sequences together with their operands, and group
+// prefixes such as names, inline flags and `(*VERB)`s, because the letters in
+// those positions are syntax, not literals.
+std::string expandCasedLiteralsForUnicodeCase(std::string_view pattern) {
+  std::string out;
+  out.reserve(pattern.size() + 16);
+  bool inQuotation = false;
+  for (std::size_t i = 0; i < pattern.size();) {
+    const char c = pattern[i];
+    if (c == '\\' && i + 1 < pattern.size()) {
+      const char next = pattern[i + 1];
+      if (inQuotation) {
+        if (next == 'E') {
+          out += "\\E";
+          i += 2;
+          inQuotation = false;
+        } else {
+          // A backslash inside \Q...\E is literal; consume it alone so that
+          // "\\E" still ends the quotation.
+          out.push_back(c);
+          ++i;
+        }
+        continue;
+      }
+      if (next == 'Q') {
+        out += "\\Q";
+        i += 2;
+        inQuotation = true;
+        continue;
+      }
+      std::size_t end = i;
+      if (auto cp = parseHexBracedCodePoint(pattern, i, end)) {
+        auto cps = foldEquivalenceClass(*cp);
+        if (cps.size() > 1) {
+          out += renderFoldClass(cps);
+        } else {
+          out.append(pattern.substr(i, end - i));
+        }
+        i = end;
+        continue;
+      }
+      // Any other escape is copied with its operand so that e.g. the
+      // property name in \p{Lu} is not mistaken for literal letters.
+      end = opaqueEscapeEnd(pattern, i);
+      out.append(pattern.substr(i, end - i));
+      i = end;
+      continue;
+    }
+    if (inQuotation) {
+      out.push_back(c);
+      ++i;
+      continue;
+    }
+    if (c == '[' && !hasOddTrailingBackslashes(out)) {
+      const auto classEnd = trySkipClass(pattern, i);
+      if (classEnd > i) {
+        out.append(pattern.substr(i, classEnd - i));
+        i = classEnd;
+        continue;
+      }
+    }
+    if (c == '(' && i + 1 < pattern.size() &&
+        (pattern[i + 1] == '?' || pattern[i + 1] == '*') &&
+        !hasOddTrailingBackslashes(out)) {
+      const auto prefixEnd = groupPrefixEnd(pattern, i);
+      out.append(pattern.substr(i, prefixEnd - i));
+      i = prefixEnd;
+      continue;
+    }
+
+    std::size_t next = i;
+    std::uint32_t cp = 0;
+    if (decodeUtf8CodePoint(pattern, next, cp)) {
+      auto cps = foldEquivalenceClass(cp);
+      if (cps.size() > 1) {
+        out += renderFoldClass(cps);
+      } else {
+        out.append(pattern.substr(i, next - i));
+      }
+      i = next;
+      continue;
+    }
+    out.push_back(c);
+    ++i;
+  }
+  return out;
+}
+
+} // namespace
+
+// Translates `javaPattern` (java.util.regex syntax) into PCRE2 syntax.
+//
+// `needsRawByteMode` is reset to false on entry and, for a non-empty input,
+// finally set from needsRawByteModeForPcre2() applied to the translated
+// pattern.  An empty input is returned as an empty string.
+//
+// The scan walks the pattern once, tracking \Q...\E quotation and the
+// caseless / UNICODE_CHARACTER_CLASS / COMMENTS flags per group
+// (`groupStack`).  Escapes, character classes and inline mode modifiers are
+// rewritten; everything else is copied through.
+//
+// Throws EvaluationFailedException for lone surrogate `\u` escapes,
+// UNICODE_CHARACTER_CLASS intersections that cannot be translated safely, bad
+// intersection syntax, and an unescaped `{` that is unclosed or is not a
+// valid quantifier.
+std::string toPcre2Pattern(
+    std::string_view javaPattern,
+    bool& needsRawByteMode) {
+  needsRawByteMode = false;
+  if (javaPattern.empty()) {
+    return std::string(javaPattern);
+  }
+
+  const std::size_t len = javaPattern.size();
+  std::string out;
+  out.reserve(len + 32);
+
+  std::size_t i = 0;
+  bool inQuotation = false;
+  bool caseless = false;
+  bool unicodeCharacterClass = false;
+  bool commentsMode = false;
+  // Flag state to restore when the matching `)` is reached.
+  struct GroupFrame {
+    bool previousCaseless;
+    bool previousUnicodeCharacterClass;
+    bool previousCommentsMode;
+  };
+  std::vector<GroupFrame> groupStack;
+
+  while (i < len) {
+    const char c = javaPattern[i];
+
+    if (c == '\\' && i + 1 < len) {
+      const char next = javaPattern[i + 1];
+
+      if (!inQuotation && next == 'Q') {
+        out += "\\Q";
+        i += 2;
+        inQuotation = true;
+        continue;
+      }
+
+      if (inQuotation && next == 'E') {
+        out += "\\E";
+        i += 2;
+        inQuotation = false;
+        continue;
+      }
+
+      if (inQuotation) {
+        // A quoted backslash is literal: emit it alone and re-examine the
+        // next character (it may start the terminating \E).
+        out.push_back(c);
+        ++i;
+        continue;
+      }
+
+      if (next == '\\') {
+        // An escaped backslash is a complete token.  Emitting both
+        // characters at once keeps the second backslash from being re-read
+        // as the start of a new escape (\\u0041, \\Q, \\1, \\x{..}, \\0..).
+        out += "\\\\";
+        i += 2;
+        continue;
+      }
+
+      if (next == 'p' || next == 'P') {
+        if (!hasOddTrailingBackslashes(out)) {
+          const auto tokenEnd =
+              tryAppendPropertyToken(javaPattern, i, next, out, caseless);
+          if (tokenEnd > i) {
+            i = tokenEnd;
+            continue;
+          }
+        }
+      }
+
+      if (next == 'u' && i + 6 <= len) {
+        // \uXXXX: require exactly four hex digits.
+        std::size_t k = i + 2;
+        const std::size_t hexEnd = k + 4;
+        while (k < hexEnd && isHexDigit(javaPattern[k])) {
+          ++k;
+        }
+        if (k - (i + 2) == 4) {
+          const std::uint32_t cp = parseFourHex(javaPattern, i + 2);
+          if (cp >= 0xD800 && cp <= 0xDBFF) {
+            // High surrogate: only translatable when immediately followed by
+            // a \uXXXX low surrogate, in which case the pair is combined
+            // into one supplementary code point.
+            if (i + 12 <= len && javaPattern[i + 6] == '\\' &&
+                javaPattern[i + 7] == 'u') {
+              bool hasLowSurrogate = true;
+              for (std::size_t p = i + 8; p < i + 12; ++p) {
+                hasLowSurrogate = hasLowSurrogate && isHexDigit(javaPattern[p]);
+              }
+              if (hasLowSurrogate) {
+                const std::uint32_t low = parseFourHex(javaPattern, i + 8);
+                if (low >= 0xDC00 && low <= 0xDFFF) {
+                  const std::uint32_t scalar =
+                      0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
+                  out += "\\x{";
+                  out += toLowerHex(scalar);
+                  out.push_back('}');
+                  i += 12;
+                  continue;
+                }
+              }
+            }
+            throw EvaluationFailedException(
+                "Lone high-surrogate Unicode escape cannot be safely translated");
+          }
+          if (cp >= 0xDC00 && cp <= 0xDFFF) {
+            throw EvaluationFailedException(
+                "Lone low-surrogate Unicode escape cannot be safely translated");
+          }
+          out += "\\x{";
+          out.append(javaPattern.substr(i + 2, 4));
+          out.push_back('}');
+          i = k;
+          continue;
+        }
+      }
+
+      if (next == 'N' && i + 2 < len && javaPattern[i + 2] == '{') {
+        // \N{NAME}: resolved through ICU; an unknown name is copied through
+        // unchanged.
+        const auto close = javaPattern.find('}', i + 3);
+        if (close != std::string_view::npos) {
+          const std::string name(javaPattern.substr(i + 3, close - i - 3));
+          UErrorCode status = U_ZERO_ERROR;
+          const UChar32 cp =
+              u_charFromName(U_EXTENDED_CHAR_NAME, name.c_str(), &status);
+          if (U_SUCCESS(status)) {
+            out += "\\x{";
+            out += toLowerHex(static_cast<std::uint32_t>(cp));
+            out.push_back('}');
+          } else {
+            out.append(javaPattern.substr(i, close + 1 - i));
+          }
+          i = close + 1;
+          continue;
+        }
+      }
+
+      if (next == 'x' && i + 2 < len && javaPattern[i + 2] == '{') {
+        const auto close = javaPattern.find('}', i + 3);
+        if (close != std::string_view::npos) {
+          out.append(javaPattern.substr(i, close + 1 - i));
+          i = close + 1;
+          continue;
+        }
+      }
+
+      if (next == '0' && i + 2 < len && isOctalDigit(javaPattern[i + 2])) {
+        // Java octal escape \0n, \0nn or \0mnn (m <= 3), emitted as \o{...}.
+        std::size_t k = i + 2;
+        const std::size_t last = std::min(k + 3, len);
+        while (k < last && isOctalDigit(javaPattern[k])) {
+          ++k;
+        }
+        if (k - (i + 2) == 3 && javaPattern[i + 2] > '3') {
+          // Three digits are only taken when the first is 0-3; otherwise the
+          // third digit is a literal that follows the escape.
+          --k;
+        }
+        int value = 0;
+        for (std::size_t p = i + 2; p < k; ++p) {
+          value = value * 8 + (javaPattern[p] - '0');
+        }
+        out += "\\o{";
+        char buf[16];
+        std::snprintf(buf, sizeof(buf), "%o", value);
+        out += buf;
+        out.push_back('}');
+        i = k;
+        continue;
+      }
+
+      if (next >= '1' && next <= '9') {
+        // Back-reference: take the longest digit run that names an existing
+        // group; digits left over are emitted as literals after it.
+        std::size_t k = i + 2;
+        while (k < len &&
+               std::isdigit(static_cast<unsigned char>(javaPattern[k]))) {
+          ++k;
+        }
+        const int groupCount = countCapturingGroups(javaPattern);
+        std::size_t useDigits = k - (i + 1);
+        // Parses the first `digits` digits, saturating at groupCount + 1 so
+        // that arbitrarily long digit runs cannot overflow.
+        auto parseDigits = [&](std::size_t digits) {
+          int value = 0;
+          for (std::size_t p = i + 1; p < i + 1 + digits; ++p) {
+            const int digit = javaPattern[p] - '0';
+            if (value > (groupCount + 1 - digit) / 10) {
+              return groupCount + 1;
+            }
+            value = value * 10 + digit;
+          }
+          return value;
+        };
+        int backrefN = parseDigits(useDigits);
+        while (useDigits > 1 && backrefN > groupCount) {
+          --useDigits;
+          backrefN = parseDigits(useDigits);
+        }
+        if (backrefN > groupCount) {
+          // Reference to a group that does not exist: emit the PCRE2 verb
+          // `(*F)` in its place.
+          out += "(*F)";
+        } else {
+          out += "\\g{";
+          out += std::to_string(backrefN);
+          out.push_back('}');
+        }
+        for (std::size_t p = i + 1 + useDigits; p < k; ++p) {
+          out.push_back(javaPattern[p]);
+        }
+        i = k;
+        continue;
+      }
+
+      // Any other escape: emit the backslash; the escaped character is
+      // handled by the next iteration, where hasOddTrailingBackslashes(out)
+      // marks it as escaped.
+      out.push_back(c);
+      ++i;
+      continue;
+    }
+
+    if (inQuotation) {
+      out.push_back(c);
+      ++i;
+      continue;
+    }
+
+    if (commentsMode && c == '#' && !hasOddTrailingBackslashes(out)) {
+      // Comments are copied through verbatim, up to and including the
+      // newline, without any translation.
+      while (i < len) {
+        const char commentChar = javaPattern[i++];
+        out.push_back(commentChar);
+        if (commentChar == '\n') {
+          break;
+        }
+      }
+      continue;
+    }
+
+    if (c == '[' && !hasOddTrailingBackslashes(out)) {
+      const std::size_t classStart = i;
+      std::size_t pos = i;
+      try {
+        const ClassNode classNode =
+            ClassBodyParser::parseClass(javaPattern, pos);
+        const std::size_t classEnd = pos;
+        if (containsRawSurrogate(javaPattern, classStart, classEnd)) {
+          out += rewritePropertiesOnly(javaPattern, classStart, classEnd);
+          i = classEnd;
+          continue;
+        }
+        const auto classText =
+            javaPattern.substr(classStart, classEnd - classStart);
+        if (unicodeCharacterClass &&
+            classText.find("&&") != std::string_view::npos &&
+            (classText.find("\\d") != std::string_view::npos ||
+             classText.find("\\D") != std::string_view::npos ||
+             classText.find("\\w") != std::string_view::npos ||
+             classText.find("\\W") != std::string_view::npos ||
+             classText.find("\\s") != std::string_view::npos ||
+             classText.find("\\S") != std::string_view::npos)) {
+          throw EvaluationFailedException(
+              "UNICODE_CHARACTER_CLASS intersection cannot be safely translated");
+        }
+        const auto renderResult = ClassRenderer::renderWithSignal(classNode);
+        const std::string& rendered = renderResult.text;
+        const std::string renderedWithMappedProperties =
+            rewritePropertiesOnly(rendered, 0, rendered.size());
+        const std::string maybeFolded = caseless
+            ? expandCasedPropertiesInClass(renderedWithMappedProperties)
+            : renderedWithMappedProperties;
+        if (renderResult.intersectionUnresolved) {
+          // Fall back to the original class text with only the property
+          // names rewritten.
+          out += rewritePropertiesOnly(javaPattern, classStart, classEnd);
+        } else {
+          out += maybeFolded;
+        }
+        i = classEnd;
+        continue;
+      } catch (const std::invalid_argument& e) {
+        if (e.what() != nullptr &&
+            std::string_view(e.what()).rfind("Bad intersection syntax", 0) ==
+                0) {
+          throw EvaluationFailedException("Bad intersection syntax");
+        }
+        // Any other class parse failure: treat `[` as an ordinary character.
+        out.push_back(c);
+        ++i;
+        continue;
+      }
+    }
+
+    if (c == '(' && i + 1 < len && javaPattern[i + 1] == '?' &&
+        !hasOddTrailingBackslashes(out)) {
+      const auto modeResult =
+          tryTranslateModeModifier(javaPattern, i, len, out);
+      if (modeResult.end != std::string_view::npos) {
+        if (modeResult.term == ':') {
+          // `(?flags:` opens a group; the flags apply until its `)`.
+          groupStack.push_back({caseless, unicodeCharacterClass, commentsMode});
+        }
+        if (modeResult.onI) {
+          caseless = true;
+        }
+        if (modeResult.hasDash && modeResult.offI) {
+          caseless = false;
+        }
+        if (modeResult.onU) {
+          unicodeCharacterClass = true;
+        }
+        if (modeResult.hasDash && modeResult.offU) {
+          unicodeCharacterClass = false;
+        }
+        if (modeResult.onX) {
+          commentsMode = true;
+        }
+        if (modeResult.hasDash && modeResult.offX) {
+          commentsMode = false;
+        }
+        i = modeResult.end;
+        continue;
+      }
+    }
+
+    if (c == '(' && !hasOddTrailingBackslashes(out)) {
+      groupStack.push_back({caseless, unicodeCharacterClass, commentsMode});
+    }
+
+    if (c == '{' && !hasOddTrailingBackslashes(out)) {
+      // Validation only: the brace itself is copied by the code below.
+      const auto close = javaPattern.find('}', i + 1);
+      if (close == std::string_view::npos) {
+        throw EvaluationFailedException("Unclosed counted closure");
+      }
+      const auto body = javaPattern.substr(i + 1, close - i - 1);
+      if (!isValidQuantifierBody(body)) {
+        throw EvaluationFailedException("Illegal repetition");
+      }
+    }
+
+    // Must be evaluated before `c` is appended, because the check inspects
+    // the tail of `out`.
+    const bool closesGroup = c == ')' && !hasOddTrailingBackslashes(out);
+    out.push_back(c);
+    if (closesGroup) {
+      if (!groupStack.empty()) {
+        const auto frame = groupStack.back();
+        groupStack.pop_back();
+        caseless = frame.previousCaseless;
+        unicodeCharacterClass = frame.previousUnicodeCharacterClass;
+        commentsMode = frame.previousCommentsMode;
+      }
+    }
+    ++i;
+  }
+
+  needsRawByteMode = needsRawByteModeForPcre2(out);
+  return out;
+}
+
+// Convenience overload: translates `javaPattern` for PCRE2 and discards the
+// raw-byte-mode indicator.
+std::string toPcre2Pattern(std::string_view javaPattern) {
+  bool rawByteModeUnused = false;
+  std::string translated = toPcre2Pattern(javaPattern, rawByteModeUnused);
+  return translated;
+}
+
+// Translates `javaPattern` for PCRE2, pre-expands cased literals, and then
+// recomputes `needsRawByteMode` from the expanded pattern.
+std::string toPcre2PatternWithUnicodeCase(
+    std::string_view javaPattern,
+    bool& needsRawByteMode) {
+  std::string expanded = expandCasedLiteralsForUnicodeCase(
+      toPcre2Pattern(javaPattern, needsRawByteMode));
+  needsRawByteMode = needsRawByteModeForPcre2(expanded);
+  return expanded;
+}
+
+// Convenience overload that discards the raw-byte-mode indicator.
+std::string toPcre2PatternWithUnicodeCase(std::string_view javaPattern) {
+  bool rawByteModeUnused = false;
+  std::string translated =
+      toPcre2PatternWithUnicodeCase(javaPattern, rawByteModeUnused);
+  return translated;
+}
+
+// Translates `javaPattern` for RE2.  The steps run in this order: reject
+// unsupported features, strip COMMENTS-mode noise, apply the PCRE2
+// translation, turn `\o{...}` escapes into `\x{...}`, and rewrite named
+// groups to the `(?P<name>` form.
+std::string toRe2Pattern(std::string_view javaPattern) {
+  rejectUnsupportedRe2Features(javaPattern);
+  const std::string withoutComments = translateCommentsModeForRe2(javaPattern);
+  const std::string pcre2Form = toPcre2Pattern(withoutComments);
+  const std::string hexEscaped = translatePcre2OctalEscapesForRe2(pcre2Form);
+  return rewriteJavaNamedGroupsForRe2(hexEscaped);
+}
+
+// Translates `javaPattern` for RE2 and then pre-expands cased literals.
+std::string toRe2PatternWithUnicodeCase(std::string_view javaPattern) {
+  const std::string re2Form = toRe2Pattern(javaPattern);
+  return expandCasedLiteralsForUnicodeCase(re2Form);
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.h b/velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.h
new file mode 100644
index 00000000000..8172238ae7c
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.JavaRegexTranslator (Java) under
+// Apache-2.0 by the same author for inclusion in Velox.
+//
+// This header is the public surface of the `java_pcre2_translator`
+// library.  It declares free functions that rewrite a `java.util.regex`
+// pattern string into an equivalent pattern accepted by either PCRE2 or
+// RE2.
+//
+#pragma once
+
+#include <string>
+#include <string_view>
+
+#include "velox/functions/lib/java_pcre2_translator/EvaluationFailedException.h"
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+/// Rewrites a `java.util.regex.Pattern` source string into an equivalent
+/// pattern accepted by PCRE2.  Implements the 3-phase pipeline described
+/// in pcre4j PR #606:
+///
+///   1. Expand top-level `\p{...}` / `\P{...}` property tokens via the
+///      Java property → Unicode block alias map.
+///   2. Re-parse each character-class body, flatten nested unions, resolve
+///      `&&` intersections via range-set algebra, and escape `-` after
+///      multi-char escapes to disambiguate from the range operator.
+///   3. Rewrite Java inline flag groups whose semantics diverge in PCRE2
+///      (notably `(?U)` which means UNICODE_CHARACTER_CLASS in Java but
+///      "ungreedy" in PCRE2).
+///
+/// Throws `EvaluationFailedException` when the input cannot be safely
+/// expressed in PCRE2 syntax (e.g. a property name with no PCRE2
+/// equivalent).  Callers are expected to surface the message verbatim.
+///
+/// This overload discards the raw-byte-mode indicator reported by the
+/// two-argument overload.
+///
+std::string toPcre2Pattern(std::string_view javaPattern);
+
+/// Rewrites a Java pattern and reports whether the resulting PCRE2 compile
+/// must omit PCRE2_UTF to allow lone surrogate code units.
+/// `needsRawByteMode` is an output parameter; its incoming value is ignored.
+std::string toPcre2Pattern(
+    std::string_view javaPattern,
+    bool& needsRawByteMode);
+
+/// Rewrites a Java pattern for PCRE2 and pre-expands cased literal code points
+/// for Java's CASE_INSENSITIVE | UNICODE_CASE semantics.  This is intentionally
+/// limited to literals outside character classes and outside \Q...\E quotes.
+/// `needsRawByteMode` is recomputed from the expanded pattern.
+std::string toPcre2PatternWithUnicodeCase(
+    std::string_view javaPattern,
+    bool& needsRawByteMode);
+
+/// Convenience overload of `toPcre2PatternWithUnicodeCase` that discards the
+/// raw-byte-mode indicator.
+std::string toPcre2PatternWithUnicodeCase(std::string_view javaPattern);
+
+/// Rewrites a `java.util.regex.Pattern` source string into an equivalent
+/// pattern accepted by RE2.
+///
+/// This shares the PCRE2 property and character-class translation pipeline,
+/// rewrites Java named groups `(?<name>...)` to RE2 `(?P<name>...)`, and
+/// rejects Java features that RE2 cannot represent without changing
+/// semantics.
+std::string toRe2Pattern(std::string_view javaPattern);
+
+/// Rewrites a Java pattern for RE2 via `toRe2Pattern` and then pre-expands
+/// cased literal code points, as `toPcre2PatternWithUnicodeCase` does for
+/// PCRE2.
+std::string toRe2PatternWithUnicodeCase(std::string_view javaPattern);
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.cpp b/velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.cpp
new file mode 100644
index 00000000000..207f130f691
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.cpp
@@ -0,0 +1,508 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.JdkPropertyExpander (Java) under Apache-2.0 by
+// the same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.h"
+
+#include <unicode/uchar.h>
+#include <unicode/uscript.h>
+
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
+#include <mutex>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+namespace {
+
+// Accumulates code points into inclusive [start, end] spans and hands them to
+// RangeSet::fromSortedPairs.  A code point extends the open span only when it
+// is exactly one past the span's end; any other value closes the open span
+// and starts a new one, so callers are presumably expected to add code points
+// in ascending order.
+class SpanBuilder {
+ public:
+  // Adds `cp`, either growing the open span or starting a new one.
+  void add(std::int32_t cp) {
+    const bool hasOpenSpan = spanStart_ >= 0;
+    if (hasOpenSpan && cp == spanEnd_ + 1) {
+      spanEnd_ = cp;
+      return;
+    }
+    if (hasOpenSpan) {
+      emitOpenSpan();
+    }
+    spanStart_ = cp;
+    spanEnd_ = cp;
+  }
+
+  // Closes the open span, if any, and builds the RangeSet from all collected
+  // pairs.  The pair storage is moved out by this call.
+  RangeSet build() {
+    if (spanStart_ >= 0) {
+      emitOpenSpan();
+      spanStart_ = -1;
+    }
+    return RangeSet::fromSortedPairs(std::move(pairs_));
+  }
+
+ private:
+  // Appends the open span's bounds to the flat (start, end) pair list.
+  void emitOpenSpan() {
+    pairs_.push_back(spanStart_);
+    pairs_.push_back(spanEnd_);
+  }
+
+  std::vector<std::int32_t> pairs_;
+  // A negative start means that no span is currently open.
+  std::int32_t spanStart_{-1};
+  std::int32_t spanEnd_{-1};
+};
+
+// Returns a copy of `s` with each byte passed through std::toupper.
+std::string upperAscii(std::string_view s) {
+  std::string result;
+  result.resize(s.size());
+  std::transform(s.begin(), s.end(), result.begin(), [](char ch) {
+    return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
+  });
+  return result;
+}
+
+// Builds a lookup key from a property name: underscores, hyphens and
+// whitespace are removed and the remaining bytes are upper-cased.
+std::string normalizePropertyKey(std::string_view s) {
+  std::string key;
+  key.reserve(s.size());
+  for (std::size_t idx = 0; idx < s.size(); ++idx) {
+    const char ch = s[idx];
+    const auto byte = static_cast<unsigned char>(ch);
+    const bool isSeparator =
+        ch == '_' || ch == '-' || std::isspace(byte) != 0;
+    if (!isSeparator) {
+      key += static_cast<char>(std::toupper(byte));
+    }
+  }
+  return key;
+}
+
+// Records `cp` under the upper-cased form of `alias`.  A null or empty alias
+// is ignored.
+void addAlias(
+    std::unordered_map<std::string, SpanBuilder>& builders,
+    const char* alias,
+    std::int32_t cp) {
+  if (alias == nullptr || *alias == '\0') {
+    return;
+  }
+  SpanBuilder& builder = builders[upperAscii(alias)];
+  builder.add(cp);
+}
+
+// Records `cp` under the normalized form of `alias` (see
+// normalizePropertyKey).  A null or empty alias is ignored.
+void addNormalizedAlias(
+    std::unordered_map<std::string, SpanBuilder>& builders,
+    const char* alias,
+    std::int32_t cp) {
+  if (alias == nullptr || *alias == '\0') {
+    return;
+  }
+  SpanBuilder& builder = builders[normalizePropertyKey(alias)];
+  builder.add(cp);
+}
+
+// Maps an ICU general-category value to its two-letter, upper-case category
+// abbreviation.  Returns nullptr for a value that is not in the table.
+const char* categoryName(int32_t type) {
+  struct CategoryEntry {
+    int32_t type;
+    const char* name;
+  };
+  static constexpr CategoryEntry kCategories[] = {
+      {U_UPPERCASE_LETTER, "LU"},
+      {U_LOWERCASE_LETTER, "LL"},
+      {U_TITLECASE_LETTER, "LT"},
+      {U_MODIFIER_LETTER, "LM"},
+      {U_OTHER_LETTER, "LO"},
+      {U_NON_SPACING_MARK, "MN"},
+      {U_ENCLOSING_MARK, "ME"},
+      {U_COMBINING_SPACING_MARK, "MC"},
+      {U_DECIMAL_DIGIT_NUMBER, "ND"},
+      {U_LETTER_NUMBER, "NL"},
+      {U_OTHER_NUMBER, "NO"},
+      {U_SPACE_SEPARATOR, "ZS"},
+      {U_LINE_SEPARATOR, "ZL"},
+      {U_PARAGRAPH_SEPARATOR, "ZP"},
+      {U_CONTROL_CHAR, "CC"},
+      {U_FORMAT_CHAR, "CF"},
+      {U_SURROGATE, "CS"},
+      {U_PRIVATE_USE_CHAR, "CO"},
+      {U_UNASSIGNED, "CN"},
+      {U_DASH_PUNCTUATION, "PD"},
+      {U_START_PUNCTUATION, "PS"},
+      {U_END_PUNCTUATION, "PE"},
+      {U_CONNECTOR_PUNCTUATION, "PC"},
+      {U_OTHER_PUNCTUATION, "PO"},
+      {U_MATH_SYMBOL, "SM"},
+      {U_CURRENCY_SYMBOL, "SC"},
+      {U_MODIFIER_SYMBOL, "SK"},
+      {U_OTHER_SYMBOL, "SO"},
+      {U_INITIAL_PUNCTUATION, "PI"},
+      {U_FINAL_PUNCTUATION, "PF"},
+  };
+  for (const CategoryEntry& entry : kCategories) {
+    if (entry.type == type) {
+      return entry.name;
+    }
+  }
+  return nullptr;
+}
+
+// Returns the union of the range sets stored in `map` under `keys`.  Keys
+// that are not present in the map are skipped.
+RangeSet unionOf(
+    const std::unordered_map<std::string, RangeSet>& map,
+    std::initializer_list<const char*> keys) {
+  RangeSet combined = RangeSet::empty();
+  for (auto keyIt = keys.begin(); keyIt != keys.end(); ++keyIt) {
+    const auto found = map.find(*keyIt);
+    if (found == map.end()) {
+      continue;
+    }
+    combined = combined.unionWith(found->second);
+  }
+  return combined;
+}
+
+// Returns true when `cp` is whitespace according to Java's
+// Character.isWhitespace: the ASCII controls TAB, LF, VT, FF, CR and
+// U+001C..U+001F, plus the Unicode space, line and paragraph separators
+// listed below.  The non-breaking spaces U+00A0, U+2007 and U+202F are
+// deliberately excluded, which is why the U+2000..U+200A run is split
+// around U+2007.
+bool isJavaWhitespace(std::int32_t cp) {
+  return cp == '\t' || cp == '\n' || cp == 0x0B || cp == '\f' || cp == '\r' ||
+      cp == ' ' || (cp >= 0x1C && cp <= 0x1F) || cp == 0x1680 ||
+      (cp >= 0x2000 && cp <= 0x2006) || (cp >= 0x2008 && cp <= 0x200A) ||
+      cp == 0x2028 || cp == 0x2029 || cp == 0x205F || cp == 0x3000;
+}
+
+// Returns true when ICU reports one of the five letter general categories
+// (upper, lower, title, modifier, other) for `cp`.
+bool isJavaLetter(std::int32_t cp) {
+  const auto category = u_charType(static_cast<UChar32>(cp));
+  return category == U_UPPERCASE_LETTER || category == U_LOWERCASE_LETTER ||
+      category == U_TITLECASE_LETTER || category == U_MODIFIER_LETTER ||
+      category == U_OTHER_LETTER;
+}
+
+// Builds the table of `javaXxx` properties by scanning every code point in
+// [0, RangeSet::kMaxCp] once and adding it to the builder of each property it
+// satisfies. Every property name below is present in the returned map, even
+// when no code point matched it.
+std::unordered_map<std::string, RangeSet> buildJavaPropertyMap() {
+  std::unordered_map<std::string, SpanBuilder> builders;
+  // Resolve each builder exactly once. The scan below visits more than a
+  // million code points, so a string-keyed lookup per property per code point
+  // would dominate the cost. References into std::unordered_map stay valid
+  // across later insertions.
+  SpanBuilder& lowerCase = builders["javaLowerCase"];
+  SpanBuilder& upperCase = builders["javaUpperCase"];
+  SpanBuilder& titleCase = builders["javaTitleCase"];
+  SpanBuilder& spaceChar = builders["javaSpaceChar"];
+  SpanBuilder& mirrored = builders["javaMirrored"];
+  SpanBuilder& defined = builders["javaDefined"];
+  SpanBuilder& digit = builders["javaDigit"];
+  SpanBuilder& alphabetic = builders["javaAlphabetic"];
+  SpanBuilder& ideographic = builders["javaIdeographic"];
+  SpanBuilder& isoControl = builders["javaISOControl"];
+  SpanBuilder& whitespace = builders["javaWhitespace"];
+  SpanBuilder& letter = builders["javaLetter"];
+  SpanBuilder& letterOrDigit = builders["javaLetterOrDigit"];
+  SpanBuilder& javaIdStart = builders["javaJavaIdentifierStart"];
+  SpanBuilder& javaIdPart = builders["javaJavaIdentifierPart"];
+  SpanBuilder& unicodeIdStart = builders["javaUnicodeIdentifierStart"];
+  SpanBuilder& unicodeIdPart = builders["javaUnicodeIdentifierPart"];
+  SpanBuilder& idIgnorable = builders["javaIdentifierIgnorable"];
+
+  for (std::int32_t cp = 0; cp <= RangeSet::kMaxCp; ++cp) {
+    const UChar32 ucp = static_cast<UChar32>(cp);
+    const auto type = u_charType(ucp);
+    const bool isLetter = isJavaLetter(cp);
+    const bool isDigit = u_isdigit(ucp);
+    if (u_hasBinaryProperty(ucp, UCHAR_LOWERCASE)) {
+      lowerCase.add(cp);
+    }
+    if (u_hasBinaryProperty(ucp, UCHAR_UPPERCASE)) {
+      upperCase.add(cp);
+    }
+    if (type == U_TITLECASE_LETTER) {
+      titleCase.add(cp);
+    }
+    if (u_isJavaSpaceChar(ucp)) {
+      spaceChar.add(cp);
+    }
+    if (u_isMirrored(ucp)) {
+      mirrored.add(cp);
+    }
+    if (type != U_UNASSIGNED) {
+      defined.add(cp);
+    }
+    if (isDigit) {
+      digit.add(cp);
+    }
+    if (u_hasBinaryProperty(ucp, UCHAR_ALPHABETIC)) {
+      alphabetic.add(cp);
+    }
+    if (u_hasBinaryProperty(ucp, UCHAR_IDEOGRAPHIC)) {
+      ideographic.add(cp);
+    }
+    // C0 and C1 control ranges.
+    if ((cp >= 0x00 && cp <= 0x1F) || (cp >= 0x7F && cp <= 0x9F)) {
+      isoControl.add(cp);
+    }
+    if (isJavaWhitespace(cp)) {
+      whitespace.add(cp);
+    }
+    if (isLetter) {
+      letter.add(cp);
+    }
+    if (isLetter || isDigit) {
+      letterOrDigit.add(cp);
+    }
+    if (u_isJavaIDStart(ucp)) {
+      javaIdStart.add(cp);
+    }
+    if (u_isJavaIDPart(ucp)) {
+      javaIdPart.add(cp);
+    }
+    if (u_isIDStart(ucp)) {
+      unicodeIdStart.add(cp);
+    }
+    if (u_isIDPart(ucp)) {
+      unicodeIdPart.add(cp);
+    }
+    if (u_isIDIgnorable(ucp)) {
+      idIgnorable.add(cp);
+    }
+  }
+
+  std::unordered_map<std::string, RangeSet> map;
+  for (auto& [name, builder] : builders) {
+    map.emplace(name, builder.build());
+  }
+  return map;
+}
+
+// Lazily built, process-wide table of `javaXxx` properties. The function-local
+// static is initialized on the first call.
+const std::unordered_map<std::string, RangeSet>& javaPropertyMap() {
+  static const std::unordered_map<std::string, RangeSet> kJavaProperties =
+      buildJavaPropertyMap();
+  return kJavaProperties;
+}
+
+// Scans every code point, asks ICU for the long and short name of its Unicode
+// block, and records the code point under both names via addNormalizedAlias.
+// Names whose builder ends up empty are left out of the result.
+std::unordered_map<std::string, RangeSet> buildBlockMap() {
+  std::unordered_map<std::string, SpanBuilder> spans;
+  for (std::int32_t cp = 0; cp <= RangeSet::kMaxCp; ++cp) {
+    const auto blockCode = ublock_getCode(static_cast<UChar32>(cp));
+    const char* longName =
+        u_getPropertyValueName(UCHAR_BLOCK, blockCode, U_LONG_PROPERTY_NAME);
+    const char* shortName =
+        u_getPropertyValueName(UCHAR_BLOCK, blockCode, U_SHORT_PROPERTY_NAME);
+    addNormalizedAlias(spans, longName, cp);
+    addNormalizedAlias(spans, shortName, cp);
+  }
+
+  std::unordered_map<std::string, RangeSet> result;
+  for (auto& entry : spans) {
+    auto built = entry.second.build();
+    if (built.isEmpty()) {
+      continue;
+    }
+    result.emplace(entry.first, std::move(built));
+  }
+  return result;
+}
+
+// Lazily built, process-wide table of Unicode blocks keyed by normalized
+// block name. The function-local static is initialized on the first call.
+const std::unordered_map<std::string, RangeSet>& blockMap() {
+  static const std::unordered_map<std::string, RangeSet> kBlocks =
+      buildBlockMap();
+  return kBlocks;
+}
+
+// Builds the lookup table used for `\p{...}` names: two-letter general
+// categories, their one-letter (and `LC`) unions, scripts, blocks (under both
+// the plain key and an `IN`-prefixed key), three binary properties, and
+// `ASCII`. All of it comes from a single scan over every code point.
+//
+// Entries are added with `emplace`, which never overwrites, so when two
+// sources produce the same key the one inserted first wins. The insertion
+// order below is therefore significant.
+std::unordered_map<std::string, RangeSet> buildPositiveMap() {
+  // Pre-register every two-letter category so each one gets an entry even if
+  // no code point falls into it.
+  std::unordered_map<std::string, SpanBuilder> catBuilders;
+  for (const char* cat :
+       {"LU", "LL", "LT", "LM", "LO", "MN", "ME", "MC", "ND", "NL",
+        "NO", "PC", "PD", "PS", "PE", "PI", "PF", "PO", "SM", "SC",
+        "SK", "SO", "ZS", "ZL", "ZP", "CC", "CF", "CS", "CO", "CN"}) {
+    catBuilders.emplace(cat, SpanBuilder{});
+  }
+
+  std::unordered_map<std::string, SpanBuilder> scriptBuilders;
+  std::unordered_map<std::string, SpanBuilder> blockBuilders;
+  std::unordered_map<std::string, SpanBuilder> binaryBuilders;
+
+  // Strategy choice: use Velox's existing ICU dependency instead of adding a
+  // new dependency or generating source tables. ICU's
+  // u_charType/uscript_getScript provide the same kind of full-code-point scan
+  // as Java Character APIs.
+  for (std::int32_t cp = 0; cp <= RangeSet::kMaxCp; ++cp) {
+    // General category; categoryName yields nullptr for unmapped types.
+    if (const char* cat = categoryName(u_charType(static_cast<UChar32>(cp)))) {
+      catBuilders[cat].add(cp);
+    }
+
+    // Script: recorded under the upper-cased ICU script name and, through
+    // addAlias, under the short property value name.
+    UErrorCode status = U_ZERO_ERROR;
+    const UScriptCode script =
+        uscript_getScript(static_cast<UChar32>(cp), &status);
+    if (U_SUCCESS(status)) {
+      const char* name = uscript_getName(script);
+      if (name != nullptr) {
+        scriptBuilders[upperAscii(name)].add(cp);
+      }
+      addAlias(
+          scriptBuilders,
+          u_getPropertyValueName(UCHAR_SCRIPT, script, U_SHORT_PROPERTY_NAME),
+          cp);
+    }
+
+    // Block: recorded under both the long and the short property value name.
+    const auto block = ublock_getCode(static_cast<UChar32>(cp));
+    addAlias(
+        blockBuilders,
+        u_getPropertyValueName(UCHAR_BLOCK, block, U_LONG_PROPERTY_NAME),
+        cp);
+    addAlias(
+        blockBuilders,
+        u_getPropertyValueName(UCHAR_BLOCK, block, U_SHORT_PROPERTY_NAME),
+        cp);
+
+    // Binary properties exposed by name.
+    if (u_hasBinaryProperty(static_cast<UChar32>(cp), UCHAR_ALPHABETIC)) {
+      binaryBuilders["ALPHABETIC"].add(cp);
+    }
+    if (u_hasBinaryProperty(static_cast<UChar32>(cp), UCHAR_IDEOGRAPHIC)) {
+      binaryBuilders["IDEOGRAPHIC"].add(cp);
+    }
+    if (u_hasBinaryProperty(static_cast<UChar32>(cp), UCHAR_BIDI_MIRRORED)) {
+      binaryBuilders["BIDI_MIRRORED"].add(cp);
+    }
+  }
+
+  // Two-letter categories go in first, so they take precedence over any
+  // script/block/binary key with the same spelling.
+  std::unordered_map<std::string, RangeSet> map;
+  for (auto& [cat, builder] : catBuilders) {
+    map.emplace(cat, builder.build());
+  }
+
+  // One-letter categories (and LC) are unions of the two-letter entries that
+  // are already in `map`.
+  map.emplace("L", unionOf(map, {"LU", "LL", "LT", "LM", "LO"}));
+  map.emplace("LC", unionOf(map, {"LU", "LL", "LT"}));
+  map.emplace("M", unionOf(map, {"MN", "ME", "MC"}));
+  map.emplace("N", unionOf(map, {"ND", "NL", "NO"}));
+  map.emplace("P", unionOf(map, {"PC", "PD", "PS", "PE", "PI", "PF", "PO"}));
+  map.emplace("S", unionOf(map, {"SM", "SC", "SK", "SO"}));
+  map.emplace("Z", unionOf(map, {"ZS", "ZL", "ZP"}));
+  map.emplace("C", unionOf(map, {"CC", "CF", "CS", "CO", "CN"}));
+
+  // Scripts are inserted before blocks, so a script wins a bare-name clash.
+  for (auto& [script, builder] : scriptBuilders) {
+    map.emplace(script, builder.build());
+  }
+  // Each block is reachable as `IN<key>` and, when the bare key is still
+  // free, as `<key>`.
+  for (auto& [block, builder] : blockBuilders) {
+    auto range = builder.build();
+    map.emplace("IN" + block, range);
+    map.emplace(block, std::move(range));
+  }
+  for (auto& [binaryProperty, builder] : binaryBuilders) {
+    map.emplace(binaryProperty, builder.build());
+  }
+  map.emplace("ASCII", RangeSet::range(0, 0x7F));
+  return map;
+}
+
+// Lazily built, process-wide table of category/script/block/binary property
+// sets. The function-local static is initialized on the first call.
+const std::unordered_map<std::string, RangeSet>& positiveMap() {
+  static const std::unordered_map<std::string, RangeSet> kPositive =
+      buildPositiveMap();
+  return kPositive;
+}
+
+// Resolves a `\p{Name}` / `\P{Name}` token to the set of code points it
+// denotes. `\P{...}` yields the complement of the matching set.
+//
+// Resolution order:
+//   1. positiveMap(), looked up by the upper-cased name. `BLK=X` / `BLOCK=X`
+//      are rewritten to `INX` first.
+//   2. blockMap(), looked up by the normalized text after a leading `IN`.
+//      This also applies to the rewritten `BLK=` / `BLOCK=` forms, so every
+//      block spelling accepted for `InX` is accepted for `blk=X` as well.
+//   3. javaPropertyMap(), looked up by the original, case-preserved name when
+//      the upper-cased name starts with `JAVA`.
+//
+// Returns std::nullopt when the token is not of the `\p{...}` / `\P{...}`
+// shape or when no table knows the name.
+std::optional<RangeSet> compute(std::string_view token) {
+  // Shortest well-formed token is `\p{}`: three prefix chars plus `}`.
+  if (token.size() < 4 || token.back() != '}') {
+    return std::nullopt;
+  }
+  const std::string_view prefix = token.substr(0, 3);
+  bool negate = false;
+  if (prefix == "\\P{") {
+    negate = true;
+  } else if (prefix != "\\p{") {
+    return std::nullopt;
+  }
+  // Text between the braces, with its original casing.
+  const std::string_view body = token.substr(3, token.size() - 4);
+  const std::string name = upperAscii(body);
+
+  const auto finish =
+      [negate](const RangeSet& set) -> std::optional<RangeSet> {
+    if (negate) {
+      return set.complement();
+    }
+    return set;
+  };
+
+  std::string lookupName = name;
+  if (name.rfind("BLK=", 0) == 0) {
+    lookupName = "IN" + name.substr(4);
+  } else if (name.rfind("BLOCK=", 0) == 0) {
+    lookupName = "IN" + name.substr(6);
+  }
+
+  const auto& positive = positiveMap();
+  if (const auto it = positive.find(lookupName); it != positive.end()) {
+    return finish(it->second);
+  }
+
+  // Use `lookupName`, not `name`: the qualified `BLK=` / `BLOCK=` forms were
+  // rewritten to `IN...` above and must reach this fallback too.
+  if (lookupName.rfind("IN", 0) == 0 && lookupName.size() > 2) {
+    const auto& blocks = blockMap();
+    const auto blockIt = blocks.find(normalizePropertyKey(lookupName.substr(2)));
+    if (blockIt != blocks.end()) {
+      return finish(blockIt->second);
+    }
+  }
+
+  if (name.rfind("JAVA", 0) == 0) {
+    // javaPropertyMap() keys are case-sensitive (`javaLowerCase`, ...).
+    const auto& javaProps = javaPropertyMap();
+    const auto javaIt = javaProps.find(std::string(body));
+    if (javaIt != javaProps.end()) {
+      return finish(javaIt->second);
+    }
+  }
+  return std::nullopt;
+}
+
+// Serializes every access to `cache`; held by JdkPropertyExpander::expand for
+// the whole lookup, including the call to compute() on a miss.
+std::mutex cacheMutex;
+// Memoized compute() results keyed by the exact token text. Unresolvable
+// tokens are stored too (as std::nullopt), and entries are never evicted.
+std::unordered_map<std::string, std::optional<RangeSet>> cache;
+
+} // namespace
+
+// Memoizing front end for compute(): returns the cached answer for
+// `pcre2Token` when there is one, otherwise computes, stores and returns it.
+// The cache mutex is held for the whole call.
+std::optional<RangeSet> JdkPropertyExpander::expand(
+    std::string_view pcre2Token) {
+  const std::string key(pcre2Token);
+  std::lock_guard<std::mutex> guard(cacheMutex);
+  const auto cached = cache.find(key);
+  if (cached == cache.end()) {
+    auto computed = compute(key);
+    cache.emplace(key, computed);
+    return computed;
+  }
+  return cached->second;
+}
+
+// Renders the `javaXxx` property called `name` (exact, case-sensitive match)
+// as a bracketed PCRE2 character class. Returns std::nullopt for unknown
+// names.
+std::optional<std::string> JdkPropertyExpander::materializeJavaProperty(
+    std::string_view name) {
+  const auto& properties = javaPropertyMap();
+  const auto found = properties.find(std::string(name));
+  if (found != properties.end()) {
+    return "[" + found->second.toPcre2ClassBody() + "]";
+  }
+  return std::nullopt;
+}
+
+// Renders the Unicode block called `name` as a bracketed PCRE2 character
+// class. The name is normalized with normalizePropertyKey before the lookup.
+// Returns std::nullopt when no block matches.
+std::optional<std::string> JdkPropertyExpander::materializeUnicodeBlock(
+    std::string_view name) {
+  const auto& blocks = blockMap();
+  const auto found = blocks.find(normalizePropertyKey(name));
+  if (found != blocks.end()) {
+    return "[" + found->second.toPcre2ClassBody() + "]";
+  }
+  return std::nullopt;
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.h b/velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.h
new file mode 100644
index 00000000000..60439377ee5
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.JdkPropertyExpander (Java) under Apache-2.0 by
+// the same author for inclusion in Velox.
+//
+#pragma once
+
+#include "velox/functions/lib/java_pcre2_translator/RangeSet.h"
+
+#include <optional>
+#include <string_view>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+/// Static-only helper that turns Java regex property names into explicit
+/// code-point sets or PCRE2 character classes.
+class JdkPropertyExpander {
+ public:
+  /// Resolves a complete `\p{Name}` or `\P{Name}` token to the set of code
+  /// points it denotes (`\P` yields the complement). Returns `std::nullopt`
+  /// when the token does not have that shape or the name is unknown. Results,
+  /// including misses, are cached per token text.
+  static std::optional<RangeSet> expand(std::string_view pcre2Token);
+  /// Renders the `javaXxx` property `name` (case-sensitive) as a bracketed
+  /// PCRE2 character class, or `std::nullopt` when the name is unknown.
+  static std::optional<std::string> materializeJavaProperty(
+      std::string_view name);
+  /// Renders the Unicode block `name` as a bracketed PCRE2 character class,
+  /// or `std::nullopt` when no block matches the normalized name.
+  static std::optional<std::string> materializeUnicodeBlock(
+      std::string_view name);
+
+ private:
+  // Not instantiable: every member is static.
+  JdkPropertyExpander() = delete;
+};
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/LICENSE-NOTICE.md b/velox/functions/lib/java_pcre2_translator/LICENSE-NOTICE.md
new file mode 100644
index 00000000000..3752432c51e
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/LICENSE-NOTICE.md
@@ -0,0 +1,19 @@
+# java_pcre2_translator — License Notice
+
+The source files in this directory are licensed under the Apache License,
+Version 2.0 (see the top-level `LICENSE`).
+
+## Provenance
+
+These files are a C++ port of the `org.pcre4j.regex.translate` module
+introduced in [pcre4j](https://github.com/alexey-pelykh/pcre4j)
+pull request **#606**.
+
+The original Java sources were authored by **Oleksii PELYKH** in 2024–2026
+and originally published under the GNU Lesser General Public License v3
+as part of pcre4j. The same author re-licensed this body of work under
+the Apache License, Version 2.0 for inclusion in Apache Velox.
+
+Each `.h`/`.cpp` file in this directory carries the standard Velox/ASF
+Apache-2.0 header **plus** a short `Originally authored by ...` note
+that identifies the corresponding pcre4j source file.
diff --git a/velox/functions/lib/java_pcre2_translator/PropertyMap.cpp b/velox/functions/lib/java_pcre2_translator/PropertyMap.cpp
new file mode 100644
index 00000000000..b80c01a749f
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/PropertyMap.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.PropertyMap (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/PropertyMap.h"
+
+#include "velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.h"
+
+#include <cctype>
+#include <unordered_map>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+namespace {
+
+// Static rewrite table from Java property names to their PCRE2 replacement.
+// A value is either a bare PCRE2 property name (e.g. `Lt`) or a complete
+// replacement such as a bracketed class or a `\P{...}` token. Keys are
+// matched exactly (case-sensitive).
+const std::unordered_map<std::string, std::string>& table() {
+  static const std::unordered_map<std::string, std::string> kTable{
+      // --- Short alias: L1 (JDK's Latin-1 shorthand) ---
+      {"L1", "[\\x{00}-\\x{FF}]"},
+
+      // --- \p{javaXxx} Java-specific properties ---
+      {"javaTitleCase", "Lt"},
+      {"javaDigit", "Nd"},
+      {"javaLetter", "L"},
+      {"javaLetterOrDigit", "[\\p{L}\\p{Nd}]"},
+      {"javaAlphabetic", "Alphabetic"},
+      {"javaIdeographic", "Ideographic"},
+      {"javaMirrored", "Bidi_Mirrored"},
+      {"javaDefined", "\\P{Cn}"},
+      {"javaISOControl", "[\\x00-\\x1F\\x{7F}-\\x{9F}]"},
+      {"javaJavaIdentifierStart", "[\\p{L}\\p{Nl}_$]"},
+      {"javaJavaIdentifierPart",
+       "[\\p{L}\\p{Nl}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}_$]"},
+      {"javaUnicodeIdentifierStart", "[\\p{L}\\p{Nl}]"},
+      {"javaUnicodeIdentifierPart",
+       "[\\p{L}\\p{Nl}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}]"},
+      {"javaIdentifierIgnorable",
+       "[\\x{00}-\\x{08}\\x{0E}-\\x{1B}\\x{7F}-\\x{9F}\\p{Cf}]"},
+      // Per Character.isWhitespace() Javadoc. The non-breaking spaces
+      // U+00A0, U+2007 and U+202F are NOT Java whitespace, hence the gap at
+      // U+2007 inside the U+2000..U+200A run.
+      {"javaWhitespace",
+       "[\\t\\n\\x0B\\f\\r \\x{1C}-\\x{1F}\\x{1680}"
+       "\\x{2000}-\\x{2006}\\x{2008}-\\x{200A}"
+       "\\x{2028}\\x{2029}\\x{205F}\\x{3000}]"},
+
+      // --- POSIX-style class names accepted by Java's \p{Xxx} (default,
+      // non-UNICODE) ---
+      {"Lower", "[a-z]"},
+      {"Upper", "[A-Z]"},
+      {"Alpha", "[a-zA-Z]"},
+      {"Digit", "[0-9]"},
+      {"Alnum", "[a-zA-Z0-9]"},
+      {"Punct", "[!-/:-@\\[-`{-~]"},
+      {"Graph", "[!-~]"},
+      {"Print", "[ -~]"},
+      {"Blank", "[ \\t]"},
+      {"Cntrl", "[\\x00-\\x1F\\x{7F}]"},
+      {"XDigit", "[0-9a-fA-F]"},
+      {"Space", "[ \\t\\n\\x0B\\f\\r]"},
+
+      // --- Java property names not recognised as PCRE2 long names ---
+      {"Control", "Cc"},
+      {"Format", "Cf"},
+      {"TitleCase", "Lt"},
+      {"UpperCase", "Lu"},
+      {"LowerCase", "Ll"},
+      {"Letter", "L"},
+      {"Mark", "M"},
+      {"Number", "N"},
+      {"Punctuation", "P"},
+      {"Symbol", "S"},
+      {"Separator", "Z"},
+      {"Other", "C"},
+      {"Assigned", "\\P{Cn}"},
+      {"Unassigned", "Cn"},
+  };
+  return kTable;
+}
+
+// Returns a copy of `s` with every byte passed through std::tolower.
+std::string toLower(std::string_view s) {
+  std::string lowered;
+  lowered.reserve(s.size());
+  for (const char ch : s) {
+    // Go through unsigned char: std::tolower is undefined for negative values.
+    const auto byte = static_cast<unsigned char>(ch);
+    lowered.push_back(static_cast<char>(std::tolower(byte)));
+  }
+  return lowered;
+}
+
+// Looks `value` up in table(); returns the mapped replacement when there is
+// one and `value` itself otherwise. The result is never empty-optional.
+std::optional<std::string> resolveOrPass(std::string_view value) {
+  const std::string key(value);
+  const auto& aliases = table();
+  const auto found = aliases.find(key);
+  if (found == aliases.end()) {
+    return key;
+  }
+  return found->second;
+}
+
+std::string camelCaseToUnderscores(std::string_view s);
+
+// Upper-cases `value` and turns each space into an underscore; every other
+// byte is passed through std::toupper.
+std::string upperBlockKey(std::string_view value) {
+  std::string key;
+  key.reserve(value.size());
+  for (const char ch : value) {
+    if (ch == ' ') {
+      key.push_back('_');
+      continue;
+    }
+    key.push_back(
+        static_cast<char>(std::toupper(static_cast<unsigned char>(ch))));
+  }
+  return key;
+}
+
+// Upper-cases `value` and drops every underscore, hyphen and whitespace
+// byte, so different spellings of one block name compare equal.
+std::string normalizedBlockKey(std::string_view value) {
+  std::string key;
+  key.reserve(value.size());
+  for (const char ch : value) {
+    const auto byte = static_cast<unsigned char>(ch);
+    const bool separator = ch == '_' || ch == '-' || std::isspace(byte) != 0;
+    if (!separator) {
+      key.push_back(static_cast<char>(std::toupper(byte)));
+    }
+  }
+  return key;
+}
+
+// Resolves a Unicode block name to its PCRE2 replacement:
+//   1. the three surrogate blocks map to fixed explicit ranges;
+//   2. otherwise a block known to JdkPropertyExpander is rendered as an
+//      explicit character class;
+//   3. otherwise the name is returned with underscores inserted at CamelCase
+//      boundaries.
+// The surrogate match is done on the normalized key (upper-cased, with
+// underscores, hyphens and whitespace removed). That also covers every
+// spelling whose space-to-underscore, upper-cased form equals the canonical
+// `HIGH_SURROGATES`-style names.
+std::string resolveBlock(std::string_view blockName) {
+  struct SurrogateBlock {
+    std::string_view key;
+    std::string_view replacement;
+  };
+  static constexpr SurrogateBlock kSurrogateBlocks[] = {
+      {"HIGHSURROGATES", "[\\x{D800}-\\x{DB7F}]"},
+      {"HIGHPRIVATEUSESURROGATES", "[\\x{DB80}-\\x{DBFF}]"},
+      {"LOWSURROGATES", "[\\x{DC00}-\\x{DFFF}]"},
+  };
+  const std::string key = normalizedBlockKey(blockName);
+  for (const auto& block : kSurrogateBlocks) {
+    if (key == block.key) {
+      return std::string(block.replacement);
+    }
+  }
+  auto materialized = JdkPropertyExpander::materializeUnicodeBlock(blockName);
+  if (materialized.has_value()) {
+    return *materialized;
+  }
+  return camelCaseToUnderscores(blockName);
+}
+
+// Puts an `_` in front of every uppercase letter that directly follows a
+// lowercase letter, e.g. `BasicLatin` → `Basic_Latin`. Input that already
+// contains an underscore is returned unchanged.
+std::string camelCaseToUnderscores(std::string_view s) {
+  if (s.find('_') != std::string_view::npos) {
+    return std::string(s);
+  }
+  std::string result;
+  result.reserve(s.size() + 8);
+  // NUL is not a lowercase letter, so the first character never starts a
+  // boundary.
+  char previous = '\0';
+  for (const char current : s) {
+    const bool boundary =
+        std::islower(static_cast<unsigned char>(previous)) &&
+        std::isupper(static_cast<unsigned char>(current));
+    if (boundary) {
+      result.push_back('_');
+    }
+    result.push_back(current);
+    previous = current;
+  }
+  return result;
+}
+
+} // namespace
+
+std::optional<std::string> PropertyMap::apply(std::string_view name) {
+  // Qualified form `key=value` (gc=Lu, sc=Greek, blk=Latin, ...). The key is
+  // matched case-insensitively; an unknown key means "no rewrite". A leading
+  // `=` is not treated as a qualifier.
+  const auto eq = name.find('=');
+  if (eq != std::string_view::npos && eq > 0) {
+    const std::string qualifier = toLower(name.substr(0, eq));
+    const std::string_view value = name.substr(eq + 1);
+    const bool isCategory =
+        qualifier == "gc" || qualifier == "general_category";
+    const bool isScript = qualifier == "sc" || qualifier == "script";
+    if (isCategory || isScript) {
+      return resolveOrPass(value);
+    }
+    if (qualifier == "blk" || qualifier == "block") {
+      return resolveBlock(value);
+    }
+    return std::nullopt;
+  }
+
+  // These three Java properties are expanded to explicit classes rather than
+  // mapped through the table.
+  if (name == "javaLowerCase" || name == "javaUpperCase" ||
+      name == "javaSpaceChar") {
+    return JdkPropertyExpander::materializeJavaProperty(name);
+  }
+
+  // Exact table match.
+  const auto& aliases = table();
+  if (const auto exact = aliases.find(std::string(name));
+      exact != aliases.end()) {
+    return exact->second;
+  }
+
+  // The two prefixed forms need at least one character after the prefix.
+  const std::string_view prefix =
+      name.size() > 2 ? name.substr(0, 2) : std::string_view{};
+
+  // \p{IsXxx}: drop the prefix; a table alias for the remainder wins over
+  // passing the remainder through.
+  if (prefix == "Is") {
+    const std::string stripped(name.substr(2));
+    const auto alias = aliases.find(stripped);
+    if (alias == aliases.end()) {
+      return stripped;
+    }
+    return alias->second;
+  }
+
+  // \p{InXxx}: drop the prefix and resolve the remainder as a block name.
+  if (prefix == "In") {
+    return resolveBlock(name.substr(2));
+  }
+
+  // Nothing to rewrite.
+  return std::nullopt;
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/PropertyMap.h b/velox/functions/lib/java_pcre2_translator/PropertyMap.h
new file mode 100644
index 00000000000..b611b2a73c0
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/PropertyMap.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.PropertyMap (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#pragma once
+
+#include <optional>
+#include <string>
+#include <string_view>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+/// Maps Java regex property names (as used in `\p{...}`) to PCRE2
+/// equivalents.
+///
+/// Return convention for `apply(name)`:
+///   * A bare name like `"Greek"` → caller emits `\p{Greek}` / `\P{Greek}`.
+///   * A string starting with `'['` → caller substitutes the entire
+///     `\p{name}` token with this string (used for expanded ranges and
+///     multi-class expressions).
+///   * `std::nullopt` → no rewrite; leave the token as-is.
+class PropertyMap {
+ public:
+  /// Sentinel string delimited by U+0001 control characters.
+  /// NOTE(review): not referenced by PropertyMap.cpp; presumably produced or
+  /// recognised elsewhere for properties that can never match — confirm
+  /// against the callers.
+  static constexpr std::string_view kNeverMatch{"\x01NEVER_MATCH\x01"};
+
+  /// Resolves a Java regex property name to a PCRE2 equivalent.  Returns
+  /// `std::nullopt` when no rewrite is needed (the caller should pass the
+  /// token through unchanged).
+  static std::optional<std::string> apply(std::string_view name);
+
+ private:
+  // Not instantiable: every member is static.
+  PropertyMap() = delete;
+};
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/RangeSet.cpp b/velox/functions/lib/java_pcre2_translator/RangeSet.cpp
new file mode 100644
index 00000000000..9a2bfe804fd
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/RangeSet.cpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.RangeSet (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/RangeSet.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <stdexcept>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+namespace {
+
+// Appends one code point to `sb` in a form that is safe inside a PCRE2
+// character class body. Printable ASCII is written as itself, with a
+// backslash in front of `\`, `]`, `^` and `-`; everything else is written as
+// `\x{HEX}` (uppercase hex, no padding).
+//
+// Mirrors `ClassRenderer.emitLiteralInClass` from the Java sources. It is
+// kept local to avoid a circular dependency on the (yet to be ported)
+// ClassRenderer module; once that lands, this helper can stay here or be
+// shared, since the body is small enough that duplication is fine.
+void emitLiteralInClass(std::int32_t cp, std::string& sb) {
+  const bool printableAscii = cp >= 0x20 && cp <= 0x7E;
+  if (!printableAscii) {
+    char hex[16];
+    std::snprintf(hex, sizeof(hex), "\\x{%X}", static_cast<unsigned>(cp));
+    sb.append(hex);
+    return;
+  }
+  const bool needsEscape = cp == '\\' || cp == ']' || cp == '^' || cp == '-';
+  if (needsEscape) {
+    sb.push_back('\\');
+  }
+  sb.push_back(static_cast<char>(cp));
+}
+
+} // namespace
+
+// Shared empty set; the function-local static is created on first use and
+// returned by reference on every call.
+const RangeSet& RangeSet::empty() {
+  static const RangeSet kEmpty{{}};
+  return kEmpty;
+}
+
+// Shared set holding the single range [0, kMaxCp]; the function-local static
+// is created on first use and returned by reference on every call.
+const RangeSet& RangeSet::all() {
+  static const RangeSet kAll{{0, kMaxCp}};
+  return kAll;
+}
+
+// One-element set {cp}. Throws std::invalid_argument when `cp` lies outside
+// [0, kMaxCp].
+RangeSet RangeSet::single(std::int32_t cp) {
+  const bool inRange = cp >= 0 && cp <= kMaxCp;
+  if (!inRange) {
+    throw std::invalid_argument(
+        "Code point out of range: " + std::to_string(cp));
+  }
+  return RangeSet({cp, cp});
+}
+
+// Set holding the inclusive range [lo, hi]. Throws std::invalid_argument
+// unless 0 <= lo <= hi <= kMaxCp.
+RangeSet RangeSet::range(std::int32_t lo, std::int32_t hi) {
+  const bool valid = lo >= 0 && hi <= kMaxCp && lo <= hi;
+  if (!valid) {
+    throw std::invalid_argument(
+        "Invalid range: [" + std::to_string(lo) + ", " + std::to_string(hi) +
+        "]");
+  }
+  return RangeSet({lo, hi});
+}
+
+// Validates a flat [lo0, hi0, lo1, hi1, ...] vector and turns it into a set.
+// Each pair must satisfy 0 <= lo <= hi <= kMaxCp and the lower bounds must be
+// non-decreasing; overlapping or adjacent pairs are merged by normalise().
+// Throws std::invalid_argument on an odd length or an invalid/unsorted pair.
+RangeSet RangeSet::fromSortedPairs(std::vector<std::int32_t> pairs) {
+  if ((pairs.size() & 1u) != 0) {
+    throw std::invalid_argument("Range pair vector must have even length");
+  }
+  for (std::size_t i = 0; i + 1 < pairs.size(); i += 2) {
+    const std::int32_t lo = pairs[i];
+    const std::int32_t hi = pairs[i + 1];
+    if (lo < 0 || hi > kMaxCp || lo > hi) {
+      throw std::invalid_argument("Invalid sorted range pair");
+    }
+    if (i >= 2 && lo < pairs[i - 2]) {
+      throw std::invalid_argument("Range pairs must be sorted by lower bound");
+    }
+  }
+  return normalise(std::move(pairs));
+}
+
+// Set union. The two sorted pair lists are merged by lower bound and the
+// result is handed to normalise(), which fuses overlapping and adjacent
+// ranges. An empty operand returns a copy of the other one.
+RangeSet RangeSet::unionWith(const RangeSet& other) const {
+  if (isEmpty()) {
+    return other;
+  }
+  if (other.isEmpty()) {
+    return *this;
+  }
+  const auto& a = ranges_;
+  const auto& b = other.ranges_;
+  std::vector<std::int32_t> merged;
+  merged.reserve(a.size() + b.size());
+  // Copies the pair at `pos` from `src` and advances `pos` past it.
+  const auto take = [&merged](const auto& src, std::size_t& pos) {
+    merged.push_back(src[pos]);
+    merged.push_back(src[pos + 1]);
+    pos += 2;
+  };
+  std::size_t i = 0;
+  std::size_t j = 0;
+  while (i < a.size() && j < b.size()) {
+    if (a[i] <= b[j]) {
+      take(a, i);
+    } else {
+      take(b, j);
+    }
+  }
+  while (i < a.size()) {
+    take(a, i);
+  }
+  while (j < b.size()) {
+    take(b, j);
+  }
+  return normalise(std::move(merged));
+}
+
+// Set intersection via a two-pointer sweep over both sorted pair lists. At
+// each step the overlap of the current pair from each side (if any) is
+// emitted, then the side whose pair ends first is advanced.
+RangeSet RangeSet::intersect(const RangeSet& other) const {
+  if (isEmpty() || other.isEmpty()) {
+    return empty();
+  }
+  const auto& a = ranges_;
+  const auto& b = other.ranges_;
+  std::vector<std::int32_t> overlap;
+  overlap.reserve(std::min(a.size(), b.size()));
+  std::size_t i = 0;
+  std::size_t j = 0;
+  while (i < a.size() && j < b.size()) {
+    const std::int32_t aHi = a[i + 1];
+    const std::int32_t bHi = b[j + 1];
+    const std::int32_t lo = std::max(a[i], b[j]);
+    const std::int32_t hi = std::min(aHi, bHi);
+    if (lo <= hi) {
+      overlap.push_back(lo);
+      overlap.push_back(hi);
+    }
+    if (aHi < bHi) {
+      i += 2;
+    } else {
+      j += 2;
+    }
+  }
+  if (overlap.empty()) {
+    return empty();
+  }
+  return RangeSet(std::move(overlap));
+}
+
+// Complement within [0, kMaxCp]: emits the gap in front of each stored range
+// and the tail after the last one.
+RangeSet RangeSet::complement() const {
+  if (isEmpty()) {
+    return all();
+  }
+  std::vector<std::int32_t> gaps;
+  gaps.reserve(ranges_.size() + 2);
+  // First code point not yet covered by an emitted gap or a stored range.
+  std::int32_t next = 0;
+  for (std::size_t i = 0; i < ranges_.size(); i += 2) {
+    const std::int32_t lo = ranges_[i];
+    const std::int32_t hi = ranges_[i + 1];
+    if (next < lo) {
+      gaps.push_back(next);
+      gaps.push_back(lo - 1);
+    }
+    next = hi + 1;
+  }
+  if (next <= kMaxCp) {
+    gaps.push_back(next);
+    gaps.push_back(kMaxCp);
+  }
+  if (gaps.empty()) {
+    return empty();
+  }
+  return RangeSet(std::move(gaps));
+}
+
+// Set difference, computed as the intersection with the complement of
+// `other`.
+RangeSet RangeSet::subtract(const RangeSet& other) const {
+  const RangeSet outside = other.complement();
+  return intersect(outside);
+}
+
+// Membership test by linear scan over the sorted ranges.
+bool RangeSet::contains(std::int32_t cp) const {
+  for (std::size_t i = 0; i < ranges_.size(); i += 2) {
+    // Ranges are sorted, so once one starts above `cp` none can contain it.
+    if (cp < ranges_[i]) {
+      return false;
+    }
+    if (cp <= ranges_[i + 1]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Renders the set as the inside of a PCRE2 character class (no surrounding
+// brackets). A one-element range is written as a single literal, any other
+// range as `lo-hi`. An empty set renders as the empty string.
+std::string RangeSet::toPcre2ClassBody() const {
+  std::string body;
+  for (std::size_t i = 0; i < ranges_.size(); i += 2) {
+    emitLiteralInClass(ranges_[i], body);
+    if (ranges_[i] == ranges_[i + 1]) {
+      continue;
+    }
+    body.push_back('-');
+    emitLiteralInClass(ranges_[i + 1], body);
+  }
+  return body;
+}
+
+// Collapses a flat list of [lo, hi] pairs, sorted by lower bound, into
+// canonical form: a pair that overlaps or directly follows the previous
+// output pair is folded into it.
+RangeSet RangeSet::normalise(std::vector<std::int32_t>&& raw) {
+  if (raw.empty()) {
+    return empty();
+  }
+  std::vector<std::int32_t> out;
+  // Reserving up front also keeps `out.back()` references stable below.
+  out.reserve(raw.size());
+  out.push_back(raw[0]);
+  out.push_back(raw[1]);
+  for (std::size_t i = 2; i < raw.size(); i += 2) {
+    const std::int32_t lo = raw[i];
+    const std::int32_t hi = raw[i + 1];
+    std::int32_t& lastHi = out.back();
+    if (lo <= lastHi + 1) {
+      // Overlapping or adjacent: extend the pair being built.
+      lastHi = std::max(lastHi, hi);
+    } else {
+      out.push_back(lo);
+      out.push_back(hi);
+    }
+  }
+  return RangeSet(std::move(out));
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/RangeSet.h b/velox/functions/lib/java_pcre2_translator/RangeSet.h
new file mode 100644
index 00000000000..ce78f7186c7
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/RangeSet.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.RangeSet (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace facebook::velox::functions::java_pcre2_translator {
+
+/// Immutable, sorted, disjoint set of Unicode code-point ranges over
+/// [0, kMaxCp].  Stored as a flat `std::vector<std::int32_t>` of inclusive
+/// endpoints `[lo0, hi0, lo1, hi1, ...]` with `lo0 <= hi0 < lo1 <= hi1 < ...`;
+/// see `ranges_` for the additional non-adjacency requirement.
+class RangeSet {
+ public:
+  /// Largest valid Unicode code point; the universe is [0, kMaxCp].
+  static constexpr std::int32_t kMaxCp = 0x10FFFF;
+
+  /// Returns the empty set.
+  static const RangeSet& empty();
+
+  /// Returns the set containing every code point [0, kMaxCp].
+  static const RangeSet& all();
+
+  /// Creates a set containing the single code point `cp`.
+  /// Throws `std::invalid_argument` when `cp` is out of range.
+  static RangeSet single(std::int32_t cp);
+
+  /// Creates a set containing the range [lo, hi] inclusive.
+  /// Throws `std::invalid_argument` when the range is invalid.
+  static RangeSet range(std::int32_t lo, std::int32_t hi);
+
+  /// Creates a set from already sorted [lo, hi] pairs, merging adjacent spans.
+  static RangeSet fromSortedPairs(std::vector<std::int32_t> pairs);
+
+  /// Returns the union of this set and `other`.
+  RangeSet unionWith(const RangeSet& other) const;
+
+  /// Returns the intersection of this set and `other`.
+  RangeSet intersect(const RangeSet& other) const;
+
+  /// Returns the complement of this set within [0, kMaxCp].
+  RangeSet complement() const;
+
+  /// Returns the code points of this set that are not in `other`.
+  RangeSet subtract(const RangeSet& other) const;
+
+  /// Returns true iff this set contains no code points.
+  bool isEmpty() const {
+    return ranges_.empty();
+  }
+
+  /// Returns true iff this set contains `cp`.
+  bool contains(std::int32_t cp) const;
+
+  /// Emits the content of this set as a PCRE2 character-class body — i.e.
+  /// what would appear between `[` and `]`.  Printable ASCII in the range
+  /// 0x20–0x7E is emitted literally except for `\`, `]`, `^`, `-` which are
+  /// backslash-escaped; all other code points are emitted as `\x{HH...}`.
+  /// Ranges of two or more code points are emitted as `lo-hi`.
+  std::string toPcre2ClassBody() const;
+
+  /// Number of contiguous ranges (for testing).
+  int rangeCount() const {
+    return static_cast<int>(ranges_.size() / 2);
+  }
+
+  /// Returns the flat `[lo0, hi0, lo1, hi1, ...]` endpoint vector.
+  const std::vector<std::int32_t>& ranges() const {
+    return ranges_;
+  }
+
+  /// Equality based on the normalised range vector.
+  bool operator==(const RangeSet& other) const {
+    return ranges_ == other.ranges_;
+  }
+  bool operator!=(const RangeSet& other) const {
+    return !(*this == other);
+  }
+
+ private:
+  explicit RangeSet(std::vector<std::int32_t> ranges)
+      : ranges_(std::move(ranges)) {}
+
+  /// Merges overlapping/adjacent pairs in `raw` (which must already be
+  /// sorted by `lo`) and returns the resulting `RangeSet`.
+  static RangeSet normalise(std::vector<std::int32_t>&& raw);
+
+  /// Sorted, non-overlapping, non-adjacent pairs.
+  std::vector<std::int32_t> ranges_;
+};
+
+} // namespace facebook::velox::functions::java_pcre2_translator
diff --git a/velox/functions/lib/java_pcre2_translator/tests/CMakeLists.txt b/velox/functions/lib/java_pcre2_translator/tests/CMakeLists.txt
new file mode 100644
index 00000000000..5231d51ad2f
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/tests/CMakeLists.txt
@@ -0,0 +1,33 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Unit tests for the Java-regex-to-PCRE2 translator, built as one gtest binary.
+
+add_executable(
+  velox_java_pcre2_translator_test
+  ClassBodyParserTest.cpp
+  ClassRendererTest.cpp
+  EvaluatorTest.cpp
+  JavaRegexTranslatorTest.cpp
+  JdkPropertyExpanderTest.cpp
+  PropertyMapTest.cpp
+  RangeSetTest.cpp
+)
+
+target_link_libraries(
+  velox_java_pcre2_translator_test
+  PRIVATE velox_java_pcre2_translator GTest::gtest GTest::gtest_main
+)
+
+add_test(NAME velox_java_pcre2_translator_test COMMAND velox_java_pcre2_translator_test)
diff --git a/velox/functions/lib/java_pcre2_translator/tests/ClassBodyParserTest.cpp b/velox/functions/lib/java_pcre2_translator/tests/ClassBodyParserTest.cpp
new file mode 100644
index 00000000000..5f74b19b7d7
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/tests/ClassBodyParserTest.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.ClassBodyParserTest (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/ClassBodyParser.h"
+
+#include <gtest/gtest.h>
+
+#include <stdexcept>
+
+namespace facebook::velox::functions::java_pcre2_translator::test {
+namespace {
+
+ClassNode parse(std::string_view classStr) { // Parses from offset 0.
+  std::size_t pos = 0;
+  return ClassBodyParser::parseClass(classStr, pos);
+}
+
+} // namespace
+
+TEST(ClassBodyParser, simpleLiterals) {
+  auto node = parse("[abc]");
+  auto* u = node.getIf<Union>();
+  ASSERT_NE(nullptr, u);
+  ASSERT_EQ(3, u->children.size());
+  EXPECT_EQ(ClassNode(Literal('a')), *u->children[0]);
+  EXPECT_EQ(ClassNode(Literal('b')), *u->children[1]);
+  EXPECT_EQ(ClassNode(Literal('c')), *u->children[2]);
+}
+
+TEST(ClassBodyParser, singleCharClass) {
+  EXPECT_EQ(ClassNode(Literal('a')), parse("[a]"));
+}
+
+TEST(ClassBodyParser, rangeClass) {
+  EXPECT_EQ(ClassNode(Range('a', 'z')), parse("[a-z]"));
+}
+
+TEST(ClassBodyParser, negatedRange) {
+  auto node = parse("[^a-z]");
+  auto* neg = node.getIf<Negated>();
+  ASSERT_NE(nullptr, neg);
+  EXPECT_EQ(ClassNode(Range('a', 'z')), *neg->child);
+}
+
+TEST(ClassBodyParser, nestedClassUnion) {
+  auto node = parse("[abc[def]]");
+  auto* u = node.getIf<Union>();
+  ASSERT_NE(nullptr, u);
+  ASSERT_EQ(4, u->children.size());
+  EXPECT_EQ(ClassNode(Literal('a')), *u->children[0]);
+  EXPECT_TRUE(u->children[3]->is<Union>());
+}
+
+TEST(ClassBodyParser, intersection) {
+  auto node = parse("[a-c&&d-f]");
+  auto* inter = node.getIf<Intersection>();
+  ASSERT_NE(nullptr, inter);
+  ASSERT_EQ(2, inter->operands.size());
+  EXPECT_EQ(ClassNode(Range('a', 'c')), *inter->operands[0]);
+  EXPECT_EQ(ClassNode(Range('d', 'f')), *inter->operands[1]);
+}
+
+TEST(ClassBodyParser, wDashHashPattern) {
+  auto node = parse("[\\w-#]"); // '-' after \w is a literal, not a range.
+  auto* u = node.getIf<Union>();
+  ASSERT_NE(nullptr, u);
+  ASSERT_EQ(3, u->children.size());
+  EXPECT_TRUE(u->children[0]->is<PropertyLeaf>());
+  EXPECT_EQ(ClassNode(Literal('-')), *u->children[1]);
+  EXPECT_EQ(ClassNode(Literal('#')), *u->children[2]);
+}
+
+TEST(ClassBodyParser, shorthandEscapes) {
+  auto node = parse("[\\d\\p{L}]");
+  auto* u = node.getIf<Union>();
+  ASSERT_NE(nullptr, u);
+  ASSERT_EQ(2, u->children.size());
+  ASSERT_TRUE(u->children[0]->is<PropertyLeaf>());
+  EXPECT_EQ("\\d", u->children[0]->getIf<PropertyLeaf>()->pcre2Token);
+  EXPECT_TRUE(u->children[1]->is<PropertyLeaf>());
+}
+
+TEST(ClassBodyParser, bracketPropertyRewriteParsesAsAst) {
+  auto node = parse("[\\p{Alpha}]"); // Parsed as the union a-z, A-Z.
+  auto* u = node.getIf<Union>();
+  ASSERT_NE(nullptr, u);
+  ASSERT_EQ(2, u->children.size());
+  EXPECT_EQ(ClassNode(Range('a', 'z')), *u->children[0]);
+  EXPECT_EQ(ClassNode(Range('A', 'Z')), *u->children[1]);
+}
+
+TEST(ClassBodyParser, negatedBracketPropertyRewriteParsesAsNegatedAst) {
+  auto node = parse("[\\P{Alpha}]");
+  auto* neg = node.getIf<Negated>();
+  ASSERT_NE(nullptr, neg);
+  EXPECT_TRUE(neg->child->is<Union>());
+}
+
+TEST(ClassBodyParser, quotedBracket) {
+  EXPECT_EQ(ClassNode(Literal(']')), parse("[\\Q]\\E]"));
+}
+
+TEST(ClassBodyParser, hexEscape) {
+  EXPECT_EQ(ClassNode(Literal('A')), parse("[\\x41]"));
+}
+
+TEST(ClassBodyParser, unicodeEscape) {
+  EXPECT_EQ(ClassNode(Literal('A')), parse("[\\u0041]"));
+}
+
+TEST(ClassBodyParser, escapedNonAsciiLiteralConsumesWholeCodePoint) {
+  EXPECT_EQ(ClassNode(Range('a', 0x4444)), parse("[a-\\\xE4\x91\x84]"));
+} // Bytes E4 91 84 are the UTF-8 encoding of U+4444.
+
+TEST(ClassBodyParser, multipleIntersectionOperands) {
+  auto node = parse("[a-m&&m-z&&a-c]");
+  auto* inter = node.getIf<Intersection>();
+  ASSERT_NE(nullptr, inter);
+  EXPECT_EQ(3, inter->operands.size());
+}
+
+TEST(ClassBodyParser, nestedNegatedClass) {
+  auto node = parse("[a-d[^0-9]]");
+  auto* u = node.getIf<Union>();
+  ASSERT_NE(nullptr, u);
+  ASSERT_EQ(2, u->children.size());
+  EXPECT_TRUE(u->children[1]->is<Negated>());
+}
+
+TEST(ClassBodyParser, intersectionWithNestedClass) {
+  EXPECT_TRUE(parse("[[a-m]&&[m-z]]").is<Intersection>());
+}
+
+TEST(ClassBodyParser, rangeAtEndOfClass) {
+  EXPECT_TRUE(parse("[a\\-]").is<Union>());
+} // NOTE(review): despite the name, the '-' in this input is escaped.
+
+TEST(ClassBodyParser, unterminatedClassThrows) {
+  EXPECT_THROW(parse("[abc"), std::invalid_argument);
+}
+
+TEST(ClassBodyParser, unterminatedNegatedClassThrows) {
+  EXPECT_THROW(parse("[^abc"), std::invalid_argument);
+}
+
+TEST(ClassBodyParser, unterminatedNestedClassThrows) {
+  EXPECT_THROW(parse("[a[b-c]"), std::invalid_argument);
+}
+
+TEST(ClassBodyParser, incompleteHexEscapeThrows) {
+  EXPECT_THROW(parse("[\\x]"), std::invalid_argument);
+  EXPECT_THROW(parse("[\\xA]"), std::invalid_argument);
+}
+
+TEST(ClassBodyParser, unterminatedHexBraceEscapeThrows) {
+  EXPECT_THROW(parse("[\\x{ABC]"), std::invalid_argument);
+}
+
+TEST(ClassBodyParser, emptyHexBraceEscapeThrows) {
+  EXPECT_THROW(parse("[\\x{}]"), std::invalid_argument);
+}
+
+TEST(ClassBodyParser, outOfRangeHexBraceEscapeThrows) {
+  EXPECT_THROW(parse("[\\x{110000}]"), std::invalid_argument);
+  EXPECT_THROW(parse("[\\x{FFFFFFFFF}]"), std::invalid_argument);
+}
+
+TEST(ClassBodyParser, incompleteUnicodeEscapeThrows) {
+  EXPECT_THROW(parse("[\\u]"), std::invalid_argument);
+  EXPECT_THROW(parse("[\\u00]"), std::invalid_argument);
+  EXPECT_THROW(parse("[\\u00A]"), std::invalid_argument);
+}
+
+TEST(ClassBodyParser, octalEscapeAcceptsThreeDigits) {
+  EXPECT_EQ(ClassNode(Literal(0x41)), parse("[\\0101]")); // Octal 101.
+}
+
+TEST(ClassBodyParser, octalEscapeStopsAtNonOctalChar) {
+  auto node = parse("[\\08]"); // '8' is not an octal digit.
+  auto* u = node.getIf<Union>();
+  ASSERT_NE(nullptr, u);
+  ASSERT_EQ(2, u->children.size());
+  EXPECT_EQ(ClassNode(Literal(0)), *u->children[0]);
+  EXPECT_EQ(ClassNode(Literal('8')), *u->children[1]);
+}
+
+TEST(ClassBodyParser, octalEscapeCappedAtFF) {
+  auto node = parse("[\\0400]"); // Read as \040, then a literal '0'.
+  auto* u = node.getIf<Union>();
+  ASSERT_NE(nullptr, u);
+  ASSERT_EQ(2, u->children.size());
+  EXPECT_EQ(ClassNode(Literal(0x20)), *u->children[0]);
+  EXPECT_EQ(ClassNode(Literal('0')), *u->children[1]);
+}
+
+TEST(ClassBodyParser, controlCharacterEscape) {
+  EXPECT_EQ(ClassNode(Literal(0x01)), parse("[\\cA]"));
+}
+
+TEST(ClassBodyParser, simpleEscapesProduceLiterals) {
+  EXPECT_EQ(ClassNode(Literal(0x07)), parse("[\\a]"));
+  EXPECT_EQ(ClassNode(Literal(0x1B)), parse("[\\e]"));
+  EXPECT_EQ(ClassNode(Literal('\n')), parse("[\\n]"));
+  EXPECT_EQ(ClassNode(Literal('\t')), parse("[\\t]"));
+}
+
+TEST(ClassBodyParser, trailingBackslashThrows) {
+  EXPECT_THROW(parse("[\\"), std::invalid_argument);
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator::test
diff --git a/velox/functions/lib/java_pcre2_translator/tests/ClassRendererTest.cpp b/velox/functions/lib/java_pcre2_translator/tests/ClassRendererTest.cpp
new file mode 100644
index 00000000000..0d9ddb5e674
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/tests/ClassRendererTest.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.ClassRendererTest (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/ClassRenderer.h"
+
+#include "velox/functions/lib/java_pcre2_translator/ClassBodyParser.h"
+
+#include <gtest/gtest.h>
+
+namespace facebook::velox::functions::java_pcre2_translator::test {
+
+namespace {
+
+std::string render(std::string_view classStr) { // Parse, then render.
+  std::size_t pos = 0;
+  auto node = ClassBodyParser::parseClass(classStr, pos);
+  return ClassRenderer::render(node);
+}
+
+} // namespace
+
+TEST(ClassRenderer, simpleLiterals) {
+  EXPECT_EQ(render("[abc]"), "[abc]");
+}
+
+TEST(ClassRenderer, simpleRange) {
+  EXPECT_EQ(render("[a-z]"), "[a-z]");
+}
+
+TEST(ClassRenderer, negatedRange) {
+  EXPECT_EQ(render("[^a-z]"), "[^a-z]");
+}
+
+TEST(ClassRenderer, nestedUnionFlattens) {
+  EXPECT_EQ(render("[abc[def]]"), "[abcdef]");
+}
+
+TEST(ClassRenderer, negatedNestedFlattens) {
+  const auto result = render("[^a-d[0-9]]");
+  EXPECT_EQ(result.find("[["), std::string::npos) << result;
+  EXPECT_EQ(result.rfind("[^", 0), 0) << result; // Starts with "[^".
+}
+
+TEST(ClassRenderer, intersectionLiteralRange) {
+  const auto result = render("[a-c&&b-d]");
+  EXPECT_NE(result.find("b"), std::string::npos) << result;
+  EXPECT_NE(result.find("c"), std::string::npos) << result;
+  EXPECT_EQ(result.find("a"), std::string::npos) << result;
+  EXPECT_EQ(result.find("d"), std::string::npos) << result;
+}
+
+TEST(ClassRenderer, intersectionDisjoint) {
+  EXPECT_EQ(render("[a-c&&d-f]"), "[^\\x{0}-\\x{10FFFF}]");
+} // The empty set is rendered as a negated full range.
+
+TEST(ClassRenderer, wDashHashEscapesDash) {
+  const auto result = render("[\\w-#]");
+  EXPECT_NE(result.find("\\w"), std::string::npos) << result;
+  EXPECT_NE(result.find("\\-"), std::string::npos) << result;
+}
+
+TEST(ClassRenderer, intersectionWithKnownProperty) {
+  const auto result = render("[\\d&&[0-3]]");
+  EXPECT_NE(result.find("0"), std::string::npos) << result;
+  EXPECT_NE(result.find("3"), std::string::npos) << result;
+  EXPECT_EQ(result.find("&&"), std::string::npos) << result;
+}
+
+TEST(ClassRenderer, intersectionWithJdkExpandableProperty) {
+  EXPECT_EQ(render("[\\p{L}&&[a-z]]"), "[a-z]");
+}
+
+TEST(ClassRenderer, intersectionWithBracketMappedProperty) {
+  EXPECT_EQ(render("[\\p{Alpha}&&[a-z]]"), "[a-z]");
+}
+
+TEST(ClassRenderer, intersectionWithJavaAlphabeticProperty) {
+  EXPECT_EQ(render("[\\p{javaAlphabetic}&&[a-z]]"), "[a-z]");
+}
+
+TEST(ClassRenderer, intersectionWithScriptAlias) {
+  const auto result = render("[\\p{sc=Grek}&&\\p{L}]");
+  EXPECT_EQ(result.find("&&"), std::string::npos) << result;
+  EXPECT_EQ(result.find("&"), std::string::npos) << result; // Subsumes "&&".
+}
+
+TEST(ClassRenderer, pureIntersectionFallbackWithUnknownProperty) {
+  const auto result = render("[\\p{UnknownXyz}&&[a-z]]");
+  EXPECT_NE(result.find("\\p{UnknownXyz}"), std::string::npos) << result;
+  EXPECT_NE(result.find("&&"), std::string::npos) << result;
+  EXPECT_TRUE(
+      result.find("a-z") != std::string::npos ||
+      (result.find("a") != std::string::npos &&
+       result.find("z") != std::string::npos))
+      << result;
+}
+
+TEST(ClassRenderer, nestedNegatedIntersection) {
+  EXPECT_EQ(render("[^[a-c]&&[d-f]]"), "[\\x{0}-\\x{10FFFF}]");
+} // The negation of an empty intersection renders as the full range.
+
+TEST(ClassRenderer, negatedIntersectionOfRanges) {
+  EXPECT_EQ(render("[^a-c&&b-d]"), "[\\x{0}-ad-\\x{10FFFF}]");
+}
+
+TEST(ClassRenderer, propertyLeafPassesThrough) {
+  const auto result = render("[\\d\\w]");
+  EXPECT_NE(result.find("\\d"), std::string::npos) << result;
+  EXPECT_NE(result.find("\\w"), std::string::npos) << result;
+}
+
+TEST(ClassRenderer, multipleIntersectionOperands) {
+  EXPECT_EQ(render("[a-m&&m-z&&a-c]"), "[^\\x{0}-\\x{10FFFF}]");
+}
+
+TEST(ClassRenderer, nestedNegatedWithUnknownPropertyPreservesNegation) {
+  const auto result = render("[abc[^\\p{UnknownXyz}]]");
+  EXPECT_NE(result.find("[^"), std::string::npos) << result;
+  EXPECT_NE(result.find("\\p{UnknownXyz}"), std::string::npos) << result;
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator::test
diff --git a/velox/functions/lib/java_pcre2_translator/tests/EvaluatorTest.cpp b/velox/functions/lib/java_pcre2_translator/tests/EvaluatorTest.cpp
new file mode 100644
index 00000000000..424a827685e
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/tests/EvaluatorTest.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.EvaluatorTest (Java) under Apache-2.0 by the
+// same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/Evaluator.h"
+
+#include "velox/functions/lib/java_pcre2_translator/ClassBodyParser.h"
+#include "velox/functions/lib/java_pcre2_translator/EvaluationFailedException.h"
+
+#include <gtest/gtest.h>
+
+namespace facebook::velox::functions::java_pcre2_translator::test {
+
+class EvaluatorPosixShorthand // Param: (token, code point it must contain).
+    : public testing::TestWithParam<std::tuple<std::string, int>> {};
+
+TEST_P(
+    EvaluatorPosixShorthand,
+    positivePosixShorthandsContainExpectedCodePoint) {
+  auto [token, cp] = GetParam();
+  auto rs = Evaluator::toRangeSet(ClassNode(PropertyLeaf(token, false)));
+  EXPECT_TRUE(rs.contains(cp)) << token;
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    Tokens,
+    EvaluatorPosixShorthand,
+    testing::Values(
+        std::make_tuple("\\d", 48),
+        std::make_tuple("\\w", 95),
+        std::make_tuple("\\s", 32),
+        std::make_tuple("\\p{ASCII}", 65),
+        std::make_tuple("\\p{Alpha}", 65),
+        std::make_tuple("\\p{Alnum}", 48),
+        std::make_tuple("\\p{Lower}", 97),
+        std::make_tuple("\\p{Upper}", 65),
+        std::make_tuple("\\p{Digit}", 48),
+        std::make_tuple("\\p{XDigit}", 102),
+        std::make_tuple("\\p{Space}", 32),
+        std::make_tuple("\\p{Blank}", 9),
+        std::make_tuple("\\p{Cntrl}", 0),
+        std::make_tuple("\\p{Graph}", 33),
+        std::make_tuple("\\p{Print}", 32),
+        std::make_tuple("\\p{Punct}", 46)));
+
+TEST(Evaluator, negatedShorthandsComplementCorrectly) {
+  auto nd = Evaluator::toRangeSet(ClassNode(PropertyLeaf("\\D", true)));
+  EXPECT_TRUE(nd.contains('a'));
+  EXPECT_FALSE(nd.contains('0'));
+
+  auto ns = Evaluator::toRangeSet(ClassNode(PropertyLeaf("\\S", true)));
+  EXPECT_FALSE(ns.contains(' '));
+  EXPECT_TRUE(ns.contains('a'));
+}
+
+TEST(Evaluator, unknownPropertyThrowsEvaluationFailed) {
+  EXPECT_THROW(
+      Evaluator::toRangeSet(
+          ClassNode(PropertyLeaf("\\p{ThisPropertyDoesNotExistXyz}", false))),
+      EvaluationFailedException);
+}
+
+TEST(Evaluator, unknownPropertyInsideIntersectionThrows) {
+  auto inter = ClassNode(Intersection(
+      std::vector<ClassNode>{
+          ClassNode(PropertyLeaf("\\p{UnknownXyz}", false)),
+          ClassNode(Range('a', 'z'))}));
+  EXPECT_THROW(Evaluator::toRangeSet(inter), EvaluationFailedException);
+}
+
+TEST(Evaluator, tryToRangeSetReturnsNullOnFailure) {
+  EXPECT_FALSE(
+      Evaluator::tryToRangeSet(
+          ClassNode(PropertyLeaf("\\p{UnknownXyz}", false)))
+          .has_value());
+}
+
+TEST(Evaluator, tryToRangeSetReturnsRangeSetOnSuccess) {
+  auto rs = Evaluator::tryToRangeSet(ClassNode(PropertyLeaf("\\d", false)));
+  ASSERT_TRUE(rs.has_value());
+  EXPECT_TRUE(rs->contains('5'));
+}
+
+TEST(Evaluator, javaAlphabeticIntersectionEvaluates) {
+  std::size_t pos = 0;
+  auto node = ClassBodyParser::parseClass("[\\p{javaAlphabetic}&&[a-z]]", pos);
+  auto rs = Evaluator::toRangeSet(node);
+  EXPECT_TRUE(rs.contains('a'));
+  EXPECT_FALSE(rs.contains(0x03B1)); // U+03B1 is not in [a-z].
+  EXPECT_FALSE(rs.contains('&'));
+}
+
+TEST(Evaluator, scriptAliasIntersectionEvaluates) {
+  std::size_t pos = 0;
+  auto node = ClassBodyParser::parseClass("[\\p{sc=Grek}&&\\p{L}]", pos);
+  auto rs = Evaluator::toRangeSet(node);
+  EXPECT_TRUE(rs.contains(0x03B1));
+  EXPECT_FALSE(rs.contains('a'));
+  EXPECT_FALSE(rs.contains('&'));
+}
+
+TEST(Evaluator, inPrefixIntersectionUsesBlockNotScript) {
+  std::size_t pos = 0;
+  auto node = ClassBodyParser::parseClass("[\\p{InGreek}&&\\x{1F00}]", pos);
+  auto rs = Evaluator::toRangeSet(node);
+  EXPECT_FALSE(rs.contains(0x1F00)); // Expected outside the InGreek block.
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator::test
diff --git a/velox/functions/lib/java_pcre2_translator/tests/JavaRegexTranslatorTest.cpp b/velox/functions/lib/java_pcre2_translator/tests/JavaRegexTranslatorTest.cpp
new file mode 100644
index 00000000000..1e9ed5f52ca
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/tests/JavaRegexTranslatorTest.cpp
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.JavaRegexTranslatorTest (Java) under
+// Apache-2.0 by the same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/JavaRegexTranslator.h"
+
+#include <gtest/gtest.h>
+
+namespace facebook::velox::functions::java_pcre2_translator::test {
+
+TEST(JavaRegexTranslator, passthroughForPatternsWithoutProperties) {
+  EXPECT_EQ("\\d+", toPcre2Pattern("\\d+"));
+  EXPECT_EQ("[a-z]", toPcre2Pattern("[a-z]"));
+  EXPECT_EQ("abc", toPcre2Pattern("abc"));
+}
+
+TEST(JavaRegexTranslator, rewritesInBlockProperty) {
+  EXPECT_EQ("[\\x{370}-\\x{3FF}]", toPcre2Pattern("\\p{InGreek}"));
+  EXPECT_EQ("[^\\x{370}-\\x{3FF}]", toPcre2Pattern("\\P{InGreek}"));
+  EXPECT_EQ("[\\x{370}-\\x{3FF}]", toPcre2Pattern("\\p{blk=Greek}"));
+  EXPECT_EQ("[^\\x{370}-\\x{3FF}]", toPcre2Pattern("\\P{block=Greek}"));
+  EXPECT_EQ("[\\x{0}-\\x{7F}]", toPcre2Pattern("\\p{blk=BasicLatin}"));
+  EXPECT_EQ("a[\\x{370}-\\x{3FF}]b", toPcre2Pattern("a\\p{InGreek}b"));
+}
+
+TEST(JavaRegexTranslator, rewritesIsScriptProperty) { // `Is` is dropped.
+  EXPECT_EQ("\\p{L}", toPcre2Pattern("\\p{IsL}"));
+  EXPECT_EQ("\\p{LC}", toPcre2Pattern("\\p{IsLC}"));
+  EXPECT_EQ("\\p{ASCII}", toPcre2Pattern("\\p{IsASCII}"));
+}
+
+TEST(JavaRegexTranslator, rewritesShortAliases) {
+  EXPECT_EQ("[\\x{00}-\\x{FF}]", toPcre2Pattern("\\p{L1}"));
+}
+
+TEST(JavaRegexTranslator, rewritesJavaProperty) {
+  const auto result = toPcre2Pattern("\\p{javaLowerCase}");
+  EXPECT_TRUE(result.starts_with("[")) << result;
+  EXPECT_NE(std::string::npos, result.find("\\x{AA}")) << result;
+  EXPECT_NE("\\p{Ll}", result);
+}
+
+TEST(JavaRegexTranslator, rewritesUnicodeEscapeSurrogatePairs) {
+  EXPECT_EQ("\\x{1f600}", toPcre2Pattern("\\uD83D\\uDE00")); // One scalar.
+  EXPECT_THROW(toPcre2Pattern("\\uD83D"), EvaluationFailedException);
+}
+
+TEST(JavaRegexTranslator, doesNotRewriteInsideQuotation) {
+  EXPECT_EQ("\\Q\\p{InGreek}\\E", toPcre2Pattern("\\Q\\p{InGreek}\\E"));
+}
+
+TEST(JavaRegexTranslator, doesNotRewriteEscapedBackslashFollowedByP) {
+  EXPECT_THROW(toPcre2Pattern("\\\\p{InGreek}"), EvaluationFailedException);
+} // The regex text is an escaped backslash followed by `p{InGreek}`.
+
+TEST(JavaRegexTranslator, rejectsIllegalQuantifierBody) {
+  EXPECT_THROW(toPcre2Pattern("a{^InGreek}"), EvaluationFailedException);
+  EXPECT_THROW(toPcre2Pattern("a{}"), EvaluationFailedException);
+  EXPECT_THROW(toPcre2Pattern("a{,3}"), EvaluationFailedException);
+  EXPECT_THROW(toPcre2Pattern("a{"), EvaluationFailedException);
+  EXPECT_THROW(toPcre2Pattern("a{3"), EvaluationFailedException);
+}
+
+TEST(JavaRegexTranslator, acceptsValidQuantifiers) {
+  EXPECT_EQ("a{3}", toPcre2Pattern("a{3}"));
+  EXPECT_EQ("a{3,}", toPcre2Pattern("a{3,}"));
+  EXPECT_EQ("a{3,5}", toPcre2Pattern("a{3,5}"));
+}
+
+TEST(JavaRegexTranslator, escapeHatchDisablesTranslator) {
+  EXPECT_EQ("[\\x{370}-\\x{3FF}]", toPcre2Pattern("\\p{InGreek}"));
+} // NOTE(review): no escape hatch is used; repeats rewritesInBlockProperty.
+
+TEST(JavaRegexTranslator, rewritesSurrogateBlockToRange) {
+  EXPECT_EQ("[\\x{D800}-\\x{DB7F}]", toPcre2Pattern("\\p{InHIGH_SURROGATES}"));
+  EXPECT_EQ("[\\x{DC00}-\\x{DFFF}]", toPcre2Pattern("\\p{InLOW_SURROGATES}"));
+}
+
+TEST(JavaRegexTranslator, surrogateBlockNeedsRawByteMode) {
+  bool needsRawByteMode = false;
+  EXPECT_EQ(
+      "[\\x{D800}-\\x{DB7F}]",
+      toPcre2Pattern("\\p{InHIGH_SURROGATES}", needsRawByteMode));
+  EXPECT_TRUE(needsRawByteMode);
+}
+
+TEST(JavaRegexTranslator, reportsRawSurrogateBytesNeedRawByteMode) {
+  bool needsRawByteMode = false;
+  EXPECT_EQ(
+      "[\xED\xA0\x80]", toPcre2Pattern("[\xED\xA0\x80]", needsRawByteMode));
+  EXPECT_TRUE(needsRawByteMode);
+} // Bytes ED A0 80 are the three-byte encoding of the surrogate U+D800.
+
+TEST(JavaRegexTranslator, doesNotReportRawByteModeForSupplementaryScalar) {
+  bool needsRawByteMode = true; // The call is expected to reset this.
+  EXPECT_EQ("\\x{1f600}", toPcre2Pattern("\\uD83D\\uDE00", needsRawByteMode));
+  EXPECT_FALSE(needsRawByteMode);
+}
+
+TEST(JavaRegexTranslator, negatedSurrogateBlockIsNegated) {
+  EXPECT_EQ("[^\\x{D800}-\\x{DB7F}]", toPcre2Pattern("\\P{InHIGH_SURROGATES}"));
+}
+
+TEST(JavaRegexTranslator, rewritesJavaDefinedAsNegatedUnassigned) {
+  EXPECT_EQ("\\P{Cn}", toPcre2Pattern("\\p{javaDefined}"));
+}
+
+TEST(JavaRegexTranslator, multipleTokensInOnePattern) {
+  EXPECT_EQ(
+      "[\\x{370}-\\x{3FF}][\\x{3040}-\\x{309F}]",
+      toPcre2Pattern("\\p{InGreek}\\p{InHiragana}"));
+}
+
+TEST(JavaRegexTranslator, nestedUnionFlattens) {
+  const auto result = toPcre2Pattern("[abc[def]]");
+  EXPECT_EQ(std::string::npos, result.find("[[")) << result; // Implied below.
+  EXPECT_EQ("[abcdef]", result);
+}
+
+TEST(JavaRegexTranslator, intersectionBecomesRangeSet) {
+  EXPECT_EQ("[^\\x{0}-\\x{10FFFF}]", toPcre2Pattern("[a-c&&d-f]"));
+}
+
+TEST(JavaRegexTranslator, wDashHashEscapesDash) {
+  const auto result = toPcre2Pattern("[\\w-#]");
+  EXPECT_NE(std::string::npos, result.find("\\-")) << result;
+}
+
+TEST(JavaRegexTranslator, classBodyRewritePreservesOutsidePattern) {
+  EXPECT_EQ("a[bc]d", toPcre2Pattern("a[bc]d"));
+}
+
+TEST(JavaRegexTranslator, propertyInsideClassRewritten) {
+  const auto result = toPcre2Pattern("[\\p{InGreek}]");
+  EXPECT_TRUE(
+      result.find("\\x{370}") != std::string::npos ||
+      result.find("\\x{3FF}") != std::string::npos)
+      << result; // Either block endpoint is accepted as evidence.
+  EXPECT_EQ(std::string::npos, result.find("\\p{InGreek}")) << result;
+}
+
+TEST(JavaRegexTranslator, surrogateBlockInsideNestedClassIsPreserved) {
+  EXPECT_EQ(
+      "[\\x{D800}-\\x{DB7F}\\x{DC00}-\\x{DFFF}]",
+      toPcre2Pattern("[[\\p{InHIGH_SURROGATES}\\p{InLOW_SURROGATES}]]"));
+}
+
+TEST(JavaRegexTranslator, intersectionWithKnownPropertyEvaluated) {
+  const auto result = toPcre2Pattern("[\\d&&[0-3]]");
+  EXPECT_EQ(std::string::npos, result.find("&&")) << result;
+}
+
+TEST(JavaRegexTranslator, dropsUFlagInModeModifier) {
+  EXPECT_EQ("(?i)foo", toPcre2Pattern("(?iu)foo"));
+  EXPECT_EQ("(?i)foo", toPcre2Pattern("(?ui)foo"));
+  EXPECT_EQ("(?im)foo", toPcre2Pattern("(?ium)foo"));
+}
+
+TEST(JavaRegexTranslator, dropsUInScopedGroup) {
+  EXPECT_EQ("(?i:foo)", toPcre2Pattern("(?iu:foo)"));
+}
+
+TEST(JavaRegexTranslator, dropsDFlag) {
+  EXPECT_EQ("(?m)foo", toPcre2Pattern("(?dm)foo"));
+}
+
+TEST(JavaRegexTranslator, emptyFlagsRemovedEntirely) {
+  EXPECT_EQ("foo", toPcre2Pattern("(?u)foo"));
+  EXPECT_EQ("(?:foo)", toPcre2Pattern("(?u:foo)")); // Group itself is kept.
+}
+
+TEST(JavaRegexTranslator, preservesNonModeGroups) {
+  EXPECT_EQ("(?:foo)", toPcre2Pattern("(?:foo)"));
+  EXPECT_EQ("(?=foo)", toPcre2Pattern("(?=foo)"));
+  EXPECT_EQ("(?<name>foo)", toPcre2Pattern("(?<name>foo)"));
+  EXPECT_EQ("(?#comment)foo", toPcre2Pattern("(?#comment)foo"));
+}
+
+TEST(JavaRegexTranslator, handlesOnOffFlagGroup) {
+  EXPECT_EQ("(?i-m)foo", toPcre2Pattern("(?iu-mU)foo")); // u and U dropped.
+}
+
+TEST(JavaRegexTranslator, allFlagsDroppedFromOnOff) {
+  EXPECT_EQ("foo", toPcre2Pattern("(?u-U)foo"));
+}
+
+TEST(JavaRegexTranslator, doesNotTouchInsideClass) {
+  EXPECT_EQ("[(?i)]", toPcre2Pattern("[(?i)]"));
+}
+
+TEST(JavaRegexTranslator, propertyIntersectionEndToEnd) {
+  const auto out = toPcre2Pattern("[\\p{L}&&[\\P{InGreek}]]");
+  EXPECT_EQ(std::string::npos, out.find("&&")) << out;
+  EXPECT_EQ(std::string::npos, out.find("[[")) << out;
+  EXPECT_NE(std::string::npos, out.find("A-Z")) << out;
+  EXPECT_NE(std::string::npos, out.find("a-z")) << out;
+  EXPECT_EQ(std::string::npos, out.find("\\x{3B1}")) << out; // In InGreek.
+  EXPECT_EQ(std::string::npos, out.find("\\x{3A9}")) << out; // In InGreek.
+}
+
+TEST(JavaRegexTranslator, inlineCaseInsensitiveExpandsCasedTopLevelProperty) {
+  EXPECT_EQ("(?i)[\\p{Lu}\\p{Ll}\\p{Lt}]", toPcre2Pattern("(?i)\\p{Lu}"));
+}
+
+TEST(JavaRegexTranslator, inlineCaseInsensitiveExpandsCasedClassProperty) {
+  EXPECT_EQ("(?i)[\\p{Lu}\\p{Ll}\\p{Lt}]", toPcre2Pattern("(?i)[\\p{Lu}]"));
+}
+
+TEST(
+    JavaRegexTranslator,
+    inlineCaseInsensitiveExpandsNegatedCasedClassProperty) {
+  EXPECT_EQ("(?i)[\\P{Lu}]", toPcre2Pattern("(?i)[\\P{Lu}]"));
+}
+
+TEST(JavaRegexTranslator, inlineCaseInsensitiveKeepsLiteralAsciiRange) {
+  EXPECT_EQ("(?i)[A-Z]", toPcre2Pattern("(?i)[A-Z]")); // range kept as-is
+}
+
+TEST(JavaRegexTranslator, embeddedFlagsDoNotLeakPastEnclosingGroup) {
+  EXPECT_EQ(R"((a(?i)b)[\p{Lu}])", toPcre2Pattern(R"((a(?i)b)[\p{Lu}])"));
+}
+
+TEST(JavaRegexTranslator, longBackreferenceDoesNotOverflow) {
+  const auto pattern = toPcre2Pattern(R"(\999999999999999999999999999999)");
+  EXPECT_EQ(0, pattern.rfind("(*F)", 0)) << pattern; // i.e. starts with (*F)
+}
+
+TEST(
+    JavaRegexTranslator,
+    unicodeCharacterClassIntersectionThrowsInsteadOfAsciiEvaluation) {
+  EXPECT_THROW(
+      toPcre2Pattern(R"((?U)[\d&&\p{InArabic}])"), EvaluationFailedException);
+}
+
+TEST(JavaRegexTranslator, escapedBraceIsNotQuantifier) {
+  EXPECT_EQ(R"(\{)", toPcre2Pattern(R"(\{)"));
+  EXPECT_EQ(R"(a\{b})", toPcre2Pattern(R"(a\{b})"));
+  EXPECT_EQ(R"(\{not-a-quantifier})", toPcre2Pattern(R"(\{not-a-quantifier})"));
+}
+
+TEST(JavaRegexTranslator, doubleBackslashThenBraceStillQuantifier) {
+  EXPECT_THROW(toPcre2Pattern(R"(\\{x})"), EvaluationFailedException);
+}
+
+TEST(JavaRegexTranslator, commentsModeIgnoresBracesInLineComments) {
+  EXPECT_EQ("(?x)# {\n a", toPcre2Pattern("(?x)# {\n a")); // inline flag
+  EXPECT_EQ("(?x:# {\n a)", toPcre2Pattern("(?x:# {\n a)")); // scoped group
+}
+
+TEST(JavaRegexTranslator, unicodeCaseExpandsAsciiLiterals) {
+  EXPECT_EQ(R"([Aa][Bb][Cc])", toPcre2PatternWithUnicodeCase("abc"));
+}
+
+TEST(JavaRegexTranslator, unicodeCaseExpandsKnownUnicodeLiterals) {
+  const auto fromKelvin = toPcre2PatternWithUnicodeCase("\xe2\x84\xaa");
+  EXPECT_NE(std::string::npos, fromKelvin.find(R"(\x{212a})")) << fromKelvin;
+  EXPECT_NE(std::string::npos, fromKelvin.find("K")) << fromKelvin;
+  EXPECT_NE(std::string::npos, fromKelvin.find("k")) << fromKelvin;
+
+  const auto fromSigma = toPcre2PatternWithUnicodeCase("\xce\xa3");
+  EXPECT_NE(std::string::npos, fromSigma.find(R"(\x{3a3})")) << fromSigma;
+  EXPECT_NE(std::string::npos, fromSigma.find(R"(\x{3c3})")) << fromSigma;
+  EXPECT_NE(std::string::npos, fromSigma.find(R"(\x{3c2})")) << fromSigma;
+}
+
+TEST(JavaRegexTranslator, unicodeCaseExpandsUnicodeEscapes) {
+  EXPECT_EQ(R"([Kk\x{212a}])", toPcre2PatternWithUnicodeCase("\\u212A"));
+}
+
+TEST(JavaRegexTranslator, unicodeCaseSkipsClassesAndQuotes) {
+  EXPECT_EQ(R"([abc]\Qabc\E)", toPcre2PatternWithUnicodeCase("[abc]\\Qabc\\E"));
+}
+
+TEST(JavaRegexTranslatorRe2, reusesPropertyAndClassPipeline) {
+  EXPECT_EQ(R"([\x{370}-\x{3FF}])", toRe2Pattern(R"(\p{InGreek})"));
+  EXPECT_EQ("[abcdef]", toRe2Pattern("[abc[def]]")); // nested union flattened
+  EXPECT_EQ(R"([^\x{0}-\x{10FFFF}])", toRe2Pattern("[a-c&&d-f]")); // empty set
+}
+
+TEST(JavaRegexTranslatorRe2, rewritesJavaNamedCapturingGroups) {
+  EXPECT_EQ(R"((?P<name>foo))", toRe2Pattern(R"((?<name>foo))"));
+  EXPECT_EQ(R"((a(?P<num>\d+)))", toRe2Pattern(R"((a(?<num>\d+)))"));
+}
+
+TEST(
+    JavaRegexTranslatorRe2,
+    doesNotRewriteNamedGroupLookalikesInQuotesOrClasses) {
+  EXPECT_EQ(R"(\Q(?<name>foo)\E)", toRe2Pattern(R"(\Q(?<name>foo)\E)"));
+  EXPECT_EQ(R"([(?<name>)])", toRe2Pattern(R"([(?<name>)])"));
+}
+
+TEST(JavaRegexTranslatorRe2, rejectsLookaround) {
+  EXPECT_THROW(toRe2Pattern(R"((?=foo))"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?!foo))"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?<=foo))"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?<!foo))"), EvaluationFailedException);
+}
+
+TEST(JavaRegexTranslatorRe2, rejectsBackreferencesOutsideClasses) {
+  EXPECT_THROW(toRe2Pattern(R"((a)\1)"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?<n>a)\k<n>)"), EvaluationFailedException);
+  EXPECT_NO_THROW(toRe2Pattern(R"([\1\k<n>])")); // allowed inside a class
+}
+
+TEST(JavaRegexTranslatorRe2, rejectsPossessiveQuantifiers) {
+  EXPECT_THROW(toRe2Pattern(R"(a*+)"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"(a?+)"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"(a++)"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"(a{1,3}+)"), EvaluationFailedException);
+}
+
+TEST(JavaRegexTranslatorRe2, rejectsAtomicGroupsAndUnsupportedFlags) {
+  EXPECT_THROW(toRe2Pattern(R"((?>foo))"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?U)foo)"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?d)foo)"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?c)foo)"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?id:foo))"), EvaluationFailedException);
+  EXPECT_EQ("foo", toRe2Pattern(R"((?u)foo)")); // 'u' on: dropped
+  EXPECT_EQ("foo", toRe2Pattern(R"((?-U)foo)")); // 'U' off: dropped
+  EXPECT_EQ(R"((?i:foo))", toRe2Pattern(R"((?i-d:foo))")); // 'd' off: dropped
+  EXPECT_EQ("foo", toRe2Pattern(R"((?-c)foo)")); // 'c' off: dropped
+}
+
+TEST(JavaRegexTranslatorRe2, rewritesJavaOctalEscapesForRe2) {
+  EXPECT_EQ(R"(\x{a})", toRe2Pattern(R"(\012)")); // octal 12 == 0xA
+}
+
+TEST(JavaRegexTranslatorRe2, translatesCommentsModeForRe2) {
+  EXPECT_EQ("abc", toRe2Pattern(R"((?x)a b c)"));
+  EXPECT_EQ("abcdef", toRe2Pattern("(?x)abc # comment\ndef")); // comment cut
+  EXPECT_EQ("a b", toRe2Pattern(R"((?x)a\ b)")); // escaped space survives
+  EXPECT_EQ("[a]", toRe2Pattern(R"((?x)[ a])"));
+  EXPECT_EQ("[ ]", toRe2Pattern(R"((?x)[\ ])"));
+  EXPECT_EQ("[a]", toRe2Pattern("(?x)[a# comment\n]")); // newline ends comment
+  EXPECT_THROW(toRe2Pattern(R"((?x)[a# comment])"), EvaluationFailedException);
+  EXPECT_EQ(R"((?i:ab))", toRe2Pattern(R"((?ix:a b))"));
+  EXPECT_THROW(toRe2Pattern(R"((?x)(? <name>a))"), EvaluationFailedException);
+  EXPECT_THROW(toRe2Pattern(R"((?x)(? :a))"), EvaluationFailedException);
+}
+
+TEST(JavaRegexTranslatorRe2, unsupportedFeatureLookalikesInQuotesAreLiterals) {
+  EXPECT_EQ(
+      R"(\Q(?=foo)\1*+(?>x)(?U)\E)",
+      toRe2Pattern(R"(\Q(?=foo)\1*+(?>x)(?U)\E)"));
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator::test
diff --git a/velox/functions/lib/java_pcre2_translator/tests/JdkPropertyExpanderTest.cpp b/velox/functions/lib/java_pcre2_translator/tests/JdkPropertyExpanderTest.cpp
new file mode 100644
index 00000000000..f6625ecb238
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/tests/JdkPropertyExpanderTest.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Originally authored by Oleksii PELYKH for pcre4j; ported from
+// org.pcre4j.regex.translate.JdkPropertyExpanderTest (Java) under Apache-2.0 by
+// the same author for inclusion in Velox.
+//
+#include "velox/functions/lib/java_pcre2_translator/JdkPropertyExpander.h"
+
+#include <gtest/gtest.h>
+
+namespace facebook::velox::functions::java_pcre2_translator::test {
+
+TEST(JdkPropertyExpander, asciiLetterCoverage) {
+  auto letters = JdkPropertyExpander::expand(R"(\p{L})");
+  ASSERT_TRUE(letters.has_value());
+  EXPECT_TRUE(letters->contains('a'));
+  EXPECT_TRUE(letters->contains('Z'));
+  EXPECT_FALSE(letters->contains('0')); // digit
+  EXPECT_FALSE(letters->contains(' ')); // space
+}
+
+TEST(JdkPropertyExpander, greekScript) {
+  auto script = JdkPropertyExpander::expand(R"(\p{Greek})");
+  ASSERT_TRUE(script.has_value());
+  EXPECT_TRUE(script->contains(0x03B1)); // U+03B1, Greek small alpha
+  EXPECT_FALSE(script->contains('a')); // Latin 'a' must stay out
+}
+
+TEST(JdkPropertyExpander, negatedProperty) {
+  auto nonLetters = JdkPropertyExpander::expand(R"(\P{L})");
+  ASSERT_TRUE(nonLetters.has_value());
+  EXPECT_FALSE(nonLetters->contains('a'));
+  EXPECT_TRUE(nonLetters->contains('0'));
+}
+
+TEST(JdkPropertyExpander, unknownReturnsNull) {
+  EXPECT_FALSE(JdkPropertyExpander::expand(R"(\p{FooBarBaz})").has_value());
+}
+
+TEST(JdkPropertyExpander, caches) {
+  auto initial = JdkPropertyExpander::expand(R"(\p{L})");
+  auto repeated = JdkPropertyExpander::expand(R"(\p{L})");
+  ASSERT_TRUE(initial.has_value());
+  ASSERT_TRUE(repeated.has_value());
+  EXPECT_EQ(*initial, *repeated); // same token, same set
+}
+
+TEST(JdkPropertyExpander, greekIntersectionWithLetters) {
+  auto allLetters = JdkPropertyExpander::expand(R"(\p{L})");
+  auto outsideGreek = JdkPropertyExpander::expand(R"(\P{Greek})");
+  ASSERT_TRUE(allLetters.has_value());
+  ASSERT_TRUE(outsideGreek.has_value());
+  auto nonGreekLetters = allLetters->intersect(*outsideGreek);
+  EXPECT_TRUE(nonGreekLetters.contains('a'));
+  EXPECT_TRUE(nonGreekLetters.contains(0x6000));
+  EXPECT_FALSE(nonGreekLetters.contains(0x03B1));
+}
+
+TEST(JdkPropertyExpander, leafCategoryLu) {
+  auto upper = JdkPropertyExpander::expand(R"(\p{Lu})");
+  ASSERT_TRUE(upper.has_value());
+  EXPECT_TRUE(upper->contains('A'));
+  EXPECT_FALSE(upper->contains('a'));
+  EXPECT_FALSE(upper->contains('0'));
+}
+
+TEST(JdkPropertyExpander, combinedCategoryN) {
+  auto numbers = JdkPropertyExpander::expand(R"(\p{N})");
+  ASSERT_TRUE(numbers.has_value());
+  EXPECT_TRUE(numbers->contains('0'));
+  EXPECT_FALSE(numbers->contains('a'));
+}
+
+TEST(JdkPropertyExpander, binaryAlphabeticProperty) {
+  auto alpha = JdkPropertyExpander::expand(R"(\p{Alphabetic})");
+  ASSERT_TRUE(alpha.has_value());
+  EXPECT_TRUE(alpha->contains('a'));
+  EXPECT_TRUE(alpha->contains(0x03B1));
+  EXPECT_FALSE(alpha->contains('0'));
+}
+
+TEST(JdkPropertyExpander, scriptShortAlias) {
+  auto viaAlias = JdkPropertyExpander::expand(R"(\p{Grek})");
+  ASSERT_TRUE(viaAlias.has_value());
+  EXPECT_TRUE(viaAlias->contains(0x03B1));
+  EXPECT_FALSE(viaAlias->contains('a'));
+}
+
+TEST(JdkPropertyExpander, blockLongAlias) {
+  auto latinBlock = JdkPropertyExpander::expand(R"(\p{Basic_Latin})");
+  ASSERT_TRUE(latinBlock.has_value());
+  EXPECT_TRUE(latinBlock->contains('A'));
+  EXPECT_FALSE(latinBlock->contains(0x03B1));
+}
+
+TEST(JdkPropertyExpander, inPrefixUsesBlockNotScript) {
+  auto block = JdkPropertyExpander::expand(R"(\p{InGreek})");
+  ASSERT_TRUE(block.has_value());
+  EXPECT_TRUE(block->contains(0x03B1));
+  EXPECT_FALSE(block->contains(0x1F00)); // outside U+0370..U+03FF
+}
+
+TEST(JdkPropertyExpander, nonPropertyTokenReturnsNull) {
+  EXPECT_FALSE(JdkPropertyExpander::expand(R"(\d)").has_value());
+  EXPECT_FALSE(JdkPropertyExpander::expand(R"(\w)").has_value());
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator::test
diff --git a/velox/functions/lib/java_pcre2_translator/tests/PropertyMapTest.cpp b/velox/functions/lib/java_pcre2_translator/tests/PropertyMapTest.cpp
new file mode 100644
index 00000000000..879296bfbb4
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/tests/PropertyMapTest.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Ported from org.pcre4j.regex.translate.PropertyMapTest (Java).
+//
+#include "velox/functions/lib/java_pcre2_translator/PropertyMap.h"
+
+#include <gtest/gtest.h>
+
+namespace facebook::velox::functions::java_pcre2_translator::test {
+
+TEST(PropertyMap, inPrefixStrip) {
+  EXPECT_EQ(R"([\x{370}-\x{3FF}])", PropertyMap::apply("InGreek").value());
+}
+
+TEST(PropertyMap, isPrefixStrip) {
+  EXPECT_EQ("L", PropertyMap::apply("IsL").value()); // "Is" prefix removed
+}
+
+TEST(PropertyMap, unknownReturnsNullopt) {
+  EXPECT_FALSE(PropertyMap::apply("FooBarBaz").has_value());
+}
+
+TEST(PropertyMap, l1ExpandsToRange) {
+  EXPECT_EQ(R"([\x{00}-\x{FF}])", PropertyMap::apply("L1").value());
+}
+
+TEST(PropertyMap, javaLowerCase) {
+  const auto classText = PropertyMap::apply("javaLowerCase").value();
+  EXPECT_TRUE(classText.starts_with("["));
+  EXPECT_NE(std::string::npos, classText.find(R"(\x{AA})")) << classText;
+}
+
+TEST(PropertyMap, highSurrogatesExpandToRange) {
+  // Three spellings of the same block must all yield U+D800..U+DB7F.
+  const std::string highRange = R"([\x{D800}-\x{DB7F}])";
+  for (const char* spelling :
+       {"InHIGH_SURROGATES", "InHighSurrogates", "blk=HighSurrogates"}) {
+    EXPECT_EQ(highRange, PropertyMap::apply(spelling).value()) << spelling;
+  }
+
+  EXPECT_EQ(
+      R"([\x{DB80}-\x{DBFF}])",
+      PropertyMap::apply("InHighPrivateUseSurrogates").value());
+}
+
+TEST(PropertyMap, lowSurrogatesExpandToRange) {
+  EXPECT_EQ(
+      R"([\x{DC00}-\x{DFFF}])", PropertyMap::apply("InLOW_SURROGATES").value());
+}
+
+TEST(PropertyMap, isAsciiStripsIs) {
+  EXPECT_EQ("ASCII", PropertyMap::apply("IsASCII").value());
+}
+
+TEST(PropertyMap, javaDefinedMapsToNegatedCn) {
+  EXPECT_EQ(R"(\P{Cn})", PropertyMap::apply("javaDefined").value());
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator::test
diff --git a/velox/functions/lib/java_pcre2_translator/tests/RangeSetTest.cpp b/velox/functions/lib/java_pcre2_translator/tests/RangeSetTest.cpp
new file mode 100644
index 00000000000..01c7a596ca4
--- /dev/null
+++ b/velox/functions/lib/java_pcre2_translator/tests/RangeSetTest.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+//
+// Ported from org.pcre4j.regex.translate.RangeSetTest (Java).
+//
+#include "velox/functions/lib/java_pcre2_translator/RangeSet.h"
+
+#include <gtest/gtest.h>
+
+#include <stdexcept>
+#include <string>
+
+namespace facebook::velox::functions::java_pcre2_translator::test {
+
+TEST(RangeSet, emptySet) {
+  EXPECT_TRUE(RangeSet::empty().isEmpty());
+  EXPECT_FALSE(RangeSet::empty().contains('a'));
+}
+
+TEST(RangeSet, singleCodePoint) {
+  auto s = RangeSet::single('a');
+  EXPECT_FALSE(s.isEmpty());
+  EXPECT_TRUE(s.contains('a'));
+  EXPECT_FALSE(s.contains('b'));
+}
+
+TEST(RangeSet, range) {
+  auto az = RangeSet::range('a', 'z');
+  EXPECT_TRUE(az.contains('a')); // lower bound is inclusive
+  EXPECT_TRUE(az.contains('m'));
+  EXPECT_TRUE(az.contains('z')); // upper bound is inclusive
+  EXPECT_FALSE(az.contains('A'));
+  EXPECT_FALSE(az.contains('{')); // '{' is 'z' + 1
+}
+
+TEST(RangeSet, unionDisjoint) {
+  auto u = RangeSet::range('a', 'z').unionWith(RangeSet::range('A', 'Z'));
+  EXPECT_TRUE(u.contains('a'));
+  EXPECT_TRUE(u.contains('A'));
+  EXPECT_FALSE(u.contains('1'));
+}
+
+TEST(RangeSet, unionOverlapping) {
+  auto u = RangeSet::range('a', 'c').unionWith(RangeSet::range('b', 'd'));
+  EXPECT_TRUE(u.contains('a'));
+  EXPECT_TRUE(u.contains('b'));
+  EXPECT_TRUE(u.contains('d'));
+  EXPECT_FALSE(u.contains('e'));
+  EXPECT_EQ(1, u.rangeCount()); // a-c and b-d collapse into a-d
+}
+
+TEST(RangeSet, intersectOverlap) {
+  auto i = RangeSet::range('a', 'c').intersect(RangeSet::range('b', 'd'));
+  EXPECT_FALSE(i.contains('a'));
+  EXPECT_TRUE(i.contains('b'));
+  EXPECT_TRUE(i.contains('c'));
+  EXPECT_FALSE(i.contains('d'));
+}
+
+TEST(RangeSet, intersectDisjoint) {
+  auto i = RangeSet::range('a', 'c').intersect(RangeSet::range('d', 'f'));
+  EXPECT_TRUE(i.isEmpty());
+}
+
+TEST(RangeSet, complementEmpty) {
+  auto c = RangeSet::empty().complement();
+  EXPECT_EQ(RangeSet::all(), c.unionWith(RangeSet::empty()));
+  EXPECT_TRUE(c.contains(0));
+  EXPECT_TRUE(c.contains(0x10FFFF));
+}
+
+TEST(RangeSet, complementRange) {
+  auto notAz = RangeSet::range('a', 'z').complement();
+  EXPECT_FALSE(notAz.contains('a'));
+  EXPECT_FALSE(notAz.contains('z'));
+  EXPECT_TRUE(notAz.contains('A'));
+  EXPECT_TRUE(notAz.contains('0'));
+  EXPECT_TRUE(notAz.contains(0x10FFFF));
+}
+
+TEST(RangeSet, subtract) {
+  auto diff = RangeSet::range('a', 'f').subtract(RangeSet::range('c', 'f'));
+  EXPECT_TRUE(diff.contains('a'));
+  EXPECT_TRUE(diff.contains('b'));
+  EXPECT_FALSE(diff.contains('c'));
+  EXPECT_FALSE(diff.contains('f'));
+}
+
+TEST(RangeSet, toPcre2ClassBodySinglePrintable) {
+  EXPECT_EQ("a", RangeSet::single('a').toPcre2ClassBody());
+}
+
+TEST(RangeSet, toPcre2ClassBodySingleNonPrintable) {
+  EXPECT_EQ("\\x{9}", RangeSet::single('\t').toPcre2ClassBody());
+}
+
+TEST(RangeSet, toPcre2ClassBodyRange) {
+  EXPECT_EQ("a-z", RangeSet::range('a', 'z').toPcre2ClassBody());
+}
+
+TEST(RangeSet, toPcre2ClassBodyEscapesSpecialChars) {
+  EXPECT_EQ("\\-", RangeSet::single('-').toPcre2ClassBody());
+  EXPECT_EQ("\\]", RangeSet::single(']').toPcre2ClassBody());
+  EXPECT_EQ("\\^", RangeSet::single('^').toPcre2ClassBody());
+}
+
+TEST(RangeSet, toPcre2ClassBodyMultipleRanges) {
+  auto u = RangeSet::range('a', 'z').unionWith(RangeSet::range('A', 'Z'));
+  const auto body = u.toPcre2ClassBody();
+  // Both disjoint ranges must be rendered; accepting either one hides drops.
+  EXPECT_NE(std::string::npos, body.find("A-Z")) << body;
+  EXPECT_NE(std::string::npos, body.find("a-z")) << body;
+}
+
+TEST(RangeSet, singleRejectsNegative) {
+  EXPECT_THROW(RangeSet::single(-1), std::invalid_argument);
+}
+
+TEST(RangeSet, singleRejectsAboveMax) {
+  EXPECT_THROW(RangeSet::single(0x110000), std::invalid_argument);
+}
+
+TEST(RangeSet, singleAcceptsBoundaries) {
+  EXPECT_EQ(1, RangeSet::single(0).rangeCount());
+  EXPECT_EQ(1, RangeSet::single(0x10FFFF).rangeCount());
+}
+
+TEST(RangeSet, rangeRejectsNegativeLo) {
+  EXPECT_THROW(RangeSet::range(-1, 5), std::invalid_argument);
+}
+
+TEST(RangeSet, rangeRejectsHiAboveMax) {
+  EXPECT_THROW(RangeSet::range(0, 0x110000), std::invalid_argument);
+}
+
+TEST(RangeSet, rangeRejectsInverted) {
+  EXPECT_THROW(RangeSet::range(5, 4), std::invalid_argument);
+}
+
+TEST(RangeSet, unionMergesAdjacentRanges) {
+  auto merged = RangeSet::range('a', 'c').unionWith(RangeSet::range('d', 'f'));
+  EXPECT_EQ(1, merged.rangeCount())
+      << "adjacent ranges must be merged; got: " << merged.toPcre2ClassBody();
+  EXPECT_EQ("a-f", merged.toPcre2ClassBody());
+}
+
+TEST(RangeSet, unionMergesOverlappingRanges) {
+  auto merged = RangeSet::range('a', 'e').unionWith(RangeSet::range('c', 'g'));
+  EXPECT_EQ(1, merged.rangeCount());
+  EXPECT_EQ("a-g", merged.toPcre2ClassBody());
+}
+
+} // namespace facebook::velox::functions::java_pcre2_translator::test