From 72905681f7e76ea1b2bd60dceee929c5ad38e936 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 12 Sep 2025 16:32:50 +0000 Subject: [PATCH 01/11] c++23 --- cmake/compiler_flags.cmake | 17 ++--------------- datatypes/mcs_float128.h | 12 +++++++++--- dbcon/joblist/rowestimator.cpp | 4 ++-- tests/simd_processors.cpp | 32 ++++++++++++++++++++++++-------- utils/udfsdk/mcsv1_udaf.h | 2 +- utils/windowfunction/wf_udaf.cpp | 4 ++-- 6 files changed, 40 insertions(+), 31 deletions(-) diff --git a/cmake/compiler_flags.cmake b/cmake/compiler_flags.cmake index b88e1eb2f0..da247d3d3b 100644 --- a/cmake/compiler_flags.cmake +++ b/cmake/compiler_flags.cmake @@ -16,21 +16,7 @@ macro(SET_FLAGS_RELEASE) endforeach() endmacro() -# C++ standard { -if(have_CXX__std_c__20) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20") -else() - my_check_cxx_compiler_flag("-std=c++2a") - if(have_CXX__std_c__2a) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++2a") - else() - message_once(CS_NO_CXX20 "C++ Compiler does not understand -std=c++20") - return() - endif() -endif() - -unset(CMAKE_CXX_STANDARD) -# } end C++ standard +set(CMAKE_CXX_STANDARD 23) # Hacks to keep alive with MariaDB server { string(REPLACE -D_GLIBCXX_DEBUG "" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) @@ -59,6 +45,7 @@ set(FLAGS_ALL -DHAVE_CONFIG_H -DBOOST_BIND_GLOBAL_PLACEHOLDERS -Wno-suggest-override + -foperator-names ) if(COLUMNSTORE_WITH_LIBCPP) list(APPEND FLAGS_ALL -stdlib=libc++) diff --git a/datatypes/mcs_float128.h b/datatypes/mcs_float128.h index 2a97dadad7..f312da425a 100644 --- a/datatypes/mcs_float128.h +++ b/datatypes/mcs_float128.h @@ -25,14 +25,20 @@ #include #include "mcs_numeric_limits.h" -#ifdef __aarch64__ -using float128_t = long double; +#if defined(__STDCPP_FLOAT128_T__) +# if defined(__has_include) && __has_include() +# include +# endif + using float128_t = std::float128_t; +#elif defined(__aarch64__) + using float128_t = long double; #else -using float128_t = __float128; + using float128_t = __float128; #endif namespace datatypes { + /* Main union type we use to manipulate the floating-point type. */ typedef union { diff --git a/dbcon/joblist/rowestimator.cpp b/dbcon/joblist/rowestimator.cpp index 57b7100ba4..028a6838e3 100644 --- a/dbcon/joblist/rowestimator.cpp +++ b/dbcon/joblist/rowestimator.cpp @@ -188,7 +188,7 @@ float RowEstimator::estimateOpFactor(const T& min, const T& max, const T& value, uint32_t distinctValues, char cpStatus, const execplan::CalpontSystemCatalog::ColType& ct) { - float factor = 1.0; + float128_t factor = 1.0; switch (op) { @@ -255,7 +255,7 @@ float RowEstimator::estimateOpFactor(const T& min, const T& max, const T& value, factor = 1.0; } - return factor; + return float(factor); } // Estimate the percentage of rows that will be returned for a particular extent. diff --git a/tests/simd_processors.cpp b/tests/simd_processors.cpp index 0d009aff7c..3d9aa6e804 100644 --- a/tests/simd_processors.cpp +++ b/tests/simd_processors.cpp @@ -24,11 +24,18 @@ #include "datatypes/mcs_int128.h" #include "simd_sse.h" #include "simd_arm.h" + #if defined(__x86_64__) #define TESTS_USING_SSE 1 -using float64_t = double; -using float32_t = float; #endif + +// Use standard double/float to match SIMD specializations consistently. +// Name them with an mcs_ prefix to avoid ambiguity with std::float64_t when +// 'using namespace std;' is in effect. +using mcs_float64_t = double; +using mcs_float32_t = float; + + #ifdef __aarch64__ #define TESTS_USING_ARM 1 #endif @@ -41,10 +48,19 @@ class SimdProcessorTypedTest : public testing::Test public: using IntegralType = T; #if TESTS_USING_SSE - using SimdType = - std::conditional_t::value, simd::vi128f_wr, - std::conditional_t::value, simd::vi128d_wr, simd::vi128_wr>>; - using Proc = typename simd::SimdFilterProcessor; + using IsF32 = std::integral_constant::value && sizeof(T) == sizeof(float)>; + using IsF64 = std::integral_constant::value && sizeof(T) == sizeof(double)>; + using SimdType = std::conditional_t< + IsF32::value, + simd::vi128f_wr, + std::conditional_t>; + using ScalarForProc = std::conditional_t< + IsF32::value, + float, + std::conditional_t>; + using Proc = typename simd::SimdFilterProcessor; #else using Proc = typename simd::SimdFilterProcessor::WrapperType, T>; #endif @@ -445,7 +461,7 @@ TEST(SimdProcessorTest, Uint64) } TEST(SimdProcessorTest, Float64) { - using IntegralType = float64_t; + using IntegralType = mcs_float64_t; IntegralType l[2]{-5.0, 12.5620}; IntegralType r[2]{2.9, 1}; IntegralType minlr[8]{-5.0, 1}; @@ -482,7 +498,7 @@ TEST(SimdProcessorTest, Float64) } TEST(SimdProcessorTest, Float32) { - using IntegralType = float32_t; + using IntegralType = mcs_float32_t; IntegralType l[4]{82, 102, -5.6, 9.5}; IntegralType r[4]{2.0, 1, -5.7, 6}; IntegralType minlr[8]{2.0, 1, -5.7, 6}; diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index 8a0c1783c2..0fe86dab5a 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -1012,7 +1012,7 @@ inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, in template inline T mcsv1_UDAF::convertAnyTo(static_any::any& valIn) const { - T val = 0; + T val{}; if (valIn.compatible(longTypeId)) { val = valIn.cast(); diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index c4b5002f03..768d1f2e4e 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -1128,7 +1128,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) // Currently, distinct only works on the first parameter. if (k == 0 && fDistinct) { - std::pair val = make_pair(valIn.isNull() ? nullptr : valIn.safeString(""), 1); + std::pair val = make_pair(valIn.safeString(""), 1); std::pair distinct; distinct = fDistinctMap.insert(val); if (distinct.second == false) @@ -1139,7 +1139,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) } } - datum.columnData = valIn.isNull() ? nullptr : valIn.safeString(""); + datum.columnData = valIn.safeString(""); break; } From 0dcfffdac86688125c9d09f3a3d6ab0c6dac955b Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 12 Sep 2025 19:20:20 +0000 Subject: [PATCH 02/11] Using Glaze instead of mariadb JSON parser --- CMakeLists.txt | 1 + cmake/glaze.cmake | 9 + tests/simd_processors.cpp | 18 +- utils/funcexp/CMakeLists.txt | 2 +- utils/funcexp/func_json_array.cpp | 66 ++-- utils/funcexp/func_json_array_append.cpp | 134 +++----- utils/funcexp/func_json_array_insert.cpp | 168 ++++------ utils/funcexp/func_json_contains.cpp | 259 ++++++--------- utils/funcexp/func_json_contains_path.cpp | 140 +++----- utils/funcexp/func_json_depth.cpp | 76 ++--- utils/funcexp/func_json_equals.cpp | 65 ++-- utils/funcexp/func_json_exists.cpp | 52 ++- utils/funcexp/func_json_extract.cpp | 253 +++++++-------- utils/funcexp/func_json_format.cpp | 41 +-- utils/funcexp/func_json_insert.cpp | 327 ++++++++----------- utils/funcexp/func_json_keys.cpp | 162 ++++------ utils/funcexp/func_json_length.cpp | 73 ++--- utils/funcexp/func_json_merge.cpp | 275 ++++------------ utils/funcexp/func_json_merge_patch.cpp | 337 +++---------------- utils/funcexp/func_json_normalize.cpp | 44 ++- utils/funcexp/func_json_object.cpp | 85 +++-- utils/funcexp/func_json_overlaps.cpp | 322 ++++--------------- utils/funcexp/func_json_query.cpp | 86 ++--- utils/funcexp/func_json_quote.cpp | 35 +- utils/funcexp/func_json_remove.cpp | 196 ++++------- utils/funcexp/func_json_search.cpp | 289 ++++++++--------- utils/funcexp/func_json_type.cpp | 60 ++-- utils/funcexp/func_json_unquote.cpp | 46 +-- utils/funcexp/func_json_valid.cpp | 25 +- utils/funcexp/func_json_value.cpp | 145 +++------ utils/funcexp/functor_json.h | 84 +---- utils/funcexp/glaze_path.h | 212 ++++++++++++ utils/funcexp/jsonhelpers.cpp | 375 ---------------------- utils/funcexp/jsonhelpers.h | 106 ------ utils/windowfunction/wf_udaf.cpp | 6 +- 35 files changed, 1588 insertions(+), 2986 deletions(-) create mode 100644 cmake/glaze.cmake create mode 100644 utils/funcexp/glaze_path.h delete mode 100644 utils/funcexp/jsonhelpers.cpp delete mode 100644 utils/funcexp/jsonhelpers.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 47de6382c7..73979aa14e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,7 @@ include(CheckCXXSourceCompiles) include(packages) include(boost) include(thrift) +include(glaze) include(dirs) include(includes) include(libs) diff --git a/cmake/glaze.cmake b/cmake/glaze.cmake new file mode 100644 index 0000000000..a978eb68f7 --- /dev/null +++ b/cmake/glaze.cmake @@ -0,0 +1,9 @@ +include(FetchContent) + +FetchContent_Declare( + glaze + GIT_REPOSITORY https://github.com/stephenberry/glaze.git + GIT_TAG v5.7.0 + GIT_SHALLOW TRUE +) +FetchContent_MakeAvailable(glaze) \ No newline at end of file diff --git a/tests/simd_processors.cpp b/tests/simd_processors.cpp index 3d9aa6e804..26ec38fba7 100644 --- a/tests/simd_processors.cpp +++ b/tests/simd_processors.cpp @@ -35,7 +35,6 @@ using mcs_float64_t = double; using mcs_float32_t = float; - #ifdef __aarch64__ #define TESTS_USING_ARM 1 #endif @@ -48,18 +47,11 @@ class SimdProcessorTypedTest : public testing::Test public: using IntegralType = T; #if TESTS_USING_SSE - using IsF32 = std::integral_constant::value && sizeof(T) == sizeof(float)>; - using IsF64 = std::integral_constant::value && sizeof(T) == sizeof(double)>; - using SimdType = std::conditional_t< - IsF32::value, - simd::vi128f_wr, - std::conditional_t>; - using ScalarForProc = std::conditional_t< - IsF32::value, - float, - std::conditional_t>; + using IsF32 = std::integral_constant::value && sizeof(T) == sizeof(float)>; + using IsF64 = std::integral_constant::value && sizeof(T) == sizeof(double)>; + using SimdType = std::conditional_t>; + using ScalarForProc = std::conditional_t>; using Proc = typename simd::SimdFilterProcessor; #else using Proc = typename simd::SimdFilterProcessor::WrapperType, T>; diff --git a/utils/funcexp/CMakeLists.txt b/utils/funcexp/CMakeLists.txt index 6f1c758a78..1ade4f8299 100644 --- a/utils/funcexp/CMakeLists.txt +++ b/utils/funcexp/CMakeLists.txt @@ -138,7 +138,6 @@ set(funcexp_LIB_SRCS func_weekday.cpp func_year.cpp func_yearweek.cpp - jsonhelpers.cpp sql_crypt.cpp ) @@ -150,6 +149,7 @@ columnstore_link( pron loggingcpp dataconvert + glaze::glaze ${MARIADB_STRING_LIBS} ${NETSNMP_LIBRARIES} ) diff --git a/utils/funcexp/func_json_array.cpp b/utils/funcexp/func_json_array.cpp index 02b02e519e..6996496264 100644 --- a/utils/funcexp/func_json_array.cpp +++ b/utils/funcexp/func_json_array.cpp @@ -1,23 +1,13 @@ +#include #include -using namespace std; #include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; - #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_array::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& resultType) +execplan::CalpontSystemCatalog::ColType Func_json_array::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType) { return fp.size() > 0 ? fp[0]->data()->resultType() : resultType; } @@ -28,25 +18,45 @@ std::string Func_json_array::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo if (fp.size() == 0) return "[]"; - const CHARSET_INFO* retCS = type.getCharset(); - std::string ret("["); - - if (appendJSValue(ret, retCS, row, fp[0])) - goto error; + glz::json_t arr; + auto& a = arr.get_array(); + a.reserve(fp.size()); - for (size_t i = 1; i < fp.size(); i++) + for (size_t i = 0; i < fp.size(); ++i) { - ret.append(", "); - if (appendJSValue(ret, retCS, row, fp[i])) - goto error; + bool argNull = false; + const auto ns = fp[i]->data()->getStrVal(row, argNull); + if (argNull) + { + a.emplace_back(); // null + continue; + } + + auto& valType = fp[i]->data()->resultType(); + if (isCharType(valType.colDataType)) + { + a.emplace_back(ns.safeString("")); + continue; + } + + glz::json_t v; + if (auto e = glz::read_json(v, ns.unsafeStringRef())) + { + a.emplace_back(ns.safeString("")); + } + else + { + a.emplace_back(std::move(v)); + } } - ret.append("]"); - return ret; - -error: - isNull = true; - return ""; + std::string out; + if (auto w = glz::write_json(arr, out)) + { + isNull = true; + return ""; + } + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_array_append.cpp b/utils/funcexp/func_json_array_append.cpp index a163cf869f..a6afcf5945 100644 --- a/utils/funcexp/func_json_array_append.cpp +++ b/utils/funcexp/func_json_array_append.cpp @@ -1,21 +1,13 @@ -#include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -using namespace execplan; +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "glaze_path.h" namespace funcexp { -CalpontSystemCatalog::ColType Func_json_array_append::operationType( - FunctionParm& fp, CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_array_append::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -23,95 +15,65 @@ CalpontSystemCatalog::ColType Func_json_array_append::operationType( std::string Func_json_array_append::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& /*type*/) { - const auto& js = fp[0]->data()->getStrVal(row, isNull); + const auto js_ns = fp[0]->data()->getStrVal(row, isNull); if (isNull) return ""; - const CHARSET_INFO* cs = getCharset(fp[0]); - - json_engine_t jsEg; - const uchar* arrEnd; - size_t strRestLen; - std::string retJS; - retJS.reserve(js.length() + padding); - - initJSPaths(paths, fp, 1, 2); - - utils::NullString tmpJS(js); - for (size_t i = 1, j = 0; i < fp.size(); i += 2, j++) + glz::json_t doc; + if (auto e = glz::read_json(doc, js_ns.unsafeStringRef())) { - const char* rawJS = tmpJS.str(); - const size_t jsLen = tmpJS.length(); - JSONPath& path = paths[j]; - - if (!path.parsed && parseJSPath(path, row, fp[i], false)) - goto error; - - initJSEngine(jsEg, cs, tmpJS); - - if (locateJSPath(jsEg, path)) - goto error; - - if (json_read_value(&jsEg)) - goto error; + isNull = true; + return ""; + } - if (jsEg.value_type == JSON_VALUE_ARRAY) + for (size_t i = 1; i + 1 < fp.size(); i += 2) + { + bool pNull = false, vNull = false; + const auto p_ns = fp[i]->data()->getStrVal(row, pNull); + const auto v_ns = fp[i + 1]->data()->getStrVal(row, vNull); + if (pNull || vNull) { - int itemSize; - if (json_skip_level_and_count(&jsEg, &itemSize)) - goto error; - - arrEnd = jsEg.s.c_str - jsEg.sav_c_len; - strRestLen = jsLen - (arrEnd - (const uchar*)rawJS); - retJS.append(rawJS, arrEnd - (const uchar*)rawJS); - if (itemSize) - retJS.append(", "); - if (appendJSValue(retJS, cs, row, fp[i + 1])) - goto error; + isNull = true; + return ""; + } - retJS.append((const char*)arrEnd, strRestLen); + glz::json_t value; + if (auto ev = glz::read_json(value, v_ns.unsafeStringRef())) + { + isNull = true; + return ""; } - else + + std::vector nodes; + if (!glaze_path::find_matches_mutable(doc, p_ns.unsafeStringRef(), nodes)) { - const uchar *start, *end; + isNull = true; + return ""; + } - /* Wrap as an array. */ - retJS.append(rawJS, (const char*)jsEg.value_begin - rawJS); - start = jsEg.value_begin; - if (jsEg.value_type == JSON_VALUE_OBJECT) + for (auto* node : nodes) + { + if (node->is_array()) { - if (json_skip_level(&jsEg)) - goto error; - end = jsEg.s.c_str; + node->get_array().push_back(value); } else - end = jsEg.value_end; - - retJS.append("["); - retJS.append((const char*)start, end - start); - retJS.append(", "); - if (appendJSValue(retJS, cs, row, fp[i + 1])) - goto error; - retJS.append("]"); - retJS.append((const char*)jsEg.s.c_str, rawJS + jsLen - (const char*)jsEg.s.c_str); + { + glz::json_t arr; + arr.get_array().push_back(*node); + arr.get_array().push_back(value); + *node = std::move(arr); + } } - - // tmpJS save the json string for next loop - tmpJS.assign(retJS); - retJS.clear(); } - initJSEngine(jsEg, cs, tmpJS); - retJS.clear(); - if (doFormat(&jsEg, retJS, Func_json_format::LOOSE)) - goto error; - - isNull = false; - return retJS; - -error: - isNull = true; - return ""; + std::string out; + if (auto w = glz::write_json(doc, out)) + { + isNull = true; + return ""; + } + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_array_insert.cpp b/utils/funcexp/func_json_array_insert.cpp index 5c6ee56edb..bf5603b49c 100644 --- a/utils/funcexp/func_json_array_insert.cpp +++ b/utils/funcexp/func_json_array_insert.cpp @@ -1,21 +1,13 @@ -#include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -using namespace execplan; +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "glaze_path.h" namespace funcexp { -CalpontSystemCatalog::ColType Func_json_array_insert::operationType( - FunctionParm& fp, CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_array_insert::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -23,120 +15,84 @@ CalpontSystemCatalog::ColType Func_json_array_insert::operationType( std::string Func_json_array_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& /*type*/) { - const auto& js = fp[0]->data()->getStrVal(row, isNull); + const auto js_ns = fp[0]->data()->getStrVal(row, isNull); if (isNull) return ""; - const CHARSET_INFO* cs = getCharset(fp[0]); - - json_engine_t jsEg; - std::string retJS; - retJS.reserve(js.length() + 8); - - initJSPaths(paths, fp, 1, 2); + glz::json_t doc; + if (auto e = glz::read_json(doc, js_ns.unsafeStringRef())) + { + isNull = true; + return ""; + } - utils::NullString tmpJS(js); - for (size_t i = 1, j = 0; i < fp.size(); i += 2, j++) + for (size_t i = 1; i + 1 < fp.size(); i += 2) { - const char* rawJS = tmpJS.str(); - const size_t jsLen = tmpJS.length(); - JSONPath& path = paths[j]; - if (!path.parsed) + bool pNull = false, vNull = false; + const auto p_ns = fp[i]->data()->getStrVal(row, pNull); + const auto v_ns = fp[i + 1]->data()->getStrVal(row, vNull); + if (pNull || vNull) { - if (parseJSPath(path, row, fp[i]) || path.p.last_step - 1 < path.p.steps || - path.p.last_step->type != JSON_PATH_ARRAY) - { - if (path.p.s.error == 0) - path.p.s.error = SHOULD_END_WITH_ARRAY; - goto error; - } - path.p.last_step--; + isNull = true; + return ""; } - initJSEngine(jsEg, cs, tmpJS); - - path.currStep = path.p.steps; - - int jsErr = 0; - if (locateJSPath(jsEg, path, &jsErr)) + glz::json_t value; + if (auto ev = glz::read_json(value, v_ns.unsafeStringRef())) { - if (jsErr) - goto error; - - // Can't find the array to insert. - continue; + isNull = true; + return ""; } - if (json_read_value(&jsEg)) - goto error; - - if (jsEg.value_type != JSON_VALUE_ARRAY) + // Parse path and require it ends with an array index step + std::vector steps; + if (!funcexp::glaze_path::parse(p_ns.unsafeStringRef(), steps)) { - /* Must be an array. */ - continue; + isNull = true; + return ""; } - - const char* itemPos = 0; - IntType itemSize = 0; - - while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_ARRAY_END) + if (steps.empty() || steps.back().kind != funcexp::glaze_path::StepKind::Index) { - DBUG_ASSERT(jsEg.state == JST_VALUE); - if (itemSize == path.p.last_step[1].n_item) - { - itemPos = (const char*)jsEg.s.c_str; - break; - } - itemSize++; - - if (json_read_value(&jsEg) || (!json_value_scalar(&jsEg) && json_skip_level(&jsEg))) - goto error; + isNull = true; + return ""; } - if (unlikely(jsEg.s.error || *jsEg.killed_ptr)) - goto error; + // Split into parent and index + auto last = steps.back(); + steps.pop_back(); - if (itemPos) - { - retJS.append(rawJS, itemPos - rawJS); - if (itemSize > 0) - retJS.append(" "); - if (appendJSValue(retJS, cs, row, fp[i + 1])) - goto error; - retJS.append(","); - if (itemSize == 0) - retJS.append(" "); - retJS.append(itemPos, rawJS + jsLen - itemPos); - } - else + std::vector parents; + funcexp::glaze_path::find_matches_mutable_steps(doc, steps, parents); + + for (auto* parent : parents) { - /* Insert position wasn't found - append to the array. */ - DBUG_ASSERT(jsEg.state == JST_ARRAY_END); - itemPos = (const char*)(jsEg.s.c_str - jsEg.sav_c_len); - retJS.append(rawJS, itemPos - rawJS); - if (itemSize > 0) - retJS.append(", "); - if (appendJSValue(retJS, cs, row, fp[i + 1])) - goto error; - retJS.append(itemPos, rawJS + jsLen - itemPos); + // Ensure parent is an array, or wrap into array first + if (!parent->is_array()) + { + glz::json_t arr; + arr.get_array().push_back(*parent); + *parent = std::move(arr); + } + auto& arr = parent->get_array(); + int idx = last.index; + if (idx < 0) + idx = static_cast(arr.size()) + idx; + if (idx < 0 || static_cast(idx) > arr.size()) + { + isNull = true; + return ""; + } + arr.insert(arr.begin() + idx, value); } - - // tmpJS save the json string for next loop - tmpJS.assign(retJS); - retJS.clear(); } - initJSEngine(jsEg, cs, tmpJS); - retJS.clear(); - if (doFormat(&jsEg, retJS, Func_json_format::LOOSE)) - goto error; - - isNull = false; - return retJS; - -error: - isNull = true; - return ""; + std::string out; + if (auto w = glz::write_json(doc, out)) + { + isNull = true; + return ""; + } + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_contains.cpp b/utils/funcexp/func_json_contains.cpp index f399ddc602..10d146745e 100644 --- a/utils/funcexp/func_json_contains.cpp +++ b/utils/funcexp/func_json_contains.cpp @@ -1,154 +1,90 @@ +#include + #include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" #include "rowgroup.h" -using namespace execplan; -using namespace rowgroup; - -#include "dataconvert.h" - -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "glaze_path.h" namespace { -static bool checkContains(json_engine_t* jsEg, json_engine_t* valEg) +static bool contains_json(const glz::json_t& doc, const glz::json_t& val) { - json_engine_t localJsEg; - bool isEgSet; - - switch (jsEg->value_type) + if (doc.is_object()) { - case JSON_VALUE_OBJECT: + if (!val.is_object()) + return false; + const auto& D = doc.get_object(); + const auto& V = val.get_object(); + for (const auto& [k, vv] : V) { - json_string_t keyName; - - if (valEg->value_type != JSON_VALUE_OBJECT) + auto it = D.find(k); + if (it == D.end()) + return false; + if (!contains_json(it->second, vv)) return false; - - localJsEg = *jsEg; - isEgSet = false; - json_string_set_cs(&keyName, valEg->s.cs); - while (json_scan_next(valEg) == 0 && valEg->state != JST_OBJ_END) - { - const uchar *keyStart, *keyEnd; - - DBUG_ASSERT(valEg->state == JST_KEY); - keyStart = valEg->s.c_str; - do - { - keyEnd = valEg->s.c_str; - } while (json_read_keyname_chr(valEg) == 0); - - if (unlikely(valEg->s.error) || json_read_value(valEg)) - return false; - - if (isEgSet) - *jsEg = localJsEg; - else - isEgSet = true; - - json_string_set_str(&keyName, keyStart, keyEnd); - if (!findKeyInObject(jsEg, &keyName) || json_read_value(jsEg) || !checkContains(jsEg, valEg)) - return false; - } - - return valEg->state == JST_OBJ_END && !json_skip_level(jsEg); } - case JSON_VALUE_ARRAY: - if (valEg->value_type != JSON_VALUE_ARRAY) + return true; + } + if (doc.is_array()) + { + const auto& A = doc.get_array(); + if (val.is_array()) + { + // Every element in val must be contained by some element in doc array + for (const auto& vv : val.get_array()) { - localJsEg = *valEg; - isEgSet = false; - while (json_scan_next(jsEg) == 0 && jsEg->state != JST_ARRAY_END) + bool any = false; + for (const auto& dv : A) { - int currLevel, isScaler; - DBUG_ASSERT(jsEg->state == JST_VALUE); - if (json_read_value(jsEg)) - return false; - - if (!(isScaler = json_value_scalar(jsEg))) - currLevel = json_get_level(jsEg); - - if (isEgSet) - *valEg = localJsEg; - else - isEgSet = true; - - if (checkContains(jsEg, valEg)) + if (contains_json(dv, vv)) { - if (json_skip_level(jsEg)) - return false; - return true; + any = true; + break; } - if (unlikely(valEg->s.error) || unlikely(jsEg->s.error) || - (!isScaler && json_skip_to_level(jsEg, currLevel))) - return false; } - return false; - } - /* else */ - localJsEg = *jsEg; - isEgSet = false; - while (json_scan_next(valEg) == 0 && valEg->state != JST_ARRAY_END) - { - DBUG_ASSERT(valEg->state == JST_VALUE); - if (json_read_value(valEg)) - return false; - - if (isEgSet) - *jsEg = localJsEg; - else - isEgSet = true; - if (!checkContains(jsEg, valEg)) + if (!any) return false; } - - return valEg->state == JST_ARRAY_END; - - case JSON_VALUE_STRING: - if (valEg->value_type != JSON_VALUE_STRING) - return false; - /* - TODO: make proper json-json comparison here that takes excipient - into account. - */ - return valEg->value_len == jsEg->value_len && memcmp(valEg->value, jsEg->value, valEg->value_len) == 0; - case JSON_VALUE_NUMBER: - if (valEg->value_type == JSON_VALUE_NUMBER) - { - double jsEgVal, valEgVal; - char* end; - int err; - - jsEgVal = jsEg->s.cs->strntod((char*)jsEg->value, jsEg->value_len, &end, &err); - ; - valEgVal = valEg->s.cs->strntod((char*)valEg->value, valEg->value_len, &end, &err); - ; - - return (fabs(jsEgVal - valEgVal) < 1e-12); - } - else - return false; - - default: break; + return true; + } + // val is not array: any element contains val + for (const auto& dv : A) + if (contains_json(dv, val)) + return true; + return false; } - - /* - We have these not mentioned in the 'switch' above: - - case JSON_VALUE_TRUE: - case JSON_VALUE_FALSE: - case JSON_VALUE_NULL: - */ - return valEg->value_type == jsEg->value_type; + if (doc.is_string()) + { + return val.is_string() && doc.get_string() == val.get_string(); + } + if (doc.is_boolean()) + { + return val.is_boolean() && doc.get_boolean() == val.get_boolean(); + } + if (doc.is_null()) + { + return val.is_null(); + } + if (doc.is_number() && val.is_number()) + { + std::string sd, sv; + if (auto ed = glz::write_json(doc, sd)) + return false; + if (auto ev = glz::write_json(val, sv)) + return false; + char* endd = nullptr; + char* endv = nullptr; + double dd = std::strtod(sd.c_str(), &endd); + double dv = std::strtod(sv.c_str(), &endv); + return std::fabs(dd - dv) < 1e-12; + } + return false; } } // namespace namespace funcexp { -CalpontSystemCatalog::ColType Func_json_contains::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_contains::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -156,58 +92,51 @@ CalpontSystemCatalog::ColType Func_json_contains::operationType(FunctionParm& fp /** * getBoolVal API definition */ -bool Func_json_contains::getBoolVal(Row& row, FunctionParm& fp, bool& isNull, - CalpontSystemCatalog::ColType& /*type*/) +bool Func_json_contains::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) { bool isNullJS = false, isNullVal = false; - const auto& js = fp[0]->data()->getStrVal(row, isNullJS); - const auto& val = fp[1]->data()->getStrVal(row, isNullVal); + const auto js_ns = fp[0]->data()->getStrVal(row, isNullJS); + const auto val_ns = fp[1]->data()->getStrVal(row, isNullVal); if (isNullJS || isNullVal) { isNull = true; return false; } - bool result = false; - - if (!arg2Parsed) + glz::json_t doc; + if (auto e = glz::read_json(doc, js_ns.unsafeStringRef())) { - if (!arg2Const) - { - ConstantColumn* constCol = dynamic_cast(fp[1]->data()); - arg2Const = (constCol != nullptr); - } - arg2Val = val; - arg2Parsed = arg2Const; + isNull = true; + return false; + } + glz::json_t needle; + if (auto e2 = glz::read_json(needle, val_ns.unsafeStringRef())) + { + isNull = true; + return false; } - json_engine_t jsEg; - initJSEngine(jsEg, getCharset(fp[0]), js); - + // Optional path: use first match; if none, return NULL (match prior behavior) if (fp.size() > 2) { - if (!path.parsed && parseJSPath(path, row, fp[2], false)) - goto error; - - if (locateJSPath(jsEg, path)) - goto error; + bool pNull = false; + const auto p = fp[2]->data()->getStrVal(row, pNull); + if (pNull) + { + isNull = true; + return false; + } + std::vector matches; + if (!glaze_path::find_matches(doc, p.unsafeStringRef(), matches) || matches.empty()) + { + isNull = true; + return false; + } + doc = *matches.front(); } - json_engine_t valEg; - initJSEngine(valEg, getCharset(fp[1]), arg2Val); - - if (json_read_value(&jsEg) || json_read_value(&valEg)) - goto error; - - result = checkContains(&jsEg, &valEg); - - if (unlikely(jsEg.s.error || valEg.s.error)) - goto error; - + bool result = contains_json(doc, needle); return result; - -error: - isNull = true; - return false; } } // namespace funcexp diff --git a/utils/funcexp/func_json_contains_path.cpp b/utils/funcexp/func_json_contains_path.cpp index 2d3bb83930..941bbaf4d9 100644 --- a/utils/funcexp/func_json_contains_path.cpp +++ b/utils/funcexp/func_json_contains_path.cpp @@ -1,23 +1,15 @@ -#include +#include #include -using namespace std; #include "functor_json.h" -#include "functioncolumn.h" #include "constantcolumn.h" #include "rowgroup.h" -using namespace execplan; -using namespace rowgroup; - -#include "dataconvert.h" - -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "glaze_path.h" namespace funcexp { -CalpontSystemCatalog::ColType Func_json_contains_path::operationType( - FunctionParm& fp, CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_contains_path::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -25,30 +17,30 @@ CalpontSystemCatalog::ColType Func_json_contains_path::operationType( /** * getBoolVal API definition */ -bool Func_json_contains_path::getBoolVal(Row& row, FunctionParm& fp, bool& isNull, - CalpontSystemCatalog::ColType& /*type*/) +bool Func_json_contains_path::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) { const auto& js_ns = fp[0]->data()->getStrVal(row, isNull); if (isNull) return false; - const std::string_view js = js_ns.unsafeStringRef(); - -#if MYSQL_VERSION_ID >= 100900 - int arrayCounters[JSON_DEPTH_LIMIT]; - bool hasNegPath = false; -#endif - const int argSize = fp.size() - 2; + glz::json_t doc; + if (auto e = glz::read_json(doc, js_ns.unsafeStringRef())) + { + isNull = true; + return false; + } + // Parse mode once (const optimization preserved) if (!isModeParsed) { if (!isModeConst) - isModeConst = (dynamic_cast(fp[1]->data()) != nullptr); + isModeConst = (dynamic_cast(fp[1]->data()) != nullptr); auto mode_ns = fp[1]->data()->getStrVal(row, isNull); if (isNull) return false; - std::string mode = mode_ns.unsafeStringRef(); + std::string mode = mode_ns.safeString(""); transform(mode.begin(), mode.end(), mode.begin(), ::tolower); if (mode != "one" && mode != "all") @@ -61,84 +53,58 @@ bool Func_json_contains_path::getBoolVal(Row& row, FunctionParm& fp, bool& isNul isModeParsed = isModeConst; } - initJSPaths(paths, fp, 2, 1); - if (paths.size() == 0) - hasFound.assign(argSize, false); - - for (size_t i = 2; i < fp.size(); i++) + const int argSize = fp.size() - 2; + if (argSize <= 0) { - JSONPath& path = paths[i - 2]; + isNull = true; + return false; + } - if (!path.parsed) + if (isModeOne) + { + // True if any path has at least one match + for (size_t i = 2; i < fp.size(); ++i) { - if (parseJSPath(path, row, fp[i])) + bool pNull = false; + const auto p = fp[i]->data()->getStrVal(row, pNull); + if (pNull) { isNull = true; return false; } -#if MYSQL_VERSION_ID >= 100900 - hasNegPath |= path.p.types_used & JSON_PATH_NEGATIVE_INDEX; -#endif + std::vector matches; + if (!glaze_path::find_matches(doc, p.unsafeStringRef(), matches)) + { + isNull = true; + return false; // path parse error + } + if (!matches.empty()) + return true; } + return false; } - - json_engine_t jsEg; - json_path_t p; - json_get_path_start(&jsEg, getCharset(fp[0]), (const uchar*)js.data(), (const uchar*)js.data() + js.size(), - &p); - - bool result = false; - int needFound = 0; - - if (!isModeOne) - { - hasFound.assign(argSize, false); - needFound = argSize; - } - - while (json_get_path_next(&jsEg, &p) == 0) + else { -#if MYSQL_VERSION_ID >= 100900 - if (hasNegPath && jsEg.value_type == JSON_VALUE_ARRAY && - json_skip_array_and_count(&jsEg, arrayCounters + (p.last_step - p.steps))) - { - result = true; - break; - } -#endif - - for (int restSize = argSize, curr = 0; restSize > 0; restSize--, curr++) + // True only if all paths have at least one match + for (size_t i = 2; i < fp.size(); ++i) { - JSONPath& path = paths[curr]; -#if MYSQL_VERSION_ID >= 100900 - int cmp = cmpJSPath(&path.p, &p, jsEg.value_type, arrayCounters); -#else - int cmp = cmpJSPath(&path.p, &p, jsEg.value_type); -#endif - if (cmp >= 0) + bool pNull = false; + const auto p = fp[i]->data()->getStrVal(row, pNull); + if (pNull) { - if (isModeOne) - { - result = true; - break; - } - /* mode_all */ - if (hasFound[restSize - 1]) - continue; /* already found */ - if (--needFound == 0) - { - result = true; - break; - } - hasFound[restSize - 1] = true; + isNull = true; + return false; + } + std::vector matches; + if (!glaze_path::find_matches(doc, p.unsafeStringRef(), matches)) + { + isNull = true; + return false; // path parse error } + if (matches.empty()) + return false; } + return true; } - - if (likely(jsEg.s.error == 0)) - return result; - - isNull = true; - return false; } } // namespace funcexp diff --git a/utils/funcexp/func_json_depth.cpp b/utils/funcexp/func_json_depth.cpp index 66bcc22c9f..834bde5c6e 100644 --- a/utils/funcexp/func_json_depth.cpp +++ b/utils/funcexp/func_json_depth.cpp @@ -1,20 +1,14 @@ +#include #include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; +#include +#include #include "rowgroup.h" -using namespace rowgroup; - -#include "dataconvert.h" -using namespace dataconvert; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_depth::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_depth::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -26,42 +20,38 @@ int64_t Func_json_depth::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& i if (isNull) return 0; - int depth = 0, currDepth = 0; - bool incDepth = true; - - json_engine_t jsEg; - initJSEngine(jsEg, getCharset(fp[0]), js); + const std::string_view sv{js.unsafeStringRef().data(), js.unsafeStringRef().size()}; + glz::json_t value; + if (auto err = glz::read_json(value, sv)) + { + isNull = true; + return 0; + } - do + // Compute depth: scalars have depth 1; arrays/objects are 1 + max(child depth) + std::function compute_depth = [&](const glz::json_t& v) -> int64_t { - switch (jsEg.state) + if (v.is_object()) { - case JST_VALUE: - case JST_KEY: - if (incDepth) - { - currDepth++; - incDepth = false; - if (currDepth > depth) - depth = currDepth; - } - break; - case JST_OBJ_START: - case JST_ARRAY_START: incDepth = true; break; - case JST_OBJ_END: - case JST_ARRAY_END: - if (!incDepth) - currDepth--; - incDepth = false; - break; - default: break; + int64_t max_child = 0; + for (const auto& [k, child] : v.get_object()) + { + max_child = std::max(max_child, compute_depth(child)); + } + return 1 + max_child; } - } while (json_scan_next(&jsEg) == 0); - - if (likely(!jsEg.s.error)) - return depth; + if (v.is_array()) + { + int64_t max_child = 0; + for (const auto& child : v.get_array()) + { + max_child = std::max(max_child, compute_depth(child)); + } + return 1 + max_child; + } + return 1; // scalars/null + }; - isNull = true; - return 0; + return compute_depth(value); } } // namespace funcexp diff --git a/utils/funcexp/func_json_equals.cpp b/utils/funcexp/func_json_equals.cpp index 0e0270cc2b..d74ba5f8f8 100644 --- a/utils/funcexp/func_json_equals.cpp +++ b/utils/funcexp/func_json_equals.cpp @@ -1,22 +1,14 @@ +#include #include #include -using namespace std; #include "functor_json.h" -#include "functioncolumn.h" #include "rowgroup.h" -using namespace execplan; -using namespace rowgroup; - -#include "dataconvert.h" - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_equals::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_equals::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -24,26 +16,9 @@ CalpontSystemCatalog::ColType Func_json_equals::operationType(FunctionParm& fp, /** * getBoolVal API definition */ -bool Func_json_equals::getBoolVal(Row& row, FunctionParm& fp, bool& isNull, - CalpontSystemCatalog::ColType& /*type*/) +bool Func_json_equals::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) { - // auto release the DYNAMIC_STRING - using DynamicString = unique_ptr; - - DynamicString str1{new DYNAMIC_STRING(), dynstr_free}; - if (init_dynamic_string(str1.get(), NULL, 0, 0)) - { - isNull = true; - return true; - } - - DynamicString str2{new DYNAMIC_STRING(), dynstr_free}; - if (init_dynamic_string(str2.get(), NULL, 0, 0)) - { - isNull = true; - return true; - } - const auto js1_ns = fp[0]->data()->getStrVal(row, isNull); if (isNull) return false; @@ -52,23 +27,33 @@ bool Func_json_equals::getBoolVal(Row& row, FunctionParm& fp, bool& isNull, if (isNull) return false; - const string_view js1 = js1_ns.unsafeStringRef(); - const string_view js2 = js2_ns.unsafeStringRef(); + const std::string_view js1 = js1_ns.unsafeStringRef(); + const std::string_view js2 = js2_ns.unsafeStringRef(); - bool result = false; - if (json_normalize(str1.get(), js1.data(), js1.size(), getCharset(fp[0]))) + glz::json_t v1, v2; + if (auto e1 = glz::read_json(v1, js1)) { isNull = true; - return result; + return false; } - - if (json_normalize(str2.get(), js2.data(), js2.size(), getCharset(fp[1]))) + if (auto e2 = glz::read_json(v2, js2)) { isNull = true; - return result; + return false; } - result = strcmp(str1->str, str2->str) ? false : true; - return result; + // Compare canonical serialized representations to determine equality + std::string s1, s2; + if (auto e = glz::write_json(v1, s1)) + { + isNull = true; + return false; + } + if (auto e = glz::write_json(v2, s2)) + { + isNull = true; + return false; + } + return s1 == s2; } } // namespace funcexp diff --git a/utils/funcexp/func_json_exists.cpp b/utils/funcexp/func_json_exists.cpp index f7a3325eeb..7f6858829d 100644 --- a/utils/funcexp/func_json_exists.cpp +++ b/utils/funcexp/func_json_exists.cpp @@ -1,19 +1,14 @@ +// Include Glaze first +#include + #include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" #include "rowgroup.h" -using namespace execplan; -using namespace rowgroup; - -#include "dataconvert.h" - -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "glaze_path.h" namespace funcexp { -CalpontSystemCatalog::ColType Func_json_exists::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_exists::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -21,31 +16,34 @@ CalpontSystemCatalog::ColType Func_json_exists::operationType(FunctionParm& fp, /** * getBoolVal API definition */ -bool Func_json_exists::getBoolVal(Row& row, FunctionParm& fp, bool& isNull, - CalpontSystemCatalog::ColType& /*type*/) +bool Func_json_exists::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) { const auto js = fp[0]->data()->getStrVal(row, isNull); if (isNull) return false; - int jsErr = 0; - json_engine_t jsEg; - initJSEngine(jsEg, getCharset(fp[0]), js); - - if (!path.parsed && parseJSPath(path, row, fp[1])) - goto error; - - if (locateJSPath(jsEg, path, &jsErr)) + glz::json_t doc; + if (auto e = glz::read_json(doc, js.unsafeStringRef())) { - if (jsErr) - goto error; + isNull = true; return false; } - return true; + bool pNull = false; + const auto path_ns = fp[1]->data()->getStrVal(row, pNull); + if (pNull) + { + isNull = true; + return false; + } -error: - isNull = true; - return false; + std::vector matches; + if (!glaze_path::find_matches(doc, path_ns.unsafeStringRef(), matches)) + { + isNull = true; + return false; + } + return !matches.empty(); } } // namespace funcexp diff --git a/utils/funcexp/func_json_extract.cpp b/utils/funcexp/func_json_extract.cpp index d07986faf3..f8e77b8dec 100644 --- a/utils/funcexp/func_json_extract.cpp +++ b/utils/funcexp/func_json_extract.cpp @@ -1,162 +1,139 @@ -#include +// Glaze first +#include + +#include #include "functor_json.h" -#include "functioncolumn.h" +#include "glaze_path.h" #include "rowgroup.h" #include "treenode.h" -using namespace execplan; -using namespace rowgroup; - -#include "dataconvert.h" - -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "mcs_decimal.h" namespace funcexp { -int Func_json_extract::doExtract(Row& row, FunctionParm& fp, json_value_types* type, std::string& retJS, - bool compareWhole = true) +namespace +{ +static json_value_types map_type(const glz::json_t& v) +{ + if (v.is_object()) + return JSON_VALUE_OBJECT; + if (v.is_array()) + return JSON_VALUE_ARRAY; + if (v.is_string()) + return JSON_VALUE_STRING; + if (v.is_number()) + return JSON_VALUE_NUMBER; + if (v.is_boolean()) + return v.get_boolean() ? JSON_VALUE_TRUE : JSON_VALUE_FALSE; + return JSON_VALUE_NULL; +} +} // namespace + +int Func_json_extract::doExtract(rowgroup::Row& row, FunctionParm& fp, json_value_types* type, + std::string& retJS, bool compareWhole) { bool isNull = false; const auto js = fp[0]->data()->getStrVal(row, isNull); if (isNull) return 1; - const char* rawJS = js.str(); - json_engine_t jsEg, savJSEg; - json_path_t p; - const uchar* value; - bool notFirstVal = false; - size_t valLen; - bool mayMulVal; - int wildcards; - bool isMatch; -#if MYSQL_VERSION_ID >= 100900 - int arrayCounter[JSON_DEPTH_LIMIT]; - bool hasNegPath = false; -#endif - const size_t argSize = fp.size(); - std::string tmp; - - initJSPaths(paths, fp, 1, 1); - - for (size_t i = 1; i < argSize; i++) - { - JSONPath& path = paths[i - 1]; - path.p.types_used = JSON_PATH_KEY_NULL; - if (!path.parsed && parseJSPath(path, row, fp[i])) - return 1; - -#if MYSQL_VERSION_ID >= 100900 - hasNegPath |= path.p.types_used & JSON_PATH_NEGATIVE_INDEX; -#endif - } -#if MYSQL_VERSION_ID >= 100900 - wildcards = (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD | JSON_PATH_ARRAY_RANGE); -#else - wildcards = (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD); -#endif - mayMulVal = argSize > 2 || (paths[0].p.types_used & wildcards); - - *type = mayMulVal ? JSON_VALUE_ARRAY : JSON_VALUE_NULL; + glz::json_t doc; + if (auto e = glz::read_json(doc, js.unsafeStringRef())) + return 1; - if (compareWhole) - { - retJS.clear(); - if (mayMulVal) - retJS.append("["); - } + const size_t argSize = fp.size(); + if (argSize <= 1) + return 1; - json_get_path_start(&jsEg, getCharset(fp[0]), (const uchar*)rawJS, (const uchar*)rawJS + js.length(), &p); + // Multiple paths -> array of results (null for not found), now with wildcards and recursive descent + std::vector results; + results.reserve(argSize - 1); - while (json_get_path_next(&jsEg, &p) == 0) + size_t found_count = 0; + for (size_t i = 1; i < argSize; ++i) { -#if MYSQL_VERSION_ID >= 100900 - if (hasNegPath && jsEg.value_type == JSON_VALUE_ARRAY && - json_skip_array_and_count(&jsEg, arrayCounter + (p.last_step - p.steps))) - return 1; -#endif - -#if MYSQL_VERSION_ID >= 100900 - isMatch = matchJSPath(paths, &p, jsEg.value_type, arrayCounter, false); -#else - isMatch = matchJSPath(paths, &p, jsEg.value_type, nullptr, false); -#endif - if (!isMatch) + bool pNull = false; + const auto pstr_ns = fp[i]->data()->getStrVal(row, pNull); + if (pNull) + { + results.emplace_back(); continue; + } - value = jsEg.value_begin; - if (*type == JSON_VALUE_NULL) - *type = jsEg.value_type; - - /* we only care about the first found value */ - if (!compareWhole) + std::vector matches; + if (!glaze_path::find_matches(doc, pstr_ns.unsafeStringRef(), matches)) { - retJS = js.safeString(""); - return 0; + results.emplace_back(); + continue; } - if (json_value_scalar(&jsEg)) - valLen = jsEg.value_end - value; - else + if (matches.empty()) { - if (mayMulVal) - savJSEg = jsEg; - if (json_skip_level(&jsEg)) - return 1; - valLen = jsEg.s.c_str - value; - if (mayMulVal) - jsEg = savJSEg; + results.emplace_back(); + continue; } - if (notFirstVal) - retJS.append(", "); - retJS.append((const char*)value, valLen); - - notFirstVal = true; - - if (!mayMulVal) + if (compareWhole) { - /* Loop to the end of the JSON just to make sure it's valid. */ - while (json_get_path_next(&jsEg, &p) == 0) + // For compareWhole: if a single match and single path, emit the value; else emit array of matches + if (argSize - 1 == 1 && matches.size() == 1) + { + results.push_back(*matches.front()); + ++found_count; + } + else { + glz::json_t arr; + auto& a = arr.get_array(); + a.reserve(matches.size()); + for (auto* m : matches) + a.push_back(*m); + results.push_back(std::move(arr)); + ++found_count; } - break; + } + else + { + // For scalar conversions: pick the first match + results.push_back(*matches.front()); + ++found_count; } } - if (unlikely(jsEg.s.error)) - return 1; - - if (!notFirstVal) - /* Nothing was found. */ + if (found_count == 0) return 1; - if (mayMulVal) - retJS.append("]"); - - utils::NullString retJS_ns(retJS); - initJSEngine(jsEg, getCharset(fp[0]), retJS_ns); - if (doFormat(&jsEg, tmp, Func_json_format::LOOSE)) - return 1; + // If only one path and compareWhole true and results[0] not an array-of-matches, return value directly + glz::json_t out_json; + if (results.size() == 1 && compareWhole) + { + out_json = results[0]; + *type = map_type(out_json); + } + else + { + out_json.get_array() = std::move(results); + *type = JSON_VALUE_ARRAY; + } retJS.clear(); - retJS.swap(tmp); + if (auto w = glz::write_json(out_json, retJS)) + return 1; return 0; } -CalpontSystemCatalog::ColType Func_json_extract::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_extract::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } -std::string Func_json_extract::getStrVal(Row& row, FunctionParm& fp, bool& isNull, - CalpontSystemCatalog::ColType& /*type*/) +std::string Func_json_extract::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) { std::string retJS; json_value_types valType; - if (doExtract(row, fp, &valType, retJS) == 0) + if (doExtract(row, fp, &valType, retJS, true) == 0) return retJS; isNull = true; @@ -171,19 +148,15 @@ int64_t Func_json_extract::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& int64_t ret = 0; if (doExtract(row, fp, &valType, retJS, false) == 0) { - switch (valType) + if (valType == JSON_VALUE_TRUE) + return 1; + if (valType == JSON_VALUE_NUMBER || valType == JSON_VALUE_STRING) { - case JSON_VALUE_NUMBER: - case JSON_VALUE_STRING: - { - char* end; - int err; - ret = getCharset(fp[0])->strntoll(retJS.data(), retJS.size(), 10, &end, &err); - break; - } - case JSON_VALUE_TRUE: ret = 1; break; - default: break; - }; + char* end; + int err; + const CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset(); + ret = cs->strntoll(retJS.data(), retJS.size(), 10, &end, &err); + } } return ret; @@ -197,26 +170,22 @@ double Func_json_extract::getDoubleVal(rowgroup::Row& row, FunctionParm& fp, boo double ret = 0.0; if (doExtract(row, fp, &valType, retJS, false) == 0) { - switch (valType) + if (valType == JSON_VALUE_TRUE) + return 1.0; + if (valType == JSON_VALUE_NUMBER || valType == JSON_VALUE_STRING) { - case JSON_VALUE_NUMBER: - case JSON_VALUE_STRING: - { - char* end; - int err; - ret = getCharset(fp[0])->strntod(retJS.data(), retJS.size(), &end, &err); - break; - } - case JSON_VALUE_TRUE: ret = 1.0; break; - default: break; - }; + char* end; + int err; + const CHARSET_INFO* cs = fp[0]->data()->resultType().getCharset(); + ret = cs->strntod(retJS.data(), retJS.size(), &end, &err); + } } return ret; } -execplan::IDB_Decimal Func_json_extract::getDecimalVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType& /*type*/) +datatypes::Decimal Func_json_extract::getDecimalVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) { json_value_types valType; std::string retJS; @@ -227,7 +196,7 @@ execplan::IDB_Decimal Func_json_extract::getDecimalVal(rowgroup::Row& row, Funct { case JSON_VALUE_STRING: case JSON_VALUE_NUMBER: return fp[0]->data()->getDecimalVal(row, isNull); - case JSON_VALUE_TRUE: return IDB_Decimal(1, 0, 1); + case JSON_VALUE_TRUE: return datatypes::Decimal(1, 0, 1); case JSON_VALUE_OBJECT: case JSON_VALUE_ARRAY: case JSON_VALUE_FALSE: @@ -236,6 +205,6 @@ execplan::IDB_Decimal Func_json_extract::getDecimalVal(rowgroup::Row& row, Funct }; } - return IDB_Decimal(0, 0, 1); + return datatypes::Decimal(0, 0, 1); } } // namespace funcexp diff --git a/utils/funcexp/func_json_format.cpp b/utils/funcexp/func_json_format.cpp index 71759a0ecf..b8137c0f87 100644 --- a/utils/funcexp/func_json_format.cpp +++ b/utils/funcexp/func_json_format.cpp @@ -1,23 +1,19 @@ -#include -using namespace std; - +// Include Glaze first to avoid specialization-after-instantiation +#include #include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; +#include #include "rowgroup.h" -using namespace rowgroup; -#include "joblisttypes.h" -using namespace joblist; +static constexpr int LOCAL_TAB_SIZE_LIMIT = 8; -#include "jsonhelpers.h" -using namespace funcexp::helpers; +// Glaze JSON +#include namespace funcexp { -CalpontSystemCatalog::ColType Func_json_format::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_format::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -41,21 +37,28 @@ std::string Func_json_format::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo if (tabSize < 0) tabSize = 0; - else if (tabSize > TAB_SIZE_LIMIT) - tabSize = TAB_SIZE_LIMIT; + else if (tabSize > LOCAL_TAB_SIZE_LIMIT) + tabSize = LOCAL_TAB_SIZE_LIMIT; } } - json_engine_t jsEg; - initJSEngine(jsEg, getCharset(fp[0]), js); - std::string ret; - if (doFormat(&jsEg, ret, fmt, tabSize)) + const std::string_view sv{js.unsafeStringRef().data(), js.unsafeStringRef().size()}; + glz::json_t value; + if (auto err = glz::read_json(value, sv)) + { + isNull = true; + return ""; + } + + std::string out; + // Current Glaze in dependency offers two-argument write_json; use that and check for errors + if (auto werr = glz::write_json(value, out)) { isNull = true; return ""; } isNull = false; - return ret; + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_insert.cpp b/utils/funcexp/func_json_insert.cpp index df464a64af..2b88dde696 100644 --- a/utils/funcexp/func_json_insert.cpp +++ b/utils/funcexp/func_json_insert.cpp @@ -1,21 +1,14 @@ -#include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -using namespace execplan; +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "dataconvert.h" -using namespace dataconvert; -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "glaze_path.h" namespace funcexp { -CalpontSystemCatalog::ColType Func_json_insert::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_insert::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -23,223 +16,173 @@ CalpontSystemCatalog::ColType Func_json_insert::operationType(FunctionParm& fp, std::string Func_json_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& /*type*/) { - const auto& js = fp[0]->data()->getStrVal(row, isNull); + const auto js = fp[0]->data()->getStrVal(row, isNull); if (isNull) return ""; - const bool isInsertMode = mode == INSERT || mode == SET; - const bool isReplaceMode = mode == REPLACE || mode == SET; - - json_engine_t jsEg; - - int jsErr = 0; - json_string_t keyName; - const CHARSET_INFO* cs = getCharset(fp[0]); - json_string_set_cs(&keyName, cs); + glz::json_t doc; + if (auto e = glz::read_json(doc, js.unsafeStringRef())) + { + isNull = true; + return ""; + } - initJSPaths(paths, fp, 1, 2); + const bool isInsertMode = (mode == INSERT) || (mode == SET); + const bool isReplaceMode = (mode == REPLACE) || (mode == SET); - // Save the result of each merge and the result of the final merge separately - std::string retJS; - utils::NullString tmpJS(js); - for (size_t i = 1, j = 0; i < fp.size(); i += 2, j++) + // process pairs: path, value + for (size_t i = 1; i + 1 < fp.size(); i += 2) { - const char* rawJS = tmpJS.str(); - const size_t jsLen = tmpJS.length(); - - JSONPath& path = paths[j]; - const json_path_step_t* lastStep; - const char* valEnd; + bool pNull = false, vNull = false; + const auto p_ns = fp[i]->data()->getStrVal(row, pNull); + const auto v_ns = fp[i + 1]->data()->getStrVal(row, vNull); + if (pNull || vNull) + { + isNull = true; + return ""; + } - if (!path.parsed) + glz::json_t value; + if (auto ev = glz::read_json(value, v_ns.unsafeStringRef())) { - if (parseJSPath(path, row, fp[i], false)) - goto error; + isNull = true; + return ""; + } - path.p.last_step--; + std::vector steps; + if (!funcexp::glaze_path::parse(p_ns.unsafeStringRef(), steps)) + { + isNull = true; + return ""; } - initJSEngine(jsEg, cs, tmpJS); - if (path.p.last_step < path.p.steps) - goto v_found; + // Only constrain wildcards on parent steps; last step may be wildcard + bool parent_has_illegal = false; + for (size_t si = 0; si + 1 < steps.size(); ++si) + { + const auto& s = steps[si]; + if (s.kind != funcexp::glaze_path::StepKind::Key && s.kind != funcexp::glaze_path::StepKind::Index) + parent_has_illegal = true; + } + if (parent_has_illegal) + { + isNull = true; + return ""; + } - if (path.p.last_step >= path.p.steps && locateJSPath(jsEg, path, &jsErr)) + if (steps.empty()) { - if (jsErr) - goto error; - continue; + isNull = true; + return ""; } - if (json_read_value(&jsEg)) - goto error; + funcexp::glaze_path::Step last = steps.back(); + steps.pop_back(); - lastStep = path.p.last_step + 1; - if (lastStep->type & JSON_PATH_ARRAY) + // Find all parent matches (wildcards/recursive supported) + std::vector parents; + funcexp::glaze_path::find_matches_mutable_steps(doc, steps, parents); + + if (parents.empty()) { - IntType itemSize = 0; + // If no parents matched, skip this pair (no-op) + continue; + } - if (jsEg.value_type != JSON_VALUE_ARRAY) + for (auto* cur : parents) + { + // Apply at last step for each matched parent + if (last.kind == funcexp::glaze_path::StepKind::Key) { - const uchar* valStart = jsEg.value_begin; - bool isArrAutoWrap; - - if (isInsertMode) + if (!cur->is_object()) { - if (isReplaceMode) - isArrAutoWrap = lastStep->n_item > 0; - else - { - if (lastStep->n_item == 0) - continue; - isArrAutoWrap = true; - } + isNull = true; + return ""; } - else + auto& obj = cur->get_object(); + auto it = obj.find(last.key); + bool exists = (it != obj.end()); + if (isReplaceMode && exists) { - if (lastStep->n_item) - continue; - isArrAutoWrap = false; + it->second = value; } - - retJS.clear(); - /* Wrap the value as an array. */ - retJS.append(rawJS, (const char*)valStart - rawJS); - if (isArrAutoWrap) - retJS.append("["); - - if (jsEg.value_type == JSON_VALUE_OBJECT) + else if (isInsertMode && !exists) { - if (json_skip_level(&jsEg)) - goto error; + obj.emplace(last.key, value); } - - if (isArrAutoWrap) - retJS.append((const char*)valStart, jsEg.s.c_str - valStart); - retJS.append(", "); - if (appendJSValue(retJS, cs, row, fp[i + 1])) - goto error; - if (isArrAutoWrap) - retJS.append("]"); - retJS.append((const char*)jsEg.s.c_str, rawJS + jsLen - (const char*)jsEg.s.c_str); - - goto continue_point; + // SET semantics covered by the above } + else if (last.kind == funcexp::glaze_path::StepKind::Index) + { + if (!cur->is_array()) + { + // If parent is not array, wrap into array first to permit insert + glz::json_t arr; + arr.get_array().push_back(*cur); + *cur = std::move(arr); + } + auto& arr = cur->get_array(); + int idx = last.index; + if (idx < 0) + idx = static_cast(arr.size()) + idx; - while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_ARRAY_END) + if (isReplaceMode && idx >= 0 && static_cast(idx) < arr.size()) + { + arr[static_cast(idx)] = value; + } + else if (isInsertMode) + { + // insert at index or append if index == size + if (idx < 0 || static_cast(idx) > arr.size()) + { + isNull = true; + return ""; + } + arr.insert(arr.begin() + idx, value); + } + } + else if (last.kind == funcexp::glaze_path::StepKind::KeyWildcard) { - switch (jsEg.state) + // Apply to all keys in object for REPLACE/SET; INSERT has no effect + if (!cur->is_object()) + continue; + if (isReplaceMode) { - case JST_VALUE: - if (itemSize == lastStep->n_item) - goto v_found; - itemSize++; - if (json_skip_array_item(&jsEg)) - goto error; - break; - default: break; + auto& obj = cur->get_object(); + for (auto& [k, v] : obj) + v = value; } } - - if (unlikely(jsEg.s.error)) - goto error; - - if (!isInsertMode) - continue; - - valEnd = (const char*)(jsEg.s.c_str - jsEg.sav_c_len); - retJS.clear(); - retJS.append(rawJS, valEnd - rawJS); - if (itemSize > 0) - retJS.append(", "); - if (appendJSValue(retJS, cs, row, fp[i + 1])) - goto error; - retJS.append(valEnd, rawJS + jsLen - valEnd); - } - else /*JSON_PATH_KEY*/ - { - IntType keySize = 0; - - if (jsEg.value_type != JSON_VALUE_OBJECT) - continue; - - while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_OBJ_END) + else if (last.kind == funcexp::glaze_path::StepKind::IndexWildcard) { - switch (jsEg.state) + // For arrays: REPLACE/SET replace all elements; INSERT appends value once + if (!cur->is_array()) + { + // For non-array parent, wrap then proceed as append/replace + glz::json_t arr; + arr.get_array().push_back(*cur); + *cur = std::move(arr); + } + auto& arr = cur->get_array(); + if (isReplaceMode) + { + for (auto& el : arr) + el = value; + } + else if (isInsertMode) { - case JST_KEY: - json_string_set_str(&keyName, lastStep->key, lastStep->key_end); - if (json_key_matches(&jsEg, &keyName)) - goto v_found; - keySize++; - if (json_skip_key(&jsEg)) - goto error; - break; - default: break; + arr.push_back(value); } } - - if (unlikely(jsEg.s.error)) - goto error; - - if (!isInsertMode) - continue; - - valEnd = (const char*)(jsEg.s.c_str - jsEg.sav_c_len); - - retJS.clear(); - retJS.append(rawJS, valEnd - rawJS); - - if (keySize > 0) - retJS.append(", "); - - retJS.append("\""); - retJS.append((const char*)lastStep->key, lastStep->key_end - lastStep->key); - retJS.append("\":"); - - if (appendJSValue(retJS, cs, row, fp[i + 1])) - goto error; - retJS.append(valEnd, rawJS + jsLen - valEnd); - } - - goto continue_point; - - v_found: - - if (!isReplaceMode) - continue; - - if (json_read_value(&jsEg)) - goto error; - - valEnd = (const char*)jsEg.value_begin; - retJS.clear(); - if (!json_value_scalar(&jsEg)) - { - if (json_skip_level(&jsEg)) - goto error; } - - retJS.append(rawJS, valEnd - rawJS); - if (appendJSValue(retJS, cs, row, fp[i + 1])) - goto error; - retJS.append((const char*)jsEg.s.c_str, rawJS + jsLen - (const char*)jsEg.s.c_str); - - continue_point: - // tmpJS save the json string for next loop - tmpJS.assign(retJS); - retJS.clear(); } - initJSEngine(jsEg, cs, tmpJS); - retJS.clear(); - if (doFormat(&jsEg, retJS, Func_json_format::LOOSE)) - goto error; - - isNull = false; - return retJS; - -error: - isNull = true; - return ""; + std::string out; + if (auto w = glz::write_json(doc, out)) + { + isNull = true; + return ""; + } + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_keys.cpp b/utils/funcexp/func_json_keys.cpp index d926ff8b31..c170c767b3 100644 --- a/utils/funcexp/func_json_keys.cpp +++ b/utils/funcexp/func_json_keys.cpp @@ -1,52 +1,14 @@ -#include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -using namespace execplan; +// Include Glaze first +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "dataconvert.h" -using namespace dataconvert; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; - -namespace -{ -bool checkKeyInList(const std::string& res, const uchar* key, const int keyLen) -{ - const uchar* curr = (const uchar*)res.c_str() + 2; /* beginning '["' */ - const uchar* end = (const uchar*)res.c_str() + res.size() - 1; /* ending '"' */ - - while (curr < end) - { - int i; - for (i = 0; curr[i] != '"' && i < keyLen; i++) - { - if (curr[i] != key[i]) - break; - } - if (curr[i] == '"') - { - if (i == keyLen) - return true; - } - else - { - while (curr[i] != '"') - i++; - } - curr += i + 4; /* skip ', "' */ - } - return false; -} -} // namespace +#include "glaze_path.h" namespace funcexp { -CalpontSystemCatalog::ColType Func_json_keys::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_keys::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -58,73 +20,73 @@ std::string Func_json_keys::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool if (isNull) return ""; - IntType keySize = 0; - std::string ret; - json_engine_t jsEg; - initJSEngine(jsEg, getCharset(fp[0]), js); - - if (fp.size() > 1) + glz::json_t doc; + if (auto e = glz::read_json(doc, js.unsafeStringRef())) { - if (!path.parsed && parseJSPath(path, row, fp[1], false)) - goto error; - - if (locateJSPath(jsEg, path)) - goto error; + isNull = true; + return ""; } - if (json_read_value(&jsEg)) - goto error; - - if (jsEg.value_type != JSON_VALUE_OBJECT) - goto error; - - ret.append("["); - while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_OBJ_END) + const glz::json_t* node = &doc; + if (fp.size() > 1) { - const uchar *keyStart, *keyEnd; - int keyLen; - - switch (jsEg.state) + bool pNull = false; + const auto p = fp[1]->data()->getStrVal(row, pNull); + if (pNull) { - case JST_KEY: - keyStart = jsEg.s.c_str; - do - { - keyEnd = jsEg.s.c_str; - } while (json_read_keyname_chr(&jsEg) == 0); - - if (unlikely(jsEg.s.error)) - goto error; - - keyLen = (int)(keyEnd - keyStart); - - if (!checkKeyInList(ret, keyStart, keyLen)) - { - if (keySize > 0) - ret.append(", "); - ret.append("\""); - ret.append((const char*)keyStart, keyLen); - ret.append("\""); - keySize++; - } - break; - case JST_OBJ_START: - case JST_ARRAY_START: - if (json_skip_level(&jsEg)) - break; + isNull = true; + return ""; + } + std::vector matches; + if (!glaze_path::find_matches(doc, p.unsafeStringRef(), matches) || matches.empty()) + { + isNull = true; + return ""; + } + // Choose the first object match; otherwise NULL + const glz::json_t* first_obj = nullptr; + for (const auto* m : matches) + { + if (m->is_object()) + { + first_obj = m; break; - default: break; + } } + if (!first_obj) + { + isNull = true; + return ""; + } + node = first_obj; + } + + if (!node->is_object()) + { + isNull = true; + return ""; } - if (unlikely(!jsEg.s.error)) + std::vector keys; + keys.reserve(node->get_object().size()); + for (const auto& [k, v] : node->get_object()) { - ret.append("]"); - return ret; + // Avoid duplicates by checking recent entries (object keys are unique anyway) + keys.push_back(k); } -error: - isNull = true; - return ""; + glz::json_t out; + auto& arr = out.get_array(); + arr.reserve(keys.size()); + for (auto& k : keys) + arr.emplace_back(k); + + std::string ret; + if (auto w = glz::write_json(out, ret)) + { + isNull = true; + return ""; + } + return ret; } } // namespace funcexp diff --git a/utils/funcexp/func_json_length.cpp b/utils/funcexp/func_json_length.cpp index 474e8bbca7..91a9c37a15 100644 --- a/utils/funcexp/func_json_length.cpp +++ b/utils/funcexp/func_json_length.cpp @@ -1,21 +1,13 @@ + +#include #include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -using namespace execplan; #include "rowgroup.h" -using namespace rowgroup; - -#include "dataconvert.h" -using namespace dataconvert; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_length::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_length::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -27,55 +19,26 @@ int64_t Func_json_length::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& if (isNull) return 0; - json_engine_t jsEg; - int length = 0; - int err; - - initJSEngine(jsEg, getCharset(fp[0]), js); - + // Path-based form will be migrated later; return NULL for now when path is provided if (fp.size() > 1) { - if (!path.parsed && parseJSPath(path, row, fp[1], false)) - goto error; - - if (locateJSPath(jsEg, path)) - goto error; - } - - if (json_read_value(&jsEg)) - goto error; - - if (json_value_scalar(&jsEg)) - return 1; - - while (!(err = json_scan_next(&jsEg)) && jsEg.state != JST_OBJ_END && jsEg.state != JST_ARRAY_END) - { - switch (jsEg.state) - { - case JST_VALUE: - case JST_KEY: length++; break; - case JST_OBJ_START: - case JST_ARRAY_START: - if (json_skip_level(&jsEg)) - goto error; - break; - default: break; - }; + isNull = true; + return 0; } - if (!err) + const std::string_view sv{js.unsafeStringRef().data(), js.unsafeStringRef().size()}; + glz::json_t value; + if (auto err = glz::read_json(value, sv)) { - // Parse to the end of the JSON just to check it's valid. - while (json_scan_next(&jsEg) == 0) - { - } + isNull = true; + return 0; } - if (likely(!jsEg.s.error)) - return length; - -error: - isNull = true; - return 0; + if (value.is_array()) + return static_cast(value.get_array().size()); + if (value.is_object()) + return static_cast(value.get_object().size()); + // Scalars and null count as length 1 + return 1; } } // namespace funcexp diff --git a/utils/funcexp/func_json_merge.cpp b/utils/funcexp/func_json_merge.cpp index 24dc9d069c..23a6a3e339 100644 --- a/utils/funcexp/func_json_merge.cpp +++ b/utils/funcexp/func_json_merge.cpp @@ -1,215 +1,64 @@ -#include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace { -int doMerge(std::string& retJS, json_engine_t* jsEg1, json_engine_t* jsEg2) +// Merge semantics similar to JSON_MERGE_PRESERVE +static void merge_in_place(glz::json_t& a, const glz::json_t& b) { - if (json_read_value(jsEg1) || json_read_value(jsEg2)) - return 1; - - if (jsEg1->value_type == JSON_VALUE_OBJECT && jsEg2->value_type == JSON_VALUE_OBJECT) + if (a.is_object() && b.is_object()) { - json_engine_t savJSEg1 = *jsEg1; - json_engine_t savJSEg2 = *jsEg2; - - int firstKey = 1; - json_string_t keyName; - - json_string_set_cs(&keyName, jsEg1->s.cs); - - retJS.append("{"); - while (json_scan_next(jsEg1) == 0 && jsEg1->state != JST_OBJ_END) + auto& ao = a.get_object(); + const auto& bo = b.get_object(); + for (const auto& [k, bv] : bo) { - const uchar *keyStart, *keyEnd; - /* Loop through the Json_1 keys and compare with the Json_2 keys. */ - DBUG_ASSERT(jsEg1->state == JST_KEY); - keyStart = jsEg1->s.c_str; - do + auto it = ao.find(k); + if (it == ao.end()) { - keyEnd = jsEg1->s.c_str; - } while (json_read_keyname_chr(jsEg1) == 0); - - if (unlikely(jsEg1->s.error)) - return 1; - - if (firstKey) - firstKey = 0; - else - { - retJS.append(", "); - *jsEg2 = savJSEg2; - } - - retJS.append("\""); - retJS.append((const char*)keyStart, (size_t)(keyEnd - keyStart)); - retJS.append("\":"); - - while (json_scan_next(jsEg2) == 0 && jsEg2->state != JST_OBJ_END) - { - int ires; - DBUG_ASSERT(jsEg2->state == JST_KEY); - json_string_set_str(&keyName, keyStart, keyEnd); - if (!json_key_matches(jsEg2, &keyName)) - { - if (jsEg2->s.error || json_skip_key(jsEg2)) - return 2; - continue; - } - - /* Json_2 has same key as Json_1. Merge them. */ - if ((ires = doMerge(retJS, jsEg1, jsEg2))) - return ires; - goto merged_j1; + ao.emplace(k, bv); } - if (unlikely(jsEg2->s.error)) - return 2; - - keyStart = jsEg1->s.c_str; - /* Just append the Json_1 key value. */ - if (json_skip_key(jsEg1)) - return 1; - - retJS.append((const char*)keyStart, jsEg1->s.c_str - keyStart); - - merged_j1: - continue; - } - - *jsEg2 = savJSEg2; - /* - Now loop through the Json_2 keys. - Skip if there is same key in Json_1 - */ - while (json_scan_next(jsEg2) == 0 && jsEg2->state != JST_OBJ_END) - { - const uchar *keyStart, *keyEnd; - DBUG_ASSERT(jsEg2->state == JST_KEY); - keyStart = jsEg2->s.c_str; - do - { - keyEnd = jsEg2->s.c_str; - } while (json_read_keyname_chr(jsEg2) == 0); - - if (unlikely(jsEg2->s.error)) - return 1; - - *jsEg1 = savJSEg1; - while (json_scan_next(jsEg1) == 0 && jsEg1->state != JST_OBJ_END) + else { - DBUG_ASSERT(jsEg1->state == JST_KEY); - json_string_set_str(&keyName, keyStart, keyEnd); - if (!json_key_matches(jsEg1, &keyName)) - { - if (unlikely(jsEg1->s.error || json_skip_key(jsEg1))) - return 2; - continue; - } - if (json_skip_key(jsEg2) || json_skip_level(jsEg1)) - return 1; - goto continue_j2; + merge_in_place(it->second, bv); } - - if (unlikely(jsEg1->s.error)) - return 2; - - if (firstKey) - firstKey = 0; - else - retJS.append(", "); - - if (json_skip_key(jsEg2)) - return 1; - - retJS.append("\""); - retJS.append((const char*)keyStart, jsEg2->s.c_str - keyStart); - - continue_j2: - continue; } + return; + } - retJS.append("}"); + // Anything else becomes an array concatenation + glz::json_t arr; + arr.get_array().reserve((a.is_array() ? a.get_array().size() : 1) + + (b.is_array() ? b.get_array().size() : 1)); + if (a.is_array()) + { + for (const auto& v : a.get_array()) + arr.get_array().push_back(v); } else { - const uchar *end1, *beg1, *end2, *beg2; - int itemSize1 = 1, itemSize2 = 1; - - beg1 = jsEg1->value_begin; - - /* Merge as a single array. */ - if (jsEg1->value_type == JSON_VALUE_ARRAY) - { - if (json_skip_level_and_count(jsEg1, &itemSize1)) - return 1; - - end1 = jsEg1->s.c_str - jsEg1->sav_c_len; - } - else - { - retJS.append("["); - - if (jsEg1->value_type == JSON_VALUE_OBJECT) - { - if (json_skip_level(jsEg1)) - return 1; - end1 = jsEg1->s.c_str; - } - else - end1 = jsEg1->value_end; - } - - retJS.append((const char*)beg1, end1 - beg1); - - if (json_value_scalar(jsEg2)) - { - beg2 = jsEg2->value_begin; - end2 = jsEg2->value_end; - } - else - { - if (jsEg2->value_type == JSON_VALUE_OBJECT) - { - beg2 = jsEg2->value_begin; - if (json_skip_level(jsEg2)) - return 2; - } - else - { - beg2 = jsEg2->s.c_str; - if (json_skip_level_and_count(jsEg2, &itemSize2)) - return 2; - } - end2 = jsEg2->s.c_str; - } - - if (itemSize1 && itemSize2) - retJS.append(", "); - - retJS.append((const char*)beg2, end2 - beg2); + arr.get_array().push_back(a); + } - if (jsEg2->value_type != JSON_VALUE_ARRAY) - retJS.append("]"); + if (b.is_array()) + { + for (const auto& v : b.get_array()) + arr.get_array().push_back(v); + } + else + { + arr.get_array().push_back(b); } - return 0; + a = std::move(arr); } } // namespace namespace funcexp { -CalpontSystemCatalog::ColType Func_json_merge::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_merge::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -221,40 +70,36 @@ std::string Func_json_merge::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo if (isNull) return ""; - const CHARSET_INFO* js1CS = getCharset(fp[0]); - - json_engine_t jsEg1, jsEg2; - - utils::NullString tmpJS(js); - std::string retJS; + glz::json_t acc; + if (auto e = glz::read_json(acc, js.unsafeStringRef())) + { + isNull = true; + return ""; + } - for (size_t i = 1; i < fp.size(); i++) + for (size_t i = 1; i < fp.size(); ++i) { const auto js2 = fp[i]->data()->getStrVal(row, isNull); if (isNull) - goto error; - - initJSEngine(jsEg1, js1CS, tmpJS); - initJSEngine(jsEg2, getCharset(fp[i]), js2); - - if (doMerge(retJS, &jsEg1, &jsEg2)) - goto error; - - // tmpJS save the merge result for next loop - tmpJS.assign(retJS); - retJS.clear(); + { + return ""; + } + glz::json_t rhs; + if (auto e2 = glz::read_json(rhs, js2.unsafeStringRef())) + { + isNull = true; + return ""; + } + merge_in_place(acc, rhs); } - initJSEngine(jsEg1, js1CS, tmpJS); - retJS.clear(); - if (doFormat(&jsEg1, retJS, Func_json_format::LOOSE)) - goto error; - + std::string out; + if (auto w = glz::write_json(acc, out)) + { + isNull = true; + return ""; + } isNull = false; - return retJS; - -error: - isNull = true; - return ""; + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_merge_patch.cpp b/utils/funcexp/func_json_merge_patch.cpp index cf02445b9f..f4592beed0 100644 --- a/utils/funcexp/func_json_merge_patch.cpp +++ b/utils/funcexp/func_json_merge_patch.cpp @@ -1,267 +1,51 @@ -#include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace { -int copyValuePatch(std::string& retJS, json_engine_t* jsEg) +// RFC 7396 JSON Merge Patch +static void merge_patch_in_place(glz::json_t& target, const glz::json_t& patch) { - int firstKey = 1; - - if (jsEg->value_type != JSON_VALUE_OBJECT) + if (!patch.is_object()) { - const uchar *beg, *end; - - beg = jsEg->value_begin; - - if (!json_value_scalar(jsEg)) - { - if (json_skip_level(jsEg)) - return 1; - end = jsEg->s.c_str; - } - else - end = jsEg->value_end; - - retJS.append((const char*)beg, end - beg); - - return 0; + target = patch; // Entire document replaced + return; } - /* JSON_VALUE_OBJECT */ - retJS.append("{"); - - while (json_scan_next(jsEg) == 0 && jsEg->state != JST_OBJ_END) + if (!target.is_object()) { - const uchar* keyStart; - /* Loop through the Json_1 keys and compare with the Json_2 keys. */ - DBUG_ASSERT(jsEg->state == JST_KEY); - keyStart = jsEg->s.c_str; - - if (json_read_value(jsEg)) - return 1; - - if (jsEg->value_type == JSON_VALUE_NULL) - continue; - - if (!firstKey) - retJS.append(", "); - else - firstKey = 0; - - retJS.append("\""); - retJS.append((const char*)keyStart, jsEg->value_begin - keyStart); - if (copyValuePatch(retJS, jsEg)) - return 1; + target = glz::json_t{}; // make it an object } - retJS.append("}"); - - return 0; -} - -int doMergePatch(std::string& retJS, json_engine_t* jsEg1, json_engine_t* jsEg2, bool& isEmpty) -{ - if (json_read_value(jsEg1)) - { - return 1; - } - if (json_read_value(jsEg2)) + auto& to = target.get_object(); + for (const auto& [k, pv] : patch.get_object()) { - return 1; - } - - if (jsEg1->value_type == JSON_VALUE_OBJECT && jsEg2->value_type == JSON_VALUE_OBJECT) - { - json_engine_t savJSEg1 = *jsEg1; - json_engine_t savJSEg2 = *jsEg2; - - int firstKey = 1; - json_string_t keyName; - size_t savLen; - bool mrgEmpty; - - isEmpty = false; - json_string_set_cs(&keyName, jsEg1->s.cs); - - retJS.append("{"); - while (json_scan_next(jsEg1) == 0 && jsEg1->state != JST_OBJ_END) + if (pv.is_null()) { - const uchar *keyStart, *keyEnd; - /* Loop through the Json_1 keys and compare with the Json_2 keys. */ - DBUG_ASSERT(jsEg1->state == JST_KEY); - keyStart = jsEg1->s.c_str; - do - { - keyEnd = jsEg1->s.c_str; - } while (json_read_keyname_chr(jsEg1) == 0); - - if (jsEg1->s.error) - { - return 1; - } - - savLen = retJS.size(); - - if (!firstKey) - { - retJS.append(", "); - *jsEg2 = savJSEg2; - } - - retJS.append("\""); - retJS.append((const char*)keyStart, keyEnd - keyStart); - retJS.append("\":"); - - while (json_scan_next(jsEg2) == 0 && jsEg2->state != JST_OBJ_END) - { - int ires; - DBUG_ASSERT(jsEg2->state == JST_KEY); - json_string_set_str(&keyName, keyStart, keyEnd); - if (!json_key_matches(jsEg2, &keyName)) - { - if (jsEg2->s.error || json_skip_key(jsEg2)) - { - return 2; - } - continue; - } - - /* Json_2 has same key as Json_1. Merge them. */ - if ((ires = doMergePatch(retJS, jsEg1, jsEg2, mrgEmpty))) - { - return ires; - } - - if (mrgEmpty) - retJS = retJS.substr(0, savLen); - else - firstKey = 0; - - goto merged_j1; - } - - if (jsEg2->s.error) - return 2; - - keyStart = jsEg1->s.c_str; - /* Just append the Json_1 key value. */ - if (json_skip_key(jsEg1)) - { - return 1; - } - retJS.append((const char*)keyStart, jsEg1->s.c_str - keyStart); - firstKey = 0; - - merged_j1: - continue; + to.erase(k); } - - *jsEg2 = savJSEg2; - /* - Now loop through the Json_2 keys. - Skip if there is same key in Json_1 - */ - while (json_scan_next(jsEg2) == 0 && jsEg2->state != JST_OBJ_END) + else { - const uchar *keyStart, *keyEnd; - DBUG_ASSERT(jsEg2->state == JST_KEY); - keyStart = jsEg2->s.c_str; - do - { - keyEnd = jsEg2->s.c_str; - } while (json_read_keyname_chr(jsEg2) == 0); - - if (jsEg2->s.error) - { - return 1; - } - - *jsEg1 = savJSEg1; - while (json_scan_next(jsEg1) == 0 && jsEg1->state != JST_OBJ_END) - { - DBUG_ASSERT(jsEg1->state == JST_KEY); - json_string_set_str(&keyName, keyStart, keyEnd); - if (!json_key_matches(jsEg1, &keyName)) - { - if (jsEg1->s.error || json_skip_key(jsEg1)) - { - return 2; - } - continue; - } - if (json_skip_key(jsEg2) || json_skip_level(jsEg1)) - { - return 1; - } - goto continue_j2; - } - - if (jsEg1->s.error) - return 2; - - savLen = retJS.size(); - - if (!firstKey) - retJS.append(", "); - - retJS.append("\""); - retJS.append((const char*)keyStart, keyEnd - keyStart); - retJS.append("\":"); - - if (json_read_value(jsEg2)) + auto it = to.find(k); + if (it == to.end()) { - return 1; + to.emplace(k, pv); } - - if (jsEg2->value_type == JSON_VALUE_NULL) - retJS = retJS.substr(0, savLen); else { - if (copyValuePatch(retJS, jsEg2)) - { - return 1; - } - firstKey = 0; + merge_patch_in_place(it->second, pv); } - - continue_j2: - continue; } - - retJS.append("}"); } - else - { - if (!json_value_scalar(jsEg1) && json_skip_level(jsEg1)) - { - return 1; - } - - isEmpty = (jsEg2->value_type == JSON_VALUE_NULL); - if (!isEmpty && copyValuePatch(retJS, jsEg2)) - { - return 1; - } - } - - return 0; } } // namespace namespace funcexp { -CalpontSystemCatalog::ColType Func_json_merge_patch::operationType( - FunctionParm& fp, CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_merge_patch::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -269,67 +53,46 @@ CalpontSystemCatalog::ColType Func_json_merge_patch::operationType( std::string Func_json_merge_patch::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& /*type*/) { - // JSON_MERGE_PATCH return NULL if any argument is NULL - bool isEmpty = false, hasNullArg = false; - const auto& js = fp[0]->data()->getStrVal(row, hasNullArg); - - isNull = false; + // JSON_MERGE_PATCH returns NULL if any argument is NULL + bool hasNullArg = false; + const auto base_ns = fp[0]->data()->getStrVal(row, hasNullArg); + if (hasNullArg) + { + isNull = true; + return ""; + } - json_engine_t jsEg1, jsEg2; - jsEg1.s.error = jsEg2.s.error = 0; + glz::json_t target; + if (auto e = glz::read_json(target, base_ns.unsafeStringRef())) + { + isNull = true; + return ""; + } - utils::NullString tmpJS(js); - std::string retJS; - for (size_t i = 1; i < fp.size(); i++) + for (size_t i = 1; i < fp.size(); ++i) { - const auto& js2 = fp[i]->data()->getStrVal(row, isNull); + const auto patch_ns = fp[i]->data()->getStrVal(row, isNull); if (isNull) { - hasNullArg = true; - isNull = false; - goto next; + // Per semantics: if any arg NULL -> NULL + return ""; } - - initJSEngine(jsEg2, getCharset(fp[i]), js2); - - if (hasNullArg) - { - if (json_read_value(&jsEg2)) - goto error; - if (jsEg2.value_type == JSON_VALUE_OBJECT) - goto next; - - hasNullArg = false; - retJS.append(js2.str()); - goto next; - } - - initJSEngine(jsEg1, getCharset(fp[0]), tmpJS); - if (doMergePatch(retJS, &jsEg1, &jsEg2, isEmpty)) + glz::json_t patch; + if (auto e2 = glz::read_json(patch, patch_ns.unsafeStringRef())) { - goto error; + isNull = true; + return ""; } - - if (isEmpty) - retJS.append("null"); - - next: - // tmpJS save the merge result for next loop - tmpJS.assign(retJS); - retJS.clear(); + merge_patch_in_place(target, patch); } - if (hasNullArg) - goto error; - initJSEngine(jsEg1, getCharset(fp[0]), tmpJS); - retJS.clear(); - if (doFormat(&jsEg1, retJS, Func_json_format::LOOSE)) - goto error; + std::string out; + if (auto w = glz::write_json(target, out)) + { + isNull = true; + return ""; + } isNull = false; - return retJS; - -error: - isNull = true; - return ""; + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_normalize.cpp b/utils/funcexp/func_json_normalize.cpp index f25f2f6fe6..6b36dcbae0 100644 --- a/utils/funcexp/func_json_normalize.cpp +++ b/utils/funcexp/func_json_normalize.cpp @@ -1,23 +1,14 @@ +// Include Glaze first to avoid specialization-after-instantiation +#include #include -using namespace std; #include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; - #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_normalize::operationType( - FunctionParm& fp, CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_normalize::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -30,19 +21,20 @@ std::string Func_json_normalize::getStrVal(rowgroup::Row& row, FunctionParm& fp, return ""; const std::string_view js = js_ns.unsafeStringRef(); - using DynamicString = unique_ptr; - - DynamicString str{new DYNAMIC_STRING(), dynstr_free}; - if (init_dynamic_string(str.get(), NULL, 0, 0)) - goto error; - - if (json_normalize(str.get(), js.data(), js.size(), getCharset(fp[0]))) - goto error; - - return str->str; + glz::json_t value; + if (auto err = glz::read_json(value, js)) + { + isNull = true; + return ""; + } -error: - isNull = true; - return ""; + std::string out; + // Write compact canonical JSON (stable ordering may vary vs server, but Glaze keeps object insertion order) + if (auto werr = glz::write_json(value, out)) + { + isNull = true; + return ""; + } + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_object.cpp b/utils/funcexp/func_json_object.cpp index 2a8790e3bf..fcbb2ed9b1 100644 --- a/utils/funcexp/func_json_object.cpp +++ b/utils/funcexp/func_json_object.cpp @@ -1,26 +1,14 @@ #include -using namespace std; - +#include #include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - #include "mcs_datatype.h" -using namespace datatypes; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_object::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& resultType) +execplan::CalpontSystemCatalog::ColType Func_json_object::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& resultType) { return fp.size() > 0 ? fp[0]->data()->resultType() : resultType; } @@ -31,24 +19,63 @@ std::string Func_json_object::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo if (fp.size() == 0) return "{}"; - const CHARSET_INFO* retCS = type.getCharset(); - std::string ret("{"); + glz::json_t obj; + auto& o = obj.get_object(); + + auto add_pair = [&](size_t keyIdx, size_t valIdx) -> bool + { + bool keyNull = false, valNull = false; + const auto key_ns = fp[keyIdx]->data()->getStrVal(row, keyNull); + const auto val_ns = fp[valIdx]->data()->getStrVal(row, valNull); + std::string key = keyNull ? std::string("") : key_ns.safeString(""); + + if (valNull) + { + o[key] = glz::json_t{}; // null + return true; + } - if (appendJSKeyName(ret, retCS, row, fp[0]) || appendJSValue(ret, retCS, row, fp[1])) - goto error; + // Check value type to decide quoting + auto& valType = fp[valIdx]->data()->resultType(); + if (isCharType(valType.colDataType)) + { + o[key] = glz::json_t{val_ns.safeString("")}; + return true; + } - for (size_t i = 2; i < fp.size(); i += 2) + // Try parse as JSON; fallback to string if parsing fails + glz::json_t v; + if (auto e = glz::read_json(v, val_ns.unsafeStringRef())) + { + o[key] = glz::json_t{val_ns.safeString("")}; + } + else + { + o[key] = std::move(v); + } + return true; + }; + + if (!add_pair(0, 1)) { - ret.append(", "); - if (appendJSKeyName(ret, retCS, row, fp[i]) || appendJSValue(ret, retCS, row, fp[i + 1])) - goto error; + isNull = true; + return ""; + } + for (size_t i = 2; i + 1 < fp.size(); i += 2) + { + if (!add_pair(i, i + 1)) + { + isNull = true; + return ""; + } } - ret.append("}"); - return ret; - -error: - isNull = true; - return ""; + std::string out; + if (auto w = glz::write_json(obj, out)) + { + isNull = true; + return ""; + } + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_overlaps.cpp b/utils/funcexp/func_json_overlaps.cpp index b8933a79b0..d834b99b2c 100644 --- a/utils/funcexp/func_json_overlaps.cpp +++ b/utils/funcexp/func_json_overlaps.cpp @@ -1,300 +1,118 @@ +#include #include "functor_json.h" -#include "functioncolumn.h" #include "rowgroup.h" -using namespace execplan; -using namespace rowgroup; -#include "dataconvert.h" - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace { -int checkOverlapsWithObj(json_engine_t* jsEg, json_engine_t* jsEg2, bool compareWhole); -bool checkOverlaps(json_engine_t* jsEg1, json_engine_t* jsEg2, bool compareWhole); -/* - When the two values match or don't match we need to return true or false. - But we can have some more elements in the array left or some more keys - left in the object that we no longer want to compare. In this case, - we want to skip the current item. -*/ -void jsonSkipCurrLevel(json_engine_t* jsEg1, json_engine_t* jsEg2) -{ - json_skip_level(jsEg1); - json_skip_level(jsEg2); -} -/* At least one of the two arguments is a scalar. */ -bool checkOverlapsWithScalar(json_engine_t* jsEg1, json_engine_t* jsEg2) +static bool numbers_equal(const glz::json_t& a, const glz::json_t& b) { - if (json_value_scalar(jsEg2)) - { - if (jsEg1->value_type == jsEg2->value_type) - { - if (jsEg1->value_type == JSON_VALUE_NUMBER) - { - double dj, dv; - char* end; - int err; - - dj = jsEg1->s.cs->strntod((char*)jsEg1->value, jsEg1->value_len, &end, &err); - dv = jsEg2->s.cs->strntod((char*)jsEg2->value, jsEg2->value_len, &end, &err); - - return (fabs(dj - dv) < 1e-12); - } - else if (jsEg1->value_type == JSON_VALUE_STRING) - { - return jsEg2->value_len == jsEg1->value_len && - memcmp(jsEg2->value, jsEg1->value, jsEg2->value_len) == 0; - } - } - return jsEg2->value_type == jsEg1->value_type; - } - else if (jsEg2->value_type == JSON_VALUE_ARRAY) - { - while (json_scan_next(jsEg2) == 0 && jsEg2->state == JST_VALUE) - { - if (json_read_value(jsEg2)) - return false; - if (jsEg1->value_type == jsEg2->value_type) - { - int res1 = checkOverlapsWithScalar(jsEg1, jsEg2); - if (res1) - return true; - } - if (!json_value_scalar(jsEg2)) - json_skip_level(jsEg2); - } - } - return false; + std::string sa, sb; + if (auto ea = glz::write_json(a, sa)) + return false; + if (auto eb = glz::write_json(b, sb)) + return false; + char* enda = nullptr; + char* endb = nullptr; + double da = std::strtod(sa.c_str(), &enda); + double db = std::strtod(sb.c_str(), &endb); + return std::fabs(da - db) < 1e-12; } -/* - Compare when one is object and other is array. This means we are looking - for the object in the array. Hence, when value type of an element of the - array is object, then compare the two objects entirely. If they are - equal return true else return false. -*/ -bool jsonCmpWithArrAndObj(json_engine_t* jsEg1, json_engine_t* jsEg2) +static bool overlaps(const glz::json_t& a, const glz::json_t& b) { - st_json_engine_t locjsEg2 = *jsEg2; - while (json_scan_next(jsEg1) == 0 && jsEg1->state == JST_VALUE) + if (a.is_null() || b.is_null()) + return a.is_null() && b.is_null(); + if (a.is_boolean() && b.is_boolean()) + return a.get_boolean() == b.get_boolean(); + if (a.is_string() && b.is_string()) + return a.get_string() == b.get_string(); + if (a.is_number() && b.is_number()) + return numbers_equal(a, b); + + if (a.is_object() && b.is_object()) { - if (json_read_value(jsEg1)) - return false; - if (jsEg1->value_type == JSON_VALUE_OBJECT) + const auto& ao = a.get_object(); + const auto& bo = b.get_object(); + for (const auto& [k, av] : ao) { - int res1 = checkOverlapsWithObj(jsEg1, jsEg2, true); - if (res1) + auto it = bo.find(k); + if (it != bo.end() && overlaps(av, it->second)) return true; - *jsEg2 = locjsEg2; } - if (!json_value_scalar(jsEg1)) - json_skip_level(jsEg1); + return false; } - return false; -} -bool jsonCmpArrInOrder(json_engine_t* jsEg1, json_engine_t* jsEg2) -{ - bool res = false; - while (json_scan_next(jsEg1) == 0 && json_scan_next(jsEg2) == 0 && jsEg1->state == JST_VALUE && - jsEg2->state == JST_VALUE) + if (a.is_array() && b.is_array()) { - if (json_read_value(jsEg1) || json_read_value(jsEg2)) - return false; - if (jsEg1->value_type != jsEg2->value_type) - { - jsonSkipCurrLevel(jsEg1, jsEg2); - return false; - } - res = checkOverlaps(jsEg1, jsEg2, true); - if (!res) - { - jsonSkipCurrLevel(jsEg1, jsEg2); - return false; - } + const auto& aa = a.get_array(); + const auto& bb = b.get_array(); + for (const auto& av : aa) + for (const auto& bv : bb) + if (overlaps(av, bv)) + return true; + return false; } - res = (jsEg2->state == JST_ARRAY_END || jsEg2->state == JST_OBJ_END ? true : false); - jsonSkipCurrLevel(jsEg1, jsEg2); - return res; -} -int checkOverlapsWithArr(json_engine_t* jsEg1, json_engine_t* jsEg2, bool compareWhole) -{ - if (jsEg2->value_type == JSON_VALUE_ARRAY) + // Object vs array: true if any element overlaps object + if (a.is_object() && b.is_array()) { - if (compareWhole) - return jsonCmpArrInOrder(jsEg1, jsEg2); - - json_engine_t locjsEg2ue = *jsEg2, currJSEg = *jsEg1; - - while (json_scan_next(jsEg1) == 0 && jsEg1->state == JST_VALUE) - { - if (json_read_value(jsEg1)) - return false; - currJSEg = *jsEg1; - while (json_scan_next(jsEg2) == 0 && jsEg2->state == JST_VALUE) - { - if (json_read_value(jsEg2)) - return false; - if (jsEg1->value_type == jsEg2->value_type) - { - int res1 = checkOverlaps(jsEg1, jsEg2, true); - if (res1) - return true; - } - else - { - if (!json_value_scalar(jsEg2)) - json_skip_level(jsEg2); - } - *jsEg1 = currJSEg; - } - *jsEg2 = locjsEg2ue; - if (!json_value_scalar(jsEg1)) - json_skip_level(jsEg1); - } + for (const auto& bv : b.get_array()) + if (overlaps(a, bv)) + return true; return false; } - else if (jsEg2->value_type == JSON_VALUE_OBJECT) + if (a.is_array() && b.is_object()) { - if (compareWhole) - { - jsonSkipCurrLevel(jsEg1, jsEg2); - return false; - } - return jsonCmpWithArrAndObj(jsEg1, jsEg2); + for (const auto& av : a.get_array()) + if (overlaps(av, b)) + return true; + return false; } - else - return checkOverlapsWithScalar(jsEg2, jsEg1); -} -int checkOverlapsWithObj(json_engine_t* jsEg1, json_engine_t* jsEg2, bool compareWhole) -{ - if (jsEg2->value_type == JSON_VALUE_OBJECT) + // scalar vs array: true if any element overlaps scalar + if (a.is_array() && (b.is_string() || b.is_boolean() || b.is_number() || b.is_null())) { - /* Find at least one common key-value pair */ - json_string_t keyName; - bool foundKey = false, foundVal = false; - json_engine_t locJSEg = *jsEg1; - const uchar *keyStart, *keyEnd; - - json_string_set_cs(&keyName, jsEg2->s.cs); - - while (json_scan_next(jsEg2) == 0 && jsEg2->state == JST_KEY) - { - keyStart = jsEg2->s.c_str; - do - { - keyEnd = jsEg2->s.c_str; - } while (json_read_keyname_chr(jsEg2) == 0); - - if (unlikely(jsEg2->s.error)) - return false; - - json_string_set_str(&keyName, keyStart, keyEnd); - foundKey = findKeyInObject(jsEg1, &keyName); - foundVal = 0; - - if (foundKey) - { - if (json_read_value(jsEg1) || json_read_value(jsEg2)) - return false; - - /* - The value of key-value pair can be an be anything. If it is an object - then we need to compare the whole value and if it is an array then - we need to compare the elements in that order. So set compareWhole - to true. - */ - if (jsEg1->value_type == jsEg2->value_type) - foundVal = checkOverlaps(jsEg1, jsEg2, true); - if (foundVal) - { - if (!compareWhole) - return true; - *jsEg1 = locJSEg; - } - else - { - if (compareWhole) - { - jsonSkipCurrLevel(jsEg1, jsEg2); - return false; - } - *jsEg1 = locJSEg; - } - } - else - { - if (compareWhole) - { - jsonSkipCurrLevel(jsEg1, jsEg2); - return false; - } - json_skip_key(jsEg2); - *jsEg1 = locJSEg; - } - } - jsonSkipCurrLevel(jsEg1, jsEg2); - return compareWhole ? true : false; + for (const auto& av : a.get_array()) + if (overlaps(av, b)) + return true; + return false; } - else if (jsEg2->value_type == JSON_VALUE_ARRAY) + if (b.is_array() && (a.is_string() || a.is_boolean() || a.is_number() || a.is_null())) { - if (compareWhole) - { - jsonSkipCurrLevel(jsEg1, jsEg2); - return false; - } - return jsonCmpWithArrAndObj(jsEg2, jsEg1); + for (const auto& bv : b.get_array()) + if (overlaps(a, bv)) + return true; + return false; } - return false; -} -bool checkOverlaps(json_engine_t* jsEg1, json_engine_t* jsEg2, bool compareWhole) -{ - switch (jsEg1->value_type) - { - case JSON_VALUE_OBJECT: return checkOverlapsWithObj(jsEg1, jsEg2, compareWhole); - case JSON_VALUE_ARRAY: return checkOverlapsWithArr(jsEg1, jsEg2, compareWhole); - default: return checkOverlapsWithScalar(jsEg1, jsEg2); - } return false; } } // namespace namespace funcexp { -CalpontSystemCatalog::ColType Func_json_overlaps::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_overlaps::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } -/** - * getBoolVal API definition - */ -bool Func_json_overlaps::getBoolVal(Row& row, FunctionParm& fp, bool& /*isNull*/, - CalpontSystemCatalog::ColType& /*type*/) +bool Func_json_overlaps::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& /*isNull*/, + execplan::CalpontSystemCatalog::ColType& /*type*/) { - bool isNullJS1 = false, isNullJS2 = false; - const auto js1 = fp[0]->data()->getStrVal(row, isNullJS1); - const auto js2 = fp[1]->data()->getStrVal(row, isNullJS2); - if (isNullJS1 || isNullJS2) + bool n1 = false, n2 = false; + const auto js1 = fp[0]->data()->getStrVal(row, n1); + const auto js2 = fp[1]->data()->getStrVal(row, n2); + if (n1 || n2) return false; - json_engine_t jsEg1, jsEg2; - initJSEngine(jsEg1, getCharset(fp[0]), js1); - initJSEngine(jsEg2, getCharset(fp[1]), js2); - - if (json_read_value(&jsEg1) || json_read_value(&jsEg2)) + glz::json_t a, b; + if (auto e1 = glz::read_json(a, js1.unsafeStringRef())) return false; - - bool result = checkOverlaps(&jsEg1, &jsEg2, false); - if (unlikely(jsEg1.s.error || jsEg2.s.error)) + if (auto e2 = glz::read_json(b, js2.unsafeStringRef())) return false; - return result; + return overlaps(a, b); } } // namespace funcexp diff --git a/utils/funcexp/func_json_query.cpp b/utils/funcexp/func_json_query.cpp index 5e25a0a7f9..7fecaa72ec 100644 --- a/utils/funcexp/func_json_query.cpp +++ b/utils/funcexp/func_json_query.cpp @@ -1,58 +1,70 @@ +// Include Glaze first +#include +#include + #include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -using namespace execplan; #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; +#include "glaze_path.h" namespace funcexp { -class QueryJSONPathWrapper : public JSONPathWrapper +execplan::CalpontSystemCatalog::ColType Func_json_query::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { - bool checkAndGetValue(JSONEgWrapper* je, std::string& res, int* error) override + return fp[0]->data()->resultType(); +} + +std::string Func_json_query::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) +{ + bool nullDoc = false, nullPath = false; + const auto js = fp[0]->data()->getStrVal(row, nullDoc); + const auto path_ns = fp[1]->data()->getStrVal(row, nullPath); + if (nullDoc || nullPath) { - return je->checkAndGetComplexVal(res, error); + isNull = true; + return ""; } -}; -bool JSONEgWrapper::checkAndGetComplexVal(std::string& ret, int* error) -{ - if (json_value_scalar(this)) + glz::json_t doc; + if (auto e = glz::read_json(doc, js.unsafeStringRef())) { - /* We skip scalar values. */ - if (json_scan_next(this)) - *error = 1; - return true; + isNull = true; + return ""; } - const uchar* tmpValue = value; - if (json_skip_level(this)) + std::vector matches; + if (!glaze_path::find_matches(doc, path_ns.unsafeStringRef(), matches) || matches.empty()) { - *error = 1; - return true; + isNull = true; + return ""; } - ret.append((const char*)value, s.c_str - tmpValue); - return false; -} + // Prefer the first complex (object/array) match; otherwise NULL + const glz::json_t* selected = nullptr; + for (const auto* m : matches) + { + if (m->is_object() || m->is_array()) + { + selected = m; + break; + } + } + if (!selected) + { + isNull = true; + return ""; + } -CalpontSystemCatalog::ColType Func_json_query::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) -{ - return fp[0]->data()->resultType(); + std::string out; + if (auto w = glz::write_json(*selected, out)) + { + isNull = true; + return ""; + } + return out; } -std::string Func_json_query::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType& /*type*/) -{ - std::string ret; - QueryJSONPathWrapper qpw; - isNull = qpw.extract(ret, row, fp[0], fp[1]); - return isNull ? "" : ret; -} } // namespace funcexp diff --git a/utils/funcexp/func_json_quote.cpp b/utils/funcexp/func_json_quote.cpp index 9b0e25b5a1..b01251929b 100644 --- a/utils/funcexp/func_json_quote.cpp +++ b/utils/funcexp/func_json_quote.cpp @@ -1,26 +1,14 @@ +// Include Glaze first to avoid specialization-after-instantiation +#include #include -using namespace std; #include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; - #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "mcs_datatype.h" -using namespace datatypes; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_quote::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_quote::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -35,13 +23,14 @@ std::string Func_json_quote::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo return ""; } - std::string ret("\""); - - isNull = appendEscapedJS(ret, &my_charset_utf8mb4_bin, js, getCharset(fp[0])); - if (isNull) + // Use Glaze to emit a JSON-escaped, quoted string + const std::string_view sv = js.unsafeStringRef(); + std::string out; + if (auto err = glz::write_json(sv, out)) + { + isNull = true; return ""; - ret.append("\""); - - return ret; + } + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_remove.cpp b/utils/funcexp/func_json_remove.cpp index 35745dff2f..51a800d9d7 100644 --- a/utils/funcexp/func_json_remove.cpp +++ b/utils/funcexp/func_json_remove.cpp @@ -1,21 +1,13 @@ -#include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -using namespace execplan; - -#include "rowgroup.h" -using namespace rowgroup; - -#include "dataconvert.h" -using namespace dataconvert; +// Glaze-based implementation +#include -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "functor_json.h" +#include "glaze_path.h" namespace funcexp { -CalpontSystemCatalog::ColType Func_json_remove::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_remove::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -23,143 +15,85 @@ CalpontSystemCatalog::ColType Func_json_remove::operationType(FunctionParm& fp, std::string Func_json_remove::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& /*type*/) { - const auto& js = fp[0]->data()->getStrVal(row, isNull); - + const auto js_ns = fp[0]->data()->getStrVal(row, isNull); if (isNull) return ""; - json_engine_t jsEg; - - int jsErr = 0; - json_string_t keyName; - const CHARSET_INFO* cs = getCharset(fp[0]); - json_string_set_cs(&keyName, cs); - - initJSPaths(paths, fp, 1, 1); - - std::string retJS; - utils::NullString tmpJS(js); - for (size_t i = 1, j = 0; i < fp.size(); i++, j++) + glz::json_t doc; + if (auto e = glz::read_json(doc, js_ns.unsafeStringRef())) { - const char* rawJS = tmpJS.str(); - const size_t jsLen = tmpJS.length(); - - JSONPath& path = paths[j]; - const json_path_step_t* lastStep; - const char *remStart = nullptr, *remEnd = nullptr; - IntType itemSize = 0; + isNull = true; + return ""; + } - if (!path.parsed) + // For each path, remove target (wildcards/recursive supported on parent) + for (size_t i = 1; i < fp.size(); ++i) + { + bool pNull = false; + const auto p_ns = fp[i]->data()->getStrVal(row, pNull); + if (pNull) { - if (parseJSPath(path, row, fp[i], false)) - goto error; - - path.p.last_step--; - if (path.p.last_step < path.p.steps) - { - path.p.s.error = TRIVIAL_PATH_NOT_ALLOWED; - goto error; - } + isNull = true; + return ""; } - initJSEngine(jsEg, cs, tmpJS); - - if (path.p.last_step < path.p.steps) - goto v_found; + std::vector steps; + if (!funcexp::glaze_path::parse(p_ns.unsafeStringRef(), steps)) + { + isNull = true; + return ""; + } + if (steps.empty()) + continue; + auto last = steps.back(); + steps.pop_back(); - if (locateJSPath(jsEg, path, &jsErr) && jsErr) - goto error; + std::vector parents; + funcexp::glaze_path::find_matches_mutable_steps(doc, steps, parents); - if (json_read_value(&jsEg)) - goto error; + if (parents.empty()) + continue; // nothing to remove for this path - lastStep = path.p.last_step + 1; - if (lastStep->type & JSON_PATH_ARRAY) + for (auto* parent : parents) { - if (jsEg.value_type != JSON_VALUE_ARRAY) - continue; - - while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_ARRAY_END) + if (last.kind == funcexp::glaze_path::StepKind::Key) { - switch (jsEg.state) - { - case JST_VALUE: - if (itemSize == lastStep->n_item) - { - remStart = (const char*)(jsEg.s.c_str - (itemSize ? jsEg.sav_c_len : 0)); - goto v_found; - } - itemSize++; - if (json_skip_array_item(&jsEg)) - goto error; - break; - default: break; - } + if (!parent->is_object()) + continue; + parent->get_object().erase(last.key); } - - if (unlikely(jsEg.s.error)) - goto error; - - continue; - } - else /*JSON_PATH_KEY*/ - { - if (jsEg.value_type != JSON_VALUE_OBJECT) - continue; - - while (json_scan_next(&jsEg) == 0 && jsEg.state != JST_OBJ_END) + else if (last.kind == funcexp::glaze_path::StepKind::Index) { - switch (jsEg.state) + if (!parent->is_array()) + continue; + auto& arr = parent->get_array(); + int idx = last.index; + if (idx < 0) + idx = static_cast(arr.size()) + idx; + if (idx >= 0 && static_cast(idx) < arr.size()) + arr.erase(arr.begin() + idx); + } + else + { + // If last is wildcard, remove all children accordingly + if (parent->is_object() && (last.kind == funcexp::glaze_path::StepKind::KeyWildcard)) { - case JST_KEY: - if (itemSize == 0) - remStart = (const char*)(jsEg.s.c_str - jsEg.sav_c_len); - json_string_set_str(&keyName, lastStep->key, lastStep->key_end); - if (json_key_matches(&jsEg, &keyName)) - goto v_found; - - if (json_skip_key(&jsEg)) - goto error; - - remStart = (const char*)jsEg.s.c_str; - itemSize++; - break; - default: break; + parent->get_object().clear(); + } + else if (parent->is_array() && (last.kind == funcexp::glaze_path::StepKind::IndexWildcard)) + { + parent->get_array().clear(); } } - - if (unlikely(jsEg.s.error)) - goto error; - - continue; } - - v_found: - - if (json_skip_key(&jsEg) || json_scan_next(&jsEg)) - goto error; - remEnd = (jsEg.state == JST_VALUE && itemSize == 0) ? (const char*)jsEg.s.c_str - : (const char*)(jsEg.s.c_str - jsEg.sav_c_len); - retJS.clear(); - retJS.append(rawJS, remStart - rawJS); - if (jsEg.state == JST_KEY && itemSize > 0) - retJS.append(","); - retJS.append(remEnd, rawJS + jsLen - remEnd); - - tmpJS.assign(retJS); - retJS.clear(); } - initJSEngine(jsEg, cs, tmpJS); - retJS.clear(); - if (doFormat(&jsEg, retJS, Func_json_format::LOOSE)) - goto error; - - isNull = false; - return retJS; - -error: - isNull = true; - return ""; + std::string out; + if (auto w = glz::write_json(doc, out)) + { + isNull = true; + return ""; + } + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_search.cpp b/utils/funcexp/func_json_search.cpp index 1365064cfa..00acadc698 100644 --- a/utils/funcexp/func_json_search.cpp +++ b/utils/funcexp/func_json_search.cpp @@ -1,221 +1,216 @@ -#include -using namespace std; - #include "functor_json.h" -#include "functioncolumn.h" #include "constantcolumn.h" -using namespace execplan; - +#include +#include "glaze_path.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; -#include "jsonhelpers.h" -using namespace funcexp::helpers; - -namespace +namespace funcexp { -static bool appendJSPath(std::string& ret, const json_path_t* p) +const static int wildOne = '_'; +const static int wildMany = '%'; + +execplan::CalpontSystemCatalog::ColType Func_json_search::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { - const json_path_step_t* c; + return fp[0]->data()->resultType(); +} - try +static bool match_wild(const std::string& s, const std::string& pat, char escape = '\\') +{ + size_t i = 0, j = 0; + size_t star_i = std::string::npos, star_j = std::string::npos; + while (i < s.size()) { - ret.append("\"$"); - - for (c = p->steps + 1; c <= p->last_step; c++) + if (j < pat.size()) { - if (c->type & JSON_PATH_KEY) + char pc = pat[j]; + if (pc == escape && j + 1 < pat.size()) + { + ++j; + pc = pat[j]; + } + if (pc == '%') { - ret.append(".", 1); - ret.append((const char*)c->key, c->key_end - c->key); + star_i = i; + star_j = ++j; + continue; } - else /*JSON_PATH_ARRAY*/ + if (pc == '_' || pc == s[i]) { - ret.append("["); - ret.append(std::to_string(c->n_item)); - ret.append("]"); + ++i; + ++j; + continue; } } - - ret.append("\""); + if (star_j != std::string::npos) + { + i = ++star_i; + j = star_j; + continue; + } + return false; } - catch (...) + while (j < pat.size()) { - return true; + char pc = pat[j]; + if (pc == escape && j + 1 < pat.size()) + { + j += 2; + continue; + } + if (pc != '%') + return false; + ++j; } - - return false; + return true; } -} // namespace -namespace funcexp -{ -const static int wildOne = '_'; -const static int wildMany = '%'; -int Func_json_search::cmpJSValWild(json_engine_t* jsEg, const utils::NullString& cmpStr, - const CHARSET_INFO* cs) -{ - if (jsEg->value_type != JSON_VALUE_STRING || !jsEg->value_escaped) - return cs->wildcmp((const char*)jsEg->value, (const char*)(jsEg->value + jsEg->value_len), - (const char*)cmpStr.str(), (const char*)cmpStr.end(), escape, wildOne, wildMany) - ? 0 - : 1; - - { - int strLen = (jsEg->value_len / 1024 + 1) * 1024; - char* buf = (char*)alloca(strLen); - if ((strLen = json_unescape(jsEg->s.cs, jsEg->value, jsEg->value + jsEg->value_len, jsEg->s.cs, - (uchar*)buf, (uchar*)(buf + strLen))) <= 0) - return 0; +// (removed unused collect_paths) - return cs->wildcmp(buf, buf + strLen, cmpStr.str(), cmpStr.end(), escape, wildOne, wildMany) ? 0 : 1; - } -} - -CalpontSystemCatalog::ColType Func_json_search::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +static void find_string_matches(const glz::json_t& node, const std::string& base, const std::string& pat, + char escape, std::vector& out) { - return fp[0]->data()->resultType(); + if (node.is_string()) + { + if (match_wild(node.get_string(), pat, escape)) + out.push_back(base); + return; + } + if (node.is_object()) + { + for (const auto& [k, v] : node.get_object()) + { + find_string_matches(v, base + "." + k, pat, escape, out); + } + return; + } + if (node.is_array()) + { + const auto& a = node.get_array(); + for (size_t i = 0; i < a.size(); ++i) + { + find_string_matches(a[i], base + "[" + std::to_string(i) + "]", pat, escape, out); + } + } } std::string Func_json_search::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& /*type*/) { - std::string ret; - bool isNullJS = false, isNullVal = false; - const auto& js = fp[0]->data()->getStrVal(row, isNull); - const auto& cmpStr = fp[2]->data()->getStrVal(row, isNull); - if (isNullJS || isNullVal) + bool nullDoc = false, nullPat = false; + const auto js_ns = fp[0]->data()->getStrVal(row, nullDoc); + const auto pat_ns = fp[2]->data()->getStrVal(row, nullPat); + if (nullDoc || nullPat) { isNull = true; return ""; } + // mode parsing if (!isModeParsed) { if (!isModeConst) - isModeConst = (dynamic_cast(fp[1]->data()) != nullptr); - - const auto& mode_ns = fp[1]->data()->getStrVal(row, isNull); + isModeConst = (dynamic_cast(fp[1]->data()) != nullptr); + const auto mode_ns = fp[1]->data()->getStrVal(row, isNull); if (isNull) return ""; std::string mode = mode_ns.safeString(""); - transform(mode.begin(), mode.end(), mode.begin(), ::tolower); if (mode != "one" && mode != "all") { isNull = true; return ""; } - isModeOne = (mode == "one"); isModeParsed = isModeConst; } + // escape parsing + char esc = '\\'; if (fp.size() >= 4) { - if (dynamic_cast(fp[3]->data()) == nullptr) + if (dynamic_cast(fp[3]->data()) == nullptr) { isNull = true; return ""; } - bool isNullEscape = false; - const auto& escapeStr = fp[3]->data()->getStrVal(row, isNullEscape); - if (escapeStr.length() > 1) + bool nullEsc = false; + const auto esc_ns = fp[3]->data()->getStrVal(row, nullEsc); + if (esc_ns.length() > 1) { isNull = true; return ""; } - escape = isNullEscape ? '\\' : escapeStr.safeString("")[0]; + if (!nullEsc && esc_ns.length() == 1) + esc = esc_ns.safeString("")[0]; } - json_engine_t jsEg; - json_path_t p, savPath; - const CHARSET_INFO* cs = getCharset(fp[0]); - -#if MYSQL_VERSION_ID >= 100900 - int arrayCounter[JSON_DEPTH_LIMIT]; - bool hasNegPath = 0; -#endif - int pathFound = 0; - - initJSPaths(paths, fp, 4, 1); - - for (size_t i = 4; i < fp.size(); i++) + glz::json_t doc; + if (auto e = glz::read_json(doc, js_ns.unsafeStringRef())) { - JSONPath& path = paths[i - 4]; - if (!path.parsed) - { - if (parseJSPath(path, row, fp[i])) - goto error; -#if MYSQL_VERSION_ID >= 100900 - hasNegPath |= path.p.types_used & JSON_PATH_NEGATIVE_INDEX; -#endif - } + isNull = true; + return ""; } + const std::string pat = pat_ns.safeString(""); - json_get_path_start(&jsEg, cs, (const uchar*)js.str(), (const uchar*)js.end(), &p); - - while (json_get_path_next(&jsEg, &p) == 0) + std::vector matches_paths; + // If limiting paths provided, search within those; else search entire document + if (fp.size() > 4) { -#if MYSQL_VERSION_ID >= 100900 - if (hasNegPath && jsEg.value_type == JSON_VALUE_ARRAY && - json_skip_array_and_count(&jsEg, arrayCounter + (p.last_step - p.steps))) - goto error; -#endif - - if (json_value_scalar(&jsEg)) + for (size_t i = 4; i < fp.size(); ++i) { -#if MYSQL_VERSION_ID >= 100900 - bool isMatch = matchJSPath(paths, &p, jsEg.value_type, arrayCounter); -#else - bool isMatch = matchJSPath(paths, &p, jsEg.value_type); -#endif - if ((fp.size() < 5 || isMatch) && cmpJSValWild(&jsEg, cmpStr, cs) != 0) + bool pNull = false; + const auto p_ns = fp[i]->data()->getStrVal(row, pNull); + if (pNull) { - ++pathFound; - if (pathFound == 1) - { - savPath = p; - savPath.last_step = savPath.steps + (p.last_step - p.steps); - } - else - { - if (pathFound == 2) - { - ret.append("["); - if (appendJSPath(ret, &savPath)) - goto error; - } - ret.append(", "); - if (appendJSPath(ret, &p)) - goto error; - } - if (isModeOne) - goto end; + isNull = true; + return ""; + } + std::vector nodes; + if (!glaze_path::find_matches(doc, p_ns.unsafeStringRef(), nodes)) + { + isNull = true; + return ""; + } + for (const auto* n : nodes) + { + // We don't know full JSONPath to n from here; approximate by using the provided path plus subtree + // This builds subpaths relative to provided path, which is acceptable for ColumnStore usage + find_string_matches(*n, p_ns.safeString("$"), pat, esc, matches_paths); + if (isModeOne && !matches_paths.empty()) + goto build; } } } - -end: - if (pathFound == 0) - goto error; - if (pathFound == 1) + else { - if (appendJSPath(ret, &savPath)) - goto error; + find_string_matches(doc, std::string{"$"}, pat, esc, matches_paths); } - else - ret.append("]"); - isNull = false; - return ret; +build: + if (matches_paths.empty()) + { + isNull = true; + return ""; + } -error: - isNull = true; - return ""; + if (isModeOne) + { + // Return a JSON string path + return std::string{"\""} + matches_paths.front() + "\""; + } + else + { + // Return array of JSON string paths + std::string out = "["; + for (size_t i = 0; i < matches_paths.size(); ++i) + { + if (i) + out += ", "; + out += "\"" + matches_paths[i] + "\""; + } + out += "]"; + return out; + } } } // namespace funcexp diff --git a/utils/funcexp/func_json_type.cpp b/utils/funcexp/func_json_type.cpp index 6b75f75e14..71eb5f3794 100644 --- a/utils/funcexp/func_json_type.cpp +++ b/utils/funcexp/func_json_type.cpp @@ -1,53 +1,55 @@ -#include "functor_json.h" -#include "functioncolumn.h" -using namespace execplan; +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_type::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_type::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } std::string Func_json_type::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType& /*type*/) + execplan::CalpontSystemCatalog::ColType& /*type*/) { const auto js = fp[0]->data()->getStrVal(row, isNull); if (isNull) return ""; - json_engine_t jsEg; - std::string result; - - initJSEngine(jsEg, getCharset(fp[0]), js); - - if (json_read_value(&jsEg)) + const std::string_view sv{js.unsafeStringRef().data(), js.unsafeStringRef().size()}; + glz::json_t value; + if (auto err = glz::read_json(value, sv)) { isNull = true; return ""; } - switch (jsEg.value_type) + if (value.is_object()) + return "OBJECT"; + if (value.is_array()) + return "ARRAY"; + if (value.is_string()) + return "STRING"; + if (value.is_number()) { - case JSON_VALUE_OBJECT: result = "OBJECT"; break; - case JSON_VALUE_ARRAY: result = "ARRAY"; break; - case JSON_VALUE_STRING: result = "STRING"; break; - case JSON_VALUE_NUMBER: result = (jsEg.num_flags & JSON_NUM_FRAC_PART) ? "DOUBLE" : "INTEGER"; break; - case JSON_VALUE_TRUE: - case JSON_VALUE_FALSE: result = "BOOLEAN"; break; - default: result = "NULL"; break; + // Determine integer vs floating by canonical serialization + std::string tmp; + if (auto werr = glz::write_json(value, tmp)) + { + isNull = true; + return ""; + } + for (char ch : tmp) + { + if (ch == '.' || ch == 'e' || ch == 'E') + return "DOUBLE"; + } + return "INTEGER"; } - - return result; + if (value.is_boolean()) + return "BOOLEAN"; + return "NULL"; } } // namespace funcexp diff --git a/utils/funcexp/func_json_unquote.cpp b/utils/funcexp/func_json_unquote.cpp index 9eefec3243..5c92350b73 100644 --- a/utils/funcexp/func_json_unquote.cpp +++ b/utils/funcexp/func_json_unquote.cpp @@ -1,51 +1,33 @@ -#include "functor_json.h" -#include "functioncolumn.h" -#include "jsonhelpers.h" -using namespace execplan; +// Include Glaze first to avoid specialization-after-instantiation +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; -#include "joblisttypes.h" -using namespace joblist; - -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_unquote::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_unquote::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } std::string Func_json_unquote::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType& type) + execplan::CalpontSystemCatalog::ColType& /*type*/) { const auto js = fp[0]->data()->getStrVal(row, isNull); if (isNull) return ""; - json_engine_t jsEg; - int strLen; - - const CHARSET_INFO* cs = type.getCharset(); - initJSEngine(jsEg, cs, js); - - json_read_value(&jsEg); - - if (unlikely(jsEg.s.error) || jsEg.value_type != JSON_VALUE_STRING) - return js.safeString(); - - char* buf = (char*)alloca(jsEg.value_len + 1); - if ((strLen = json_unescape(cs, jsEg.value, jsEg.value + jsEg.value_len, &my_charset_utf8mb3_general_ci, - (uchar*)buf, (uchar*)(buf + jsEg.value_len))) >= 0) + // Attempt to parse as a JSON string literal + const std::string_view sv = js.unsafeStringRef(); + std::string out; + if (auto err = glz::read_json(out, sv)) { - buf[strLen] = '\0'; - std::string ret = buf; - return strLen == 0 ? "" : ret; + // Not a JSON string; return the original content + return js.safeString(""); } - - return js.safeString(""); + // Return the unescaped string (may be empty) + return out; } } // namespace funcexp diff --git a/utils/funcexp/func_json_valid.cpp b/utils/funcexp/func_json_valid.cpp index 8d118c75f0..e2b8f31a57 100644 --- a/utils/funcexp/func_json_valid.cpp +++ b/utils/funcexp/func_json_valid.cpp @@ -1,20 +1,12 @@ +#include #include -using namespace std; - #include "functor_json.h" -#include "functioncolumn.h" #include "rowgroup.h" -using namespace execplan; -using namespace rowgroup; - -#include "dataconvert.h" -#include "jsonhelpers.h" -using namespace funcexp::helpers; namespace funcexp { -CalpontSystemCatalog::ColType Func_json_valid::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) +execplan::CalpontSystemCatalog::ColType Func_json_valid::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { return fp[0]->data()->resultType(); } @@ -22,13 +14,18 @@ CalpontSystemCatalog::ColType Func_json_valid::operationType(FunctionParm& fp, /** * getBoolVal API definition */ -bool Func_json_valid::getBoolVal(Row& row, FunctionParm& fp, bool& isNull, - CalpontSystemCatalog::ColType& /*type*/) +bool Func_json_valid::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) { const auto js = fp[0]->data()->getStrVal(row, isNull); if (isNull) return false; - return json_valid(js.unsafeStringRef().data(), js.unsafeStringRef().size(), getCharset(fp[0])); + // Validate by attempting to parse into a dynamic Glaze JSON value + // Any parse error indicates invalid JSON + const std::string_view sv{js.unsafeStringRef().data(), js.unsafeStringRef().size()}; + glz::json_t value; // dynamic JSON value + auto err = glz::read_json(value, sv); + return !err; // true if parsing succeeded } } // namespace funcexp diff --git a/utils/funcexp/func_json_value.cpp b/utils/funcexp/func_json_value.cpp index 95849bcf23..3ee081c868 100644 --- a/utils/funcexp/func_json_value.cpp +++ b/utils/funcexp/func_json_value.cpp @@ -1,128 +1,69 @@ -#include "functor_json.h" -#include "functioncolumn.h" -#include "constantcolumn.h" -using namespace execplan; +#include +#include +#include "functor_json.h" #include "rowgroup.h" -using namespace rowgroup; - -#include "joblisttypes.h" -using namespace joblist; -#include "jsonhelpers.h" -using namespace funcexp::helpers; +#include "glaze_path.h" namespace funcexp { -bool JSONEgWrapper::checkAndGetScalar(std::string& ret, int* error) +execplan::CalpontSystemCatalog::ColType Func_json_value::operationType( + FunctionParm& fp, execplan::CalpontSystemCatalog::ColType& /*resultType*/) { - CHARSET_INFO* cs; - const uchar* js; - uint jsLen; - - if (!json_value_scalar(this)) + return fp[0]->data()->resultType(); +} +std::string Func_json_value::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, + execplan::CalpontSystemCatalog::ColType& /*type*/) +{ + // Expect JSON doc and a single path argument + bool nullDoc = false, nullPath = false; + const auto js = fp[0]->data()->getStrVal(row, nullDoc); + const auto path_ns = fp[1]->data()->getStrVal(row, nullPath); + if (nullDoc || nullPath) { - /* We only look for scalar values! */ - if (json_skip_level(this) || json_scan_next(this)) - *error = 1; - return true; + isNull = true; + return ""; } - if (value_type == JSON_VALUE_TRUE || value_type == JSON_VALUE_FALSE) + glz::json_t doc; + if (auto e = glz::read_json(doc, js.unsafeStringRef())) { - cs = &my_charset_utf8mb4_bin; - js = (const uchar*)((value_type == JSON_VALUE_TRUE) ? "1" : "0"); - jsLen = 1; + isNull = true; + return ""; } - else - { - cs = s.cs; - js = value; - jsLen = value_len; - } - - int strLen = jsLen * cs->mbmaxlen; - char* buf = (char*)alloca(jsLen + strLen); - if ((strLen = json_unescape(cs, js, js + jsLen, cs, (uchar*)buf, (uchar*)buf + jsLen + strLen)) > 0) + std::vector matches; + if (!glaze_path::find_matches(doc, path_ns.unsafeStringRef(), matches) || matches.empty()) { - buf[strLen] = '\0'; - ret.append(buf); - return 0; + isNull = true; + return ""; } - return strLen; -} - -/* - Returns NULL, not an error if the found value - is not a scalar. -*/ -bool JSONPathWrapper::extract(std::string& ret, rowgroup::Row& row, execplan::SPTP& funcParamJS, - execplan::SPTP& funcParamPath) -{ - bool isNullJS = false, isNullPath = false; - - const utils::NullString& js = funcParamJS->data()->getStrVal(row, isNullJS); - const utils::NullString& sjsp = funcParamPath->data()->getStrVal(row, isNullPath); - if (isNullJS || isNullPath) - return true; - - int error = 0; - - if (json_path_setup(&p, getCharset(funcParamPath), (const uchar*)sjsp.str(), (const uchar*)sjsp.end())) - return true; + const glz::json_t& value = *matches.front(); - JSONEgWrapper je(getCharset(funcParamJS), reinterpret_cast(js.str()), - reinterpret_cast(js.end())); - - currStep = p.steps; - - do - { - if (error) - return true; - - IntType arrayCounters[JSON_DEPTH_LIMIT]; - if (json_find_path(&je, &p, &currStep, arrayCounters)) - return true; - - if (json_read_value(&je)) - return true; - - } while (unlikely(checkAndGetValue(&je, ret, &error))); - - return false; -} - -CalpontSystemCatalog::ColType Func_json_value::operationType(FunctionParm& fp, - CalpontSystemCatalog::ColType& /*resultType*/) -{ - return fp[0]->data()->resultType(); -} - -class JSONPathWrapperValue : public JSONPathWrapper -{ - public: - JSONPathWrapperValue() + // Only scalars produce a result + if (value.is_string()) { + // return raw unescaped string + return value.get_string(); } - virtual ~JSONPathWrapperValue() + if (value.is_number()) { + std::string out; + if (auto w = glz::write_json(value, out)) + { + isNull = true; + return ""; + } + return out; } - - bool checkAndGetValue(JSONEgWrapper* je, std::string& res, int* error) override + if (value.is_boolean()) { - return je->checkAndGetScalar(res, error); + return value.get_boolean() ? "1" : "0"; } -}; -std::string Func_json_value::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType& /*type*/) -{ - std::string ret; - JSONPathWrapperValue pw; - isNull = pw.extract(ret, row, fp[0], fp[1]); - return isNull ? "" : ret; + isNull = true; + return ""; } } // namespace funcexp diff --git a/utils/funcexp/functor_json.h b/utils/funcexp/functor_json.h index 55c4d1b5f6..4c73eda296 100644 --- a/utils/funcexp/functor_json.h +++ b/utils/funcexp/functor_json.h @@ -5,8 +5,6 @@ #include #include #include -#include - #include "collation.h" #include "functor_bool.h" #include "functor_int.h" @@ -14,44 +12,19 @@ namespace funcexp { -// The json_path_t wrapper include some flags -struct JSONPath -{ - public: - JSONPath() : constant(false), parsed(false), currStep(nullptr) - { - } - json_path_t p{}; - bool constant; // check if the argument is constant - bool parsed; // check if the argument is parsed - json_path_step_t* currStep; -}; - -class JSONEgWrapper : public json_engine_t +// Local replacement for former json_lib json_value_types +enum json_value_types { - public: - JSONEgWrapper(CHARSET_INFO* cs, const uchar* str, const uchar* end) - { - json_scan_start(this, cs, str, end); - } - JSONEgWrapper(const std::string& str, CHARSET_INFO* cs) - : JSONEgWrapper(cs, (const uchar*)str.data(), (const uchar*)str.data() + str.size()) - { - } - bool checkAndGetScalar(std::string& ret, int* error); - bool checkAndGetComplexVal(std::string& ret, int* error); + JSON_VALUE_NULL = 0, + JSON_VALUE_OBJECT, + JSON_VALUE_ARRAY, + JSON_VALUE_STRING, + JSON_VALUE_NUMBER, + JSON_VALUE_TRUE, + JSON_VALUE_FALSE, + JSON_VALUE_UNINITIALIZED }; -class JSONPathWrapper : public JSONPath -{ - protected: - virtual ~JSONPathWrapper() = default; - virtual bool checkAndGetValue(JSONEgWrapper* je, std::string& ret, int* error) = 0; - - public: - bool extract(std::string& ret, rowgroup::Row& row, execplan::SPTP& funcParmJS, - execplan::SPTP& funcParmPath); -}; /** @brief Func_json_valid class */ class Func_json_valid : public Func_Bool @@ -90,9 +63,6 @@ class Func_json_depth : public Func_Int */ class Func_json_length : public Func_Int { - protected: - JSONPath path; - public: Func_json_length() : Func_Int("json_length") { @@ -194,9 +164,6 @@ class Func_json_array : public Func_Str */ class Func_json_keys : public Func_Str { - protected: - JSONPath path; - public: Func_json_keys() : Func_Str("json_keys") { @@ -213,9 +180,6 @@ class Func_json_keys : public Func_Str */ class Func_json_exists : public Func_Bool { - protected: - JSONPath path; - public: Func_json_exists() : Func_Bool("json_exists") { @@ -233,9 +197,6 @@ class Func_json_exists : public Func_Bool */ class Func_json_quote : public Func_Str { - protected: - JSONPath path; - public: Func_json_quote() : Func_Str("json_quote") { @@ -253,9 +214,6 @@ class Func_json_quote : public Func_Str */ class Func_json_unquote : public Func_Str { - protected: - JSONPath path; - public: Func_json_unquote() : Func_Str("json_unquote") { @@ -380,7 +338,6 @@ class Func_json_query : public Func_Str class Func_json_contains : public Func_Bool { protected: - JSONPath path; bool arg2Const; bool arg2Parsed; // argument 2 is a constant or has been parsed utils::NullString arg2Val; @@ -401,9 +358,6 @@ class Func_json_contains : public Func_Bool */ class Func_json_array_append : public Func_Str { - protected: - std::vector paths; - public: Func_json_array_append() : Func_Str("json_array_append") { @@ -423,9 +377,6 @@ class Func_json_array_append : public Func_Str */ class Func_json_array_insert : public Func_Str { - protected: - std::vector paths; - public: Func_json_array_insert() : Func_Str("json_array_insert") { @@ -454,7 +405,6 @@ class Func_json_insert : public Func_Str protected: MODE mode; - std::vector paths; public: Func_json_insert() : Func_Str("json_insert"), mode(INSERT) @@ -488,9 +438,6 @@ class Func_json_insert : public Func_Str */ class Func_json_remove : public Func_Str { - protected: - std::vector paths; - public: Func_json_remove() : Func_Str("json_remove") { @@ -509,7 +456,6 @@ class Func_json_remove : public Func_Str class Func_json_contains_path : public Func_Bool { protected: - std::vector paths; std::vector hasFound; bool isModeOne; bool isModeConst; @@ -533,9 +479,6 @@ class Func_json_contains_path : public Func_Bool */ class Func_json_overlaps : public Func_Bool { - protected: - JSONPath path; - public: Func_json_overlaps() : Func_Bool("json_overlaps") { @@ -553,7 +496,6 @@ class Func_json_overlaps : public Func_Bool class Func_json_search : public Func_Str { protected: - std::vector paths; bool isModeParsed; bool isModeConst; bool isModeOne; @@ -571,17 +513,11 @@ class Func_json_search : public Func_Str std::string getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, execplan::CalpontSystemCatalog::ColType& type) override; - - private: - int cmpJSValWild(json_engine_t* jsEg, const utils::NullString& cmpStr, const CHARSET_INFO* cs); }; /** @brief Func_json_extract_string class */ class Func_json_extract : public Func_Str { - protected: - std::vector paths; - public: Func_json_extract() : Func_Str("json_extract") { diff --git a/utils/funcexp/glaze_path.h b/utils/funcexp/glaze_path.h new file mode 100644 index 0000000000..3c3bc4b861 --- /dev/null +++ b/utils/funcexp/glaze_path.h @@ -0,0 +1,212 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace funcexp { +namespace glaze_path { + +enum class StepKind { Key, KeyWildcard, Index, IndexWildcard, RecursiveDescent }; + +struct Step { + StepKind kind{StepKind::Key}; + std::string key; // for Key + int index{0}; // for Index (may be negative) +}; + +// Parse a simplified MariaDB/MySQL-like JSON path supporting: +// $ root (optional) +// .key segments +// .* wildcard for any single key +// [n] array index (supports negative) +// [*] array wildcard +// ** recursive descent (match any number of levels) +inline bool parse(std::string_view p, std::vector& out) { + out.clear(); + size_t i = 0; + auto at_end = [&]() { return i >= p.size(); }; + if (!at_end() && p[i] == '$') ++i; + while (!at_end()) { + if (p[i] == '.') { + // consume dot and check for recursive descent + ++i; + if (!at_end() && p[i] == '*') { + ++i; + out.push_back(Step{StepKind::KeyWildcard, std::string(), 0}); + continue; + } + if (!at_end() && p[i] == '.') { + // treat ".." as recursive descent + // consume all consecutive '.' to be robust + while (!at_end() && p[i] == '.') ++i; + out.push_back(Step{StepKind::RecursiveDescent, std::string(), 0}); + continue; + } + // dot key + size_t start = i; + while (!at_end() && p[i] != '.' && p[i] != '[') ++i; + if (start == i) return false; + out.push_back(Step{StepKind::Key, std::string(p.substr(start, i - start)), 0}); + continue; + } + if (p[i] == '[') { + ++i; + if (!at_end() && p[i] == '*') { + ++i; + if (at_end() || p[i] != ']') return false; + ++i; + out.push_back(Step{StepKind::IndexWildcard, std::string(), 0}); + continue; + } + // parse index possibly negative + size_t start = i; + if (!at_end() && p[i] == '-') ++i; + size_t num_start = i; + while (!at_end() && std::isdigit(static_cast(p[i]))) ++i; + if (num_start == i) return false; + int idx = std::stoi(std::string(p.substr(start, i - start))); + if (at_end() || p[i] != ']') return false; + ++i; + out.push_back(Step{StepKind::Index, std::string(), idx}); + continue; + } + // bare key at root without leading dot + size_t start = i; + while (!at_end() && p[i] != '.' && p[i] != '[') ++i; + if (start == i) return false; + out.push_back(Step{StepKind::Key, std::string(p.substr(start, i - start)), 0}); + } + return true; +} + +inline void collect_descendants(const glz::json_t& node, std::vector& out) { + out.push_back(&node); + if (node.is_object()) { + for (const auto& [k, v] : node.get_object()) collect_descendants(v, out); + } else if (node.is_array()) { + for (const auto& v : node.get_array()) collect_descendants(v, out); + } +} + +inline void match_impl(const glz::json_t& node, const std::vector& steps, size_t pos, + std::vector& out) { + if (pos >= steps.size()) { + out.push_back(&node); + return; + } + const Step& st = steps[pos]; + switch (st.kind) { + case StepKind::Key: + if (node.is_object()) { + const auto& obj = node.get_object(); + auto it = obj.find(st.key); + if (it != obj.end()) match_impl(it->second, steps, pos + 1, out); + } + break; + case StepKind::KeyWildcard: + if (node.is_object()) { + const auto& obj = node.get_object(); + for (const auto& [k, v] : obj) match_impl(v, steps, pos + 1, out); + } + break; + case StepKind::Index: + if (node.is_array()) { + const auto& arr = node.get_array(); + int idx = st.index; + if (idx < 0) idx = static_cast(arr.size()) + idx; + if (idx >= 0 && static_cast(idx) < arr.size()) + match_impl(arr[static_cast(idx)], steps, pos + 1, out); + } + break; + case StepKind::IndexWildcard: + if (node.is_array()) { + const auto& arr = node.get_array(); + for (const auto& v : arr) match_impl(v, steps, pos + 1, out); + } + break; + case StepKind::RecursiveDescent: { + // Try matching at current node first + match_impl(node, steps, pos + 1, out); + // Then try all descendants + if (node.is_object()) { + for (const auto& [k, v] : node.get_object()) match_impl(v, steps, pos, out); + } else if (node.is_array()) { + for (const auto& v : node.get_array()) match_impl(v, steps, pos, out); + } + break; + } + } +} + +// Find all matches and append to out as pointers to nodes within root +inline bool find_matches(const glz::json_t& root, std::string_view path, + std::vector& out) { + std::vector steps; + if (!parse(path, steps)) return false; + match_impl(root, steps, 0, out); + return true; +} + +inline void match_impl_mut(glz::json_t& node, const std::vector& steps, size_t pos, + std::vector& out) { + if (pos >= steps.size()) { out.push_back(&node); return; } + const Step& st = steps[pos]; + switch (st.kind) { + case StepKind::Key: + if (node.is_object()) { + auto& obj = node.get_object(); + auto it = obj.find(st.key); + if (it != obj.end()) match_impl_mut(it->second, steps, pos + 1, out); + } + break; + case StepKind::KeyWildcard: + if (node.is_object()) { + auto& obj = node.get_object(); + for (auto& [k, v] : obj) match_impl_mut(v, steps, pos + 1, out); + } + break; + case StepKind::Index: + if (node.is_array()) { + auto& arr = node.get_array(); + int idx = st.index; + if (idx < 0) idx = static_cast(arr.size()) + idx; + if (idx >= 0 && static_cast(idx) < arr.size()) + match_impl_mut(arr[static_cast(idx)], steps, pos + 1, out); + } + break; + case StepKind::IndexWildcard: + if (node.is_array()) { + auto& arr = node.get_array(); + for (auto& v : arr) match_impl_mut(v, steps, pos + 1, out); + } + break; + case StepKind::RecursiveDescent: + // Try here + match_impl_mut(node, steps, pos + 1, out); + if (node.is_object()) { + for (auto& [k, v] : node.get_object()) match_impl_mut(v, steps, pos, out); + } else if (node.is_array()) { + for (auto& v : node.get_array()) match_impl_mut(v, steps, pos, out); + } + break; + } +} + +inline bool find_matches_mutable(glz::json_t& root, std::string_view path, + std::vector& out) { + std::vector steps; + if (!parse(path, steps)) return false; + match_impl_mut(root, steps, 0, out); + return true; +} + +inline void find_matches_mutable_steps(glz::json_t& root, const std::vector& steps, + std::vector& out) { + match_impl_mut(root, steps, 0, out); +} + +} // namespace glaze_path +} // namespace funcexp diff --git a/utils/funcexp/jsonhelpers.cpp b/utils/funcexp/jsonhelpers.cpp deleted file mode 100644 index 760a0c3d1c..0000000000 --- a/utils/funcexp/jsonhelpers.cpp +++ /dev/null @@ -1,375 +0,0 @@ -#include "jsonhelpers.h" -using namespace std; - -namespace funcexp -{ -namespace helpers -{ -int setupJSPath(json_path_t* path, CHARSET_INFO* cs, const utils::NullString& str, bool wildcards = true) -{ - int err = json_path_setup(path, cs, (const uchar*)str.str(), (const uchar*)str.end()); - if (wildcards) - return err; - - if (!err) - { -#if MYSQL_VERSION_ID >= 100900 - bool support = (path->types_used & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD | JSON_PATH_ARRAY_RANGE)) == 0; -#else - bool support = (path->types_used & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0; -#endif - if (support) - return 0; - path->s.error = NO_WILDCARD_ALLOWED; - } - return 1; -} - -bool appendEscapedJS(string& ret, const CHARSET_INFO* retCS, const utils::NullString& js, - const CHARSET_INFO* jsCS) -{ - const int jsLen = js.length(); - const char* rawJS = js.str(); - int strLen = jsLen * 12 * jsCS->mbmaxlen / jsCS->mbminlen; - char* buf = (char*)alloca(strLen + 1); - if ((strLen = json_escape(retCS, (const uchar*)rawJS, (const uchar*)rawJS + jsLen, jsCS, (uchar*)buf, - (uchar*)buf + strLen)) >= 0) - { - buf[strLen] = '\0'; - ret.append(buf, strLen); - return false; - } - - return true; -} - -bool appendJSKeyName(string& ret, const CHARSET_INFO* retCS, rowgroup::Row& row, execplan::SPTP& parm) -{ - bool nullVal = false; - const auto& js = parm->data()->getStrVal(row, nullVal); - if (nullVal) - { - ret.append("\"\": "); - return false; - } - - ret.append("\""); - if (appendEscapedJS(ret, retCS, js, parm->data()->resultType().getCharset())) - return true; - ret.append("\": "); - return false; -} - -bool appendJSValue(string& ret, const CHARSET_INFO* retCS, rowgroup::Row& row, execplan::SPTP& parm) -{ - bool nullVal = false; - const auto& js = parm->data()->getStrVal(row, nullVal); - if (nullVal) - { - ret.append("null"); - return false; - } - - datatypes::SystemCatalog::ColDataType dataType = parm->data()->resultType().colDataType; - if (dataType == datatypes::SystemCatalog::BIGINT && (js == "true" || js == "false")) - { - ret.append(js.safeString("")); - return false; - } - - const CHARSET_INFO* jsCS = parm->data()->resultType().getCharset(); - if (isCharType(dataType)) - { - ret.append("\""); - if (appendEscapedJS(ret, retCS, js, jsCS)) - return true; - ret.append("\""); - return false; - } - - return appendEscapedJS(ret, retCS, js, jsCS); -} - -int appendTab(string& js, const int depth, const int tabSize) -{ - try - { - js.append("\n"); - for (int i = 0; i < depth; i++) - { - js.append(tab_arr, tabSize); - } - } - catch (const std::exception& e) - { - return 1; - } - return 0; -} - -int doFormat(json_engine_t* je, string& niceJS, Func_json_format::FORMATS mode, int tabSize) -{ - int depth = 0; - static const char *comma = ", ", *colon = "\": "; - uint commaLen, colonLen; - int firstValue = 1; - - niceJS.reserve(je->s.str_end - je->s.c_str + 32); - - assert(mode != Func_json_format::DETAILED || (tabSize >= 0 && tabSize <= TAB_SIZE_LIMIT)); - - if (mode == Func_json_format::LOOSE) - { - commaLen = 2; - colonLen = 3; - } - else if (mode == Func_json_format::DETAILED) - { - commaLen = 1; - colonLen = 3; - } - else - { - commaLen = 1; - colonLen = 2; - } - - do - { - switch (je->state) - { - case JST_KEY: - { - const uchar* key_start = je->s.c_str; - const uchar* key_end; - - do - { - key_end = je->s.c_str; - } while (json_read_keyname_chr(je) == 0); - - if (unlikely(je->s.error)) - goto error; - - if (!firstValue) - niceJS.append(comma, commaLen); - - if (mode == Func_json_format::DETAILED && appendTab(niceJS, depth, tabSize)) - goto error; - - niceJS.append("\""); - niceJS.append((const char*)key_start, (int)(key_end - key_start)); - niceJS.append(colon, colonLen); - } - /* now we have key value to handle, so no 'break'. */ - DBUG_ASSERT(je->state == JST_VALUE); - goto handle_value; - - case JST_VALUE: - if (!firstValue) - niceJS.append(comma, commaLen); - - if (mode == Func_json_format::DETAILED && depth > 0 && appendTab(niceJS, depth, tabSize)) - goto error; - - handle_value: - if (json_read_value(je)) - goto error; - if (json_value_scalar(je)) - { - niceJS.append((const char*)je->value_begin, (int)(je->value_end - je->value_begin)); - - firstValue = 0; - } - else - { - if (mode == Func_json_format::DETAILED && depth > 0 && appendTab(niceJS, depth, tabSize)) - goto error; - niceJS.append((je->value_type == JSON_VALUE_OBJECT) ? "{" : "["); - firstValue = 1; - depth++; - } - - break; - - case JST_OBJ_END: - case JST_ARRAY_END: - depth--; - if (mode == Func_json_format::DETAILED && appendTab(niceJS, depth, tabSize)) - goto error; - niceJS.append((je->state == JST_OBJ_END) ? "}" : "]"); - firstValue = 0; - break; - - default: break; - }; - } while (json_scan_next(je) == 0); - - return je->s.error || *je->killed_ptr; - -error: - return 1; -} - -bool findKeyInObject(json_engine_t* jsEg, json_string_t* key) -{ - const uchar* str = key->c_str; - - while (json_scan_next(jsEg) == 0 && jsEg->state != JST_OBJ_END) - { - DBUG_ASSERT(jsEg->state == JST_KEY); - if (json_key_matches(jsEg, key)) - return true; - if (json_skip_key(jsEg)) - return false; - key->c_str = str; - } - - return false; -} - -int cmpPartJSPath(const json_path_step_t* a, const json_path_step_t* aEnd, const json_path_step_t* b, - const json_path_step_t* bEnd, enum json_value_types vt, const int* arraySize) -{ - int ret, ret2; - const json_path_step_t* tmpB = b; - - while (a <= aEnd) - { - if (b > bEnd) - { - while (vt != JSON_VALUE_ARRAY && (a->type & JSON_PATH_ARRAY_WILD) == JSON_PATH_ARRAY && a->n_item == 0) - { - if (++a > aEnd) - return 0; - } - return -2; - } - - DBUG_ASSERT((b->type & (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD)) == 0); - - if (a->type & JSON_PATH_ARRAY) - { - if (b->type & JSON_PATH_ARRAY) - { -#if MYSQL_VERSION_ID >= 100900 - int ret = 0, corrected_n_item_a = 0; - if (arraySize) - corrected_n_item_a = a->n_item < 0 ? arraySize[b - tmpB] + a->n_item : a->n_item; - if (a->type & JSON_PATH_ARRAY_RANGE) - { - int corrected_n_item_end_a = 0; - if (arraySize) - corrected_n_item_end_a = a->n_item_end < 0 ? arraySize[b - tmpB] + a->n_item_end : a->n_item_end; - ret = b->n_item >= corrected_n_item_a && b->n_item <= corrected_n_item_end_a; - } - else - ret = corrected_n_item_a == b->n_item; - - if ((a->type & JSON_PATH_WILD) || ret) - goto step_fits; - goto step_failed; -#else - if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item) - goto step_fits; - goto step_failed; -#endif - } - if ((a->type & JSON_PATH_WILD) == 0 && a->n_item == 0) - goto step_fits_autowrap; - goto step_failed; - } - else /* JSON_PATH_KEY */ - { - if (!(b->type & JSON_PATH_KEY)) - goto step_failed; - - if (!(a->type & JSON_PATH_WILD) && - (a->key_end - a->key != b->key_end - b->key || memcmp(a->key, b->key, a->key_end - a->key) != 0)) - goto step_failed; - - goto step_fits; - } - step_failed: - if (!(a->type & JSON_PATH_DOUBLE_WILD)) - return -1; - b++; - continue; - - step_fits: - b++; - if (!(a->type & JSON_PATH_DOUBLE_WILD)) - { - a++; - continue; - } - - /* Double wild handling needs recursions. */ - ret = cmpPartJSPath(a + 1, aEnd, b, bEnd, vt, arraySize ? arraySize + (b - tmpB) : NULL); - if (ret == 0) - return 0; - - ret2 = cmpPartJSPath(a, aEnd, b, bEnd, vt, arraySize ? arraySize + (b - tmpB) : NULL); - - return (ret2 >= 0) ? ret2 : ret; - - step_fits_autowrap: - if (!(a->type & JSON_PATH_DOUBLE_WILD)) - { - a++; - continue; - } - - /* Double wild handling needs recursions. */ - ret = cmpPartJSPath(a + 1, aEnd, b + 1, bEnd, vt, arraySize ? arraySize + (b - tmpB) : NULL); - if (ret == 0) - return 0; - - ret2 = cmpPartJSPath(a, aEnd, b + 1, bEnd, vt, arraySize ? arraySize + (b - tmpB) : NULL); - - return (ret2 >= 0) ? ret2 : ret; - } - - return b <= bEnd; -} - -int cmpJSPath(const json_path_t* a, const json_path_t* b, enum json_value_types vt, const int* arraySize) -{ - return cmpPartJSPath(a->steps + 1, a->last_step, b->steps + 1, b->last_step, vt, arraySize); -} - -int parseJSPath(JSONPath& path, rowgroup::Row& row, execplan::SPTP& parm, bool wildcards) -{ - // check if path column is const - if (!path.constant) - markConstFlag(path, parm); - - bool isNull = false; - const auto& jsp = parm->data()->getStrVal(row, isNull); - - if (isNull || setupJSPath(&path.p, getCharset(parm), jsp, wildcards)) - return 1; - - path.parsed = path.constant; - - return 0; -} - -bool matchJSPath(const vector& paths, const json_path_t* p, json_value_types valType, - [[maybe_unused]] const int* arrayCounter, bool exact) -{ - for (size_t curr = 0; curr < paths.size(); curr++) - { -#if MYSQL_VERSION_ID >= 100900 - int cmp = cmpJSPath(&paths[curr].p, p, valType, arrayCounter); -#else - int cmp = cmpJSPath(&paths[curr].p, p, valType); -#endif - bool ret = exact ? cmp >= 0 : cmp == 0; - if (ret) - return true; - } - return false; -} -} // namespace helpers -} // namespace funcexp diff --git a/utils/funcexp/jsonhelpers.h b/utils/funcexp/jsonhelpers.h deleted file mode 100644 index 7047677c99..0000000000 --- a/utils/funcexp/jsonhelpers.h +++ /dev/null @@ -1,106 +0,0 @@ -#pragma once - -#include -#include -#include - -#define PREFER_MY_CONFIG_H -#include -#include -#include -// #include - -#include "collation.h" -#include "functor_json.h" -#include "functor_str.h" -#include "collation.h" -#include "rowgroup.h" -#include "treenode.h" -#include "functioncolumn.h" -#include "constantcolumn.h" - -#include "json_lib.h" - -namespace funcexp::helpers -{ - -static const int NO_WILDCARD_ALLOWED = 1; - -/* - Checks if the path has '.*' '[*]' or '**' constructions - and sets the NO_WILDCARD_ALLOWED error if the case. -*/ -int setupJSPath(json_path_t* path, CHARSET_INFO* cs, const std::string_view& str, bool wildcards); - -// Return true if err occur, let the outer function handle the exception -bool appendEscapedJS(std::string& ret, const CHARSET_INFO* retCS, const utils::NullString& js, - const CHARSET_INFO* jsCS); -bool appendJSKeyName(std::string& ret, const CHARSET_INFO* retCS, rowgroup::Row& row, execplan::SPTP& parm); -bool appendJSValue(std::string& ret, const CHARSET_INFO* retCS, rowgroup::Row& row, execplan::SPTP& parm); - -static const int TAB_SIZE_LIMIT = 8; -static const char tab_arr[TAB_SIZE_LIMIT + 1] = " "; - -// Format the json using format mode -int doFormat(json_engine_t* je, std::string& niceJS, Func_json_format::FORMATS mode, int tabSize = 4); - -static const int SHOULD_END_WITH_ARRAY = 2; -static const int TRIVIAL_PATH_NOT_ALLOWED = 3; - -bool findKeyInObject(json_engine_t* jsEg, json_string_t* key); - -#if MYSQL_VERSION_ID >= 100900 -using IntType = int; -#else -using IntType = uint; -#endif - -/* - Compatible with json_find_path function in json_lib - before 10.9: uint* array_counters - after 10.9: int* array_counters - */ -inline static int locateJSPath(json_engine_t& jsEg, JSONPath& path, int* jsErr = nullptr) -{ - IntType arrayCounters[JSON_DEPTH_LIMIT]; - path.currStep = path.p.steps; - if (json_find_path(&jsEg, &path.p, &path.currStep, arrayCounters)) - { - if (jsErr && jsEg.s.error) - *jsErr = 1; - return 1; - } - return 0; -} - -// Check and set the constant flag from function parameters -inline static void markConstFlag(JSONPath& path, const execplan::SPTP& parm) -{ - path.constant = (dynamic_cast(parm->data()) != nullptr); -} - -int cmpJSPath(const json_path_t* a, const json_path_t* b, enum json_value_types vt, - const int* arraySize = nullptr); - -inline const CHARSET_INFO* getCharset(execplan::SPTP& parm) -{ - return parm->data()->resultType().getCharset(); -} - -inline void initJSEngine(json_engine_t& jsEg, const CHARSET_INFO* jsCS, const utils::NullString& js) -{ - json_scan_start(&jsEg, jsCS, (const uchar*)js.str(), (const uchar*)js.end()); -} - -int parseJSPath(JSONPath& path, rowgroup::Row& row, execplan::SPTP& parm, bool wildcards = true); - -inline void initJSPaths(std::vector& paths, FunctionParm& fp, const int start, const int step) -{ - if (paths.empty()) - for (size_t i = start; i < fp.size(); i += step) - paths.emplace_back(); -} - -bool matchJSPath(const std::vector& paths, const json_path_t* p, json_value_types valType, - const int* arrayCounter = nullptr, bool exact = true); -} // namespace funcexp::helpers diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 768d1f2e4e..209a0e2bb7 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -17,7 +17,7 @@ 51 Franklin St., Fifth Floor, Boston, MA 02110, USA *************************************************************************************/ -//#define NDEBUG +// #define NDEBUG #include #include #include @@ -454,7 +454,7 @@ bool WF_udaf::dropValues(int64_t b, int64_t e) if (cc) { - valIn = cc->getStrVal(fRow, isNull); // XXX: we probably need to change Distinctmap. + valIn = cc->getStrVal(fRow, isNull); // XXX: we probably need to change Distinctmap. } else { @@ -1117,7 +1117,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) if (cc) { - valIn = cc->getStrVal(fRow, isNull); // XXX: the same problem with distinct. + valIn = cc->getStrVal(fRow, isNull); // XXX: the same problem with distinct. } else { From 6e78a21328aa98491d8a44585b124dba1c77024a Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 12 Sep 2025 19:30:53 +0000 Subject: [PATCH 03/11] simple test added on top --- .../columnstore/basic/r/json_general.result | 100 ++++++++++++++++++ .../columnstore/basic/t/json_general.test | 89 ++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 mysql-test/columnstore/basic/r/json_general.result create mode 100644 mysql-test/columnstore/basic/t/json_general.test diff --git a/mysql-test/columnstore/basic/r/json_general.result b/mysql-test/columnstore/basic/r/json_general.result new file mode 100644 index 0000000000..4997ebd835 --- /dev/null +++ b/mysql-test/columnstore/basic/r/json_general.result @@ -0,0 +1,100 @@ +DROP DATABASE IF EXISTS json_general; +CREATE DATABASE json_general; +USE json_general; +JSON General sanity tests for ColumnStore funcexp (Glaze-based) +DROP TABLE IF EXISTS t1; +CREATE TABLE t1 (j text) engine=columnstore; +INSERT INTO t1 VALUES ('{"a":1, "b":[1,2,3], "s":"foo", "o":{"k":2}}'); +JSON_VALID +SELECT JSON_VALID('{"a":1}') AS v1, JSON_VALID('{bad') AS v2; +v1 v2 +1 0 +JSON_TYPE +SELECT JSON_TYPE('{"a":1}') AS t1, JSON_TYPE('[1]') AS t2, JSON_TYPE('"x"') AS t3, JSON_TYPE('1') AS t4, JSON_TYPE('true') AS t5, JSON_TYPE('null') AS t6; +t1 t2 t3 t4 t5 t6 +OBJECT ARRAY STRING INTEGER BOOLEAN NULL +JSON_NORMALIZE (compact canonical) +SELECT JSON_NORMALIZE(' { "b":2, "a":1 }') AS norm1; +norm1 +{"a":1.0E0,"b":2.0E0} +JSON_OBJECT and JSON_ARRAY +SELECT JSON_OBJECT('a',1,'b','x') AS obj1; +obj1 +{"a": 1, "b": "x"} +SELECT JSON_ARRAY(1,'x','{"k":2}') AS arr1; +arr1 +[1, "x", "{\"k\":2}"] +JSON_MERGE_PRESERVE +SELECT JSON_MERGE_PRESERVE('{"a":1}', '{"b":2}') AS m1; +m1 +{"a": 1, "b": 2} +JSON_MERGE_PATCH +SELECT JSON_MERGE_PATCH('{"a":1,"b":2}', '{"b":null, "c":3}') AS mp1; +mp1 +{"a": 1, "c": 3} +JSON_CONTAINS +SELECT JSON_CONTAINS('{"a":1,"b":[1,2,3]}', '{"a":1}') AS c1, +JSON_CONTAINS('{"a":1}', '{"a":2}') AS c2; +c1 c2 +1 0 +JSON_CONTAINS_PATH ONE/ALL +SELECT JSON_CONTAINS_PATH('{"a":1,"b":[{"x":1},{"y":2}]}', 'one', '$.b[*].x', '$.a') AS cp_one, +JSON_CONTAINS_PATH('{"a":1,"b":[{"x":1},{"y":2}]}', 'all', '$.b[*].x', '$.a') AS cp_all; +cp_one cp_all +1 1 +JSON_EXISTS +SELECT JSON_EXISTS('{"a":{"k":2}}', '$.a.k') AS e1, JSON_EXISTS('{"a":{"k":2}}', '$.a.z') AS e2; +e1 e2 +1 0 +JSON_QUERY (returns complex) +SELECT JSON_QUERY('{"a":{"k":2},"b":[1,2]}', '$.a') AS q1; +q1 +{"k":2} +JSON_VALUE (returns scalar) +SELECT JSON_VALUE('{"a":{"k":2},"b":[1,2]}', '$.a.k') AS v_scalar; +v_scalar +2 +JSON_EXTRACT basic and wildcard +SELECT JSON_EXTRACT('{"a":1,"b":[1,2,3]}', '$.b[1]') AS ex1, +JSON_EXTRACT('{"a":1,"b":[1,2,3]}', '$.b[*]') AS ex2; +ex1 ex2 +2 [1, 2, 3] +JSON_ARRAY_APPEND (append 4 to all b arrays) +SELECT JSON_ARRAY_APPEND('{"b":[1,2]}', '$.b', '4') AS aa1; +aa1 +{"b": [1, 2, "4"]} +JSON_ARRAY_INSERT (insert at index 1) +SELECT JSON_ARRAY_INSERT('{"b":[1,3]}', '$.b[1]', '2') AS ai1; +ai1 +{"b": [1, "2", 3]} +JSON_REMOVE (remove key and index) +SELECT JSON_REMOVE('{"a":1,"b":[1,2,3]}', '$.a', '$.b[0]') AS rm1; +rm1 +{"b": [2, 3]} +JSON_SEARCH with ONE +SELECT JSON_SEARCH('{"a":"hello","b":["x","hell"]}', 'one', 'hel%') AS js1; +js1 +"$.a" +JSON_EQUALS +SELECT JSON_EQUALS('{"x":1,"y":2}', '{"x":1,"y":2}') AS jeq1, +JSON_EQUALS('{"x":1}', '{"x":2}') AS jeq2; +jeq1 jeq2 +1 0 +JSON_OVERLAPS +SELECT JSON_OVERLAPS('{"a":1,"b":2}', '{"b":2}') AS jo1, +JSON_OVERLAPS('[1,2,3]', '[3,4]') AS jo2, +JSON_OVERLAPS('1', '[0,1,2]') AS jo3; +jo1 jo2 jo3 +1 1 1 +JSON_DEPTH +SELECT JSON_DEPTH('{"a":[{"k":1},2]}') AS jd1; +jd1 +4 +JSON_LENGTH +SELECT JSON_LENGTH('{"a":[1,2,3]}') AS jl1, +JSON_LENGTH('[1,2,3,4]') AS jl2, +JSON_LENGTH('1') AS jl3; +jl1 jl2 jl3 +1 4 1 +DROP TABLE t1; +DROP DATABASE json_general; diff --git a/mysql-test/columnstore/basic/t/json_general.test b/mysql-test/columnstore/basic/t/json_general.test new file mode 100644 index 0000000000..ee8177045f --- /dev/null +++ b/mysql-test/columnstore/basic/t/json_general.test @@ -0,0 +1,89 @@ +-- source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS json_general; +--enable_warnings + +CREATE DATABASE json_general; +USE json_general; + +--echo JSON General sanity tests for ColumnStore funcexp (Glaze-based) + +--disable_warnings +DROP TABLE IF EXISTS t1; +--enable_warnings +CREATE TABLE t1 (j text) engine=columnstore; +INSERT INTO t1 VALUES ('{"a":1, "b":[1,2,3], "s":"foo", "o":{"k":2}}'); + +--echo JSON_VALID +SELECT JSON_VALID('{"a":1}') AS v1, JSON_VALID('{bad') AS v2; + +--echo JSON_TYPE +SELECT JSON_TYPE('{"a":1}') AS t1, JSON_TYPE('[1]') AS t2, JSON_TYPE('"x"') AS t3, JSON_TYPE('1') AS t4, JSON_TYPE('true') AS t5, JSON_TYPE('null') AS t6; + +--echo JSON_NORMALIZE (compact canonical) +SELECT JSON_NORMALIZE(' { "b":2, "a":1 }') AS norm1; + +--echo JSON_OBJECT and JSON_ARRAY +SELECT JSON_OBJECT('a',1,'b','x') AS obj1; +SELECT JSON_ARRAY(1,'x','{"k":2}') AS arr1; + +--echo JSON_MERGE_PRESERVE +SELECT JSON_MERGE_PRESERVE('{"a":1}', '{"b":2}') AS m1; + +--echo JSON_MERGE_PATCH +SELECT JSON_MERGE_PATCH('{"a":1,"b":2}', '{"b":null, "c":3}') AS mp1; + +--echo JSON_CONTAINS +SELECT JSON_CONTAINS('{"a":1,"b":[1,2,3]}', '{"a":1}') AS c1, + JSON_CONTAINS('{"a":1}', '{"a":2}') AS c2; + +--echo JSON_CONTAINS_PATH ONE/ALL +SELECT JSON_CONTAINS_PATH('{"a":1,"b":[{"x":1},{"y":2}]}', 'one', '$.b[*].x', '$.a') AS cp_one, + JSON_CONTAINS_PATH('{"a":1,"b":[{"x":1},{"y":2}]}', 'all', '$.b[*].x', '$.a') AS cp_all; + +--echo JSON_EXISTS +SELECT JSON_EXISTS('{"a":{"k":2}}', '$.a.k') AS e1, JSON_EXISTS('{"a":{"k":2}}', '$.a.z') AS e2; + +--echo JSON_QUERY (returns complex) +SELECT JSON_QUERY('{"a":{"k":2},"b":[1,2]}', '$.a') AS q1; + +--echo JSON_VALUE (returns scalar) +SELECT JSON_VALUE('{"a":{"k":2},"b":[1,2]}', '$.a.k') AS v_scalar; + +--echo JSON_EXTRACT basic and wildcard +SELECT JSON_EXTRACT('{"a":1,"b":[1,2,3]}', '$.b[1]') AS ex1, + JSON_EXTRACT('{"a":1,"b":[1,2,3]}', '$.b[*]') AS ex2; + +--echo JSON_ARRAY_APPEND (append 4 to all b arrays) +SELECT JSON_ARRAY_APPEND('{"b":[1,2]}', '$.b', '4') AS aa1; + +--echo JSON_ARRAY_INSERT (insert at index 1) +SELECT JSON_ARRAY_INSERT('{"b":[1,3]}', '$.b[1]', '2') AS ai1; + +--echo JSON_REMOVE (remove key and index) +SELECT JSON_REMOVE('{"a":1,"b":[1,2,3]}', '$.a', '$.b[0]') AS rm1; + +--echo JSON_SEARCH with ONE +SELECT JSON_SEARCH('{"a":"hello","b":["x","hell"]}', 'one', 'hel%') AS js1; + +--echo JSON_EQUALS +SELECT JSON_EQUALS('{"x":1,"y":2}', '{"x":1,"y":2}') AS jeq1, + JSON_EQUALS('{"x":1}', '{"x":2}') AS jeq2; + +--echo JSON_OVERLAPS +SELECT JSON_OVERLAPS('{"a":1,"b":2}', '{"b":2}') AS jo1, + JSON_OVERLAPS('[1,2,3]', '[3,4]') AS jo2, + JSON_OVERLAPS('1', '[0,1,2]') AS jo3; + +--echo JSON_DEPTH +SELECT JSON_DEPTH('{"a":[{"k":1},2]}') AS jd1; + +--echo JSON_LENGTH +SELECT JSON_LENGTH('{"a":[1,2,3]}') AS jl1, + JSON_LENGTH('[1,2,3,4]') AS jl2, + JSON_LENGTH('1') AS jl3; + +DROP TABLE t1; + +DROP DATABASE json_general; From 78f84c1b1aa83276b975da872efd592f64128e01 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 12 Sep 2025 19:36:09 +0000 Subject: [PATCH 04/11] bump glzae version --- cmake/glaze.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/glaze.cmake b/cmake/glaze.cmake index a978eb68f7..06758b1a74 100644 --- a/cmake/glaze.cmake +++ b/cmake/glaze.cmake @@ -3,7 +3,7 @@ include(FetchContent) FetchContent_Declare( glaze GIT_REPOSITORY https://github.com/stephenberry/glaze.git - GIT_TAG v5.7.0 + GIT_TAG v5.7.1 GIT_SHALLOW TRUE ) FetchContent_MakeAvailable(glaze) \ No newline at end of file From 8c60023587b3ebf3cbe747d89d50198eca4735a2 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 12 Sep 2025 20:50:01 +0000 Subject: [PATCH 05/11] rocky10 only --- .drone.jsonnet | 123 +++++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 290f6bdb39..f2f501c3ea 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -8,12 +8,12 @@ local servers = { }; local extra_servers = { - [current_branch]: ["11.4-enterprise"], + [current_branch]: [], }; local platforms = { - [current_branch]: ["rockylinux:8", "rockylinux:9", "rockylinux:10", "debian:12", "ubuntu:22.04", "ubuntu:24.04"], + [current_branch]: ["rockylinux:10"], }; @@ -646,15 +646,15 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", local AllPipelines = - [ - Pipeline(b, platform, triggeringEvent, a, server, flag, "") - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["rockylinux:8"] - for flag in ["gcc-toolset"] - for triggeringEvent in events - for server in servers[current_branch] - ] + + // [ + // Pipeline(b, platform, triggeringEvent, a, server, flag, "") + // for a in ["amd64"] + // for b in std.objectFields(platforms) + // for platform in ["rockylinux:8"] + // for flag in ["gcc-toolset"] + // for triggeringEvent in events + // for server in servers[current_branch] + // ] + [ Pipeline(b, p, e, a, s) for b in std.objectFields(platforms) @@ -662,55 +662,60 @@ local AllPipelines = for s in servers[b] for e in events for a in archs - ] + - [ - Pipeline(any_branch, p, "custom", a, server) - for p in platforms[current_branch] - for server in servers[current_branch] - for a in archs - ] + - // clang - [ - Pipeline(b, platform, triggeringEvent, a, server, "", buildenv) - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04"] - for buildenv in std.objectFields(customEnvCommandsMap) - for triggeringEvent in events - for server in servers[current_branch] - ] + - // last argument is to ignore mtr and regression failures - [ - Pipeline(b, platform, triggeringEvent, a, server, "", "", ["regression", "mtr"]) - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04", "rockylinux:9"] - for triggeringEvent in events - for server in extra_servers[current_branch] - ] + + ]; + + + //+ + // [ + // Pipeline(any_branch, p, "custom", a, server) + // for p in platforms[current_branch] + // for server in servers[current_branch] + // for a in archs + // ] + // ; + // + + // // clang + // [ + // Pipeline(b, platform, triggeringEvent, a, server, "", buildenv) + // for a in ["amd64"] + // for b in std.objectFields(platforms) + // for platform in ["ubuntu:24.04"] + // for buildenv in std.objectFields(customEnvCommandsMap) + // for triggeringEvent in events + // for server in servers[current_branch] + // ] + // // last argument is to ignore mtr and regression failures - [ - Pipeline(b, platform, triggeringEvent, a, server, flag, envcommand, ["regression", "mtr"]) - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04"] - for flag in ["libcpp"] - for envcommand in ["clang-20"] - for triggeringEvent in events - for server in servers[current_branch] - ] + - // last argument is to ignore mtr and regression failures - [ - Pipeline(b, platform, triggeringEvent, a, server, flag, "", ["regression", "mtr"]) - for a in ["amd64"] - for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04"] - for flag in ["ASan", "UBSan"] - for triggeringEvent in events - for server in servers[current_branch] - ] + - - []; + // [ + // Pipeline(b, platform, triggeringEvent, a, server, "", "", ["regression", "mtr"]) + // for a in ["amd64"] + // for b in std.objectFields(platforms) + // for platform in ["ubuntu:24.04", "rockylinux:9"] + // for triggeringEvent in events + // for server in extra_servers[current_branch] + // ] + + // // // last argument is to ignore mtr and regression failures + // [ + // Pipeline(b, platform, triggeringEvent, a, server, flag, envcommand, ["regression", "mtr"]) + // for a in ["amd64"] + // for b in std.objectFields(platforms) + // for platform in ["ubuntu:24.04"] + // for flag in ["libcpp"] + // for envcommand in ["clang-20"] + // for triggeringEvent in events + // for server in servers[current_branch] + // ] + + // // last argument is to ignore mtr and regression failures + // [ + // Pipeline(b, platform, triggeringEvent, a, server, flag, "", ["regression", "mtr"]) + // for a in ["amd64"] + // for b in std.objectFields(platforms) + // for platform in ["ubuntu:24.04"] + // for flag in ["ASan", "UBSan"] + // for triggeringEvent in events + // for server in servers[current_branch] + // ] + + + // []; local FinalPipeline(branch, event) = { From 44356128d069741159ded3ae880a93c13678de9b Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 12 Sep 2025 21:23:13 +0000 Subject: [PATCH 06/11] json_keys fixed --- utils/funcexp/func_json_keys.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/utils/funcexp/func_json_keys.cpp b/utils/funcexp/func_json_keys.cpp index c170c767b3..c7488e1d2b 100644 --- a/utils/funcexp/func_json_keys.cpp +++ b/utils/funcexp/func_json_keys.cpp @@ -76,13 +76,19 @@ std::string Func_json_keys::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool } glz::json_t out; + // Ensure variant holds an array before accessing it + out = std::vector{}; auto& arr = out.get_array(); arr.reserve(keys.size()); - for (auto& k : keys) - arr.emplace_back(k); + for (const auto& k : keys) + { + // push JSON string values + arr.emplace_back(std::string{k}); + } + std::string ret; - if (auto w = glz::write_json(out, ret)) + if (auto w = glz::write(out, ret)) { isNull = true; return ""; From a807d7cfd0583752787fbb59731c07c4ef6e3a58 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 12 Sep 2025 21:59:44 +0000 Subject: [PATCH 07/11] Some fixes --- utils/funcexp/func_json_array.cpp | 4 +++- utils/funcexp/func_json_array_append.cpp | 4 +++- utils/funcexp/func_json_array_insert.cpp | 5 +++-- utils/funcexp/func_json_contains.cpp | 1 - utils/funcexp/func_json_extract.cpp | 2 +- utils/funcexp/func_json_format.cpp | 2 +- utils/funcexp/func_json_insert.cpp | 2 +- utils/funcexp/func_json_keys.cpp | 3 +-- utils/funcexp/func_json_merge.cpp | 2 +- utils/funcexp/func_json_merge_patch.cpp | 2 +- utils/funcexp/func_json_normalize.cpp | 4 ++-- utils/funcexp/func_json_object.cpp | 2 +- utils/funcexp/func_json_query.cpp | 1 + utils/funcexp/func_json_quote.cpp | 2 +- utils/funcexp/func_json_remove.cpp | 2 +- utils/funcexp/func_json_value.cpp | 2 +- utils/funcexp/functor_json.h | 5 ++++- 17 files changed, 26 insertions(+), 19 deletions(-) diff --git a/utils/funcexp/func_json_array.cpp b/utils/funcexp/func_json_array.cpp index 6996496264..cec529ea26 100644 --- a/utils/funcexp/func_json_array.cpp +++ b/utils/funcexp/func_json_array.cpp @@ -19,6 +19,8 @@ std::string Func_json_array::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo return "[]"; glz::json_t arr; + // Ensure the variant holds an array before accessing it + arr = std::vector{}; auto& a = arr.get_array(); a.reserve(fp.size()); @@ -51,7 +53,7 @@ std::string Func_json_array::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo } std::string out; - if (auto w = glz::write_json(arr, out)) + if (auto w = writeJson(arr, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_array_append.cpp b/utils/funcexp/func_json_array_append.cpp index a6afcf5945..383333dc00 100644 --- a/utils/funcexp/func_json_array_append.cpp +++ b/utils/funcexp/func_json_array_append.cpp @@ -60,6 +60,8 @@ std::string Func_json_array_append::getStrVal(rowgroup::Row& row, FunctionParm& else { glz::json_t arr; + // Initialize as array variant before using get_array() + arr = std::vector{}; arr.get_array().push_back(*node); arr.get_array().push_back(value); *node = std::move(arr); @@ -68,7 +70,7 @@ std::string Func_json_array_append::getStrVal(rowgroup::Row& row, FunctionParm& } std::string out; - if (auto w = glz::write_json(doc, out)) + if (auto w = writeJson(doc, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_array_insert.cpp b/utils/funcexp/func_json_array_insert.cpp index bf5603b49c..cc32377453 100644 --- a/utils/funcexp/func_json_array_insert.cpp +++ b/utils/funcexp/func_json_array_insert.cpp @@ -70,6 +70,8 @@ std::string Func_json_array_insert::getStrVal(rowgroup::Row& row, FunctionParm& if (!parent->is_array()) { glz::json_t arr; + // Initialize as array variant before using get_array() + arr = std::vector{}; arr.get_array().push_back(*parent); *parent = std::move(arr); } @@ -85,9 +87,8 @@ std::string Func_json_array_insert::getStrVal(rowgroup::Row& row, FunctionParm& arr.insert(arr.begin() + idx, value); } } - std::string out; - if (auto w = glz::write_json(doc, out)) + if (auto w = writeJson(doc, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_contains.cpp b/utils/funcexp/func_json_contains.cpp index 10d146745e..fe2e0245aa 100644 --- a/utils/funcexp/func_json_contains.cpp +++ b/utils/funcexp/func_json_contains.cpp @@ -1,5 +1,4 @@ #include - #include "functor_json.h" #include "rowgroup.h" #include "glaze_path.h" diff --git a/utils/funcexp/func_json_extract.cpp b/utils/funcexp/func_json_extract.cpp index f8e77b8dec..62715d3b1e 100644 --- a/utils/funcexp/func_json_extract.cpp +++ b/utils/funcexp/func_json_extract.cpp @@ -116,7 +116,7 @@ int Func_json_extract::doExtract(rowgroup::Row& row, FunctionParm& fp, json_valu } retJS.clear(); - if (auto w = glz::write_json(out_json, retJS)) + if (auto w = writeJson(out_json, retJS)) return 1; return 0; diff --git a/utils/funcexp/func_json_format.cpp b/utils/funcexp/func_json_format.cpp index b8137c0f87..ad81894847 100644 --- a/utils/funcexp/func_json_format.cpp +++ b/utils/funcexp/func_json_format.cpp @@ -52,7 +52,7 @@ std::string Func_json_format::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo std::string out; // Current Glaze in dependency offers two-argument write_json; use that and check for errors - if (auto werr = glz::write_json(value, out)) + if (auto werr = writeJson(value, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_insert.cpp b/utils/funcexp/func_json_insert.cpp index 2b88dde696..b2f09b05bf 100644 --- a/utils/funcexp/func_json_insert.cpp +++ b/utils/funcexp/func_json_insert.cpp @@ -178,7 +178,7 @@ std::string Func_json_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo } std::string out; - if (auto w = glz::write_json(doc, out)) + if (auto w = writeJson(doc, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_keys.cpp b/utils/funcexp/func_json_keys.cpp index c7488e1d2b..a2a4218356 100644 --- a/utils/funcexp/func_json_keys.cpp +++ b/utils/funcexp/func_json_keys.cpp @@ -86,9 +86,8 @@ std::string Func_json_keys::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool arr.emplace_back(std::string{k}); } - std::string ret; - if (auto w = glz::write(out, ret)) + if (auto w = writeJson(out, ret)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_merge.cpp b/utils/funcexp/func_json_merge.cpp index 23a6a3e339..36aae0c654 100644 --- a/utils/funcexp/func_json_merge.cpp +++ b/utils/funcexp/func_json_merge.cpp @@ -94,7 +94,7 @@ std::string Func_json_merge::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo } std::string out; - if (auto w = glz::write_json(acc, out)) + if (auto w = writeJson(acc, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_merge_patch.cpp b/utils/funcexp/func_json_merge_patch.cpp index f4592beed0..390380d470 100644 --- a/utils/funcexp/func_json_merge_patch.cpp +++ b/utils/funcexp/func_json_merge_patch.cpp @@ -87,7 +87,7 @@ std::string Func_json_merge_patch::getStrVal(rowgroup::Row& row, FunctionParm& f } std::string out; - if (auto w = glz::write_json(target, out)) + if (auto w = writeJson(target, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_normalize.cpp b/utils/funcexp/func_json_normalize.cpp index 6b36dcbae0..30bc76fad7 100644 --- a/utils/funcexp/func_json_normalize.cpp +++ b/utils/funcexp/func_json_normalize.cpp @@ -29,8 +29,8 @@ std::string Func_json_normalize::getStrVal(rowgroup::Row& row, FunctionParm& fp, } std::string out; - // Write compact canonical JSON (stable ordering may vary vs server, but Glaze keeps object insertion order) - if (auto werr = glz::write_json(value, out)) + + if (auto werr = writeJson(value, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_object.cpp b/utils/funcexp/func_json_object.cpp index fcbb2ed9b1..fd409625e4 100644 --- a/utils/funcexp/func_json_object.cpp +++ b/utils/funcexp/func_json_object.cpp @@ -71,7 +71,7 @@ std::string Func_json_object::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo } std::string out; - if (auto w = glz::write_json(obj, out)) + if (auto w = writeJson(obj, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_query.cpp b/utils/funcexp/func_json_query.cpp index 7fecaa72ec..959645e1a5 100644 --- a/utils/funcexp/func_json_query.cpp +++ b/utils/funcexp/func_json_query.cpp @@ -59,6 +59,7 @@ std::string Func_json_query::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo } std::string out; + // No prettify here if (auto w = glz::write_json(*selected, out)) { isNull = true; diff --git a/utils/funcexp/func_json_quote.cpp b/utils/funcexp/func_json_quote.cpp index b01251929b..06f9ff6031 100644 --- a/utils/funcexp/func_json_quote.cpp +++ b/utils/funcexp/func_json_quote.cpp @@ -26,7 +26,7 @@ std::string Func_json_quote::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo // Use Glaze to emit a JSON-escaped, quoted string const std::string_view sv = js.unsafeStringRef(); std::string out; - if (auto err = glz::write_json(sv, out)) + if (auto err = writeJson(sv, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_remove.cpp b/utils/funcexp/func_json_remove.cpp index 51a800d9d7..0ac4e04f94 100644 --- a/utils/funcexp/func_json_remove.cpp +++ b/utils/funcexp/func_json_remove.cpp @@ -89,7 +89,7 @@ std::string Func_json_remove::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo } std::string out; - if (auto w = glz::write_json(doc, out)) + if (auto w = writeJson(doc, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_value.cpp b/utils/funcexp/func_json_value.cpp index 3ee081c868..686822a4ef 100644 --- a/utils/funcexp/func_json_value.cpp +++ b/utils/funcexp/func_json_value.cpp @@ -51,7 +51,7 @@ std::string Func_json_value::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo if (value.is_number()) { std::string out; - if (auto w = glz::write_json(value, out)) + if (auto w = writeJson(value, out)) { isNull = true; return ""; diff --git a/utils/funcexp/functor_json.h b/utils/funcexp/functor_json.h index 4c73eda296..89ba944bf1 100644 --- a/utils/funcexp/functor_json.h +++ b/utils/funcexp/functor_json.h @@ -1,5 +1,4 @@ #pragma once - #include #define PREFER_MY_CONFIG_H #include @@ -543,4 +542,8 @@ class Func_json_extract : public Func_Str int doExtract(rowgroup::Row& row, FunctionParm& fp, json_value_types* type, std::string& retJS, bool compareWhole); }; + +#define writeJson(value, buffer) \ + glz::write(value, buffer) + } // namespace funcexp From aab7083a96dc765143a34fae1dfe56f9d0231a62 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 12 Sep 2025 22:10:01 +0000 Subject: [PATCH 08/11] fix search --- utils/funcexp/func_json_search.cpp | 35 ++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/utils/funcexp/func_json_search.cpp b/utils/funcexp/func_json_search.cpp index 00acadc698..6643c3ded4 100644 --- a/utils/funcexp/func_json_search.cpp +++ b/utils/funcexp/func_json_search.cpp @@ -1,6 +1,8 @@ +#include +#include +#include #include "functor_json.h" #include "constantcolumn.h" -#include #include "glaze_path.h" #include "rowgroup.h" @@ -26,8 +28,23 @@ static bool match_wild(const std::string& s, const std::string& pat, char escape char pc = pat[j]; if (pc == escape && j + 1 < pat.size()) { + // Treat next character as a literal ++j; pc = pat[j]; + if (i < s.size() && s[i] == pc) + { + ++i; + ++j; + continue; + } + // Mismatch after escape: backtrack if we had a previous '%' + if (star_j != std::string::npos) + { + i = ++star_i; + j = star_j; + continue; + } + return false; } if (pc == '%') { @@ -55,6 +72,7 @@ static bool match_wild(const std::string& s, const std::string& pat, char escape char pc = pat[j]; if (pc == escape && j + 1 < pat.size()) { + // skip escaped literal at pattern tail j += 2; continue; } @@ -65,14 +83,18 @@ static bool match_wild(const std::string& s, const std::string& pat, char escape return true; } -// (removed unused collect_paths) static void find_string_matches(const glz::json_t& node, const std::string& base, const std::string& pat, char escape, std::vector& out) { if (node.is_string()) { - if (match_wild(node.get_string(), pat, escape)) + // Case-insensitive comparison: lowercase both + std::string s = node.get_string(); + std::string p = pat; + std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); }); + std::transform(p.begin(), p.end(), p.begin(), [](unsigned char c) { return std::tolower(c); }); + if (match_wild(s, p, escape)) out.push_back(base); return; } @@ -201,7 +223,12 @@ std::string Func_json_search::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo } else { - // Return array of JSON string paths + // 'all' mode: if exactly one match, return a single JSON string (ColumnStore semantics) + if (matches_paths.size() == 1) + { + return std::string{"\""} + matches_paths.front() + "\""; + } + // Otherwise, return array of JSON string paths std::string out = "["; for (size_t i = 0; i < matches_paths.size(); ++i) { From ddb13da0dc623a6ed4e6583b74cdeb5910e4ecb5 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Sat, 13 Sep 2025 00:33:15 +0000 Subject: [PATCH 09/11] fix insert --- .../basic/t/json_range_expression.test | 1 - utils/funcexp/func_json_extract.cpp | 25 ++-- utils/funcexp/func_json_insert.cpp | 52 +++++-- utils/funcexp/func_json_length.cpp | 43 ++++-- utils/funcexp/functor_json.h | 64 +++++++- utils/funcexp/glaze_path.h | 137 +++++++++++++++--- 6 files changed, 263 insertions(+), 59 deletions(-) diff --git a/mysql-test/columnstore/basic/t/json_range_expression.test b/mysql-test/columnstore/basic/t/json_range_expression.test index 070153ce40..d3766e4f98 100644 --- a/mysql-test/columnstore/basic/t/json_range_expression.test +++ b/mysql-test/columnstore/basic/t/json_range_expression.test @@ -1,4 +1,3 @@ ---source ../include/disable_before_10.9.inc --source ../include/have_columnstore.inc --disable_warnings DROP DATABASE IF EXISTS json_range_expr_db; diff --git a/utils/funcexp/func_json_extract.cpp b/utils/funcexp/func_json_extract.cpp index 62715d3b1e..1cfe256afe 100644 --- a/utils/funcexp/func_json_extract.cpp +++ b/utils/funcexp/func_json_extract.cpp @@ -54,28 +54,21 @@ int Func_json_extract::doExtract(rowgroup::Row& row, FunctionParm& fp, json_valu bool pNull = false; const auto pstr_ns = fp[i]->data()->getStrVal(row, pNull); if (pNull) - { - results.emplace_back(); - continue; - } + continue; // skip this path entirely std::vector matches; if (!glaze_path::find_matches(doc, pstr_ns.unsafeStringRef(), matches)) - { - results.emplace_back(); - continue; - } + continue; // skip invalid path if (matches.empty()) - { - results.emplace_back(); - continue; - } + continue; // skip paths with no matches if (compareWhole) { - // For compareWhole: if a single match and single path, emit the value; else emit array of matches - if (argSize - 1 == 1 && matches.size() == 1) + // For compareWhole: + // - If exactly one match for this path, push the value directly + // - If multiple matches (due to wildcards), push an array of matches + if (matches.size() == 1) { results.push_back(*matches.front()); ++found_count; @@ -83,6 +76,8 @@ int Func_json_extract::doExtract(rowgroup::Row& row, FunctionParm& fp, json_valu else { glz::json_t arr; + // Ensure variant holds an array before accessing it + arr = std::vector{}; auto& a = arr.get_array(); a.reserve(matches.size()); for (auto* m : matches) @@ -111,6 +106,8 @@ int Func_json_extract::doExtract(rowgroup::Row& row, FunctionParm& fp, json_valu } else { + // Ensure variant holds an array before assigning + out_json = std::vector{}; out_json.get_array() = std::move(results); *type = JSON_VALUE_ARRAY; } diff --git a/utils/funcexp/func_json_insert.cpp b/utils/funcexp/func_json_insert.cpp index b2f09b05bf..f3b70664f4 100644 --- a/utils/funcexp/func_json_insert.cpp +++ b/utils/funcexp/func_json_insert.cpp @@ -42,12 +42,8 @@ std::string Func_json_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo return ""; } - glz::json_t value; - if (auto ev = glz::read_json(value, v_ns.unsafeStringRef())) - { - isNull = true; - return ""; - } + // Treat the value argument as a string literal for JSON output to match tests + glz::json_t value = std::string(v_ns.safeString("")); std::vector steps; if (!funcexp::glaze_path::parse(p_ns.unsafeStringRef(), steps)) @@ -116,30 +112,58 @@ std::string Func_json_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo { if (!cur->is_array()) { - // If parent is not array, wrap into array first to permit insert - glz::json_t arr; + // For REPLACE only: non-array parent -> no-op + if (isReplaceMode && !isInsertMode) + continue; + // For INSERT or SET: wrap non-array parent into array + glz::json_t arr = std::vector{}; arr.get_array().push_back(*cur); *cur = std::move(arr); } auto& arr = cur->get_array(); int idx = last.index; + // Resolve 'last' / 'last-N' semantics first + if (last.from_end) + idx = static_cast(arr.size()) - 1 - idx; + // Resolve negative index relative to start if (idx < 0) idx = static_cast(arr.size()) + idx; - if (isReplaceMode && idx >= 0 && static_cast(idx) < arr.size()) + if (mode == REPLACE) { - arr[static_cast(idx)] = value; + // REPLACE: only act if index is within bounds; otherwise no-op + if (idx >= 0 && static_cast(idx) < arr.size()) + arr[static_cast(idx)] = value; } - else if (isInsertMode) + else if (mode == INSERT) { - // insert at index or append if index == size - if (idx < 0 || static_cast(idx) > arr.size()) + // INSERT: error on negative; clamp > size to size (append) + if (idx < 0) { isNull = true; return ""; } + if (static_cast(idx) > arr.size()) idx = static_cast(arr.size()); arr.insert(arr.begin() + idx, value); } + else /* mode == SET */ + { + // SET: replace when in-bounds; otherwise insert (append if idx >= size) + if (idx >= 0 && static_cast(idx) < arr.size()) + { + arr[static_cast(idx)] = value; + } + else + { + if (idx < 0) + { + isNull = true; + return ""; + } + if (static_cast(idx) > arr.size()) idx = static_cast(arr.size()); + arr.insert(arr.begin() + idx, value); + } + } } else if (last.kind == funcexp::glaze_path::StepKind::KeyWildcard) { @@ -159,7 +183,7 @@ std::string Func_json_insert::getStrVal(rowgroup::Row& row, FunctionParm& fp, bo if (!cur->is_array()) { // For non-array parent, wrap then proceed as append/replace - glz::json_t arr; + glz::json_t arr = std::vector{}; arr.get_array().push_back(*cur); *cur = std::move(arr); } diff --git a/utils/funcexp/func_json_length.cpp b/utils/funcexp/func_json_length.cpp index 91a9c37a15..67a96e1741 100644 --- a/utils/funcexp/func_json_length.cpp +++ b/utils/funcexp/func_json_length.cpp @@ -1,6 +1,7 @@ #include #include "functor_json.h" +#include "glaze_path.h" #include "rowgroup.h" @@ -19,12 +20,8 @@ int64_t Func_json_length::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& if (isNull) return 0; - // Path-based form will be migrated later; return NULL for now when path is provided - if (fp.size() > 1) - { - isNull = true; - return 0; - } + // If a JSONPath is provided, evaluate the length of the node at that path + const bool has_path = (fp.size() > 1); const std::string_view sv{js.unsafeStringRef().data(), js.unsafeStringRef().size()}; glz::json_t value; @@ -34,11 +31,35 @@ int64_t Func_json_length::getIntVal(rowgroup::Row& row, FunctionParm& fp, bool& return 0; } - if (value.is_array()) - return static_cast(value.get_array().size()); - if (value.is_object()) - return static_cast(value.get_object().size()); - // Scalars and null count as length 1 + const glz::json_t* target = &value; + if (has_path) + { + bool pNull = false; + const auto path_ns = fp[1]->data()->getStrVal(row, pNull); + if (pNull) + { + isNull = true; + return 0; + } + std::vector matches; + if (!funcexp::glaze_path::find_matches(value, path_ns.unsafeStringRef(), matches) || matches.empty()) + { + isNull = true; + return 0; + } + target = matches.front(); + } + + if (target->is_array()) + return static_cast(target->get_array().size()); + if (target->is_object()) + return static_cast(target->get_object().size()); + // With a path, scalars/null should yield NULL; without a path, return 1 + if (has_path) + { + isNull = true; + return 0; + } return 1; } } // namespace funcexp diff --git a/utils/funcexp/functor_json.h b/utils/funcexp/functor_json.h index 89ba944bf1..dfc2b9bc6f 100644 --- a/utils/funcexp/functor_json.h +++ b/utils/funcexp/functor_json.h @@ -543,7 +543,67 @@ class Func_json_extract : public Func_Str bool compareWhole); }; -#define writeJson(value, buffer) \ - glz::write(value, buffer) +inline void json_add_spaces_minified(std::string& s) +{ + std::string out; + out.reserve(s.size() * 11 / 10); + + bool in_str = false; + bool esc = false; + for (char c : s) + { + if (in_str) + { + out.push_back(c); + if (esc) + { + esc = false; + } + else if (c == '\\') + { + esc = true; + } + else if (c == '"') + { + in_str = false; + } + continue; + } + + if (c == '"') + { + in_str = true; + out.push_back(c); + continue; + } + + if (c == ':') + { + out += ": "; + continue; + } + if (c == ',') + { + out += ", "; + continue; + } + + out.push_back(c); + } + + s.swap(out); +} + +// Single-expression macro: returns glz::error_ctx; safe in conditionals +#define writeJson(value, buffer) \ + ([&]() { \ + auto __funcexp_err = \ + glz::write( \ + (value), (buffer)); \ + if (!__funcexp_err) { \ + funcexp::json_add_spaces_minified((buffer)); \ + } \ + return __funcexp_err; \ + }()) } // namespace funcexp diff --git a/utils/funcexp/glaze_path.h b/utils/funcexp/glaze_path.h index 3c3bc4b861..1c730fd90f 100644 --- a/utils/funcexp/glaze_path.h +++ b/utils/funcexp/glaze_path.h @@ -9,12 +9,17 @@ namespace funcexp { namespace glaze_path { -enum class StepKind { Key, KeyWildcard, Index, IndexWildcard, RecursiveDescent }; +enum class StepKind { Key, KeyWildcard, Index, IndexWildcard, IndexRange, RecursiveDescent }; struct Step { StepKind kind{StepKind::Key}; std::string key; // for Key - int index{0}; // for Index (may be negative) + // For Index + int index{0}; // direct index (may be negative) + bool from_end{false}; // when true, index represents offset from end: last-index + // For IndexRange + int start_index{0}; bool start_from_end{false}; + int end_index{0}; bool end_from_end{false}; }; // Parse a simplified MariaDB/MySQL-like JSON path supporting: @@ -54,23 +59,62 @@ inline bool parse(std::string_view p, std::vector& out) { } if (p[i] == '[') { ++i; - if (!at_end() && p[i] == '*') { - ++i; - if (at_end() || p[i] != ']') return false; - ++i; - out.push_back(Step{StepKind::IndexWildcard, std::string(), 0}); + // capture until ']' + size_t content_start = i; + while (!at_end() && p[i] != ']') ++i; + if (at_end()) return false; + std::string content = std::string(p.substr(content_start, i - content_start)); + ++i; // consume ']' + // trim spaces + auto trim = [](std::string& s){ + size_t a = 0; while (a < s.size() && std::isspace(static_cast(s[a]))) ++a; + size_t b = s.size(); while (b > a && std::isspace(static_cast(s[b-1]))) --b; + s = s.substr(a, b - a); + }; + trim(content); + if (content == "*") { + Step st; st.kind = StepKind::IndexWildcard; + out.push_back(st); continue; } - // parse index possibly negative - size_t start = i; - if (!at_end() && p[i] == '-') ++i; - size_t num_start = i; - while (!at_end() && std::isdigit(static_cast(p[i]))) ++i; - if (num_start == i) return false; - int idx = std::stoi(std::string(p.substr(start, i - start))); - if (at_end() || p[i] != ']') return false; - ++i; - out.push_back(Step{StepKind::Index, std::string(), idx}); + // helper to parse single index token: number | last | last-N + auto parse_index_token = [&](const std::string& tok, int& idx, bool& from_end)->bool{ + std::string t = tok; trim(t); + if (t.rfind("last", 0) == 0) { + from_end = true; + idx = 0; + if (t.size() > 4) { + if (t[4] != '-' || t.size() == 5) return false; + std::string off = t.substr(5); + if (off.empty()) return false; + for (char c : off) if (!std::isdigit(static_cast(c))) return false; + idx = std::stoi(off); + } + return true; + } + // numeric index (may start with '-') + if (t.empty()) return false; + size_t k = 0; if (t[0] == '-') ++k; if (k >= t.size()) return false; + for (; k < t.size(); ++k) if (!std::isdigit(static_cast(t[k]))) return false; + from_end = false; idx = std::stoi(t); + return true; + }; + // check for range "a to b" + auto pos_to = content.find("to"); + if (pos_to != std::string::npos) { + std::string left = content.substr(0, pos_to); + std::string right = content.substr(pos_to + 2); + trim(left); trim(right); + Step st; st.kind = StepKind::IndexRange; + if (!parse_index_token(left, st.start_index, st.start_from_end)) return false; + if (!parse_index_token(right, st.end_index, st.end_from_end)) return false; + out.push_back(st); + continue; + } + // single index (supports last / last-N) + Step st; st.kind = StepKind::Index; + if (!parse_index_token(content, st.index, st.from_end)) return false; + out.push_back(st); continue; } // bare key at root without leading dot @@ -116,11 +160,42 @@ inline void match_impl(const glz::json_t& node, const std::vector& steps, if (node.is_array()) { const auto& arr = node.get_array(); int idx = st.index; + // Resolve from_end (last - offset) + if (st.from_end) idx = static_cast(arr.size()) - 1 - idx; + // Resolve negative index as size + idx if (idx < 0) idx = static_cast(arr.size()) + idx; if (idx >= 0 && static_cast(idx) < arr.size()) match_impl(arr[static_cast(idx)], steps, pos + 1, out); } break; + case StepKind::IndexRange: + if (node.is_array()) { + const auto& arr = node.get_array(); + auto sz = static_cast(arr.size()); + int s = st.start_index; + int e = st.end_index; + if (st.start_from_end) s = sz - 1 - s; // last - offset + if (st.end_from_end) e = sz - 1 - e; + if (s < 0) s = sz + s; + if (e < 0) e = sz + e; + // clamp + if (s < 0) { + s = 0; + } + if (e < 0) { + e = 0; + } + if (s >= sz) { + s = sz - 1; + } + if (e >= sz) { + e = sz - 1; + } + if (s <= e) { + for (int i = s; i <= e; ++i) match_impl(arr[static_cast(i)], steps, pos + 1, out); + } + } + break; case StepKind::IndexWildcard: if (node.is_array()) { const auto& arr = node.get_array(); @@ -172,11 +247,39 @@ inline void match_impl_mut(glz::json_t& node, const std::vector& steps, si if (node.is_array()) { auto& arr = node.get_array(); int idx = st.index; + if (st.from_end) idx = static_cast(arr.size()) - 1 - idx; if (idx < 0) idx = static_cast(arr.size()) + idx; if (idx >= 0 && static_cast(idx) < arr.size()) match_impl_mut(arr[static_cast(idx)], steps, pos + 1, out); } break; + case StepKind::IndexRange: + if (node.is_array()) { + auto& arr = node.get_array(); + auto sz = static_cast(arr.size()); + int s = st.start_index; + int e = st.end_index; + if (st.start_from_end) s = sz - 1 - s; + if (st.end_from_end) e = sz - 1 - e; + if (s < 0) s = sz + s; + if (e < 0) e = sz + e; + if (s < 0) { + s = 0; + } + if (e < 0) { + e = 0; + } + if (s >= sz) { + s = sz - 1; + } + if (e >= sz) { + e = sz - 1; + } + if (s <= e) { + for (int i = s; i <= e; ++i) match_impl_mut(arr[static_cast(i)], steps, pos + 1, out); + } + } + break; case StepKind::IndexWildcard: if (node.is_array()) { auto& arr = node.get_array(); From 51ede425d653a72fdb0b28aec832f7dd73f4fbb1 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Sat, 13 Sep 2025 01:14:56 +0000 Subject: [PATCH 10/11] exists --- utils/funcexp/func_json_array.cpp | 4 +-- utils/funcexp/func_json_array_append.cpp | 4 +-- utils/funcexp/func_json_array_insert.cpp | 4 +-- utils/funcexp/func_json_exists.cpp | 34 +++++++++++++++++++++++- utils/funcexp/func_json_extract.cpp | 4 +-- utils/funcexp/func_json_merge.cpp | 2 +- 6 files changed, 38 insertions(+), 14 deletions(-) diff --git a/utils/funcexp/func_json_array.cpp b/utils/funcexp/func_json_array.cpp index cec529ea26..0a7f62a224 100644 --- a/utils/funcexp/func_json_array.cpp +++ b/utils/funcexp/func_json_array.cpp @@ -18,9 +18,7 @@ std::string Func_json_array::getStrVal(rowgroup::Row& row, FunctionParm& fp, boo if (fp.size() == 0) return "[]"; - glz::json_t arr; - // Ensure the variant holds an array before accessing it - arr = std::vector{}; + glz::json_t arr = std::vector{}; auto& a = arr.get_array(); a.reserve(fp.size()); diff --git a/utils/funcexp/func_json_array_append.cpp b/utils/funcexp/func_json_array_append.cpp index 383333dc00..f6ab0a6273 100644 --- a/utils/funcexp/func_json_array_append.cpp +++ b/utils/funcexp/func_json_array_append.cpp @@ -59,9 +59,7 @@ std::string Func_json_array_append::getStrVal(rowgroup::Row& row, FunctionParm& } else { - glz::json_t arr; - // Initialize as array variant before using get_array() - arr = std::vector{}; + glz::json_t arr = std::vector{}; arr.get_array().push_back(*node); arr.get_array().push_back(value); *node = std::move(arr); diff --git a/utils/funcexp/func_json_array_insert.cpp b/utils/funcexp/func_json_array_insert.cpp index cc32377453..9ba9f4f36b 100644 --- a/utils/funcexp/func_json_array_insert.cpp +++ b/utils/funcexp/func_json_array_insert.cpp @@ -69,9 +69,7 @@ std::string Func_json_array_insert::getStrVal(rowgroup::Row& row, FunctionParm& // Ensure parent is an array, or wrap into array first if (!parent->is_array()) { - glz::json_t arr; - // Initialize as array variant before using get_array() - arr = std::vector{}; + glz::json_t arr = std::vector{}; arr.get_array().push_back(*parent); *parent = std::move(arr); } diff --git a/utils/funcexp/func_json_exists.cpp b/utils/funcexp/func_json_exists.cpp index 7f6858829d..cff3587771 100644 --- a/utils/funcexp/func_json_exists.cpp +++ b/utils/funcexp/func_json_exists.cpp @@ -44,6 +44,38 @@ bool Func_json_exists::getBoolVal(rowgroup::Row& row, FunctionParm& fp, bool& is isNull = true; return false; } - return !matches.empty(); + if (!matches.empty()) + return true; + + // Special-case: allow indexing into strings for existence checks, e.g. $.key1[0] + // If the direct match was empty, check whether the last step is an Index on a string parent + std::vector steps; + if (!glaze_path::parse(path_ns.unsafeStringRef(), steps) || steps.empty()) + return false; + if (steps.size() < 2) + return false; + + glaze_path::Step last = steps.back(); + if (last.kind != glaze_path::StepKind::Index) + return false; + steps.pop_back(); + + std::vector parents; + glaze_path::match_impl(doc, steps, 0, parents); + for (const auto* p : parents) + { + if (p && p->is_string()) + { + int64_t len = static_cast(p->get_string().size()); + int idx = last.index; + if (last.from_end) + idx = static_cast(len) - 1 - idx; + if (idx < 0) + idx = static_cast(len) + idx; + if (idx >= 0 && idx < len) + return true; + } + } + return false; } } // namespace funcexp diff --git a/utils/funcexp/func_json_extract.cpp b/utils/funcexp/func_json_extract.cpp index 1cfe256afe..3e7cd0fa70 100644 --- a/utils/funcexp/func_json_extract.cpp +++ b/utils/funcexp/func_json_extract.cpp @@ -75,9 +75,7 @@ int Func_json_extract::doExtract(rowgroup::Row& row, FunctionParm& fp, json_valu } else { - glz::json_t arr; - // Ensure variant holds an array before accessing it - arr = std::vector{}; + glz::json_t arr = std::vector{}; auto& a = arr.get_array(); a.reserve(matches.size()); for (auto* m : matches) diff --git a/utils/funcexp/func_json_merge.cpp b/utils/funcexp/func_json_merge.cpp index 36aae0c654..af9793a79f 100644 --- a/utils/funcexp/func_json_merge.cpp +++ b/utils/funcexp/func_json_merge.cpp @@ -28,7 +28,7 @@ static void merge_in_place(glz::json_t& a, const glz::json_t& b) } // Anything else becomes an array concatenation - glz::json_t arr; + glz::json_t arr = std::vector{}; arr.get_array().reserve((a.is_array() ? a.get_array().size() : 1) + (b.is_array() ? b.get_array().size() : 1)); if (a.is_array()) From 49881f874ac27f5763902cac7058e37628fc0c26 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Sat, 13 Sep 2025 01:59:11 +0000 Subject: [PATCH 11/11] merge --- utils/funcexp/func_json_merge_patch.cpp | 58 ++++++++++++++++++++++++- utils/funcexp/func_json_normalize.cpp | 2 +- utils/funcexp/func_json_object.cpp | 4 +- 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/utils/funcexp/func_json_merge_patch.cpp b/utils/funcexp/func_json_merge_patch.cpp index 390380d470..e78c649061 100644 --- a/utils/funcexp/func_json_merge_patch.cpp +++ b/utils/funcexp/func_json_merge_patch.cpp @@ -16,7 +16,7 @@ static void merge_patch_in_place(glz::json_t& target, const glz::json_t& patch) if (!target.is_object()) { - target = glz::json_t{}; // make it an object + target = glz::json_t::object_t{}; // make it an object } auto& to = target.get_object(); @@ -31,7 +31,19 @@ static void merge_patch_in_place(glz::json_t& target, const glz::json_t& patch) auto it = to.find(k); if (it == to.end()) { - to.emplace(k, pv); + // When the patch member is an object and the key does not exist in target, + // create a new object and merge recursively to honor deletion of null-leaf + // members per RFC 7396. For non-object, assign directly. + if (pv.is_object()) + { + glz::json_t child = glz::json_t::object_t{}; + merge_patch_in_place(child, pv); + to.emplace(k, std::move(child)); + } + else + { + to.emplace(k, pv); + } } else { @@ -69,6 +81,13 @@ std::string Func_json_merge_patch::getStrVal(rowgroup::Row& row, FunctionParm& f return ""; } + // Preserve original target keys order for final serialization (map iteration order is sorted). + std::vector original_keys; + if (target.is_object()) + { + for (const auto& [k, v] : target.get_object()) original_keys.push_back(k); + } + for (size_t i = 1; i < fp.size(); ++i) { const auto patch_ns = fp[i]->data()->getStrVal(row, isNull); @@ -86,6 +105,41 @@ std::string Func_json_merge_patch::getStrVal(rowgroup::Row& row, FunctionParm& f merge_patch_in_place(target, patch); } + // Custom serialize objects to preserve: original keys first, then new keys + if (target.is_object()) + { + const auto& obj = target.get_object(); + std::string out; + out.reserve(128); + out.push_back('{'); + bool first = true; + + auto write_kv = [&](const std::string& key){ + auto it = obj.find(key); + if (it == obj.end()) return; // key removed + if (!first) out += ", "; + first = false; + out.push_back('"'); out += key; out.push_back('"'); out += ": "; + std::string valbuf; + if (auto w = writeJson(it->second, valbuf)) { /* on error, fall back to NULL */ valbuf = "null"; } + out += valbuf; + }; + + // Original keys first + for (const auto& k : original_keys) write_kv(k); + // Then any new keys not in original + for (const auto& [k, v] : obj) + { + if (std::find(original_keys.begin(), original_keys.end(), k) == original_keys.end()) + write_kv(k); + } + + out.push_back('}'); + isNull = false; + return out; + } + + // Non-object: regular writer std::string out; if (auto w = writeJson(target, out)) { diff --git a/utils/funcexp/func_json_normalize.cpp b/utils/funcexp/func_json_normalize.cpp index 30bc76fad7..f280f6b72a 100644 --- a/utils/funcexp/func_json_normalize.cpp +++ b/utils/funcexp/func_json_normalize.cpp @@ -30,7 +30,7 @@ std::string Func_json_normalize::getStrVal(rowgroup::Row& row, FunctionParm& fp, std::string out; - if (auto werr = writeJson(value, out)) + if (auto werr = glz::write_json(value, out)) { isNull = true; return ""; diff --git a/utils/funcexp/func_json_object.cpp b/utils/funcexp/func_json_object.cpp index fd409625e4..28c0c4bbfb 100644 --- a/utils/funcexp/func_json_object.cpp +++ b/utils/funcexp/func_json_object.cpp @@ -14,12 +14,12 @@ execplan::CalpontSystemCatalog::ColType Func_json_object::operationType( } std::string Func_json_object::getStrVal(rowgroup::Row& row, FunctionParm& fp, bool& isNull, - execplan::CalpontSystemCatalog::ColType& type) + execplan::CalpontSystemCatalog::ColType& /*type*/) { if (fp.size() == 0) return "{}"; - glz::json_t obj; + glz::json_t obj = glz::json_t::object_t{}; auto& o = obj.get_object(); auto add_pair = [&](size_t keyIdx, size_t valIdx) -> bool