Skip to content

Commit

Permalink
fix: Fixe handling of user errors in json_parse (facebookincubator#12272
Browse files Browse the repository at this point in the history
)

Summary:
Pull Request resolved: facebookincubator#12272

json_parse currently only properly propagates user errors that come from
normalizedSizeForJsonParse or from simd when parsing the JSON.

User errors can come from elsewhere in the UDF, e.g. the generateViews logic, which are not
caught and thus fail the query even when the UDF is wrapped in a "try".

To fix this I've wrapped the bulk of the logic that gets executed on a single row in a try catch that
handles user errors, propagating them to the EvalCtx.

Reviewed By: Yuhta, kgpai

Differential Revision: D69211907

fbshipit-source-id: 2ee7422c0f80a35b6f44221fe4942ab2cc04ae3a
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Feb 6, 2025
1 parent 4caf5d1 commit 3375085
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 32 deletions.
85 changes: 53 additions & 32 deletions velox/functions/prestosql/JsonFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <glog/logging.h>

#include "velox/common/base/SortingNetwork.h"
#include "velox/expression/VectorFunction.h"
#include "velox/functions/lib/Utf8Utils.h"
Expand Down Expand Up @@ -151,35 +153,40 @@ class JsonParseFunction : public exec::VectorFunction {
const auto& arg = args[0];
if (arg->isConstantEncoding()) {
auto value = arg->as<ConstantVector<StringView>>()->valueAt(0);
bool needNormalize =
needNormalizeForJsonParse(value.data(), value.size());
auto size = value.size();
if (needNormalize) {
try {
auto buffer = AlignedBuffer::allocate<char>(size, context.pool());
BufferPtr stringViews =
AlignedBuffer::allocate<StringView>(1, context.pool());
auto rawStringViews = stringViews->asMutable<StringView>();

try {
bool needNormalize =
needNormalizeForJsonParse(value.data(), value.size());

if (needNormalize) {
size = normalizedSizeForJsonParse(value.data(), value.size());
} catch (const VeloxException& e) {
if (!e.isUserError()) {
throw;
}
context.setErrors(rows, std::current_exception());
}
paddedInput_.resize(size + simdjson::SIMDJSON_PADDING);
VELOX_CHECK_EQ(prepareInput(value, needNormalize), size);

if (auto error = parse(size, needNormalize)) {
context.setErrors(rows, errors_[error]);
return;
}
}
paddedInput_.resize(size + simdjson::SIMDJSON_PADDING);
VELOX_CHECK_EQ(prepareInput(value, needNormalize), size);
auto* output = buffer->asMutable<char>();
auto outputSize = concatViews(views_, output);
rawStringViews[0] = StringView(output, outputSize);
} catch (const VeloxException& e) {
if (!e.isUserError()) {
throw;
}
context.setErrors(rows, std::current_exception());

FB_LOG_EVERY_MS(WARNING, 1000)
<< "Caught user error in json_parse: " << e.message();

auto buffer = AlignedBuffer::allocate<char>(size, context.pool());
if (auto error = parse(size, needNormalize)) {
context.setErrors(rows, errors_[error]);
return;
}
auto* output = buffer->asMutable<char>();
auto outputSize = concatViews(views_, output);

BufferPtr stringViews =
AlignedBuffer::allocate<StringView>(1, context.pool());
auto rawStringViews = stringViews->asMutable<StringView>();
rawStringViews[0] = StringView(output, outputSize);

auto constantBase = std::make_shared<FlatVector<StringView>>(
context.pool(),
Expand Down Expand Up @@ -234,17 +241,31 @@ class JsonParseFunction : public exec::VectorFunction {
if (hasError[row]) {
return;
}
auto value = flatInput->valueAt(row);
auto size = prepareInput(value, needNormalizes[row]);
if (auto error = parse(size, needNormalizes[row])) {
context.setVeloxExceptionError(row, errors_[error]);

try {
auto value = flatInput->valueAt(row);
auto size = prepareInput(value, needNormalizes[row]);
if (auto error = parse(size, needNormalizes[row])) {
context.setVeloxExceptionError(row, errors_[error]);
clearState();
return;
}
auto outputSize = concatViews(views_, output);
rawStringViews[row] = StringView(output, outputSize);
if (!StringView::isInline(outputSize)) {
output += outputSize;
}
} catch (const VeloxException& e) {
if (!e.isUserError()) {
throw;
}

context.setVeloxExceptionError(row, std::current_exception());

FB_LOG_EVERY_MS(WARNING, 1000)
<< "Caught user error in json_parse: " << e.message();

clearState();
return;
}
auto outputSize = concatViews(views_, output);
rawStringViews[row] = StringView(output, outputSize);
if (!StringView::isInline(outputSize)) {
output += outputSize;
}
});

Expand Down
32 changes: 32 additions & 0 deletions velox/functions/prestosql/tests/JsonFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,38 @@ TEST_F(JsonFunctionsTest, jsonParse) {

velox::test::assertEqualVectors(expected, result);
}

// Test try with invalid escape sequence.
{
auto data = makeRowVector({makeFlatVector<StringView>({
// The logic for sorting keys checks the validity of escape sequences
// and will throw a user error in this case.
"{\"k\\i\":\"abc\",\"k2\":\"xyz\"}", // invalid json
// Add a second value to ensure the state is cleared.
R"([{"k1": "v1" }, {"k2": "v2" }])" // valid json
})});

auto result = evaluate("try(json_parse(c0))", data);

auto expected = makeNullableFlatVector<StringView>(
{std::nullopt, R"([{"k1":"v1"},{"k2":"v2"}])"}, JSON());

velox::test::assertEqualVectors(expected, result);
}

// Test try with invalid escape sequence going through fast path for
// constants.
{
auto data =
makeRowVector({makeConstant("{\"k\\i\":\"abc\",\"k2\":\"xyz\"}", 3)});

auto result = evaluate("try(json_parse(c0))", data);

auto expected = makeNullableFlatVector<StringView>(
{std::nullopt, std::nullopt, std::nullopt}, JSON());

velox::test::assertEqualVectors(expected, result);
}
}

TEST_F(JsonFunctionsTest, canonicalization) {
Expand Down

0 comments on commit 3375085

Please sign in to comment.