Skip to content

Commit

Permalink
feat: Add support for UUID comparison functions (facebookincubator#10791
Browse files Browse the repository at this point in the history
)

Summary:
This adds binary comparison functions <, >, <=, >= to the UUID custom data type. Equality functions are already present. Added unit tests for testing a query with comparisons between UUID values.

The ordering is done lexicographically to conform with IETF RFC 4122 https://datatracker.ietf.org/doc/html/rfc4122.html, and also matches Presto Java after prestodb/presto#23311.

Related UUID serialization fix at facebookincubator#11197
From facebookincubator#10584

Pull Request resolved: facebookincubator#10791

Reviewed By: Yuhta

Differential Revision: D66707660

Pulled By: xiaoxmeng

fbshipit-source-id: f49ee07cf172735eadb8a85e533a2919d0bb48b2
  • Loading branch information
BryanCutler authored and facebook-github-bot committed Jan 11, 2025
1 parent 9dcfd39 commit 022cd87
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 13 deletions.
29 changes: 16 additions & 13 deletions velox/exec/fuzzer/PrestoQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "velox/functions/prestosql/types/IPAddressType.h"
#include "velox/functions/prestosql/types/IPPrefixType.h"
#include "velox/functions/prestosql/types/JsonType.h"
#include "velox/functions/prestosql/types/UuidType.h"
#include "velox/serializers/PrestoSerializer.h"
#include "velox/type/parser/TypeParser.h"

Expand Down Expand Up @@ -426,18 +427,19 @@ bool PrestoQueryRunner::isConstantExprSupported(
const core::TypedExprPtr& expr) {
if (std::dynamic_pointer_cast<const core::ConstantTypedExpr>(expr)) {
// TODO: support constant literals of these types. Complex-typed constant
// literals require support of converting them to SQL. Json, Ipaddress, and
// Ipprefix can be enabled after we're able to generate valid input values,
// because when these types are used as the type of a constant literal in
// SQL, Presto implicitly invoke json_parse(), cast(x as Ipaddress), and
// cast(x as Ipprefix) on it, which makes the behavior of Presto different
// from Velox. Timestamp constant literals require further investigation to
// ensure Presto uses the same timezone as Velox. Interval type cannot be
// used as the type of constant literals in Presto SQL.
// literals require support of converting them to SQL. Json, Ipaddress,
// Ipprefix, and Uuid can be enabled after we're able to generate valid
// input values, because when these types are used as the type of a constant
// literal in SQL, Presto implicitly invokes json_parse(),
// cast(x as Ipaddress), cast(x as Ipprefix) and cast(x as uuid) on it,
// which makes the behavior of Presto different from Velox. Timestamp
// constant literals require further investigation to ensure Presto uses the
// same timezone as Velox. Interval type cannot be used as the type of
// constant literals in Presto SQL.
auto& type = expr->type();
return type->isPrimitiveType() && !type->isTimestamp() &&
!isJsonType(type) && !type->isIntervalDayTime() &&
!isIPAddressType(type) && !isIPPrefixType(type);
!isIPAddressType(type) && !isIPPrefixType(type) && !isUuidType(type);
}
return true;
}
Expand All @@ -448,16 +450,17 @@ bool PrestoQueryRunner::isSupported(const exec::FunctionSignature& signature) {
// cast-to or constant literals. Hyperloglog can only be casted from varbinary
// and cannot be used as the type of constant literals. Interval year to month
// can only be casted from NULL and cannot be used as the type of constant
// literals. Json, Ipaddress, and Ipprefix require special handling, because
// Presto requires literals of these types to be valid, and doesn't allow
// creating HIVE columns of these types.
// literals. Json, Ipaddress, Ipprefix, and UUID require special handling,
// because Presto requires literals of these types to be valid, and doesn't
// allow creating HIVE columns of these types.
return !(
usesTypeName(signature, "interval year to month") ||
usesTypeName(signature, "hugeint") ||
usesTypeName(signature, "hyperloglog") ||
usesInputTypeName(signature, "json") ||
usesInputTypeName(signature, "ipaddress") ||
usesInputTypeName(signature, "ipprefix"));
usesInputTypeName(signature, "ipprefix") ||
usesInputTypeName(signature, "uuid"));
}

std::optional<std::string> PrestoQueryRunner::toSql(
Expand Down
23 changes: 23 additions & 0 deletions velox/functions/prestosql/UuidFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,29 @@

#include "velox/functions/Macros.h"
#include "velox/functions/Registerer.h"
#include "velox/functions/prestosql/Comparisons.h"
#include "velox/functions/prestosql/types/UuidType.h"

namespace facebook::velox::functions {

// Generates a simple-function struct named <Name>Uuid whose call() writes the
// result of `lhs op rhs` for two UUID arguments. Both operands are converted
// to uint128_t before the operator is applied, so the comparison is carried
// out on unsigned 128-bit values. The macro is only needed for the four
// instantiations below and is undefined right after them.
#define VELOX_GEN_BINARY_EXPR_UUID(Name, op)                                \
  template <typename T>                                                     \
  struct Name##Uuid {                                                       \
    VELOX_DEFINE_FUNCTION_TYPES(T);                                         \
                                                                            \
    FOLLY_ALWAYS_INLINE void call(                                          \
        bool& result,                                                       \
        const arg_type<Uuid>& lhs,                                          \
        const arg_type<Uuid>& rhs) {                                        \
      result = static_cast<uint128_t>(lhs) op static_cast<uint128_t>(rhs);  \
    }                                                                       \
  };

// LtFunctionUuid, GtFunctionUuid, LteFunctionUuid and GteFunctionUuid.
VELOX_GEN_BINARY_EXPR_UUID(LtFunction, <);
VELOX_GEN_BINARY_EXPR_UUID(GtFunction, >);
VELOX_GEN_BINARY_EXPR_UUID(LteFunction, <=);
VELOX_GEN_BINARY_EXPR_UUID(GteFunction, >=);

#undef VELOX_GEN_BINARY_EXPR_UUID

template <typename T>
struct UuidFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand All @@ -42,6 +61,10 @@ struct UuidFunction {
/// Registers the UUID type followed by the UUID scalar functions.
///
/// Each function is registered under a single alias formed by prepending
/// `prefix` to its base name: "uuid", "lt", "gt", "lte" and "gte".
///
/// @param prefix String prepended to every registered function name.
inline void registerUuidFunctions(const std::string& prefix) {
  // Builds the fully prefixed name for one function.
  const auto qualified = [&prefix](const char* baseName) {
    return prefix + baseName;
  };

  registerUuidType();

  // UUID generator.
  registerFunction<UuidFunction, Uuid>({qualified("uuid")});

  // Ordering comparisons between two UUID values.
  registerFunction<LtFunctionUuid, bool, Uuid, Uuid>({qualified("lt")});
  registerFunction<GtFunctionUuid, bool, Uuid, Uuid>({qualified("gt")});
  registerFunction<LteFunctionUuid, bool, Uuid, Uuid>({qualified("lte")});
  registerFunction<GteFunctionUuid, bool, Uuid, Uuid>({qualified("gte")});
}

} // namespace facebook::velox::functions
114 changes: 114 additions & 0 deletions velox/functions/prestosql/tests/UuidFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,119 @@ TEST_F(UuidFunctionsTest, unsupportedCast) {
evaluate("cast(123 as uuid())", input), "Cannot cast BIGINT to UUID.");
}

// Exercises <, >, <=, >=, == and != between values cast from strings to UUID.
TEST_F(UuidFunctionsTest, comparisons) {
  // Casts both string inputs to UUID and evaluates `left <op> right`.
  const auto compareUuids = [&](const std::optional<std::string>& left,
                                const std::string& op,
                                const std::optional<std::string>& right) {
    return evaluateOnce<bool>(
        fmt::format("cast(c0 as uuid) {} cast(c1 as uuid)", op), left, right);
  };

  // UUID strings that are reused by several assertions.
  const std::string kZeroUuid = "00000000-0000-0000-0000-000000000000";
  const std::string kMaxUuid = "ffffffff-ffff-ffff-ffff-ffffffffffff";
  const std::string kSampleA = "33355449-2c7d-43d7-967a-f53cd23215ad";
  const std::string kSampleB = "f768f36d-4f09-4da7-a298-3564d8f3c986";

  // Strict comparisons against the all-zero and all-ones values.
  ASSERT_EQ(true, compareUuids(kSampleA, "<", kMaxUuid));
  ASSERT_EQ(false, compareUuids(kSampleA, "<", kZeroUuid));
  ASSERT_EQ(true, compareUuids(kSampleB, ">", kZeroUuid));
  ASSERT_EQ(false, compareUuids(kSampleB, ">", kMaxUuid));

  // Non-strict comparisons, including a value compared with itself.
  ASSERT_EQ(true, compareUuids(kSampleA, "<=", kSampleA));
  ASSERT_EQ(true, compareUuids(kSampleA, "<=", kMaxUuid));
  ASSERT_EQ(true, compareUuids(kSampleA, ">=", kSampleA));
  ASSERT_EQ(true, compareUuids(kMaxUuid, ">=", kSampleA));

  // Equality and inequality.
  ASSERT_EQ(true, compareUuids(kSampleB, "==", kSampleB));
  ASSERT_EQ(
      true,
      compareUuids("eed9f812-4b0c-472f-8a10-4ae7bff79a47", "!=", kSampleB));

  // The leftmost differing digits decide the result, even when digits further
  // to the right would order the operands the other way.
  ASSERT_EQ(
      true,
      compareUuids(
          "11000000-0000-0022-0000-000000000000",
          "<",
          "22000000-0000-0011-0000-000000000000"));
  ASSERT_EQ(
      true,
      compareUuids(
          "00000000-0000-0000-2200-000000000011",
          ">",
          "00000000-0000-0000-1100-000000000022"));
  ASSERT_EQ(
      false,
      compareUuids(
          "00000000-0000-0000-0000-000000000011",
          ">",
          "22000000-0000-0000-0000-000000000000"));
  ASSERT_EQ(
      false,
      compareUuids(
          "11000000-0000-0000-0000-000000000000",
          "<",
          "00000000-0000-0000-0000-000000000022"));

  std::string probeLhs = "12342345-3456-4567-5678-678978908901";
  std::string probeRhs = "23451234-4567-3456-6789-567889017890";
  ASSERT_EQ(true, compareUuids(probeLhs, "<", probeRhs));

  // Zero out one more hex digit (same position in both operands) on every
  // step, left to right, and check that `<` agrees with the ordering of
  // boost::uuids::uuid for the resulting pair. Dash positions are skipped.
  for (vector_size_t pos = 0; pos < probeLhs.size(); pos++) {
    if (probeLhs[pos] != '-') {
      probeLhs[pos] = '0';
      probeRhs[pos] = '0';
      const auto boostLhs = boost::lexical_cast<boost::uuids::uuid>(probeLhs);
      const auto boostRhs = boost::lexical_cast<boost::uuids::uuid>(probeRhs);
      const bool expected = boostLhs < boostRhs;
      ASSERT_EQ(expected, compareUuids(probeLhs, "<", probeRhs));
    }
  }
}

} // namespace
} // namespace facebook::velox::functions::prestosql

0 comments on commit 022cd87

Please sign in to comment.