From 0dc6eb8cec1ccd82a7e01b8d7832452d73f7516b Mon Sep 17 00:00:00 2001 From: Bikramjeet Vig Date: Wed, 30 Oct 2024 17:06:52 -0700 Subject: [PATCH] Normalize NaNs in to_ieee754_32 and to_ieee754_64 (#11393) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11393 We noticed a case in the fuzzer where different code paths or builds (specifically ASAN) generated NaNs with different binary representations before being fed to either of the aforementioned UDFs. To be more specific, with ASAN builds we noticed that the exact same input when added returned different results in different iterations. For example, NaN + (-NaN) sometimes returned NaN and sometimes -NAN. When this was fed into to_ieee754_32, the binary representation came out different between fuzzer runs and the results mismatched. Therefore, this change ensures that the output of to_ieee754_32/64 for NaNs is consistent across different binary representations so that to_ieee754_32(NaN) = to_ieee754_32(NaN) and NaN=NaN remain true (in velox and presto all NaNs are considered equal). Reviewed By: kevinwilfong Differential Revision: D65238915 fbshipit-source-id: d9add771ee7a4f25e20bd8e84a8fe6931d17f81a --- velox/functions/prestosql/BinaryFunctions.h | 12 ++++++++++-- .../prestosql/tests/BinaryFunctionsTest.cpp | 9 ++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/velox/functions/prestosql/BinaryFunctions.h b/velox/functions/prestosql/BinaryFunctions.h index ce153ee349fc..c9495daff1bb 100644 --- a/velox/functions/prestosql/BinaryFunctions.h +++ b/velox/functions/prestosql/BinaryFunctions.h @@ -388,7 +388,11 @@ struct ToIEEE754Bits64 { out_type& result, const arg_type& input) { static constexpr auto kTypeLength = sizeof(int64_t); - auto value = folly::Endian::big(input); + // Since we consider NaNs with different binary representation as equal, we + // normalize them to a single value to ensure the output is equal too. + auto value = std::isnan(input) + ? folly::Endian::big(std::numeric_limits::quiet_NaN()) + : folly::Endian::big(input); result.setNoCopy( StringView(reinterpret_cast(&value), kTypeLength)); } @@ -416,7 +420,11 @@ struct ToIEEE754Bits32 { out_type& result, const arg_type& input) { static constexpr auto kTypeLength = sizeof(int32_t); - auto value = folly::Endian::big(input); + // Since we consider NaNs with different binary representation as equal, we + // normalize them to a single value to ensure the output is equal too. + auto value = std::isnan(input) + ? folly::Endian::big(std::numeric_limits::quiet_NaN()) + : folly::Endian::big(input); result.setNoCopy( StringView(reinterpret_cast(&value), kTypeLength)); } diff --git a/velox/functions/prestosql/tests/BinaryFunctionsTest.cpp b/velox/functions/prestosql/tests/BinaryFunctionsTest.cpp index 2ce20c3f58e2..18fb0b8b9a10 100644 --- a/velox/functions/prestosql/tests/BinaryFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/BinaryFunctionsTest.cpp @@ -635,8 +635,10 @@ TEST_F(BinaryFunctionsTest, toIEEE754Bits64) { EXPECT_EQ( hexToDec("7FF8000000000000"), toIEEE754Bits64(std::numeric_limits::quiet_NaN())); + // NaNs are normalized when generating output to ensure they are equal as all + // NaNs are considered equal EXPECT_EQ( - hexToDec("7FF4000000000000"), + hexToDec("7FF8000000000000"), toIEEE754Bits64(std::numeric_limits::signaling_NaN())); EXPECT_EQ( hexToDec("FFF0000000000000"), @@ -698,6 +700,11 @@ TEST_F(BinaryFunctionsTest, toIEEE754Bits32) { EXPECT_EQ( hexToDec("7FC00000"), toIEEE754Bits32(std::numeric_limits::quiet_NaN())); + // NaNs are normalized when generating output to ensure they are equal as all + // NaNs are considered equal + EXPECT_EQ( + hexToDec("7FC00000"), + toIEEE754Bits32(std::numeric_limits::signaling_NaN())); EXPECT_EQ( hexToDec("7F800000"), toIEEE754Bits32(std::numeric_limits::infinity()));