From b13e209b556b092861d1b53842ba8174bb0dd99b Mon Sep 17 00:00:00 2001 From: Darren Fu Date: Thu, 9 Jan 2025 17:54:45 -0800 Subject: [PATCH] refactor(dwio): Rewrite partition file path unescape utility functions using folly::uriUnescape() (#12036) Summary: Refactor the uri unescape utility function using folly:: uriUnescape NOTE: UriEscape refactor is not part of this PR due to its underlying escape char set is only a subset of folly::uriEscape's char set causing slightly behavior discrepancy Pull Request resolved: https://github.com/facebookincubator/velox/pull/12036 Test Plan: ``` buck test //velox/dwio/catalog/fbhive/test:file-utils-tests ``` ## Koski error events before the change ``` Expression: (*__errno_location ()) != 34 && end == tmp.data() + HEX_WIDTH Function: unescapePathName File: fbcode/velox/dwio/catalog/fbhive/FileUtils.cpp Line: 154 ``` https://fburl.com/scuba/xldb_koski_queries/c9z0941x Reviewed By: spershin Differential Revision: D67917130 Pulled By: darrenfu fbshipit-source-id: 2b54b5291bfb9a5d58f09e2c08dffea49702184c --- velox/dwio/catalog/fbhive/CMakeLists.txt | 4 +-- velox/dwio/catalog/fbhive/FileUtils.cpp | 28 ++++++++----------- velox/dwio/catalog/fbhive/test/CMakeLists.txt | 2 +- .../catalog/fbhive/test/FileUtilsTests.cpp | 26 +++++++++++++---- 4 files changed, 35 insertions(+), 25 deletions(-) diff --git a/velox/dwio/catalog/fbhive/CMakeLists.txt b/velox/dwio/catalog/fbhive/CMakeLists.txt index e89210732368..cb6a0d30b50f 100644 --- a/velox/dwio/catalog/fbhive/CMakeLists.txt +++ b/velox/dwio/catalog/fbhive/CMakeLists.txt @@ -13,8 +13,8 @@ # limitations under the License. velox_add_library(velox_dwio_catalog_fbhive FileUtils.cpp) -velox_link_libraries(velox_dwio_catalog_fbhive velox_dwio_common_exception - fmt::fmt Folly::folly) +velox_link_libraries(velox_dwio_catalog_fbhive velox_exception fmt::fmt + Folly::folly) if(${VELOX_BUILD_TESTING}) add_subdirectory(test) diff --git a/velox/dwio/catalog/fbhive/FileUtils.cpp b/velox/dwio/catalog/fbhive/FileUtils.cpp index 94c75dffd995..36d1b29d98e5 100644 --- a/velox/dwio/catalog/fbhive/FileUtils.cpp +++ b/velox/dwio/catalog/fbhive/FileUtils.cpp @@ -21,7 +21,7 @@ #include #include -#include "velox/dwio/common/exception/Exception.h" +#include "velox/common/base/Exceptions.h" namespace facebook { namespace velox { @@ -31,6 +31,8 @@ namespace fbhive { namespace { +constexpr auto kUriEscapeMode = folly::UriEscapeMode::ALL; + constexpr size_t HEX_WIDTH = 2; constexpr auto charsToEscape = folly::make_array( @@ -143,20 +145,14 @@ std::string FileUtils::escapePathName(const std::string& data) { } std::string FileUtils::unescapePathName(const std::string& data) { - std::string ret; - ret.reserve(data.size()); - for (size_t i = 0; i < data.size(); ++i) { - char c = data[i]; - if (c == '%' && i + HEX_WIDTH < data.size()) { - std::string tmp{data.data() + i + 1, HEX_WIDTH}; - char* end; - c = static_cast(std::strtol(tmp.c_str(), &end, 16)); - DWIO_ENSURE(errno != ERANGE && end == tmp.data() + HEX_WIDTH); - i += HEX_WIDTH; - } - ret.append(1, c); + std::string out; + bool success = folly::tryUriUnescape(data, out, kUriEscapeMode); + if (!success) { + VELOX_FAIL( + "Due to incomplete percent encode sequence, failed to unescape malformed path name: {}", + data); } - return ret; + return out; } std::string FileUtils::makePartName( @@ -166,7 +162,7 @@ std::string FileUtils::makePartName( size_t escapeCount = 0; std::for_each(entries.begin(), entries.end(), [&](auto& pair) { auto keySize = pair.first.size(); - DWIO_ENSURE_GT(keySize, 0); + VELOX_CHECK_GT(keySize, 0); size += keySize; escapeCount += countEscape(pair.first); @@ -212,7 +208,7 @@ std::vector> FileUtils::parsePartKeyValues( std::for_each(parts.begin(), parts.end(), [&](auto& part) { std::vector kv; folly::split('=', part, kv); - DWIO_ENSURE_EQ(kv.size(), 2); + VELOX_CHECK_EQ(kv.size(), 2); ret.push_back({unescapePathName(kv[0]), unescapePathName(kv[1])}); }); return ret; diff --git a/velox/dwio/catalog/fbhive/test/CMakeLists.txt b/velox/dwio/catalog/fbhive/test/CMakeLists.txt index 34b279ffdb8c..d7fb52887727 100644 --- a/velox/dwio/catalog/fbhive/test/CMakeLists.txt +++ b/velox/dwio/catalog/fbhive/test/CMakeLists.txt @@ -17,7 +17,7 @@ add_test(file_utils_test file_utils_test) target_link_libraries( file_utils_test velox_dwio_catalog_fbhive - velox_dwio_common_exception + velox_exception GTest::gtest GTest::gtest_main GTest::gmock) diff --git a/velox/dwio/catalog/fbhive/test/FileUtilsTests.cpp b/velox/dwio/catalog/fbhive/test/FileUtilsTests.cpp index b2909cc0da36..042c5ba93080 100644 --- a/velox/dwio/catalog/fbhive/test/FileUtilsTests.cpp +++ b/velox/dwio/catalog/fbhive/test/FileUtilsTests.cpp @@ -16,12 +16,11 @@ #include #include +#include "velox/common/base/Exceptions.h" #include "velox/dwio/catalog/fbhive/FileUtils.h" -#include "velox/dwio/common/exception/Exception.h" using namespace ::testing; using namespace facebook::velox::dwio::catalog::fbhive; -using namespace facebook::velox::dwio::common::exception; TEST(FileUtilsTests, MakePartName) { std::vector> pairs{ @@ -35,15 +34,30 @@ TEST(FileUtilsTests, MakePartName) { } TEST(FileUtilsTests, ParsePartKeyValues) { - ASSERT_THROW(FileUtils::parsePartKeyValues("ds"), LoggedException); + EXPECT_THROW( + FileUtils::parsePartKeyValues("ds"), facebook::velox::VeloxRuntimeError); + EXPECT_THROW( + FileUtils::parsePartKeyValues("ds=/ts"), + facebook::velox::VeloxRuntimeError); - ASSERT_THAT( + try { + FileUtils::parsePartKeyValues("ts=%xx"); + FAIL() + << "Expected VeloxException with message 'incomplete percent encode sequence' but none was thrown."; + } catch (const facebook::velox::VeloxException& e) { + EXPECT_THAT( + e.what(), ::testing::HasSubstr("incomplete percent encode sequence")); + } catch (...) { + FAIL() << "Expected VeloxException but caught an unknown exception type."; + } + + EXPECT_THAT( FileUtils::parsePartKeyValues( - "ds=2016-01-01/foo=__HIVE_DEFAULT_PARTITION__/a%0Ab%3Ac=a%23b%3Dc"), + "ds=2016-01-01/foo=__HIVE_DEFAULT_PARTITION__/a%0Ab%3Ac=a%23b%3Dc%2F"), ElementsAre( std::make_pair("ds", "2016-01-01"), std::make_pair("foo", "__HIVE_DEFAULT_PARTITION__"), - std::make_pair("a\nb:c", "a#b=c"))); + std::make_pair("a\nb:c", "a#b=c/"))); } TEST(FileUtilsTests, ExtractPartitionName) {