Skip to content

Commit

Permalink
refactor(dwio): Rewrite partition file path unescape utility function…
Browse files Browse the repository at this point in the history
…s using folly::uriUnescape() (facebookincubator#12036)

Summary:
Refactor the uri unescape utility function using folly:: uriUnescape

NOTE: UriEscape refactor is not part of this PR due to its underlying escape
char set is only a subset of folly::uriEscape's char set causing slightly
behavior discrepancy

Pull Request resolved: facebookincubator#12036

Test Plan:
```
buck test //velox/dwio/catalog/fbhive/test:file-utils-tests
```

## Koski error events before the change
```
Expression: (*__errno_location ()) != 34 && end == tmp.data() + HEX_WIDTH
Function: unescapePathName
File: fbcode/velox/dwio/catalog/fbhive/FileUtils.cpp
Line: 154
```
https://fburl.com/scuba/xldb_koski_queries/c9z0941x

Reviewed By: spershin

Differential Revision: D67917130

Pulled By: darrenfu

fbshipit-source-id: 2b54b5291bfb9a5d58f09e2c08dffea49702184c
  • Loading branch information
darrenfu authored and facebook-github-bot committed Jan 10, 2025
1 parent c965e7c commit b13e209
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 25 deletions.
4 changes: 2 additions & 2 deletions velox/dwio/catalog/fbhive/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
# limitations under the License.

velox_add_library(velox_dwio_catalog_fbhive FileUtils.cpp)
velox_link_libraries(velox_dwio_catalog_fbhive velox_dwio_common_exception
fmt::fmt Folly::folly)
velox_link_libraries(velox_dwio_catalog_fbhive velox_exception fmt::fmt
Folly::folly)

if(${VELOX_BUILD_TESTING})
add_subdirectory(test)
Expand Down
28 changes: 12 additions & 16 deletions velox/dwio/catalog/fbhive/FileUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <fmt/core.h>
#include <folly/container/Array.h>

#include "velox/dwio/common/exception/Exception.h"
#include "velox/common/base/Exceptions.h"

namespace facebook {
namespace velox {
Expand All @@ -31,6 +31,8 @@ namespace fbhive {

namespace {

constexpr auto kUriEscapeMode = folly::UriEscapeMode::ALL;

constexpr size_t HEX_WIDTH = 2;

constexpr auto charsToEscape = folly::make_array(
Expand Down Expand Up @@ -143,20 +145,14 @@ std::string FileUtils::escapePathName(const std::string& data) {
}

std::string FileUtils::unescapePathName(const std::string& data) {
std::string ret;
ret.reserve(data.size());
for (size_t i = 0; i < data.size(); ++i) {
char c = data[i];
if (c == '%' && i + HEX_WIDTH < data.size()) {
std::string tmp{data.data() + i + 1, HEX_WIDTH};
char* end;
c = static_cast<char>(std::strtol(tmp.c_str(), &end, 16));
DWIO_ENSURE(errno != ERANGE && end == tmp.data() + HEX_WIDTH);
i += HEX_WIDTH;
}
ret.append(1, c);
std::string out;
bool success = folly::tryUriUnescape<std::string>(data, out, kUriEscapeMode);
if (!success) {
VELOX_FAIL(
"Due to incomplete percent encode sequence, failed to unescape malformed path name: {}",
data);
}
return ret;
return out;
}

std::string FileUtils::makePartName(
Expand All @@ -166,7 +162,7 @@ std::string FileUtils::makePartName(
size_t escapeCount = 0;
std::for_each(entries.begin(), entries.end(), [&](auto& pair) {
auto keySize = pair.first.size();
DWIO_ENSURE_GT(keySize, 0);
VELOX_CHECK_GT(keySize, 0);
size += keySize;
escapeCount += countEscape(pair.first);

Expand Down Expand Up @@ -212,7 +208,7 @@ std::vector<std::pair<std::string, std::string>> FileUtils::parsePartKeyValues(
std::for_each(parts.begin(), parts.end(), [&](auto& part) {
std::vector<std::string> kv;
folly::split('=', part, kv);
DWIO_ENSURE_EQ(kv.size(), 2);
VELOX_CHECK_EQ(kv.size(), 2);
ret.push_back({unescapePathName(kv[0]), unescapePathName(kv[1])});
});
return ret;
Expand Down
2 changes: 1 addition & 1 deletion velox/dwio/catalog/fbhive/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ add_test(file_utils_test file_utils_test)
target_link_libraries(
file_utils_test
velox_dwio_catalog_fbhive
velox_dwio_common_exception
velox_exception
GTest::gtest
GTest::gtest_main
GTest::gmock)
26 changes: 20 additions & 6 deletions velox/dwio/catalog/fbhive/test/FileUtilsTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "velox/common/base/Exceptions.h"
#include "velox/dwio/catalog/fbhive/FileUtils.h"
#include "velox/dwio/common/exception/Exception.h"

using namespace ::testing;
using namespace facebook::velox::dwio::catalog::fbhive;
using namespace facebook::velox::dwio::common::exception;

TEST(FileUtilsTests, MakePartName) {
std::vector<std::pair<std::string, std::string>> pairs{
Expand All @@ -35,15 +34,30 @@ TEST(FileUtilsTests, MakePartName) {
}

TEST(FileUtilsTests, ParsePartKeyValues) {
ASSERT_THROW(FileUtils::parsePartKeyValues("ds"), LoggedException);
EXPECT_THROW(
FileUtils::parsePartKeyValues("ds"), facebook::velox::VeloxRuntimeError);
EXPECT_THROW(
FileUtils::parsePartKeyValues("ds=/ts"),
facebook::velox::VeloxRuntimeError);

ASSERT_THAT(
try {
FileUtils::parsePartKeyValues("ts=%xx");
FAIL()
<< "Expected VeloxException with message 'incomplete percent encode sequence' but none was thrown.";
} catch (const facebook::velox::VeloxException& e) {
EXPECT_THAT(
e.what(), ::testing::HasSubstr("incomplete percent encode sequence"));
} catch (...) {
FAIL() << "Expected VeloxException but caught an unknown exception type.";
}

EXPECT_THAT(
FileUtils::parsePartKeyValues(
"ds=2016-01-01/foo=__HIVE_DEFAULT_PARTITION__/a%0Ab%3Ac=a%23b%3Dc"),
"ds=2016-01-01/foo=__HIVE_DEFAULT_PARTITION__/a%0Ab%3Ac=a%23b%3Dc%2F"),
ElementsAre(
std::make_pair("ds", "2016-01-01"),
std::make_pair("foo", "__HIVE_DEFAULT_PARTITION__"),
std::make_pair("a\nb:c", "a#b=c")));
std::make_pair("a\nb:c", "a#b=c/")));
}

TEST(FileUtilsTests, ExtractPartitionName) {
Expand Down

0 comments on commit b13e209

Please sign in to comment.