Skip to content

Commit

Permalink
feat: Deprecate no_retention config and session property
Browse files Browse the repository at this point in the history
Differential Revision: D68176092
  • Loading branch information
zacw7 authored and facebook-github-bot committed Jan 16, 2025
1 parent 4ad5a8a commit 3f5ebc4
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 53 deletions.
6 changes: 0 additions & 6 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,12 +214,6 @@ bool HiveConfig::readStatsBasedFilterReorderDisabled(
config_->get<bool>(kReadStatsBasedFilterReorderDisabled, false));
}

bool HiveConfig::cacheNoRetention(const config::ConfigBase* session) const {
return session->get<bool>(
kCacheNoRetentionSession,
config_->get<bool>(kCacheNoRetention, /*defaultValue=*/false));
}

std::string HiveConfig::hiveLocalDataPath() const {
return config_->get<std::string>(kLocalDataPath, "");
}
Expand Down
9 changes: 0 additions & 9 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,6 @@ class HiveConfig {
static constexpr const char* kReadStatsBasedFilterReorderDisabledSession =
"hive.reader.stats_based_filter_reorder_disabaled";

static constexpr const char* kCacheNoRetention = "cache.no_retention";
static constexpr const char* kCacheNoRetentionSession = "cache.no_retention";
static constexpr const char* kLocalDataPath = "hive_local_data_path";
static constexpr const char* kLocalFileFormat = "hive_local_file_format";

Expand Down Expand Up @@ -236,13 +234,6 @@ class HiveConfig {
bool readStatsBasedFilterReorderDisabled(
const config::ConfigBase* session) const;

/// Returns true to evict out a query scanned data out of in-memory cache
/// right after the access, and also skip staging to the ssd cache. This helps
/// to prevent the cache space pollution from the one-time table scan by large
/// batch query when mixed running with interactive query which has high data
/// locality.
bool cacheNoRetention(const config::ConfigBase* session) const;

/// Returns the file system path containing local data. If non-empty,
/// initializes LocalHiveConnectorMetadata to provide metadata for the tables
/// in the directory.
Expand Down
3 changes: 1 addition & 2 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -577,8 +577,7 @@ void configureReaderOptions(
readerOptions.setFooterEstimatedSize(hiveConfig->footerEstimatedSize());
readerOptions.setFilePreloadThreshold(hiveConfig->filePreloadThreshold());
readerOptions.setPrefetchRowGroups(hiveConfig->prefetchRowGroups());
readerOptions.setNoCacheRetention(
hiveConfig->cacheNoRetention(sessionProperties) || !hiveSplit->cacheable);
readerOptions.setNoCacheRetention(!hiveSplit->cacheable);
const auto& sessionTzName = connectorQueryCtx->sessionTimezone();
if (!sessionTzName.empty()) {
const auto timezone = tz::locateZone(sessionTzName);
Expand Down
2 changes: 0 additions & 2 deletions velox/connectors/hive/tests/HiveConfigTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ TEST(HiveConfigTest, overrideConfig) {
{HiveConfig::kSortWriterMaxOutputRows, "100"},
{HiveConfig::kSortWriterMaxOutputBytes, "100MB"},
{HiveConfig::kSortWriterFinishTimeSliceLimitMs, "400"},
{HiveConfig::kCacheNoRetention, "true"},
{HiveConfig::kReadStatsBasedFilterReorderDisabled, "true"},
{HiveConfig::kLoadQuantum, std::to_string(4 << 20)}};
HiveConfig hiveConfig(
Expand Down Expand Up @@ -121,7 +120,6 @@ TEST(HiveConfigTest, overrideSession) {
{HiveConfig::kPartitionPathAsLowerCaseSession, "false"},
{HiveConfig::kAllowNullPartitionKeysSession, "false"},
{HiveConfig::kIgnoreMissingFilesSession, "true"},
{HiveConfig::kCacheNoRetentionSession, "true"},
{HiveConfig::kReadStatsBasedFilterReorderDisabledSession, "true"},
{HiveConfig::kLoadQuantumSession, std::to_string(4 << 20)}};
const auto session =
Expand Down
15 changes: 3 additions & 12 deletions velox/connectors/hive/tests/HiveConnectorUtilTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,30 +266,21 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) {

TEST_F(HiveConnectorUtilTest, cacheRetention) {
struct {
bool sessionNoCacheRetention;
bool splitCacheable;
bool expectedNoCacheRetention;

std::string debugString() const {
return fmt::format(
"sessionNoCacheRetention {}, splitCacheable {}, expectedNoCacheRetention {}",
sessionNoCacheRetention,
"splitCacheable {}, expectedNoCacheRetention {}",
splitCacheable,
expectedNoCacheRetention);
}
} testSettings[] = {
{false, false, true},
{true, false, true},
{false, true, false},
{true, true, true}};
} testSettings[] = {{false, true}, {true, false}};

for (const auto& testData : testSettings) {
SCOPED_TRACE(testData.debugString());

std::unordered_map<std::string, std::string> sessionConfigs;
sessionConfigs[hive::HiveConfig::kCacheNoRetentionSession] =
testData.sessionNoCacheRetention ? "true" : "false";
config::ConfigBase sessionProperties(std::move(sessionConfigs));
config::ConfigBase sessionProperties({});
auto hiveConfig =
std::make_shared<hive::HiveConfig>(std::make_shared<config::ConfigBase>(
std::unordered_map<std::string, std::string>()));
Expand Down
34 changes: 12 additions & 22 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5164,22 +5164,16 @@ TEST_F(TableScanTest, noCacheRetention) {
createDuckDbTable(vectors);

struct {
bool sessionNoCacheRetention;
bool splitCacheable;
bool expectedNoCacheRetention;
bool expectSplitCached;

std::string debugString() const {
return fmt::format(
"sessionNoCacheRetention {}, splitCacheable {}, expectedNoCacheRetention {}",
sessionNoCacheRetention,
"splitCacheable {}, expectSplitCached {}",
splitCacheable,
expectedNoCacheRetention);
expectSplitCached);
}
} testSettings[] = {
{false, false, true},
{true, false, true},
{false, true, false},
{true, true, true}};
} testSettings[] = {{false, false}, {true, true}};

for (const auto& testData : testSettings) {
SCOPED_TRACE(testData.debugString());
Expand All @@ -5191,10 +5185,6 @@ TEST_F(TableScanTest, noCacheRetention) {
0,
testData.splitCacheable);
AssertQueryBuilder(tableScanNode(), duckDbQueryRunner_)
.connectorSessionProperty(
kHiveConnectorId,
connector::hive::HiveConfig::kCacheNoRetentionSession,
testData.sessionNoCacheRetention ? "true" : "false")
.split(std::move(split))
.assertResults("SELECT * FROM tmp");
waitForAllTasksToBeDeleted();
Expand All @@ -5206,14 +5196,7 @@ TEST_F(TableScanTest, noCacheRetention) {
for (const auto& cacheEntry : cacheEntries) {
const auto cacheEntryHelper =
cache::test::AsyncDataCacheEntryTestHelper(cacheEntry);
if (testData.expectedNoCacheRetention) {
if (!cacheEntryHelper.firstUse()) {
ASSERT_EQ(cacheEntryHelper.accessStats().lastUse, 0)
<< cacheEntry->toString();
}
ASSERT_EQ(cacheEntryHelper.accessStats().numUses, 0)
<< cacheEntry->toString();
} else {
if (testData.expectSplitCached) {
if (cacheEntryHelper.firstUse()) {
ASSERT_EQ(cacheEntryHelper.accessStats().numUses, 0)
<< cacheEntry->toString();
Expand All @@ -5223,6 +5206,13 @@ TEST_F(TableScanTest, noCacheRetention) {
}
ASSERT_NE(cacheEntryHelper.accessStats().lastUse, 0)
<< cacheEntry->toString();
} else {
if (!cacheEntryHelper.firstUse()) {
ASSERT_EQ(cacheEntryHelper.accessStats().lastUse, 0)
<< cacheEntry->toString();
}
ASSERT_EQ(cacheEntryHelper.accessStats().numUses, 0)
<< cacheEntry->toString();
}
}
}
Expand Down

0 comments on commit 3f5ebc4

Please sign in to comment.