diff --git a/velox/connectors/hive/HiveConfig.cpp b/velox/connectors/hive/HiveConfig.cpp index 77d3af367d0b..6276ae0f4bd4 100644 --- a/velox/connectors/hive/HiveConfig.cpp +++ b/velox/connectors/hive/HiveConfig.cpp @@ -214,12 +214,6 @@ bool HiveConfig::readStatsBasedFilterReorderDisabled( config_->get(kReadStatsBasedFilterReorderDisabled, false)); } -bool HiveConfig::cacheNoRetention(const config::ConfigBase* session) const { - return session->get( - kCacheNoRetentionSession, - config_->get(kCacheNoRetention, /*defaultValue=*/false)); -} - std::string HiveConfig::hiveLocalDataPath() const { return config_->get(kLocalDataPath, ""); } diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h index a4a52dcd6d2f..512a1d5db0dd 100644 --- a/velox/connectors/hive/HiveConfig.h +++ b/velox/connectors/hive/HiveConfig.h @@ -169,8 +169,6 @@ class HiveConfig { static constexpr const char* kReadStatsBasedFilterReorderDisabledSession = "hive.reader.stats_based_filter_reorder_disabaled"; - static constexpr const char* kCacheNoRetention = "cache.no_retention"; - static constexpr const char* kCacheNoRetentionSession = "cache.no_retention"; static constexpr const char* kLocalDataPath = "hive_local_data_path"; static constexpr const char* kLocalFileFormat = "hive_local_file_format"; @@ -236,13 +234,6 @@ class HiveConfig { bool readStatsBasedFilterReorderDisabled( const config::ConfigBase* session) const; - /// Returns true to evict out a query scanned data out of in-memory cache - /// right after the access, and also skip staging to the ssd cache. This helps - /// to prevent the cache space pollution from the one-time table scan by large - /// batch query when mixed running with interactive query which has high data - /// locality. - bool cacheNoRetention(const config::ConfigBase* session) const; - /// Returns the file system path containing local data. If non-empty, /// initializes LocalHiveConnectorMetadata to provide metadata for the tables /// in the directory. diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index 38fe785fd6c8..1d47b0f5a75a 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -577,8 +577,7 @@ void configureReaderOptions( readerOptions.setFooterEstimatedSize(hiveConfig->footerEstimatedSize()); readerOptions.setFilePreloadThreshold(hiveConfig->filePreloadThreshold()); readerOptions.setPrefetchRowGroups(hiveConfig->prefetchRowGroups()); - readerOptions.setNoCacheRetention( - hiveConfig->cacheNoRetention(sessionProperties) || !hiveSplit->cacheable); + readerOptions.setNoCacheRetention(!hiveSplit->cacheable); const auto& sessionTzName = connectorQueryCtx->sessionTimezone(); if (!sessionTzName.empty()) { const auto timezone = tz::locateZone(sessionTzName); diff --git a/velox/connectors/hive/tests/HiveConfigTest.cpp b/velox/connectors/hive/tests/HiveConfigTest.cpp index 0e1b7edfe43a..50ada7bde4df 100644 --- a/velox/connectors/hive/tests/HiveConfigTest.cpp +++ b/velox/connectors/hive/tests/HiveConfigTest.cpp @@ -73,7 +73,6 @@ TEST(HiveConfigTest, overrideConfig) { {HiveConfig::kSortWriterMaxOutputRows, "100"}, {HiveConfig::kSortWriterMaxOutputBytes, "100MB"}, {HiveConfig::kSortWriterFinishTimeSliceLimitMs, "400"}, - {HiveConfig::kCacheNoRetention, "true"}, {HiveConfig::kReadStatsBasedFilterReorderDisabled, "true"}, {HiveConfig::kLoadQuantum, std::to_string(4 << 20)}}; HiveConfig hiveConfig( @@ -121,7 +120,6 @@ TEST(HiveConfigTest, overrideSession) { {HiveConfig::kPartitionPathAsLowerCaseSession, "false"}, {HiveConfig::kAllowNullPartitionKeysSession, "false"}, {HiveConfig::kIgnoreMissingFilesSession, "true"}, - {HiveConfig::kCacheNoRetentionSession, "true"}, {HiveConfig::kReadStatsBasedFilterReorderDisabledSession, "true"}, {HiveConfig::kLoadQuantumSession, std::to_string(4 << 20)}}; const auto session = diff --git a/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp b/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp index 934b7e978873..b2307483e146 100644 --- a/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp +++ b/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp @@ -266,30 +266,21 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) { TEST_F(HiveConnectorUtilTest, cacheRetention) { struct { - bool sessionNoCacheRetention; bool splitCacheable; bool expectedNoCacheRetention; std::string debugString() const { return fmt::format( - "sessionNoCacheRetention {}, splitCacheable {}, expectedNoCacheRetention {}", - sessionNoCacheRetention, + "splitCacheable {}, expectedNoCacheRetention {}", splitCacheable, expectedNoCacheRetention); } - } testSettings[] = { - {false, false, true}, - {true, false, true}, - {false, true, false}, - {true, true, true}}; + } testSettings[] = {{false, true}, {true, false}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); - std::unordered_map sessionConfigs; - sessionConfigs[hive::HiveConfig::kCacheNoRetentionSession] = - testData.sessionNoCacheRetention ? "true" : "false"; - config::ConfigBase sessionProperties(std::move(sessionConfigs)); + config::ConfigBase sessionProperties({}); auto hiveConfig = std::make_shared(std::make_shared( std::unordered_map())); diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index 92bb6016f6ad..85c5f1e139e2 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -5164,22 +5164,16 @@ TEST_F(TableScanTest, noCacheRetention) { createDuckDbTable(vectors); struct { - bool sessionNoCacheRetention; bool splitCacheable; - bool expectedNoCacheRetention; + bool expectSplitCached; std::string debugString() const { return fmt::format( - "sessionNoCacheRetention {}, splitCacheable {}, expectedNoCacheRetention {}", - sessionNoCacheRetention, + "splitCacheable {}, expectSplitCached {}", splitCacheable, - expectedNoCacheRetention); + expectSplitCached); } - } testSettings[] = { - {false, false, true}, - {true, false, true}, - {false, true, false}, - {true, true, true}}; + } testSettings[] = {{false, false}, {true, true}}; for (const auto& testData : testSettings) { SCOPED_TRACE(testData.debugString()); @@ -5191,10 +5185,6 @@ TEST_F(TableScanTest, noCacheRetention) { 0, testData.splitCacheable); AssertQueryBuilder(tableScanNode(), duckDbQueryRunner_) - .connectorSessionProperty( - kHiveConnectorId, - connector::hive::HiveConfig::kCacheNoRetentionSession, - testData.sessionNoCacheRetention ? "true" : "false") .split(std::move(split)) .assertResults("SELECT * FROM tmp"); waitForAllTasksToBeDeleted(); @@ -5206,14 +5196,7 @@ TEST_F(TableScanTest, noCacheRetention) { for (const auto& cacheEntry : cacheEntries) { const auto cacheEntryHelper = cache::test::AsyncDataCacheEntryTestHelper(cacheEntry); - if (testData.expectedNoCacheRetention) { - if (!cacheEntryHelper.firstUse()) { - ASSERT_EQ(cacheEntryHelper.accessStats().lastUse, 0) - << cacheEntry->toString(); - } - ASSERT_EQ(cacheEntryHelper.accessStats().numUses, 0) - << cacheEntry->toString(); - } else { + if (testData.expectSplitCached) { if (cacheEntryHelper.firstUse()) { ASSERT_EQ(cacheEntryHelper.accessStats().numUses, 0) << cacheEntry->toString(); @@ -5223,6 +5206,13 @@ TEST_F(TableScanTest, noCacheRetention) { } ASSERT_NE(cacheEntryHelper.accessStats().lastUse, 0) << cacheEntry->toString(); + } else { + if (!cacheEntryHelper.firstUse()) { + ASSERT_EQ(cacheEntryHelper.accessStats().lastUse, 0) + << cacheEntry->toString(); + } + ASSERT_EQ(cacheEntryHelper.accessStats().numUses, 0) + << cacheEntry->toString(); } } }