diff --git a/core/index/index_meta.hpp b/core/index/index_meta.hpp index d8277ec5d..adce86fb5 100644 --- a/core/index/index_meta.hpp +++ b/core/index/index_meta.hpp @@ -39,6 +39,15 @@ class format; class IndexWriter; struct SegmentInfo { + SegmentInfo() = default; + + // Added for testing purposes. + SegmentInfo( + const std::string& _name, + uint64_t _byte_size + ) : name(_name), byte_size(_byte_size) + {} + bool operator==(const SegmentInfo&) const = default; std::string name; // FIXME(gnusi): move to SegmentMeta diff --git a/core/utils/index_utils.cpp b/core/utils/index_utils.cpp index 5809d6bce..6a3d234b7 100644 --- a/core/utils/index_utils.cpp +++ b/core/utils/index_utils.cpp @@ -28,149 +28,25 @@ #include "formats/format_utils.hpp" -namespace { - -// Returns percentage of live documents -inline double FillFactor(const irs::SegmentInfo& segment) noexcept { - return static_cast(segment.live_docs_count) / - static_cast(segment.docs_count); -} - -// Returns approximated size of a segment in the absence of removals -inline size_t SizeWithoutRemovals(const irs::SegmentInfo& segment) noexcept { - return size_t(static_cast(segment.byte_size) * FillFactor(segment)); -} - namespace tier { -struct SegmentStats { - // cppcheck-suppress noExplicitConstructor - SegmentStats(const irs::SubReader& reader) noexcept - : reader{&reader}, - meta{&reader.Meta()}, - size{SizeWithoutRemovals(*meta)}, - fill_factor{FillFactor(*meta)} {} - - bool operator<(const SegmentStats& rhs) const noexcept { - // cppcheck-suppress constVariable - auto& lhs = *this; - - if (lhs.size == rhs.size) { - if (lhs.fill_factor > rhs.fill_factor) { - return true; - } else if (lhs.fill_factor < rhs.fill_factor) { - return false; - } - - return lhs.meta->name < rhs.meta->name; + // interface to fetch the required attributes from + // SegmentStats struct. + // We use this function in struct ConsolidationCandidate + // to fetch the segment dimensions from the SegmentStats + // struct. + // + void getSegmentDimensions( + const tier::SegmentStats& segment, + tier::SegmentAttributes& attrs) { + + auto* meta = segment.meta; + attrs.byteSize = meta->byte_size; + attrs.docsCount = meta->docs_count; + attrs.liveDocsCount = meta->live_docs_count; } - - return lhs.size < rhs.size; - } - - operator const irs::SubReader*() const noexcept { return reader; } - - const irs::SubReader* reader; - const irs::SegmentInfo* meta; - size_t size; // approximate size of segment without removals - double_t fill_factor; -}; - -struct ConsolidationCandidate { - using iterator_t = std::vector::const_iterator; - using range_t = std::pair; - - explicit ConsolidationCandidate(iterator_t i) noexcept : segments(i, i) {} - - iterator_t begin() const noexcept { return segments.first; } - iterator_t end() const noexcept { return segments.second; } - - range_t segments; - size_t count{0}; - size_t size{0}; // estimated size of the level - double_t score{DBL_MIN}; // how good this permutation is -}; - -/// @returns score of the consolidation bucket -double_t consolidation_score(const ConsolidationCandidate& consolidation, - const size_t segments_per_tier, - const size_t floor_segment_bytes) noexcept { - // to detect how skewed the consolidation we do the following: - // 1. evaluate coefficient of variation, less is better - // 2. good candidates are in range [0;1] - // 3. favor condidates where number of segments is equal to - // 'segments_per_tier' approx - // 4. prefer smaller consolidations - // 5. 
prefer consolidations which clean removals - - switch (consolidation.count) { - case 0: - // empty consolidation makes not sense - return DBL_MIN; - case 1: { - auto& meta = *consolidation.segments.first->meta; - - if (meta.docs_count == meta.live_docs_count) { - // singletone without removals makes no sense - return DBL_MIN; - } - - // FIXME honor number of deletes??? - // signletone with removals makes sense if nothing better is found - return DBL_MIN + DBL_EPSILON; - } - } - - size_t size_before_consolidation = 0; - size_t size_after_consolidation = 0; - size_t size_after_consolidation_floored = 0; - for (auto& segment_stat : consolidation) { - size_before_consolidation += segment_stat.meta->byte_size; - size_after_consolidation += segment_stat.size; - size_after_consolidation_floored += - std::max(segment_stat.size, floor_segment_bytes); - } - - // evaluate coefficient of variation - double sum_square_differences = 0; - const auto segment_size_after_consolidaton_mean = - static_cast(size_after_consolidation_floored) / - static_cast(consolidation.count); - for (auto& segment_stat : consolidation) { - const auto diff = - static_cast(std::max(segment_stat.size, floor_segment_bytes)) - - segment_size_after_consolidaton_mean; - sum_square_differences += diff * diff; - } - - const auto stdev = std::sqrt(sum_square_differences / - static_cast(consolidation.count)); - const auto cv = (stdev / segment_size_after_consolidaton_mean); - - // evaluate initial score - auto score = 1. - cv; - - // favor consolidations that contain approximately the requested number of - // segments - score *= std::pow(static_cast(consolidation.count) / - static_cast(segments_per_tier), - 1.5); - - // FIXME use relative measure, e.g. cosolidation_size/total_size - // carefully prefer smaller consolidations over the bigger ones - score /= std::pow(size_after_consolidation, 0.5); - - // favor consolidations which clean out removals - score /= std::pow(static_cast(size_after_consolidation) / - static_cast(size_before_consolidation), - 2); - - return score; } -} // namespace tier -} // namespace - namespace irs::index_utils { ConsolidationPolicy MakePolicy(const ConsolidateBytes& options) { @@ -391,6 +267,9 @@ ConsolidationPolicy MakePolicy(const ConsolidateTier& options) { /// if /// - segment size is greater than 'max_segments_bytes / 2' /// - segment has many documents but only few deletions + /// + /// TODO - too_big_segments_threshold formula is unreasonable + /// - add unit tests as well /////////////////////////////////////////////////////////////////////////// const double_t total_fill_factor = @@ -413,63 +292,36 @@ ConsolidationPolicy MakePolicy(const ConsolidateTier& options) { } } - /////////////////////////////////////////////////////////////////////////// - /// Stage 3 - /// sort candidates - /////////////////////////////////////////////////////////////////////////// - - std::sort(sorted_segments.begin(), sorted_segments.end()); + // No point in attempting consolidation if we don't have + // enough segments to fill the consolidation window + if (sorted_segments.size() < tier::ConsolidationConfig::candidate_size) + return; /////////////////////////////////////////////////////////////////////////// - /// Stage 4 - /// find proper candidates + /// Stage 3 + /// Find cleanup candidates /////////////////////////////////////////////////////////////////////////// - tier::ConsolidationCandidate best(sorted_segments.begin()); - - if (sorted_segments.size() >= min_segments_per_tier) { - for (auto i = 
sorted_segments.begin(), end = sorted_segments.end(); - i != end; ++i) { - tier::ConsolidationCandidate candidate(i); - - while (candidate.segments.second != end && - candidate.count < max_segments_per_tier) { - candidate.size += candidate.segments.second->size; - - if (candidate.size > max_segments_bytes) { - // overcome the limit - break; - } - - ++candidate.count; - ++candidate.segments.second; - - if (candidate.count < min_segments_per_tier) { - // not enough segments yet - continue; - } - - candidate.score = tier::consolidation_score( - candidate, max_segments_per_tier, floor_segment_bytes); - - if (candidate.score < min_score) { - // score is too small - continue; - } - - if (best.score < candidate.score) { - best = candidate; - } - } - } + tier::ConsolidationCandidate best; + auto ret = tier::findBestCleanupCandidate(sorted_segments, tier::getSegmentDimensions, best); + if (ret && best.initialized && std::distance(best.first(), best.last()) >= 0) { + std::copy(best.first(), best.last() + 1, std::back_inserter(candidates)); + return; } /////////////////////////////////////////////////////////////////////////// /// Stage 4 - /// pick the best candidate + /// find consolidation candidates /////////////////////////////////////////////////////////////////////////// - std::copy(best.begin(), best.end(), std::back_inserter(candidates)); + if (!tier::findBestConsolidationCandidate( + sorted_segments, + max_segments_bytes, + tier::getSegmentDimensions, best)) + return; + + candidates.reserve(std::distance(best.first(), best.last()) + 1); + std::copy(best.first(), best.last() + 1, std::back_inserter(candidates)); }; } diff --git a/core/utils/index_utils.hpp b/core/utils/index_utils.hpp index c47eb4199..b4d618413 100644 --- a/core/utils/index_utils.hpp +++ b/core/utils/index_utils.hpp @@ -24,6 +24,304 @@ #pragma once #include "index/index_writer.hpp" +#include "index/index_reader.hpp" + +namespace { + + // Returns percentage of live documents + inline double FillFactor(const irs::SegmentInfo& segment) noexcept { + return static_cast(segment.live_docs_count) / + static_cast(segment.docs_count); + } + + // Returns approximated size of a segment in the absence of removals + inline size_t SizeWithoutRemovals(const irs::SegmentInfo& segment) noexcept { + return size_t(static_cast(segment.byte_size) * FillFactor(segment)); + } +} + +namespace tier { + + struct SegmentStats { + SegmentStats() = default; + + // cppcheck-suppress noExplicitConstructor + SegmentStats(const irs::SubReader& reader) noexcept + : reader{&reader}, + meta{&reader.Meta()}, + size{SizeWithoutRemovals(*meta)}, + fill_factor{FillFactor(*meta)} {} + + operator const irs::SubReader*() const noexcept { return reader; } + + const irs::SubReader* reader; + const irs::SegmentInfo* meta; + size_t size; // approximate size of segment without removals + double_t fill_factor; // live_docs_count / docs_count - meaning that fill_factor is + // inversely proportional to the no. of deletions. + }; + + struct ConsolidationConfig { + static constexpr size_t candidate_size { 2 }; // candidate selection window size: 4 + static constexpr double maxMergeScore { 0.4 }; // max score allowed for candidates consolidation. + // Skip consolidation if candidate score is greater + // than this value. + static constexpr double maxLivePercentage { 0.5 }; // Max live docs % of a segment to consider it + // for cleanup during consolidation. 
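+
+    // The merge score (skew) of a candidate window is the largest segment's
+    // share of the window's total byte size. For example, with the defaults
+    // above a window of 30 + 30 + 40 bytes scores 40 / 100 = 0.4 and is still
+    // merged, while a window of 10 + 10 + 80 bytes scores 0.8 and is skipped.
+    // These defaults drive the two-stage ConsolidateTier policy: cleanup
+    // candidates (findBestCleanupCandidate) are tried first, size-based
+    // consolidation (findBestConsolidationCandidate) second.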
+ }; + + struct SegmentAttributes { + uint64_t byteSize { 0 }; + uint64_t docsCount { 0 }; + uint64_t liveDocsCount { 0 }; + + SegmentAttributes() = default; + SegmentAttributes(uint64_t b, uint64_t d, uint64_t l) : + byteSize(b), docsCount(d), liveDocsCount(l) {} + }; + + // interface to fetch the required attributes from + // SegmentStats struct. + // We use this function in struct ConsolidationCandidate + // to fetch the segment dimensions from the SegmentStats + // struct. + // + void getSegmentDimensions( + const tier::SegmentStats& segment, + tier::SegmentAttributes& attrs); + + template + struct ConsolidationCandidate { + using SegmentIterator = std::vector::const_iterator; + using range_t = std::pair; + + ConsolidationCandidate() = default; + + ConsolidationCandidate( + SegmentIterator start, + SegmentIterator end, + std::function accessor + ) noexcept : segments(start, end), accessor_(accessor) { + + initialized = true; + + // Calculate initial cost + SegmentAttributes attrs; + + auto itr = start; + do + { + accessor_(*itr, attrs); + mergeBytes += attrs.byteSize; + + } while (itr++ != end); + + skew = static_cast(attrs.byteSize) / mergeBytes; + mergeScore = skew; + } + + // It is the caller's responsibility to ensure that + // a std::advance() operation is possible on the segments + // range. + bool pop_front() { + if (!initialized) + return false; + + const auto removeSegment = first(); + const auto lastSegment = last(); + + std::advance(segments.first, 1); + + // Segment to be removed + SegmentAttributes remSegAttrs; + accessor_(*removeSegment, remSegAttrs); + + SegmentAttributes lastSegAttrs; + accessor_(*lastSegment, lastSegAttrs); + + mergeBytes -= remSegAttrs.byteSize; + skew = static_cast(lastSegAttrs.byteSize) / mergeBytes; + mergeScore = skew; + + return true; + } + + // It is the caller's responsibility to ensure that + // a std::advance() operation is possible on the segments + // range. 
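+    //
+    // push_back() widens the window to the right by one segment and refreshes
+    // the running totals: for example, extending a {10, 20}-byte window
+    // (mergeBytes = 30) with a 30-byte segment yields mergeBytes = 60 and
+    // skew = 30 / 60 = 0.5.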
+ bool push_back() noexcept { + if (!initialized) + return false; + + const auto addSegment = segments.second + 1; + + std::advance(segments.second, 1); + + // Segment to be added + SegmentAttributes attrs; + accessor_(*addSegment, attrs); + + mergeBytes += attrs.byteSize; + skew = static_cast(attrs.byteSize) / mergeBytes; + mergeScore = skew; + + return true; + } + + SegmentIterator first() const noexcept { return segments.first; } + SegmentIterator last() const noexcept { return segments.second; } + + size_t mergeBytes { 0 }; + double skew { 0.0 }; + double mergeScore { 0.0 }; + bool initialized { false }; + + range_t segments; + std::function accessor_; + }; + + template + bool findBestCleanupCandidate( + std::vector& segments, + const std::function< + void(const Segment&, + tier::SegmentAttributes& + )>& getSegmentAttributes, + tier::ConsolidationCandidate& best) { + + auto segmentSortFunc = [&](const Segment& left, const Segment& right) { + + tier::SegmentAttributes attrs; + getSegmentAttributes(left, attrs); + auto lLivePerc = static_cast(attrs.liveDocsCount) / attrs.docsCount; + + getSegmentAttributes(right, attrs); + auto rLivePerc = static_cast(attrs.liveDocsCount) / attrs.docsCount; + + return lLivePerc < rLivePerc; + }; + + std::sort(segments.begin(), segments.end(), segmentSortFunc); + + auto count = 0; + auto totalDocsCount = 0; + auto totalLiveDocsCount = 0; + double livePerc; + + for (auto itr = segments.begin(); itr != segments.end(); itr++) { + + tier::SegmentAttributes attrs; + getSegmentAttributes(*itr, attrs); + + totalDocsCount += attrs.docsCount; + totalLiveDocsCount += attrs.liveDocsCount; + + livePerc = static_cast(totalLiveDocsCount) / totalDocsCount; + if (livePerc > tier::ConsolidationConfig::maxLivePercentage) + break; + + ++count; + } + + if (count < 1) + return false; + + best = ConsolidationCandidate(segments.begin(), segments.begin() + count - 1, getSegmentAttributes); + return true; + } + + // + // This function receives a set of segments and finds + // the best subset to merge together. + // The best subset is defined as the one with the lowest + // merge cost (i.e. skew). The merge cost is computed inside + // the ConslidationCandidate struct upon candidate init, + // push_back() and pop_front() operations. + // + // findBestConsolidationCandidate sorts the set of segments + // in the increasing order of the segment sizes and then finds + // the largest possible subset of segments whose consolidated + // size is within the maxSegmentsBytes range and has the + // lowest skew. + // + // Currently it is only executed with struct tier::SegmentStats + // as the template argument in ArangoSearch. However we leverage + // this templatized design for writing unit tests. + // + // findBestConsolidationCandidate does not use the live % + // to find the best candidate. It only needs the segment + // byte size. 
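+  //
+  // Illustrative usage over plain byte sizes, mirroring the unit tests (the
+  // exact template spelling below is only a sketch):
+  //
+  //   std::vector<int> sizes{10, 20, 30, 40, 1000};
+  //   auto get = [](const int& s, tier::SegmentAttributes& a) { a.byteSize = s; };
+  //   tier::ConsolidationCandidate<int> best;
+  //   if (tier::findBestConsolidationCandidate<int>(sizes, 500, get, best)) {
+  //     // best spans {10, 20, 30, 40}: mergeBytes = 100, skew = 40 / 100 = 0.4
+  //   }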
+ // + template + bool findBestConsolidationCandidate( + std::vector& segments, + size_t maxSegmentsBytes, + const std::function< + void(const Segment&, + SegmentAttributes& + )>& getSegmentAttributes, + tier::ConsolidationCandidate& best) { + + // sort segments by segment size + auto comp = [&](const Segment& lhs, const Segment& rhs) { + + SegmentAttributes lAttrs; + SegmentAttributes rAttrs; + + getSegmentAttributes(lhs, lAttrs); + getSegmentAttributes(rhs, rAttrs); + + if (lAttrs.byteSize == rAttrs.byteSize) { + + double lfill_factor = static_cast(lAttrs.liveDocsCount) / lAttrs.docsCount; + double rfill_factor = static_cast(rAttrs.liveDocsCount) / rAttrs.docsCount; + return lfill_factor > rfill_factor; + } + + return lAttrs.byteSize < rAttrs.byteSize; + }; + + // sort segments in increasing order of the segment byte size + std::sort(segments.begin(), segments.end(), comp); + + // We start with a min. window size of 2 + // since a window of size 1 will always + // give us a skew of 1.0. + uint64_t minWindowSize { tier::ConsolidationConfig::candidate_size }; + auto front = segments.begin(); + auto rear = front + minWindowSize - 1; + tier::ConsolidationCandidate candidate(front, rear, getSegmentAttributes); + + // Algorithm: + // We start by setting the smallest possible window on the list of + // sorted segments. We move the right end ahead to add more segments to + // the window and we incrementally compute the merge cost for each subset. + // We move the left end ahead to remove segments from the window and we + // only do this when we're over the maxSegmentsBytes limit. + while ((candidate.first() + minWindowSize - 1) <= candidate.last() && + candidate.last() < segments.end()) { + + if (candidate.mergeBytes > maxSegmentsBytes) { + candidate.pop_front(); + continue; + } + + if (!best.initialized || best.mergeScore > candidate.mergeScore) + best = candidate; + + if (candidate.last() == (segments.end() - 1)) + break; + + candidate.push_back(); + } + + return (best.initialized && + best.mergeScore <= tier::ConsolidationConfig::maxMergeScore); + } +} namespace irs::index_utils { @@ -68,6 +366,8 @@ struct ConsolidateDocsFill { ConsolidationPolicy MakePolicy(const ConsolidateDocsFill& options); +// [TODO] Currently unused as the new algorithm uses a different +// approach. Only max_segments_bytes is in use. struct ConsolidateTier { // minimum allowed number of segments to consolidate at once size_t min_segments = 1; diff --git a/tests/index/consolidation_policy_tests.cpp b/tests/index/consolidation_policy_tests.cpp index d67f0a1fb..2eeb97c1a 100644 --- a/tests/index/consolidation_policy_tests.cpp +++ b/tests/index/consolidation_policy_tests.cpp @@ -73,8 +73,9 @@ namespace { void AssertCandidates(const irs::IndexReader& reader, const std::vector& expected_candidates, - const irs::Consolidation& actual_candidates) { - ASSERT_EQ(expected_candidates.size(), actual_candidates.size()); + const irs::Consolidation& actual_candidates, + const std::string& errMsg = "") { + ASSERT_EQ(expected_candidates.size(), actual_candidates.size()) << errMsg; for (const size_t expected_candidate_idx : expected_candidates) { const auto& expected_candidate = reader[expected_candidate_idx]; @@ -228,6 +229,9 @@ TEST(ConsolidationTierTest, MaxConsolidationSize) { } // 3rd tier + // At this point we'll be left with only 2 segments left + // for consolidation. But the skew is now over the threshold, + // so we won't consolidate those segments. 
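+  // (Any two-segment window has a skew of at least 0.5, which is above the
+  // 0.4 default in tier::ConsolidationConfig::maxMergeScore.)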
{ irs::Consolidation candidates; policy(candidates, reader, consolidating_segments); @@ -235,19 +239,12 @@ TEST(ConsolidationTierTest, MaxConsolidationSize) { for (const auto* candidate : candidates) { consolidating_segments.emplace(candidate->Meta().name); } - ASSERT_EQ(reader.size() - 2 * options.max_segments_bytes, - candidates.size()); - } - - // last empty tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); + ASSERT_EQ(consolidating_segments.size(), 2 * options.max_segments_bytes); ASSERT_TRUE(candidates.empty()); } } - // invalid options: max_segments_bytes == 0 + // invalid options: maxSegmentsBytes == 0 { irs::index_utils::ConsolidateTier options; options.floor_segment_bytes = 1; @@ -320,58 +317,78 @@ TEST(ConsolidationTierTest, EmptySegment) { ASSERT_TRUE(candidates.empty()); // skip empty segments } -TEST(ConsolidationTierTest, MaxConsolidationCount) { +TEST(ConsolidationTierTest, PreferConsolidationOverCleanupWhenDoesntMeetThreshold) { + irs::IndexMeta meta; + AddSegment(meta, "0", 10, 10, 10); + AddSegment(meta, "1", 10, 10, 10); + AddSegment(meta, "2", 10, 10, 10); + AddSegment(meta, "3", 10, 10, 10); + AddSegment(meta, "4", 10, 10, 10); + AddSegment(meta, "5", 11, 8, 81); + AddSegment(meta, "6", 10, 9, 91); + IndexReaderMock reader{meta}; + + // The conslidation threshold defaults are defined + // in tier::ConsolidationConfig. + + irs::index_utils::ConsolidateTier options; + options.floor_segment_bytes = 1; + options.max_segments = 2; + options.min_segments = 2; + options.max_segments_bytes = std::numeric_limits::max(); + + irs::ConsolidatingSegments consolidating_segments; + auto policy = irs::index_utils::MakePolicy(options); + + irs::Consolidation candidates; + policy(candidates, reader, consolidating_segments); + ASSERT_EQ(5, candidates.size()); +} + +TEST(ConsolidationTierTest, PreferCleanupWhenMeetsThreshold) { // generate meta irs::IndexMeta meta; - for (size_t i = 0; i < 22; ++i) { - AddSegment(meta, std::to_string(i), 1, 1, 1); - } + AddSegment(meta, "0", 10, 10, 10); + AddSegment(meta, "1", 10, 10, 10); + AddSegment(meta, "2", 10, 10, 10); + AddSegment(meta, "3", 10, 10, 10); + AddSegment(meta, "4", 10, 10, 10); + AddSegment(meta, "5", 11, 5, 11); + AddSegment(meta, "6", 10, 5, 11); IndexReaderMock reader{meta}; - { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = 10; - options.min_segments = 1; - options.max_segments_bytes = std::numeric_limits::max(); + // ensure policy prefers segments with removals + irs::index_utils::ConsolidateTier options; + options.floor_segment_bytes = 1; + options.max_segments = 2; + options.min_segments = 2; + options.max_segments_bytes = std::numeric_limits::max(); - irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); + irs::ConsolidatingSegments consolidating_segments; + auto policy = irs::index_utils::MakePolicy(options); - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(options.max_segments, candidates.size()); - } + irs::Consolidation candidates; + policy(candidates, reader, consolidating_segments); + ASSERT_EQ(2, candidates.size()); + AssertCandidates(reader, {5, 6}, candidates); +} - // 2nd tier - { - irs::Consolidation candidates; - policy(candidates, reader, 
consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(options.max_segments, candidates.size()); - } +TEST(ConsolidationTierTest, Singleton) { + irs::index_utils::ConsolidateTier options; + options.floor_segment_bytes = 1; + options.max_segments = std::numeric_limits::max(); + options.min_segments = 1; + options.max_segments_bytes = std::numeric_limits::max(); + auto policy = irs::index_utils::MakePolicy(options); - // 3rd tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(reader.size() - 2 * options.max_segments, candidates.size()); - } + // singleton consolidation without removals + { + irs::ConsolidatingSegments consolidating_segments; + irs::IndexMeta meta; + AddSegment(meta, "0", 100, 100, 150); + IndexReaderMock reader{meta}; - // last empty tier + // avoid having singletone merges without removals { irs::Consolidation candidates; policy(candidates, reader, consolidating_segments); @@ -379,29 +396,13 @@ TEST(ConsolidationTierTest, MaxConsolidationCount) { } } - // max_segments == std::numeric_limits::max() + // singleton consolidation with < 50% removals { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = std::numeric_limits::max(); - options.min_segments = 1; - options.max_segments_bytes = std::numeric_limits::max(); - irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); - - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(reader.size(), candidates.size()); - } + irs::IndexMeta meta; + AddSegment(meta, "0", 100, 51, 150); + IndexReaderMock reader{meta}; - // last empty tier { irs::Consolidation candidates; policy(candidates, reader, consolidating_segments); @@ -409,1113 +410,504 @@ TEST(ConsolidationTierTest, MaxConsolidationCount) { } } - // invalid options: max_segments == 0 + // singleton consolidation with >= 50% removals { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = 0; - options.min_segments = 1; - options.max_segments_bytes = std::numeric_limits::max(); - irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); + irs::IndexMeta meta; + AddSegment(meta, "0", 100, 49, 150); + AddSegment(meta, "1", 100, 59, 150); + AddSegment(meta, "2", 100, 69, 150); + AddSegment(meta, "3", 100, 50, 150); + IndexReaderMock reader{meta}; - // last empty tier { irs::Consolidation candidates; policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); + AssertCandidates(reader, {0, 3}, candidates); } } +} - // invalid options: floor_segments_bytes == 0 - { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 0; - options.max_segments = 10; - options.min_segments = 3; - options.max_segments_bytes = std::numeric_limits::max(); +TEST(ConsolidationTierTest, Defaults) { + irs::index_utils::ConsolidateTier options; + auto policy = irs::index_utils::MakePolicy(options); + { 
irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); + + irs::IndexMeta meta; + AddSegment(meta, "0", 100, 100, 150); + AddSegment(meta, "1", 100, 100, 100); + AddSegment(meta, "2", 100, 100, 100); + AddSegment(meta, "3", 100, 100, 100); + AddSegment(meta, "4", 100, 100, 100); + IndexReaderMock reader{meta}; // 1st tier { irs::Consolidation candidates; policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(options.max_segments, candidates.size()); - } - - // 2nd tier - { - irs::Consolidation candidates; + AssertCandidates(reader, {1, 2, 3, 4}, candidates, "Line: " + std::to_string(__LINE__)); + candidates.clear(); policy(candidates, reader, consolidating_segments); + AssertCandidates(reader, {1, 2, 3, 4}, candidates, "Line: " + std::to_string(__LINE__)); // register candidates for consolidation for (const auto* candidate : candidates) { consolidating_segments.emplace(candidate->Meta().name); } - ASSERT_EQ(options.max_segments, candidates.size()); } - // last empty tier + // no more segments to consolidate { irs::Consolidation candidates; policy(candidates, reader, consolidating_segments); ASSERT_TRUE(candidates.empty()); } } -} - -TEST(ConsolidationTierTest, MinConsolidationCount) { - // generate meta - irs::IndexMeta meta; - for (size_t i = 0; i < 22; ++i) { - AddSegment(meta, std::to_string(i), 1, 1, 1); - } - IndexReaderMock reader{meta}; - // min_segments == 3 { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = 10; - options.min_segments = 3; - options.max_segments_bytes = std::numeric_limits::max(); - irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); + irs::IndexMeta meta; + AddSegment(meta, "0", 100, 100, 150); + AddSegment(meta, "1", 100, 100, 100); + AddSegment(meta, "2", 100, 100, 100); + AddSegment(meta, "3", 100, 100, 100); + AddSegment(meta, "4", 100, 100, 100); + AddSegment(meta, "5", 100, 100, 100); + AddSegment(meta, "6", 100, 100, 100); + AddSegment(meta, "7", 100, 100, 100); + AddSegment(meta, "8", 100, 100, 100); + AddSegment(meta, "9", 100, 100, 100); + AddSegment(meta, "10", 100, 100, 100); + IndexReaderMock reader{meta}; // 1st tier { irs::Consolidation candidates; policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(options.max_segments, candidates.size()); - } - - // 2nd tier - { - irs::Consolidation candidates; + AssertCandidates(reader, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, candidates, "Line: " + std::to_string(__LINE__)); + candidates.clear(); policy(candidates, reader, consolidating_segments); + AssertCandidates(reader, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, candidates, "Line: " + std::to_string(__LINE__)); // register candidates for consolidation for (const auto* candidate : candidates) { consolidating_segments.emplace(candidate->Meta().name); } - ASSERT_EQ(options.max_segments, candidates.size()); } - // last empty tier + // no more segments to consolidate { irs::Consolidation candidates; policy(candidates, reader, consolidating_segments); ASSERT_TRUE(candidates.empty()); } } +} - // invalid options: min_segments == 1 - { +TEST(ConsolidationTierTest, NoCandidates) { + 
irs::index_utils::ConsolidateTier options; + options.floor_segment_bytes = 2097152; + options.max_segments_bytes = 4294967296; + options.min_segments = 5; // min number of segments per tier to merge at once + // max number of segments per tier to merge at once + options.max_segments = 10; + auto policy = irs::index_utils::MakePolicy(options); + + irs::ConsolidatingSegments consolidating_segments; + irs::IndexMeta meta; + AddSegment(meta, "0", 100, 100, 1); + AddSegment(meta, "1", 100, 100, 4); + AddSegment(meta, "2", 100, 100, 7); + AddSegment(meta, "3", 100, 100, 13); + AddSegment(meta, "4", 100, 100, 26); + IndexReaderMock reader{meta}; + + // The candidate sizes are such that the skew for any + // combination of contiguous candidates is greater than + // ConsolidationConfig::maxMergeScore (default: 0.4) + irs::Consolidation candidates; + policy(candidates, reader, consolidating_segments); + ASSERT_TRUE(candidates.empty()); +} + +TEST(ConsolidationTierTest, SkewedSegments) { irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = 10; - options.min_segments = 0; - options.max_segments_bytes = std::numeric_limits::max(); + options.max_segments_bytes = 52500; // max size of the merge - irs::ConsolidatingSegments consolidating_segments; auto policy = irs::index_utils::MakePolicy(options); - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(options.max_segments, candidates.size()); - } + // test correct selection of candidates + { + irs::ConsolidatingSegments consolidating_segments; + irs::IndexMeta meta; + AddSegment(meta, "0", 100, 100, 10); + AddSegment(meta, "1", 100, 100, 40); + AddSegment(meta, "2", 100, 100, 60); + AddSegment(meta, "3", 100, 100, 70); + AddSegment(meta, "4", 100, 100, 100); + AddSegment(meta, "5", 100, 100, 150); + AddSegment(meta, "6", 100, 100, 200); + AddSegment(meta, "7", 100, 100, 500); + AddSegment(meta, "8", 100, 100, 750); + AddSegment(meta, "9", 100, 100, 1100); + AddSegment(meta, "10", 100, 100, 90); + AddSegment(meta, "11", 100, 100, 75); + AddSegment(meta, "12", 100, 100, 1500); + AddSegment(meta, "13", 100, 100, 10000); + AddSegment(meta, "14", 100, 100, 5000); + AddSegment(meta, "15", 100, 100, 1750); + AddSegment(meta, "16", 100, 100, 690); + IndexReaderMock reader{meta}; + + const std::vector> expected_tiers{ + {0, 1, 2, 3, 4, 10, 11}, + {5, 6, 7, 8, 9, 12, 15, 16} + }; - // 2nd tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(options.max_segments, candidates.size()); - } + for (size_t i = 0; i < expected_tiers.size(); i++) { + auto& expected_tier = expected_tiers[i]; + irs::Consolidation candidates; + policy(candidates, reader, consolidating_segments); + AssertCandidates(reader, expected_tier, candidates, "Line: " + std::to_string(__LINE__) + ", i = " + std::to_string(i)); + candidates.clear(); + policy(candidates, reader, consolidating_segments); + AssertCandidates(reader, expected_tier, candidates, "Line: " + std::to_string(__LINE__) + ", i = " + std::to_string(i)); + // register candidates for consolidation + for (const auto* candidate : candidates) { + 
consolidating_segments.emplace(candidate->Meta().name); + } + } + } + + { + irs::ConsolidatingSegments consolidating_segments; + irs::IndexMeta meta; + AddSegment(meta, "0", 100, 100, 1); + AddSegment(meta, "1", 100, 100, 1); + AddSegment(meta, "2", 100, 100, 1); + AddSegment(meta, "3", 100, 100, 75); + AddSegment(meta, "4", 100, 100, 90); + AddSegment(meta, "5", 100, 100, 100); + AddSegment(meta, "6", 100, 100, 150); + AddSegment(meta, "7", 100, 100, 200); + AddSegment(meta, "8", 100, 100, 750); + AddSegment(meta, "9", 100, 100, 1100); + AddSegment(meta, "10", 100, 100, 1500); + AddSegment(meta, "11", 100, 100, 1750); + AddSegment(meta, "12", 100, 100, 5000); + AddSegment(meta, "13", 100, 100, 10000); + AddSegment(meta, "14", 100, 100, 15000); + AddSegment(meta, "15", 100, 100, 20000); + IndexReaderMock reader{meta}; + + const std::vector> expected_tiers{ + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + {12, 13, 14, 15} + }; - // 3rd tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); + for (size_t i = 0; i < expected_tiers.size(); i++) { + auto& expected_tier = expected_tiers[i]; + irs::Consolidation candidates; + policy(candidates, reader, consolidating_segments); + AssertCandidates(reader, expected_tier, candidates, "Line: " + std::to_string(__LINE__) + ", i = " + std::to_string(i)); + candidates.clear(); + + policy(candidates, reader, consolidating_segments); + AssertCandidates(reader, expected_tier, candidates, "Line: " + std::to_string(__LINE__) + ", i = " + std::to_string(i)); + // register candidates for consolidation + for (const auto* candidate : candidates) { + consolidating_segments.emplace(candidate->Meta().name); + } } - ASSERT_EQ(reader.size() - 2 * options.max_segments, candidates.size()); } +} - // last empty tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } +// When the skew of all the candidates is over the default threshold, +// we don't consolidate. +TEST(ConsolidationTierTest, SkewOverThresholdDontConsolidate) { + + using SEGMENT_SIZE = int; + size_t maxSegmentsBytes { 5000 }; + + // The default threshold value is 0.4. In a list of segments sorted + // by the segment size, if the size of a segment is >= the + // sum of sizes of segments on its left, then the skew will be >= 0.5 + // which is over the default threshold. + std::vector< + std::vector + > testcases{ + { 50, 100, 2000, 4000 }, + { 1, 2, 4, 8, 16, 32, 64 }, + { 1, 1, 12 }, + { 12, 12 }, + { 12 }, + { 2, 2, 3} + }; + + auto getAttributes = []( + int segment, + tier::SegmentAttributes& attrs) { + attrs.byteSize = segment; + }; + + for (size_t i = 0; i < testcases.size(); i++) { + + auto& segmentSizes = testcases[i]; + // No cleanup candidates should be selected here. 
+ tier::ConsolidationCandidate best; + auto result = tier::findBestConsolidationCandidate(segmentSizes, maxSegmentsBytes, getAttributes, best); + ASSERT_FALSE(result); } +} - // invalid options: min_segments > max_segments - { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = 10; - options.min_segments = std::numeric_limits::max(); - options.max_segments_bytes = std::numeric_limits::max(); +TEST(ConsolidationTierTest, NewSegmentAdditions) { + using SEGMENT_SIZE = int; + size_t maxSegmentsBytes { 5000 }; + + // The default threshold value is 0.4. In a list of segments sorted + // by the segment size, if the size of a segment is >= the + // sum of sizes of segments on its left, then the skew will be >= 0.5 + // which is over the default threshold. + std::vector segmentSizes + { 1, 2, 4, 8, 16, 32, 64 }; + + auto getAttributes = []( + int segment, + tier::SegmentAttributes& attrs) { + attrs.byteSize = segment; + }; - irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); + tier::ConsolidationCandidate best; + auto result = tier::findBestConsolidationCandidate(segmentSizes, maxSegmentsBytes, getAttributes, best); + ASSERT_FALSE(result); - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(options.max_segments, candidates.size()); - } + // Adding 2 segments of size 64 each will lower the + // skew and bring it under the default skew threshold + // of 0.4 thereby allowing consolidation. + segmentSizes.emplace_back(64); + segmentSizes.emplace_back(64); + result = tier::findBestConsolidationCandidate(segmentSizes, maxSegmentsBytes, getAttributes, best); + ASSERT_TRUE(result); - // 2nd tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(options.max_segments, candidates.size()); - } - - // last empty tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } - - // invalid options: min_segments > max_segments - { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = std::numeric_limits::max(); - options.min_segments = std::numeric_limits::max(); - options.max_segments_bytes = std::numeric_limits::max(); - - irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); - - // can't find anything - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } -} - -TEST(ConsolidationTierTest, ConsolidationFloor) { - // generate meta - irs::IndexMeta meta; - { - size_t i = 0; - for (; i < 5; ++i) { - AddSegment(meta, std::to_string(i), 1, 1, 2 * i); - } - for (; i < 22; ++i) { - AddSegment(meta, std::to_string(i), 1, 1, 2 * i); - } - } - IndexReaderMock reader{meta}; - - { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 8; - options.max_segments = reader.size(); - options.min_segments = 1; - options.max_segments_bytes = std::numeric_limits::max(); - - irs::ConsolidatingSegments consolidating_segments; - auto policy = 
irs::index_utils::MakePolicy(options); - - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(5, candidates.size()); - - for (size_t i = 0; i < candidates.size(); ++i) { - ASSERT_NE(candidates.end(), - std::find(candidates.begin(), candidates.end(), &reader[i])); - } - } - - // 2nd tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(reader.size() - 5, candidates.size()); - } - - // last empty tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } - - // enormous floor value, treat all segments as equal - { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = std::numeric_limits::max(); - options.max_segments = std::numeric_limits::max(); - options.min_segments = 1; - options.max_segments_bytes = std::numeric_limits::max(); - - irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); - - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - ASSERT_EQ(reader.size(), candidates.size()); - } - - // last empty tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } -} - -TEST(ConsolidationTierTest, PreferSegmentsWithRemovals) { - // generate meta - irs::IndexMeta meta; - AddSegment(meta, "0", 10, 10, 10); - AddSegment(meta, "1", 10, 10, 10); - AddSegment(meta, "2", 11, 10, 11); - AddSegment(meta, "3", 11, 10, 11); - IndexReaderMock reader{meta}; - - // ensure policy prefers segments with removals - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = 2; - options.min_segments = 1; - options.max_segments_bytes = std::numeric_limits::max(); - - irs::ConsolidatingSegments consolidating_segments; - auto policy = irs::index_utils::MakePolicy(options); - - const std::vector> expected_tiers{{2, 3}, {0, 1}}; - - for (auto& expected_tier : expected_tiers) { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } - - // no more segments to consolidate - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } -} - -TEST(ConsolidationTierTest, Singleton) { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 1; - options.max_segments = std::numeric_limits::max(); - options.min_segments = 1; - options.max_segments_bytes = std::numeric_limits::max(); - auto policy = irs::index_utils::MakePolicy(options); - - // singleton consolidation without removals - { - 
irs::ConsolidatingSegments consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 150); - IndexReaderMock reader{meta}; - - // avoid having singletone merges without removals - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } - - // singleton consolidation with removals - { - irs::ConsolidatingSegments consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 99, 150); - IndexReaderMock reader{meta}; - - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, {0}, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, {0}, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } - - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } -} - -TEST(ConsolidationTierTest, Defaults) { - irs::index_utils::ConsolidateTier options; - auto policy = irs::index_utils::MakePolicy(options); - - { - irs::ConsolidatingSegments consolidating_segments; - - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 150); - AddSegment(meta, "1", 100, 100, 100); - AddSegment(meta, "2", 100, 100, 100); - AddSegment(meta, "3", 100, 100, 100); - AddSegment(meta, "4", 100, 100, 100); - IndexReaderMock reader{meta}; - - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, {0, 1, 2, 3, 4}, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, {0, 1, 2, 3, 4}, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } - - // no more segments to consolidate - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } - - { - irs::ConsolidatingSegments consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 150); - AddSegment(meta, "1", 100, 100, 100); - AddSegment(meta, "2", 100, 100, 100); - AddSegment(meta, "3", 100, 100, 100); - AddSegment(meta, "4", 100, 100, 100); - AddSegment(meta, "5", 100, 100, 100); - AddSegment(meta, "6", 100, 100, 100); - AddSegment(meta, "7", 100, 100, 100); - AddSegment(meta, "8", 100, 100, 100); - AddSegment(meta, "9", 100, 100, 100); - AddSegment(meta, "10", 100, 100, 100); - IndexReaderMock reader{meta}; - - // 1st tier - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } - - // no more segments to consolidate - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } -} - -TEST(ConsolidationTierTest, NoCandidates) { - irs::index_utils::ConsolidateTier options; - options.floor_segment_bytes = 2097152; - 
options.max_segments_bytes = 4294967296; - options.min_segments = 5; // min number of segments per tier to merge at once - // max number of segments per tier to merge at once - options.max_segments = 10; - auto policy = irs::index_utils::MakePolicy(options); - - irs::ConsolidatingSegments consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 141747); - AddSegment(meta, "1", 100, 100, 1548373791); - AddSegment(meta, "2", 100, 100, 1699787770); - AddSegment(meta, "3", 100, 100, 1861963739); - AddSegment(meta, "4", 100, 100, 2013404723); - IndexReaderMock reader{meta}; - - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); // candidates too large -} - -TEST(ConsolidationTierTest, SkewedSegments) { - { - irs::index_utils::ConsolidateTier options; - // min number of segments per tier to merge at once - options.min_segments = 1; - // max number of segments per tier to merge at once - options.max_segments = 10; - options.max_segments_bytes = 2500; // max size of the merge - // smaller segments will be treated as equal to this value - options.floor_segment_bytes = 50; - auto policy = irs::index_utils::MakePolicy(options); - - irs::ConsolidatingSegments consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 10); - AddSegment(meta, "1", 100, 100, 40); - AddSegment(meta, "2", 100, 100, 60); - AddSegment(meta, "3", 100, 100, 70); - AddSegment(meta, "4", 100, 100, 100); - AddSegment(meta, "5", 100, 100, 150); - AddSegment(meta, "6", 100, 100, 200); - AddSegment(meta, "7", 100, 100, 500); - AddSegment(meta, "8", 100, 100, 750); - AddSegment(meta, "9", 100, 100, 1100); - AddSegment(meta, "10", 100, 100, 90); - AddSegment(meta, "11", 100, 100, 75); - AddSegment(meta, "12", 100, 100, 1500); - AddSegment(meta, "13", 100, 100, 10000); - AddSegment(meta, "14", 100, 100, 5000); - AddSegment(meta, "15", 100, 100, 1750); - AddSegment(meta, "16", 100, 100, 690); - IndexReaderMock reader{meta}; - - const std::vector> expected_tiers{ - {0, 1, 2, 3, 4, 10, 11}, - {5, 6}, - {7, 8, 16}, - }; - - for (auto& expected_tier : expected_tiers) { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } - - // no more segments to consolidate - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } - - { - irs::index_utils::ConsolidateTier options; - // min number of segments per tier to merge at once - options.min_segments = 1; - // max number of segments per tier to merge at once - options.max_segments = 10; - options.max_segments_bytes = 250000; // max size of the merge - // smaller segments will be treated as equal to this value - options.floor_segment_bytes = 50; - auto policy = irs::index_utils::MakePolicy(options); - - irs::ConsolidatingSegments consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 10); - AddSegment(meta, "1", 100, 100, 100); - AddSegment(meta, "2", 100, 100, 500); - AddSegment(meta, "3", 100, 100, 1000); - AddSegment(meta, "4", 100, 100, 2000); - AddSegment(meta, "5", 100, 100, 4000); - AddSegment(meta, "6", 100, 100, 
12000); - AddSegment(meta, "7", 100, 100, 30000); - AddSegment(meta, "8", 100, 100, 50000); - AddSegment(meta, "9", 100, 100, 100000); - IndexReaderMock reader{meta}; - - const std::vector> expected_tiers{ - {0, 1}, - {2, 3}, - {4, 5}, - {6, 7, 8}, - }; - - for (auto& expected_tier : expected_tiers) { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } - - // no more segments to consolidate - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } - - { - irs::index_utils::ConsolidateTier options; - // min number of segments per tier to merge at once - options.min_segments = 1; - // max number of segments per tier to merge at once - options.max_segments = 2; - options.max_segments_bytes = 250000; // max size of the merge - // smaller segments will be treated as equal to this value - options.floor_segment_bytes = 50; - auto policy = irs::index_utils::MakePolicy(options); - - irs::ConsolidatingSegments consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 10); - AddSegment(meta, "1", 100, 100, 100); - AddSegment(meta, "2", 100, 100, 500); - AddSegment(meta, "3", 100, 100, 1000); - AddSegment(meta, "4", 100, 100, 2000); - AddSegment(meta, "5", 100, 100, 4000); - AddSegment(meta, "6", 100, 100, 12000); - AddSegment(meta, "7", 100, 100, 30000); - AddSegment(meta, "8", 100, 100, 50000); - AddSegment(meta, "9", 100, 100, 100000); - IndexReaderMock reader{meta}; - - const std::vector> expected_tiers{ - {0, 1}, {2, 3}, {4, 5}, {6, 7}, {8, 9}}; - - for (auto& expected_tier : expected_tiers) { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } + // [first, last] is inclusive of bounds. + ASSERT_EQ(std::distance(best.first(), best.last() + 1), segmentSizes.size()); +} - // no more segments to consolidate - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } +// Total segment size goes over max_segment_bytes +// and we have to remove a few segments from the beginning. 
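+// For example, with the sizes {1, 1, 2, 3, 3, 4} and maxSegmentsBytes = 9 used
+// below, the best window is {1, 2, 3, 3}: it fills the byte budget exactly and
+// scores 3 / 9 (about 0.33), whereas any window containing the trailing 4
+// either exceeds 9 bytes or scores worse.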
+TEST(ConsolidationTierTest, PopLeftTest) { - { - irs::index_utils::ConsolidateTier options; - // min number of segments per tier to merge at once - options.min_segments = 3; - // max number of segments per tier to merge at once - options.max_segments = 10; - options.max_segments_bytes = 250000; // max size of the merge - // smaller segments will be treated as equal to this value - options.floor_segment_bytes = 50; - auto policy = irs::index_utils::MakePolicy(options); + // Segments with individual- and combined- live % over 50% + size_t maxSegmentsBytes { 9 }; + std::vector segmentSizes{ + 1, 1, 2, 3, 3, 4 + }; - irs::ConsolidatingSegments consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 10); - AddSegment(meta, "1", 100, 100, 100); - AddSegment(meta, "2", 100, 100, 500); - AddSegment(meta, "3", 100, 100, 1000); - AddSegment(meta, "4", 100, 100, 2000); - AddSegment(meta, "5", 100, 100, 4000); - AddSegment(meta, "6", 100, 100, 12000); - AddSegment(meta, "7", 100, 100, 30000); - AddSegment(meta, "8", 100, 100, 50000); - AddSegment(meta, "9", 100, 100, 100000); - IndexReaderMock reader{meta}; + auto getAttributes = []( + int segment, + tier::SegmentAttributes& attrs) { + attrs.byteSize = segment; + }; - const std::vector> expected_tiers{ - {2, 3, 4}, {6, 7, 8} - // no more candidates since 10, 100, 4000, 100000 means exponensial grow - }; + // No cleanup candidates should be selected here. + tier::ConsolidationCandidate best; + auto result = tier::findBestConsolidationCandidate(segmentSizes, maxSegmentsBytes, getAttributes, best); + ASSERT_TRUE(result); - for (auto& expected_tier : expected_tiers) { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } + ASSERT_EQ(std::distance(best.first(), best.last()), 3); + ASSERT_EQ(best.first(), segmentSizes.cbegin() + 1); + ASSERT_EQ(best.last(), segmentSizes.cend() - 2); - // no more segments to consolidate - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } + for (auto res_itr = best.first(), src_itr = segmentSizes.cbegin() + 1; + res_itr != best.last() + 1; + res_itr++, src_itr++) { + ASSERT_EQ(*res_itr, *src_itr); } +} - { - irs::index_utils::ConsolidateTier options; - // min number of segments per tier to merge at once - options.min_segments = 1; - // max number of segments per tier to merge at once - options.max_segments = 10; - options.max_segments_bytes = 250000; // max size of the merge - // smaller segments will be treated as equal to this value - options.floor_segment_bytes = 50; - auto policy = irs::index_utils::MakePolicy(options); +TEST(ConsolidationTierTest, CleanupSmokeTest) { + + using LIVE_DOCS_COUNT = int; + using DOCS_COUNT = int; + using SegmentType = std::pair; + + std::vector< + std::vector + > testcases { + { + { 100, 100 }, // 100% live + { 55, 100 }, // 55% live + { 66, 100 }, // 66% + { 49, 100 }, // 49% - under the default threshold of 50% + { 50, 100 } // 50% - equal to the default threshold + }, + + // no qualifying segments here + { + { 90, 100 }, + { 80, 100 }, + { 70, 100 }, + { 60, 100 }, + { 51, 100 } + } + }; - irs::ConsolidatingSegments 
consolidating_segments; - irs::IndexMeta meta; - AddSegment(meta, "0", 100, 100, 10); - AddSegment(meta, "1", 100, 100, 100); - AddSegment(meta, "2", 100, 100, 500); - AddSegment(meta, "3", 100, 100, 1000); - AddSegment(meta, "4", 100, 100, 2000); - AddSegment(meta, "5", 100, 100, 4000); - AddSegment(meta, "6", 100, 100, 12000); - AddSegment(meta, "7", 100, 100, 30000); - AddSegment(meta, "8", 100, 100, 50000); - AddSegment(meta, "9", 100, 100, 100000); - AddSegment(meta, "10", 100, 100, 51); - AddSegment(meta, "11", 100, 100, 151); - AddSegment(meta, "12", 100, 100, 637); - AddSegment(meta, "13", 100, 100, 351); - AddSegment(meta, "14", 100, 100, 2351); - AddSegment(meta, "15", 100, 100, 1351); - AddSegment(meta, "16", 100, 100, 1351); - AddSegment(meta, "17", 100, 100, 20); - IndexReaderMock reader{meta}; + std::vector< + std::vector< + SegmentType> + > expectedCandidates { + { + { 49, 100 }, // 49% - under the default threshold of 50% + { 50, 100 } + }, + { + } + }; - const std::vector> expected_tiers{ - {0, 10, 17}, {1, 11}, {2, 3, 12, 13, 15, 16}, {4, 14}, {5, 6}, {7, 8}, + auto getSegmentAttributes = []( + const SegmentType& seg, + tier::SegmentAttributes& attrs) { + attrs.liveDocsCount = seg.first; + attrs.docsCount = seg.second; }; - for (auto& expected_tier : expected_tiers) { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - candidates.clear(); - policy(candidates, reader, consolidating_segments); - AssertCandidates(reader, expected_tier, candidates); - // register candidates for consolidation - for (const auto* candidate : candidates) { - consolidating_segments.emplace(candidate->Meta().name); - } - } + for (size_t i = 0; i < testcases.size(); i++) { + auto& testcase = testcases[i]; + const auto& expectedCandidate = expectedCandidates[i]; - // no more segments to consolidate - { - irs::Consolidation candidates; - policy(candidates, reader, consolidating_segments); - ASSERT_TRUE(candidates.empty()); - } - } + tier::ConsolidationCandidate best; - { - irs::index_utils::ConsolidateTier options; - // min number of segments per tier to merge at once - options.min_segments = 1; - // max number of segments per tier to merge at once - options.max_segments = 10; - options.max_segments_bytes = 250000; // max size of the merge - // smaller segments will be treated as equal to this value - options.floor_segment_bytes = 1; - options.min_score = 0; // default min score - auto policy = irs::index_utils::MakePolicy(options); + // returns true if a cleanup candidate is found, false otherwise. 
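+    // findBestCleanupCandidate sorts the input vector in place by
+    // live-document ratio (ascending) and keeps the longest prefix whose
+    // cumulative ratio stays at or below
+    // tier::ConsolidationConfig::maxLivePercentage (0.5); hence only
+    // {49, 100} and {50, 100} qualify in the first testcase above.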
+    auto result = tier::findBestCleanupCandidate(testcase, getSegmentAttributes, best);
-    irs::ConsolidatingSegments consolidating_segments;
-    irs::IndexMeta meta;
-    AddSegment(meta, "0", 100, 100, 1);
-    AddSegment(meta, "1", 100, 100, 9886);
-    IndexReaderMock reader{meta};
+    ASSERT_EQ(result, expectedCandidate.size() > 0);
-    const std::vector<std::vector<size_t>> expected_tiers{{0, 1}};
+    if (result) {
+      ASSERT_EQ(std::distance(best.first(), best.last() + 1), expectedCandidate.size());
-    for (auto& expected_tier : expected_tiers) {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      candidates.clear();
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      // register candidates for consolidation
-      for (const auto* candidate : candidates) {
-        consolidating_segments.emplace(candidate->Meta().name);
+      // Compare individual elements of candidate
+      auto itr = best.first();
+      size_t j = 0;
+      while (j < expectedCandidate.size()) {
+        ASSERT_EQ(expectedCandidate[j++], *itr++);
       }
     }
-    ASSERT_EQ(reader.size(), consolidating_segments.size());
-
-    // no segments to consolidate
-    {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      ASSERT_TRUE(candidates.empty());
-    }
   }
+}
-  {
-    irs::index_utils::ConsolidateTier options;
-    // min number of segments per tier to merge at once
-    options.min_segments = 1;
-    // max number of segments per tier to merge at once
-    options.max_segments = 10;
-    options.max_segments_bytes = 250000;  // max size of the merge
-    // smaller segments will be treated as equal to this value
-    options.floor_segment_bytes = 1;
-    options.min_score = 0.001;  // filter out irrelevant merges
-    auto policy = irs::index_utils::MakePolicy(options);
-
-    irs::ConsolidatingSegments consolidating_segments;
-    irs::IndexMeta meta;
-    AddSegment(meta, "0", 100, 100, 1);
-    AddSegment(meta, "1", 100, 100, 9886);
-    IndexReaderMock reader{meta};
-
-    // no segments to consolidate
-    {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      ASSERT_TRUE(candidates.empty());
-    }
-  }
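tier::findBestCleanupCandidate is likewise only exercised here, never
declared. Judging from CleanupSmokeTest above and the two tests that follow,
the assumed contract is roughly the following (a hypothetical sketch inferred
from the assertions, not the patch's declaration): a segment qualifies
individually when its live-document ratio is at or below a default 50%
threshold, a group may also qualify on its combined ratio, and the result is
an inclusive [first(), last()] range ordered by ascending live ratio.

  namespace tier {

  struct SegmentAttributes;       // see the sketch after PopLeftTest
  struct ConsolidationCandidate;  // inclusive [first(), last()] range

  // Returns false when no segment (and no combination of segments) falls
  // at or below the live-document threshold; otherwise fills 'best'.
  template<typename Segments, typename GetAttributes>
  bool findBestCleanupCandidate(const Segments& segments,
                                GetAttributes getAttributes,
                                ConsolidationCandidate& best);

  }  // namespace tier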
+// Allow single segment cleanup
+TEST(ConsolidationTierTest, SingleSegmentCleanup) {
-  {
-    irs::index_utils::ConsolidateTier options;
-    // min number of segments per tier to merge at once
-    options.min_segments = 1;
-    // max number of segments per tier to merge at once
-    options.max_segments = 10;
-    options.max_segments_bytes = 250000;  // max size of the merge
-    // smaller segments will be treated as equal to this value
-    options.floor_segment_bytes = 1;
-    options.min_score = 0;  // default min score
-    auto policy = irs::index_utils::MakePolicy(options);
+  using LIVE_DOCS_COUNT = int;
+  using DOCS_COUNT = int;
+  using SegmentType = std::pair<LIVE_DOCS_COUNT, DOCS_COUNT>;
-    irs::ConsolidatingSegments consolidating_segments;
-    irs::IndexMeta meta;
-    AddSegment(meta, "0", 100, 100, 1);
-    AddSegment(meta, "1", 100, 100, 9886);
-    AddSegment(meta, "2", 100, 100, 2);
-    IndexReaderMock reader{meta};
+  std::vector<SegmentType> testcase {
+    { 44, 100 }
+  };
-    const std::vector<std::vector<size_t>> expected_tiers{
-      {0, 2},
+  auto getSegmentAttributes = [](
+      const SegmentType& seg,
+      tier::SegmentAttributes& attrs) {
+    attrs.liveDocsCount = seg.first;
+    attrs.docsCount = seg.second;
   };
-    for (auto& expected_tier : expected_tiers) {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      candidates.clear();
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      // register candidates for consolidation
-      for (const auto* candidate : candidates) {
-        consolidating_segments.emplace(candidate->Meta().name);
-      }
-    }
+  tier::ConsolidationCandidate best;
+  auto result = tier::findBestCleanupCandidate(testcase, getSegmentAttributes, best);
-    // no segments to consolidate
-    {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      ASSERT_TRUE(candidates.empty());
-    }
-  }
+  ASSERT_TRUE(result);
+  ASSERT_EQ(best.first()->first, 44);
+  ASSERT_EQ(best.last()->second, 100);
+}
-  {
+// Cleanup before consolidation
+TEST(ConsolidationTierTest, CleanupBeforeConsolidation) {
   irs::index_utils::ConsolidateTier options;
-    // min number of segments per tier to merge at once
-    options.min_segments = 1;
-    // max number of segments per tier to merge at once
-    options.max_segments = 10;
-    options.max_segments_bytes = 250000;  // max size of the merge
-    // smaller segments will be treated as equal to this value
-    options.floor_segment_bytes = 1;
-    options.min_score = 0.001;  // filter our irrelevant merges
-    auto policy = irs::index_utils::MakePolicy(options);
-
-    irs::ConsolidatingSegments consolidating_segments;
-    irs::IndexMeta meta;
-    AddSegment(meta, "0", 100, 100, 1);
-    AddSegment(meta, "1", 100, 100, 9886);
-    AddSegment(meta, "2", 100, 100, 2);
-    IndexReaderMock reader{meta};
-
-    const std::vector<std::vector<size_t>> expected_tiers{
-      {0, 2},
-    };
-
-    for (auto& expected_tier : expected_tiers) {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      candidates.clear();
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      // register candidates for consolidation
-      for (const auto* candidate : candidates) {
-        consolidating_segments.emplace(candidate->Meta().name);
-      }
-    }
+  options.max_segments_bytes = 100000;  // max size of the merge
-    // no segments to consolidate
-    {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      ASSERT_TRUE(candidates.empty());
-    }
-  }
-
-  {
-    irs::index_utils::ConsolidateTier options;
-    // min number of segments per tier to merge at once
-    options.min_segments = 1;
-    // max number of segments per tier to merge at once
-    options.max_segments = 10;
-    // max size of the merge
-    options.max_segments_bytes = std::numeric_limits<size_t>::max();
-    // smaller segments will be treated as equal to this value
-    options.floor_segment_bytes = 50;
   auto policy = irs::index_utils::MakePolicy(options);
-    irs::IndexMeta meta;
-    {
-      constexpr size_t sizes[] = {
-        90, 100, 110, 95, 105, 150, 145, 155, 160,
-        165, 1000, 900, 1100, 1150, 950, 10000, 10100, 9900,
-        10250, 9800, 110000, 110100, 19900, 110250, 19800};
-
-      for (auto begin = std::begin(sizes), end = std::end(sizes); begin != end;
-           ++begin) {
-        const auto i = std::distance(begin, end);
-        AddSegment(meta, std::to_string(i), 100, 100, *begin);
-      }
-    }
-    IndexReaderMock reader{meta};
-
-    const std::vector<std::vector<size_t>> expected_tiers{
-      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
-      {10, 11, 12, 13, 14},
-      {15, 16, 17, 18, 19},
-      {22, 24},
-      {20, 21, 23},
-    };
-
-    irs::ConsolidatingSegments consolidating_segments;
-    for (auto& expected_tier : expected_tiers) {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      candidates.clear();
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      // register candidates for consolidation
-      for (const auto* candidate : candidates) {
-        consolidating_segments.emplace(candidate->Meta().name);
-      }
-    }
-    ASSERT_EQ(reader.size(), consolidating_segments.size());
-
-    // no more segments to consolidate
+  // test correct selection of candidates
   {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      ASSERT_TRUE(candidates.empty());
-    }
-  }
-
-  // enusre policy honors removals
-  {
-    irs::index_utils::ConsolidateTier options;
-    // min number of segments per tier to merge at once
-    options.min_segments = 1;
-    // max number of segments per tier to merge at once
-    options.max_segments = 10;
-    // max size of the merge
-    options.max_segments_bytes = std::numeric_limits<size_t>::max();
-    // smaller segments will be treated as equal to this value
-    options.floor_segment_bytes = 50;
-    auto policy = irs::index_utils::MakePolicy(options);
+    irs::ConsolidatingSegments consolidating_segments;
+    irs::IndexMeta meta;
+    AddSegment(meta, "0", 100, 100, 10);
+    AddSegment(meta, "1", 100, 100, 40);
+    AddSegment(meta, "2", 100, 100, 60);
+    AddSegment(meta, "3", 100, 100, 70);
+    AddSegment(meta, "4", 100, 50, 300);
+    IndexReaderMock reader{meta};
-    irs::IndexMeta meta;
-    {
-      constexpr size_t sizes[] = {
-        90, 100, 110, 95, 105, 150, 145, 155, 160,
-        165, 1000, 900, 1100, 1150, 950, 10000, 10100, 9900,
-        10250, 9800, 110000, 110100, 19900, 110250, 19800,
+    const std::vector<std::vector<size_t>> expected_tiers{
+      {4},
+      {0, 1, 2, 3}
   };
-      for (auto begin = std::begin(sizes), end = std::end(sizes); begin != end;
-           ++begin) {
-        const auto i = std::distance(begin, end);
-        AddSegment(meta, std::to_string(i), 100, 100, *begin);
-      }
-
-      const_cast(meta.segments[10].meta).live_docs_count = 1;
-    }
-    IndexReaderMock reader{meta};
-
-    const std::vector<std::vector<size_t>> expected_tiers{
-      {0, 10}, {1, 2, 3, 4, 5, 6, 7, 8, 9},
-      {11, 12, 13, 14}, {15, 16, 17, 18, 19},
-      {22, 24}, {20, 21, 23},
-    };
-
-    irs::ConsolidatingSegments consolidating_segments;
-    for (auto& expected_tier : expected_tiers) {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      candidates.clear();
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      // register candidates for consolidation
-      for (const auto* candidate : candidates) {
-        consolidating_segments.emplace(candidate->Meta().name);
+    for (size_t i = 0; i < expected_tiers.size(); i++) {
+      auto& expected_tier = expected_tiers[i];
+      irs::Consolidation candidates;
+      policy(candidates, reader, consolidating_segments);
+      AssertCandidates(reader, expected_tier, candidates, "Line: " + std::to_string(__LINE__) + ", i = " + std::to_string(i));
+      candidates.clear();
+      policy(candidates, reader, consolidating_segments);
+      AssertCandidates(reader, expected_tier, candidates, "Line: " + std::to_string(__LINE__) + ", i = " + std::to_string(i));
+      // register candidates for consolidation
+      for (const auto* candidate : candidates) {
+        consolidating_segments.emplace(candidate->Meta().name);
+      }
+    }
   }
 }
-    ASSERT_EQ(reader.size(), consolidating_segments.size());
-
-    // no more segments to consolidate
-    {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      ASSERT_TRUE(candidates.empty());
-    }
-  }
-
-  {
-    irs::index_utils::ConsolidateTier options;
-    // min number of segments per tier to merge at once
-    options.min_segments = 1;
-    // max number of segments per tier to merge at once
-    options.max_segments = 10;
-    // max size of the merge
-    options.max_segments_bytes = std::numeric_limits<size_t>::max();
-    // smaller segments will be treated as equal to this value
-    options.floor_segment_bytes = 50;
-    auto policy = irs::index_utils::MakePolicy(options);
+}
-    irs::IndexMeta meta;
-    {
-      constexpr size_t sizes[] = {
-        90, 100, 110, 95, 105, 150, 145, 155, 160,
-        165, 1000, 900, 1100, 1150, 950, 10000, 10100, 9900,
-        10250, 9800, 110000, 110100, 19900, 110250, 19800,
-      };
+// Combined live percentage within threshold
+TEST(ConsolidationTierTest, CombinedLivePercentageWithinThreshold) {
-      for (auto begin = std::begin(sizes), end = std::end(sizes); begin != end;
-           ++begin) {
-        const auto i = std::distance(begin, end);
-        AddSegment(meta, std::to_string(i), 100, 100, *begin);
-      }
+  using LIVE_DOCS_COUNT = int;
+  using DOCS_COUNT = int;
+  using SegmentType = std::pair<LIVE_DOCS_COUNT, DOCS_COUNT>;
-      const_cast(meta.segments[10].meta).live_docs_count = 1;
-    }
-    IndexReaderMock reader{meta};
+  // Segments with individual and combined live % over 50%
+  std::vector<SegmentType> segments {
+    { 50, 90 },
+    { 60, 100 }
+  };
-    const std::vector<std::vector<size_t>> expected_tiers{
-      {0, 10}, {1, 2, 3, 4, 5, 6, 7, 8, 9},
-      {11, 12, 13, 14}, {15, 16, 17, 18, 19},
-      {22, 24}, {20, 21, 23},
+  auto getSegmentAttributes = [](
+      const SegmentType& seg,
+      tier::SegmentAttributes& attrs) {
+    attrs.liveDocsCount = seg.first;
+    attrs.docsCount = seg.second;
   };
-    irs::ConsolidatingSegments consolidating_segments;
-    for (auto& expected_tier : expected_tiers) {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      candidates.clear();
-      policy(candidates, reader, consolidating_segments);
-      AssertCandidates(reader, expected_tier, candidates);
-      // register candidates for consolidation
-      for (const auto* candidate : candidates) {
-        consolidating_segments.emplace(candidate->Meta().name);
-      }
-    }
-    ASSERT_EQ(reader.size(), consolidating_segments.size());
+  // No cleanup candidates should be selected here.
+  tier::ConsolidationCandidate best;
+  auto result = tier::findBestCleanupCandidate(segments, getSegmentAttributes, best);
+
+  ASSERT_FALSE(result);
+
+  // Add a segment with a very low live %.
+  // The combined live % of all segments should now be
+  // within the threshold.
+  segments.emplace_back(20, 100);
+  result = tier::findBestCleanupCandidate(segments, getSegmentAttributes, best);
+  ASSERT_TRUE(result);
+
+  // findBestCleanupCandidate sorts the candidates in
+  // increasing order of their live %.
+  std::vector<SegmentType> expected {
+    { 20, 100 },
+    { 50, 90 },
+    { 60, 100 },
+  };
-    // no more segments to consolidate
-    {
-      irs::Consolidation candidates;
-      policy(candidates, reader, consolidating_segments);
-      ASSERT_TRUE(candidates.empty());
-    }
+  // [first, last] is inclusive of bounds.
+  ASSERT_EQ(std::distance(best.first(), best.last() + 1), expected.size());
+
+  // Compare individual elements of candidate
+  auto itr = best.first();
+  size_t j = 0;
+  while (j < expected.size()) {
+    ASSERT_EQ(expected[j++], *itr++);
   }
 }
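For reference, the arithmetic behind the last test, derived from the test data
above rather than from the implementation: before the third segment is added,
the combined live ratio is (50 + 60) / (90 + 100) = 110 / 190 ≈ 57.9%, so
nothing falls at or below the assumed 50% threshold and the call returns
false; after appending { 20, 100 }, the combined ratio drops to
(50 + 60 + 20) / (90 + 100 + 100) = 130 / 290 ≈ 44.8%, so all three segments
are returned, ordered by ascending live ratio (20% < 55.6% < 60%).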