diff --git a/src/core/search/index_result.h b/src/core/search/index_result.h
index a250892cafad..620d4858eb59 100644
--- a/src/core/search/index_result.h
+++ b/src/core/search/index_result.h
@@ -39,8 +39,8 @@ class IndexResult {
BorrowedView Borrowed() const;
- // Move out of owned or copy borrowed
- DocVec Take();
+ // Move out of owned (in full) or copy up to `limit` borrowed entries; also return original size.
+ std::pair Take(size_t limit = std::numeric_limits::max());
private:
bool IsOwned() const;
@@ -82,18 +82,36 @@ inline IndexResult::BorrowedView IndexResult::Borrowed() const {
return std::visit(cb, value_);
}
-inline IndexResult::DocVec IndexResult::Take() {
+inline std::pair IndexResult::Take(size_t limit) {
if (IsOwned()) {
- return std::move(std::get(value_));
+ auto& vec = std::get(value_);
+ size_t size = vec.size();
+ return {std::move(vec), size};
}
- auto cb = [](auto* set) -> DocVec {
+ // Numeric ranges need to be filtered, so their exact size isn't known ahead of time
+ if (std::holds_alternative(value_)) {
+ auto cb = [limit](auto* range) -> std::pair {
+ DocVec out;
+ size_t total = 0;
+ out.reserve(std::min(limit, range->size()));
+ for (auto it = range->begin(); it != range->end(); ++it) {
+ total++;
+ if (out.size() < limit)
+ out.push_back(*it);
+ }
+ return {std::move(out), total};
+ };
+ return std::visit(cb, Borrowed());
+ }
+
+ // Generic borrowed result sets don't need to be filtered, so their size is known ahead of time
+ auto cb = [limit](auto* set) -> std::pair {
DocVec out;
- out.reserve(set->size());
- for (auto it = set->begin(); it != set->end(); ++it) {
+ out.reserve(std::min(limit, set->size()));
+ for (auto it = set->begin(); it != set->end() && out.size() < limit; ++it)
out.push_back(*it);
- }
- return out;
+ return {std::move(out), set->size()};
};
return std::visit(cb, Borrowed());
}
diff --git a/src/core/search/search.cc b/src/core/search/search.cc
index 6ec74efdc4c5..110a1fed68f7 100644
--- a/src/core/search/search.cc
+++ b/src/core/search/search.cc
@@ -289,7 +289,7 @@ struct BasicSearch {
// negate -(*subquery*): explicitly compute result complement. Needs further optimizations
IndexResult Search(const AstNegateNode& node, string_view active_field) {
- vector matched = SearchGeneric(*node.node, active_field).Take();
+ auto matched = SearchGeneric(*node.node, active_field).Take().first;
vector all = indices_->GetAllDocs();
// To negate a result, we have to find the complement of matched to all documents,
@@ -358,7 +358,7 @@ struct BasicSearch {
knn_distances_ = vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime);
else
knn_distances_ =
- vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime, sub_results.Take());
+ vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime, sub_results.Take().first);
}
// [KNN limit @field vec]: Compute distance from `vec` to all vectors keep closest `limit`
@@ -405,7 +405,6 @@ struct BasicSearch {
// Top level results don't need to be sorted, because they will be scored, sorted by fields or
// used by knn
-
DCHECK(top_level || holds_alternative(node.Variant()) ||
holds_alternative(node.Variant()) ||
visit([](auto* set) { return is_sorted(set->begin(), set->end()); }, result.Borrowed()));
@@ -416,16 +415,15 @@ struct BasicSearch {
return result;
}
- SearchResult Search(const AstNode& query) {
+ SearchResult Search(const AstNode& query, size_t cuttoff_limit) {
IndexResult result = SearchGeneric(query, "", true);
// Extract profile if enabled
optional profile =
profile_builder_ ? make_optional(profile_builder_->Take()) : nullopt;
- auto out = result.Take();
- const size_t total = out.size();
- return SearchResult{total, std::move(out), std::move(knn_scores_), std::move(profile),
+ auto [out, total_size] = result.Take(cuttoff_limit);
+ return SearchResult{total_size, std::move(out), std::move(knn_scores_), std::move(profile),
std::move(error_)};
}
@@ -654,11 +652,11 @@ bool SearchAlgorithm::Init(string_view query, const QueryParams* params,
return true;
}
-SearchResult SearchAlgorithm::Search(const FieldIndices* index) const {
+SearchResult SearchAlgorithm::Search(const FieldIndices* index, size_t cuttoff_limit) const {
auto bs = BasicSearch{index};
if (profiling_enabled_)
bs.EnableProfiling();
- return bs.Search(*query_);
+ return bs.Search(*query_, cuttoff_limit);
}
optional SearchAlgorithm::GetKnnScoreSortOption() const {
diff --git a/src/core/search/search.h b/src/core/search/search.h
index 3c1477b9a47e..96f2b4271e04 100644
--- a/src/core/search/search.h
+++ b/src/core/search/search.h
@@ -197,7 +197,9 @@ class SearchAlgorithm {
bool Init(std::string_view query, const QueryParams* params,
const OptionalFilters* filters = nullptr);
- SearchResult Search(const FieldIndices* index) const;
+ // Search the given index. `cuttoff_limit` caps the number of copied result ids, not the total.
+ SearchResult Search(const FieldIndices* index,
+ size_t cuttoff_limit = std::numeric_limits::max()) const;
// if enabled, return limit & alias for knn query
std::optional GetKnnScoreSortOption() const;
diff --git a/src/server/search/doc_index.cc b/src/server/search/doc_index.cc
index dd13daf4db07..2e76834e6a3c 100644
--- a/src/server/search/doc_index.cc
+++ b/src/server/search/doc_index.cc
@@ -408,7 +408,12 @@ vector ShardDocIndex::KeepTopKSorted(vector* ids,
SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& params,
search::SearchAlgorithm* search_algo) const {
size_t limit = params.limit_offset + params.limit_total;
- auto result = search_algo->Search(&*indices_);
+
+ // If we don't sort the documents, we don't need to copy more ids than are requested
+ bool can_cut = !params.sort_option && !search_algo->GetKnnScoreSortOption();
+ size_t id_cutoff_limit = can_cut ? limit : numeric_limits::max();
+
+ auto result = search_algo->Search(&*indices_, id_cutoff_limit);
if (!result.error.empty())
return {facade::ErrorReply(std::move(result.error))};