Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 27 additions & 9 deletions src/core/search/index_result.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ class IndexResult {

BorrowedView Borrowed() const;

// Move out of owned or copy borrowed
DocVec Take();
// Move out of owned or copy borrowed. Borrowed results are copied up to `limit` entries (an owned vector is moved out whole); the full result size is returned alongside.
std::pair<DocVec, size_t /* full size */> Take(size_t limit = std::numeric_limits<size_t>::max());

private:
bool IsOwned() const;
Expand Down Expand Up @@ -82,18 +82,36 @@ inline IndexResult::BorrowedView IndexResult::Borrowed() const {
return std::visit(cb, value_);
}

inline IndexResult::DocVec IndexResult::Take() {
inline std::pair<IndexResult::DocVec, size_t> IndexResult::Take(size_t limit) {
if (IsOwned()) {
return std::move(std::get<DocVec>(value_));
auto& vec = std::get<DocVec>(value_);
size_t size = vec.size();
return {std::move(vec), size};
}

auto cb = [](auto* set) -> DocVec {
// Numeric ranges need to be filtered, so their exact size is not known ahead of time
if (std::holds_alternative<RangeResult>(value_)) {
auto cb = [limit](auto* range) -> std::pair<DocVec, size_t> {
DocVec out;
size_t total = 0;
out.reserve(std::min(limit, range->size()));
for (auto it = range->begin(); it != range->end(); ++it) {
total++;
if (out.size() < limit)
out.push_back(*it);
}
return {std::move(out), total};
};
return std::visit(cb, Borrowed());
}

// Generic borrowed result sets don't need to be filtered, so the result size is known ahead of time
auto cb = [limit](auto* set) -> std::pair<DocVec, size_t> {
DocVec out;
out.reserve(set->size());
for (auto it = set->begin(); it != set->end(); ++it) {
out.reserve(std::min(limit, set->size()));
for (auto it = set->begin(); it != set->end() && out.size() < limit; ++it)
out.push_back(*it);
}
return out;
return {std::move(out), set->size()};
};
return std::visit(cb, Borrowed());
}
Expand Down
16 changes: 7 additions & 9 deletions src/core/search/search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ struct BasicSearch {

// negate -(*subquery*): explicitly compute result complement. Needs further optimizations
IndexResult Search(const AstNegateNode& node, string_view active_field) {
vector<DocId> matched = SearchGeneric(*node.node, active_field).Take();
auto matched = SearchGeneric(*node.node, active_field).Take().first;
vector<DocId> all = indices_->GetAllDocs();

// To negate a result, we have to find the complement of matched to all documents,
Expand Down Expand Up @@ -358,7 +358,7 @@ struct BasicSearch {
knn_distances_ = vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime);
else
knn_distances_ =
vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime, sub_results.Take());
vec_index->Knn(knn.vec.first.get(), knn.limit, knn.ef_runtime, sub_results.Take().first);
}

// [KNN limit @field vec]: Compute distance from `vec` to all vectors keep closest `limit`
Expand Down Expand Up @@ -405,7 +405,6 @@ struct BasicSearch {

// Top level results don't need to be sorted, because they will be scored, sorted by fields or
// used by knn

DCHECK(top_level || holds_alternative<AstKnnNode>(node.Variant()) ||
holds_alternative<AstGeoNode>(node.Variant()) ||
visit([](auto* set) { return is_sorted(set->begin(), set->end()); }, result.Borrowed()));
Expand All @@ -416,16 +415,15 @@ struct BasicSearch {
return result;
}

SearchResult Search(const AstNode& query) {
SearchResult Search(const AstNode& query, size_t cuttoff_limit) {
IndexResult result = SearchGeneric(query, "", true);

// Extract profile if enabled
optional<AlgorithmProfile> profile =
profile_builder_ ? make_optional(profile_builder_->Take()) : nullopt;

auto out = result.Take();
const size_t total = out.size();
return SearchResult{total, std::move(out), std::move(knn_scores_), std::move(profile),
auto [out, total_size] = result.Take(cuttoff_limit);
return SearchResult{total_size, std::move(out), std::move(knn_scores_), std::move(profile),
std::move(error_)};
}

Expand Down Expand Up @@ -654,11 +652,11 @@ bool SearchAlgorithm::Init(string_view query, const QueryParams* params,
return true;
}

SearchResult SearchAlgorithm::Search(const FieldIndices* index) const {
SearchResult SearchAlgorithm::Search(const FieldIndices* index, size_t cuttoff_limit) const {
auto bs = BasicSearch{index};
if (profiling_enabled_)
bs.EnableProfiling();
return bs.Search(*query_);
return bs.Search(*query_, cuttoff_limit);
}

optional<KnnScoreSortOption> SearchAlgorithm::GetKnnScoreSortOption() const {
Expand Down
4 changes: 3 additions & 1 deletion src/core/search/search.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ class SearchAlgorithm {
bool Init(std::string_view query, const QueryParams* params,
const OptionalFilters* filters = nullptr);

SearchResult Search(const FieldIndices* index) const;
// Search the given index, with a predefined limit for cutting off the returned result ids
SearchResult Search(const FieldIndices* index,
size_t cuttoff_limit = std::numeric_limits<size_t>::max()) const;

// if enabled, return limit & alias for knn query
std::optional<KnnScoreSortOption> GetKnnScoreSortOption() const;
Expand Down
7 changes: 6 additions & 1 deletion src/server/search/doc_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,12 @@ vector<search::SortableValue> ShardDocIndex::KeepTopKSorted(vector<DocId>* ids,
SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& params,
search::SearchAlgorithm* search_algo) const {
size_t limit = params.limit_offset + params.limit_total;
auto result = search_algo->Search(&*indices_);

// If we don't sort the documents, we don't need to copy more ids than are requested
bool can_cut = !params.sort_option && !search_algo->GetKnnScoreSortOption();
size_t id_cutoff_limit = can_cut ? limit : numeric_limits<size_t>::max();

auto result = search_algo->Search(&*indices_, id_cutoff_limit);
if (!result.error.empty())
return {facade::ErrorReply(std::move(result.error))};

Expand Down
Loading