Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit a8defb5

Browse files
bors[bot]loiclec
andauthored
Merge #742
742: Add a "Criterion implementation strategy" parameter to Search r=irevoire a=loiclec Add a parameter to search requests which determines the implementation strategy of the criteria. This can be either `set-based`, `iterative`, or `dynamic` (ie choosing between set-based or iterative at search time). See https://github.com/meilisearch/milli/issues/755 for more context about this change. Co-authored-by: Loïc Lecrenier <[email protected]>
2 parents 249e051 + 339a4b0 commit a8defb5

File tree

7 files changed

+188
-48
lines changed

7 files changed

+188
-48
lines changed

cli/src/main.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::collections::BTreeMap;
2+
use std::fmt::Display;
23
use std::fs::File;
34
use std::io::{stdin, BufRead, BufReader, Cursor, Read, Write};
45
use std::path::PathBuf;
@@ -13,7 +14,7 @@ use milli::update::UpdateIndexingStep::{
1314
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
1415
};
1516
use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
16-
use milli::{heed, Index, Object};
17+
use milli::{heed, CriterionImplementationStrategy, Index, Object};
1718
use structopt::StructOpt;
1819

1920
#[global_allocator]
@@ -349,6 +350,29 @@ fn documents_from_csv(reader: impl Read) -> Result<Vec<u8>> {
349350
documents.into_inner().map_err(Into::into)
350351
}
351352

353+
#[derive(Debug, Clone, Copy)]
354+
struct SearchStrategyOption(CriterionImplementationStrategy);
355+
impl FromStr for SearchStrategyOption {
356+
type Err = String;
357+
fn from_str(s: &str) -> Result<Self, Self::Err> {
358+
match s.to_lowercase().as_str() {
359+
"dynamic" => Ok(SearchStrategyOption(CriterionImplementationStrategy::Dynamic)),
360+
"set" => Ok(SearchStrategyOption(CriterionImplementationStrategy::OnlySetBased)),
361+
"iterative" => Ok(SearchStrategyOption(CriterionImplementationStrategy::OnlyIterative)),
362+
_ => Err("could not parse {s} as a criterion implementation strategy, available options are `dynamic`, `set`, and `iterative`".to_owned()),
363+
}
364+
}
365+
}
366+
impl Display for SearchStrategyOption {
367+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
368+
match self.0 {
369+
CriterionImplementationStrategy::OnlyIterative => Display::fmt("iterative", f),
370+
CriterionImplementationStrategy::OnlySetBased => Display::fmt("set", f),
371+
CriterionImplementationStrategy::Dynamic => Display::fmt("dynamic", f),
372+
}
373+
}
374+
}
375+
352376
#[derive(Debug, StructOpt)]
353377
struct Search {
354378
query: Option<String>,
@@ -360,6 +384,8 @@ struct Search {
360384
limit: Option<usize>,
361385
#[structopt(short, long, conflicts_with = "query")]
362386
interactive: bool,
387+
#[structopt(short, long)]
388+
strategy: Option<SearchStrategyOption>,
363389
}
364390

365391
impl Performer for Search {
@@ -379,13 +405,15 @@ impl Performer for Search {
379405
&self.filter,
380406
&self.offset,
381407
&self.limit,
408+
&self.strategy,
382409
)?;
383410

384411
let time = now.elapsed();
385412

386413
let hits = serde_json::to_string_pretty(&jsons)?;
387414

388415
println!("{}", hits);
416+
389417
eprintln!("found {} results in {:.02?}", jsons.len(), time);
390418
}
391419
_ => break,
@@ -399,6 +427,7 @@ impl Performer for Search {
399427
&self.filter,
400428
&self.offset,
401429
&self.limit,
430+
&self.strategy,
402431
)?;
403432

404433
let time = now.elapsed();
@@ -420,6 +449,7 @@ impl Search {
420449
filter: &Option<String>,
421450
offset: &Option<usize>,
422451
limit: &Option<usize>,
452+
strategy: &Option<SearchStrategyOption>,
423453
) -> Result<Vec<Object>> {
424454
let txn = index.read_txn()?;
425455
let mut search = index.search(&txn);
@@ -441,6 +471,9 @@ impl Search {
441471
if let Some(limit) = limit {
442472
search.limit(*limit);
443473
}
474+
if let Some(strategy) = strategy {
475+
search.criterion_implementation_strategy(strategy.0);
476+
}
444477

445478
let result = search.execute()?;
446479

milli/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ pub use self::heed_codec::{
4242
};
4343
pub use self::index::Index;
4444
pub use self::search::{
45-
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
46-
MatchingWords, Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
45+
CriterionImplementationStrategy, FacetDistribution, Filter, FormatOptions, MatchBounds,
46+
MatcherBuilder, MatchingWord, MatchingWords, Search, SearchResult, TermsMatchingStrategy,
47+
DEFAULT_VALUES_PER_FACET,
4748
};
4849

4950
pub type Result<T> = std::result::Result<T, error::Error>;

milli/src/search/criteria/asc_desc.rs

Lines changed: 69 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::heed_codec::ByteSliceRefCodec;
1212
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
1313
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
1414
use crate::search::query_tree::Operation;
15+
use crate::search::CriterionImplementationStrategy;
1516
use crate::{FieldId, Index, Result};
1617

1718
/// Threshold on the number of candidates that will make
@@ -29,6 +30,7 @@ pub struct AscDesc<'t> {
2930
allowed_candidates: RoaringBitmap,
3031
initial_candidates: InitialCandidates,
3132
faceted_candidates: RoaringBitmap,
33+
implementation_strategy: CriterionImplementationStrategy,
3234
parent: Box<dyn Criterion + 't>,
3335
}
3436

@@ -38,17 +40,19 @@ impl<'t> AscDesc<'t> {
3840
rtxn: &'t heed::RoTxn,
3941
parent: Box<dyn Criterion + 't>,
4042
field_name: String,
43+
implementation_strategy: CriterionImplementationStrategy,
4144
) -> Result<Self> {
42-
Self::new(index, rtxn, parent, field_name, true)
45+
Self::new(index, rtxn, parent, field_name, true, implementation_strategy)
4346
}
4447

4548
pub fn desc(
4649
index: &'t Index,
4750
rtxn: &'t heed::RoTxn,
4851
parent: Box<dyn Criterion + 't>,
4952
field_name: String,
53+
implementation_strategy: CriterionImplementationStrategy,
5054
) -> Result<Self> {
51-
Self::new(index, rtxn, parent, field_name, false)
55+
Self::new(index, rtxn, parent, field_name, false, implementation_strategy)
5256
}
5357

5458
fn new(
@@ -57,6 +61,7 @@ impl<'t> AscDesc<'t> {
5761
parent: Box<dyn Criterion + 't>,
5862
field_name: String,
5963
is_ascending: bool,
64+
implementation_strategy: CriterionImplementationStrategy,
6065
) -> Result<Self> {
6166
let fields_ids_map = index.fields_ids_map(rtxn)?;
6267
let field_id = fields_ids_map.id(&field_name);
@@ -82,6 +87,7 @@ impl<'t> AscDesc<'t> {
8287
allowed_candidates: RoaringBitmap::new(),
8388
faceted_candidates,
8489
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
90+
implementation_strategy,
8591
parent,
8692
})
8793
}
@@ -149,6 +155,7 @@ impl<'t> Criterion for AscDesc<'t> {
149155
field_id,
150156
self.is_ascending,
151157
candidates & &self.faceted_candidates,
158+
self.implementation_strategy,
152159
)?,
153160
None => Box::new(std::iter::empty()),
154161
};
@@ -170,6 +177,51 @@ impl<'t> Criterion for AscDesc<'t> {
170177
}
171178
}
172179

180+
fn facet_ordered_iterative<'t>(
181+
index: &'t Index,
182+
rtxn: &'t heed::RoTxn,
183+
field_id: FieldId,
184+
is_ascending: bool,
185+
candidates: RoaringBitmap,
186+
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
187+
let number_iter = iterative_facet_number_ordered_iter(
188+
index,
189+
rtxn,
190+
field_id,
191+
is_ascending,
192+
candidates.clone(),
193+
)?;
194+
let string_iter =
195+
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
196+
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
197+
}
198+
199+
fn facet_ordered_set_based<'t>(
200+
index: &'t Index,
201+
rtxn: &'t heed::RoTxn,
202+
field_id: FieldId,
203+
is_ascending: bool,
204+
candidates: RoaringBitmap,
205+
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
206+
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
207+
208+
let number_iter = make_iter(
209+
rtxn,
210+
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
211+
field_id,
212+
candidates.clone(),
213+
)?;
214+
215+
let string_iter = make_iter(
216+
rtxn,
217+
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
218+
field_id,
219+
candidates,
220+
)?;
221+
222+
Ok(Box::new(number_iter.chain(string_iter)))
223+
}
224+
173225
/// Returns an iterator over groups of the given candidates in ascending or descending order.
174226
///
175227
/// It will either use an iterative or a recursive method on the whole facet database depending
@@ -180,36 +232,22 @@ fn facet_ordered<'t>(
180232
field_id: FieldId,
181233
is_ascending: bool,
182234
candidates: RoaringBitmap,
235+
implementation_strategy: CriterionImplementationStrategy,
183236
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
184-
if candidates.len() <= CANDIDATES_THRESHOLD {
185-
let number_iter = iterative_facet_number_ordered_iter(
186-
index,
187-
rtxn,
188-
field_id,
189-
is_ascending,
190-
candidates.clone(),
191-
)?;
192-
let string_iter =
193-
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
194-
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
195-
} else {
196-
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
197-
198-
let number_iter = make_iter(
199-
rtxn,
200-
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
201-
field_id,
202-
candidates.clone(),
203-
)?;
204-
205-
let string_iter = make_iter(
206-
rtxn,
207-
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
208-
field_id,
209-
candidates,
210-
)?;
211-
212-
Ok(Box::new(number_iter.chain(string_iter)))
237+
match implementation_strategy {
238+
CriterionImplementationStrategy::OnlyIterative => {
239+
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
240+
}
241+
CriterionImplementationStrategy::OnlySetBased => {
242+
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
243+
}
244+
CriterionImplementationStrategy::Dynamic => {
245+
if candidates.len() <= CANDIDATES_THRESHOLD {
246+
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
247+
} else {
248+
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
249+
}
250+
}
213251
}
214252
}
215253

milli/src/search/criteria/attribute.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ use roaring::RoaringBitmap;
99
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
1010
use crate::search::criteria::{InitialCandidates, Query};
1111
use crate::search::query_tree::{Operation, QueryKind};
12-
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
12+
use crate::search::{
13+
build_dfa, word_derivations, CriterionImplementationStrategy, WordDerivationsCache,
14+
};
1315
use crate::Result;
1416

1517
/// To be able to divide integers by the number of words in the query
@@ -30,17 +32,23 @@ pub struct Attribute<'t> {
3032
parent: Box<dyn Criterion + 't>,
3133
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
3234
set_buckets: Option<BinaryHeap<Branch<'t>>>,
35+
implementation_strategy: CriterionImplementationStrategy,
3336
}
3437

3538
impl<'t> Attribute<'t> {
36-
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
39+
pub fn new(
40+
ctx: &'t dyn Context<'t>,
41+
parent: Box<dyn Criterion + 't>,
42+
implementation_strategy: CriterionImplementationStrategy,
43+
) -> Self {
3744
Attribute {
3845
ctx,
3946
state: None,
4047
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
4148
parent,
4249
linear_buckets: None,
4350
set_buckets: None,
51+
implementation_strategy,
4452
}
4553
}
4654
}
@@ -64,7 +72,15 @@ impl<'t> Criterion for Attribute<'t> {
6472
}));
6573
}
6674
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
67-
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
75+
let found_candidates = if matches!(
76+
self.implementation_strategy,
77+
CriterionImplementationStrategy::OnlyIterative
78+
) || (matches!(
79+
self.implementation_strategy,
80+
CriterionImplementationStrategy::Dynamic
81+
) && allowed_candidates.len()
82+
< CANDIDATES_THRESHOLD)
83+
{
6884
let linear_buckets = match self.linear_buckets.as_mut() {
6985
Some(linear_buckets) => linear_buckets,
7086
None => {

0 commit comments

Comments
 (0)