Skip to content

Commit b7ea08e

Browse files
committed
Optimize time ranged leaf search queries
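Previously, when the search request contained a time range, we skipped the optimization that converts unneeded split searches into count-only queries. The optimization now also applies to time-ranged requests, using the splits that are fully contained in the requested time range.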
1 parent bca9ff1 commit b7ea08e

File tree

1 file changed

+74
-23
lines changed
  • quickwit/quickwit-search/src

1 file changed

+74
-23
lines changed

quickwit/quickwit-search/src/leaf.rs

+74-23
Original file line numberDiff line numberDiff line change
@@ -942,11 +942,6 @@ fn is_simple_all_query(search_request: &SearchRequest) -> bool {
942942
return false;
943943
}
944944

945-
// TODO: Update the logic to handle start_timestamp end_timestamp ranges
946-
if search_request.start_timestamp.is_some() || search_request.end_timestamp.is_some() {
947-
return false;
948-
}
949-
950945
let Ok(query_ast) = serde_json::from_str(&search_request.query_ast) else {
951946
return false;
952947
};
@@ -1000,6 +995,29 @@ impl CanSplitDoBetter {
1000995
}
1001996
}
1002997

998+
fn is_split_contained_in_search_time_range(
999+
split: &SplitIdAndFooterOffsets,
1000+
search_request: &SearchRequest,
1001+
) -> bool {
1002+
if let Some(start) = search_request.start_timestamp {
1003+
let Some(split_start) = split.timestamp_start else {
1004+
return false;
1005+
};
1006+
if split_start < start {
1007+
return false;
1008+
}
1009+
}
1010+
if let Some(end) = search_request.end_timestamp {
1011+
let Some(split_end) = split.timestamp_end else {
1012+
return false;
1013+
};
1014+
if split_end >= end {
1015+
return false;
1016+
}
1017+
}
1018+
true
1019+
}
1020+
10031021
fn to_splits_with_request(
10041022
splits: Vec<SplitIdAndFooterOffsets>,
10051023
request: Arc<SearchRequest>,
@@ -1012,23 +1030,32 @@ impl CanSplitDoBetter {
10121030
}
10131031

10141032
/// Calculate the number of splits which are guaranteed to deliver enough documents.
1033+
///
1034+
/// Only splits fully contained in the search time range are counted. Returns `None` if those
1035+
/// splits do not hold at least the number of requested documents.
10151036
fn get_min_required_splits(
10161037
splits: &[SplitIdAndFooterOffsets],
10171038
request: &SearchRequest,
1018-
) -> usize {
1039+
) -> Option<usize> {
10191040
let num_requested_docs = request.start_offset + request.max_hits;
10201041

1021-
splits
1022-
.into_iter()
1023-
.map(|split| split.num_docs)
1024-
// computing the partial sum
1025-
.scan(0u64, |partial_sum: &mut u64, num_docs_in_split: u64| {
1026-
*partial_sum += num_docs_in_split;
1027-
Some(*partial_sum)
1028-
})
1029-
.take_while(|partial_sum| *partial_sum < num_requested_docs)
1030-
.count()
1031-
+ 1
1042+
let mut min_required_splits = 0;
1043+
let mut partial_sum = 0;
1044+
1045+
for split in splits.iter() {
1046+
if !Self::is_split_contained_in_search_time_range(split, request) {
1047+
continue;
1048+
}
1049+
1050+
partial_sum += split.num_docs;
1051+
1052+
min_required_splits += 1;
1053+
if partial_sum >= num_requested_docs {
1054+
return Some(min_required_splits);
1055+
}
1056+
}
1057+
1058+
None
10321059
}
10331060

10341061
fn optimize_split_id_higher(
@@ -1043,7 +1070,11 @@ impl CanSplitDoBetter {
10431070
return Ok(Self::to_splits_with_request(splits, request));
10441071
}
10451072

1046-
let min_required_splits = Self::get_min_required_splits(&splits, &request);
1073+
let Some(min_required_splits) = Self::get_min_required_splits(&splits, &request) else {
1074+
// The splits contained in the time range do not hold enough documents; skip the optimization.
1075+
return Ok(Self::to_splits_with_request(splits, request));
1076+
};
1077+
10471078
let mut split_with_req = Self::to_splits_with_request(splits, request);
10481079

10491080
// In this case there is no sort order, we order by split id.
@@ -1061,14 +1092,21 @@ impl CanSplitDoBetter {
10611092
request: Arc<SearchRequest>,
10621093
mut splits: Vec<SplitIdAndFooterOffsets>,
10631094
) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
1064-
splits.sort_unstable_by_key(|split| std::cmp::Reverse(split.timestamp_end()));
1095+
splits.sort_unstable_by_key(|split| {
1096+
let contained = Self::is_split_contained_in_search_time_range(split, &request);
1097+
(!contained, std::cmp::Reverse(split.timestamp_end()))
1098+
});
10651099

10661100
if !is_simple_all_query(&request) {
10671101
// no optimization opportunity here.
10681102
return Ok(Self::to_splits_with_request(splits, request));
10691103
}
10701104

1071-
let min_required_splits = Self::get_min_required_splits(&splits, &request);
1105+
let Some(min_required_splits) = Self::get_min_required_splits(&splits, &request) else {
1106+
// The splits contained in the time range do not hold enough documents; skip the optimization.
1107+
return Ok(Self::to_splits_with_request(splits, request));
1108+
};
1109+
10721110
let mut split_with_req = Self::to_splits_with_request(splits, request);
10731111

10741112
// We order by timestamp desc. split_with_req is sorted by timestamp_end desc.
@@ -1098,14 +1136,21 @@ impl CanSplitDoBetter {
10981136
request: Arc<SearchRequest>,
10991137
mut splits: Vec<SplitIdAndFooterOffsets>,
11001138
) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
1101-
splits.sort_unstable_by_key(|split| split.timestamp_start());
1139+
splits.sort_unstable_by_key(|split| {
1140+
let contained = Self::is_split_contained_in_search_time_range(split, &request);
1141+
(!contained, split.timestamp_start())
1142+
});
11021143

11031144
if !is_simple_all_query(&request) {
11041145
// no optimization opportunity here.
11051146
return Ok(Self::to_splits_with_request(splits, request));
11061147
}
11071148

1108-
let min_required_splits = Self::get_min_required_splits(&splits, &request);
1149+
let Some(min_required_splits) = Self::get_min_required_splits(&splits, &request) else {
1150+
// The splits contained in the time range do not hold enough documents; skip the optimization.
1151+
return Ok(Self::to_splits_with_request(splits, request));
1152+
};
1153+
11091154
let mut split_with_req = Self::to_splits_with_request(splits, request);
11101155

11111156
// We order by timestamp asc. split_with_req is sorted by timestamp_start.
@@ -1142,7 +1187,10 @@ impl CanSplitDoBetter {
11421187
request: Arc<SearchRequest>,
11431188
mut splits: Vec<SplitIdAndFooterOffsets>,
11441189
) -> Result<Vec<(SplitIdAndFooterOffsets, SearchRequest)>, SearchError> {
1145-
splits.sort_unstable_by_key(|split| std::cmp::Reverse(split.timestamp_end()));
1190+
splits.sort_unstable_by_key(|split| {
1191+
let contained = Self::is_split_contained_in_search_time_range(split, &request);
1192+
(!contained, std::cmp::Reverse(split.timestamp_end()))
1193+
});
11461194

11471195
if !is_simple_all_query(&request) {
11481196
// no optimization opportunity here.
@@ -1155,6 +1203,9 @@ impl CanSplitDoBetter {
11551203
/// This function tries to detect upfront which splits contain the top n hits and convert other
11561204
/// split searches to count only searches. It also optimizes split order.
11571205
///
1206+
/// To skip splits in time-ranged queries, we first sort the splits by whether they are fully
1207+
/// contained in the search request time range.
1208+
///
11581209
/// Returns the search_requests with their split.
11591210
fn optimize(
11601211
&self,

0 commit comments

Comments
 (0)