-
Notifications
You must be signed in to change notification settings - Fork 449
Record search metrics on cancellation #5743
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
257418c
bb2cf53
6971dce
703db2b
86b6611
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,13 +13,17 @@ | |
// limitations under the License. | ||
|
||
use std::collections::{HashMap, HashSet}; | ||
use std::future::Future; | ||
use std::pin::Pin; | ||
use std::sync::OnceLock; | ||
use std::sync::atomic::{AtomicU64, Ordering}; | ||
use std::task::{Context as TaskContext, Poll, ready}; | ||
use std::time::Duration; | ||
|
||
use anyhow::Context; | ||
use futures::future::try_join_all; | ||
use itertools::Itertools; | ||
use pin_project::{pin_project, pinned_drop}; | ||
use quickwit_common::pretty::PrettySample; | ||
use quickwit_common::shared_consts; | ||
use quickwit_common::uri::Uri; | ||
|
@@ -45,6 +49,7 @@ use tantivy::aggregation::agg_result::AggregationResults; | |
use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResults; | ||
use tantivy::collector::Collector; | ||
use tantivy::schema::{Field, FieldEntry, FieldType, Schema}; | ||
use tokio::time::Instant; | ||
use tracing::{debug, info_span, instrument}; | ||
|
||
use crate::cluster_client::ClusterClient; | ||
|
@@ -1147,19 +1152,11 @@ async fn refine_and_list_matches( | |
Ok(split_metadatas) | ||
} | ||
|
||
/// Performs a distributed search. | ||
/// 1. Sends leaf request over gRPC to multiple leaf nodes. | ||
/// 2. Merges the search results. | ||
/// 3. Sends fetch docs requests to multiple leaf nodes. | ||
/// 4. Builds the response with docs and returns. | ||
#[instrument(skip_all)] | ||
pub async fn root_search( | ||
searcher_context: &SearcherContext, | ||
mut search_request: SearchRequest, | ||
mut metastore: MetastoreServiceClient, | ||
cluster_client: &ClusterClient, | ||
) -> crate::Result<SearchResponse> { | ||
let start_instant = tokio::time::Instant::now(); | ||
/// Fetches the list of splits and their metadata from the metastore | ||
async fn plan_splits_for_root_search( | ||
search_request: &mut SearchRequest, | ||
metastore: &mut MetastoreServiceClient, | ||
) -> crate::Result<(Vec<SplitMetadata>, IndexesMetasForLeafSearch)> { | ||
let list_indexes_metadatas_request = ListIndexesMetadataRequest { | ||
index_id_patterns: search_request.index_id_patterns.clone(), | ||
}; | ||
|
@@ -1172,71 +1169,73 @@ pub async fn root_search( | |
check_all_index_metadata_found(&indexes_metadata[..], &search_request.index_id_patterns[..])?; | ||
|
||
if indexes_metadata.is_empty() { | ||
// We go through root_search_aux instead of directly | ||
// returning an empty response to make sure we generate | ||
// a (pretty useless) scroll id if requested. | ||
let mut search_response = root_search_aux( | ||
searcher_context, | ||
&HashMap::default(), | ||
search_request, | ||
Vec::new(), | ||
cluster_client, | ||
) | ||
.await?; | ||
search_response.elapsed_time_micros = start_instant.elapsed().as_micros() as u64; | ||
return Ok(search_response); | ||
return Ok((Vec::new(), HashMap::default())); | ||
} | ||
|
||
let request_metadata = validate_request_and_build_metadata(&indexes_metadata, &search_request)?; | ||
let request_metadata = validate_request_and_build_metadata(&indexes_metadata, search_request)?; | ||
let split_metadatas = refine_and_list_matches( | ||
&mut metastore, | ||
&mut search_request, | ||
metastore, | ||
search_request, | ||
indexes_metadata, | ||
request_metadata.query_ast_resolved, | ||
request_metadata.sort_fields_is_datetime, | ||
request_metadata.timestamp_field_opt, | ||
) | ||
.await?; | ||
Ok(( | ||
split_metadatas, | ||
request_metadata.indexes_meta_for_leaf_search, | ||
)) | ||
} | ||
|
||
/// Performs a distributed search. | ||
/// 1. Sends leaf request over gRPC to multiple leaf nodes. | ||
/// 2. Merges the search results. | ||
/// 3. Sends fetch docs requests to multiple leaf nodes. | ||
/// 4. Builds the response with docs and returns. | ||
#[instrument(skip_all)] | ||
pub async fn root_search( | ||
searcher_context: &SearcherContext, | ||
mut search_request: SearchRequest, | ||
mut metastore: MetastoreServiceClient, | ||
cluster_client: &ClusterClient, | ||
) -> crate::Result<SearchResponse> { | ||
let start_instant = tokio::time::Instant::now(); | ||
|
||
let (split_metadatas, indexes_meta_for_leaf_search) = RootSearchMetricsFuture { | ||
start: start_instant, | ||
tracked: plan_splits_for_root_search(&mut search_request, &mut metastore), | ||
is_success: None, | ||
step: RootSearchMetricsStep::Plan, | ||
} | ||
.await?; | ||
|
||
let num_docs: usize = split_metadatas.iter().map(|split| split.num_docs).sum(); | ||
let num_splits = split_metadatas.len(); | ||
let current_span = tracing::Span::current(); | ||
current_span.record("num_docs", num_docs); | ||
current_span.record("num_splits", num_splits); | ||
|
||
let mut search_response_result = root_search_aux( | ||
searcher_context, | ||
&request_metadata.indexes_meta_for_leaf_search, | ||
search_request, | ||
split_metadatas, | ||
cluster_client, | ||
) | ||
let mut search_response_result = RootSearchMetricsFuture { | ||
start: start_instant, | ||
tracked: root_search_aux( | ||
searcher_context, | ||
&indexes_meta_for_leaf_search, | ||
search_request, | ||
split_metadatas, | ||
cluster_client, | ||
), | ||
is_success: None, | ||
step: RootSearchMetricsStep::Exec { | ||
targeted_splits: num_splits, | ||
}, | ||
} | ||
.await; | ||
|
||
let elapsed = start_instant.elapsed(); | ||
|
||
if let Ok(search_response) = &mut search_response_result { | ||
search_response.elapsed_time_micros = elapsed.as_micros() as u64; | ||
search_response.elapsed_time_micros = start_instant.elapsed().as_micros() as u64; | ||
} | ||
|
||
let label_values = if search_response_result.is_ok() { | ||
["success"] | ||
} else { | ||
["error"] | ||
}; | ||
SEARCH_METRICS | ||
.root_search_requests_total | ||
.with_label_values(label_values) | ||
.inc(); | ||
SEARCH_METRICS | ||
.root_search_request_duration_seconds | ||
.with_label_values(label_values) | ||
.observe(elapsed.as_secs_f64()); | ||
SEARCH_METRICS | ||
.root_search_targeted_splits | ||
.with_label_values(label_values) | ||
.observe(num_splits as f64); | ||
|
||
search_response_result | ||
} | ||
|
||
|
@@ -1766,6 +1765,69 @@ pub fn jobs_to_fetch_docs_requests( | |
Ok(fetch_docs_requests) | ||
} | ||
|
||
/// Identifies which phase of `root_search` a metrics-tracking future wraps.
enum RootSearchMetricsStep {
    /// Metastore planning phase: listing indexes and resolving splits.
    Plan,
    /// Query execution phase, targeting `targeted_splits` splits.
    Exec { targeted_splits: usize },
}
|
||
/// Wrapper around the plan and search futures to track metrics. | ||
#[pin_project(PinnedDrop)] | ||
struct RootSearchMetricsFuture<F> { | ||
#[pin] | ||
tracked: F, | ||
start: Instant, | ||
step: RootSearchMetricsStep, | ||
is_success: Option<bool>, | ||
} | ||
|
||
#[pinned_drop] | ||
impl<F> PinnedDrop for RootSearchMetricsFuture<F> { | ||
fn drop(self: Pin<&mut Self>) { | ||
let (targeted_splits, status) = match (&self.step, self.is_success) { | ||
// is is a partial success, actual success is recorded during the search step | ||
(RootSearchMetricsStep::Plan, Some(true)) => return, | ||
(RootSearchMetricsStep::Plan, Some(false)) => (0, "plan-error"), | ||
(RootSearchMetricsStep::Plan, None) => (0, "plan-cancelled"), | ||
Comment on lines
+1789
to
+1790
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These extra statuses seem valuable, but we should also try to avoid creating too many series. No strong opinion on whether we should have these or not. (On that topic, I think |
||
(RootSearchMetricsStep::Exec { targeted_splits }, Some(true)) => { | ||
(*targeted_splits, "success") | ||
} | ||
(RootSearchMetricsStep::Exec { targeted_splits }, Some(false)) => { | ||
(*targeted_splits, "error") | ||
} | ||
(RootSearchMetricsStep::Exec { targeted_splits }, None) => { | ||
(*targeted_splits, "cancelled") | ||
} | ||
}; | ||
|
||
let label_values = [status]; | ||
SEARCH_METRICS | ||
.root_search_requests_total | ||
.with_label_values(label_values) | ||
.inc(); | ||
SEARCH_METRICS | ||
.root_search_request_duration_seconds | ||
.with_label_values(label_values) | ||
.observe(self.start.elapsed().as_secs_f64()); | ||
SEARCH_METRICS | ||
.root_search_targeted_splits | ||
.with_label_values(label_values) | ||
.observe(targeted_splits as f64); | ||
} | ||
} | ||
|
||
impl<F, R, E> Future for RootSearchMetricsFuture<F> | ||
where F: Future<Output = Result<R, E>> | ||
{ | ||
type Output = Result<R, E>; | ||
|
||
fn poll(self: Pin<&mut Self>, cx: &mut TaskContext<'_>) -> Poll<Self::Output> { | ||
let this = self.project(); | ||
let response = ready!(this.tracked.poll(cx)); | ||
*this.is_success = Some(response.is_ok()); | ||
Poll::Ready(Ok(response?)) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use std::ops::Range; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was there any specific reason why this was measured per index here?