From 893592c5e97b32c5d738d383c15cd8f187abc6cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 26 Apr 2023 17:08:55 +0200 Subject: [PATCH] Send analytics about the facet-search route --- meilisearch/src/analytics/mock_analytics.rs | 13 + meilisearch/src/analytics/mod.rs | 7 + .../src/analytics/segment_analytics.rs | 230 +++++++++++++++--- .../src/routes/indexes/facet_search.rs | 17 +- 4 files changed, 225 insertions(+), 42 deletions(-) diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs index 68c3a7dff..4bd190f87 100644 --- a/meilisearch/src/analytics/mock_analytics.rs +++ b/meilisearch/src/analytics/mock_analytics.rs @@ -38,6 +38,18 @@ impl MultiSearchAggregator { pub fn succeed(&mut self) {} } +#[derive(Default)] +pub struct FacetSearchAggregator; + +#[allow(dead_code)] +impl FacetSearchAggregator { + pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { + Self::default() + } + + pub fn succeed(&mut self, _: &dyn Any) {} +} + impl MockAnalytics { #[allow(clippy::new_ret_no_self)] pub fn new(opt: &Opt) -> Arc { @@ -56,6 +68,7 @@ impl Analytics for MockAnalytics { fn get_search(&self, _aggregate: super::SearchAggregator) {} fn post_search(&self, _aggregate: super::SearchAggregator) {} fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {} + fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {} fn add_documents( &self, _documents_query: &UpdateDocumentsQuery, diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index c48564dff..86f9c1abe 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics; pub type SearchAggregator = mock_analytics::SearchAggregator; #[cfg(any(debug_assertions, not(feature = "analytics")))] pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator; +#[cfg(any(debug_assertions, not(feature = "analytics")))] +pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator; // if we are in release mode and the feature analytics was enabled // we use the real analytics @@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; pub type SearchAggregator = segment_analytics::SearchAggregator; #[cfg(all(not(debug_assertions), feature = "analytics"))] pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; /// The Meilisearch config dir: /// `~/.config/Meilisearch` on *NIX or *BSD. @@ -88,6 +92,9 @@ pub trait Analytics: Sync + Send { /// This method should be called to aggregate a post array of searches fn post_multi_search(&self, aggregate: MultiSearchAggregator); + /// This method should be called to aggregate post facet values searches + fn post_facet_search(&self, aggregate: FacetSearchAggregator); + // this method should be called to aggregate a add documents request fn add_documents( &self, diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 9a96c4650..4508fa26e 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,5 +1,6 @@ use std::collections::{BinaryHeap, HashMap, HashSet}; use std::fs; +use std::mem::take; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -29,11 +30,13 @@ use super::{ use crate::analytics::Analytics; use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot}; use crate::routes::indexes::documents::UpdateDocumentsQuery; +use crate::routes::indexes::facet_search::FacetSearchQuery; use crate::routes::tasks::TasksFilterQuery; use crate::routes::{create_all_stats, Stats}; use crate::search::{ - SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult, + DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, + DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, }; use crate::Opt; @@ -71,6 +74,7 @@ pub enum AnalyticsMsg { AggregateGetSearch(SearchAggregator), AggregatePostSearch(SearchAggregator), AggregatePostMultiSearch(MultiSearchAggregator), + AggregatePostFacetSearch(FacetSearchAggregator), AggregateAddDocuments(DocumentsAggregator), AggregateDeleteDocuments(DocumentsDeletionAggregator), AggregateUpdateDocuments(DocumentsAggregator), @@ -139,6 +143,7 @@ impl SegmentAnalytics { batcher, post_search_aggregator: SearchAggregator::default(), post_multi_search_aggregator: MultiSearchAggregator::default(), + post_facet_search_aggregator: FacetSearchAggregator::default(), get_search_aggregator: SearchAggregator::default(), add_documents_aggregator: DocumentsAggregator::default(), delete_documents_aggregator: DocumentsDeletionAggregator::default(), @@ -182,6 +187,10 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); } + fn post_facet_search(&self, aggregate: FacetSearchAggregator) { + let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate)); + } + fn post_multi_search(&self, aggregate: MultiSearchAggregator) { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate)); } @@ -354,6 +363,7 @@ pub struct Segment { get_search_aggregator: SearchAggregator, post_search_aggregator: SearchAggregator, post_multi_search_aggregator: MultiSearchAggregator, + post_facet_search_aggregator: FacetSearchAggregator, add_documents_aggregator: DocumentsAggregator, delete_documents_aggregator: DocumentsDeletionAggregator, update_documents_aggregator: DocumentsAggregator, @@ -418,6 +428,7 @@ impl Segment { Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), @@ -461,55 +472,72 @@ impl Segment { }) .await; } - let get_search = std::mem::take(&mut self.get_search_aggregator) - .into_event(&self.user, "Documents Searched GET"); - let post_search = std::mem::take(&mut self.post_search_aggregator) - .into_event(&self.user, "Documents Searched POST"); - let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator) - .into_event(&self.user, "Documents Searched by Multi-Search POST"); - let add_documents = std::mem::take(&mut self.add_documents_aggregator) - .into_event(&self.user, "Documents Added"); - let delete_documents = std::mem::take(&mut self.delete_documents_aggregator) - .into_event(&self.user, "Documents Deleted"); - let update_documents = std::mem::take(&mut self.update_documents_aggregator) - .into_event(&self.user, "Documents Updated"); - let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator) - .into_event(&self.user, "Documents Fetched GET"); - let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator) - .into_event(&self.user, "Documents Fetched POST"); - let get_tasks = - std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen"); - let health = - std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen"); - if let Some(get_search) = get_search { + let Segment { + inbox: _, + opt: _, + batcher: _, + user, + get_search_aggregator, + post_search_aggregator, + post_multi_search_aggregator, + post_facet_search_aggregator, + add_documents_aggregator, + delete_documents_aggregator, + update_documents_aggregator, + get_fetch_documents_aggregator, + post_fetch_documents_aggregator, + get_tasks_aggregator, + health_aggregator, + } = self; + + if let Some(get_search) = + take(get_search_aggregator).into_event(&user, "Documents Searched GET") + { let _ = self.batcher.push(get_search).await; } - if let Some(post_search) = post_search { + if let Some(post_search) = + take(post_search_aggregator).into_event(&user, "Documents Searched POST") + { let _ = self.batcher.push(post_search).await; } - if let Some(post_multi_search) = post_multi_search { + if let Some(post_multi_search) = take(post_multi_search_aggregator) + .into_event(&user, "Documents Searched by Multi-Search POST") + { let _ = self.batcher.push(post_multi_search).await; } - if let Some(add_documents) = add_documents { + if let Some(post_facet_search) = take(post_facet_search_aggregator) + .into_event(&user, "Documents Searched by Facet-Search POST") + { + let _ = self.batcher.push(post_facet_search).await; + } + if let Some(add_documents) = + take(add_documents_aggregator).into_event(&user, "Documents Added") + { let _ = self.batcher.push(add_documents).await; } - if let Some(delete_documents) = delete_documents { + if let Some(delete_documents) = + take(delete_documents_aggregator).into_event(&user, "Documents Deleted") + { let _ = self.batcher.push(delete_documents).await; } - if let Some(update_documents) = update_documents { + if let Some(update_documents) = + take(update_documents_aggregator).into_event(&user, "Documents Updated") + { let _ = self.batcher.push(update_documents).await; } - if let Some(get_fetch_documents) = get_fetch_documents { + if let Some(get_fetch_documents) = + take(get_fetch_documents_aggregator).into_event(&user, "Documents Fetched GET") { let _ = self.batcher.push(get_fetch_documents).await; } - if let Some(post_fetch_documents) = post_fetch_documents { + if let Some(post_fetch_documents) = + take(post_fetch_documents_aggregator).into_event(&user, "Documents Fetched POST") { let _ = self.batcher.push(post_fetch_documents).await; } - if let Some(get_tasks) = get_tasks { + if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") { let _ = self.batcher.push(get_tasks).await; } - if let Some(health) = health { + if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") { let _ = self.batcher.push(health).await; } let _ = self.batcher.flush().await; @@ -909,6 +937,144 @@ impl MultiSearchAggregator { } } +#[derive(Default)] +pub struct FacetSearchAggregator { + timestamp: Option, + + // context + user_agents: HashSet, + + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap, + + // The set of all facetNames that were used + facet_names: HashSet, + + // As there been any other parameter than the facetName or facetQuery ones? + additional_search_parameters_provided: bool, +} + +impl FacetSearchAggregator { + pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name, + q, + offset, + limit, + page, + hits_per_page, + attributes_to_retrieve, + attributes_to_crop, + crop_length, + attributes_to_highlight, + show_matches_position, + filter, + sort, + facets, + highlight_pre_tag, + highlight_post_tag, + crop_marker, + matching_strategy, + } = query; + + let mut ret = Self::default(); + ret.timestamp = Some(OffsetDateTime::now_utc()); + + ret.total_received = 1; + ret.user_agents = extract_user_agents(request).into_iter().collect(); + ret.facet_names = Some(facet_name.clone()).into_iter().collect(); + + ret.additional_search_parameters_provided = q.is_some() + || *offset != DEFAULT_SEARCH_OFFSET() + || *limit != DEFAULT_SEARCH_LIMIT() + || page.is_some() + || hits_per_page.is_some() + || attributes_to_retrieve.is_some() + || attributes_to_crop.is_some() + || *crop_length != DEFAULT_CROP_LENGTH() + || attributes_to_highlight.is_some() + || *show_matches_position + || filter.is_some() + || sort.is_some() + || facets.is_some() + || *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG() + || *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG() + || *crop_marker != DEFAULT_CROP_MARKER() + || *matching_strategy != MatchingStrategy::default(); + + ret + } + + pub fn succeed(&mut self, result: &FacetSearchResult) { + self.total_succeeded = self.total_succeeded.saturating_add(1); + self.time_spent.push(result.processing_time_ms as usize); + } + + /// Aggregate one [SearchAggregator] into another. + pub fn aggregate(&mut self, mut other: Self) { + if self.timestamp.is_none() { + self.timestamp = other.timestamp; + } + + // context + for user_agent in other.user_agents.into_iter() { + self.user_agents.insert(user_agent); + } + + // request + self.total_received = self.total_received.saturating_add(other.total_received); + self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded); + self.time_spent.append(&mut other.time_spent); + + // facet_names + for facet_name in other.facet_names.into_iter() { + self.facet_names.insert(facet_name); + } + + // additional_search_parameters_provided + self.additional_search_parameters_provided = self.additional_search_parameters_provided + | other.additional_search_parameters_provided; + } + + pub fn into_event(self, user: &User, event_name: &str) -> Option { + if self.total_received == 0 { + None + } else { + // the index of the 99th percentage of value + let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.; + // we get all the values in a sorted manner + let time_spent = self.time_spent.into_sorted_vec(); + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th as usize); + + let properties = json!({ + "user-agent": self.user_agents, + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": self.total_succeeded, + "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics + "total_received": self.total_received, + }, + "facets": { + "total_distinct_facet_count": self.facet_names.len(), + }, + "additional_search_parameters_provided": self.additional_search_parameters_provided, + }); + + Some(Track { + timestamp: self.timestamp, + user: user.clone(), + event: event_name.to_string(), + properties, + ..Default::default() + }) + } + } +} + #[derive(Default)] pub struct DocumentsAggregator { timestamp: Option, diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 16479c755..947454d12 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -14,7 +14,7 @@ use meilisearch_types::milli::facet; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; -use crate::analytics::{Analytics, SearchAggregator}; +use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::search::{ @@ -122,6 +122,8 @@ pub async fn search( let query = params.into_inner(); debug!("facet search called with params: {:?}", query); + let mut aggregate = FacetSearchAggregator::from_query(&query, &req); + let facet_query = query.facet_query.clone(); let facet_name = query.facet_name.clone(); let mut search_query = SearchQuery::from(query); @@ -131,21 +133,16 @@ pub async fn search( add_search_rules(&mut search_query, search_rules); } - // TODO log stuff - // let mut aggregate = SearchAggregator::from_query(&query, &req); - let index = index_scheduler.index(&index_uid)?; let search_result = tokio::task::spawn_blocking(move || { perform_facet_search(&index, search_query, facet_query, facet_name) }) .await?; - // TODO log stuff - // if let Ok(ref search_result) = search_result { - // aggregate.succeed(search_result); - // } - // TODO analytics - // analytics.post_search(aggregate); + if let Ok(ref search_result) = search_result { + aggregate.succeed(search_result); + } + analytics.post_facet_search(aggregate); let search_result = search_result?;