From 596a98f7c6bb87d512838d90426fd6e577fd0479 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 20 Feb 2023 09:21:52 +0100 Subject: [PATCH] multi-search: Add basic analytics --- meilisearch/src/analytics/mock_analytics.rs | 13 ++ meilisearch/src/analytics/mod.rs | 7 + .../src/analytics/segment_analytics.rs | 127 +++++++++++++++++- 3 files changed, 146 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs index 092e35424..03aed0189 100644 --- a/meilisearch/src/analytics/mock_analytics.rs +++ b/meilisearch/src/analytics/mock_analytics.rs @@ -26,6 +26,18 @@ impl SearchAggregator { pub fn succeed(&mut self, _: &dyn Any) {} } +#[derive(Default)] +pub struct MultiSearchAggregator; + +#[allow(dead_code)] +impl MultiSearchAggregator { + pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self { + Self::default() + } + + pub fn succeed(&mut self) {} +} + impl MockAnalytics { #[allow(clippy::new_ret_no_self)] pub fn new(opt: &Opt) -> Arc { @@ -43,6 +55,7 @@ impl Analytics for MockAnalytics { fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} fn get_search(&self, _aggregate: super::SearchAggregator) {} fn post_search(&self, _aggregate: super::SearchAggregator) {} + fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {} fn add_documents( &self, _documents_query: &UpdateDocumentsQuery, diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index ec15707bf..4d295db57 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -23,6 +23,8 @@ use crate::routes::tasks::TasksFilterQuery; pub type SegmentAnalytics = mock_analytics::MockAnalytics; #[cfg(any(debug_assertions, not(feature = "analytics")))] pub type SearchAggregator = mock_analytics::SearchAggregator; +#[cfg(any(debug_assertions, not(feature = "analytics")))] +pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator; // if we are in release mode and the feature analytics was enabled // we use the real analytics @@ -30,6 +32,8 @@ pub type SearchAggregator = mock_analytics::SearchAggregator; pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; #[cfg(all(not(debug_assertions), feature = "analytics"))] pub type SearchAggregator = segment_analytics::SearchAggregator; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; /// The Meilisearch config dir: /// `~/.config/Meilisearch` on *NIX or *BSD. @@ -74,6 +78,9 @@ pub trait Analytics: Sync + Send { /// This method should be called to aggregate a post search fn post_search(&self, aggregate: SearchAggregator); + /// This method should be called to aggregate a post array of searches + fn post_multi_search(&self, aggregate: MultiSearchAggregator); + // this method should be called to aggregate a add documents request fn add_documents( &self, diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 4d4b7cd06..92ac4b1d5 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -30,7 +30,7 @@ use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::routes::tasks::TasksFilterQuery; use crate::routes::{create_all_stats, Stats}; use crate::search::{ - SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, }; use crate::Opt; @@ -68,6 +68,7 @@ pub enum AnalyticsMsg { BatchMessage(Track), AggregateGetSearch(SearchAggregator), AggregatePostSearch(SearchAggregator), + AggregatePostMultiSearch(MultiSearchAggregator), AggregateAddDocuments(DocumentsAggregator), AggregateDeleteDocuments(DocumentsDeletionAggregator), AggregateUpdateDocuments(DocumentsAggregator), @@ -133,6 +134,7 @@ impl SegmentAnalytics { opt: opt.clone(), batcher, post_search_aggregator: SearchAggregator::default(), + post_multi_search_aggregator: MultiSearchAggregator::default(), get_search_aggregator: SearchAggregator::default(), add_documents_aggregator: DocumentsAggregator::default(), delete_documents_aggregator: DocumentsDeletionAggregator::default(), @@ -174,6 +176,10 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); } + fn post_multi_search(&self, aggregate: MultiSearchAggregator) { + let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate)); + } + fn add_documents( &self, documents_query: &UpdateDocumentsQuery, @@ -326,6 +332,7 @@ pub struct Segment { batcher: AutoBatcher, get_search_aggregator: SearchAggregator, post_search_aggregator: SearchAggregator, + post_multi_search_aggregator: MultiSearchAggregator, add_documents_aggregator: DocumentsAggregator, delete_documents_aggregator: DocumentsDeletionAggregator, update_documents_aggregator: DocumentsAggregator, @@ -383,6 +390,7 @@ impl Segment { Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await), Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), @@ -428,6 +436,8 @@ impl Segment { .into_event(&self.user, "Documents Searched GET"); let post_search = std::mem::take(&mut self.post_search_aggregator) .into_event(&self.user, "Documents Searched POST"); + let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator) + .into_event(&self.user, "Documents Searched by Multi-Search POST"); let add_documents = std::mem::take(&mut self.add_documents_aggregator) .into_event(&self.user, "Documents Added"); let delete_documents = std::mem::take(&mut self.delete_documents_aggregator) @@ -445,6 +455,9 @@ impl Segment { if let Some(post_search) = post_search { let _ = self.batcher.push(post_search).await; } + if let Some(post_multi_search) = post_multi_search { + let _ = self.batcher.push(post_multi_search).await; + } if let Some(add_documents) = add_documents { let _ = self.batcher.push(add_documents).await; } @@ -718,6 +731,118 @@ impl SearchAggregator { } } +#[derive(Default)] +pub struct MultiSearchAggregator { + timestamp: Option, + + // requests + total_received: usize, + total_succeeded: usize, + + // sum of the number of distinct indexes in each single request, use with total_received to compute an avg + total_distinct_index_count: usize, + // number of queries with a single index, use with total_received to compute a proportion + total_single_index: usize, + + // sum of the number of search queries in the requests, use with total_received to compute an average + total_search_count: usize, + + // context + user_agents: HashSet, +} + +impl MultiSearchAggregator { + pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self { + let timestamp = Some(OffsetDateTime::now_utc()); + + let user_agents = extract_user_agents(request).into_iter().collect(); + + let distinct_indexes: HashSet<_> = + query.iter().map(|query| query.index_uid.as_str()).collect(); + + Self { + timestamp, + total_received: 1, + total_succeeded: 0, + total_distinct_index_count: distinct_indexes.len(), + total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, + total_search_count: query.len(), + user_agents, + } + } + + pub fn succeed(&mut self) { + self.total_succeeded = self.total_succeeded.saturating_add(1); + } + + pub fn aggregate(&mut self, other: Self) { + // write the aggregate in a way that will cause a compilation error if a field is added. + + // get ownership of self, replacing it by a default value. + let this = std::mem::take(self); + + let timestamp = this.timestamp.or(other.timestamp); + let total_received = this.total_received.saturating_add(other.total_received); + let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded); + let total_distinct_index_count = + this.total_distinct_index_count.saturating_add(other.total_distinct_index_count); + let total_single_index = this.total_single_index.saturating_add(other.total_single_index); + let total_search_count = this.total_search_count.saturating_add(other.total_search_count); + let mut user_agents = this.user_agents; + + for user_agent in other.user_agents.into_iter() { + user_agents.insert(user_agent); + } + + // need all fields or compile error + let mut aggregated = Self { + timestamp, + total_received, + total_succeeded, + total_distinct_index_count, + total_single_index, + total_search_count, + user_agents, + // do not add _ or ..Default::default() here + }; + + // replace the default self with the aggregated value + std::mem::swap(self, &mut aggregated); + } + + pub fn into_event(self, user: &User, event_name: &str) -> Option { + if self.total_received == 0 { + None + } else { + let properties = json!({ + "user-agent": self.user_agents, + "requests": { + "total_succeeded": self.total_succeeded, + "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics + "total_received": self.total_received, + }, + "indexes": { + "total_single_index": self.total_single_index, + "total_distinct_index_count": self.total_distinct_index_count, + "avg_distinct_index_count": (self.total_distinct_index_count as f64) / (self.total_received as f64), // not 0 else returned early + }, + "searches": { + "total_search_count": self.total_search_count, + "avg_search_count": (self.total_search_count as f64) / (self.total_received as f64), + } + }); + + Some(Track { + timestamp: self.timestamp, + user: user.clone(), + event: event_name.to_string(), + properties, + ..Default::default() + }) + } + } +} + #[derive(Default)] pub struct DocumentsAggregator { timestamp: Option,