Rewrite most of the analytics, especially the settings

Tamo 2024-10-16 15:43:27 +02:00
parent a0b3887709
commit 73e87c152a
12 changed files with 1381 additions and 647 deletions

View File

@ -1,109 +0,0 @@
use std::any::Any;
use std::sync::Arc;
use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
use crate::Opt;
pub struct MockAnalytics {
instance_uid: Option<InstanceUid>,
}
#[derive(Default)]
pub struct SearchAggregator;
#[allow(dead_code)]
impl SearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct SimilarAggregator;
#[allow(dead_code)]
impl SimilarAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct MultiSearchAggregator;
#[allow(dead_code)]
impl MultiSearchAggregator {
pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self) {}
}
#[derive(Default)]
pub struct FacetSearchAggregator;
#[allow(dead_code)]
impl FacetSearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
let instance_uid = find_user_id(&opt.db_path);
Arc::new(Self { instance_uid })
}
}
impl Analytics for MockAnalytics {
fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
self.instance_uid.as_ref()
}
// These methods are noop and should be optimized out
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {}
fn update_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn update_documents_by_function(
&self,
_documents_query: &DocumentEditionByFunction,
_index_creation: bool,
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
}

View File

@ -1,45 +1,51 @@
mod mock_analytics;
pub mod segment_analytics;
#[cfg(feature = "analytics")]
mod segment_analytics;
use std::any::TypeId;
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
pub use mock_analytics::MockAnalytics;
use once_cell::sync::Lazy;
use platform_dirs::AppDirs;
use serde_json::Value;
use segment::message::User;
use serde::Serialize;
use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery};
// if the analytics feature is disabled
// the `SegmentAnalytics` point to the mock instead of the real analytics
#[cfg(not(feature = "analytics"))]
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(not(feature = "analytics"))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type SimilarAggregator = mock_analytics::SimilarAggregator;
#[cfg(not(feature = "analytics"))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(not(feature = "analytics"))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
// if the feature analytics is enabled we use the real analytics
#[cfg(feature = "analytics")]
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(feature = "analytics")]
pub use segment_analytics::SearchAggregator;
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(feature = "analytics")]
pub type SimilarAggregator = segment_analytics::SimilarAggregator;
#[cfg(feature = "analytics")]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(feature = "analytics")]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name.
#[macro_export]
macro_rules! empty_analytics {
($struct_name:ident, $event_name:literal) => {
#[derive(Default)]
struct $struct_name {}
impl $crate::analytics::Aggregate for $struct_name {
fn event_name(&self) -> &'static str {
$event_name
}
fn aggregate(self, other: Self) -> Self
where
Self: Sized,
{
self
}
fn into_event(self) -> serde_json::Value {
serde_json::json!({})
}
}
};
}
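A minimal usage sketch of the macro above, mirroring the `DumpAnalytics` declaration that appears in the dumps route later in this commit; the expansion described in the comments is paraphrased from the macro body:
// Declares a zero-payload event type: expands to a `#[derive(Default)] struct DumpAnalytics {}`
// whose `event_name()` returns "Dump Created" and whose `into_event()` is `serde_json::json!({})`.
crate::empty_analytics!(DumpAnalytics, "Dump Created");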
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
/// `~/Library/ApplicationSupport` on macOS.
@ -78,60 +84,73 @@ pub enum DocumentFetchKind {
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
pub trait Analytics: Sync + Send {
fn instance_uid(&self) -> Option<&InstanceUid>;
pub trait Aggregate {
fn event_name(&self) -> &'static str;
fn aggregate(self, other: Self) -> Self
where
Self: Sized;
fn into_event(self) -> impl Serialize
where
Self: Sized;
}
/// Helper trait to define multiple aggregates with the same content but different names.
/// Commonly used when you must aggregate a search made with POST or with GET, for example.
pub trait AggregateMethod {
fn event_name() -> &'static str;
}
/// A macro used to quickly define multiple aggregate methods with their names
#[macro_export]
macro_rules! aggregate_methods {
($method:ident => $event_name:literal) => {
pub enum $method {}
impl $crate::analytics::AggregateMethod for $method {
fn event_name() -> &'static str {
$event_name
}
}
};
($($method:ident => $event_name:literal,)+) => {
$(
aggregate_methods!($method => $event_name);
)+
};
}
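A minimal sketch of how `aggregate_methods!` pairs with the `Aggregate` trait, modelled on the `SearchAggregator` and `DocumentsAggregator` further down in this commit; the `HitCount` type and its event names are invented for the example:
aggregate_methods!(
    HitsGET => "Hits Fetched GET",
    HitsPOST => "Hits Fetched POST",
);

pub struct HitCount<Method: AggregateMethod> {
    total_received: usize,
    marker: std::marker::PhantomData<Method>,
}

impl<Method: AggregateMethod> Aggregate for HitCount<Method> {
    // The same aggregate payload is reported under a different event name per method.
    fn event_name(&self) -> &'static str {
        Method::event_name()
    }
    fn aggregate(self, other: Self) -> Self {
        Self {
            total_received: self.total_received.saturating_add(other.total_received),
            marker: std::marker::PhantomData,
        }
    }
    fn into_event(self) -> impl serde::Serialize {
        serde_json::json!({ "requests": { "total_received": self.total_received } })
    }
}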
pub struct Analytics {
// TODO: TAMO: remove
inner: Option<SegmentAnalytics>,
instance_uid: Option<InstanceUid>,
user: Option<User>,
events: HashMap<TypeId, Box<dyn Aggregate>>,
}
impl Analytics {
fn no_analytics() -> Self {
Self { inner: None, events: HashMap::new(), instance_uid: None, user: None }
}
fn segment_analytics(segment: SegmentAnalytics) -> Self {
Self {
instance_uid: Some(segment.instance_uid),
user: Some(segment.user),
inner: Some(segment),
events: HashMap::new(),
}
}
pub fn instance_uid(&self) -> Option<&InstanceUid> {
self.instance_uid.as_ref()
}
/// The method used to publish most analytics that do not need to be batched every hours
pub fn publish(&self, send: impl Aggregate, request: Option<&HttpRequest>) {
let Some(segment) = self.inner else { return };
}
fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);
/// This method should be called to aggregate a get search
fn get_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a get similar request
fn get_similar(&self, aggregate: SimilarAggregator);
/// This method should be called to aggregate a post similar request
fn post_similar(&self, aggregate: SimilarAggregator);
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
/// This method should be called to aggregate post facet values searches
fn post_facet_search(&self, aggregate: FacetSearchAggregator);
// this method should be called to aggregate an add documents request
fn add_documents(
&self,
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
);
// this method should be called to aggregate a fetch documents request
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
// this method should be called to aggregate a fetch documents request
fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
// this method should be called to aggregate a add documents request
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);
// this method should be called to batch an update documents request
fn update_documents(
&self,
documents_query: &UpdateDocumentsQuery,
index_creation: bool,
request: &HttpRequest,
);
// this method should be called to batch an update documents by function request
fn update_documents_by_function(
&self,
documents_query: &DocumentEditionByFunction,
index_creation: bool,
request: &HttpRequest,
);
}
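For context, a condensed sketch of the call pattern the route handlers below migrate to, trimmed from the dumps route further down in this commit (only the analytics-related lines are shown):
// handler parameter: analytics: web::Data<Analytics>
crate::empty_analytics!(DumpAnalytics, "Dump Created");
// one typed aggregate per event replaces the old `publish(String, Value, request)` call:
analytics.publish(DumpAnalytics::default(), Some(&req));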

View File

@ -25,7 +25,8 @@ use tokio::sync::mpsc::{self, Receiver, Sender};
use uuid::Uuid;
use super::{
config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
config_user_id_path, Aggregate, AggregateMethod, DocumentDeletionKind, DocumentFetchKind,
MEILISEARCH_CONFIG_PATH,
};
use crate::analytics::Analytics;
use crate::option::{
@ -40,7 +41,7 @@ use crate::search::{
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEMANTIC_RATIO,
};
use crate::Opt;
use crate::{aggregate_methods, Opt};
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
@ -87,9 +88,9 @@ pub enum AnalyticsMsg {
}
pub struct SegmentAnalytics {
instance_uid: InstanceUid,
pub instance_uid: InstanceUid,
sender: Sender<AnalyticsMsg>,
user: User,
pub user: User,
}
impl SegmentAnalytics {
@ -98,7 +99,7 @@ impl SegmentAnalytics {
opt: &Opt,
index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>,
) -> Arc<dyn Analytics> {
) -> Arc<Analytics> {
let instance_uid = super::find_user_id(&opt.db_path);
let first_time_run = instance_uid.is_none();
let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4);
@ -108,7 +109,7 @@ impl SegmentAnalytics {
// if reqwest throws an error we won't be able to send analytics
if client.is_err() {
return super::MockAnalytics::new(opt);
return Arc::new(Analytics::no_analytics());
}
let client =
@ -161,10 +162,11 @@ impl SegmentAnalytics {
let this = Self { instance_uid, sender, user: user.clone() };
Arc::new(this)
Arc::new(Analytics::segment_analytics(this))
}
}
/*
impl super::Analytics for SegmentAnalytics {
fn instance_uid(&self) -> Option<&InstanceUid> {
Some(&self.instance_uid)
@ -253,6 +255,7 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
}
}
*/
/// This structure represent the `infos` field we send in the analytics.
/// It's quite close to the `Opt` structure except all sensitive informations
@ -607,12 +610,7 @@ impl Segment {
}
#[derive(Default)]
pub struct SearchAggregator {
pub struct SearchAggregator<Method: AggregateMethod> {
timestamp: Option<OffsetDateTime>,
// context
user_agents: HashSet<String>,
// requests
total_received: usize,
total_succeeded: usize,
@ -684,9 +682,11 @@ pub struct SearchAggregator {
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
marker: std::marker::PhantomData<Method>,
}
impl SearchAggregator {
impl<Method: AggregateMethod> SearchAggregator<Method> {
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self {
let SearchQuery {
@ -827,12 +827,21 @@ impl SearchAggregator {
}
self.time_spent.push(*processing_time_ms as usize);
}
}
/// Aggregate one [SearchAggregator] into another.
pub fn aggregate(&mut self, mut other: Self) {
aggregate_methods!(
SearchGET => "Documents Searched GET",
SearchPOST => "Documents Searched POST",
);
impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
fn aggregate(mut self, mut other: Self) -> Self {
let Self {
timestamp,
user_agents,
total_received,
total_succeeded,
ref mut time_spent,
@ -871,17 +880,9 @@ impl SearchAggregator {
total_used_negative_operator,
ranking_score_threshold,
ref mut locales,
marker: _,
} = other;
if self.timestamp.is_none() {
self.timestamp = timestamp;
}
// context
for user_agent in user_agents.into_iter() {
self.user_agents.insert(user_agent);
}
// request
self.total_received = self.total_received.saturating_add(total_received);
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
@ -961,12 +962,12 @@ impl SearchAggregator {
// locales
self.locales.append(locales);
self
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
fn into_event(self) -> Option<Track> {
let Self {
timestamp,
user_agents,
total_received,
total_succeeded,
time_spent,
@ -1005,90 +1006,78 @@ impl SearchAggregator {
total_used_negative_operator,
ranking_score_threshold,
locales,
marker: _,
} = self;
if total_received == 0 {
None
} else {
// we get all the values in a sorted manner
let time_spent = time_spent.into_sorted_vec();
// the index of the 99th percentage of value
let percentile_99th = time_spent.len() * 99 / 100;
// We are only interested by the slowest value of the 99th fastest results
let time_spent = time_spent.get(percentile_99th);
let properties = json!({
"user-agent": user_agents,
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
"total_degraded": total_degraded,
"total_used_negative_operator": total_used_negative_operator,
},
"sort": {
"with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
},
"distinct": distinct,
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"attributes_to_search_on": {
"total_number_of_uses": attributes_to_search_on_total_number_of_uses,
},
"q": {
"max_terms_number": max_terms_number,
},
"vector": {
"max_vector_size": max_vector_size,
"retrieve_vectors": retrieve_vectors,
},
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
"most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
},
"formatting": {
"max_attributes_to_retrieve": max_attributes_to_retrieve,
"max_attributes_to_highlight": max_attributes_to_highlight,
"highlight_pre_tag": highlight_pre_tag,
"highlight_post_tag": highlight_post_tag,
"max_attributes_to_crop": max_attributes_to_crop,
"crop_marker": crop_marker,
"show_matches_position": show_matches_position,
"crop_length": crop_length,
},
"facets": {
"avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
},
"matching_strategy": {
"most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"locales": locales,
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
});
Some(Track {
timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
// we get all the values in a sorted manner
let time_spent = time_spent.into_sorted_vec();
// the index of the 99th percentage of value
let percentile_99th = time_spent.len() * 99 / 100;
// We are only interested by the slowest value of the 99th fastest results
let time_spent = time_spent.get(percentile_99th);
json!({
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
"total_degraded": total_degraded,
"total_used_negative_operator": total_used_negative_operator,
},
"sort": {
"with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
},
"distinct": distinct,
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"attributes_to_search_on": {
"total_number_of_uses": attributes_to_search_on_total_number_of_uses,
},
"q": {
"max_terms_number": max_terms_number,
},
"vector": {
"max_vector_size": max_vector_size,
"retrieve_vectors": retrieve_vectors,
},
"hybrid": {
"enabled": hybrid,
"semantic_ratio": semantic_ratio,
},
"pagination": {
"max_limit": max_limit,
"max_offset": max_offset,
"most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" },
},
"formatting": {
"max_attributes_to_retrieve": max_attributes_to_retrieve,
"max_attributes_to_highlight": max_attributes_to_highlight,
"highlight_pre_tag": highlight_pre_tag,
"highlight_post_tag": highlight_post_tag,
"max_attributes_to_crop": max_attributes_to_crop,
"crop_marker": crop_marker,
"show_matches_position": show_matches_position,
"crop_length": crop_length,
},
"facets": {
"avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64),
},
"matching_strategy": {
"most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"locales": locales,
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
})
}
}
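A quick worked example (numbers invented) of the 99th-percentile computation kept in `into_event` above:
// With 200 recorded timings sorted ascending, percentile_99th = 200 * 99 / 100 = 198,
// so `time_spent.get(198)` reports the 199th fastest (2nd slowest) timing.
let time_spent: Vec<usize> = (1..=200).collect(); // timings in ms, already sorted
let percentile_99th = time_spent.len() * 99 / 100; // 198
assert_eq!(time_spent.get(percentile_99th), Some(&199));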

View File

@ -120,7 +120,7 @@ pub fn create_app(
search_queue: Data<SearchQueue>,
opt: Opt,
logs: (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
analytics: Arc<Analytics>,
enable_dashboard: bool,
) -> actix_web::App<
impl ServiceFactory<
@ -473,7 +473,7 @@ pub fn configure_data(
search_queue: Data<SearchQueue>,
opt: &Opt,
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Arc<dyn Analytics>,
analytics: Arc<Analytics>,
) {
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config

View File

@ -4,7 +4,6 @@ use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
@ -18,14 +17,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
}
crate::empty_analytics!(DumpAnalytics, "Dump Created");
pub async fn create_dump(
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
analytics.publish(DumpAnalytics::default(), Some(&req));
let task = KindWithContent::DumpCreation {
keys: auth_controller.list_keys()?,

View File

@ -6,10 +6,11 @@ use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
@ -22,17 +23,19 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
);
}
crate::empty_analytics!(GetExperimentalFeatureAnalytics, "Experimental features Seen");
async fn get_features(
index_scheduler: GuardedData<
ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>,
Data<IndexScheduler>,
>,
req: HttpRequest,
analytics: Data<dyn Analytics>,
analytics: Data<Analytics>,
) -> HttpResponse {
let features = index_scheduler.features();
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
analytics.publish(GetExperimentalFeatureAnalytics::default(), Some(&req));
let features = features.runtime_features();
debug!(returns = ?features, "Get features");
HttpResponse::Ok().json(features)
@ -53,6 +56,38 @@ pub struct RuntimeTogglableFeatures {
pub contains_filter: Option<bool>,
}
#[derive(Serialize)]
pub struct PatchExperimentalFeatureAnalytics {
vector_store: bool,
metrics: bool,
logs_route: bool,
edit_documents_by_function: bool,
contains_filter: bool,
}
impl Aggregate for PatchExperimentalFeatureAnalytics {
fn event_name(&self) -> &'static str {
"Experimental features Updated"
}
fn aggregate(self, other: Self) -> Self
where
Self: Sized,
{
Self {
vector_store: other.vector_store,
metrics: other.metrics,
logs_route: other.logs_route,
edit_documents_by_function: other.edit_documents_by_function,
contains_filter: other.contains_filter,
}
}
fn into_event(self) -> serde_json::Value {
serde_json::to_value(self).unwrap()
}
}
async fn patch_features(
index_scheduler: GuardedData<
ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>,
@ -60,7 +95,7 @@ async fn patch_features(
>,
new_features: AwebJson<RuntimeTogglableFeatures, DeserrJsonError>,
req: HttpRequest,
analytics: Data<dyn Analytics>,
analytics: Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features();
debug!(parameters = ?new_features, "Patch features");
@ -89,14 +124,13 @@ async fn patch_features(
} = new_features;
analytics.publish(
"Experimental features Updated".to_string(),
json!({
"vector_store": vector_store,
"metrics": metrics,
"logs_route": logs_route,
"edit_documents_by_function": edit_documents_by_function,
"contains_filter": contains_filter,
}),
PatchExperimentalFeatureAnalytics {
vector_store,
metrics,
logs_route,
edit_documents_by_function,
contains_filter,
},
Some(&req),
);
index_scheduler.put_runtime_features(new_features)?;

View File

@ -1,4 +1,6 @@
use std::collections::HashSet;
use std::io::ErrorKind;
use std::marker::PhantomData;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@ -23,14 +25,14 @@ use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index};
use mime::Mime;
use once_cell::sync::Lazy;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use tracing::debug;
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::analytics::{Aggregate, AggregateMethod, Analytics, DocumentDeletionKind};
use crate::error::MeilisearchHttpError;
use crate::error::PayloadError::ReceivePayload;
use crate::extractors::authentication::policies::*;
@ -41,7 +43,7 @@ use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
use crate::search::{parse_filter, RetrieveVectors};
use crate::Opt;
use crate::{aggregate_methods, Opt};
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@ -100,12 +102,82 @@ pub struct GetDocument {
retrieve_vectors: Param<bool>,
}
#[derive(Default, Serialize)]
pub struct DocumentsFetchAggregator {
#[serde(rename = "requests.total_received")]
total_received: usize,
// a call on ../documents/:doc_id
per_document_id: bool,
// if a filter was used
per_filter: bool,
#[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool,
// pagination
#[serde(rename = "pagination.max_limit")]
max_limit: usize,
#[serde(rename = "pagination.max_offset")]
max_offset: usize,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
}
impl DocumentsFetchAggregator {
pub fn from_query(query: &DocumentFetchKind) -> Self {
let (limit, offset, retrieve_vectors) = match query {
DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors),
DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => {
(*limit, *offset, *retrieve_vectors)
}
};
Self {
total_received: 1,
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
retrieve_vectors,
}
}
}
impl Aggregate for DocumentsFetchAggregator {
// TODO: TAMO: Should we do the same event for the GET requests
fn event_name(&self) -> &'static str {
"Documents Fetched POST"
}
fn aggregate(self, other: Self) -> Self
where
Self: Sized,
{
Self {
total_received: self.total_received.saturating_add(other.total_received),
per_document_id: self.per_document_id | other.per_document_id,
per_filter: self.per_filter | other.per_filter,
retrieve_vectors: self.retrieve_vectors | other.retrieve_vectors,
max_limit: self.max_limit.max(other.max_limit),
max_offset: self.max_offset.max(other.max_offset),
}
}
fn into_event(self) -> Value {
serde_json::to_value(self).unwrap()
}
}
pub async fn get_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
document_param: web::Path<DocumentParam>,
params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
debug!(parameters = ?params, "Get document");
@ -117,9 +189,12 @@ pub async fn get_document(
let features = index_scheduler.features();
let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
analytics.get_fetch_documents(
&DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
&req,
analytics.publish(
DocumentsFetchAggregator {
retrieve_vectors: param_retrieve_vectors.0,
..Default::default()
},
Some(&req),
);
let index = index_scheduler.index(&index_uid)?;
@ -129,17 +204,57 @@ pub async fn get_document(
Ok(HttpResponse::Ok().json(document))
}
#[derive(Default, Serialize)]
pub struct DocumentsDeletionAggregator {
#[serde(rename = "requests.total_received")]
total_received: usize,
per_document_id: bool,
clear_all: bool,
per_batch: bool,
per_filter: bool,
}
impl Aggregate for DocumentsDeletionAggregator {
fn event_name(&self) -> &'static str {
"Documents Deleted"
}
fn aggregate(self, other: Self) -> Self
where
Self: Sized,
{
Self {
total_received: self.total_received.saturating_add(other.total_received),
per_document_id: self.per_document_id | other.per_document_id,
clear_all: self.clear_all | other.clear_all,
per_batch: self.per_batch | other.per_batch,
per_filter: self.per_filter | other.per_filter,
}
}
fn into_event(self) -> Value {
serde_json::to_value(self).unwrap()
}
}
pub async fn delete_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = path.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req);
analytics.publish(
DocumentsDeletionAggregator {
total_received: 1,
per_document_id: true,
..Default::default()
},
Some(&req),
);
let task = KindWithContent::DocumentDeletion {
index_uid: index_uid.to_string(),
@ -190,19 +305,21 @@ pub async fn documents_by_query_post(
index_uid: web::Path<String>,
body: AwebJson<BrowseQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let body = body.into_inner();
debug!(parameters = ?body, "Get documents POST");
analytics.post_fetch_documents(
&DocumentFetchKind::Normal {
with_filter: body.filter.is_some(),
limit: body.limit,
offset: body.offset,
retrieve_vectors: body.retrieve_vectors,
},
&req,
);
analytics.publish(
DocumentsFetchAggregator {
total_received: 1,
per_filter: body.filter.is_some(),
retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit,
max_offset: body.offset,
..Default::default()
},
Some(&req),
);
documents_by_query(&index_scheduler, index_uid, body)
@ -213,7 +330,7 @@ pub async fn get_documents(
index_uid: web::Path<String>,
params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET");
@ -235,14 +352,16 @@ pub async fn get_documents(
filter,
};
analytics.get_fetch_documents(
&DocumentFetchKind::Normal {
with_filter: query.filter.is_some(),
limit: query.limit,
offset: query.offset,
retrieve_vectors: query.retrieve_vectors,
},
&req,
);
analytics.publish(
DocumentsFetchAggregator {
total_received: 1,
per_filter: query.filter.is_some(),
retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit,
max_offset: query.offset,
..Default::default()
},
Some(&req),
);
documents_by_query(&index_scheduler, index_uid, query)
@ -298,6 +417,42 @@ fn from_char_csv_delimiter(
}
}
aggregate_methods!(
Replaced => "Documents Added",
Updated => "Documents Updated",
);
#[derive(Default, Serialize)]
pub struct DocumentsAggregator<T: AggregateMethod> {
payload_types: HashSet<String>,
primary_key: HashSet<String>,
index_creation: bool,
#[serde(skip)]
method: PhantomData<T>,
}
impl<Method: AggregateMethod> Aggregate for DocumentsAggregator<Method> {
fn event_name(&self) -> &'static str {
Method::event_name()
}
fn aggregate(mut self, other: Self) -> Self
where
Self: Sized,
{
Self {
payload_types: self.payload_types.union(&other.payload_types).cloned().collect(),
primary_key: self.primary_key.union(&other.primary_key).cloned().collect(),
index_creation: self.index_creation | other.index_creation,
method: PhantomData,
}
}
fn into_event(self) -> Value {
serde_json::to_value(self).unwrap()
}
}
pub async fn replace_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
@ -305,17 +460,33 @@ pub async fn replace_documents(
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!(parameters = ?params, "Replace documents");
let params = params.into_inner();
analytics.add_documents(
&params,
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
&req,
);
let mut content_types = HashSet::new();
let content_type = req
.headers()
.get(CONTENT_TYPE)
.and_then(|s| s.to_str().ok())
.unwrap_or("unknown")
.to_string();
content_types.insert(content_type);
let mut primary_keys = HashSet::new();
if let Some(primary_key) = params.primary_key.clone() {
primary_keys.insert(primary_key);
}
analytics.publish(
DocumentsAggregator::<Replaced> {
payload_types: content_types,
primary_key: primary_keys,
index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
method: PhantomData,
},
Some(&req),
);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
@ -346,17 +517,33 @@ pub async fn update_documents(
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let params = params.into_inner();
debug!(parameters = ?params, "Update documents");
analytics.add_documents(
&params,
index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
&req,
);
let mut content_types = HashSet::new();
let content_type = req
.headers()
.get(CONTENT_TYPE)
.and_then(|s| s.to_str().ok())
.unwrap_or("unknown")
.to_string();
content_types.insert(content_type);
let mut primary_keys = HashSet::new();
if let Some(primary_key) = params.primary_key.clone() {
primary_keys.insert(primary_key);
}
analytics.publish(
DocumentsAggregator::<Updated> {
payload_types: content_types,
primary_key: primary_keys,
index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x),
method: PhantomData,
},
Some(&req),
);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
@ -524,12 +711,15 @@ pub async fn delete_documents_batch(
body: web::Json<Vec<Value>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
analytics.publish(
DocumentsDeletionAggregator { total_received: 1, per_batch: true, ..Default::default() },
Some(&req),
);
let ids = body
.iter()
@ -562,14 +752,17 @@ pub async fn delete_documents_by_filter(
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by filter");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
analytics.publish(
DocumentsDeletionAggregator { total_received: 1, per_filter: true, ..Default::default() },
Some(&req),
);
// we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
@ -599,13 +792,44 @@ pub struct DocumentEditionByFunction {
pub function: String,
}
#[derive(Default, Serialize)]
struct EditDocumentsByFunctionAggregator {
// Set to true if at least one request was filtered
filtered: bool,
// Set to true if at least one request contained a context
with_context: bool,
index_creation: bool,
}
impl Aggregate for EditDocumentsByFunctionAggregator {
fn event_name(&self) -> &'static str {
"Documents Edited By Function"
}
fn aggregate(self, other: Self) -> Self
where
Self: Sized,
{
Self {
filtered: self.filtered | other.filtered,
with_context: self.with_context | other.with_context,
index_creation: self.index_creation | other.index_creation,
}
}
fn into_event(self) -> Value {
serde_json::to_value(self).unwrap()
}
}
pub async fn edit_documents_by_function(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ALL }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<DocumentEditionByFunction, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Edit documents by function");
@ -617,10 +841,13 @@ pub async fn edit_documents_by_function(
let index_uid = index_uid.into_inner();
let params = params.into_inner();
analytics.update_documents_by_function(
&params,
index_scheduler.index(&index_uid).is_err(),
&req,
);
analytics.publish(
EditDocumentsByFunctionAggregator {
filtered: params.filter.is_some(),
with_context: params.context.is_some(),
index_creation: index_scheduler.index(&index_uid).is_err(),
},
Some(&req),
);
let DocumentEditionByFunction { filter, context, function } = params;
@ -670,10 +897,13 @@ pub async fn clear_all_documents(
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
analytics.publish(
DocumentsDeletionAggregator { total_received: 1, clear_all: true, ..Default::default() },
Some(&req),
);
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let uid = get_task_id(&req, &opt)?;

View File

@ -1,3 +1,5 @@
use std::collections::{BinaryHeap, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
@ -10,14 +12,15 @@ use meilisearch_types::locales::Locale;
use serde_json::Value;
use tracing::debug;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy,
RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
use crate::search_queue::SearchQueue;
@ -53,13 +56,110 @@ pub struct FacetSearchQuery {
pub locales: Option<Vec<Locale>>,
}
#[derive(Default)]
pub struct FacetSearchAggregator {
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// The set of all facetNames that were used
facet_names: HashSet<String>,
// Has any parameter other than facetName or facetQuery been provided?
additional_search_parameters_provided: bool,
}
impl FacetSearchAggregator {
#[allow(clippy::field_reassign_with_default)]
pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
let FacetSearchQuery {
facet_query: _,
facet_name,
vector,
q,
filter,
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
locales,
} = query;
Self {
total_received: 1,
facet_names: Some(facet_name.clone()).into_iter().collect(),
additional_search_parameters_provided: q.is_some()
|| vector.is_some()
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some()
|| ranking_score_threshold.is_some()
|| locales.is_some(),
..Default::default()
}
}
pub fn succeed(&mut self, result: &FacetSearchResult) {
let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result;
self.total_succeeded = 1;
self.time_spent.push(*processing_time_ms as usize);
}
}
impl Aggregate for FacetSearchAggregator {
fn event_name(&self) -> &'static str {
"Facet Searched POST"
}
fn aggregate(mut self, other: Self) -> Self
where
Self: Sized,
{
self.time_spent.extend(other.time_spent);
Self {
total_received: self.total_received.saturating_add(other.total_received),
total_succeeded: self.total_succeeded.saturating_add(other.total_succeeded),
time_spent: self.time_spent,
facet_names: self.facet_names.union(&other.facet_names).cloned().collect(),
additional_search_parameters_provided: self.additional_search_parameters_provided
| other.additional_search_parameters_provided,
}
}
fn into_event(self) -> Value {
let Self {
total_received,
total_succeeded,
time_spent,
facet_names,
additional_search_parameters_provided,
} = self;
// we get all the values in a sorted manner
let time_spent = time_spent.into_sorted_vec();
// the index of the 99th percentage of value
let percentile_99th = time_spent.len() * 99 / 100;
// We are only interested by the slowest value of the 99th fastest results
let time_spent = time_spent.get(percentile_99th);
serde_json::json!({
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": total_succeeded,
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
"total_received": total_received,
},
"facets": {
"total_distinct_facet_count": facet_names.len(),
"additional_search_parameters_provided": additional_search_parameters_provided,
},
})
}
}
pub async fn search(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: Data<SearchQueue>,
index_uid: web::Path<String>,
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -100,7 +200,7 @@ pub async fn search(
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.post_facet_search(aggregate);
analytics.publish(aggregate, Some(&req));
let search_result = search_result?;

View File

@ -1,3 +1,4 @@
use std::collections::BTreeSet;
use std::convert::Infallible;
use actix_web::web::Data;
@ -18,7 +19,7 @@ use time::OffsetDateTime;
use tracing::debug;
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
@ -123,12 +124,34 @@ pub struct IndexCreateRequest {
primary_key: Option<String>,
}
#[derive(Serialize)]
struct IndexCreatedAggregate {
primary_key: BTreeSet<String>,
}
impl Aggregate for IndexCreatedAggregate {
fn event_name(&self) -> &'static str {
"Index Created"
}
fn aggregate(self, other: Self) -> Self
where
Self: Sized,
{
Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() }
}
fn into_event(self) -> impl Serialize {
self
}
}
pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
let IndexCreateRequest { primary_key, uid } = body.into_inner();
@ -136,8 +159,7 @@ pub async fn create_index(
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
if allow_index_creation {
analytics.publish(
"Index Created".to_string(),
json!({ "primary_key": primary_key }),
IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() },
Some(&req),
);
@ -194,20 +216,37 @@ pub async fn get_index(
Ok(HttpResponse::Ok().json(index_view))
}
#[derive(Serialize)]
struct IndexUpdatedAggregate {
primary_key: BTreeSet<String>,
}
impl Aggregate for IndexUpdatedAggregate {
fn event_name(&self) -> &'static str {
"Index Updated"
}
fn aggregate(self, other: Self) -> Self {
Self { primary_key: self.primary_key.union(&other.primary_key).cloned().collect() }
}
fn into_event(self) -> impl Serialize {
self
}
}
pub async fn update_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Update index");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner();
analytics.publish(
"Index Updated".to_string(),
json!({ "primary_key": body.primary_key }),
IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() },
Some(&req),
);

View File

@ -13,6 +13,7 @@ use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value;
use tracing::debug;
use crate::analytics::segment_analytics::{SearchGET, SearchPOST};
use crate::analytics::{Analytics, SearchAggregator};
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
@ -225,7 +226,7 @@ pub async fn search_with_url_query(
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -237,7 +238,7 @@ pub async fn search_with_url_query(
add_search_rules(&mut query.filter, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
let mut aggregate = SearchAggregator::<SearchGET>::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
let features = index_scheduler.features();
@ -254,7 +255,7 @@ pub async fn search_with_url_query(
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.get_search(aggregate);
analytics.publish(aggregate, Some(&req));
let search_result = search_result?;
@ -268,7 +269,7 @@ pub async fn search_with_post(
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@ -280,7 +281,7 @@ pub async fn search_with_post(
add_search_rules(&mut query.filter, search_rules);
}
let mut aggregate = SearchAggregator::from_query(&query, &req);
let mut aggregate = SearchAggregator::<SearchPOST>::from_query(&query, &req);
let index = index_scheduler.index(&index_uid)?;
@ -302,7 +303,7 @@ pub async fn search_with_post(
MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
}
}
analytics.post_search(aggregate);
analytics.publish(aggregate, Some(&req));
let search_result = search_result?;

File diff suppressed because it is too large

View File

@ -40,7 +40,7 @@ pub async fn swap_indexes(
analytics.publish(
"Indexes Swapped".to_string(),
json!({
"swap_operation_number": params.len(),
"swap_operation_number": params.len(), // Return the max ever encountered
}),
Some(&req),
);