mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Merge #3071
3071: Analytics on the tasks route r=Kerollmops a=irevoire Implement the missing analytics on the delete and cancel task routes. + Batch the analytics on the `GET tasks` route to avoid flooding ourselves while polling meilisearch. Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
b478b18218
@ -7,6 +7,7 @@ use serde_json::Value;
|
||||
|
||||
use super::{find_user_id, Analytics, DocumentDeletionKind};
|
||||
use crate::routes::indexes::documents::UpdateDocumentsQuery;
|
||||
use crate::routes::tasks::TasksFilterQueryRaw;
|
||||
use crate::Opt;
|
||||
|
||||
pub struct MockAnalytics {
|
||||
@ -57,5 +58,6 @@ impl Analytics for MockAnalytics {
|
||||
_request: &HttpRequest,
|
||||
) {
|
||||
}
|
||||
fn get_tasks(&self, _query: &TasksFilterQueryRaw, _request: &HttpRequest) {}
|
||||
fn health_seen(&self, _request: &HttpRequest) {}
|
||||
}
|
||||
|
@ -15,6 +15,7 @@ use platform_dirs::AppDirs;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::routes::indexes::documents::UpdateDocumentsQuery;
|
||||
use crate::routes::tasks::TasksFilterQueryRaw;
|
||||
|
||||
// if we are in debug mode OR the analytics feature is disabled
|
||||
// the `SegmentAnalytics` point to the mock instead of the real analytics
|
||||
@ -92,6 +93,9 @@ pub trait Analytics: Sync + Send {
|
||||
request: &HttpRequest,
|
||||
);
|
||||
|
||||
// this method should be called to aggregate the get tasks requests.
|
||||
fn get_tasks(&self, query: &TasksFilterQueryRaw, request: &HttpRequest);
|
||||
|
||||
// this method should be called to aggregate a add documents request
|
||||
fn health_seen(&self, request: &HttpRequest);
|
||||
}
|
||||
|
@ -27,6 +27,7 @@ use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, SchedulerConfig};
|
||||
use crate::routes::indexes::documents::UpdateDocumentsQuery;
|
||||
use crate::routes::tasks::TasksFilterQueryRaw;
|
||||
use crate::routes::{create_all_stats, Stats};
|
||||
use crate::search::{
|
||||
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
@ -70,6 +71,7 @@ pub enum AnalyticsMsg {
|
||||
AggregateAddDocuments(DocumentsAggregator),
|
||||
AggregateDeleteDocuments(DocumentsDeletionAggregator),
|
||||
AggregateUpdateDocuments(DocumentsAggregator),
|
||||
AggregateTasks(TasksAggregator),
|
||||
AggregateHealth(HealthAggregator),
|
||||
}
|
||||
|
||||
@ -131,6 +133,7 @@ impl SegmentAnalytics {
|
||||
add_documents_aggregator: DocumentsAggregator::default(),
|
||||
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
|
||||
update_documents_aggregator: DocumentsAggregator::default(),
|
||||
get_tasks_aggregator: TasksAggregator::default(),
|
||||
health_aggregator: HealthAggregator::default(),
|
||||
});
|
||||
tokio::spawn(segment.run(index_scheduler.clone()));
|
||||
@ -192,6 +195,11 @@ impl super::Analytics for SegmentAnalytics {
|
||||
let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
|
||||
}
|
||||
|
||||
fn get_tasks(&self, query: &TasksFilterQueryRaw, request: &HttpRequest) {
|
||||
let aggregate = TasksAggregator::from_query(query, request);
|
||||
let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
|
||||
}
|
||||
|
||||
fn health_seen(&self, request: &HttpRequest) {
|
||||
let aggregate = HealthAggregator::from_query(request);
|
||||
let _ = self.sender.try_send(AnalyticsMsg::AggregateHealth(aggregate));
|
||||
@ -326,6 +334,7 @@ pub struct Segment {
|
||||
add_documents_aggregator: DocumentsAggregator,
|
||||
delete_documents_aggregator: DocumentsDeletionAggregator,
|
||||
update_documents_aggregator: DocumentsAggregator,
|
||||
get_tasks_aggregator: TasksAggregator,
|
||||
health_aggregator: HealthAggregator,
|
||||
}
|
||||
|
||||
@ -382,6 +391,7 @@ impl Segment {
|
||||
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
|
||||
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
|
||||
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
|
||||
Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
|
||||
Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
|
||||
None => (),
|
||||
}
|
||||
@ -416,6 +426,8 @@ impl Segment {
|
||||
.into_event(&self.user, "Documents Deleted");
|
||||
let update_documents = std::mem::take(&mut self.update_documents_aggregator)
|
||||
.into_event(&self.user, "Documents Updated");
|
||||
let get_tasks =
|
||||
std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
|
||||
let health =
|
||||
std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");
|
||||
|
||||
@ -434,6 +446,9 @@ impl Segment {
|
||||
if let Some(update_documents) = update_documents {
|
||||
let _ = self.batcher.push(update_documents).await;
|
||||
}
|
||||
if let Some(get_tasks) = get_tasks {
|
||||
let _ = self.batcher.push(get_tasks).await;
|
||||
}
|
||||
if let Some(health) = health {
|
||||
let _ = self.batcher.push(health).await;
|
||||
}
|
||||
@ -840,6 +855,91 @@ impl DocumentsDeletionAggregator {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct TasksAggregator {
|
||||
#[serde(skip)]
|
||||
timestamp: Option<OffsetDateTime>,
|
||||
|
||||
// context
|
||||
#[serde(rename = "user-agent")]
|
||||
user_agents: HashSet<String>,
|
||||
|
||||
filtered_by_uid: bool,
|
||||
filtered_by_index_uid: bool,
|
||||
filtered_by_type: bool,
|
||||
filtered_by_status: bool,
|
||||
filtered_by_canceled_by: bool,
|
||||
filtered_by_before_enqueued_at: bool,
|
||||
filtered_by_after_enqueued_at: bool,
|
||||
filtered_by_before_started_at: bool,
|
||||
filtered_by_after_started_at: bool,
|
||||
filtered_by_before_finished_at: bool,
|
||||
filtered_by_after_finished_at: bool,
|
||||
total_received: usize,
|
||||
}
|
||||
|
||||
impl TasksAggregator {
|
||||
pub fn from_query(query: &TasksFilterQueryRaw, request: &HttpRequest) -> Self {
|
||||
Self {
|
||||
timestamp: Some(OffsetDateTime::now_utc()),
|
||||
user_agents: extract_user_agents(request).into_iter().collect(),
|
||||
filtered_by_uid: query.common.uids.is_some(),
|
||||
filtered_by_index_uid: query.common.index_uids.is_some(),
|
||||
filtered_by_type: query.common.types.is_some(),
|
||||
filtered_by_status: query.common.statuses.is_some(),
|
||||
filtered_by_canceled_by: query.common.canceled_by.is_some(),
|
||||
filtered_by_before_enqueued_at: query.dates.before_enqueued_at.is_some(),
|
||||
filtered_by_after_enqueued_at: query.dates.after_enqueued_at.is_some(),
|
||||
filtered_by_before_started_at: query.dates.before_started_at.is_some(),
|
||||
filtered_by_after_started_at: query.dates.after_started_at.is_some(),
|
||||
filtered_by_before_finished_at: query.dates.before_finished_at.is_some(),
|
||||
filtered_by_after_finished_at: query.dates.after_finished_at.is_some(),
|
||||
total_received: 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregate one [DocumentsAggregator] into another.
|
||||
pub fn aggregate(&mut self, other: Self) {
|
||||
if self.timestamp.is_none() {
|
||||
self.timestamp = other.timestamp;
|
||||
}
|
||||
|
||||
// we can't create a union because there is no `into_union` method
|
||||
for user_agent in other.user_agents {
|
||||
self.user_agents.insert(user_agent);
|
||||
}
|
||||
|
||||
self.filtered_by_uid |= other.filtered_by_uid;
|
||||
self.filtered_by_index_uid |= other.filtered_by_index_uid;
|
||||
self.filtered_by_type |= other.filtered_by_type;
|
||||
self.filtered_by_status |= other.filtered_by_status;
|
||||
self.filtered_by_canceled_by |= other.filtered_by_canceled_by;
|
||||
self.filtered_by_before_enqueued_at |= other.filtered_by_before_enqueued_at;
|
||||
self.filtered_by_after_enqueued_at |= other.filtered_by_after_enqueued_at;
|
||||
self.filtered_by_before_started_at |= other.filtered_by_before_started_at;
|
||||
self.filtered_by_after_started_at |= other.filtered_by_after_started_at;
|
||||
self.filtered_by_before_finished_at |= other.filtered_by_before_finished_at;
|
||||
self.filtered_by_after_finished_at |= other.filtered_by_after_finished_at;
|
||||
self.filtered_by_after_finished_at |= other.filtered_by_after_finished_at;
|
||||
|
||||
self.total_received = self.total_received.saturating_add(other.total_received);
|
||||
}
|
||||
|
||||
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
||||
// if we had no timestamp it means we never encountered any events and
|
||||
// thus we don't need to send this event.
|
||||
let timestamp = self.timestamp?;
|
||||
|
||||
Some(Track {
|
||||
timestamp: Some(timestamp),
|
||||
user: user.clone(),
|
||||
event: event_name.to_string(),
|
||||
properties: serde_json::to_value(self).ok()?,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize)]
|
||||
pub struct HealthAggregator {
|
||||
#[serde(skip)]
|
||||
|
@ -21,7 +21,7 @@ mod api_key;
|
||||
mod dump;
|
||||
pub mod indexes;
|
||||
mod swap_indexes;
|
||||
mod tasks;
|
||||
pub mod tasks;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::scope("/tasks").configure(tasks::configure))
|
||||
|
@ -162,11 +162,11 @@ impl From<Details> for DetailsView {
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct TaskCommonQueryRaw {
|
||||
uids: Option<CS<String>>,
|
||||
canceled_by: Option<CS<String>>,
|
||||
types: Option<CS<StarOr<String>>>,
|
||||
statuses: Option<CS<StarOr<String>>>,
|
||||
index_uids: Option<CS<StarOr<String>>>,
|
||||
pub uids: Option<CS<String>>,
|
||||
pub canceled_by: Option<CS<String>>,
|
||||
pub types: Option<CS<StarOr<String>>>,
|
||||
pub statuses: Option<CS<StarOr<String>>>,
|
||||
pub index_uids: Option<CS<StarOr<String>>>,
|
||||
}
|
||||
impl TaskCommonQueryRaw {
|
||||
fn validate(self) -> Result<TaskCommonQuery, ResponseError> {
|
||||
@ -261,12 +261,12 @@ impl TaskCommonQueryRaw {
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct TaskDateQueryRaw {
|
||||
after_enqueued_at: Option<String>,
|
||||
before_enqueued_at: Option<String>,
|
||||
after_started_at: Option<String>,
|
||||
before_started_at: Option<String>,
|
||||
after_finished_at: Option<String>,
|
||||
before_finished_at: Option<String>,
|
||||
pub after_enqueued_at: Option<String>,
|
||||
pub before_enqueued_at: Option<String>,
|
||||
pub after_started_at: Option<String>,
|
||||
pub before_started_at: Option<String>,
|
||||
pub after_finished_at: Option<String>,
|
||||
pub before_finished_at: Option<String>,
|
||||
}
|
||||
impl TaskDateQueryRaw {
|
||||
fn validate(self) -> Result<TaskDateQuery, ResponseError> {
|
||||
@ -339,21 +339,21 @@ impl TaskDateQueryRaw {
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct TasksFilterQueryRaw {
|
||||
#[serde(flatten)]
|
||||
common: TaskCommonQueryRaw,
|
||||
pub common: TaskCommonQueryRaw,
|
||||
#[serde(default = "DEFAULT_LIMIT")]
|
||||
limit: u32,
|
||||
from: Option<TaskId>,
|
||||
pub limit: u32,
|
||||
pub from: Option<TaskId>,
|
||||
#[serde(flatten)]
|
||||
dates: TaskDateQueryRaw,
|
||||
pub dates: TaskDateQueryRaw,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct TaskDeletionOrCancelationQueryRaw {
|
||||
#[serde(flatten)]
|
||||
common: TaskCommonQueryRaw,
|
||||
pub common: TaskCommonQueryRaw,
|
||||
#[serde(flatten)]
|
||||
dates: TaskDateQueryRaw,
|
||||
pub dates: TaskDateQueryRaw,
|
||||
}
|
||||
|
||||
impl TasksFilterQueryRaw {
|
||||
@ -442,8 +442,9 @@ pub struct TaskDeletionOrCancelationQuery {
|
||||
|
||||
async fn cancel_tasks(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
|
||||
req: HttpRequest,
|
||||
params: web::Query<TaskDeletionOrCancelationQueryRaw>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let query = params.into_inner().validate()?;
|
||||
let TaskDeletionOrCancelationQuery {
|
||||
@ -459,6 +460,24 @@ async fn cancel_tasks(
|
||||
},
|
||||
} = query;
|
||||
|
||||
analytics.publish(
|
||||
"Tasks Canceled".to_string(),
|
||||
json!({
|
||||
"filtered_by_uid": uids.is_some(),
|
||||
"filtered_by_index_uid": index_uids.is_some(),
|
||||
"filtered_by_type": types.is_some(),
|
||||
"filtered_by_status": statuses.is_some(),
|
||||
"filtered_by_canceled_by": canceled_by.is_some(),
|
||||
"filtered_by_before_enqueued_at": before_enqueued_at.is_some(),
|
||||
"filtered_by_after_enqueued_at": after_enqueued_at.is_some(),
|
||||
"filtered_by_before_started_at": before_started_at.is_some(),
|
||||
"filtered_by_after_started_at": after_started_at.is_some(),
|
||||
"filtered_by_before_finished_at": before_finished_at.is_some(),
|
||||
"filtered_by_after_finished_at": after_finished_at.is_some(),
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let query = Query {
|
||||
limit: None,
|
||||
from: None,
|
||||
@ -495,8 +514,9 @@ async fn cancel_tasks(
|
||||
|
||||
async fn delete_tasks(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_DELETE }>, Data<IndexScheduler>>,
|
||||
req: HttpRequest,
|
||||
params: web::Query<TaskDeletionOrCancelationQueryRaw>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let TaskDeletionOrCancelationQuery {
|
||||
common: TaskCommonQuery { types, uids, canceled_by, statuses, index_uids },
|
||||
@ -511,6 +531,24 @@ async fn delete_tasks(
|
||||
},
|
||||
} = params.into_inner().validate()?;
|
||||
|
||||
analytics.publish(
|
||||
"Tasks Deleted".to_string(),
|
||||
json!({
|
||||
"filtered_by_uid": uids.is_some(),
|
||||
"filtered_by_index_uid": index_uids.is_some(),
|
||||
"filtered_by_type": types.is_some(),
|
||||
"filtered_by_status": statuses.is_some(),
|
||||
"filtered_by_canceled_by": canceled_by.is_some(),
|
||||
"filtered_by_before_enqueued_at": before_enqueued_at.is_some(),
|
||||
"filtered_by_after_enqueued_at": after_enqueued_at.is_some(),
|
||||
"filtered_by_before_started_at": before_started_at.is_some(),
|
||||
"filtered_by_after_started_at": after_started_at.is_some(),
|
||||
"filtered_by_before_finished_at": before_finished_at.is_some(),
|
||||
"filtered_by_after_finished_at": after_finished_at.is_some(),
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let query = Query {
|
||||
limit: None,
|
||||
from: None,
|
||||
@ -559,6 +597,8 @@ async fn get_tasks(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.get_tasks(¶ms, &req);
|
||||
|
||||
let TasksFilterQuery {
|
||||
common: TaskCommonQuery { types, uids, canceled_by, statuses, index_uids },
|
||||
limit,
|
||||
@ -574,16 +614,6 @@ async fn get_tasks(
|
||||
},
|
||||
} = params.into_inner().validate()?;
|
||||
|
||||
analytics.publish(
|
||||
"Tasks Seen".to_string(),
|
||||
json!({
|
||||
"filtered_by_index_uid": index_uids.as_ref().map_or(false, |v| !v.is_empty()),
|
||||
"filtered_by_type": types.as_ref().map_or(false, |v| !v.is_empty()),
|
||||
"filtered_by_status": statuses.as_ref().map_or(false, |v| !v.is_empty()),
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
// We +1 just to know if there is more after this "page" or not.
|
||||
let limit = limit.saturating_add(1);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user