fix the analytics

This commit is contained in:
Tamo 2022-10-18 12:45:06 +02:00 committed by Clément Renault
parent 65e69c06bc
commit 656f809b1d
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 44 additions and 28 deletions

View File

@ -7,13 +7,9 @@ use std::time::{Duration, Instant};
use actix_web::http::header::USER_AGENT; use actix_web::http::header::USER_AGENT;
use actix_web::HttpRequest; use actix_web::HttpRequest;
use http::header::CONTENT_TYPE; use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler;
use meilisearch_auth::SearchRules; use meilisearch_auth::SearchRules;
use meilisearch_lib::index::{ use meilisearch_types::InstanceUid;
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
};
use meilisearch_lib::index_controller::Stats;
use meilisearch_lib::MeiliSearch;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
use segment::message::{Identify, Track, User}; use segment::message::{Identify, Track, User};
@ -28,6 +24,11 @@ use uuid::Uuid;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::option::default_http_addr; use crate::option::default_http_addr;
use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
};
use crate::Opt; use crate::Opt;
use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH}; use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH};
@ -35,14 +36,14 @@ use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH};
const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. /// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
fn write_user_id(db_path: &Path, user_id: &str) { fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes()); let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes());
if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH
.as_ref() .as_ref()
.zip(config_user_id_path(db_path)) .zip(config_user_id_path(db_path))
{ {
let _ = fs::create_dir_all(&meilisearch_config_path); let _ = fs::create_dir_all(&meilisearch_config_path);
let _ = fs::write(user_id_path, user_id.as_bytes()); let _ = fs::write(user_id_path, user_id.to_string());
} }
} }
@ -71,16 +72,17 @@ pub enum AnalyticsMsg {
} }
pub struct SegmentAnalytics { pub struct SegmentAnalytics {
instance_uid: InstanceUid,
sender: Sender<AnalyticsMsg>, sender: Sender<AnalyticsMsg>,
user: User, user: User,
} }
impl SegmentAnalytics { impl SegmentAnalytics {
pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> (Arc<dyn Analytics>, String) { pub async fn new(opt: &Opt, index_scheduler: Arc<IndexScheduler>) -> Arc<dyn Analytics> {
let user_id = super::find_user_id(&opt.db_path); let instance_uid = super::find_user_id(&opt.db_path);
let first_time_run = user_id.is_none(); let first_time_run = instance_uid.is_none();
let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string()); let instance_uid = instance_uid.unwrap_or_else(|| Uuid::new_v4());
write_user_id(&opt.db_path, &user_id); write_user_id(&opt.db_path, &instance_uid);
let client = reqwest::Client::builder() let client = reqwest::Client::builder()
.connect_timeout(Duration::from_secs(10)) .connect_timeout(Duration::from_secs(10))
@ -95,7 +97,9 @@ impl SegmentAnalytics {
client.unwrap(), client.unwrap(),
"https://telemetry.meilisearch.com".to_string(), "https://telemetry.meilisearch.com".to_string(),
); );
let user = User::UserId { user_id }; let user = User::UserId {
user_id: instance_uid.to_string(),
};
let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string()); let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());
// If Meilisearch is Launched for the first time: // If Meilisearch is Launched for the first time:
@ -133,18 +137,23 @@ impl SegmentAnalytics {
add_documents_aggregator: DocumentsAggregator::default(), add_documents_aggregator: DocumentsAggregator::default(),
update_documents_aggregator: DocumentsAggregator::default(), update_documents_aggregator: DocumentsAggregator::default(),
}); });
tokio::spawn(segment.run(meilisearch.clone())); tokio::spawn(segment.run(index_scheduler.clone()));
let this = Self { let this = Self {
instance_uid,
sender, sender,
user: user.clone(), user: user.clone(),
}; };
(Arc::new(this), user.to_string()) Arc::new(this)
} }
} }
impl super::Analytics for SegmentAnalytics { impl super::Analytics for SegmentAnalytics {
fn instance_uid(&self) -> Option<&InstanceUid> {
Some(&self.instance_uid)
}
fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) {
let user_agent = request.map(|req| extract_user_agents(req)); let user_agent = request.map(|req| extract_user_agents(req));
@ -270,7 +279,7 @@ impl Segment {
}) })
} }
async fn run(mut self, meilisearch: MeiliSearch) { async fn run(mut self, index_scheduler: Arc<IndexScheduler>) {
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
// The first batch must be sent after one hour. // The first batch must be sent after one hour.
let mut interval = let mut interval =
@ -279,7 +288,7 @@ impl Segment {
loop { loop {
select! { select! {
_ = interval.tick() => { _ = interval.tick() => {
self.tick(meilisearch.clone()).await; self.tick(index_scheduler.clone()).await;
}, },
msg = self.inbox.recv() => { msg = self.inbox.recv() => {
match msg { match msg {
@ -295,8 +304,8 @@ impl Segment {
} }
} }
async fn tick(&mut self, meilisearch: MeiliSearch) { async fn tick(&mut self, index_scheduler: Arc<IndexScheduler>) {
if let Ok(stats) = meilisearch.get_all_stats(&SearchRules::default()).await { if let Ok(stats) = create_all_stats(index_scheduler.into(), &SearchRules::default()) {
let _ = self let _ = self
.batcher .batcher
.push(Identify { .push(Identify {

View File

@ -48,10 +48,11 @@ async fn main() -> anyhow::Result<()> {
} }
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?; let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
let index_scheduler = Arc::new(index_scheduler);
#[cfg(all(not(debug_assertions), feature = "analytics"))] #[cfg(all(not(debug_assertions), feature = "analytics"))]
let analytics = if !opt.no_analytics { let analytics = if !opt.no_analytics {
analytics::SegmentAnalytics::new(&opt, &meilisearch).await analytics::SegmentAnalytics::new(&opt, index_scheduler.clone()).await
} else { } else {
analytics::MockAnalytics::new(&opt) analytics::MockAnalytics::new(&opt)
}; };
@ -66,14 +67,14 @@ async fn main() -> anyhow::Result<()> {
} }
async fn run_http( async fn run_http(
index_scheduler: IndexScheduler, index_scheduler: Arc<IndexScheduler>,
auth_controller: AuthController, auth_controller: AuthController,
opt: Opt, opt: Opt,
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development"; let enable_dashboard = &opt.env == "development";
let opt_clone = opt.clone(); let opt_clone = opt.clone();
let index_scheduler = Data::new(index_scheduler); let index_scheduler = Data::from(index_scheduler);
let http_server = HttpServer::new(move || { let http_server = HttpServer::new(move || {
create_app( create_app(

View File

@ -280,6 +280,16 @@ async fn get_stats(
); );
let search_rules = &index_scheduler.filters().search_rules; let search_rules = &index_scheduler.filters().search_rules;
let stats = create_all_stats((*index_scheduler).clone(), search_rules)?;
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
}
pub fn create_all_stats(
index_scheduler: Data<IndexScheduler>,
search_rules: &meilisearch_auth::SearchRules,
) -> Result<Stats, ResponseError> {
let mut last_task: Option<OffsetDateTime> = None; let mut last_task: Option<OffsetDateTime> = None;
let mut indexes = BTreeMap::new(); let mut indexes = BTreeMap::new();
let mut database_size = 0; let mut database_size = 0;
@ -291,7 +301,6 @@ async fn get_stats(
let processing_index = processing_task let processing_index = processing_task
.first() .first()
.and_then(|task| task.index_uid().clone()); .and_then(|task| task.index_uid().clone());
for (name, index) in index_scheduler.indexes()? { for (name, index) in index_scheduler.indexes()? {
if !search_rules.is_index_authorized(&name) { if !search_rules.is_index_authorized(&name) {
continue; continue;
@ -313,15 +322,12 @@ async fn get_stats(
indexes.insert(name, stats); indexes.insert(name, stats);
} }
let stats = Stats { let stats = Stats {
database_size, database_size,
last_update: last_task, last_update: last_task,
indexes, indexes,
}; };
Ok(stats)
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
} }
#[derive(Serialize)] #[derive(Serialize)]