From dd9eae8c26816b94df8095276783795f1ff8429d Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Thu, 1 Apr 2021 17:44:42 +0300 Subject: [PATCH 1/7] feat(http): stats route --- Cargo.lock | 37 ++++++++++++ meilisearch-http/Cargo.toml | 1 + meilisearch-http/src/data/mod.rs | 6 +- meilisearch-http/src/index/mod.rs | 16 ++++- .../src/index_controller/index_actor/actor.rs | 31 +++++++++- .../index_actor/handle_impl.rs | 14 ++++- .../index_controller/index_actor/message.rs | 9 ++- .../src/index_controller/index_actor/mod.rs | 29 ++++----- meilisearch-http/src/index_controller/mod.rs | 57 +++++++++++------- meilisearch-http/src/routes/stats.rs | 59 +++++++++++++++---- 10 files changed, 199 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 461a4789b..a79898d99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1875,6 +1875,23 @@ dependencies = [ "urlencoding", "uuid", "vergen", + "walkdir", +] + +[[package]] +name = "meilisearch-tokenizer" +version = "0.1.1" +source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0#833c48b2ee39071f8b4f51abd15122afdb3c8c06" +dependencies = [ + "character_converter", + "cow-utils", + "deunicode", + "fst", + "jieba-rs", + "once_cell", + "slice-group-by", + "unicode-segmentation", + "whatlang", ] [[package]] @@ -2840,6 +2857,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.1.0" @@ -3717,6 +3743,17 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi 0.3.9", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 3e04b876a..9ab386882 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -62,6 +62,7 @@ thiserror = "1.0.24" tokio = { version = "1", features = ["full"] } uuid = "0.8.2" oxidized-json-checker = "0.3.2" +walkdir = "2.3.2" [dependencies.sentry] default-features = false diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index 717d728fc..2d0a543d4 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -1,6 +1,3 @@ -pub mod search; -mod updates; - use std::ops::Deref; use std::sync::Arc; @@ -11,6 +8,9 @@ use crate::index_controller::IndexController; use crate::index_controller::{IndexMetadata, IndexSettings}; use crate::option::Opt; +pub mod search; +mod updates; + #[derive(Clone)] pub struct Data { inner: Arc, diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index dfd2ebdc4..9ba03ddd2 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,6 +1,3 @@ -mod search; -mod updates; - use std::collections::{BTreeSet, HashSet}; use std::ops::Deref; use std::sync::Arc; @@ -8,10 +5,14 @@ use std::sync::Arc; use anyhow::{bail, Context}; use milli::obkv_to_json; use serde_json::{Map, Value}; +use walkdir::WalkDir; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Facets, Settings, UpdateResult}; +mod search; +mod updates; + pub type Document = Map; #[derive(Clone)] @@ -126,6 +127,15 @@ impl Index { } } + pub fn size(&self) -> anyhow::Result { + Ok(WalkDir::new(self.env.path()) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter_map(|entry| entry.metadata().ok()) + .filter(|metadata| metadata.is_file()) + .fold(0, |acc, m| acc + m.len())) + } + fn fields_to_display>( &self, txn: &heed::RoTxn, diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index a4228227f..6d0c1e8cd 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -12,12 +12,15 @@ use tokio::sync::mpsc; use tokio::task::spawn_blocking; use uuid::Uuid; -use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult}; use crate::index::{Document, SearchQuery, SearchResult, Settings}; use crate::index_controller::update_handler::UpdateHandler; -use crate::index_controller::{get_arc_ownership_blocking, updates::Processing, UpdateMeta}; +use crate::index_controller::{ + get_arc_ownership_blocking, updates::Processing, IndexStats, UpdateMeta, +}; use crate::option::IndexerOpts; +use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult}; + pub struct IndexActor { read_receiver: Option>, write_receiver: Option>, @@ -146,6 +149,9 @@ impl IndexActor { Snapshot { uuid, path, ret } => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } + GetStats { uuid, ret } => { + let _ = ret.send(self.handle_get_stats(uuid).await); + } } } @@ -328,4 +334,25 @@ impl IndexActor { Ok(()) } + + async fn handle_get_stats(&self, uuid: Uuid) -> Result { + let index = self + .store + .get(uuid) + .await? + .ok_or(IndexError::UnexistingIndex)?; + + spawn_blocking(move || { + let rtxn = index.read_txn()?; + + Ok(IndexStats { + size: index.size()?, + number_of_documents: index.number_of_documents(&rtxn)?, + is_indexing: false, // TODO check actual is_indexing + fields_distribution: index.fields_distribution(&rtxn)?, + }) + }) + .await + .map_err(|e| IndexError::Error(e.into()))? + } } diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index dba0f9e60..93406c13b 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -3,12 +3,13 @@ use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; +use crate::index::{Document, SearchQuery, SearchResult, Settings}; +use crate::index_controller::{updates::Processing, UpdateMeta}; +use crate::index_controller::{IndexSettings, IndexStats}; + use super::{ IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore, Result, UpdateResult, }; -use crate::index::{Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::IndexSettings; -use crate::index_controller::{updates::Processing, UpdateMeta}; #[derive(Clone)] pub struct IndexActorHandleImpl { @@ -121,6 +122,13 @@ impl IndexActorHandle for IndexActorHandleImpl { let _ = self.read_sender.send(msg).await; Ok(receiver.await.expect("IndexActor has been killed")?) } + + async fn get_index_stats(&self, uuid: Uuid) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = IndexMsg::GetStats { uuid, ret }; + let _ = self.read_sender.send(msg).await; + Ok(receiver.await.expect("IndexActor has been killed")?) + } } impl IndexActorHandleImpl { diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 46d7f6214..6da0f8628 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -3,9 +3,10 @@ use std::path::PathBuf; use tokio::sync::oneshot; use uuid::Uuid; -use super::{IndexMeta, IndexSettings, Result, UpdateResult}; use crate::index::{Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::{updates::Processing, UpdateMeta}; +use crate::index_controller::{updates::Processing, IndexStats, UpdateMeta}; + +use super::{IndexMeta, IndexSettings, Result, UpdateResult}; pub enum IndexMsg { CreateIndex { @@ -58,4 +59,8 @@ pub enum IndexMsg { path: PathBuf, ret: oneshot::Sender>, }, + GetStats { + uuid: Uuid, + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index 2dc856b80..426eb29e4 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -1,30 +1,30 @@ -mod actor; -mod handle_impl; -mod message; -mod store; - use std::path::PathBuf; use chrono::{DateTime, Utc}; +#[cfg(test)] +use mockall::automock; use serde::{Deserialize, Serialize}; use thiserror::Error; use uuid::Uuid; -use super::IndexSettings; +use actor::IndexActor; +pub use handle_impl::IndexActorHandleImpl; +use message::IndexMsg; +use store::{IndexStore, MapIndexStore}; + use crate::index::UpdateResult as UResult; use crate::index::{Document, Index, SearchQuery, SearchResult, Settings}; use crate::index_controller::{ updates::{Failed, Processed, Processing}, - UpdateMeta, + IndexStats, UpdateMeta, }; -use actor::IndexActor; -use message::IndexMsg; -use store::{IndexStore, MapIndexStore}; -pub use handle_impl::IndexActorHandleImpl; +use super::IndexSettings; -#[cfg(test)] -use mockall::automock; +mod actor; +mod handle_impl; +mod message; +mod store; pub type Result = std::result::Result; type UpdateResult = std::result::Result, Failed>; @@ -33,7 +33,7 @@ type UpdateResult = std::result::Result, Failed, - updated_at: DateTime, + pub updated_at: DateTime, primary_key: Option, } @@ -98,4 +98,5 @@ pub trait IndexActorHandle { async fn get_index_meta(&self, uuid: Uuid) -> Result; async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result; async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; + async fn get_index_stats(&self, uuid: Uuid) -> Result; } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index b26ab8828..e459af10c 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -1,10 +1,3 @@ -mod index_actor; -mod snapshot; -mod update_actor; -mod update_handler; -mod updates; -mod uuid_resolver; - use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -14,33 +7,40 @@ use anyhow::bail; use futures::stream::StreamExt; use log::info; use milli::update::{IndexDocumentsMethod, UpdateFormat}; +use milli::FieldsDistribution; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio::time::sleep; use uuid::Uuid; +use index_actor::IndexActorHandle; +use snapshot::load_snapshot; +use snapshot::SnapshotService; +use update_actor::UpdateActorHandle; +pub use updates::{Failed, Processed, Processing}; +use uuid_resolver::UuidError; +use uuid_resolver::UuidResolverHandle; + use crate::index::{Document, SearchQuery, SearchResult}; use crate::index::{Facets, Settings, UpdateResult}; use crate::option::Opt; -use index_actor::IndexActorHandle; -use snapshot::load_snapshot; -use update_actor::UpdateActorHandle; -use uuid_resolver::UuidResolverHandle; - -use snapshot::SnapshotService; -pub use updates::{Failed, Processed, Processing}; -use uuid_resolver::UuidError; +mod index_actor; +mod snapshot; +mod update_actor; +mod update_handler; +mod updates; +mod uuid_resolver; pub type UpdateStatus = updates::UpdateStatus; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] pub struct IndexMetadata { - uid: String, + pub uid: String, name: String, #[serde(flatten)] - meta: index_actor::IndexMeta, + pub meta: index_actor::IndexMeta, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -63,6 +63,14 @@ pub struct IndexSettings { pub primary_key: Option, } +#[derive(Clone, Debug)] +pub struct IndexStats { + pub size: u64, + pub number_of_documents: u64, + pub is_indexing: bool, + pub fields_distribution: FieldsDistribution, +} + pub struct IndexController { uuid_resolver: uuid_resolver::UuidResolverHandleImpl, index_handle: index_actor::IndexActorHandleImpl, @@ -100,10 +108,11 @@ impl IndexController { update_handle.clone(), Duration::from_secs(options.snapshot_interval_sec), options.snapshot_dir.clone(), - options.db_path - .file_name() - .map(|n| n.to_owned().into_string().expect("invalid path")) - .unwrap_or_else(|| String::from("data.ms")), + options + .db_path + .file_name() + .map(|n| n.to_owned().into_string().expect("invalid path")) + .unwrap_or_else(|| String::from("data.ms")), ); tokio::task::spawn(snapshot_service.run()); @@ -341,6 +350,12 @@ impl IndexController { }; Ok(meta) } + + pub async fn get_stats(&self, uid: String) -> anyhow::Result { + let uuid = self.uuid_resolver.get(uid.clone()).await?; + + Ok(self.index_handle.get_index_stats(uuid).await?) + } } pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { diff --git a/meilisearch-http/src/routes/stats.rs b/meilisearch-http/src/routes/stats.rs index 108c67ca9..bab637cd6 100644 --- a/meilisearch-http/src/routes/stats.rs +++ b/meilisearch-http/src/routes/stats.rs @@ -1,18 +1,20 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use actix_web::get; use actix_web::web; use actix_web::HttpResponse; use chrono::{DateTime, Utc}; +use milli::FieldsDistribution; use serde::Serialize; use crate::error::ResponseError; use crate::helpers::Authentication; +use crate::index_controller::IndexStats; use crate::routes::IndexParam; use crate::Data; pub fn services(cfg: &mut web::ServiceConfig) { - cfg.service(index_stats) + cfg.service(get_index_stats) .service(get_stats) .service(get_version); } @@ -22,28 +24,61 @@ pub fn services(cfg: &mut web::ServiceConfig) { struct IndexStatsResponse { number_of_documents: u64, is_indexing: bool, - fields_distribution: BTreeMap, + fields_distribution: FieldsDistribution, +} + +impl From for IndexStatsResponse { + fn from(stats: IndexStats) -> Self { + Self { + number_of_documents: stats.number_of_documents, + is_indexing: stats.is_indexing, + fields_distribution: stats.fields_distribution, + } + } } #[get("/indexes/{index_uid}/stats", wrap = "Authentication::Private")] -async fn index_stats( - _data: web::Data, - _path: web::Path, +async fn get_index_stats( + data: web::Data, + path: web::Path, ) -> Result { - todo!() + let response: IndexStatsResponse = data + .index_controller + .get_stats(path.index_uid.clone()) + .await? + .into(); + + Ok(HttpResponse::Ok().json(response)) } #[derive(Serialize)] #[serde(rename_all = "camelCase")] -struct StatsResult { +struct StatsResponse { database_size: u64, last_update: Option>, indexes: HashMap, } #[get("/stats", wrap = "Authentication::Private")] -async fn get_stats(_data: web::Data) -> Result { - todo!() +async fn get_stats(data: web::Data) -> Result { + let mut response = StatsResponse { + database_size: 0, + last_update: None, + indexes: HashMap::new(), + }; + + for index in data.index_controller.list_indexes().await? { + let stats = data.index_controller.get_stats(index.uid.clone()).await?; + + response.database_size += stats.size; + response.last_update = Some(match response.last_update { + Some(last_update) => last_update.max(index.meta.updated_at), + None => index.meta.updated_at, + }); + response.indexes.insert(index.uid, stats.into()); + } + + Ok(HttpResponse::Ok().json(response)) } #[derive(Serialize)] @@ -58,11 +93,11 @@ struct VersionResponse { async fn get_version() -> HttpResponse { let commit_sha = match option_env!("COMMIT_SHA") { Some("") | None => env!("VERGEN_SHA"), - Some(commit_sha) => commit_sha + Some(commit_sha) => commit_sha, }; let commit_date = match option_env!("COMMIT_DATE") { Some("") | None => env!("VERGEN_COMMIT_DATE"), - Some(commit_date) => commit_date + Some(commit_date) => commit_date, }; HttpResponse::Ok().json(VersionResponse { From 09d9a291762ff31f03b563a552866183f138cdba Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Thu, 1 Apr 2021 21:54:37 +0300 Subject: [PATCH 2/7] test(http): server & index stats --- meilisearch-http/tests/common/index.rs | 5 +++ meilisearch-http/tests/common/server.rs | 4 ++ meilisearch-http/tests/index/mod.rs | 1 + meilisearch-http/tests/index/stats.rs | 48 ++++++++++++++++++++++++ meilisearch-http/tests/stats/mod.rs | 50 +++++++++++++++++++++++++ 5 files changed, 108 insertions(+) create mode 100644 meilisearch-http/tests/index/stats.rs diff --git a/meilisearch-http/tests/common/index.rs b/meilisearch-http/tests/common/index.rs index 78ad846de..67ea6c19a 100644 --- a/meilisearch-http/tests/common/index.rs +++ b/meilisearch-http/tests/common/index.rs @@ -161,6 +161,11 @@ impl Index<'_> { let url = format!("/indexes/{}/settings", self.uid); self.service.delete(url).await } + + pub async fn stats(&self) -> (Value, StatusCode) { + let url = format!("/indexes/{}/stats", self.uid); + self.service.get(url).await + } } pub struct GetDocumentOptions; diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 4655b10a8..e814ab3ed 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -58,6 +58,10 @@ impl Server { pub async fn version(&self) -> (Value, StatusCode) { self.service.get("/version").await } + + pub async fn stats(&self) -> (Value, StatusCode) { + self.service.get("/stats").await + } } pub fn default_settings(dir: impl AsRef) -> Opt { diff --git a/meilisearch-http/tests/index/mod.rs b/meilisearch-http/tests/index/mod.rs index c9804c160..9996df2e7 100644 --- a/meilisearch-http/tests/index/mod.rs +++ b/meilisearch-http/tests/index/mod.rs @@ -1,4 +1,5 @@ mod create_index; mod delete_index; mod get_index; +mod stats; mod update_index; diff --git a/meilisearch-http/tests/index/stats.rs b/meilisearch-http/tests/index/stats.rs new file mode 100644 index 000000000..d32c06d2b --- /dev/null +++ b/meilisearch-http/tests/index/stats.rs @@ -0,0 +1,48 @@ +use serde_json::json; + +use crate::common::Server; + +#[actix_rt::test] +async fn stats() { + let server = Server::new().await; + let index = server.index("test"); + let (_, code) = index.create(Some("id")).await; + + assert_eq!(code, 200); + + let (response, code) = index.stats().await; + + assert_eq!(code, 200); + assert_eq!(response["numberOfDocuments"], 0); + assert_eq!(response["isIndexing"], false); + assert!(response["fieldsDistribution"] + .as_object() + .unwrap() + .is_empty()); + + let documents = json!([ + { + "id": 1, + "name": "Alexey", + }, + { + "id": 2, + "age": 45, + } + ]); + + let (response, code) = index.add_documents(documents, None).await; + assert_eq!(code, 202); + assert_eq!(response["updateId"], 0); + + index.wait_update_id(0).await; + + let (response, code) = index.stats().await; + + assert_eq!(code, 200); + assert_eq!(response["numberOfDocuments"], 2); + assert_eq!(response["isIndexing"], false); + assert_eq!(response["fieldsDistribution"]["id"], 2); + assert_eq!(response["fieldsDistribution"]["name"], 1); + assert_eq!(response["fieldsDistribution"]["age"], 1); +} diff --git a/meilisearch-http/tests/stats/mod.rs b/meilisearch-http/tests/stats/mod.rs index db04fb1c0..e5027c71f 100644 --- a/meilisearch-http/tests/stats/mod.rs +++ b/meilisearch-http/tests/stats/mod.rs @@ -1,3 +1,5 @@ +use serde_json::json; + use crate::common::Server; #[actix_rt::test] @@ -19,3 +21,51 @@ async fn test_healthyness() { assert_eq!(status_code, 200); assert_eq!(response["status"], "available"); } + +#[actix_rt::test] +async fn stats() { + let server = Server::new().await; + let index = server.index("test"); + let (_, code) = index.create(Some("id")).await; + + assert_eq!(code, 200); + + let (response, code) = server.stats().await; + + assert_eq!(code, 200); + assert!(response.get("databaseSize").is_some()); + assert!(response.get("lastUpdate").is_some()); + assert!(response["indexes"].get("test").is_some()); + assert_eq!(response["indexes"]["test"]["numberOfDocuments"], 0); + assert_eq!(response["indexes"]["test"]["isIndexing"], false); + + let last_update = response["lastUpdate"].as_str().unwrap(); + + let documents = json!([ + { + "id": 1, + "name": "Alexey", + }, + { + "id": 2, + "age": 45, + } + ]); + + let (response, code) = index.add_documents(documents, None).await; + assert_eq!(code, 202); + assert_eq!(response["updateId"], 0); + + index.wait_update_id(0).await; + + let (response, code) = server.stats().await; + + assert_eq!(code, 200); + assert!(response["databaseSize"].as_u64().unwrap() > 0); + assert!(response["lastUpdate"].as_str().unwrap() > last_update); + assert_eq!(response["indexes"]["test"]["numberOfDocuments"], 2); + assert_eq!(response["indexes"]["test"]["isIndexing"], false); + assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["id"], 2); + assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["name"], 1); + assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["age"], 1); +} From 87412f63ef102974ab9ac7811060345dd2f357f1 Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Fri, 2 Apr 2021 14:44:35 +0300 Subject: [PATCH 3/7] feat(http): implement is_indexing for stats --- .../src/index_controller/index_actor/actor.rs | 2 +- meilisearch-http/src/index_controller/mod.rs | 8 +++- .../index_controller/update_actor/actor.rs | 13 ++++++ .../update_actor/handle_impl.rs | 45 +++++++++++-------- .../index_controller/update_actor/message.rs | 4 ++ .../src/index_controller/update_actor/mod.rs | 1 + .../src/index_controller/update_handler.rs | 4 +- meilisearch-http/tests/index/stats.rs | 5 +++ meilisearch-http/tests/stats/mod.rs | 5 +++ 9 files changed, 65 insertions(+), 22 deletions(-) diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 6d0c1e8cd..d56c53fcd 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -348,7 +348,7 @@ impl IndexActor { Ok(IndexStats { size: index.size()?, number_of_documents: index.number_of_documents(&rtxn)?, - is_indexing: false, // TODO check actual is_indexing + is_indexing: false, // We set this field in src/index_controller/mod.rs get_stats fields_distribution: index.fields_distribution(&rtxn)?, }) }) diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index e459af10c..967506a55 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -354,7 +354,13 @@ impl IndexController { pub async fn get_stats(&self, uid: String) -> anyhow::Result { let uuid = self.uuid_resolver.get(uid.clone()).await?; - Ok(self.index_handle.get_index_stats(uuid).await?) + let stats = self.index_handle.get_index_stats(uuid); + let is_indexing = self.update_handle.is_locked(uuid); + + Ok(IndexStats { + is_indexing: is_indexing.await?, + ..stats.await? + }) } } diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index d87b910d6..c99c0059f 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -72,6 +72,9 @@ where Some(Snapshot { uuid, path, ret }) => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } + Some(IsLocked { uuid, ret }) => { + let _ = ret.send(self.handle_is_locked(uuid).await); + } None => break, } } @@ -223,4 +226,14 @@ where Ok(()) } + + async fn handle_is_locked(&self, uuid: Uuid) -> Result { + let store = self + .store + .get(uuid) + .await? + .ok_or(UpdateError::UnexistingIndex(uuid))?; + + Ok(store.update_lock.is_locked()) + } } diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index 59f67fbe0..621675955 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -3,11 +3,12 @@ use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; +use crate::index_controller::IndexActorHandle; + use super::{ MapUpdateStoreStore, PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStatus, }; -use crate::index_controller::IndexActorHandle; #[derive(Clone)] pub struct UpdateActorHandleImpl { @@ -36,6 +37,7 @@ where Ok(Self { sender }) } } + #[async_trait::async_trait] impl UpdateActorHandle for UpdateActorHandleImpl where @@ -43,29 +45,12 @@ where { type Data = D; - async fn update( - &self, - meta: UpdateMeta, - data: mpsc::Receiver>, - uuid: Uuid, - ) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::Update { - uuid, - data, - meta, - ret, - }; - let _ = self.sender.send(msg).await; - receiver.await.expect("update actor killed.") - } async fn get_all_updates_status(&self, uuid: Uuid) -> Result> { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::ListUpdates { uuid, ret }; let _ = self.sender.send(msg).await; receiver.await.expect("update actor killed.") } - async fn update_status(&self, uuid: Uuid, id: u64) -> Result { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::GetUpdate { uuid, id, ret }; @@ -93,4 +78,28 @@ where let _ = self.sender.send(msg).await; receiver.await.expect("update actor killed.") } + + async fn update( + &self, + meta: UpdateMeta, + data: mpsc::Receiver>, + uuid: Uuid, + ) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = UpdateMsg::Update { + uuid, + data, + meta, + ret, + }; + let _ = self.sender.send(msg).await; + receiver.await.expect("update actor killed.") + } + + async fn is_locked(&self, uuid: Uuid) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = UpdateMsg::IsLocked { uuid, ret }; + let _ = self.sender.send(msg).await; + receiver.await.expect("update actor killed.") + } } diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 8e6e3c212..409fbcebc 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -34,4 +34,8 @@ pub enum UpdateMsg { path: PathBuf, ret: oneshot::Sender>, }, + IsLocked { + uuid: Uuid, + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index f3c3caf04..095e068e8 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -52,4 +52,5 @@ pub trait UpdateActorHandle { data: mpsc::Receiver>, uuid: Uuid, ) -> Result; + async fn is_locked(&self, uuid: Uuid) -> Result; } diff --git a/meilisearch-http/src/index_controller/update_handler.rs b/meilisearch-http/src/index_controller/update_handler.rs index 17f7107a2..1eb622cbf 100644 --- a/meilisearch-http/src/index_controller/update_handler.rs +++ b/meilisearch-http/src/index_controller/update_handler.rs @@ -39,7 +39,7 @@ impl UpdateHandler { }) } - fn update_buidler(&self, update_id: u64) -> UpdateBuilder { + fn update_builder(&self, update_id: u64) -> UpdateBuilder { // We prepare the update by using the update builder. let mut update_builder = UpdateBuilder::new(update_id); if let Some(max_nb_chunks) = self.max_nb_chunks { @@ -67,7 +67,7 @@ impl UpdateHandler { let update_id = meta.id(); - let update_builder = self.update_buidler(update_id); + let update_builder = self.update_builder(update_id); let result = match meta.meta() { DocumentsAddition { diff --git a/meilisearch-http/tests/index/stats.rs b/meilisearch-http/tests/index/stats.rs index d32c06d2b..e1d8bd211 100644 --- a/meilisearch-http/tests/index/stats.rs +++ b/meilisearch-http/tests/index/stats.rs @@ -35,6 +35,11 @@ async fn stats() { assert_eq!(code, 202); assert_eq!(response["updateId"], 0); + let (response, code) = index.stats().await; + + assert_eq!(code, 200); + assert_eq!(response["isIndexing"], true); + index.wait_update_id(0).await; let (response, code) = index.stats().await; diff --git a/meilisearch-http/tests/stats/mod.rs b/meilisearch-http/tests/stats/mod.rs index e5027c71f..ee10f9708 100644 --- a/meilisearch-http/tests/stats/mod.rs +++ b/meilisearch-http/tests/stats/mod.rs @@ -56,6 +56,11 @@ async fn stats() { assert_eq!(code, 202); assert_eq!(response["updateId"], 0); + let (response, code) = server.stats().await; + + assert_eq!(code, 200); + assert_eq!(response["indexes"]["test"]["isIndexing"], true); + index.wait_update_id(0).await; let (response, code) = server.stats().await; From 698a1ea5822c3ca154cf3bfdfb0a8da27660bbfb Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Wed, 7 Apr 2021 18:57:46 +0300 Subject: [PATCH 4/7] feat(http): store processing as RwLock> in index_actor --- .../src/index_controller/index_actor/actor.rs | 39 +++++++++++++------ meilisearch-http/src/index_controller/mod.rs | 8 +--- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index d56c53fcd..0620765d5 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -8,7 +8,7 @@ use futures::pin_mut; use futures::stream::StreamExt; use heed::CompactionOption; use log::debug; -use tokio::sync::mpsc; +use tokio::sync::{mpsc, RwLock}; use tokio::task::spawn_blocking; use uuid::Uuid; @@ -25,6 +25,7 @@ pub struct IndexActor { read_receiver: Option>, write_receiver: Option>, update_handler: Arc, + processing: RwLock>, store: S, } @@ -42,8 +43,9 @@ impl IndexActor { Ok(Self { read_receiver, write_receiver, - store, update_handler, + processing: RwLock::new(Default::default()), + store, }) } @@ -181,16 +183,26 @@ impl IndexActor { meta: Processing, data: File, ) -> Result { - debug!("Processing update {}", meta.id()); - let uuid = meta.index_uuid(); - let update_handler = self.update_handler.clone(); - let index = match self.store.get(*uuid).await? { - Some(index) => index, - None => self.store.create(*uuid, None).await?, + let uuid = meta.index_uuid().clone(); + + *self.processing.write().await = Some(uuid); + + let result = { + debug!("Processing update {}", meta.id()); + let update_handler = self.update_handler.clone(); + let index = match self.store.get(uuid).await? { + Some(index) => index, + None => self.store.create(uuid, None).await?, + }; + + spawn_blocking(move || update_handler.handle_update(meta, data, index)) + .await + .map_err(|e| IndexError::Error(e.into())) }; - spawn_blocking(move || update_handler.handle_update(meta, data, index)) - .await - .map_err(|e| IndexError::Error(e.into())) + + *self.processing.write().await = None; + + result } async fn handle_settings(&self, uuid: Uuid) -> Result { @@ -342,13 +354,16 @@ impl IndexActor { .await? .ok_or(IndexError::UnexistingIndex)?; + let processing = self.processing.read().await; + let is_indexing = *processing == Some(uuid); + spawn_blocking(move || { let rtxn = index.read_txn()?; Ok(IndexStats { size: index.size()?, number_of_documents: index.number_of_documents(&rtxn)?, - is_indexing: false, // We set this field in src/index_controller/mod.rs get_stats + is_indexing, fields_distribution: index.fields_distribution(&rtxn)?, }) }) diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 967506a55..e459af10c 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -354,13 +354,7 @@ impl IndexController { pub async fn get_stats(&self, uid: String) -> anyhow::Result { let uuid = self.uuid_resolver.get(uid.clone()).await?; - let stats = self.index_handle.get_index_stats(uuid); - let is_indexing = self.update_handle.is_locked(uuid); - - Ok(IndexStats { - is_indexing: is_indexing.await?, - ..stats.await? - }) + Ok(self.index_handle.get_index_stats(uuid).await?) } } From ae1655586c907508fd4e6241c40519e12f75e32d Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Thu, 8 Apr 2021 16:35:28 +0300 Subject: [PATCH 5/7] fixes after review --- Cargo.lock | 16 -------- meilisearch-http/Cargo.lock | 16 -------- meilisearch-http/src/data/mod.rs | 32 +++++++++++++++- .../src/index_controller/index_actor/actor.rs | 19 +++++----- .../index_controller/update_actor/actor.rs | 13 ------- .../update_actor/handle_impl.rs | 7 ---- .../index_controller/update_actor/message.rs | 4 -- .../src/index_controller/update_actor/mod.rs | 1 - meilisearch-http/src/routes/stats.rs | 37 ++++++++----------- 9 files changed, 56 insertions(+), 89 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a79898d99..7ef514ffe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1894,22 +1894,6 @@ dependencies = [ "whatlang", ] -[[package]] -name = "meilisearch-tokenizer" -version = "0.1.1" -source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0#833c48b2ee39071f8b4f51abd15122afdb3c8c06" -dependencies = [ - "character_converter", - "cow-utils", - "deunicode", - "fst", - "jieba-rs", - "once_cell", - "slice-group-by", - "unicode-segmentation", - "whatlang", -] - [[package]] name = "memchr" version = "2.3.4" diff --git a/meilisearch-http/Cargo.lock b/meilisearch-http/Cargo.lock index 0bdc739d5..b9bfd06ac 100644 --- a/meilisearch-http/Cargo.lock +++ b/meilisearch-http/Cargo.lock @@ -1827,22 +1827,6 @@ dependencies = [ "vergen", ] -[[package]] -name = "meilisearch-tokenizer" -version = "0.1.1" -source = "git+https://github.com/meilisearch/Tokenizer.git?branch=main#147b6154b1b34cb8f5da2df6a416b7da191bc850" -dependencies = [ - "character_converter", - "cow-utils", - "deunicode", - "fst", - "jieba-rs", - "once_cell", - "slice-group-by", - "unicode-segmentation", - "whatlang", -] - [[package]] name = "memchr" version = "2.3.4" diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index 2d0a543d4..9227454a5 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -4,9 +4,11 @@ use std::sync::Arc; use sha2::Digest; use crate::index::Settings; -use crate::index_controller::IndexController; +use crate::index_controller::{IndexController, IndexStats}; use crate::index_controller::{IndexMetadata, IndexSettings}; use crate::option::Opt; +use std::collections::HashMap; +use chrono::{DateTime, Utc}; pub mod search; mod updates; @@ -37,6 +39,13 @@ pub struct ApiKeys { pub master: Option, } +#[derive(Default)] +pub struct Stats { + pub database_size: u64, + pub last_update: Option>, + pub indexes: HashMap, +} + impl ApiKeys { pub fn generate_missing_api_keys(&mut self) { if let Some(master_key) = &self.master { @@ -104,6 +113,27 @@ impl Data { Ok(meta) } + pub async fn get_index_stats(&self, uid: String) -> anyhow::Result { + Ok(self.index_controller.get_stats(uid).await?) + } + + pub async fn get_stats(&self) -> anyhow::Result { + let mut stats = Stats::default(); + + for index in self.index_controller.list_indexes().await? { + let index_stats = self.index_controller.get_stats(index.uid.clone()).await?; + + stats.database_size += index_stats.size; + stats.last_update = Some(match stats.last_update { + Some(last_update) => last_update.max(index.meta.updated_at), + None => index.meta.updated_at, + }); + stats.indexes.insert(index.uid, index_stats); + } + + Ok(stats) + } + #[inline] pub fn http_payload_size_limit(&self) -> usize { self.options.http_payload_size_limit.get_bytes() as usize diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 0620765d5..099bb97ca 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -44,7 +44,7 @@ impl IndexActor { read_receiver, write_receiver, update_handler, - processing: RwLock::new(Default::default()), + processing: RwLock::new(None), store, }) } @@ -183,23 +183,22 @@ impl IndexActor { meta: Processing, data: File, ) -> Result { - let uuid = meta.index_uuid().clone(); - - *self.processing.write().await = Some(uuid); - - let result = { + async fn get_result(actor: &IndexActor, meta: Processing, data: File) -> Result { debug!("Processing update {}", meta.id()); - let update_handler = self.update_handler.clone(); - let index = match self.store.get(uuid).await? { + let uuid = *meta.index_uuid(); + let update_handler = actor.update_handler.clone(); + let index = match actor.store.get(uuid).await? { Some(index) => index, - None => self.store.create(uuid, None).await?, + None => actor.store.create(uuid, None).await?, }; spawn_blocking(move || update_handler.handle_update(meta, data, index)) .await .map_err(|e| IndexError::Error(e.into())) - }; + } + *self.processing.write().await = Some(meta.index_uuid().clone()); + let result = get_result(self, meta, data).await; *self.processing.write().await = None; result diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index c99c0059f..d87b910d6 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -72,9 +72,6 @@ where Some(Snapshot { uuid, path, ret }) => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } - Some(IsLocked { uuid, ret }) => { - let _ = ret.send(self.handle_is_locked(uuid).await); - } None => break, } } @@ -226,14 +223,4 @@ where Ok(()) } - - async fn handle_is_locked(&self, uuid: Uuid) -> Result { - let store = self - .store - .get(uuid) - .await? - .ok_or(UpdateError::UnexistingIndex(uuid))?; - - Ok(store.update_lock.is_locked()) - } } diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index 621675955..8778a3674 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -95,11 +95,4 @@ where let _ = self.sender.send(msg).await; receiver.await.expect("update actor killed.") } - - async fn is_locked(&self, uuid: Uuid) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::IsLocked { uuid, ret }; - let _ = self.sender.send(msg).await; - receiver.await.expect("update actor killed.") - } } diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 409fbcebc..8e6e3c212 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -34,8 +34,4 @@ pub enum UpdateMsg { path: PathBuf, ret: oneshot::Sender>, }, - IsLocked { - uuid: Uuid, - ret: oneshot::Sender>, - }, } diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index 095e068e8..f3c3caf04 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -52,5 +52,4 @@ pub trait UpdateActorHandle { data: mpsc::Receiver>, uuid: Uuid, ) -> Result; - async fn is_locked(&self, uuid: Uuid) -> Result; } diff --git a/meilisearch-http/src/routes/stats.rs b/meilisearch-http/src/routes/stats.rs index bab637cd6..c40a20609 100644 --- a/meilisearch-http/src/routes/stats.rs +++ b/meilisearch-http/src/routes/stats.rs @@ -12,6 +12,7 @@ use crate::helpers::Authentication; use crate::index_controller::IndexStats; use crate::routes::IndexParam; use crate::Data; +use crate::data::Stats; pub fn services(cfg: &mut web::ServiceConfig) { cfg.service(get_index_stats) @@ -42,11 +43,7 @@ async fn get_index_stats( data: web::Data, path: web::Path, ) -> Result { - let response: IndexStatsResponse = data - .index_controller - .get_stats(path.index_uid.clone()) - .await? - .into(); + let response: IndexStatsResponse = data.get_index_stats(path.index_uid.clone()).await?.into(); Ok(HttpResponse::Ok().json(response)) } @@ -59,24 +56,22 @@ struct StatsResponse { indexes: HashMap, } +impl From for StatsResponse { + fn from(stats: Stats) -> Self { + Self { + database_size: stats.database_size, + last_update: stats.last_update, + indexes: stats.indexes + .into_iter() + .map(|(uid, index_stats)| (uid, index_stats.into())) + .collect(), + } + } +} + #[get("/stats", wrap = "Authentication::Private")] async fn get_stats(data: web::Data) -> Result { - let mut response = StatsResponse { - database_size: 0, - last_update: None, - indexes: HashMap::new(), - }; - - for index in data.index_controller.list_indexes().await? { - let stats = data.index_controller.get_stats(index.uid.clone()).await?; - - response.database_size += stats.size; - response.last_update = Some(match response.last_update { - Some(last_update) => last_update.max(index.meta.updated_at), - None => index.meta.updated_at, - }); - response.indexes.insert(index.uid, stats.into()); - } + let response: StatsResponse = data.get_stats().await?.into(); Ok(HttpResponse::Ok().json(response)) } From adfdb99abcdf4a3dea552a5f31d7c72f25b03e1d Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Fri, 9 Apr 2021 15:41:24 +0300 Subject: [PATCH 6/7] feat(http): calculate updates' and uuids' dbs size --- meilisearch-http/src/analytics.rs | 10 +++------ meilisearch-http/src/data/mod.rs | 11 ++++++++-- meilisearch-http/src/helpers/env.rs | 16 ++++++++++++++ meilisearch-http/src/helpers/mod.rs | 2 ++ meilisearch-http/src/index/mod.rs | 17 ++++---------- .../src/index_controller/index_actor/actor.rs | 2 +- meilisearch-http/src/index_controller/mod.rs | 10 +++++++++ .../index_controller/update_actor/actor.rs | 22 ++++++++++++++++++- .../update_actor/handle_impl.rs | 7 ++++++ .../index_controller/update_actor/message.rs | 4 ++++ .../src/index_controller/update_actor/mod.rs | 1 + .../update_actor/update_store.rs | 17 +++++++++++++- .../index_controller/uuid_resolver/actor.rs | 7 ++++++ .../uuid_resolver/handle_impl.rs | 9 ++++++++ .../index_controller/uuid_resolver/message.rs | 4 ++++ .../src/index_controller/uuid_resolver/mod.rs | 1 + .../index_controller/uuid_resolver/store.rs | 6 +++++ 17 files changed, 121 insertions(+), 25 deletions(-) create mode 100644 meilisearch-http/src/helpers/env.rs diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 379a25030..a01cfabe0 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -8,6 +8,7 @@ use serde_qs as qs; use siphasher::sip::SipHasher; use walkdir::WalkDir; +use crate::helpers::EnvSizer; use crate::Data; use crate::Opt; @@ -33,12 +34,7 @@ impl EventProperties { } } - let database_size = WalkDir::new(&data.db_path) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len()); + let database_size = data.env.size(); let last_update_timestamp = data.db.last_update(&reader)?.map(|u| u.timestamp()); @@ -116,7 +112,7 @@ pub fn analytics_sender(data: Data, opt: Opt) { time, app_version, user_properties, - event_properties + event_properties, }; let event = serde_json::to_string(&event).unwrap(); diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index 9227454a5..3aee18217 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -1,14 +1,14 @@ +use std::collections::HashMap; use std::ops::Deref; use std::sync::Arc; +use chrono::{DateTime, Utc}; use sha2::Digest; use crate::index::Settings; use crate::index_controller::{IndexController, IndexStats}; use crate::index_controller::{IndexMetadata, IndexSettings}; use crate::option::Opt; -use std::collections::HashMap; -use chrono::{DateTime, Utc}; pub mod search; mod updates; @@ -119,15 +119,22 @@ impl Data { pub async fn get_stats(&self) -> anyhow::Result { let mut stats = Stats::default(); + stats.database_size += self.index_controller.get_uuids_size().await?; for index in self.index_controller.list_indexes().await? { let index_stats = self.index_controller.get_stats(index.uid.clone()).await?; stats.database_size += index_stats.size; + stats.database_size += self + .index_controller + .get_updates_size(index.uid.clone()) + .await?; + stats.last_update = Some(match stats.last_update { Some(last_update) => last_update.max(index.meta.updated_at), None => index.meta.updated_at, }); + stats.indexes.insert(index.uid, index_stats); } diff --git a/meilisearch-http/src/helpers/env.rs b/meilisearch-http/src/helpers/env.rs new file mode 100644 index 000000000..9bc81bc69 --- /dev/null +++ b/meilisearch-http/src/helpers/env.rs @@ -0,0 +1,16 @@ +use walkdir::WalkDir; + +pub trait EnvSizer { + fn size(&self) -> u64; +} + +impl EnvSizer for heed::Env { + fn size(&self) -> u64 { + WalkDir::new(self.path()) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter_map(|entry| entry.metadata().ok()) + .filter(|metadata| metadata.is_file()) + .fold(0, |acc, m| acc + m.len()) + } +} diff --git a/meilisearch-http/src/helpers/mod.rs b/meilisearch-http/src/helpers/mod.rs index d1204908f..a5cddf29c 100644 --- a/meilisearch-http/src/helpers/mod.rs +++ b/meilisearch-http/src/helpers/mod.rs @@ -1,4 +1,6 @@ pub mod authentication; pub mod compression; +mod env; pub use authentication::Authentication; +pub use env::EnvSizer; diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index 9ba03ddd2..f8835eceb 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -5,10 +5,10 @@ use std::sync::Arc; use anyhow::{bail, Context}; use milli::obkv_to_json; use serde_json::{Map, Value}; -use walkdir::WalkDir; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Facets, Settings, UpdateResult}; +use crate::helpers::EnvSizer; mod search; mod updates; @@ -55,11 +55,7 @@ impl Index { let stop_words = self .stop_words(&txn)? .map(|stop_words| -> anyhow::Result> { - Ok(stop_words - .stream() - .into_strs()? - .into_iter() - .collect()) + Ok(stop_words.stream().into_strs()?.into_iter().collect()) }) .transpose()? .unwrap_or_else(BTreeSet::new); @@ -127,13 +123,8 @@ impl Index { } } - pub fn size(&self) -> anyhow::Result { - Ok(WalkDir::new(self.env.path()) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len())) + pub fn size(&self) -> u64 { + self.env.size() } fn fields_to_display>( diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 099bb97ca..c54b2edd2 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -360,7 +360,7 @@ impl IndexActor { let rtxn = index.read_txn()?; Ok(IndexStats { - size: index.size()?, + size: index.size(), number_of_documents: index.number_of_documents(&rtxn)?, is_indexing, fields_distribution: index.fields_distribution(&rtxn)?, diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index e459af10c..8361c45cc 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -356,6 +356,16 @@ impl IndexController { Ok(self.index_handle.get_index_stats(uuid).await?) } + + pub async fn get_updates_size(&self, uid: String) -> anyhow::Result { + let uuid = self.uuid_resolver.get(uid.clone()).await?; + + Ok(self.update_handle.get_size(uuid).await?) + } + + pub async fn get_uuids_size(&self) -> anyhow::Result { + Ok(self.uuid_resolver.get_size().await?) + } } pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index d87b910d6..f725dda84 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -8,10 +8,11 @@ use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio::sync::mpsc; use uuid::Uuid; -use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStoreStore}; use crate::index_controller::index_actor::IndexActorHandle; use crate::index_controller::{get_arc_ownership_blocking, UpdateMeta, UpdateStatus}; +use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStoreStore}; + pub struct UpdateActor { path: PathBuf, store: S, @@ -72,6 +73,9 @@ where Some(Snapshot { uuid, path, ret }) => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } + Some(GetSize { uuid, ret }) => { + let _ = ret.send(self.handle_get_size(uuid).await); + } None => break, } } @@ -223,4 +227,20 @@ where Ok(()) } + + async fn handle_get_size(&self, uuid: Uuid) -> Result { + let size = match self.store.get(uuid).await? { + Some(update_store) => tokio::task::spawn_blocking(move || -> anyhow::Result { + let txn = update_store.env.read_txn()?; + + update_store.get_size(&txn) + }) + .await + .map_err(|e| UpdateError::Error(e.into()))? + .map_err(|e| UpdateError::Error(e.into()))?, + None => 0, + }; + + Ok(size) + } } diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index 8778a3674..860cc2bc8 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -79,6 +79,13 @@ where receiver.await.expect("update actor killed.") } + async fn get_size(&self, uuid: Uuid) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = UpdateMsg::GetSize { uuid, ret }; + let _ = self.sender.send(msg).await; + receiver.await.expect("update actor killed.") + } + async fn update( &self, meta: UpdateMeta, diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 8e6e3c212..f8150c00a 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -34,4 +34,8 @@ pub enum UpdateMsg { path: PathBuf, ret: oneshot::Sender>, }, + GetSize { + uuid: Uuid, + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index f3c3caf04..228b47b02 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -46,6 +46,7 @@ pub trait UpdateActorHandle { async fn delete(&self, uuid: Uuid) -> Result<()>; async fn create(&self, uuid: Uuid) -> Result<()>; async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; + async fn get_size(&self, uuid: Uuid) -> Result; async fn update( &self, meta: UpdateMeta, diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index a083eb186..3d6c4e396 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -1,3 +1,4 @@ +use std::fs::File; use std::fs::{copy, create_dir_all, remove_file}; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -6,10 +7,10 @@ use heed::types::{DecodeIgnore, OwnedType, SerdeJson}; use heed::{CompactionOption, Database, Env, EnvOpenOptions}; use parking_lot::{Mutex, RwLock}; use serde::{Deserialize, Serialize}; -use std::fs::File; use tokio::sync::mpsc; use uuid::Uuid; +use crate::helpers::EnvSizer; use crate::index_controller::updates::*; type BEU64 = heed::zerocopy::U64; @@ -409,4 +410,18 @@ where Ok(()) } + + pub fn get_size(&self, txn: &heed::RoTxn) -> anyhow::Result { + let mut size = self.env.size(); + + for path in self.pending.iter(txn)? { + let (_, path) = path?; + + if let Ok(metadata) = path.metadata() { + size += metadata.len() + } + } + + Ok(size) + } } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs index d5cde13e7..27ffaa05e 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs @@ -41,6 +41,9 @@ impl UuidResolverActor { Some(SnapshotRequest { path, ret }) => { let _ = ret.send(self.handle_snapshot(path).await); } + Some(GetSize { ret }) => { + let _ = ret.send(self.handle_get_size().await); + } // all senders have been dropped, need to quit. None => break, } @@ -86,6 +89,10 @@ impl UuidResolverActor { self.store.insert(uid, uuid).await?; Ok(()) } + + async fn handle_get_size(&self) -> Result { + self.store.get_size().await + } } fn is_index_uid_valid(uid: &str) -> bool { diff --git a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs index f8625b379..c522e87e6 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs @@ -75,4 +75,13 @@ impl UuidResolverHandle for UuidResolverHandleImpl { .await .expect("Uuid resolver actor has been killed")?) } + + async fn get_size(&self) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = UuidResolveMsg::GetSize { ret }; + let _ = self.sender.send(msg).await; + Ok(receiver + .await + .expect("Uuid resolver actor has been killed")?) + } } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs b/meilisearch-http/src/index_controller/uuid_resolver/message.rs index 975c709b3..e7d29f05f 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/message.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/message.rs @@ -4,6 +4,7 @@ use tokio::sync::oneshot; use uuid::Uuid; use super::Result; + pub enum UuidResolveMsg { Get { uid: String, @@ -29,4 +30,7 @@ pub enum UuidResolveMsg { path: PathBuf, ret: oneshot::Sender>>, }, + GetSize { + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs index 43cd9995b..33a089ddb 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs @@ -30,6 +30,7 @@ pub trait UuidResolverHandle { async fn delete(&self, name: String) -> anyhow::Result; async fn list(&self) -> anyhow::Result>; async fn snapshot(&self, path: PathBuf) -> Result>; + async fn get_size(&self) -> Result; } #[derive(Debug, Error)] diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 435314911..1f057830b 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -8,6 +8,7 @@ use heed::{ use uuid::Uuid; use super::{Result, UuidError, UUID_STORE_SIZE}; +use crate::helpers::EnvSizer; #[async_trait::async_trait] pub trait UuidStore { @@ -19,6 +20,7 @@ pub trait UuidStore { async fn list(&self) -> Result>; async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; async fn snapshot(&self, path: PathBuf) -> Result>; + async fn get_size(&self) -> Result; } pub struct HeedUuidStore { @@ -151,4 +153,8 @@ impl UuidStore for HeedUuidStore { }) .await? } + + async fn get_size(&self) -> Result { + Ok(self.env.size()) + } } From 9eaf048a06e4d18e03b1cfce946c29305c9b9b22 Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Tue, 13 Apr 2021 11:58:22 +0300 Subject: [PATCH 7/7] fix(http): use BTreeMap instead of HashMap to preserve stats order --- meilisearch-http/src/routes/stats.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/meilisearch-http/src/routes/stats.rs b/meilisearch-http/src/routes/stats.rs index c40a20609..7434f1727 100644 --- a/meilisearch-http/src/routes/stats.rs +++ b/meilisearch-http/src/routes/stats.rs @@ -1,18 +1,18 @@ -use std::collections::HashMap; +use std::collections::BTreeMap; +use std::iter::FromIterator; use actix_web::get; use actix_web::web; use actix_web::HttpResponse; use chrono::{DateTime, Utc}; -use milli::FieldsDistribution; use serde::Serialize; +use crate::data::Stats; use crate::error::ResponseError; use crate::helpers::Authentication; use crate::index_controller::IndexStats; use crate::routes::IndexParam; use crate::Data; -use crate::data::Stats; pub fn services(cfg: &mut web::ServiceConfig) { cfg.service(get_index_stats) @@ -25,7 +25,7 @@ pub fn services(cfg: &mut web::ServiceConfig) { struct IndexStatsResponse { number_of_documents: u64, is_indexing: bool, - fields_distribution: FieldsDistribution, + fields_distribution: BTreeMap, } impl From for IndexStatsResponse { @@ -33,7 +33,7 @@ impl From for IndexStatsResponse { Self { number_of_documents: stats.number_of_documents, is_indexing: stats.is_indexing, - fields_distribution: stats.fields_distribution, + fields_distribution: BTreeMap::from_iter(stats.fields_distribution.into_iter()), } } } @@ -53,7 +53,7 @@ async fn get_index_stats( struct StatsResponse { database_size: u64, last_update: Option>, - indexes: HashMap, + indexes: BTreeMap, } impl From for StatsResponse { @@ -61,7 +61,8 @@ impl From for StatsResponse { Self { database_size: stats.database_size, last_update: stats.last_update, - indexes: stats.indexes + indexes: stats + .indexes .into_iter() .map(|(uid, index_stats)| (uid, index_stats.into())) .collect(),