diff --git a/Cargo.lock b/Cargo.lock index 461a4789b..a79898d99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1875,6 +1875,23 @@ dependencies = [ "urlencoding", "uuid", "vergen", + "walkdir", +] + +[[package]] +name = "meilisearch-tokenizer" +version = "0.1.1" +source = "git+https://github.com/meilisearch/Tokenizer.git?tag=v0.2.0#833c48b2ee39071f8b4f51abd15122afdb3c8c06" +dependencies = [ + "character_converter", + "cow-utils", + "deunicode", + "fst", + "jieba-rs", + "once_cell", + "slice-group-by", + "unicode-segmentation", + "whatlang", ] [[package]] @@ -2840,6 +2857,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.1.0" @@ -3717,6 +3743,17 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi 0.3.9", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 3e04b876a..9ab386882 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -62,6 +62,7 @@ thiserror = "1.0.24" tokio = { version = "1", features = ["full"] } uuid = "0.8.2" oxidized-json-checker = "0.3.2" +walkdir = "2.3.2" [dependencies.sentry] default-features = false diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index 717d728fc..2d0a543d4 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -1,6 +1,3 @@ -pub mod search; -mod updates; - use std::ops::Deref; use std::sync::Arc; @@ -11,6 +8,9 @@ use crate::index_controller::IndexController; use crate::index_controller::{IndexMetadata, IndexSettings}; use crate::option::Opt; +pub mod search; +mod updates; + #[derive(Clone)] pub struct Data { inner: Arc, diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index dfd2ebdc4..9ba03ddd2 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,6 +1,3 @@ -mod search; -mod updates; - use std::collections::{BTreeSet, HashSet}; use std::ops::Deref; use std::sync::Arc; @@ -8,10 +5,14 @@ use std::sync::Arc; use anyhow::{bail, Context}; use milli::obkv_to_json; use serde_json::{Map, Value}; +use walkdir::WalkDir; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Facets, Settings, UpdateResult}; +mod search; +mod updates; + pub type Document = Map; #[derive(Clone)] @@ -126,6 +127,15 @@ impl Index { } } + pub fn size(&self) -> anyhow::Result { + Ok(WalkDir::new(self.env.path()) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter_map(|entry| entry.metadata().ok()) + .filter(|metadata| metadata.is_file()) + .fold(0, |acc, m| acc + m.len())) + } + fn fields_to_display>( &self, txn: &heed::RoTxn, diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index a4228227f..6d0c1e8cd 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -12,12 +12,15 @@ use tokio::sync::mpsc; use tokio::task::spawn_blocking; use uuid::Uuid; -use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult}; use crate::index::{Document, SearchQuery, SearchResult, Settings}; use crate::index_controller::update_handler::UpdateHandler; -use crate::index_controller::{get_arc_ownership_blocking, updates::Processing, UpdateMeta}; +use crate::index_controller::{ + get_arc_ownership_blocking, updates::Processing, IndexStats, UpdateMeta, +}; use crate::option::IndexerOpts; +use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult}; + pub struct IndexActor { read_receiver: Option>, write_receiver: Option>, @@ -146,6 +149,9 @@ impl IndexActor { Snapshot { uuid, path, ret } => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } + GetStats { uuid, ret } => { + let _ = ret.send(self.handle_get_stats(uuid).await); + } } } @@ -328,4 +334,25 @@ impl IndexActor { Ok(()) } + + async fn handle_get_stats(&self, uuid: Uuid) -> Result { + let index = self + .store + .get(uuid) + .await? + .ok_or(IndexError::UnexistingIndex)?; + + spawn_blocking(move || { + let rtxn = index.read_txn()?; + + Ok(IndexStats { + size: index.size()?, + number_of_documents: index.number_of_documents(&rtxn)?, + is_indexing: false, // TODO check actual is_indexing + fields_distribution: index.fields_distribution(&rtxn)?, + }) + }) + .await + .map_err(|e| IndexError::Error(e.into()))? + } } diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index dba0f9e60..93406c13b 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -3,12 +3,13 @@ use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; +use crate::index::{Document, SearchQuery, SearchResult, Settings}; +use crate::index_controller::{updates::Processing, UpdateMeta}; +use crate::index_controller::{IndexSettings, IndexStats}; + use super::{ IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore, Result, UpdateResult, }; -use crate::index::{Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::IndexSettings; -use crate::index_controller::{updates::Processing, UpdateMeta}; #[derive(Clone)] pub struct IndexActorHandleImpl { @@ -121,6 +122,13 @@ impl IndexActorHandle for IndexActorHandleImpl { let _ = self.read_sender.send(msg).await; Ok(receiver.await.expect("IndexActor has been killed")?) } + + async fn get_index_stats(&self, uuid: Uuid) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = IndexMsg::GetStats { uuid, ret }; + let _ = self.read_sender.send(msg).await; + Ok(receiver.await.expect("IndexActor has been killed")?) + } } impl IndexActorHandleImpl { diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 46d7f6214..6da0f8628 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -3,9 +3,10 @@ use std::path::PathBuf; use tokio::sync::oneshot; use uuid::Uuid; -use super::{IndexMeta, IndexSettings, Result, UpdateResult}; use crate::index::{Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::{updates::Processing, UpdateMeta}; +use crate::index_controller::{updates::Processing, IndexStats, UpdateMeta}; + +use super::{IndexMeta, IndexSettings, Result, UpdateResult}; pub enum IndexMsg { CreateIndex { @@ -58,4 +59,8 @@ pub enum IndexMsg { path: PathBuf, ret: oneshot::Sender>, }, + GetStats { + uuid: Uuid, + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index 2dc856b80..426eb29e4 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -1,30 +1,30 @@ -mod actor; -mod handle_impl; -mod message; -mod store; - use std::path::PathBuf; use chrono::{DateTime, Utc}; +#[cfg(test)] +use mockall::automock; use serde::{Deserialize, Serialize}; use thiserror::Error; use uuid::Uuid; -use super::IndexSettings; +use actor::IndexActor; +pub use handle_impl::IndexActorHandleImpl; +use message::IndexMsg; +use store::{IndexStore, MapIndexStore}; + use crate::index::UpdateResult as UResult; use crate::index::{Document, Index, SearchQuery, SearchResult, Settings}; use crate::index_controller::{ updates::{Failed, Processed, Processing}, - UpdateMeta, + IndexStats, UpdateMeta, }; -use actor::IndexActor; -use message::IndexMsg; -use store::{IndexStore, MapIndexStore}; -pub use handle_impl::IndexActorHandleImpl; +use super::IndexSettings; -#[cfg(test)] -use mockall::automock; +mod actor; +mod handle_impl; +mod message; +mod store; pub type Result = std::result::Result; type UpdateResult = std::result::Result, Failed>; @@ -33,7 +33,7 @@ type UpdateResult = std::result::Result, Failed, - updated_at: DateTime, + pub updated_at: DateTime, primary_key: Option, } @@ -98,4 +98,5 @@ pub trait IndexActorHandle { async fn get_index_meta(&self, uuid: Uuid) -> Result; async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result; async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; + async fn get_index_stats(&self, uuid: Uuid) -> Result; } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index b26ab8828..e459af10c 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -1,10 +1,3 @@ -mod index_actor; -mod snapshot; -mod update_actor; -mod update_handler; -mod updates; -mod uuid_resolver; - use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -14,33 +7,40 @@ use anyhow::bail; use futures::stream::StreamExt; use log::info; use milli::update::{IndexDocumentsMethod, UpdateFormat}; +use milli::FieldsDistribution; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio::time::sleep; use uuid::Uuid; +use index_actor::IndexActorHandle; +use snapshot::load_snapshot; +use snapshot::SnapshotService; +use update_actor::UpdateActorHandle; +pub use updates::{Failed, Processed, Processing}; +use uuid_resolver::UuidError; +use uuid_resolver::UuidResolverHandle; + use crate::index::{Document, SearchQuery, SearchResult}; use crate::index::{Facets, Settings, UpdateResult}; use crate::option::Opt; -use index_actor::IndexActorHandle; -use snapshot::load_snapshot; -use update_actor::UpdateActorHandle; -use uuid_resolver::UuidResolverHandle; - -use snapshot::SnapshotService; -pub use updates::{Failed, Processed, Processing}; -use uuid_resolver::UuidError; +mod index_actor; +mod snapshot; +mod update_actor; +mod update_handler; +mod updates; +mod uuid_resolver; pub type UpdateStatus = updates::UpdateStatus; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] pub struct IndexMetadata { - uid: String, + pub uid: String, name: String, #[serde(flatten)] - meta: index_actor::IndexMeta, + pub meta: index_actor::IndexMeta, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -63,6 +63,14 @@ pub struct IndexSettings { pub primary_key: Option, } +#[derive(Clone, Debug)] +pub struct IndexStats { + pub size: u64, + pub number_of_documents: u64, + pub is_indexing: bool, + pub fields_distribution: FieldsDistribution, +} + pub struct IndexController { uuid_resolver: uuid_resolver::UuidResolverHandleImpl, index_handle: index_actor::IndexActorHandleImpl, @@ -100,10 +108,11 @@ impl IndexController { update_handle.clone(), Duration::from_secs(options.snapshot_interval_sec), options.snapshot_dir.clone(), - options.db_path - .file_name() - .map(|n| n.to_owned().into_string().expect("invalid path")) - .unwrap_or_else(|| String::from("data.ms")), + options + .db_path + .file_name() + .map(|n| n.to_owned().into_string().expect("invalid path")) + .unwrap_or_else(|| String::from("data.ms")), ); tokio::task::spawn(snapshot_service.run()); @@ -341,6 +350,12 @@ impl IndexController { }; Ok(meta) } + + pub async fn get_stats(&self, uid: String) -> anyhow::Result { + let uuid = self.uuid_resolver.get(uid.clone()).await?; + + Ok(self.index_handle.get_index_stats(uuid).await?) + } } pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { diff --git a/meilisearch-http/src/routes/stats.rs b/meilisearch-http/src/routes/stats.rs index 108c67ca9..bab637cd6 100644 --- a/meilisearch-http/src/routes/stats.rs +++ b/meilisearch-http/src/routes/stats.rs @@ -1,18 +1,20 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use actix_web::get; use actix_web::web; use actix_web::HttpResponse; use chrono::{DateTime, Utc}; +use milli::FieldsDistribution; use serde::Serialize; use crate::error::ResponseError; use crate::helpers::Authentication; +use crate::index_controller::IndexStats; use crate::routes::IndexParam; use crate::Data; pub fn services(cfg: &mut web::ServiceConfig) { - cfg.service(index_stats) + cfg.service(get_index_stats) .service(get_stats) .service(get_version); } @@ -22,28 +24,61 @@ pub fn services(cfg: &mut web::ServiceConfig) { struct IndexStatsResponse { number_of_documents: u64, is_indexing: bool, - fields_distribution: BTreeMap, + fields_distribution: FieldsDistribution, +} + +impl From for IndexStatsResponse { + fn from(stats: IndexStats) -> Self { + Self { + number_of_documents: stats.number_of_documents, + is_indexing: stats.is_indexing, + fields_distribution: stats.fields_distribution, + } + } } #[get("/indexes/{index_uid}/stats", wrap = "Authentication::Private")] -async fn index_stats( - _data: web::Data, - _path: web::Path, +async fn get_index_stats( + data: web::Data, + path: web::Path, ) -> Result { - todo!() + let response: IndexStatsResponse = data + .index_controller + .get_stats(path.index_uid.clone()) + .await? + .into(); + + Ok(HttpResponse::Ok().json(response)) } #[derive(Serialize)] #[serde(rename_all = "camelCase")] -struct StatsResult { +struct StatsResponse { database_size: u64, last_update: Option>, indexes: HashMap, } #[get("/stats", wrap = "Authentication::Private")] -async fn get_stats(_data: web::Data) -> Result { - todo!() +async fn get_stats(data: web::Data) -> Result { + let mut response = StatsResponse { + database_size: 0, + last_update: None, + indexes: HashMap::new(), + }; + + for index in data.index_controller.list_indexes().await? { + let stats = data.index_controller.get_stats(index.uid.clone()).await?; + + response.database_size += stats.size; + response.last_update = Some(match response.last_update { + Some(last_update) => last_update.max(index.meta.updated_at), + None => index.meta.updated_at, + }); + response.indexes.insert(index.uid, stats.into()); + } + + Ok(HttpResponse::Ok().json(response)) } #[derive(Serialize)] @@ -58,11 +93,11 @@ struct VersionResponse { async fn get_version() -> HttpResponse { let commit_sha = match option_env!("COMMIT_SHA") { Some("") | None => env!("VERGEN_SHA"), - Some(commit_sha) => commit_sha + Some(commit_sha) => commit_sha, }; let commit_date = match option_env!("COMMIT_DATE") { Some("") | None => env!("VERGEN_COMMIT_DATE"), - Some(commit_date) => commit_date + Some(commit_date) => commit_date, }; HttpResponse::Ok().json(VersionResponse {