diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 03287d7ae..41b5793b1 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -847,8 +847,10 @@ impl IndexScheduler { // this is a non-critical operation. If it fails, we should not fail // the entire batch. let res = || -> Result<()> { + let index_rtxn = index.read_txn()?; + let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?; let mut wtxn = self.env.write_txn()?; - self.index_mapper.compute_and_store_stats_of(&mut wtxn, &index_uid)?; + self.index_mapper.store_stats_of(&mut wtxn, &index_uid, stats)?; wtxn.commit()?; Ok(()) }(); @@ -888,6 +890,10 @@ impl IndexScheduler { )?; index_wtxn.commit()?; } + + // drop rtxn before starting a new wtxn on the same db + rtxn.commit()?; + task.status = Status::Succeeded; task.details = Some(Details::IndexInfo { primary_key }); @@ -897,7 +903,9 @@ impl IndexScheduler { // the entire batch. let res = || -> Result<()> { let mut wtxn = self.env.write_txn()?; - self.index_mapper.compute_and_store_stats_of(&mut wtxn, &index_uid)?; + let index_rtxn = index.read_txn()?; + let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?; + self.index_mapper.store_stats_of(&mut wtxn, &index_uid, stats)?; wtxn.commit()?; Ok(()) }(); diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 9e1de438a..174f4f9a3 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -54,8 +54,11 @@ pub struct IndexMapper { /// Map an index name with an index uuid currently available on disk. pub(crate) index_mapping: Database, - /// Map an index name with the cached stats associated to the index. - pub(crate) index_stats: Database>, + /// Map an index UUID with the cached stats associated to the index. 
+ /// + /// Using a UUID forces lookups to go through the index_mapping table to recover the index behind a name, ensuring + /// consistency with respect to index swapping. + pub(crate) index_stats: Database>, /// Path to the folder where the LMDB environments of each index are. base_path: PathBuf, @@ -80,15 +83,39 @@ pub enum IndexStatus { Available(Index), } +/// The statistics that can be computed from an `Index` object. #[derive(Serialize, Deserialize, Debug)] pub struct IndexStats { + /// Number of documents in the index. pub number_of_documents: u64, + /// Size of the index's DB, in bytes. pub database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. pub field_distribution: FieldDistribution, + /// Creation date of the index. pub created_at: OffsetDateTime, + /// Date of the last update of the index. pub updated_at: OffsetDateTime, } +impl IndexStats { + /// Compute the stats of an index + /// + /// # Parameters + /// + /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. + pub fn new(index: &Index, rtxn: &RoTxn) -> Result { + let database_size = index.on_disk_size()?; + Ok(IndexStats { + number_of_documents: index.number_of_documents(rtxn)?, + database_size, + field_distribution: index.field_distribution(rtxn)?, + created_at: index.created_at(rtxn)?, + updated_at: index.updated_at(rtxn)?, + }) + } +} + impl IndexMapper { pub fn new( env: &Env, @@ -149,12 +176,14 @@ impl IndexMapper { /// Removes the index from the mapping table and the in-memory index map /// but keeps the associated tasks. pub fn delete_index(&self, mut wtxn: RwTxn, name: &str) -> Result<()> { - self.index_stats.delete(&mut wtxn, name)?; let uuid = self .index_mapping .get(&wtxn, name)? .ok_or_else(|| Error::IndexNotFound(name.to_string()))?; + // Not an error if the index had no stats in cache. + self.index_stats.delete(&mut wtxn, &uuid)?; + // Once we retrieved the UUID of the index we remove it from the mapping table. 
+ assert!(self.index_mapping.delete(&mut wtxn, name)?); @@ -375,26 +404,42 @@ impl IndexMapper { Ok(()) } - /// Return the stored stats of an index. + /// The stats of an index. + /// + /// If available in the cache, they are directly returned. + /// Otherwise, the `Index` is opened to compute the stats on the fly (the result is not cached). + /// The stats for an index are cached after each `Index` update. pub fn stats_of(&self, rtxn: &RoTxn, index_uid: &str) -> Result { - self.index_stats + let uuid = self + .index_mapping .get(rtxn, index_uid)? - .ok_or_else(|| Error::IndexNotFound(index_uid.to_string())) + .ok_or_else(|| Error::IndexNotFound(index_uid.to_string()))?; + + match self.index_stats.get(rtxn, &uuid)? { + Some(stats) => Ok(stats), + None => { + let index = self.index(rtxn, index_uid)?; + let index_rtxn = index.read_txn()?; + IndexStats::new(&index, &index_rtxn) + } + } } - /// Return the stats of an index and write it in the index-mapper database. - pub fn compute_and_store_stats_of(&self, wtxn: &mut RwTxn, index_uid: &str) -> Result<()> { - let index = self.index(wtxn, index_uid)?; - let database_size = index.on_disk_size()?; - let rtxn = index.read_txn()?; - let stats = IndexStats { - number_of_documents: index.number_of_documents(&rtxn)?, - database_size, - field_distribution: index.field_distribution(&rtxn)?, - created_at: index.created_at(&rtxn)?, - updated_at: index.updated_at(&rtxn)?, - }; - self.index_stats.put(wtxn, index_uid, &stats)?; + /// Stores the new stats for an index. + /// + /// Expected usage is to compute the stats of the index using `IndexStats::new`, then pass them to this function. + pub fn store_stats_of( + &self, + wtxn: &mut RwTxn, + index_uid: &str, + stats: IndexStats, + ) -> Result<()> { + let uuid = self + .index_mapping + .get(wtxn, index_uid)? 
+ .ok_or_else(|| Error::IndexNotFound(index_uid.to_string()))?; + + self.index_stats.put(wtxn, &uuid, &stats)?; Ok(()) } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 4f875eaca..7c5970fad 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1245,9 +1245,14 @@ struct IndexBudget { task_db_size: usize, } +/// The statistics that can be computed from an `Index` object and the scheduler. +/// +/// Compared with `index_mapper::IndexStats`, it adds the scheduling status. #[derive(Debug)] pub struct IndexStats { + /// Whether this index is currently performing indexation, according to the scheduler. pub is_indexing: bool, + /// Internal stats computed from the index. pub inner_stats: index_mapper::IndexStats, } diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 28988e30b..ba925b3d5 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -220,11 +220,15 @@ pub async fn delete_index( Ok(HttpResponse::Accepted().json(task)) } +/// Stats of an `Index`, as known to the `stats` route. #[derive(Serialize, Debug)] #[serde(rename_all = "camelCase")] pub struct IndexStats { + /// Number of documents in the index pub number_of_documents: u64, + /// Whether the index is currently performing indexation, according to the scheduler. pub is_indexing: bool, + /// Association of every field name with the number of times it occurs in the documents. pub field_distribution: FieldDistribution, }