From 3db613ff77eae85b6b4e37b3df545dc8a3613108 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 20 Feb 2023 16:42:54 +0100 Subject: [PATCH] Don't iterate all indexes manually --- index-scheduler/src/batch.rs | 11 +++++---- index-scheduler/src/index_mapper.rs | 32 +++++++++++++++++++++----- index-scheduler/src/insta_snapshot.rs | 2 +- index-scheduler/src/lib.rs | 33 ++++++++++++++++++++++++--- meilisearch/src/routes/indexes/mod.rs | 18 +++++++++------ meilisearch/src/routes/mod.rs | 11 +++++---- 6 files changed, 80 insertions(+), 27 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 655922785..66c516d9b 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -788,15 +788,15 @@ impl IndexScheduler { dump_tasks.flush()?; // 3. Dump the indexes - for (uid, index) in self.index_mapper.indexes(&rtxn)? { + self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> { let rtxn = index.read_txn()?; let metadata = IndexMetadata { - uid: uid.clone(), + uid: uid.to_owned(), primary_key: index.primary_key(&rtxn)?.map(String::from), created_at: index.created_at(&rtxn)?, updated_at: index.updated_at(&rtxn)?, }; - let mut index_dumper = dump.create_index(&uid, &metadata)?; + let mut index_dumper = dump.create_index(uid, &metadata)?; let fields_ids_map = index.fields_ids_map(&rtxn)?; let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); @@ -809,9 +809,10 @@ impl IndexScheduler { } // 3.2. Dump the settings - let settings = meilisearch_types::settings::settings(&index, &rtxn)?; + let settings = meilisearch_types::settings::settings(index, &rtxn)?; index_dumper.settings(&settings)?; - } + Ok(()) + })?; let dump_uid = started_at.format(format_description!( "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" diff --git a/index-scheduler/src/index_mapper.rs b/index-scheduler/src/index_mapper.rs index cbeda1942..c6ca0b269 100644 --- a/index-scheduler/src/index_mapper.rs +++ b/index-scheduler/src/index_mapper.rs @@ -682,18 +682,38 @@ impl IndexMapper { Ok(index) } - /// Return all indexes, may open them if they weren't already opened. - pub fn indexes(&self, rtxn: &RoTxn) -> Result> { + /// Attempts `f` for each index that exists in the index mapper. + /// + /// It is preferable to use this function rather than a loop that opens all indexes, as a way to avoid having all indexes opened, + /// which is unsupported in general. + /// + /// Since `f` is allowed to return a result, and `Index` is cloneable, it is still possible to wrongly build e.g. a vector of + /// all the indexes, but this function makes it harder and so less likely to do accidentally. + pub fn try_for_each_index( + &self, + rtxn: &RoTxn, + mut f: impl FnMut(&str, &Index) -> Result, + ) -> Result + where + V: FromIterator, + { self.index_mapping .iter(rtxn)? - .map(|ret| { - ret.map_err(Error::from).and_then(|(name, _)| { - self.index(rtxn, name).map(|index| (name.to_string(), index)) - }) + .map(|res| { + res.map_err(Error::from) + .and_then(|(name, _)| self.index(rtxn, name).and_then(|index| f(name, &index))) }) .collect() } + /// Return the name of all indexes without opening them. + pub fn index_names(&self, rtxn: &RoTxn) -> Result> { + self.index_mapping + .iter(rtxn)? + .map(|res| res.map_err(Error::from).map(|(name, _)| name.to_string())) + .collect() + } + /// Swap two index names. pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> { let lhs_uuid = self diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index e8d07ee63..dcc348c98 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -254,6 +254,6 @@ pub fn snapshot_canceled_by( snap } pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String { - let names = mapper.indexes(rtxn).unwrap().into_iter().map(|(n, _)| n).collect::>(); + let names = mapper.index_names(rtxn).unwrap(); format!("{names:?}") } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 094b5fc7b..65e9f63ff 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -541,15 +541,42 @@ impl IndexScheduler { /// /// * If the index wasn't opened before, the index will be opened. /// * If the index doesn't exist on disk, the `IndexNotFoundError` is thrown. + /// + /// ### Note + /// + /// As an `Index` requires a large swath of the virtual memory address space, correct usage of an `Index` does not + /// keep its handle for too long. + /// + /// Some configurations also can't reasonably open multiple indexes at once. + /// If you need to fetch information from or perform an action on all indexes, + /// see the `try_for_each_index` function. pub fn index(&self, name: &str) -> Result { let rtxn = self.env.read_txn()?; self.index_mapper.index(&rtxn, name) } - /// Return and open all the indexes. - pub fn indexes(&self) -> Result> { + /// Return the name of all indexes without opening them. + pub fn index_names(self) -> Result> { let rtxn = self.env.read_txn()?; - self.index_mapper.indexes(&rtxn) + self.index_mapper.index_names(&rtxn) + } + + /// Attempts `f` for each index that exists known to the index scheduler. + /// + /// It is preferable to use this function rather than a loop that opens all indexes, as a way to avoid having all indexes opened, + /// which is unsupported in general. + /// + /// Since `f` is allowed to return a result, and `Index` is cloneable, it is still possible to wrongly build e.g. a vector of + /// all the indexes, but this function makes it harder and so less likely to do accidentally. + /// + /// If many indexes exist, this operation can take time to complete (in the order of seconds for a 1000 of indexes) as it needs to open + /// all the indexes. + pub fn try_for_each_index(&self, f: impl FnMut(&str, &Index) -> Result) -> Result + where + V: FromIterator, + { + let rtxn = self.env.read_txn()?; + self.index_mapper.try_for_each_index(&rtxn, f) } /// Return the task ids matched by the given query from the index scheduler's point of view. diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index da24a92ad..c5c168786 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -61,6 +61,8 @@ pub struct IndexView { impl IndexView { fn new(uid: String, index: &Index) -> Result { + // It is important that this function does not keep the Index handle or a clone of it, because + // `list_indexes` relies on this property to avoid opening all indexes at once. let rtxn = index.read_txn()?; Ok(IndexView { uid, @@ -90,13 +92,15 @@ pub async fn list_indexes( paginate: AwebQueryParameter, ) -> Result { let filters = index_scheduler.filters(); - let indexes: Vec<_> = index_scheduler.indexes()?; - let indexes = indexes - .into_iter() - .filter(|(name, _)| filters.is_index_authorized(name)) - .map(|(name, index)| IndexView::new(name, &index)) - .collect::, _>>()?; - + let indexes: Vec> = + index_scheduler.try_for_each_index(|uid, index| -> Result, _> { + if !filters.is_index_authorized(uid) { + return Ok(None); + } + Ok(Some(IndexView::new(uid.to_string(), index)?)) + })?; + // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened. + let indexes: Vec = indexes.into_iter().flatten().collect(); let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter()); debug!("returns: {:?}", ret); diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 622e26c75..bd3dd0649 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -261,9 +261,9 @@ pub fn create_all_stats( )?; // accumulate the size of each indexes let processing_index = processing_task.first().and_then(|task| task.index_uid()); - for (name, index) in index_scheduler.indexes()? { - if !filters.is_index_authorized(&name) { - continue; + index_scheduler.try_for_each_index(|name, index| { + if !filters.is_index_authorized(name) { + return Ok(()); } database_size += index.on_disk_size()?; @@ -278,8 +278,9 @@ pub fn create_all_stats( let updated_at = index.updated_at(&rtxn)?; last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at))); - indexes.insert(name, stats); - } + indexes.insert(name.to_string(), stats); + Ok(()) + })?; database_size += index_scheduler.size()?; database_size += auth_controller.size()?;