meilisearch/index-scheduler/src/index_mapper.rs

345 lines
13 KiB
Rust
Raw Normal View History

use std::collections::hash_map::Entry;
use std::collections::HashMap;
2022-10-21 19:58:58 +08:00
use std::path::{Path, PathBuf};
2022-10-05 00:50:18 +08:00
use std::sync::{Arc, RwLock};
use std::{fs, thread};
use log::error;
2022-10-30 05:57:30 +08:00
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::Index;
2023-01-10 01:11:01 +08:00
use synchronoise::SignalEvent;
2022-12-22 18:46:17 +08:00
use time::OffsetDateTime;
use uuid::Uuid;
2023-01-10 01:11:01 +08:00
use self::IndexStatus::{Available, BeingDeleted, BeingResized};
use crate::uuid_codec::UuidCodec;
use crate::{clamp_to_page_size, Error, Result};
const INDEX_MAPPING: &str = "index-mapping";
/// Structure managing meilisearch's indexes.
///
/// It is responsible for:
/// 1. Creating new indexes
/// 2. Opening indexes and storing references to these opened indexes
/// 3. Accessing indexes through their uuid
/// 4. Mapping a user-defined name to each index uuid.
#[derive(Clone)]
pub struct IndexMapper {
/// Keep track of the opened indexes. Used mainly by the index resolver.
index_map: Arc<RwLock<HashMap<Uuid, IndexStatus>>>,
/// Map an index name with an index uuid currently available on disk.
2022-10-30 05:57:30 +08:00
pub(crate) index_mapping: Database<Str, UuidCodec>,
/// Path to the folder where the LMDB environments of each index are.
base_path: PathBuf,
index_size: usize,
2022-10-16 07:39:01 +08:00
pub indexer_config: Arc<IndexerConfig>,
}
/// Whether the index is available for use or is forbidden to be inserted back in the index map
2022-10-22 22:35:42 +08:00
#[allow(clippy::large_enum_variant)]
#[derive(Clone)]
pub enum IndexStatus {
/// Do not insert it back in the index map as it is currently being deleted.
BeingDeleted,
2023-01-10 01:11:01 +08:00
/// Temporarily do not insert the index in the index map as it is currently being resized.
BeingResized(Arc<SignalEvent>),
/// You can use the index without worrying about anything.
Available(Index),
}
impl IndexMapper {
pub fn new(
env: &Env,
base_path: PathBuf,
index_size: usize,
indexer_config: IndexerConfig,
) -> Result<Self> {
Ok(Self {
index_map: Arc::default(),
index_mapping: env.create_database(Some(INDEX_MAPPING))?,
base_path,
index_size,
indexer_config: Arc::new(indexer_config),
})
}
2022-10-21 19:58:58 +08:00
/// Create or open an index in the specified path.
/// The path *must* exists or an error will be thrown.
2022-12-20 02:24:56 +08:00
fn create_or_open_index(
&self,
path: &Path,
2022-12-22 18:46:17 +08:00
date: Option<(OffsetDateTime, OffsetDateTime)>,
map_size: usize,
2022-12-20 02:24:56 +08:00
) -> Result<Index> {
2022-10-21 19:58:58 +08:00
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
2022-10-21 19:58:58 +08:00
options.max_readers(1024);
2022-12-20 02:24:56 +08:00
2022-12-21 21:28:00 +08:00
if let Some((created, updated)) = date {
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
2022-12-21 21:28:00 +08:00
} else {
Ok(Index::new(options, path)?)
}
2022-10-21 19:58:58 +08:00
}
/// Get or create the index.
2022-12-20 02:24:56 +08:00
pub fn create_index(
&self,
mut wtxn: RwTxn,
name: &str,
2022-12-22 18:46:17 +08:00
date: Option<(OffsetDateTime, OffsetDateTime)>,
2022-12-20 02:24:56 +08:00
) -> Result<Index> {
2022-10-27 00:03:48 +08:00
match self.index(&wtxn, name) {
Ok(index) => {
wtxn.commit()?;
Ok(index)
}
Err(Error::IndexNotFound(_)) => {
let uuid = Uuid::new_v4();
2022-10-26 20:19:56 +08:00
self.index_mapping.put(&mut wtxn, name, &uuid)?;
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
let index = self.create_or_open_index(&index_path, date, self.index_size)?;
wtxn.commit()?;
2023-01-10 01:11:01 +08:00
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
// This is very unlikely to happen in practice.
2022-10-26 20:27:52 +08:00
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
2023-01-10 01:11:01 +08:00
if self.index_map.write().unwrap().insert(uuid, Available(index.clone())).is_some()
2022-10-26 20:19:56 +08:00
{
2023-01-10 01:11:01 +08:00
panic!("Uuid v4 conflict: index with UUID {uuid} already exists.");
2022-10-26 20:19:56 +08:00
}
Ok(index)
}
error => error,
}
}
/// Removes the index from the mapping table and the in-memory index map
/// but keeps the associated tasks.
pub fn delete_index(&self, mut wtxn: RwTxn, name: &str) -> Result<()> {
let uuid = self
.index_mapping
.get(&wtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// Once we retrieved the UUID of the index we remove it from the mapping table.
assert!(self.index_mapping.delete(&mut wtxn, name)?);
wtxn.commit()?;
// We remove the index from the in-memory index map.
2023-01-10 01:11:01 +08:00
let closing_event = loop {
let mut lock = self.index_map.write().unwrap();
let resize_operation = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => break Some(index.prepare_for_closing()),
// The target index is in the middle of a resize operation.
// Wait for this operation to complete, then try again.
Some(BeingResized(resize_operation)) => resize_operation.clone(),
// The index is already being deleted or doesn't exist.
// It's OK to remove it from the map again.
_ => break None,
};
// Avoiding deadlocks: we need to drop the lock before waiting for the end of the resize, which
// will involve operations on the very map we're locking.
drop(lock);
resize_operation.wait();
};
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
let index_name = name.to_string();
Bring back `release-v0.30.0` into `release-v0.30.0-temp` (final: into `main`) (#3145) * Fix error code of the "duplicate index found" error * Use the content of the ProcessingTasks in the tasks cancelation system * Change the missing_filters error code into missing_task_filters * WIP Introduce the invalid_task_uid error code * Use more precise error codes/message for the task routes + Allow star operator in delete/cancel tasks + rename originalQuery to originalFilters + Display error/canceled_by in task view even when they are = null + Rename task filter fields by using their plural forms + Prepare an error code for canceledBy filter + Only return global tasks if the API key action `index.*` is there * Add canceledBy task filter * Update tests following task API changes * Rename original_query to original_filters everywhere * Update more insta-snap tests * Make clippy happy They're a happy clip now. * Make rustfmt happy >:-( * Fix Index name parsing error message to fit the specification * Bump milli version to 0.35.1 * Fix the new error messages * fix the error messages and add tests * rename the error codes for the sake of consistency * refactor the way we send the cli informations + add the analytics for the config file and ssl usage * Apply suggestions from code review Co-authored-by: Clément Renault <clement@meilisearch.com> * add a comment over the new infos structure * reformat, sorry @kero * Store analytics for the documents deletions * Add analytics on all the settings * Spawn threads with names * Spawn rayon threads with names * update the distinct attributes to the spec update * update the analytics on the search route * implements the analytics on the health and version routes * Fix task details serialization * Add the question mark to the task deletion query filter * Add the question mark to the task cancelation query filter * Fix tests * add analytics on the task route * Add all the missing fields of the new task query type * Create a new analytics for the task deletion * Create a new analytics for the task creation * batch the tasks seen events * Update the finite pagination analytics * add the analytics of the swap-indexes route * Stop removing the DB when failing to read it * Rename originalFilters into originalFilters * Rename matchedDocuments into providedIds * Add `workflow_dispatch` to flaky.yml * Bump grenad to 0.4.4 * Bump milli to version v0.37.0 * Don't multiply total memory returned by sysinfo anymore sysinfo now returns bytes rather than KB * Add a dispatch to the publish binaries workflow * Fix publish release CI * Don't use gold but the default linker * Always display details for the indexDeletion task * Fix the insta tests * refactorize the whole test suite 1. Make a call to assert_internally_consistent automatically when snapshoting the scheduler. There is no point in snapshoting something broken and expect the dumb humans to notice. 2. Replace every possible call to assert_internally_consistent by a snapshot of the scheduler. It takes as many lines and ensure we never change something without noticing in any tests ever. 3. Name every snapshots: it's easier to debug when something goes wrong and easier to review in general. 4. Stop skipping breakpoints, it's too easy to miss something. Now you must explicitely show which path is the scheduler supposed to use. 5. Add a timeout on the channel.recv, it eases the process of writing tests, now when something file you get a failure instead of a deadlock. * rebase on release-v0.30 * makes clippy happy * update the snapshots after a rebase * try to remove the flakyness of the failing test * Add more analytics on the ranking rules positions * Update the dump test to check for the dumpUid dumpCreation task details * send the ranking rules as a string because amplitude is too dumb to process an array as a single value * Display a null dumpUid until we computed the dump itself on disk * Update tests * Check if the master key is missing before returning an error Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com> Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-11-28 23:27:41 +08:00
thread::Builder::new()
.name(String::from("index_deleter"))
.spawn(move || {
// We first wait to be sure that the previously opened index is effectively closed.
// This can take a lot of time, this is why we do that in a seperate thread.
if let Some(closing_event) = closing_event {
closing_event.wait();
}
Bring back `release-v0.30.0` into `release-v0.30.0-temp` (final: into `main`) (#3145) * Fix error code of the "duplicate index found" error * Use the content of the ProcessingTasks in the tasks cancelation system * Change the missing_filters error code into missing_task_filters * WIP Introduce the invalid_task_uid error code * Use more precise error codes/message for the task routes + Allow star operator in delete/cancel tasks + rename originalQuery to originalFilters + Display error/canceled_by in task view even when they are = null + Rename task filter fields by using their plural forms + Prepare an error code for canceledBy filter + Only return global tasks if the API key action `index.*` is there * Add canceledBy task filter * Update tests following task API changes * Rename original_query to original_filters everywhere * Update more insta-snap tests * Make clippy happy They're a happy clip now. * Make rustfmt happy >:-( * Fix Index name parsing error message to fit the specification * Bump milli version to 0.35.1 * Fix the new error messages * fix the error messages and add tests * rename the error codes for the sake of consistency * refactor the way we send the cli informations + add the analytics for the config file and ssl usage * Apply suggestions from code review Co-authored-by: Clément Renault <clement@meilisearch.com> * add a comment over the new infos structure * reformat, sorry @kero * Store analytics for the documents deletions * Add analytics on all the settings * Spawn threads with names * Spawn rayon threads with names * update the distinct attributes to the spec update * update the analytics on the search route * implements the analytics on the health and version routes * Fix task details serialization * Add the question mark to the task deletion query filter * Add the question mark to the task cancelation query filter * Fix tests * add analytics on the task route * Add all the missing fields of the new task query type * Create a new analytics for the task deletion * Create a new analytics for the task creation * batch the tasks seen events * Update the finite pagination analytics * add the analytics of the swap-indexes route * Stop removing the DB when failing to read it * Rename originalFilters into originalFilters * Rename matchedDocuments into providedIds * Add `workflow_dispatch` to flaky.yml * Bump grenad to 0.4.4 * Bump milli to version v0.37.0 * Don't multiply total memory returned by sysinfo anymore sysinfo now returns bytes rather than KB * Add a dispatch to the publish binaries workflow * Fix publish release CI * Don't use gold but the default linker * Always display details for the indexDeletion task * Fix the insta tests * refactorize the whole test suite 1. Make a call to assert_internally_consistent automatically when snapshoting the scheduler. There is no point in snapshoting something broken and expect the dumb humans to notice. 2. Replace every possible call to assert_internally_consistent by a snapshot of the scheduler. It takes as many lines and ensure we never change something without noticing in any tests ever. 3. Name every snapshots: it's easier to debug when something goes wrong and easier to review in general. 4. Stop skipping breakpoints, it's too easy to miss something. Now you must explicitely show which path is the scheduler supposed to use. 5. Add a timeout on the channel.recv, it eases the process of writing tests, now when something file you get a failure instead of a deadlock. * rebase on release-v0.30 * makes clippy happy * update the snapshots after a rebase * try to remove the flakyness of the failing test * Add more analytics on the ranking rules positions * Update the dump test to check for the dumpUid dumpCreation task details * send the ranking rules as a string because amplitude is too dumb to process an array as a single value * Display a null dumpUid until we computed the dump itself on disk * Update tests * Check if the master key is missing before returning an error Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com> Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-11-28 23:27:41 +08:00
// Then we remove the content from disk.
if let Err(e) = fs::remove_dir_all(&index_path) {
error!(
"An error happened when deleting the index {} ({}): {}",
index_name, uuid, e
);
}
Bring back `release-v0.30.0` into `release-v0.30.0-temp` (final: into `main`) (#3145) * Fix error code of the "duplicate index found" error * Use the content of the ProcessingTasks in the tasks cancelation system * Change the missing_filters error code into missing_task_filters * WIP Introduce the invalid_task_uid error code * Use more precise error codes/message for the task routes + Allow star operator in delete/cancel tasks + rename originalQuery to originalFilters + Display error/canceled_by in task view even when they are = null + Rename task filter fields by using their plural forms + Prepare an error code for canceledBy filter + Only return global tasks if the API key action `index.*` is there * Add canceledBy task filter * Update tests following task API changes * Rename original_query to original_filters everywhere * Update more insta-snap tests * Make clippy happy They're a happy clip now. * Make rustfmt happy >:-( * Fix Index name parsing error message to fit the specification * Bump milli version to 0.35.1 * Fix the new error messages * fix the error messages and add tests * rename the error codes for the sake of consistency * refactor the way we send the cli informations + add the analytics for the config file and ssl usage * Apply suggestions from code review Co-authored-by: Clément Renault <clement@meilisearch.com> * add a comment over the new infos structure * reformat, sorry @kero * Store analytics for the documents deletions * Add analytics on all the settings * Spawn threads with names * Spawn rayon threads with names * update the distinct attributes to the spec update * update the analytics on the search route * implements the analytics on the health and version routes * Fix task details serialization * Add the question mark to the task deletion query filter * Add the question mark to the task cancelation query filter * Fix tests * add analytics on the task route * Add all the missing fields of the new task query type * Create a new analytics for the task deletion * Create a new analytics for the task creation * batch the tasks seen events * Update the finite pagination analytics * add the analytics of the swap-indexes route * Stop removing the DB when failing to read it * Rename originalFilters into originalFilters * Rename matchedDocuments into providedIds * Add `workflow_dispatch` to flaky.yml * Bump grenad to 0.4.4 * Bump milli to version v0.37.0 * Don't multiply total memory returned by sysinfo anymore sysinfo now returns bytes rather than KB * Add a dispatch to the publish binaries workflow * Fix publish release CI * Don't use gold but the default linker * Always display details for the indexDeletion task * Fix the insta tests * refactorize the whole test suite 1. Make a call to assert_internally_consistent automatically when snapshoting the scheduler. There is no point in snapshoting something broken and expect the dumb humans to notice. 2. Replace every possible call to assert_internally_consistent by a snapshot of the scheduler. It takes as many lines and ensure we never change something without noticing in any tests ever. 3. Name every snapshots: it's easier to debug when something goes wrong and easier to review in general. 4. Stop skipping breakpoints, it's too easy to miss something. Now you must explicitely show which path is the scheduler supposed to use. 5. Add a timeout on the channel.recv, it eases the process of writing tests, now when something file you get a failure instead of a deadlock. * rebase on release-v0.30 * makes clippy happy * update the snapshots after a rebase * try to remove the flakyness of the failing test * Add more analytics on the ranking rules positions * Update the dump test to check for the dumpUid dumpCreation task details * send the ranking rules as a string because amplitude is too dumb to process an array as a single value * Display a null dumpUid until we computed the dump itself on disk * Update tests * Check if the master key is missing before returning an error Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com> Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-11-28 23:27:41 +08:00
// Finally we remove the entry from the index map.
assert!(matches!(index_map.write().unwrap().remove(&uuid), Some(BeingDeleted)));
})
.unwrap();
Ok(())
}
pub fn exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}
2023-01-09 16:35:37 +08:00
/// Resizes the maximum size of the specified index to the double of its current maximum size.
///
/// This operation involves closing the underlying environment and so can take a long time to complete.
///
/// # Panics
///
/// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
/// in memory hash map.
pub fn resize_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
// fixme: factor to a function?
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
// signal that will be sent when the resize operation completes
let resize_operation = Arc::new(SignalEvent::manual(false));
let index = match lock.insert(uuid, BeingResized(resize_operation)) {
Some(Available(index)) => index,
Some(previous_status) => {
lock.insert(uuid, previous_status);
panic!(
"Attempting to resize index {name} that is already being resized or deleted."
)
}
None => {
panic!("Could not find the status of index {name} in the in-memory index mapper.")
}
};
drop(lock);
let current_size = index.map_size()?;
let new_size = current_size * 2;
let closing_event = index.prepare_for_closing();
log::debug!("Waiting for index {name} to close");
if !closing_event.wait_timeout(std::time::Duration::from_secs(600)) {
// fail after 10 minutes waiting
panic!("Could not resize index {name} (unable to close it)");
}
log::info!("Resized index {name} from {current_size} to {new_size} bytes");
let index_path = self.base_path.join(uuid.to_string());
let index = self.create_or_open_index(&index_path, None, new_size)?;
// Add back the resized index
let mut lock = self.index_map.write().unwrap();
let Some(BeingResized(resize_operation)) = lock.insert(uuid, Available(index)) else {
panic!("Index state for index {name} was modified while it was being resized")
};
// drop the lock before signaling completion so that other threads don't immediately await on the lock after waking up.
drop(lock);
resize_operation.signal();
Ok(())
}
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
let uuid = self
.index_mapping
2022-10-03 21:29:37 +08:00
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// we clone here to drop the lock before entering the match
2023-01-10 01:11:01 +08:00
let index = loop {
let index = self.index_map.read().unwrap().get(&uuid).cloned();
match index {
Some(Available(index)) => break index,
Some(BeingResized(ref resize_operation)) => {
// Avoiding deadlocks: no lock taken while doing this operation.
resize_operation.wait();
continue;
}
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (eg if two search happens
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
let index =
self.create_or_open_index(&index_path, None, self.index_size)?;
entry.insert(Available(index.clone()));
break index;
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => break index.clone(),
BeingResized(resize_operation) => {
// Avoiding the deadlock: we drop the lock before waiting
let resize_operation = resize_operation.clone();
drop(index_map);
resize_operation.wait();
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
}
}
};
Ok(index)
}
/// Return all indexes, may open them if they weren't already opened.
pub fn indexes(&self, rtxn: &RoTxn) -> Result<Vec<(String, Index)>> {
self.index_mapping
2022-10-03 21:29:37 +08:00
.iter(rtxn)?
.map(|ret| {
ret.map_err(Error::from).and_then(|(name, _)| {
2022-10-21 00:00:07 +08:00
self.index(rtxn, name).map(|index| (name.to_string(), index))
})
})
.collect()
}
2022-10-17 22:30:18 +08:00
/// Swap two index names.
pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> {
let lhs_uuid = self
.index_mapping
.get(wtxn, lhs)?
2022-10-03 21:29:37 +08:00
.ok_or_else(|| Error::IndexNotFound(lhs.to_string()))?;
let rhs_uuid = self
.index_mapping
.get(wtxn, rhs)?
2022-10-03 21:29:37 +08:00
.ok_or_else(|| Error::IndexNotFound(rhs.to_string()))?;
self.index_mapping.put(wtxn, lhs, &rhs_uuid)?;
self.index_mapping.put(wtxn, rhs, &lhs_uuid)?;
Ok(())
}
2022-10-17 22:30:18 +08:00
pub fn index_exists(&self, rtxn: &RoTxn, name: &str) -> Result<bool> {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}
pub fn indexer_config(&self) -> &IndexerConfig {
&self.indexer_config
}
}