From e389c088ebb729865d3d93230852c8cfdac28b1a Mon Sep 17 00:00:00 2001 From: tamo Date: Wed, 28 Apr 2021 16:43:49 +0200 Subject: [PATCH 01/54] WIP: rebasing on master --- meilisearch-http/src/dump.rs | 423 ------------------ meilisearch-http/src/index_controller/dump.rs | 258 +++++++++++ .../src/index_controller/index_actor/actor.rs | 36 +- .../index_actor/handle_impl.rs | 9 +- .../index_controller/index_actor/message.rs | 5 + .../src/index_controller/index_actor/mod.rs | 1 - meilisearch-http/src/index_controller/mod.rs | 9 +- .../index_controller/update_actor/actor.rs | 93 +++- .../update_actor/handle_impl.rs | 14 + .../index_controller/update_actor/message.rs | 9 + .../src/index_controller/update_actor/mod.rs | 5 +- .../update_actor/update_store.rs | 47 ++ .../index_controller/uuid_resolver/actor.rs | 7 + .../uuid_resolver/handle_impl.rs | 10 + .../index_controller/uuid_resolver/message.rs | 4 + .../src/index_controller/uuid_resolver/mod.rs | 1 + .../index_controller/uuid_resolver/store.rs | 28 ++ meilisearch-http/src/option.rs | 1 + meilisearch-http/src/routes/index.rs | 11 + .../tests/settings/get_settings.rs | 2 +- 20 files changed, 540 insertions(+), 433 deletions(-) delete mode 100644 meilisearch-http/src/dump.rs create mode 100644 meilisearch-http/src/index_controller/dump.rs diff --git a/meilisearch-http/src/dump.rs b/meilisearch-http/src/dump.rs deleted file mode 100644 index 544fffaa7..000000000 --- a/meilisearch-http/src/dump.rs +++ /dev/null @@ -1,423 +0,0 @@ -use std::fs::{create_dir_all, File}; -use std::io::prelude::*; -use std::path::{Path, PathBuf}; -use std::sync::Mutex; -use std::thread; - -use actix_web::web; -use chrono::offset::Utc; -use indexmap::IndexMap; -use log::{error, info}; -use once_cell::sync::Lazy; -use serde::{Deserialize, Serialize}; -use serde_json::json; -use tempfile::TempDir; - -use crate::Data; -use crate::error::{Error, ResponseError}; -use crate::helpers::compression; -use crate::routes::index; -use 
crate::routes::setting::Settings; -use crate::routes::index::IndexResponse; - -// Mutex to share dump progress. -static DUMP_INFO: Lazy>> = Lazy::new(Mutex::default); - -#[derive(Debug, Serialize, Deserialize, Copy, Clone)] -enum DumpVersion { - V1, -} - -impl DumpVersion { - const CURRENT: Self = Self::V1; -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct DumpMetadata { - indexes: Vec, - db_version: String, - dump_version: DumpVersion, -} - -impl DumpMetadata { - /// Create a DumpMetadata with the current dump version of meilisearch. - pub fn new(indexes: Vec, db_version: String) -> Self { - DumpMetadata { - indexes, - db_version, - dump_version: DumpVersion::CURRENT, - } - } - - /// Extract DumpMetadata from `metadata.json` file present at provided `dir_path` - fn from_path(dir_path: &Path) -> Result { - let path = dir_path.join("metadata.json"); - let file = File::open(path)?; - let reader = std::io::BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; - - Ok(metadata) - } - - /// Write DumpMetadata in `metadata.json` file at provided `dir_path` - fn to_path(&self, dir_path: &Path) -> Result<(), Error> { - let path = dir_path.join("metadata.json"); - let file = File::create(path)?; - - serde_json::to_writer(file, &self)?; - - Ok(()) - } -} - -/// Extract Settings from `settings.json` file present at provided `dir_path` -fn settings_from_path(dir_path: &Path) -> Result { - let path = dir_path.join("settings.json"); - let file = File::open(path)?; - let reader = std::io::BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; - - Ok(metadata) -} - -/// Write Settings in `settings.json` file at provided `dir_path` -fn settings_to_path(settings: &Settings, dir_path: &Path) -> Result<(), Error> { - let path = dir_path.join("settings.json"); - let file = File::create(path)?; - - serde_json::to_writer(file, settings)?; - - Ok(()) -} - -/// Import settings and documents of a dump with 
version `DumpVersion::V1` in specified index. -fn import_index_v1( - data: &Data, - dumps_dir: &Path, - index_uid: &str, - document_batch_size: usize, - write_txn: &mut MainWriter, -) -> Result<(), Error> { - - // open index - let index = data - .db - .open_index(index_uid) - .ok_or(Error::index_not_found(index_uid))?; - - // index dir path in dump dir - let index_path = &dumps_dir.join(index_uid); - - // extract `settings.json` file and import content - let settings = settings_from_path(&index_path)?; - let settings = settings.to_update().map_err(|e| Error::dump_failed(format!("importing settings for index {}; {}", index_uid, e)))?; - apply_settings_update(write_txn, &index, settings)?; - - // create iterator over documents in `documents.jsonl` to make batch importation - // create iterator over documents in `documents.jsonl` to make batch importation - let documents = { - let file = File::open(&index_path.join("documents.jsonl"))?; - let reader = std::io::BufReader::new(file); - let deserializer = serde_json::Deserializer::from_reader(reader); - deserializer.into_iter::>() - }; - - // batch import document every `document_batch_size`: - // create a Vec to bufferize documents - let mut values = Vec::with_capacity(document_batch_size); - // iterate over documents - for document in documents { - // push document in buffer - values.push(document?); - // if buffer is full, create and apply a batch, and clean buffer - if values.len() == document_batch_size { - let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size)); - apply_documents_addition(write_txn, &index, batch)?; - } - } - - // apply documents remaining in the buffer - if !values.is_empty() { - apply_documents_addition(write_txn, &index, values)?; - } - - // sync index information: stats, updated_at, last_update - if let Err(e) = crate::index_update_callback_txn(index, index_uid, data, write_txn) { - return Err(Error::Internal(e)); - } - - Ok(()) -} - -/// Import dump from `dump_path` 
in database. -pub fn import_dump( - data: &Data, - dump_path: &Path, - document_batch_size: usize, -) -> Result<(), Error> { - info!("Importing dump from {:?}...", dump_path); - - // create a temporary directory - let tmp_dir = TempDir::new()?; - let tmp_dir_path = tmp_dir.path(); - - // extract dump in temporary directory - compression::from_tar_gz(dump_path, tmp_dir_path)?; - - // read dump metadata - let metadata = DumpMetadata::from_path(&tmp_dir_path)?; - - // choose importation function from DumpVersion of metadata - let import_index = match metadata.dump_version { - DumpVersion::V1 => import_index_v1, - }; - - // remove indexes which have same `uid` than indexes to import and create empty indexes - let existing_index_uids = data.db.indexes_uids(); - for index in metadata.indexes.iter() { - if existing_index_uids.contains(&index.uid) { - data.db.delete_index(index.uid.clone())?; - } - index::create_index_sync(&data.db, index.uid.clone(), index.name.clone(), index.primary_key.clone())?; - } - - // import each indexes content - data.db.main_write::<_, _, Error>(|mut writer| { - for index in metadata.indexes { - import_index(&data, tmp_dir_path, &index.uid, document_batch_size, &mut writer)?; - } - Ok(()) - })?; - - info!("Dump importation from {:?} succeed", dump_path); - Ok(()) -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] -#[serde(rename_all = "snake_case")] -pub enum DumpStatus { - Done, - InProgress, - Failed, -} - -#[derive(Debug, Serialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct DumpInfo { - pub uid: String, - pub status: DumpStatus, - #[serde(skip_serializing_if = "Option::is_none", flatten)] - pub error: Option, -} - -impl DumpInfo { - pub fn new(uid: String, status: DumpStatus) -> Self { - Self { uid, status, error: None } - } - - pub fn with_error(mut self, error: ResponseError) -> Self { - self.status = DumpStatus::Failed; - self.error = Some(json!(error)); - - self - } - - pub fn dump_already_in_progress(&self) -> 
bool { - self.status == DumpStatus::InProgress - } - - pub fn get_current() -> Option { - DUMP_INFO.lock().unwrap().clone() - } - - pub fn set_current(&self) { - *DUMP_INFO.lock().unwrap() = Some(self.clone()); - } -} - -/// Generate uid from creation date -fn generate_uid() -> String { - Utc::now().format("%Y%m%d-%H%M%S%3f").to_string() -} - -/// Infer dumps_dir from dump_uid -pub fn compressed_dumps_dir(dumps_dir: &Path, dump_uid: &str) -> PathBuf { - dumps_dir.join(format!("{}.dump", dump_uid)) -} - -/// Write metadata in dump -fn dump_metadata(data: &web::Data, dir_path: &Path, indexes: Vec) -> Result<(), Error> { - let (db_major, db_minor, db_patch) = data.db.version(); - let metadata = DumpMetadata::new(indexes, format!("{}.{}.{}", db_major, db_minor, db_patch)); - - metadata.to_path(dir_path) -} - -/// Export settings of provided index in dump -fn dump_index_settings(data: &web::Data, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> { - let settings = crate::routes::setting::get_all_sync(data, reader, index_uid)?; - - settings_to_path(&settings, dir_path) -} - -/// Export updates of provided index in dump -fn dump_index_updates(data: &web::Data, reader: &UpdateReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> { - let updates_path = dir_path.join("updates.jsonl"); - let updates = crate::routes::index::get_all_updates_status_sync(data, reader, index_uid)?; - - let file = File::create(updates_path)?; - - for update in updates { - serde_json::to_writer(&file, &update)?; - writeln!(&file)?; - } - - Ok(()) -} - -/// Export documents of provided index in dump -fn dump_index_documents(data: &web::Data, reader: &MainReader, dir_path: &Path, index_uid: &str) -> Result<(), Error> { - let documents_path = dir_path.join("documents.jsonl"); - let file = File::create(documents_path)?; - let dump_batch_size = data.dump_batch_size; - - let mut offset = 0; - loop { - let documents = crate::routes::document::get_all_documents_sync(data, 
reader, index_uid, offset, dump_batch_size, None)?; - if documents.is_empty() { break; } else { offset += dump_batch_size; } - - for document in documents { - serde_json::to_writer(&file, &document)?; - writeln!(&file)?; - } - } - - Ok(()) -} - -/// Write error with a context. -fn fail_dump_process(dump_info: DumpInfo, context: &str, error: E) { - let error_message = format!("{}; {}", context, error); - - error!("Something went wrong during dump process: {}", &error_message); - dump_info.with_error(Error::dump_failed(error_message).into()).set_current(); -} - -/// Main function of dump. -fn dump_process(data: web::Data, dumps_dir: PathBuf, dump_info: DumpInfo) { - // open read transaction on Update - let update_reader = match data.db.update_read_txn() { - Ok(r) => r, - Err(e) => { - fail_dump_process(dump_info, "creating RO transaction on updates", e); - return ; - } - }; - - // open read transaction on Main - let main_reader = match data.db.main_read_txn() { - Ok(r) => r, - Err(e) => { - fail_dump_process(dump_info, "creating RO transaction on main", e); - return ; - } - }; - - // create a temporary directory - let tmp_dir = match TempDir::new() { - Ok(tmp_dir) => tmp_dir, - Err(e) => { - fail_dump_process(dump_info, "creating temporary directory", e); - return ; - } - }; - let tmp_dir_path = tmp_dir.path(); - - // fetch indexes - let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) { - Ok(indexes) => indexes, - Err(e) => { - fail_dump_process(dump_info, "listing indexes", e); - return ; - } - }; - - // create metadata - if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) { - fail_dump_process(dump_info, "generating metadata", e); - return ; - } - - // export settings, updates and documents for each indexes - for index in indexes { - let index_path = tmp_dir_path.join(&index.uid); - - // create index sub-dircetory - if let Err(e) = create_dir_all(&index_path) { - fail_dump_process(dump_info, &format!("creating directory 
for index {}", &index.uid), e); - return ; - } - - // export settings - if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) { - fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e); - return ; - } - - // export documents - if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) { - fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e); - return ; - } - - // export updates - if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) { - fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e); - return ; - } - } - - // compress dump in a file named `{dump_uid}.dump` in `dumps_dir` - if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) { - fail_dump_process(dump_info, "compressing dump", e); - return ; - } - - // update dump info to `done` - let resume = DumpInfo::new( - dump_info.uid, - DumpStatus::Done - ); - - resume.set_current(); -} - -pub fn init_dump_process(data: &web::Data, dumps_dir: &Path) -> Result { - create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?; - - // check if a dump is already in progress - if let Some(resume) = DumpInfo::get_current() { - if resume.dump_already_in_progress() { - return Err(Error::dump_conflict()) - } - } - - // generate a new dump info - let info = DumpInfo::new( - generate_uid(), - DumpStatus::InProgress - ); - - info.set_current(); - - let data = data.clone(); - let dumps_dir = dumps_dir.to_path_buf(); - let info_cloned = info.clone(); - // run dump process in a new thread - thread::spawn(move || - dump_process(data, dumps_dir, info_cloned) - ); - - Ok(info) -} diff --git a/meilisearch-http/src/index_controller/dump.rs b/meilisearch-http/src/index_controller/dump.rs new file mode 100644 index 000000000..afdcfd9ce --- 
/dev/null +++ b/meilisearch-http/src/index_controller/dump.rs @@ -0,0 +1,258 @@ +use std::{ + fs::File, + path::{Path, PathBuf}, + sync::Arc, +}; + +use anyhow::bail; +use heed::EnvOpenOptions; +use log::{error, info}; +use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use tokio::fs; +use tokio::task::spawn_blocking; + +use super::update_actor::UpdateActorHandle; +use super::uuid_resolver::UuidResolverHandle; +use super::IndexMetadata; +use crate::index::Index; +use crate::index_controller::uuid_resolver; +use crate::{helpers::compression, index::Settings}; + +#[derive(Debug, Serialize, Deserialize, Copy, Clone)] +enum DumpVersion { + V1, +} + +impl DumpVersion { + const CURRENT: Self = Self::V1; +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct DumpMetadata { + indexes: Vec, + db_version: String, + dump_version: DumpVersion, +} + +impl DumpMetadata { + /// Create a DumpMetadata with the current dump version of meilisearch. 
+ pub fn new(indexes: Vec, db_version: String) -> Self { + DumpMetadata { + indexes, + db_version, + dump_version: DumpVersion::CURRENT, + } + } + + /// Extract DumpMetadata from `metadata.json` file present at provided `dir_path` + fn from_path(dir_path: &Path) -> anyhow::Result { + let path = dir_path.join("metadata.json"); + let file = File::open(path)?; + let reader = std::io::BufReader::new(file); + let metadata = serde_json::from_reader(reader)?; + + Ok(metadata) + } + + /// Write DumpMetadata in `metadata.json` file at provided `dir_path` + fn to_path(&self, dir_path: &Path) -> anyhow::Result<()> { + let path = dir_path.join("metadata.json"); + let file = File::create(path)?; + + serde_json::to_writer(file, &self)?; + + Ok(()) + } +} + +pub struct DumpService { + uuid_resolver_handle: R, + update_handle: U, + dump_path: PathBuf, + db_name: String, +} + +impl DumpService +where + U: UpdateActorHandle, + R: UuidResolverHandle, +{ + pub fn new( + uuid_resolver_handle: R, + update_handle: U, + dump_path: PathBuf, + db_name: String, + ) -> Self { + Self { + uuid_resolver_handle, + update_handle, + dump_path, + db_name, + } + } + + pub async fn run(self) { + if let Err(e) = self.perform_dump().await { + error!("{}", e); + } + } + + async fn perform_dump(&self) -> anyhow::Result<()> { + info!("Performing dump."); + + let dump_dir = self.dump_path.clone(); + fs::create_dir_all(&dump_dir).await?; + let temp_dump_dir = spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); + + let uuids = self + .uuid_resolver_handle + .dump(temp_dump_path.clone()) + .await?; + + if uuids.is_empty() { + return Ok(()); + } + + let tasks = uuids + .iter() + .map(|&uuid| self.update_handle.dump(uuid, temp_dump_path.clone())) + .collect::>(); + + futures::future::try_join_all(tasks).await?; + + let dump_dir = self.dump_path.clone(); + let dump_path = self.dump_path.join(format!("{}.dump", self.db_name)); + let dump_path = 
spawn_blocking(move || -> anyhow::Result { + let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?; + let temp_dump_file_path = temp_dump_file.path().to_owned(); + compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; + temp_dump_file.persist(&dump_path)?; + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + + Ok(()) + } +} + +/// Extract Settings from `settings.json` file present at provided `dir_path` +fn settings_from_path(dir_path: &Path) -> anyhow::Result { + let path = dir_path.join("settings.json"); + let file = File::open(path)?; + let reader = std::io::BufReader::new(file); + let metadata = serde_json::from_reader(reader)?; + + Ok(metadata) +} + +/// Write Settings in `settings.json` file at provided `dir_path` +fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> { + let path = dir_path.join("settings.json"); + let file = File::create(path)?; + + serde_json::to_writer(file, settings)?; + + Ok(()) +} + +fn import_index_v1(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { + std::fs::create_dir_all(&index_path)?; + let mut options = EnvOpenOptions::new(); + options.map_size(size); + let index = milli::Index::new(options, index_path)?; + let index = Index(Arc::new(index)); + + // extract `settings.json` file and import content + let settings = settings_from_path(&dump_path)?; + let update_builder = UpdateBuilder::new(0); + index.update_settings(&settings, update_builder)?; + + let update_builder = UpdateBuilder::new(1); + let file = File::open(&index_path.join("documents.jsonl"))?; + let reader = std::io::BufReader::new(file); + index.update_documents( + UpdateFormat::JsonStream, + IndexDocumentsMethod::ReplaceDocuments, + reader, + update_builder, + None, + )?; + + // the last step: we extract the milli::Index and close it + Arc::try_unwrap(index.0) + .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") + .unwrap() + 
.prepare_for_closing() + .wait(); + + Ok(()) +} + +pub fn load_dump( + db_path: impl AsRef, + dump_path: impl AsRef, + size: usize, +) -> anyhow::Result<()> { + info!("Importing dump from {}...", dump_path.as_ref().display()); + let db_path = db_path.as_ref(); + let dump_path = dump_path.as_ref(); + let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?; + + // extract the dump in a temporary directory + let tmp_dir = TempDir::new()?; + let tmp_dir_path = tmp_dir.path(); + compression::from_tar_gz(dump_path, tmp_dir_path)?; + + // read dump metadata + let metadata = DumpMetadata::from_path(&tmp_dir_path)?; + + // choose importation function from DumpVersion of metadata + let import_index = match metadata.dump_version { + DumpVersion::V1 => import_index_v1, + }; + + // remove indexes which have same `uuid` than indexes to import and create empty indexes + let existing_index_uids = futures::executor::block_on(uuid_resolver.list())?; + + info!("Deleting indexes provided in the dump..."); + for idx in &metadata.indexes { + if let Some((_, uuid)) = existing_index_uids.iter().find(|(s, _)| s == &idx.uid) { + // if we find the index in the `uuid_resolver` it's supposed to exist on the file system + // and we want to delete it + let path = db_path.join(&format!("indexes/index-{}", uuid)); + info!("Deleting {}", path.display()); + use std::io::ErrorKind::*; + match std::fs::remove_dir_all(path) { + Ok(()) => (), + // if an index was present in the metadata but missing of the fs we can ignore the + // problem because we are going to create it later + Err(e) if e.kind() == NotFound => (), + Err(e) => bail!(e), + } + } else { + // if the index does not exist in the `uuid_resolver` we create it + futures::executor::block_on(uuid_resolver.create(idx.uid.clone()))?; + } + } + + // import each indexes content + for idx in metadata.indexes { + let dump_path = tmp_dir_path.join(&idx.uid); + let uuid = futures::executor::block_on(uuid_resolver.get(idx.uid))?; + let 
index_path = db_path.join(&format!("indexes/index-{}", uuid)); + + info!("Importing dump from {} into {}...", dump_path.display(), index_path.display()); + import_index(size, &dump_path, &index_path).unwrap(); + info!("Dump importation from {} succeed", dump_path.display()); + } + + + info!("Dump importation from {} succeed", dump_path.display()); + Ok(()) +} diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 1f1cf146b..535c405dc 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -36,6 +36,9 @@ impl IndexActor { Ok(Self { receiver, update_handler, store }) } + /// `run` poll the write_receiver and read_receiver concurrently, but while messages send + /// through the read channel are processed concurrently, the messages sent through the write + /// channel are processed one at a time. pub async fn run(mut self) { let mut receiver = self .receiver @@ -119,6 +122,9 @@ impl IndexActor { Snapshot { uuid, path, ret } => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } + Dump { uuid, path, ret } => { + let _ = ret.send(self.handle_dump(uuid, path).await); + } GetStats { uuid, ret } => { let _ = ret.send(self.handle_get_stats(uuid).await); } @@ -306,7 +312,35 @@ impl IndexActor { Ok(()) } - async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult { + async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> { + use tokio::fs::create_dir_all; + + path.push("indexes"); + create_dir_all(&path) + .await + .map_err(|e| IndexError::Error(e.into()))?; + + if let Some(index) = self.store.get(uuid).await? 
{ + let mut index_path = path.join(format!("index-{}", uuid)); + create_dir_all(&index_path) + .await + .map_err(|e| IndexError::Error(e.into()))?; + index_path.push("data.mdb"); + spawn_blocking(move || -> anyhow::Result<()> { + // Get write txn to wait for ongoing write transaction before dump. + let _txn = index.write_txn()?; + index.env.copy_to_path(index_path, CompactionOption::Enabled)?; + Ok(()) + }) + .await + .map_err(|e| IndexError::Error(e.into()))? + .map_err(IndexError::Error)?; + } + + Ok(()) + } + + async fn handle_get_stats(&self, uuid: Uuid) -> Result { let index = self .store .get(uuid) diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index 4569ea020..d625a763e 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -136,7 +136,14 @@ impl IndexActorHandle for IndexActorHandleImpl { Ok(receiver.await.expect("IndexActor has been killed")?) } - async fn get_index_stats(&self, uuid: Uuid) -> IndexResult { + async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { + let (ret, receiver) = oneshot::channel(); + let msg = IndexMsg::Dump { uuid, path, ret }; + let _ = self.read_sender.send(msg).await; + Ok(receiver.await.expect("IndexActor has been killed")?) 
+ } + + async fn get_index_stats(&self, uuid: Uuid) -> Result { let (ret, receiver) = oneshot::channel(); let msg = IndexMsg::GetStats { uuid, ret }; let _ = self.sender.send(msg).await; diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 4e2824871..0d88532ca 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -60,6 +60,11 @@ pub enum IndexMsg { path: PathBuf, ret: oneshot::Sender>, }, + Dump { + uuid: Uuid, + path: PathBuf, + ret: oneshot::Sender>, + }, GetStats { uuid: Uuid, ret: oneshot::Sender>, diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index f7f230349..46105742b 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -180,5 +180,4 @@ mod test { async fn get_index_stats(&self, uuid: Uuid) -> IndexResult { self.as_ref().get_index_stats(uuid).await } - } } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index f1da36740..10b9142cc 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -5,7 +5,6 @@ use std::time::Duration; use actix_web::web::{Bytes, Payload}; use anyhow::bail; -use chrono::{DateTime, Utc}; use futures::stream::StreamExt; use log::info; use milli::FieldsDistribution; @@ -25,6 +24,7 @@ use crate::option::Opt; mod index_actor; mod snapshot; +mod dump; mod update_actor; mod update_handler; mod updates; @@ -87,6 +87,13 @@ impl IndexController { options.ignore_snapshot_if_db_exists, options.ignore_missing_snapshot, )?; + } else if let Some(ref path) = options.import_dump { + load_dump( + &options.db_path, + path, + index_size, + ); + } std::fs::create_dir_all(&path)?; diff --git 
a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index e47edc5bc..7885d0b3b 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -71,11 +71,16 @@ where Some(Delete { uuid, ret }) => { let _ = ret.send(self.handle_delete(uuid).await); } - Some(Snapshot { uuids, path, ret }) => { - let _ = ret.send(self.handle_snapshot(uuids, path).await); + Some(Snapshot { uuid, path, ret }) => { + let _ = ret.send(self.handle_snapshot(uuid, path).await); + } + Some(Dump { uuid, path, ret }) => { + let _ = ret.send(self.handle_dump(uuid, path).await); } Some(GetInfo { ret }) => { let _ = ret.send(self.handle_get_info().await); + Some(GetSize { uuid, ret }) => { + let _ = ret.send(self.handle_get_size(uuid).await); } None => break, } @@ -194,9 +199,51 @@ where } async fn handle_delete(&self, uuid: Uuid) -> Result<()> { - let store = self.store.clone(); + let store = self.store.delete(uuid).await?; - tokio::task::spawn_blocking(move || store.delete_all(uuid)) + if let Some(store) = store { + tokio::task::spawn(async move { + let store = get_arc_ownership_blocking(store).await; + tokio::task::spawn_blocking(move || { + store.prepare_for_closing().wait(); + info!("Update store {} was closed.", uuid); + }); + }); + } + + Ok(()) + } + + async fn handle_create(&self, uuid: Uuid) -> Result<()> { + let _ = self.store.get_or_create(uuid).await?; + Ok(()) + } + + Ok(()) + } + + async fn handle_create(&self, uuid: Uuid) -> Result<()> { + let _ = self.store.get_or_create(uuid).await?; + Ok(()) + } + + async fn handle_snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { + let index_handle = self.index_handle.clone(); + if let Some(update_store) = self.store.get(uuid).await? 
{ + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { + // acquire write lock to prevent further writes during snapshot + // the update lock must be acquired BEFORE the write lock to prevent dead lock + let _lock = update_store.update_lock.lock(); + let mut txn = update_store.env.write_txn()?; + + // create db snapshot + update_store.snapshot(&mut txn, &path, uuid)?; + + futures::executor::block_on( + async move { index_handle.snapshot(uuid, path).await }, + )?; + Ok(()) + }) .await .map_err(|e| UpdateError::Error(e.into()))? .map_err(|e| UpdateError::Error(e.into()))?; @@ -245,4 +292,42 @@ where Ok(info) } + + async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { + let index_handle = self.index_handle.clone(); + if let Some(update_store) = self.store.get(uuid).await? { + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { + // acquire write lock to prevent further writes during the dump + // the update lock must be acquired BEFORE the write lock to prevent dead lock + let _lock = update_store.update_lock.lock(); + let mut txn = update_store.env.write_txn()?; + + // create db dump + update_store.dump(&mut txn, &path, uuid)?; + + futures::executor::block_on( + async move { index_handle.dump(uuid, path).await }, + )?; + Ok(()) + }) + .await + .map_err(|e| UpdateError::Error(e.into()))? + .map_err(|e| UpdateError::Error(e.into()))?; + } + + Ok(()) + } + + async fn handle_get_size(&self, uuid: Uuid) -> Result { + let size = match self.store.get(uuid).await? { + Some(update_store) => tokio::task::spawn_blocking(move || -> anyhow::Result { + let txn = update_store.env.read_txn()?; + + update_store.get_size(&txn) + }) + .await + .map_err(|e| UpdateError::Error(e.into()))? 
+ .map_err(|e| UpdateError::Error(e.into()))?, + None => 0, + }; } diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index 999481573..569b896b0 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -78,6 +78,20 @@ where receiver.await.expect("update actor killed.") } + async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { + let (ret, receiver) = oneshot::channel(); + let msg = UpdateMsg::Dump { uuid, path, ret }; + let _ = self.sender.send(msg).await; + receiver.await.expect("update actor killed.") + } + + async fn get_size(&self, uuid: Uuid) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = UpdateMsg::GetSize { uuid, ret }; + let _ = self.sender.send(msg).await; + receiver.await.expect("update actor killed.") + } + async fn update( &self, meta: UpdateMeta, diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 17b2b3579..3f39c224f 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -31,7 +31,16 @@ pub enum UpdateMsg { path: PathBuf, ret: oneshot::Sender>, }, + Dump { + uuid: Uuid, + path: PathBuf, + ret: oneshot::Sender>, + }, GetInfo { ret: oneshot::Sender>, }, + GetSize { + uuid: Uuid, + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index e7a12b7ff..4d8ab6f20 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -40,8 +40,11 @@ pub trait UpdateActorHandle { async fn get_all_updates_status(&self, uuid: Uuid) -> Result>; async fn update_status(&self, uuid: Uuid, id: 
u64) -> Result; async fn delete(&self, uuid: Uuid) -> Result<()>; - async fn snapshot(&self, uuids: HashSet, path: PathBuf) -> Result<()>; + async fn create(&self, uuid: Uuid) -> Result<()>; + async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; + async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>; async fn get_info(&self) -> Result; + async fn get_size(&self, uuid: Uuid) -> Result; async fn update( &self, meta: UpdateMeta, diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index 6a916af33..4bc4c8c75 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -499,9 +499,56 @@ impl UpdateStore { Ok(()) } + pub fn dump( + &self, + txn: &mut heed::RwTxn, + path: impl AsRef, + uuid: Uuid, + ) -> anyhow::Result<()> { + let update_path = path.as_ref().join("updates"); + create_dir_all(&update_path)?; + + let mut dump_path = update_path.join(format!("update-{}", uuid)); + // acquire write lock to prevent further writes during dump + create_dir_all(&dump_path)?; + dump_path.push("data.mdb"); + + // create db dump + self.env.copy_to_path(&dump_path, CompactionOption::Enabled)?; + + let update_files_path = update_path.join("update_files"); + create_dir_all(&update_files_path)?; + + for path in self.pending.iter(&txn)? { + let (_, path) = path?; + let name = path.file_name().unwrap(); + let to = update_files_path.join(name); + copy(path, to)?; + } + + Ok(()) + } + pub fn get_info(&self) -> anyhow::Result { let mut size = self.env.size(); let txn = self.env.read_txn()?; + for entry in self.pending_queue.iter(&txn)? 
{ + let (_, pending) = entry?; + if let Some(path) = pending.content_path() { + size += File::open(path)?.metadata()?.len(); + } + } + let processing = match *self.state.read() { + State::Processing(uuid, _) => Some(uuid), + _ => None, + }; + + Ok(UpdateStoreInfo { size, processing }) + } + + pub fn get_size(&self, txn: &heed::RoTxn) -> anyhow::Result { + let mut size = self.env.size(); + let txn = self.env.read_txn()?; for entry in self.pending_queue.iter(&txn)? { let (_, pending) = entry?; diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs index 253326276..9c180e4a8 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs @@ -41,6 +41,9 @@ impl UuidResolverActor { Some(SnapshotRequest { path, ret }) => { let _ = ret.send(self.handle_snapshot(path).await); } + Some(DumpRequest { path, ret }) => { + let _ = ret.send(self.handle_dump(path).await); + } Some(GetSize { ret }) => { let _ = ret.send(self.handle_get_size().await); } @@ -82,6 +85,10 @@ impl UuidResolverActor { self.store.snapshot(path).await } + async fn handle_dump(&self, path: PathBuf) -> Result> { + self.store.dump(path).await + } + async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> { if !is_index_uid_valid(&uid) { return Err(UuidError::BadlyFormatted(uid)); diff --git a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs index db4c482bd..e47f9a8e0 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs @@ -68,6 +68,7 @@ impl UuidResolverHandle for UuidResolverHandleImpl { .expect("Uuid resolver actor has been killed")?) 
} + /// TODO: we should merge this function with the dump function async fn snapshot(&self, path: PathBuf) -> Result> { let (ret, receiver) = oneshot::channel(); let msg = UuidResolveMsg::SnapshotRequest { path, ret }; @@ -77,6 +78,15 @@ impl UuidResolverHandle for UuidResolverHandleImpl { .expect("Uuid resolver actor has been killed")?) } + async fn dump(&self, path: PathBuf) -> Result> { + let (ret, receiver) = oneshot::channel(); + let msg = UuidResolveMsg::DumpRequest { path, ret }; + let _ = self.sender.send(msg).await; + Ok(receiver + .await + .expect("Uuid resolver actor has been killed")?) + } + async fn get_size(&self) -> Result { let (ret, receiver) = oneshot::channel(); let msg = UuidResolveMsg::GetSize { ret }; diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs b/meilisearch-http/src/index_controller/uuid_resolver/message.rs index a72bf0587..67493c2cd 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/message.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/message.rs @@ -31,6 +31,10 @@ pub enum UuidResolveMsg { path: PathBuf, ret: oneshot::Sender>>, }, + DumpRequest { + path: PathBuf, + ret: oneshot::Sender>>, + }, GetSize { ret: oneshot::Sender>, }, diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs index ef17133ff..a8361095c 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs @@ -31,6 +31,7 @@ pub trait UuidResolverHandle { async fn delete(&self, name: String) -> anyhow::Result; async fn list(&self) -> anyhow::Result>; async fn snapshot(&self, path: PathBuf) -> Result>; + async fn dump(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 29c034c44..df4c3a2fb 100644 --- 
a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -21,6 +21,7 @@ pub trait UuidStore { async fn list(&self) -> Result>; async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; async fn snapshot(&self, path: PathBuf) -> Result>; + async fn dump(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; } @@ -130,6 +131,8 @@ impl UuidStore for HeedUuidStore { .await? } + // TODO: we should merge this function and the following function for the dump. it's exactly + // the same code async fn snapshot(&self, mut path: PathBuf) -> Result> { let env = self.env.clone(); let db = self.db; @@ -155,6 +158,31 @@ impl UuidStore for HeedUuidStore { .await? } + async fn dump(&self, mut path: PathBuf) -> Result> { + let env = self.env.clone(); + let db = self.db; + tokio::task::spawn_blocking(move || { + // Write transaction to acquire a lock on the database. + let txn = env.write_txn()?; + let mut entries = Vec::new(); + for entry in db.iter(&txn)? { + let (_, uuid) = entry?; + let uuid = Uuid::from_slice(uuid)?; + entries.push(uuid) + } + + // only perform dump if there are indexes + if !entries.is_empty() { + path.push("index_uuids"); + create_dir_all(&path).unwrap(); + path.push("data.mdb"); + env.copy_to_path(path, CompactionOption::Enabled)?; + } + Ok(entries) + }) + .await? + } + async fn get_size(&self) -> Result { Ok(self.env.size()) } diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 1997718cc..87238c4d7 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -203,6 +203,7 @@ pub struct Opt { pub import_dump: Option, /// The batch size used in the importation process, the bigger it is the faster the dump is created. 
+ /// This option is now deprecated and will be ignored #[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")] pub dump_batch_size: usize, diff --git a/meilisearch-http/src/routes/index.rs b/meilisearch-http/src/routes/index.rs index 4424c8cfe..1afc01806 100644 --- a/meilisearch-http/src/routes/index.rs +++ b/meilisearch-http/src/routes/index.rs @@ -1,5 +1,6 @@ use actix_web::{delete, get, post, put}; use actix_web::{web, HttpResponse}; +use chrono::DateTime; use serde::Deserialize; use crate::error::ResponseError; @@ -68,6 +69,16 @@ struct UpdateIndexRequest { primary_key: Option, } +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct UpdateIndexResponse { + name: String, + uid: String, + created_at: DateTime, + updated_at: DateTime, + primary_key: Option, +} + #[put("/indexes/{index_uid}", wrap = "Authentication::Private")] async fn update_index( data: web::Data, diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index 4230e19f8..e5f51d7f0 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -19,7 +19,7 @@ async fn get_settings() { assert_eq!(settings.keys().len(), 6); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); - assert_eq!(settings["attributesForFaceting"], json!({})); + assert_eq!(settings["attributesForFaceting"], json!(null)); assert_eq!(settings["distinctAttribute"], json!(null)); assert_eq!( settings["rankingRules"], From c4d898a26545a06e01bc696f24d6e7b00198c1f8 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 27 Apr 2021 10:27:43 +0200 Subject: [PATCH 02/54] split the dumps between v1 and v2 --- meilisearch-http/src/data/mod.rs | 4 +- .../index_controller/{dump.rs => dump/mod.rs} | 91 +++++--------- .../src/index_controller/dump/v1.rs | 119 ++++++++++++++++++ .../src/index_controller/dump/v2.rs | 51 ++++++++
meilisearch-http/src/index_controller/mod.rs | 4 +- meilisearch-http/src/main.rs | 2 +- 6 files changed, 205 insertions(+), 66 deletions(-) rename meilisearch-http/src/index_controller/{dump.rs => dump/mod.rs} (73%) create mode 100644 meilisearch-http/src/index_controller/dump/v1.rs create mode 100644 meilisearch-http/src/index_controller/dump/v2.rs diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index c7979210e..e2bb7fbfb 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -55,10 +55,10 @@ impl ApiKeys { } impl Data { - pub fn new(options: Opt) -> anyhow::Result { + pub async fn new(options: Opt) -> anyhow::Result { let path = options.db_path.clone(); - let index_controller = IndexController::new(&path, &options)?; + let index_controller = IndexController::new(&path, &options).await?; let mut api_keys = ApiKeys { master: options.clone().master_key, diff --git a/meilisearch-http/src/index_controller/dump.rs b/meilisearch-http/src/index_controller/dump/mod.rs similarity index 73% rename from meilisearch-http/src/index_controller/dump.rs rename to meilisearch-http/src/index_controller/dump/mod.rs index afdcfd9ce..7278a7b77 100644 --- a/meilisearch-http/src/index_controller/dump.rs +++ b/meilisearch-http/src/index_controller/dump/mod.rs @@ -1,14 +1,13 @@ -use std::{ - fs::File, - path::{Path, PathBuf}, - sync::Arc, -}; +mod v1; +mod v2; + +use std::{fs::File, path::{Path, PathBuf}, sync::Arc}; use anyhow::bail; use heed::EnvOpenOptions; use log::{error, info}; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; -use serde::{Deserialize, Serialize}; +use serde::{de::Deserializer, Deserialize, Serialize}; use tempfile::TempDir; use tokio::fs; use tokio::task::spawn_blocking; @@ -20,13 +19,30 @@ use crate::index::Index; use crate::index_controller::uuid_resolver; use crate::{helpers::compression, index::Settings}; +pub (super) fn deserialize_some<'de, T, D>(deserializer: D) -> 
Result, D::Error> +where + T: Deserialize<'de>, + D: Deserializer<'de>, +{ + Deserialize::deserialize(deserializer).map(Some) +} + #[derive(Debug, Serialize, Deserialize, Copy, Clone)] enum DumpVersion { V1, + V2, } impl DumpVersion { - const CURRENT: Self = Self::V1; + const CURRENT: Self = Self::V2; + + /// Select the good importation function from the `DumpVersion` of metadata + pub fn import_index(self, size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { + match self { + Self::V1 => v1::import_index(size, dump_path, index_path), + Self::V2 => v2::import_index(size, dump_path, index_path), + } + } } #[derive(Debug, Serialize, Deserialize)] @@ -141,16 +157,6 @@ where } } -/// Extract Settings from `settings.json` file present at provided `dir_path` -fn settings_from_path(dir_path: &Path) -> anyhow::Result { - let path = dir_path.join("settings.json"); - let file = File::open(path)?; - let reader = std::io::BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; - - Ok(metadata) -} - /// Write Settings in `settings.json` file at provided `dir_path` fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> { let path = dir_path.join("settings.json"); @@ -161,40 +167,7 @@ fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> Ok(()) } -fn import_index_v1(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { - std::fs::create_dir_all(&index_path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(size); - let index = milli::Index::new(options, index_path)?; - let index = Index(Arc::new(index)); - - // extract `settings.json` file and import content - let settings = settings_from_path(&dump_path)?; - let update_builder = UpdateBuilder::new(0); - index.update_settings(&settings, update_builder)?; - - let update_builder = UpdateBuilder::new(1); - let file = File::open(&index_path.join("documents.jsonl"))?; - let reader = std::io::BufReader::new(file); - 
index.update_documents( - UpdateFormat::JsonStream, - IndexDocumentsMethod::ReplaceDocuments, - reader, - update_builder, - None, - )?; - - // the last step: we extract the milli::Index and close it - Arc::try_unwrap(index.0) - .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") - .unwrap() - .prepare_for_closing() - .wait(); - - Ok(()) -} - -pub fn load_dump( +pub async fn load_dump( db_path: impl AsRef, dump_path: impl AsRef, size: usize, @@ -212,15 +185,10 @@ pub fn load_dump( // read dump metadata let metadata = DumpMetadata::from_path(&tmp_dir_path)?; - // choose importation function from DumpVersion of metadata - let import_index = match metadata.dump_version { - DumpVersion::V1 => import_index_v1, - }; - // remove indexes which have same `uuid` than indexes to import and create empty indexes - let existing_index_uids = futures::executor::block_on(uuid_resolver.list())?; + let existing_index_uids = uuid_resolver.list().await?; - info!("Deleting indexes provided in the dump..."); + info!("Deleting indexes already present in the db and provided in the dump..."); for idx in &metadata.indexes { if let Some((_, uuid)) = existing_index_uids.iter().find(|(s, _)| s == &idx.uid) { // if we find the index in the `uuid_resolver` it's supposed to exist on the file system @@ -237,18 +205,19 @@ pub fn load_dump( } } else { // if the index does not exist in the `uuid_resolver` we create it - futures::executor::block_on(uuid_resolver.create(idx.uid.clone()))?; + uuid_resolver.create(idx.uid.clone()).await?; } } // import each indexes content for idx in metadata.indexes { let dump_path = tmp_dir_path.join(&idx.uid); - let uuid = futures::executor::block_on(uuid_resolver.get(idx.uid))?; + let uuid = uuid_resolver.get(idx.uid).await?; let index_path = db_path.join(&format!("indexes/index-{}", uuid)); + let update_path = db_path.join(&format!("updates/updates-{}", uuid)); // TODO: add the update db info!("Importing dump from {} into {}...", 
dump_path.display(), index_path.display()); - import_index(size, &dump_path, &index_path).unwrap(); + metadata.dump_version.import_index(size, &dump_path, &index_path).unwrap(); info!("Dump importation from {} succeed", dump_path.display()); } diff --git a/meilisearch-http/src/index_controller/dump/v1.rs b/meilisearch-http/src/index_controller/dump/v1.rs new file mode 100644 index 000000000..433d529e1 --- /dev/null +++ b/meilisearch-http/src/index_controller/dump/v1.rs @@ -0,0 +1,119 @@ +use std::collections::{BTreeMap, BTreeSet}; + +use log::warn; +use serde::{Deserialize, Serialize}; +use crate::index_controller; +use super::*; + +/// This is the settings used in the last version of meilisearch exporting dump in V1 +#[derive(Default, Clone, Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +struct Settings { + #[serde(default, deserialize_with = "deserialize_some")] + pub ranking_rules: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub distinct_attribute: Option>, + #[serde(default, deserialize_with = "deserialize_some")] + pub searchable_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub displayed_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub stop_words: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub synonyms: Option>>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub attributes_for_faceting: Option>>, +} + +/// we need to **always** be able to convert the old settings to the settings currently being used +impl From for index_controller::Settings { + fn from(settings: Settings) -> Self { + if settings.distinct_attribute.flatten().is_some() { + error!("`distinct_attribute` are not yet implemented and thus will be ignored"); + } + if settings.synonyms.flatten().is_some() { + error!("`synonyms` are not yet implemented and thus will be ignored"); + } + Self { + // we need to convert 
the old `Vec` into a `BTreeSet` + displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())), + searchable_attributes: settings.searchable_attributes, + // we previously had a `Vec` but now we have a `HashMap` + // representing the name of the faceted field + the type of the field. Since the type + // was not known in the V1 of the dump we are just going to assume everything is a + // String + attributes_for_faceting: settings.attributes_for_faceting.map(|o| o.map(|vec| vec.into_iter().map(|key| (key, String::from("string"))).collect())), + // we need to convert the old `Vec` into a `BTreeSet` + ranking_rules: settings.ranking_rules.map(|o| o.map(|vec| vec.into_iter().filter_map(|criterion| { + match criterion.as_str() { + "words" | "typo" | "proximity" => Some(criterion), + s if s.starts_with("asc") || s.starts_with("desc") => Some(criterion), + "wordsPosition" => { + warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored"); + Some(String::from("words")) + } + "attribute" | "exactness" => { + error!("The criterion `{}` is not implemented currently and thus will be ignored", criterion); + None + } + s => { + error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); + None + } + } + }).collect())), + // we need to convert the old `Vec` into a `BTreeSet` + stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())), + } + } +} + +/// Extract Settings from `settings.json` file present at provided `dir_path` +fn import_settings(dir_path: &Path) -> anyhow::Result { + let path = dir_path.join("settings.json"); + let file = File::open(path)?; + let reader = std::io::BufReader::new(file); + let metadata = serde_json::from_reader(reader)?; + + Ok(metadata) +} + + +pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { + info!("Importing a dump from an old version of meilisearch with 
dump version 1"); + + std::fs::create_dir_all(&index_path)?; + let mut options = EnvOpenOptions::new(); + options.map_size(size); + let index = milli::Index::new(options, index_path)?; + let index = Index(Arc::new(index)); + + // extract `settings.json` file and import content + let settings = import_settings(&dump_path)?; + dbg!(&settings); + let settings = settings.into(); + dbg!(&settings); + let update_builder = UpdateBuilder::new(0); + index.update_settings(&settings, update_builder)?; + + let update_builder = UpdateBuilder::new(1); + let file = File::open(&dump_path.join("documents.jsonl"))?; + let reader = std::io::BufReader::new(file); + + index.update_documents( + UpdateFormat::JsonStream, + IndexDocumentsMethod::ReplaceDocuments, + reader, + update_builder, + None, + )?; + + // the last step: we extract the original milli::Index and close it + Arc::try_unwrap(index.0) + .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") + .unwrap() + .prepare_for_closing() + .wait(); + + Ok(()) +} diff --git a/meilisearch-http/src/index_controller/dump/v2.rs b/meilisearch-http/src/index_controller/dump/v2.rs new file mode 100644 index 000000000..f9303af0d --- /dev/null +++ b/meilisearch-http/src/index_controller/dump/v2.rs @@ -0,0 +1,51 @@ +use heed::EnvOpenOptions; +use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +use crate::index::Index; +use crate::index_controller::Settings; +use std::{fs::File, path::Path, sync::Arc}; + +/// Extract Settings from `settings.json` file present at provided `dir_path` +fn import_settings(dir_path: &Path) -> anyhow::Result { + let path = dir_path.join("settings.json"); + let file = File::open(path)?; + let reader = std::io::BufReader::new(file); + let metadata = serde_json::from_reader(reader)?; + + Ok(metadata) +} + +pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { + std::fs::create_dir_all(&index_path)?; + let mut options = 
EnvOpenOptions::new(); + options.map_size(size); + let index = milli::Index::new(options, index_path)?; + let index = Index(Arc::new(index)); + + // extract `settings.json` file and import content + let settings = import_settings(&dump_path)?; + let update_builder = UpdateBuilder::new(0); + index.update_settings(&settings, update_builder)?; + dbg!(settings); + + let update_builder = UpdateBuilder::new(1); + let file = File::open(&dump_path.join("documents.jsonl"))?; + let reader = std::io::BufReader::new(file); + + index.update_documents( + UpdateFormat::JsonStream, + IndexDocumentsMethod::ReplaceDocuments, + reader, + update_builder, + None, + )?; + + // the last step: we extract the original milli::Index and close it + Arc::try_unwrap(index.0) + .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") + .unwrap() + .prepare_for_closing() + .wait(); + + Ok(()) +} + diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 10b9142cc..fe894298d 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -75,7 +75,7 @@ pub struct Stats { } impl IndexController { - pub fn new(path: impl AsRef, options: &Opt) -> anyhow::Result { + pub async fn new(path: impl AsRef, options: &Opt) -> anyhow::Result { let index_size = options.max_mdb_size.get_bytes() as usize; let update_store_size = options.max_udb_size.get_bytes() as usize; @@ -92,7 +92,7 @@ impl IndexController { &options.db_path, path, index_size, - ); + ).await?; } diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index b16f3c0e1..592b70d30 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -54,7 +54,7 @@ async fn main() -> Result<(), MainError> { //snapshot::load_snapshot(&opt.db_path, path, opt.ignore_snapshot_if_db_exists, opt.ignore_missing_snapshot)?; //} - let data = Data::new(opt.clone())?; + let data = 
Data::new(opt.clone()).await?; //if !opt.no_analytics { //let analytics_data = data.clone(); From 0fee81678e4f61cbe332d8d88beee32d515c7880 Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 10 May 2021 20:22:18 +0200 Subject: [PATCH 03/54] [WIP] rebase on main --- meilisearch-http/src/index/mod.rs | 19 ++++++++++++++ meilisearch-http/src/index/updates.rs | 26 +++++++++---------- .../src/index_controller/dump/mod.rs | 8 ------ .../src/index_controller/dump/v1.rs | 7 ++--- 4 files changed, 35 insertions(+), 25 deletions(-) diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index c897fac3f..048ed56bb 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -9,6 +9,7 @@ use serde_json::{Map, Value}; use crate::helpers::EnvSizer; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Facets, Settings, Checked, Unchecked}; +use serde::{de::Deserializer, Deserialize}; mod search; mod updates; @@ -26,6 +27,22 @@ impl Deref for Index { } } +pub fn deserialize_some<'de, T, D>(deserializer: D) -> Result, D::Error> +where + T: Deserialize<'de>, + D: Deserializer<'de>, +{ + Deserialize::deserialize(deserializer).map(Some) +} + +pub fn deserialize_wildcard<'de, D>(deserializer: D) -> Result>>, D::Error> +where + D: Deserializer<'de>, +{ + Ok(> as Deserialize>::deserialize(deserializer)? 
+ .map(|item: Vec| (!item.iter().any(|s| s == "*")).then(|| item))) +} + impl Index { pub fn settings(&self) -> anyhow::Result> { let txn = self.read_txn()?; @@ -88,6 +105,8 @@ impl Index { let mut documents = Vec::new(); + println!("fields to display: {:?}", fields_to_display); + for entry in iter { let (_id, obkv) = entry?; let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?; diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 0d76f2ae6..a3012fe9a 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -5,11 +5,17 @@ use std::marker::PhantomData; use flate2::read::GzDecoder; use log::info; -use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; -use serde::{de::Deserializer, Deserialize, Serialize}; +use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +use serde::{Deserialize, Serialize}; -use super::Index; -use crate::index_controller::UpdateResult; +use super::{deserialize_some, deserialize_wildcard, Index}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum UpdateResult { + DocumentsAddition(DocumentAdditionResult), + DocumentDeletion { deleted: u64 }, + Other, +} #[derive(Clone, Default, Debug)] pub struct Checked; @@ -22,14 +28,14 @@ pub struct Unchecked; pub struct Settings { #[serde( default, - deserialize_with = "deserialize_some", + deserialize_with = "deserialize_wildcard", skip_serializing_if = "Option::is_none" )] pub displayed_attributes: Option>>, #[serde( default, - deserialize_with = "deserialize_some", + deserialize_with = "deserialize_wildcard", skip_serializing_if = "Option::is_none" )] pub searchable_attributes: Option>>, @@ -118,14 +124,6 @@ pub struct Facets { pub min_level_size: Option, } -fn deserialize_some<'de, T, D>(deserializer: D) -> Result, D::Error> -where - T: Deserialize<'de>, - D: Deserializer<'de>, -{ - Deserialize::deserialize(deserializer).map(Some) -} - 
impl Index { pub fn update_documents( &self, diff --git a/meilisearch-http/src/index_controller/dump/mod.rs b/meilisearch-http/src/index_controller/dump/mod.rs index 7278a7b77..6be9c5161 100644 --- a/meilisearch-http/src/index_controller/dump/mod.rs +++ b/meilisearch-http/src/index_controller/dump/mod.rs @@ -19,14 +19,6 @@ use crate::index::Index; use crate::index_controller::uuid_resolver; use crate::{helpers::compression, index::Settings}; -pub (super) fn deserialize_some<'de, T, D>(deserializer: D) -> Result, D::Error> -where - T: Deserialize<'de>, - D: Deserializer<'de>, -{ - Deserialize::deserialize(deserializer).map(Some) -} - #[derive(Debug, Serialize, Deserialize, Copy, Clone)] enum DumpVersion { V1, diff --git a/meilisearch-http/src/index_controller/dump/v1.rs b/meilisearch-http/src/index_controller/dump/v1.rs index 433d529e1..3e82b9084 100644 --- a/meilisearch-http/src/index_controller/dump/v1.rs +++ b/meilisearch-http/src/index_controller/dump/v1.rs @@ -3,6 +3,7 @@ use std::collections::{BTreeMap, BTreeSet}; use log::warn; use serde::{Deserialize, Serialize}; use crate::index_controller; +use crate::index::{deserialize_wildcard, deserialize_some}; use super::*; /// This is the settings used in the last version of meilisearch exporting dump in V1 @@ -13,10 +14,10 @@ struct Settings { pub ranking_rules: Option>>, #[serde(default, deserialize_with = "deserialize_some")] pub distinct_attribute: Option>, - #[serde(default, deserialize_with = "deserialize_some")] + #[serde(default, deserialize_with = "deserialize_wildcard")] pub searchable_attributes: Option>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub displayed_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_wildcard")] + pub displayed_attributes: Option>>, #[serde(default, deserialize_with = "deserialize_some")] pub stop_words: Option>>, #[serde(default, deserialize_with = "deserialize_some")] From 1b5fc61eb666512bbdd9a9b50a1a1507cea48e1f Mon Sep 17 00:00:00 
2001 From: tamo Date: Mon, 10 May 2021 20:23:12 +0200 Subject: [PATCH 04/54] [WIP] rebase on main --- meilisearch-http/src/index/mod.rs | 9 +- .../src/index_controller/dump/mod.rs | 13 +- .../src/index_controller/dump/v1.rs | 11 +- meilisearch-http/src/index_controller/mod.rs | 2 +- .../src/index_controller/uuid_resolver/mod.rs | 3 +- .../index_controller/uuid_resolver/store.rs | 263 ++++++++++-------- 6 files changed, 164 insertions(+), 137 deletions(-) diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index 048ed56bb..b0c145001 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -8,7 +8,7 @@ use serde_json::{Map, Value}; use crate::helpers::EnvSizer; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; -pub use updates::{Facets, Settings, Checked, Unchecked}; +pub use updates::{Facets, Settings, Checked, Unchecked, UpdateResult}; use serde::{de::Deserializer, Deserialize}; mod search; @@ -35,12 +35,13 @@ where Deserialize::deserialize(deserializer).map(Some) } -pub fn deserialize_wildcard<'de, D>(deserializer: D) -> Result>>, D::Error> +pub fn deserialize_wildcard<'de, I, D>(deserializer: D) -> Result>, D::Error> where D: Deserializer<'de>, + I: IntoIterator + Deserialize<'de> + Clone, { - Ok(> as Deserialize>::deserialize(deserializer)? - .map(|item: Vec| (!item.iter().any(|s| s == "*")).then(|| item))) + Ok( as Deserialize>::deserialize(deserializer)? 
+ .map(|item: I| (!item.clone().into_iter().any(|s| s == "*")).then(|| item))) } impl Index { diff --git a/meilisearch-http/src/index_controller/dump/mod.rs b/meilisearch-http/src/index_controller/dump/mod.rs index 6be9c5161..1f42466cb 100644 --- a/meilisearch-http/src/index_controller/dump/mod.rs +++ b/meilisearch-http/src/index_controller/dump/mod.rs @@ -7,7 +7,7 @@ use anyhow::bail; use heed::EnvOpenOptions; use log::{error, info}; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; -use serde::{de::Deserializer, Deserialize, Serialize}; +use serde::{Deserialize, Serialize}; use tempfile::TempDir; use tokio::fs; use tokio::task::spawn_blocking; @@ -159,7 +159,7 @@ fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> Ok(()) } -pub async fn load_dump( +pub fn load_dump( db_path: impl AsRef, dump_path: impl AsRef, size: usize, @@ -167,7 +167,7 @@ pub async fn load_dump( info!("Importing dump from {}...", dump_path.as_ref().display()); let db_path = db_path.as_ref(); let dump_path = dump_path.as_ref(); - let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?; + let uuid_resolver = uuid_resolver::HeedUuidStore::new(&db_path)?; // extract the dump in a temporary directory let tmp_dir = TempDir::new()?; @@ -178,7 +178,7 @@ pub async fn load_dump( let metadata = DumpMetadata::from_path(&tmp_dir_path)?; // remove indexes which have same `uuid` than indexes to import and create empty indexes - let existing_index_uids = uuid_resolver.list().await?; + let existing_index_uids = uuid_resolver.list()?; info!("Deleting indexes already present in the db and provided in the dump..."); for idx in &metadata.indexes { @@ -197,14 +197,15 @@ pub async fn load_dump( } } else { // if the index does not exist in the `uuid_resolver` we create it - uuid_resolver.create(idx.uid.clone()).await?; + uuid_resolver.create_uuid(idx.uid.clone(), false)?; } } // import each indexes content for idx in metadata.indexes { let dump_path 
= tmp_dir_path.join(&idx.uid); - let uuid = uuid_resolver.get(idx.uid).await?; + // this cannot fail since we created all the missing uuid in the previous loop + let uuid = uuid_resolver.get_uuid(idx.uid)?.unwrap(); let index_path = db_path.join(&format!("indexes/index-{}", uuid)); let update_path = db_path.join(&format!("updates/updates-{}", uuid)); // TODO: add the update db diff --git a/meilisearch-http/src/index_controller/dump/v1.rs b/meilisearch-http/src/index_controller/dump/v1.rs index 3e82b9084..02d97e8c6 100644 --- a/meilisearch-http/src/index_controller/dump/v1.rs +++ b/meilisearch-http/src/index_controller/dump/v1.rs @@ -17,7 +17,7 @@ struct Settings { #[serde(default, deserialize_with = "deserialize_wildcard")] pub searchable_attributes: Option>>, #[serde(default, deserialize_with = "deserialize_wildcard")] - pub displayed_attributes: Option>>, + pub displayed_attributes: Option>>, #[serde(default, deserialize_with = "deserialize_some")] pub stop_words: Option>>, #[serde(default, deserialize_with = "deserialize_some")] @@ -92,8 +92,13 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: // extract `settings.json` file and import content let settings = import_settings(&dump_path)?; dbg!(&settings); - let settings = settings.into(); - dbg!(&settings); + let mut settings: index_controller::Settings = settings.into(); + if settings.displayed_attributes.as_ref().map_or(false, |o| o.as_ref().map_or(false, |v| v.contains(&String::from("*")))) { + settings.displayed_attributes = None; + } + if settings.searchable_attributes.as_ref().map_or(false, |o| o.as_ref().map_or(false, |v| v.contains(&String::from("*")))) { + settings.searchable_attributes = None; + } let update_builder = UpdateBuilder::new(0); index.update_settings(&settings, update_builder)?; diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index fe894298d..7a67265a7 100644 --- 
a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -92,7 +92,7 @@ impl IndexController { &options.db_path, path, index_size, - ).await?; + )?; } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs index a8361095c..b3f70ba2e 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs @@ -11,11 +11,12 @@ use uuid::Uuid; use actor::UuidResolverActor; use message::UuidResolveMsg; -use store::{HeedUuidStore, UuidStore}; +use store::UuidStore; #[cfg(test)] use mockall::automock; +pub use store::HeedUuidStore; pub use handle_impl::UuidResolverHandleImpl; const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index df4c3a2fb..1d387ddc3 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -1,6 +1,6 @@ -use std::path::{Path, PathBuf}; use std::collections::HashSet; use std::fs::create_dir_all; +use std::path::{Path, PathBuf}; use heed::{ types::{ByteSlice, Str}, @@ -25,6 +25,7 @@ pub trait UuidStore { async fn get_size(&self) -> Result; } +#[derive(Clone)] pub struct HeedUuidStore { env: Env, db: Database, @@ -40,150 +41,168 @@ impl HeedUuidStore { let db = env.create_database(None)?; Ok(Self { env, db }) } + + pub fn create_uuid(&self, name: String, err: bool) -> Result { + let env = self.env.clone(); + let db = self.db; + let mut txn = env.write_txn()?; + match db.get(&txn, &name)? 
{ + Some(uuid) => { + if err { + Err(UuidError::NameAlreadyExist) + } else { + let uuid = Uuid::from_slice(uuid)?; + Ok(uuid) + } + } + None => { + let uuid = Uuid::new_v4(); + db.put(&mut txn, &name, uuid.as_bytes())?; + txn.commit()?; + Ok(uuid) + } + } + } + + pub fn get_uuid(&self, name: String) -> Result> { + let env = self.env.clone(); + let db = self.db; + let txn = env.read_txn()?; + match db.get(&txn, &name)? { + Some(uuid) => { + let uuid = Uuid::from_slice(uuid)?; + Ok(Some(uuid)) + } + None => Ok(None), + } + } + + pub fn delete(&self, uid: String) -> Result> { + let env = self.env.clone(); + let db = self.db; + let mut txn = env.write_txn()?; + match db.get(&txn, &uid)? { + Some(uuid) => { + let uuid = Uuid::from_slice(uuid)?; + db.delete(&mut txn, &uid)?; + txn.commit()?; + Ok(Some(uuid)) + } + None => Ok(None), + } + } + + pub fn list(&self) -> Result> { + let env = self.env.clone(); + let db = self.db; + let txn = env.read_txn()?; + let mut entries = Vec::new(); + for entry in db.iter(&txn)? { + let (name, uuid) = entry?; + let uuid = Uuid::from_slice(uuid)?; + entries.push((name.to_owned(), uuid)) + } + Ok(entries) + } + + pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> { + let env = self.env.clone(); + let db = self.db; + let mut txn = env.write_txn()?; + db.put(&mut txn, &name, uuid.as_bytes())?; + txn.commit()?; + Ok(()) + } + + // TODO: we should merge this function and the following function for the dump. it's exactly + // the same code + pub fn snapshot(&self, mut path: PathBuf) -> Result> { + let env = self.env.clone(); + let db = self.db; + // Write transaction to acquire a lock on the database. + let txn = env.write_txn()?; + let mut entries = HashSet::new(); + for entry in db.iter(&txn)? 
{ + let (_, uuid) = entry?; + let uuid = Uuid::from_slice(uuid)?; + entries.insert(uuid); + } + + // only perform snapshot if there are indexes + if !entries.is_empty() { + path.push("index_uuids"); + create_dir_all(&path).unwrap(); + path.push("data.mdb"); + env.copy_to_path(path, CompactionOption::Enabled)?; + } + Ok(entries) + } + + pub fn dump(&self, mut path: PathBuf) -> Result> { + let env = self.env.clone(); + let db = self.db; + // Write transaction to acquire a lock on the database. + let txn = env.write_txn()?; + let mut entries = Vec::new(); + for entry in db.iter(&txn)? { + let (_, uuid) = entry?; + let uuid = Uuid::from_slice(uuid)?; + entries.push(uuid) + } + + // only perform dump if there are indexes + if !entries.is_empty() { + path.push("index_uuids"); + create_dir_all(&path).unwrap(); + path.push("data.mdb"); + env.copy_to_path(path, CompactionOption::Enabled)?; + } + Ok(entries) + } + + pub fn get_size(&self) -> Result { + Ok(self.env.size()) + } } #[async_trait::async_trait] impl UuidStore for HeedUuidStore { async fn create_uuid(&self, name: String, err: bool) -> Result { - let env = self.env.clone(); - let db = self.db; - tokio::task::spawn_blocking(move || { - let mut txn = env.write_txn()?; - match db.get(&txn, &name)? { - Some(uuid) => { - if err { - Err(UuidError::NameAlreadyExist) - } else { - let uuid = Uuid::from_slice(uuid)?; - Ok(uuid) - } - } - None => { - let uuid = Uuid::new_v4(); - db.put(&mut txn, &name, uuid.as_bytes())?; - txn.commit()?; - Ok(uuid) - } - } - }) - .await? + let this = self.clone(); + tokio::task::spawn_blocking(move || this.create_uuid(name, err)).await? } async fn get_uuid(&self, name: String) -> Result> { - let env = self.env.clone(); - let db = self.db; - tokio::task::spawn_blocking(move || { - let txn = env.read_txn()?; - match db.get(&txn, &name)? { - Some(uuid) => { - let uuid = Uuid::from_slice(uuid)?; - Ok(Some(uuid)) - } - None => Ok(None), - } - }) - .await? 
+ let this = self.clone(); + tokio::task::spawn_blocking(move || this.get_uuid(name)).await? } async fn delete(&self, uid: String) -> Result> { - let env = self.env.clone(); - let db = self.db; - tokio::task::spawn_blocking(move || { - let mut txn = env.write_txn()?; - match db.get(&txn, &uid)? { - Some(uuid) => { - let uuid = Uuid::from_slice(uuid)?; - db.delete(&mut txn, &uid)?; - txn.commit()?; - Ok(Some(uuid)) - } - None => Ok(None), - } - }) - .await? + let this = self.clone(); + tokio::task::spawn_blocking(move || this.delete(uid)).await? } async fn list(&self) -> Result> { - let env = self.env.clone(); - let db = self.db; - tokio::task::spawn_blocking(move || { - let txn = env.read_txn()?; - let mut entries = Vec::new(); - for entry in db.iter(&txn)? { - let (name, uuid) = entry?; - let uuid = Uuid::from_slice(uuid)?; - entries.push((name.to_owned(), uuid)) - } - Ok(entries) - }) - .await? + let this = self.clone(); + tokio::task::spawn_blocking(move || this.list()).await? } async fn insert(&self, name: String, uuid: Uuid) -> Result<()> { - let env = self.env.clone(); - let db = self.db; - tokio::task::spawn_blocking(move || { - let mut txn = env.write_txn()?; - db.put(&mut txn, &name, uuid.as_bytes())?; - txn.commit()?; - Ok(()) - }) - .await? + let this = self.clone(); + tokio::task::spawn_blocking(move || this.insert(name, uuid)).await? } - // TODO: we should merge this function and the following function for the dump. it's exactly - // the same code - async fn snapshot(&self, mut path: PathBuf) -> Result> { - let env = self.env.clone(); - let db = self.db; - tokio::task::spawn_blocking(move || { - // Write transaction to acquire a lock on the database. - let txn = env.write_txn()?; - let mut entries = HashSet::new(); - for entry in db.iter(&txn)? 
{ - let (_, uuid) = entry?; - let uuid = Uuid::from_slice(uuid)?; - entries.insert(uuid); - } - - // only perform snapshot if there are indexes - if !entries.is_empty() { - path.push("index_uuids"); - create_dir_all(&path).unwrap(); - path.push("data.mdb"); - env.copy_to_path(path, CompactionOption::Enabled)?; - } - Ok(entries) - }) - .await? + async fn snapshot(&self, path: PathBuf) -> Result> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.snapshot(path)).await? } - async fn dump(&self, mut path: PathBuf) -> Result> { - let env = self.env.clone(); - let db = self.db; - tokio::task::spawn_blocking(move || { - // Write transaction to acquire a lock on the database. - let txn = env.write_txn()?; - let mut entries = Vec::new(); - for entry in db.iter(&txn)? { - let (_, uuid) = entry?; - let uuid = Uuid::from_slice(uuid)?; - entries.push(uuid) - } - - // only perform dump if there are indexes - if !entries.is_empty() { - path.push("index_uuids"); - create_dir_all(&path).unwrap(); - path.push("data.mdb"); - env.copy_to_path(path, CompactionOption::Enabled)?; - } - Ok(entries) - }) - .await? + async fn dump(&self, path: PathBuf) -> Result> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.dump(path)).await? 
} async fn get_size(&self) -> Result { - Ok(self.env.size()) + self.get_size() } } From 0275b36fb0db758a6fb32b3a933ba2595e9a578d Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 10 May 2021 20:24:14 +0200 Subject: [PATCH 05/54] [WIP] rebase on main --- meilisearch-http/src/index/updates.rs | 9 +- .../src/index_controller/dump/mod.rs | 4 +- .../src/index_controller/dump/v1.rs | 6 +- .../src/index_controller/dump/v2.rs | 2 +- .../src/index_controller/index_actor/actor.rs | 4 +- .../index_actor/handle_impl.rs | 6 +- .../index_controller/index_actor/message.rs | 2 +- .../src/index_controller/index_actor/mod.rs | 2 + meilisearch-http/src/index_controller/mod.rs | 3 + .../index_controller/update_actor/actor.rs | 125 +++++------------- .../update_actor/handle_impl.rs | 11 +- .../index_controller/update_actor/message.rs | 6 +- .../src/index_controller/update_actor/mod.rs | 6 +- .../update_actor/update_store.rs | 55 +++----- .../index_controller/uuid_resolver/actor.rs | 2 +- .../uuid_resolver/handle_impl.rs | 2 +- .../index_controller/uuid_resolver/message.rs | 2 +- .../src/index_controller/uuid_resolver/mod.rs | 2 +- .../index_controller/uuid_resolver/store.rs | 14 +- meilisearch-http/src/routes/index.rs | 4 +- 20 files changed, 93 insertions(+), 174 deletions(-) diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index a3012fe9a..75d0dc3e6 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -5,17 +5,12 @@ use std::marker::PhantomData; use flate2::read::GzDecoder; use log::info; -use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use serde::{Deserialize, Serialize}; use super::{deserialize_some, deserialize_wildcard, Index}; +use crate::index_controller::UpdateResult; -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum UpdateResult { - 
DocumentsAddition(DocumentAdditionResult), - DocumentDeletion { deleted: u64 }, - Other, -} #[derive(Clone, Default, Debug)] pub struct Checked; diff --git a/meilisearch-http/src/index_controller/dump/mod.rs b/meilisearch-http/src/index_controller/dump/mod.rs index 1f42466cb..c12041e9d 100644 --- a/meilisearch-http/src/index_controller/dump/mod.rs +++ b/meilisearch-http/src/index_controller/dump/mod.rs @@ -9,8 +9,6 @@ use log::{error, info}; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use serde::{Deserialize, Serialize}; use tempfile::TempDir; -use tokio::fs; -use tokio::task::spawn_blocking; use super::update_actor::UpdateActorHandle; use super::uuid_resolver::UuidResolverHandle; @@ -109,6 +107,7 @@ where } async fn perform_dump(&self) -> anyhow::Result<()> { + /* info!("Performing dump."); let dump_dir = self.dump_path.clone(); @@ -144,6 +143,7 @@ where .await??; info!("Created dump in {:?}.", dump_path); + */ Ok(()) } diff --git a/meilisearch-http/src/index_controller/dump/v1.rs b/meilisearch-http/src/index_controller/dump/v1.rs index 02d97e8c6..89b55c51e 100644 --- a/meilisearch-http/src/index_controller/dump/v1.rs +++ b/meilisearch-http/src/index_controller/dump/v1.rs @@ -29,13 +29,11 @@ struct Settings { /// we need to **always** be able to convert the old settings to the settings currently being used impl From for index_controller::Settings { fn from(settings: Settings) -> Self { - if settings.distinct_attribute.flatten().is_some() { - error!("`distinct_attribute` are not yet implemented and thus will be ignored"); - } if settings.synonyms.flatten().is_some() { error!("`synonyms` are not yet implemented and thus will be ignored"); } Self { + distinct_attribute: settings.distinct_attribute, // we need to convert the old `Vec` into a `BTreeSet` displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())), searchable_attributes: settings.searchable_attributes, @@ -109,7 +107,7 @@ pub fn 
import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: index.update_documents( UpdateFormat::JsonStream, IndexDocumentsMethod::ReplaceDocuments, - reader, + Some(reader), update_builder, None, )?; diff --git a/meilisearch-http/src/index_controller/dump/v2.rs b/meilisearch-http/src/index_controller/dump/v2.rs index f9303af0d..7b9a56772 100644 --- a/meilisearch-http/src/index_controller/dump/v2.rs +++ b/meilisearch-http/src/index_controller/dump/v2.rs @@ -34,7 +34,7 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: index.update_documents( UpdateFormat::JsonStream, IndexDocumentsMethod::ReplaceDocuments, - reader, + Some(reader), update_builder, None, )?; diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 535c405dc..ecf71b62b 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -312,7 +312,7 @@ impl IndexActor { Ok(()) } - async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> { + async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> { use tokio::fs::create_dir_all; path.push("indexes"); @@ -340,7 +340,7 @@ impl IndexActor { Ok(()) } - async fn handle_get_stats(&self, uuid: Uuid) -> Result { + async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult { let index = self .store .get(uuid) diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index d625a763e..26aa189d0 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -136,14 +136,14 @@ impl IndexActorHandle for IndexActorHandleImpl { Ok(receiver.await.expect("IndexActor has been killed")?) 
} - async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { + async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> { let (ret, receiver) = oneshot::channel(); let msg = IndexMsg::Dump { uuid, path, ret }; - let _ = self.read_sender.send(msg).await; + let _ = self.sender.send(msg).await; Ok(receiver.await.expect("IndexActor has been killed")?) } - async fn get_index_stats(&self, uuid: Uuid) -> Result { + async fn get_index_stats(&self, uuid: Uuid) -> IndexResult { let (ret, receiver) = oneshot::channel(); let msg = IndexMsg::GetStats { uuid, ret }; let _ = self.sender.send(msg).await; diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 0d88532ca..714a30ecc 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -63,7 +63,7 @@ pub enum IndexMsg { Dump { uuid: Uuid, path: PathBuf, - ret: oneshot::Sender>, + ret: oneshot::Sender>, }, GetStats { uuid: Uuid, diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index 46105742b..7522acc67 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -97,6 +97,7 @@ pub trait IndexActorHandle { index_settings: IndexSettings, ) -> IndexResult; async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>; + async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>; async fn get_index_stats(&self, uuid: Uuid) -> IndexResult; } @@ -180,4 +181,5 @@ mod test { async fn get_index_stats(&self, uuid: Uuid) -> IndexResult { self.as_ref().get_index_stats(uuid).await } + } } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 7a67265a7..9ed6f10e4 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ 
b/meilisearch-http/src/index_controller/mod.rs @@ -5,6 +5,7 @@ use std::time::Duration; use actix_web::web::{Bytes, Payload}; use anyhow::bail; +use chrono::{DateTime, Utc}; use futures::stream::StreamExt; use log::info; use milli::FieldsDistribution; @@ -22,6 +23,8 @@ use uuid_resolver::{UuidError, UuidResolverHandle}; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use crate::option::Opt; +use self::dump::load_dump; + mod index_actor; mod snapshot; mod dump; diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index 7885d0b3b..6789bc6ce 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -13,7 +13,7 @@ use tokio::sync::mpsc; use uuid::Uuid; use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo}; -use crate::index_controller::index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG}; +use crate::index_controller::{index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG}}; use crate::index_controller::{UpdateMeta, UpdateStatus}; pub struct UpdateActor { @@ -71,16 +71,14 @@ where Some(Delete { uuid, ret }) => { let _ = ret.send(self.handle_delete(uuid).await); } - Some(Snapshot { uuid, path, ret }) => { - let _ = ret.send(self.handle_snapshot(uuid, path).await); + Some(Snapshot { uuids, path, ret }) => { + let _ = ret.send(self.handle_snapshot(uuids, path).await); } - Some(Dump { uuid, path, ret }) => { - let _ = ret.send(self.handle_dump(uuid, path).await); + Some(Dump { uuids, path, ret }) => { + let _ = ret.send(self.handle_dump(uuids, path).await); } Some(GetInfo { ret }) => { let _ = ret.send(self.handle_get_info().await); - Some(GetSize { uuid, ret }) => { - let _ = ret.send(self.handle_get_size(uuid).await); } None => break, } @@ -199,51 +197,9 @@ where } async fn handle_delete(&self, uuid: Uuid) -> Result<()> { - let store = 
self.store.delete(uuid).await?; + let store = self.store.clone(); - if let Some(store) = store { - tokio::task::spawn(async move { - let store = get_arc_ownership_blocking(store).await; - tokio::task::spawn_blocking(move || { - store.prepare_for_closing().wait(); - info!("Update store {} was closed.", uuid); - }); - }); - } - - Ok(()) - } - - async fn handle_create(&self, uuid: Uuid) -> Result<()> { - let _ = self.store.get_or_create(uuid).await?; - Ok(()) - } - - Ok(()) - } - - async fn handle_create(&self, uuid: Uuid) -> Result<()> { - let _ = self.store.get_or_create(uuid).await?; - Ok(()) - } - - async fn handle_snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - let index_handle = self.index_handle.clone(); - if let Some(update_store) = self.store.get(uuid).await? { - tokio::task::spawn_blocking(move || -> anyhow::Result<()> { - // acquire write lock to prevent further writes during snapshot - // the update lock must be acquired BEFORE the write lock to prevent dead lock - let _lock = update_store.update_lock.lock(); - let mut txn = update_store.env.write_txn()?; - - // create db snapshot - update_store.snapshot(&mut txn, &path, uuid)?; - - futures::executor::block_on( - async move { index_handle.snapshot(uuid, path).await }, - )?; - Ok(()) - }) + tokio::task::spawn_blocking(move || store.delete_all(uuid)) .await .map_err(|e| UpdateError::Error(e.into()))? .map_err(|e| UpdateError::Error(e.into()))?; @@ -280,6 +236,35 @@ where Ok(()) } + async fn handle_dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { + let index_handle = self.index_handle.clone(); + let update_store = self.store.clone(); + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { + update_store.dump(&uuids, &path)?; + + // Perform the snapshot of each index concurently. 
Only a third of the capabilities of + // the index actor at a time not to put too much pressure on the index actor + let path = &path; + let handle = &index_handle; + + let mut stream = futures::stream::iter(uuids.iter()) + .map(|&uuid| handle.dump(uuid, path.clone())) + .buffer_unordered(CONCURRENT_INDEX_MSG / 3); + + Handle::current().block_on(async { + while let Some(res) = stream.next().await { + res?; + } + Ok(()) + }) + }) + .await + .map_err(|e| UpdateError::Error(e.into()))? + .map_err(|e| UpdateError::Error(e.into()))?; + + Ok(()) + } + async fn handle_get_info(&self) -> Result { let update_store = self.store.clone(); let info = tokio::task::spawn_blocking(move || -> anyhow::Result { @@ -292,42 +277,4 @@ where Ok(info) } - - async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { - let index_handle = self.index_handle.clone(); - if let Some(update_store) = self.store.get(uuid).await? { - tokio::task::spawn_blocking(move || -> anyhow::Result<()> { - // acquire write lock to prevent further writes during the dump - // the update lock must be acquired BEFORE the write lock to prevent dead lock - let _lock = update_store.update_lock.lock(); - let mut txn = update_store.env.write_txn()?; - - // create db dump - update_store.dump(&mut txn, &path, uuid)?; - - futures::executor::block_on( - async move { index_handle.dump(uuid, path).await }, - )?; - Ok(()) - }) - .await - .map_err(|e| UpdateError::Error(e.into()))? - .map_err(|e| UpdateError::Error(e.into()))?; - } - - Ok(()) - } - - async fn handle_get_size(&self, uuid: Uuid) -> Result { - let size = match self.store.get(uuid).await? { - Some(update_store) => tokio::task::spawn_blocking(move || -> anyhow::Result { - let txn = update_store.env.read_txn()?; - - update_store.get_size(&txn) - }) - .await - .map_err(|e| UpdateError::Error(e.into()))? 
- .map_err(|e| UpdateError::Error(e.into()))?, - None => 0, - }; } diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index 569b896b0..09a242377 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -78,16 +78,9 @@ where receiver.await.expect("update actor killed.") } - async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()> { + async fn dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::Dump { uuid, path, ret }; - let _ = self.sender.send(msg).await; - receiver.await.expect("update actor killed.") - } - - async fn get_size(&self, uuid: Uuid) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::GetSize { uuid, ret }; + let msg = UpdateMsg::Dump { uuids, path, ret }; let _ = self.sender.send(msg).await; receiver.await.expect("update actor killed.") } diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 3f39c224f..37df2af32 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -32,15 +32,11 @@ pub enum UpdateMsg { ret: oneshot::Sender>, }, Dump { - uuid: Uuid, + uuids: HashSet, path: PathBuf, ret: oneshot::Sender>, }, GetInfo { ret: oneshot::Sender>, }, - GetSize { - uuid: Uuid, - ret: oneshot::Sender>, - }, } diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index 4d8ab6f20..36390c290 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -40,11 +40,9 @@ pub trait UpdateActorHandle { async fn get_all_updates_status(&self, uuid: 
Uuid) -> Result>; async fn update_status(&self, uuid: Uuid, id: u64) -> Result; async fn delete(&self, uuid: Uuid) -> Result<()>; - async fn create(&self, uuid: Uuid) -> Result<()>; - async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; - async fn dump(&self, uuid: Uuid, path: PathBuf) -> Result<()>; + async fn snapshot(&self, uuid: HashSet, path: PathBuf) -> Result<()>; + async fn dump(&self, uuid: HashSet, path: PathBuf) -> Result<()>; async fn get_info(&self) -> Result; - async fn get_size(&self, uuid: Uuid) -> Result; async fn update( &self, meta: UpdateMeta, diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index 4bc4c8c75..6f698e693 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -499,31 +499,37 @@ impl UpdateStore { Ok(()) } - pub fn dump( - &self, - txn: &mut heed::RwTxn, - path: impl AsRef, - uuid: Uuid, - ) -> anyhow::Result<()> { + pub fn dump(&self, uuids: &HashSet, path: impl AsRef) -> anyhow::Result<()> { + let state_lock = self.state.write(); + state_lock.swap(State::Snapshoting); // TODO: rename the state + + let txn = self.env.write_txn()?; + let update_path = path.as_ref().join("updates"); create_dir_all(&update_path)?; - let mut dump_path = update_path.join(format!("update-{}", uuid)); // acquire write lock to prevent further writes during dump - create_dir_all(&dump_path)?; - dump_path.push("data.mdb"); + create_dir_all(&update_path)?; + let db_path = update_path.join("data.mdb"); + // TODO: everything // create db dump - self.env.copy_to_path(&dump_path, CompactionOption::Enabled)?; + self.env.copy_to_path(&db_path, CompactionOption::Enabled)?; let update_files_path = update_path.join("update_files"); create_dir_all(&update_files_path)?; - for path in self.pending.iter(&txn)? 
{ - let (_, path) = path?; - let name = path.file_name().unwrap(); - let to = update_files_path.join(name); - copy(path, to)?; + let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); + + for entry in pendings { + let ((_, uuid, _), pending) = entry?; + if uuids.contains(&uuid) { + if let Some(path) = pending.decode()?.content_path() { + let name = path.file_name().unwrap(); + let to = update_files_path.join(name); + copy(path, to)?; + } + } } Ok(()) @@ -545,25 +551,6 @@ impl UpdateStore { Ok(UpdateStoreInfo { size, processing }) } - - pub fn get_size(&self, txn: &heed::RoTxn) -> anyhow::Result { - let mut size = self.env.size(); - let txn = self.env.read_txn()?; - - for entry in self.pending_queue.iter(&txn)? { - let (_, pending) = entry?; - if let Some(path) = pending.content_path() { - size += File::open(path)?.metadata()?.len(); - } - } - - let processing = match *self.state.read() { - State::Processing(uuid, _) => Some(uuid), - _ => None, - }; - - Ok(UpdateStoreInfo { size, processing }) - } } #[cfg(test)] diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs index 9c180e4a8..df83ceba9 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs @@ -85,7 +85,7 @@ impl UuidResolverActor { self.store.snapshot(path).await } - async fn handle_dump(&self, path: PathBuf) -> Result> { + async fn handle_dump(&self, path: PathBuf) -> Result> { self.store.dump(path).await } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs index e47f9a8e0..d9e9a20fc 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs @@ -78,7 +78,7 @@ impl UuidResolverHandle for UuidResolverHandleImpl { .expect("Uuid resolver actor 
has been killed")?) } - async fn dump(&self, path: PathBuf) -> Result> { + async fn dump(&self, path: PathBuf) -> Result> { let (ret, receiver) = oneshot::channel(); let msg = UuidResolveMsg::DumpRequest { path, ret }; let _ = self.sender.send(msg).await; diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs b/meilisearch-http/src/index_controller/uuid_resolver/message.rs index 67493c2cd..78f62eea2 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/message.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/message.rs @@ -33,7 +33,7 @@ pub enum UuidResolveMsg { }, DumpRequest { path: PathBuf, - ret: oneshot::Sender>>, + ret: oneshot::Sender>>, }, GetSize { ret: oneshot::Sender>, diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs index b3f70ba2e..aca730db9 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs @@ -32,7 +32,7 @@ pub trait UuidResolverHandle { async fn delete(&self, name: String) -> anyhow::Result; async fn list(&self) -> anyhow::Result>; async fn snapshot(&self, path: PathBuf) -> Result>; - async fn dump(&self, path: PathBuf) -> Result>; + async fn dump(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 1d387ddc3..917e0b4a5 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -21,7 +21,7 @@ pub trait UuidStore { async fn list(&self) -> Result>; async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; async fn snapshot(&self, path: PathBuf) -> Result>; - async fn dump(&self, path: PathBuf) -> Result>; + async fn dump(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; } 
@@ -116,7 +116,7 @@ impl HeedUuidStore { // TODO: we should merge this function and the following function for the dump. it's exactly // the same code - pub fn snapshot(&self, mut path: PathBuf) -> Result> { + pub fn snapshot(&self, mut path: PathBuf) -> Result> { let env = self.env.clone(); let db = self.db; // Write transaction to acquire a lock on the database. @@ -138,16 +138,16 @@ impl HeedUuidStore { Ok(entries) } - pub fn dump(&self, mut path: PathBuf) -> Result> { + pub fn dump(&self, mut path: PathBuf) -> Result> { let env = self.env.clone(); let db = self.db; // Write transaction to acquire a lock on the database. let txn = env.write_txn()?; - let mut entries = Vec::new(); + let mut entries = HashSet::new(); for entry in db.iter(&txn)? { let (_, uuid) = entry?; let uuid = Uuid::from_slice(uuid)?; - entries.push(uuid) + entries.insert(uuid); } // only perform dump if there are indexes @@ -192,12 +192,12 @@ impl UuidStore for HeedUuidStore { tokio::task::spawn_blocking(move || this.insert(name, uuid)).await? } - async fn snapshot(&self, path: PathBuf) -> Result> { + async fn snapshot(&self, path: PathBuf) -> Result> { let this = self.clone(); tokio::task::spawn_blocking(move || this.snapshot(path)).await? } - async fn dump(&self, path: PathBuf) -> Result> { + async fn dump(&self, path: PathBuf) -> Result> { let this = self.clone(); tokio::task::spawn_blocking(move || this.dump(path)).await? 
} diff --git a/meilisearch-http/src/routes/index.rs b/meilisearch-http/src/routes/index.rs index 1afc01806..62717c90d 100644 --- a/meilisearch-http/src/routes/index.rs +++ b/meilisearch-http/src/routes/index.rs @@ -1,7 +1,7 @@ use actix_web::{delete, get, post, put}; use actix_web::{web, HttpResponse}; -use chrono::DateTime; -use serde::Deserialize; +use chrono::{DateTime, Utc}; +use serde::{Serialize, Deserialize}; use crate::error::ResponseError; use crate::helpers::Authentication; From 0f94ef8abc5fa3e994afb19bccab8f5f7f571757 Mon Sep 17 00:00:00 2001 From: tamo Date: Thu, 29 Apr 2021 14:45:08 +0200 Subject: [PATCH 06/54] WIP: dump --- meilisearch-http/src/data/mod.rs | 4 +- .../src/index_controller/dump/mod.rs | 34 ++++++++++------- .../src/index_controller/dump/v1.rs | 13 +++---- .../src/index_controller/index_actor/mod.rs | 4 ++ meilisearch-http/src/index_controller/mod.rs | 2 +- .../index_controller/update_actor/actor.rs | 1 - .../src/index_controller/update_actor/mod.rs | 3 +- .../update_actor/update_store.rs | 37 ++++++++++++++----- meilisearch-http/src/main.rs | 2 +- .../tests/settings/get_settings.rs | 4 +- 10 files changed, 64 insertions(+), 40 deletions(-) diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index e2bb7fbfb..c7979210e 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -55,10 +55,10 @@ impl ApiKeys { } impl Data { - pub async fn new(options: Opt) -> anyhow::Result { + pub fn new(options: Opt) -> anyhow::Result { let path = options.db_path.clone(); - let index_controller = IndexController::new(&path, &options).await?; + let index_controller = IndexController::new(&path, &options)?; let mut api_keys = ApiKeys { master: options.clone().master_key, diff --git a/meilisearch-http/src/index_controller/dump/mod.rs b/meilisearch-http/src/index_controller/dump/mod.rs index c12041e9d..2bd5f167e 100644 --- a/meilisearch-http/src/index_controller/dump/mod.rs +++ 
b/meilisearch-http/src/index_controller/dump/mod.rs @@ -1,7 +1,7 @@ mod v1; mod v2; -use std::{fs::File, path::{Path, PathBuf}, sync::Arc}; +use std::{fs::File, path::{Path}, sync::Arc}; use anyhow::bail; use heed::EnvOpenOptions; @@ -10,12 +10,10 @@ use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use serde::{Deserialize, Serialize}; use tempfile::TempDir; -use super::update_actor::UpdateActorHandle; -use super::uuid_resolver::UuidResolverHandle; use super::IndexMetadata; use crate::index::Index; use crate::index_controller::uuid_resolver; -use crate::{helpers::compression, index::Settings}; +use crate::helpers::compression; #[derive(Debug, Serialize, Deserialize, Copy, Clone)] enum DumpVersion { @@ -24,7 +22,7 @@ enum DumpVersion { } impl DumpVersion { - const CURRENT: Self = Self::V2; + // const CURRENT: Self = Self::V2; /// Select the good importation function from the `DumpVersion` of metadata pub fn import_index(self, size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { @@ -37,23 +35,25 @@ impl DumpVersion { #[derive(Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] -pub struct DumpMetadata { +pub struct Metadata { indexes: Vec, db_version: String, dump_version: DumpVersion, } -impl DumpMetadata { - /// Create a DumpMetadata with the current dump version of meilisearch. +impl Metadata { + /* + /// Create a Metadata with the current dump version of meilisearch. 
pub fn new(indexes: Vec, db_version: String) -> Self { - DumpMetadata { + Metadata { indexes, db_version, dump_version: DumpVersion::CURRENT, } } + */ - /// Extract DumpMetadata from `metadata.json` file present at provided `dir_path` + /// Extract Metadata from `metadata.json` file present at provided `dir_path` fn from_path(dir_path: &Path) -> anyhow::Result { let path = dir_path.join("metadata.json"); let file = File::open(path)?; @@ -63,7 +63,8 @@ impl DumpMetadata { Ok(metadata) } - /// Write DumpMetadata in `metadata.json` file at provided `dir_path` + /* + /// Write Metadata in `metadata.json` file at provided `dir_path` fn to_path(&self, dir_path: &Path) -> anyhow::Result<()> { let path = dir_path.join("metadata.json"); let file = File::create(path)?; @@ -72,8 +73,10 @@ impl DumpMetadata { Ok(()) } + */ } +/* pub struct DumpService { uuid_resolver_handle: R, update_handle: U, @@ -148,7 +151,9 @@ where Ok(()) } } +*/ +/* /// Write Settings in `settings.json` file at provided `dir_path` fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> { let path = dir_path.join("settings.json"); @@ -158,6 +163,7 @@ fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> Ok(()) } +*/ pub fn load_dump( db_path: impl AsRef, @@ -170,12 +176,12 @@ pub fn load_dump( let uuid_resolver = uuid_resolver::HeedUuidStore::new(&db_path)?; // extract the dump in a temporary directory - let tmp_dir = TempDir::new()?; + let tmp_dir = TempDir::new_in(db_path)?; let tmp_dir_path = tmp_dir.path(); compression::from_tar_gz(dump_path, tmp_dir_path)?; // read dump metadata - let metadata = DumpMetadata::from_path(&tmp_dir_path)?; + let metadata = Metadata::from_path(&tmp_dir_path)?; // remove indexes which have same `uuid` than indexes to import and create empty indexes let existing_index_uids = uuid_resolver.list()?; @@ -207,7 +213,7 @@ pub fn load_dump( // this cannot fail since we created all the missing uuid in the previous loop let uuid = 
uuid_resolver.get_uuid(idx.uid)?.unwrap(); let index_path = db_path.join(&format!("indexes/index-{}", uuid)); - let update_path = db_path.join(&format!("updates/updates-{}", uuid)); // TODO: add the update db + // let update_path = db_path.join(&format!("updates/updates-{}", uuid)); // TODO: add the update db info!("Importing dump from {} into {}...", dump_path.display(), index_path.display()); metadata.dump_version.import_index(size, &dump_path, &index_path).unwrap(); diff --git a/meilisearch-http/src/index_controller/dump/v1.rs b/meilisearch-http/src/index_controller/dump/v1.rs index 89b55c51e..3a20299f3 100644 --- a/meilisearch-http/src/index_controller/dump/v1.rs +++ b/meilisearch-http/src/index_controller/dump/v1.rs @@ -84,19 +84,13 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: std::fs::create_dir_all(&index_path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); - let index = milli::Index::new(options, index_path)?; + let index = milli::Index::new(options.clone(), index_path)?; let index = Index(Arc::new(index)); // extract `settings.json` file and import content let settings = import_settings(&dump_path)?; dbg!(&settings); - let mut settings: index_controller::Settings = settings.into(); - if settings.displayed_attributes.as_ref().map_or(false, |o| o.as_ref().map_or(false, |v| v.contains(&String::from("*")))) { - settings.displayed_attributes = None; - } - if settings.searchable_attributes.as_ref().map_or(false, |o| o.as_ref().map_or(false, |v| v.contains(&String::from("*")))) { - settings.searchable_attributes = None; - } + let settings: index_controller::Settings = settings.into(); let update_builder = UpdateBuilder::new(0); index.update_settings(&settings, update_builder)?; @@ -112,6 +106,9 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: None, )?; + // at this point we should handle the updates, but since the update logic is not handled in + // meilisearch we are 
just going to ignore this part + // the last step: we extract the original milli::Index and close it Arc::try_unwrap(index.0) .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index 7522acc67..e06658ff8 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -178,6 +178,10 @@ mod test { self.as_ref().snapshot(uuid, path).await } + async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> { + self.as_ref().dump(uuid, path).await + } + async fn get_index_stats(&self, uuid: Uuid) -> IndexResult { self.as_ref().get_index_stats(uuid).await } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 9ed6f10e4..ebcc9ed76 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -78,7 +78,7 @@ pub struct Stats { } impl IndexController { - pub async fn new(path: impl AsRef, options: &Opt) -> anyhow::Result { + pub fn new(path: impl AsRef, options: &Opt) -> anyhow::Result { let index_size = options.max_mdb_size.get_bytes() as usize; let update_store_size = options.max_udb_size.get_bytes() as usize; diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index 6789bc6ce..fee907001 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -91,7 +91,6 @@ where meta: UpdateMeta, mut payload: mpsc::Receiver>, ) -> Result { - let file_path = match meta { UpdateMeta::DocumentsAddition { .. 
} | UpdateMeta::DeleteDocuments => { diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index 36390c290..eeca6629f 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -13,9 +13,8 @@ use crate::index_controller::{UpdateMeta, UpdateStatus}; use actor::UpdateActor; use message::UpdateMsg; -use update_store::UpdateStore; -pub use update_store::UpdateStoreInfo; +pub use update_store::{UpdateStore, UpdateStoreInfo}; pub use handle_impl::UpdateActorHandleImpl; pub type Result = std::result::Result; diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index 6f698e693..27018764f 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -177,11 +177,7 @@ pub struct UpdateStore { } impl UpdateStore { - pub fn open( - mut options: EnvOpenOptions, - path: impl AsRef, - index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static, - ) -> anyhow::Result> { + pub fn create(mut options: EnvOpenOptions, path: impl AsRef) -> anyhow::Result<(Self, mpsc::Receiver<()>)> { options.max_dbs(5); let env = options.open(path)?; @@ -189,21 +185,30 @@ impl UpdateStore { let next_update_id = env.create_database(Some("next-update-id"))?; let updates = env.create_database(Some("updates"))?; - let (notification_sender, mut notification_receiver) = mpsc::channel(10); + let state = Arc::new(StateLock::from_state(State::Idle)); + + let (notification_sender, notification_receiver) = mpsc::channel(10); // Send a first notification to trigger the process. 
let _ = notification_sender.send(()); - let state = Arc::new(StateLock::from_state(State::Idle)); + Ok((Self { env, pending_queue, next_update_id, updates, state, notification_sender }, notification_receiver)) + } + + pub fn open( + options: EnvOpenOptions, + path: impl AsRef, + index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static, + ) -> anyhow::Result> { + let (update_store, mut notification_receiver) = Self::create(options, path)?; + let update_store = Arc::new(update_store); // Init update loop to perform any pending updates at launch. // Since we just launched the update store, and we still own the receiving end of the // channel, this call is guaranteed to succeed. - notification_sender + update_store.notification_sender .try_send(()) .expect("Failed to init update store"); - let update_store = Arc::new(UpdateStore { env, pending_queue, next_update_id, updates, state, notification_sender }); - // We need a weak reference so we can take ownership on the arc later when we // want to close the index. let update_store_weak = Arc::downgrade(&update_store); @@ -283,6 +288,18 @@ impl UpdateStore { Ok(meta) } + /// Push already processed updates in the UpdateStore. This is useful for the dumps + pub fn register_already_processed_update ( + &self, + result: UpdateStatus, + index_uuid: Uuid, + ) -> heed::Result<()> { + let mut wtxn = self.env.write_txn()?; + let (_global_id, update_id) = self.next_update_id(&mut wtxn, index_uuid)?; + self.updates.remap_key_type::().put(&mut wtxn, &(index_uuid, update_id), &result)?; + wtxn.commit() + } + /// Executes the user provided function on the next pending update (the one with the lowest id). /// This is asynchronous as it let the user process the update with a read-only txn and /// only writing the result meta to the processed-meta store *after* it has been processed. 
diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 592b70d30..b16f3c0e1 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -54,7 +54,7 @@ async fn main() -> Result<(), MainError> { //snapshot::load_snapshot(&opt.db_path, path, opt.ignore_snapshot_if_db_exists, opt.ignore_missing_snapshot)?; //} - let data = Data::new(opt.clone()).await?; + let data = Data::new(opt.clone())?; //if !opt.no_analytics { //let analytics_data = data.clone(); diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index e5f51d7f0..a39dd54e9 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -19,7 +19,7 @@ async fn get_settings() { assert_eq!(settings.keys().len(), 6); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); - assert_eq!(settings["attributesForFaceting"], json!(null)); + assert_eq!(settings["attributesForFaceting"], json!({})); assert_eq!(settings["distinctAttribute"], json!(null)); assert_eq!( settings["rankingRules"], @@ -82,7 +82,9 @@ async fn reset_all_settings() { assert_eq!(response["searchableAttributes"], json!(["bar"])); assert_eq!(response["stopWords"], json!(["the"])); + eprintln!("BEFORE"); index.delete_settings().await; + eprintln!("AFTER"); index.wait_update_id(1).await; let (response, code) = index.settings().await; From c3552cecdfb4f80d2a36279797d49c151725a59e Mon Sep 17 00:00:00 2001 From: tamo Date: Wed, 5 May 2021 14:11:56 +0200 Subject: [PATCH 07/54] WIP rebase on main --- meilisearch-http/src/data/mod.rs | 4 + .../src/index_controller/dump/mod.rs | 154 +++++++----------- .../src/index_controller/index_actor/actor.rs | 50 ++++-- .../index_actor/handle_impl.rs | 4 +- .../index_controller/index_actor/message.rs | 1 + .../src/index_controller/index_actor/mod.rs | 2 +- meilisearch-http/src/index_controller/mod.rs | 9 +- 
.../index_controller/update_actor/actor.rs | 6 +- .../update_actor/handle_impl.rs | 8 +- .../index_controller/update_actor/message.rs | 2 +- .../src/index_controller/update_actor/mod.rs | 2 +- .../update_actor/update_store.rs | 30 ++-- .../index_controller/uuid_resolver/actor.rs | 7 - .../uuid_resolver/handle_impl.rs | 10 -- .../index_controller/uuid_resolver/message.rs | 4 - .../src/index_controller/uuid_resolver/mod.rs | 1 - .../index_controller/uuid_resolver/store.rs | 30 ---- meilisearch-http/src/lib.rs | 4 +- meilisearch-http/src/routes/dump.rs | 25 ++- meilisearch-http/src/routes/mod.rs | 2 +- 20 files changed, 158 insertions(+), 197 deletions(-) diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index c7979210e..39cfed626 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -108,6 +108,10 @@ impl Data { Ok(self.index_controller.get_all_stats().await?) } + pub async fn dump(&self) -> anyhow::Result { + Ok(self.index_controller.dump(self.options.dumps_dir.clone()).await?) 
+ } + #[inline] pub fn http_payload_size_limit(&self) -> usize { self.options.http_payload_size_limit.get_bytes() as usize diff --git a/meilisearch-http/src/index_controller/dump/mod.rs b/meilisearch-http/src/index_controller/dump/mod.rs index 2bd5f167e..a44d4235b 100644 --- a/meilisearch-http/src/index_controller/dump/mod.rs +++ b/meilisearch-http/src/index_controller/dump/mod.rs @@ -1,16 +1,20 @@ mod v1; mod v2; -use std::{fs::File, path::{Path}, sync::Arc}; +use std::{collections::HashSet, fs::{File}, path::{Path, PathBuf}, sync::Arc}; use anyhow::bail; +use chrono::Utc; use heed::EnvOpenOptions; use log::{error, info}; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use serde::{Deserialize, Serialize}; use tempfile::TempDir; +use tokio::task::spawn_blocking; +use tokio::fs; +use uuid::Uuid; -use super::IndexMetadata; +use super::{IndexController, IndexMetadata, update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; use crate::index::Index; use crate::index_controller::uuid_resolver; use crate::helpers::compression; @@ -22,7 +26,7 @@ enum DumpVersion { } impl DumpVersion { - // const CURRENT: Self = Self::V2; + const CURRENT: Self = Self::V2; /// Select the good importation function from the `DumpVersion` of metadata pub fn import_index(self, size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { @@ -42,7 +46,6 @@ pub struct Metadata { } impl Metadata { - /* /// Create a Metadata with the current dump version of meilisearch. 
pub fn new(indexes: Vec, db_version: String) -> Self { Metadata { @@ -51,7 +54,6 @@ impl Metadata { dump_version: DumpVersion::CURRENT, } } - */ /// Extract Metadata from `metadata.json` file present at provided `dir_path` fn from_path(dir_path: &Path) -> anyhow::Result { @@ -63,105 +65,73 @@ impl Metadata { Ok(metadata) } - /* /// Write Metadata in `metadata.json` file at provided `dir_path` - fn to_path(&self, dir_path: &Path) -> anyhow::Result<()> { + pub async fn to_path(&self, dir_path: &Path) -> anyhow::Result<()> { let path = dir_path.join("metadata.json"); - let file = File::create(path)?; - - serde_json::to_writer(file, &self)?; - - Ok(()) - } - */ -} - -/* -pub struct DumpService { - uuid_resolver_handle: R, - update_handle: U, - dump_path: PathBuf, - db_name: String, -} - -impl DumpService -where - U: UpdateActorHandle, - R: UuidResolverHandle, -{ - pub fn new( - uuid_resolver_handle: R, - update_handle: U, - dump_path: PathBuf, - db_name: String, - ) -> Self { - Self { - uuid_resolver_handle, - update_handle, - dump_path, - db_name, - } - } - - pub async fn run(self) { - if let Err(e) = self.perform_dump().await { - error!("{}", e); - } - } - - async fn perform_dump(&self) -> anyhow::Result<()> { - /* - info!("Performing dump."); - - let dump_dir = self.dump_path.clone(); - fs::create_dir_all(&dump_dir).await?; - let temp_dump_dir = spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let uuids = self - .uuid_resolver_handle - .dump(temp_dump_path.clone()) - .await?; - - if uuids.is_empty() { - return Ok(()); - } - - let tasks = uuids - .iter() - .map(|&uuid| self.update_handle.dump(uuid, temp_dump_path.clone())) - .collect::>(); - - futures::future::try_join_all(tasks).await?; - - let dump_dir = self.dump_path.clone(); - let dump_path = self.dump_path.join(format!("{}.dump", self.db_name)); - let dump_path = spawn_blocking(move || -> anyhow::Result { - let temp_dump_file = 
tempfile::NamedTempFile::new_in(dump_dir)?; - let temp_dump_file_path = temp_dump_file.path().to_owned(); - compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; - temp_dump_file.persist(&dump_path)?; - Ok(dump_path) - }) - .await??; - - info!("Created dump in {:?}.", dump_path); - */ + tokio::fs::write(path, serde_json::to_string(self)?).await?; Ok(()) } } -*/ + +/// Generate uid from creation date +fn generate_uid() -> String { + Utc::now().format("%Y%m%d-%H%M%S%3f").to_string() +} + +pub async fn perform_dump(index_controller: &IndexController, dump_path: PathBuf) -> anyhow::Result { + info!("Performing dump."); + + let dump_dir = dump_path.clone(); + let uid = generate_uid(); + fs::create_dir_all(&dump_dir).await?; + let temp_dump_dir = spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); + + let uuids = index_controller.uuid_resolver.list().await?; + // maybe we could just keep the vec as-is + let uuids: HashSet<(String, Uuid)> = uuids.into_iter().collect(); + + if uuids.is_empty() { + return Ok(uid); + } + + let indexes = index_controller.list_indexes().await?; + + // we create one directory by index + for meta in indexes.iter() { + tokio::fs::create_dir(temp_dump_path.join(&meta.uid)).await?; + } + + let metadata = Metadata::new(indexes, env!("CARGO_PKG_VERSION").to_string()); + metadata.to_path(&temp_dump_path).await?; + + index_controller.update_handle.dump(uuids, temp_dump_path.clone()).await?; + let dump_dir = dump_path.clone(); + let dump_path = dump_path.join(format!("{}.dump", uid)); + let dump_path = spawn_blocking(move || -> anyhow::Result { + let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?; + let temp_dump_file_path = temp_dump_file.path().to_owned(); + compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; + temp_dump_file.persist(&dump_path)?; + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + + Ok(uid) +} /* /// Write 
Settings in `settings.json` file at provided `dir_path` fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> { - let path = dir_path.join("settings.json"); - let file = File::create(path)?; +let path = dir_path.join("settings.json"); +let file = File::create(path)?; - serde_json::to_writer(file, settings)?; +serde_json::to_writer(file, settings)?; - Ok(()) +Ok(()) } */ diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index ecf71b62b..623b42ddc 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -122,8 +122,8 @@ impl IndexActor { Snapshot { uuid, path, ret } => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } - Dump { uuid, path, ret } => { - let _ = ret.send(self.handle_dump(uuid, path).await); + Dump { uid, uuid, path, ret } => { + let _ = ret.send(self.handle_dump(&uid, uuid, path).await); } GetStats { uuid, ret } => { let _ = ret.send(self.handle_get_stats(uuid).await); @@ -312,24 +312,52 @@ impl IndexActor { Ok(()) } - async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> { + /// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the + /// documents and all the settings. + async fn handle_dump(&self, uid: &str, uuid: Uuid, path: PathBuf) -> IndexResult<()> { use tokio::fs::create_dir_all; + use std::io::prelude::*; - path.push("indexes"); create_dir_all(&path) .await .map_err(|e| IndexError::Error(e.into()))?; if let Some(index) = self.store.get(uuid).await? 
{ - let mut index_path = path.join(format!("index-{}", uuid)); - create_dir_all(&index_path) - .await - .map_err(|e| IndexError::Error(e.into()))?; - index_path.push("data.mdb"); + let documents_path = path.join(uid).join("documents.jsonl"); + let settings_path = path.join(uid).join("settings.json"); + spawn_blocking(move || -> anyhow::Result<()> { + // first we dump all the documents + let file = File::create(documents_path)?; + let mut file = std::io::BufWriter::new(file); + // Get write txn to wait for ongoing write transaction before dump. - let _txn = index.write_txn()?; - index.env.copy_to_path(index_path, CompactionOption::Enabled)?; + let txn = index.write_txn()?; + let documents_ids = index.documents_ids(&txn)?; + // TODO: TAMO: calling this function here can consume **a lot** of RAM, we should + // use some kind of iterators -> waiting for a milli release + let documents = index.documents(&txn, documents_ids)?; + + let fields_ids_map = index.fields_ids_map(&txn)?; + // we want to save **all** the fields in the dump. 
+ let fields_to_dump: Vec = fields_ids_map.iter().map(|(id, _)| id).collect(); + + for (_doc_id, document) in documents { + let json = milli::obkv_to_json(&fields_to_dump, &fields_ids_map, document)?; + file.write_all(serde_json::to_string(&json)?.as_bytes())?; + file.write_all(b"\n")?; + } + + + // then we dump all the settings + let file = File::create(settings_path)?; + let mut file = std::io::BufWriter::new(file); + let settings = index.settings()?; + + file.write_all(serde_json::to_string(&settings)?.as_bytes())?; + file.write_all(b"\n")?; + + Ok(()) }) .await diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index 26aa189d0..64b63e5f0 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -136,9 +136,9 @@ impl IndexActorHandle for IndexActorHandleImpl { Ok(receiver.await.expect("IndexActor has been killed")?) } - async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> { + async fn dump(&self, uid: String, uuid: Uuid, path: PathBuf) -> IndexResult<()> { let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Dump { uuid, path, ret }; + let msg = IndexMsg::Dump { uid, uuid, path, ret }; let _ = self.sender.send(msg).await; Ok(receiver.await.expect("IndexActor has been killed")?) 
} diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 714a30ecc..37faa1e31 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -61,6 +61,7 @@ pub enum IndexMsg { ret: oneshot::Sender>, }, Dump { + uid: String, uuid: Uuid, path: PathBuf, ret: oneshot::Sender>, diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index e06658ff8..0145a33d9 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -97,7 +97,7 @@ pub trait IndexActorHandle { index_settings: IndexSettings, ) -> IndexResult; async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>; - async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>; + async fn dump(&self, uid: String, uuid: Uuid, path: PathBuf) -> IndexResult<()>; async fn get_index_stats(&self, uuid: Uuid) -> IndexResult; } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index ebcc9ed76..6ea42c73d 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::{collections::BTreeMap, path::PathBuf}; use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -378,6 +378,13 @@ impl IndexController { Ok(stats) } + pub async fn dump(&self, path: PathBuf) -> anyhow::Result { + eprintln!("index_controller::mod called"); + let res = dump::perform_dump(self, path).await?; + eprintln!("index_controller::mod finished"); + Ok(res) + } + pub async fn get_all_stats(&self) -> anyhow::Result { let update_infos = self.update_handle.get_info().await?; let mut database_size = self.get_uuids_size().await? 
+ update_infos.size; diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index fee907001..64794bc6f 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -235,11 +235,11 @@ where Ok(()) } - async fn handle_dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { + async fn handle_dump(&self, uuids: HashSet<(String, Uuid)>, path: PathBuf) -> Result<()> { let index_handle = self.index_handle.clone(); let update_store = self.store.clone(); tokio::task::spawn_blocking(move || -> anyhow::Result<()> { - update_store.dump(&uuids, &path)?; + update_store.dump(&uuids, path.to_path_buf())?; // Perform the snapshot of each index concurently. Only a third of the capabilities of // the index actor at a time not to put too much pressure on the index actor @@ -247,7 +247,7 @@ where let handle = &index_handle; let mut stream = futures::stream::iter(uuids.iter()) - .map(|&uuid| handle.dump(uuid, path.clone())) + .map(|(uid, uuid)| handle.dump(uid.clone(), *uuid, path.clone())) .buffer_unordered(CONCURRENT_INDEX_MSG / 3); Handle::current().block_on(async { diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index 09a242377..a497a3c5c 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -71,16 +71,16 @@ where receiver.await.expect("update actor killed.") } - async fn get_info(&self) -> Result { + async fn dump(&self, uuids: HashSet<(String, Uuid)>, path: PathBuf) -> Result<()> { let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::GetInfo { ret }; + let msg = UpdateMsg::Dump { uuids, path, ret }; let _ = self.sender.send(msg).await; receiver.await.expect("update actor killed.") } - async fn dump(&self, uuids: 
HashSet, path: PathBuf) -> Result<()> { + async fn get_info(&self) -> Result { let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::Dump { uuids, path, ret }; + let msg = UpdateMsg::GetInfo { ret }; let _ = self.sender.send(msg).await; receiver.await.expect("update actor killed.") } diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 37df2af32..4103ca121 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -32,7 +32,7 @@ pub enum UpdateMsg { ret: oneshot::Sender>, }, Dump { - uuids: HashSet, + uuids: HashSet<(String, Uuid)>, path: PathBuf, ret: oneshot::Sender>, }, diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index eeca6629f..05b793e45 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -40,7 +40,7 @@ pub trait UpdateActorHandle { async fn update_status(&self, uuid: Uuid, id: u64) -> Result; async fn delete(&self, uuid: Uuid) -> Result<()>; async fn snapshot(&self, uuid: HashSet, path: PathBuf) -> Result<()>; - async fn dump(&self, uuid: HashSet, path: PathBuf) -> Result<()>; + async fn dump(&self, uuid: HashSet<(String, Uuid)>, path: PathBuf) -> Result<()>; async fn get_info(&self) -> Result; async fn update( &self, diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index 27018764f..d767dfa93 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -1,4 +1,4 @@ -use std::borrow::Cow; +use std::{borrow::Cow, path::PathBuf}; use std::collections::{BTreeMap, HashSet}; use std::convert::TryInto; use 
std::fs::{copy, create_dir_all, remove_file, File}; @@ -294,6 +294,7 @@ impl UpdateStore { result: UpdateStatus, index_uuid: Uuid, ) -> heed::Result<()> { + // TODO: TAMO: load already processed updates let mut wtxn = self.env.write_txn()?; let (_global_id, update_id) = self.next_update_id(&mut wtxn, index_uuid)?; self.updates.remap_key_type::().put(&mut wtxn, &(index_uuid, update_id), &result)?; @@ -516,31 +517,34 @@ impl UpdateStore { Ok(()) } - pub fn dump(&self, uuids: &HashSet, path: impl AsRef) -> anyhow::Result<()> { + pub fn dump(&self, uuids: &HashSet<(String, Uuid)>, path: PathBuf) -> anyhow::Result<()> { + use std::io::prelude::*; let state_lock = self.state.write(); - state_lock.swap(State::Snapshoting); // TODO: rename the state + state_lock.swap(State::Snapshoting); // TODO: TAMO rename the state somehow let txn = self.env.write_txn()?; - let update_path = path.as_ref().join("updates"); - create_dir_all(&update_path)?; + for (uid, uuid) in uuids.iter() { + let file = File::create(path.join(uid).join("updates.jsonl"))?; + let mut file = std::io::BufWriter::new(file); - // acquire write lock to prevent further writes during dump - create_dir_all(&update_path)?; - let db_path = update_path.join("data.mdb"); + for update in &self.list(*uuid)? 
{ + serde_json::to_writer(&mut file, update)?; + file.write_all(b"\n")?; + } + } - // TODO: everything - // create db dump - self.env.copy_to_path(&db_path, CompactionOption::Enabled)?; + // TODO: TAMO: the updates + // already processed updates seems to works, but I've not tried with currently running updates - let update_files_path = update_path.join("update_files"); + let update_files_path = path.join("update_files"); create_dir_all(&update_files_path)?; let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); for entry in pendings { let ((_, uuid, _), pending) = entry?; - if uuids.contains(&uuid) { + if uuids.iter().any(|(_, id)| id == &uuid) { if let Some(path) = pending.decode()?.content_path() { let name = path.file_name().unwrap(); let to = update_files_path.join(name); diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs index df83ceba9..253326276 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs @@ -41,9 +41,6 @@ impl UuidResolverActor { Some(SnapshotRequest { path, ret }) => { let _ = ret.send(self.handle_snapshot(path).await); } - Some(DumpRequest { path, ret }) => { - let _ = ret.send(self.handle_dump(path).await); - } Some(GetSize { ret }) => { let _ = ret.send(self.handle_get_size().await); } @@ -85,10 +82,6 @@ impl UuidResolverActor { self.store.snapshot(path).await } - async fn handle_dump(&self, path: PathBuf) -> Result> { - self.store.dump(path).await - } - async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> { if !is_index_uid_valid(&uid) { return Err(UuidError::BadlyFormatted(uid)); diff --git a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs index d9e9a20fc..db4c482bd 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs +++ 
b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs @@ -68,7 +68,6 @@ impl UuidResolverHandle for UuidResolverHandleImpl { .expect("Uuid resolver actor has been killed")?) } - /// TODO: we should merge this function with the dump function async fn snapshot(&self, path: PathBuf) -> Result> { let (ret, receiver) = oneshot::channel(); let msg = UuidResolveMsg::SnapshotRequest { path, ret }; @@ -78,15 +77,6 @@ impl UuidResolverHandle for UuidResolverHandleImpl { .expect("Uuid resolver actor has been killed")?) } - async fn dump(&self, path: PathBuf) -> Result> { - let (ret, receiver) = oneshot::channel(); - let msg = UuidResolveMsg::DumpRequest { path, ret }; - let _ = self.sender.send(msg).await; - Ok(receiver - .await - .expect("Uuid resolver actor has been killed")?) - } - async fn get_size(&self) -> Result { let (ret, receiver) = oneshot::channel(); let msg = UuidResolveMsg::GetSize { ret }; diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs b/meilisearch-http/src/index_controller/uuid_resolver/message.rs index 78f62eea2..a72bf0587 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/message.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/message.rs @@ -31,10 +31,6 @@ pub enum UuidResolveMsg { path: PathBuf, ret: oneshot::Sender>>, }, - DumpRequest { - path: PathBuf, - ret: oneshot::Sender>>, - }, GetSize { ret: oneshot::Sender>, }, diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs index aca730db9..0cbb2895b 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs @@ -32,7 +32,6 @@ pub trait UuidResolverHandle { async fn delete(&self, name: String) -> anyhow::Result; async fn list(&self) -> anyhow::Result>; async fn snapshot(&self, path: PathBuf) -> Result>; - async fn dump(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> 
Result; } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 917e0b4a5..a781edcba 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -21,7 +21,6 @@ pub trait UuidStore { async fn list(&self) -> Result>; async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; async fn snapshot(&self, path: PathBuf) -> Result>; - async fn dump(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; } @@ -114,8 +113,6 @@ impl HeedUuidStore { Ok(()) } - // TODO: we should merge this function and the following function for the dump. it's exactly - // the same code pub fn snapshot(&self, mut path: PathBuf) -> Result> { let env = self.env.clone(); let db = self.db; @@ -138,28 +135,6 @@ impl HeedUuidStore { Ok(entries) } - pub fn dump(&self, mut path: PathBuf) -> Result> { - let env = self.env.clone(); - let db = self.db; - // Write transaction to acquire a lock on the database. - let txn = env.write_txn()?; - let mut entries = HashSet::new(); - for entry in db.iter(&txn)? { - let (_, uuid) = entry?; - let uuid = Uuid::from_slice(uuid)?; - entries.insert(uuid); - } - - // only perform dump if there are indexes - if !entries.is_empty() { - path.push("index_uuids"); - create_dir_all(&path).unwrap(); - path.push("data.mdb"); - env.copy_to_path(path, CompactionOption::Enabled)?; - } - Ok(entries) - } - pub fn get_size(&self) -> Result { Ok(self.env.size()) } @@ -197,11 +172,6 @@ impl UuidStore for HeedUuidStore { tokio::task::spawn_blocking(move || this.snapshot(path)).await? } - async fn dump(&self, path: PathBuf) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.dump(path)).await? 
- } - async fn get_size(&self) -> Result { self.get_size() } diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index fd5cf6786..e19037482 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -46,8 +46,8 @@ macro_rules! create_app { .configure(synonym::services) .configure(health::services) .configure(stats::services) - .configure(key::services); - //.configure(routes::dump::services); + .configure(key::services) + .configure(dump::services); #[cfg(feature = "mini-dashboard")] let app = if $enable_frontend { let generated = dashboard::generate(); diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index c46b0e502..410b817b8 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -1,14 +1,10 @@ -use std::fs::File; -use std::path::Path; +use actix_web::{post, get, web}; +use actix_web::HttpResponse; +use serde::{Serialize, Deserialize}; -use actix_web::{get, post}; -use actix_web::{HttpResponse, web}; -use serde::{Deserialize, Serialize}; - -use crate::dump::{DumpInfo, DumpStatus, compressed_dumps_dir, init_dump_process}; -use crate::Data; -use crate::error::{Error, ResponseError}; +use crate::error::ResponseError; use crate::helpers::Authentication; +use crate::Data; pub fn services(cfg: &mut web::ServiceConfig) { cfg.service(trigger_dump) @@ -19,7 +15,10 @@ pub fn services(cfg: &mut web::ServiceConfig) { async fn trigger_dump( data: web::Data, ) -> Result { - todo!() + eprintln!("dump started"); + let res = data.dump().await?; + + Ok(HttpResponse::Ok().body(res)) } #[derive(Debug, Serialize)] @@ -30,13 +29,13 @@ struct DumpStatusResponse { #[derive(Deserialize)] struct DumpParam { - dump_uid: String, + _dump_uid: String, } #[get("/dumps/{dump_uid}/status", wrap = "Authentication::Private")] async fn get_dump_status( - data: web::Data, - path: web::Path, + _data: web::Data, + _path: web::Path, ) -> Result { todo!() } diff --git 
a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index aaf13613a..999c4f881 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -9,7 +9,7 @@ pub mod search; pub mod settings; pub mod stats; pub mod synonym; -//pub mod dump; +pub mod dump; #[derive(Deserialize)] pub struct IndexParam { From efca63f9cea84d8b98255ea7b2a333dabf63f924 Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 10 May 2021 20:25:09 +0200 Subject: [PATCH 08/54] [WIP] rebase on main --- meilisearch-http/src/data/mod.rs | 11 +- .../src/index_controller/dump_actor/actor.rs | 200 ++++++++++++++++++ .../dump_actor/handle_impl.rs | 41 ++++ .../index_controller/dump_actor/message.rs | 15 ++ .../{dump => dump_actor}/mod.rs | 151 +++++++------ .../{dump => dump_actor}/v1.rs | 0 .../{dump => dump_actor}/v2.rs | 0 .../src/index_controller/index_actor/actor.rs | 4 +- meilisearch-http/src/index_controller/mod.rs | 27 ++- meilisearch-http/src/routes/dump.rs | 19 +- 10 files changed, 381 insertions(+), 87 deletions(-) create mode 100644 meilisearch-http/src/index_controller/dump_actor/actor.rs create mode 100644 meilisearch-http/src/index_controller/dump_actor/handle_impl.rs create mode 100644 meilisearch-http/src/index_controller/dump_actor/message.rs rename meilisearch-http/src/index_controller/{dump => dump_actor}/mod.rs (63%) rename meilisearch-http/src/index_controller/{dump => dump_actor}/v1.rs (100%) rename meilisearch-http/src/index_controller/{dump => dump_actor}/v2.rs (100%) diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index 39cfed626..008065d74 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -4,8 +4,7 @@ use std::sync::Arc; use sha2::Digest; use crate::index::{Checked, Settings}; -use crate::index_controller::{IndexController, IndexStats, Stats}; -use crate::index_controller::{IndexMetadata, IndexSettings}; +use crate::index_controller::{IndexController, IndexStats, 
Stats, DumpInfo, IndexMetadata, IndexSettings}; use crate::option::Opt; pub mod search; @@ -108,8 +107,12 @@ impl Data { Ok(self.index_controller.get_all_stats().await?) } - pub async fn dump(&self) -> anyhow::Result { - Ok(self.index_controller.dump(self.options.dumps_dir.clone()).await?) + pub async fn create_dump(&self) -> anyhow::Result { + Ok(self.index_controller.create_dump().await?) + } + + pub async fn dump_status(&self, uid: String) -> anyhow::Result { + Ok(self.index_controller.dump_info(uid).await?) } #[inline] diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs new file mode 100644 index 000000000..b41ddadcf --- /dev/null +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -0,0 +1,200 @@ +use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus}; +use crate::helpers::compression; +use crate::index_controller::{index_actor, update_actor, uuid_resolver, IndexMetadata}; +use chrono::Utc; +use log::{error, info, warn}; +use std::{ + collections::HashSet, + path::{Path, PathBuf}, + sync::Arc, +}; +use tokio::sync::{mpsc, Mutex}; +use uuid::Uuid; + +pub struct DumpActor { + inbox: mpsc::Receiver, + inner: InnerDump, +} + +#[derive(Clone)] +struct InnerDump { + pub uuid_resolver: UuidResolver, + pub index: Index, + pub update: Update, + pub dump_path: PathBuf, + pub dump_info: Arc>>, +} + +/// Generate uid from creation date +fn generate_uid() -> String { + Utc::now().format("%Y%m%d-%H%M%S%3f").to_string() +} + +impl DumpActor +where + UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static, + Index: index_actor::IndexActorHandle + Send + Sync + Clone + 'static, + Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, +{ + pub fn new( + inbox: mpsc::Receiver, + uuid_resolver: UuidResolver, + index: Index, + update: Update, + dump_path: impl AsRef, + ) -> Self { + Self { + inbox, + inner: InnerDump { + uuid_resolver, + 
index, + update, + dump_path: dump_path.as_ref().into(), + dump_info: Arc::new(Mutex::new(None)), + }, + } + } + + pub async fn run(mut self) { + use DumpMsg::*; + + info!("Started dump actor."); + + loop { + match self.inbox.recv().await { + Some(CreateDump { ret }) => { + let _ = ret.send(self.inner.clone().handle_create_dump().await); + } + Some(DumpInfo { ret, uid }) => { + let _ = ret.send(self.inner.handle_dump_info(uid).await); + } + None => break, + } + } + + error!("Dump actor stopped."); + } +} + +impl InnerDump +where + UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static, + Index: index_actor::IndexActorHandle + Send + Sync + Clone + 'static, + Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, +{ + async fn handle_create_dump(self) -> DumpResult { + if self.is_running().await { + return Err(DumpError::DumpAlreadyRunning); + } + let uid = generate_uid(); + let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress); + *self.dump_info.lock().await = Some(info.clone()); + + let this = self.clone(); + + tokio::task::spawn(async move { + match this.perform_dump(uid).await { + Ok(()) => { + if let Some(ref mut info) = *self.dump_info.lock().await { + info.done(); + } else { + warn!("dump actor was in an inconsistant state"); + } + info!("Dump succeed"); + } + Err(e) => { + if let Some(ref mut info) = *self.dump_info.lock().await { + info.with_error(e.to_string()); + } else { + warn!("dump actor was in an inconsistant state"); + } + error!("Dump failed: {}", e); + } + }; + }); + + Ok(info) + } + + async fn perform_dump(self, uid: String) -> anyhow::Result<()> { + info!("Performing dump."); + + let dump_dir = self.dump_path.clone(); + tokio::fs::create_dir_all(&dump_dir).await?; + let temp_dump_dir = + tokio::task::spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); + + let uuids = self.uuid_resolver.list().await?; + // maybe we could just 
keep the vec as-is + let uuids: HashSet<(String, Uuid)> = uuids.into_iter().collect(); + + if uuids.is_empty() { + return Ok(()); + } + + let indexes = self.list_indexes().await?; + + // we create one directory by index + for meta in indexes.iter() { + tokio::fs::create_dir(temp_dump_path.join(&meta.uid)).await?; + } + + let metadata = super::Metadata::new(indexes, env!("CARGO_PKG_VERSION").to_string()); + metadata.to_path(&temp_dump_path).await?; + + self.update.dump(uuids, temp_dump_path.clone()).await?; + + let dump_dir = self.dump_path.clone(); + let dump_path = self.dump_path.join(format!("{}.dump", uid)); + let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { + let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?; + let temp_dump_file_path = temp_dump_file.path().to_owned(); + compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; + temp_dump_file.persist(&dump_path)?; + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + + Ok(()) + } + + async fn list_indexes(&self) -> anyhow::Result> { + let uuids = self.uuid_resolver.list().await?; + + let mut ret = Vec::new(); + + for (uid, uuid) in uuids { + let meta = self.index.get_index_meta(uuid).await?; + let meta = IndexMetadata { + uuid, + name: uid.clone(), + uid, + meta, + }; + ret.push(meta); + } + + Ok(ret) + } + + async fn handle_dump_info(&self, uid: String) -> DumpResult { + match &*self.dump_info.lock().await { + None => Err(DumpError::DumpDoesNotExist(uid)), + Some(DumpInfo { uid: ref s, .. }) if &uid != s => Err(DumpError::DumpDoesNotExist(uid)), + Some(info) => Ok(info.clone()), + } + } + + async fn is_running(&self) -> bool { + matches!( + *self.dump_info.lock().await, + Some(DumpInfo { + status: DumpStatus::InProgress, + .. 
+ }) + ) + } +} diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs new file mode 100644 index 000000000..601c97c01 --- /dev/null +++ b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs @@ -0,0 +1,41 @@ +use std::path::{Path}; +use actix_web::web::Bytes; +use tokio::sync::{mpsc, oneshot}; +use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult}; + +#[derive(Clone)] +pub struct DumpActorHandleImpl { + sender: mpsc::Sender, +} + +#[async_trait::async_trait] +impl DumpActorHandle for DumpActorHandleImpl { + async fn create_dump(&self) -> DumpResult { + let (ret, receiver) = oneshot::channel(); + let msg = DumpMsg::CreateDump { ret }; + let _ = self.sender.send(msg).await; + receiver.await.expect("IndexActor has been killed") + } + + async fn dump_info(&self, uid: String) -> DumpResult { + let (ret, receiver) = oneshot::channel(); + let msg = DumpMsg::DumpInfo { ret, uid }; + let _ = self.sender.send(msg).await; + receiver.await.expect("IndexActor has been killed") + } +} + +impl DumpActorHandleImpl { + pub fn new( + path: impl AsRef, + uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl, + index: crate::index_controller::index_actor::IndexActorHandleImpl, + update: crate::index_controller::update_actor::UpdateActorHandleImpl, + ) -> anyhow::Result { + let (sender, receiver) = mpsc::channel(10); + let actor = DumpActor::new(receiver, uuid_resolver, index, update, path); + + tokio::task::spawn(actor.run()); + Ok(Self { sender }) + } +} diff --git a/meilisearch-http/src/index_controller/dump_actor/message.rs b/meilisearch-http/src/index_controller/dump_actor/message.rs new file mode 100644 index 000000000..14409afbb --- /dev/null +++ b/meilisearch-http/src/index_controller/dump_actor/message.rs @@ -0,0 +1,15 @@ +use tokio::sync::oneshot; + +use super::{DumpResult, DumpInfo}; + + +pub enum DumpMsg { + CreateDump { + ret: 
oneshot::Sender>, + }, + DumpInfo { + uid: String, + ret: oneshot::Sender>, + }, +} + diff --git a/meilisearch-http/src/index_controller/dump/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs similarity index 63% rename from meilisearch-http/src/index_controller/dump/mod.rs rename to meilisearch-http/src/index_controller/dump_actor/mod.rs index a44d4235b..f57c27c59 100644 --- a/meilisearch-http/src/index_controller/dump/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -1,23 +1,48 @@ mod v1; mod v2; +mod handle_impl; +mod actor; +mod message; -use std::{collections::HashSet, fs::{File}, path::{Path, PathBuf}, sync::Arc}; +use std::{ + fs::File, + path::Path, + sync::Arc, +}; +#[cfg(test)] +use mockall::automock; use anyhow::bail; -use chrono::Utc; +use thiserror::Error; use heed::EnvOpenOptions; use log::{error, info}; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use serde::{Deserialize, Serialize}; +use serde_json::json; use tempfile::TempDir; -use tokio::task::spawn_blocking; -use tokio::fs; -use uuid::Uuid; -use super::{IndexController, IndexMetadata, update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; +use super::IndexMetadata; +use crate::helpers::compression; use crate::index::Index; use crate::index_controller::uuid_resolver; -use crate::helpers::compression; + +pub use handle_impl::*; +pub use actor::DumpActor; +pub use message::DumpMsg; + +pub type DumpResult = std::result::Result; + +#[derive(Error, Debug)] +pub enum DumpError { + #[error("error with index: {0}")] + Error(#[from] anyhow::Error), + #[error("Heed error: {0}")] + HeedError(#[from] heed::Error), + #[error("dump already running")] + DumpAlreadyRunning, + #[error("dump `{0}` does not exist")] + DumpDoesNotExist(String), +} #[derive(Debug, Serialize, Deserialize, Copy, Clone)] enum DumpVersion { @@ -29,7 +54,12 @@ impl DumpVersion { const CURRENT: Self = Self::V2; /// Select the good importation function from the 
`DumpVersion` of metadata - pub fn import_index(self, size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { + pub fn import_index( + self, + size: usize, + dump_path: &Path, + index_path: &Path, + ) -> anyhow::Result<()> { match self { Self::V1 => v1::import_index(size, dump_path, index_path), Self::V2 => v2::import_index(size, dump_path, index_path), @@ -37,6 +67,19 @@ impl DumpVersion { } } +#[async_trait::async_trait] +#[cfg_attr(test, automock)] +pub trait DumpActorHandle { + /// Start the creation of a dump + /// Implementation: [handle_impl::DumpActorHandleImpl::create_dump] + async fn create_dump(&self) -> DumpResult; + + /// Return the status of an already created dump + /// Implementation: [handle_impl::DumpActorHandleImpl::dump_status] + async fn dump_info(&self, uid: String) -> DumpResult; +} + + #[derive(Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Metadata { @@ -74,66 +117,46 @@ impl Metadata { } } -/// Generate uid from creation date -fn generate_uid() -> String { - Utc::now().format("%Y%m%d-%H%M%S%3f").to_string() +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] +#[serde(rename_all = "snake_case")] +pub enum DumpStatus { + Done, + InProgress, + Failed, } -pub async fn perform_dump(index_controller: &IndexController, dump_path: PathBuf) -> anyhow::Result { - info!("Performing dump."); +#[derive(Debug, Serialize, Clone)] +#[serde(rename_all = "camelCase")] +pub struct DumpInfo { + pub uid: String, + pub status: DumpStatus, + #[serde(skip_serializing_if = "Option::is_none", flatten)] + pub error: Option, +} - let dump_dir = dump_path.clone(); - let uid = generate_uid(); - fs::create_dir_all(&dump_dir).await?; - let temp_dump_dir = spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let uuids = index_controller.uuid_resolver.list().await?; - // maybe we could just keep the vec as-is - let uuids: HashSet<(String, Uuid)> = 
uuids.into_iter().collect(); - - if uuids.is_empty() { - return Ok(uid); +impl DumpInfo { + pub fn new(uid: String, status: DumpStatus) -> Self { + Self { + uid, + status, + error: None, + } } - let indexes = index_controller.list_indexes().await?; - - // we create one directory by index - for meta in indexes.iter() { - tokio::fs::create_dir(temp_dump_path.join(&meta.uid)).await?; + pub fn with_error(&mut self, error: String) { + self.status = DumpStatus::Failed; + self.error = Some(json!(error)); } - let metadata = Metadata::new(indexes, env!("CARGO_PKG_VERSION").to_string()); - metadata.to_path(&temp_dump_path).await?; + pub fn done(&mut self) { + self.status = DumpStatus::Done; + } - index_controller.update_handle.dump(uuids, temp_dump_path.clone()).await?; - let dump_dir = dump_path.clone(); - let dump_path = dump_path.join(format!("{}.dump", uid)); - let dump_path = spawn_blocking(move || -> anyhow::Result { - let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?; - let temp_dump_file_path = temp_dump_file.path().to_owned(); - compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; - temp_dump_file.persist(&dump_path)?; - Ok(dump_path) - }) - .await??; - - info!("Created dump in {:?}.", dump_path); - - Ok(uid) + pub fn dump_already_in_progress(&self) -> bool { + self.status == DumpStatus::InProgress + } } -/* -/// Write Settings in `settings.json` file at provided `dir_path` -fn settings_to_path(settings: &Settings, dir_path: &Path) -> anyhow::Result<()> { -let path = dir_path.join("settings.json"); -let file = File::create(path)?; - -serde_json::to_writer(file, settings)?; - -Ok(()) -} -*/ pub fn load_dump( db_path: impl AsRef, @@ -185,12 +208,18 @@ pub fn load_dump( let index_path = db_path.join(&format!("indexes/index-{}", uuid)); // let update_path = db_path.join(&format!("updates/updates-{}", uuid)); // TODO: add the update db - info!("Importing dump from {} into {}...", dump_path.display(), index_path.display()); - 
metadata.dump_version.import_index(size, &dump_path, &index_path).unwrap(); + info!( + "Importing dump from {} into {}...", + dump_path.display(), + index_path.display() + ); + metadata + .dump_version + .import_index(size, &dump_path, &index_path) + .unwrap(); info!("Dump importation from {} succeed", dump_path.display()); } - info!("Dump importation from {} succeed", dump_path.display()); Ok(()) } diff --git a/meilisearch-http/src/index_controller/dump/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump/v1.rs rename to meilisearch-http/src/index_controller/dump_actor/v1.rs diff --git a/meilisearch-http/src/index_controller/dump/v2.rs b/meilisearch-http/src/index_controller/dump_actor/v2.rs similarity index 100% rename from meilisearch-http/src/index_controller/dump/v2.rs rename to meilisearch-http/src/index_controller/dump_actor/v2.rs diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 623b42ddc..ca23663b7 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -315,8 +315,8 @@ impl IndexActor { /// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the /// documents and all the settings. 
async fn handle_dump(&self, uid: &str, uuid: Uuid, path: PathBuf) -> IndexResult<()> { - use tokio::fs::create_dir_all; use std::io::prelude::*; + use tokio::fs::create_dir_all; create_dir_all(&path) .await @@ -348,7 +348,6 @@ impl IndexActor { file.write_all(b"\n")?; } - // then we dump all the settings let file = File::create(settings_path)?; let mut file = std::io::BufWriter::new(file); @@ -357,7 +356,6 @@ impl IndexActor { file.write_all(serde_json::to_string(&settings)?.as_bytes())?; file.write_all(b"\n")?; - Ok(()) }) .await diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 6ea42c73d..d1bb5e170 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, path::PathBuf}; +use std::collections::BTreeMap; use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -15,6 +15,8 @@ use tokio::time::sleep; use uuid::Uuid; pub use updates::*; +pub use dump_actor::{DumpInfo, DumpStatus}; +use dump_actor::DumpActorHandle; use index_actor::IndexActorHandle; use snapshot::{SnapshotService, load_snapshot}; use update_actor::UpdateActorHandle; @@ -23,11 +25,11 @@ use uuid_resolver::{UuidError, UuidResolverHandle}; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use crate::option::Opt; -use self::dump::load_dump; +use dump_actor::load_dump; mod index_actor; mod snapshot; -mod dump; +mod dump_actor; mod update_actor; mod update_handler; mod updates; @@ -63,10 +65,12 @@ pub struct IndexStats { pub fields_distribution: FieldsDistribution, } +#[derive(Clone)] pub struct IndexController { uuid_resolver: uuid_resolver::UuidResolverHandleImpl, index_handle: index_actor::IndexActorHandleImpl, update_handle: update_actor::UpdateActorHandleImpl, + dump_handle: dump_actor::DumpActorHandleImpl, } #[derive(Serialize)] @@ -108,6 +112,7 @@ impl IndexController { &path, update_store_size, )?; + 
let dump_handle = dump_actor::DumpActorHandleImpl::new(&options.dumps_dir, uuid_resolver.clone(), index_handle.clone(), update_handle.clone())?; if options.schedule_snapshot { let snapshot_service = SnapshotService::new( @@ -129,6 +134,7 @@ impl IndexController { uuid_resolver, index_handle, update_handle, + dump_handle, }) } @@ -378,13 +384,6 @@ impl IndexController { Ok(stats) } - pub async fn dump(&self, path: PathBuf) -> anyhow::Result { - eprintln!("index_controller::mod called"); - let res = dump::perform_dump(self, path).await?; - eprintln!("index_controller::mod finished"); - Ok(res) - } - pub async fn get_all_stats(&self) -> anyhow::Result { let update_infos = self.update_handle.get_info().await?; let mut database_size = self.get_uuids_size().await? + update_infos.size; @@ -410,6 +409,14 @@ impl IndexController { indexes, }) } + + pub async fn create_dump(&self) -> anyhow::Result { + Ok(self.dump_handle.create_dump().await?) + } + + pub async fn dump_info(&self, uid: String) -> anyhow::Result { + Ok(self.dump_handle.dump_info(uid).await?) 
+ } } pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 410b817b8..e6be4ca93 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -7,18 +7,17 @@ use crate::helpers::Authentication; use crate::Data; pub fn services(cfg: &mut web::ServiceConfig) { - cfg.service(trigger_dump) + cfg.service(create_dump) .service(get_dump_status); } #[post("/dumps", wrap = "Authentication::Private")] -async fn trigger_dump( +async fn create_dump( data: web::Data, ) -> Result { - eprintln!("dump started"); - let res = data.dump().await?; + let res = data.create_dump().await?; - Ok(HttpResponse::Ok().body(res)) + Ok(HttpResponse::Ok().json(res)) } #[derive(Debug, Serialize)] @@ -29,13 +28,15 @@ struct DumpStatusResponse { #[derive(Deserialize)] struct DumpParam { - _dump_uid: String, + dump_uid: String, } #[get("/dumps/{dump_uid}/status", wrap = "Authentication::Private")] async fn get_dump_status( - _data: web::Data, - _path: web::Path, + data: web::Data, + path: web::Path, ) -> Result { - todo!() + let res = data.dump_status(path.dump_uid.clone()).await?; + + Ok(HttpResponse::Ok().json(res)) } From 24192fc5504894c0aa61576cbc6e3ea2a50c041b Mon Sep 17 00:00:00 2001 From: tamo Date: Wed, 5 May 2021 18:03:21 +0200 Subject: [PATCH 09/54] fix tests --- meilisearch-http/src/index_controller/dump_actor/v1.rs | 1 - meilisearch-http/src/index_controller/dump_actor/v2.rs | 1 - meilisearch-http/src/index_controller/index_actor/mod.rs | 4 ++-- meilisearch-http/src/index_controller/update_actor/actor.rs | 2 +- .../src/index_controller/update_actor/update_store.rs | 6 ++++-- meilisearch-http/tests/settings/get_settings.rs | 2 -- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs index 3a20299f3..f22120849 100644 --- 
a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v1.rs @@ -89,7 +89,6 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: // extract `settings.json` file and import content let settings = import_settings(&dump_path)?; - dbg!(&settings); let settings: index_controller::Settings = settings.into(); let update_builder = UpdateBuilder::new(0); index.update_settings(&settings, update_builder)?; diff --git a/meilisearch-http/src/index_controller/dump_actor/v2.rs b/meilisearch-http/src/index_controller/dump_actor/v2.rs index 7b9a56772..5c5e5fb2d 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v2.rs @@ -25,7 +25,6 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: let settings = import_settings(&dump_path)?; let update_builder = UpdateBuilder::new(0); index.update_settings(&settings, update_builder)?; - dbg!(settings); let update_builder = UpdateBuilder::new(1); let file = File::open(&dump_path.join("documents.jsonl"))?; diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index 0145a33d9..cf6a81223 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -178,8 +178,8 @@ mod test { self.as_ref().snapshot(uuid, path).await } - async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> { - self.as_ref().dump(uuid, path).await + async fn dump(&self, uid: String, uuid: Uuid, path: PathBuf) -> IndexResult<()> { + self.as_ref().dump(uid, uuid, path).await } async fn get_index_stats(&self, uuid: Uuid) -> IndexResult { diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index 64794bc6f..fe4458acd 100644 --- 
a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -241,7 +241,7 @@ where tokio::task::spawn_blocking(move || -> anyhow::Result<()> { update_store.dump(&uuids, path.to_path_buf())?; - // Perform the snapshot of each index concurently. Only a third of the capabilities of + // Perform the dump of each index concurently. Only a third of the capabilities of // the index actor at a time not to put too much pressure on the index actor let path = &path; let handle = &index_handle; diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index d767dfa93..f3d7dfd0a 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -11,6 +11,7 @@ use arc_swap::ArcSwap; use heed::types::{ByteSlice, OwnedType, SerdeJson}; use heed::zerocopy::U64; use heed::{BytesDecode, BytesEncode, CompactionOption, Database, Env, EnvOpenOptions}; +use log::error; use parking_lot::{Mutex, MutexGuard}; use tokio::runtime::Handle; use tokio::sync::mpsc; @@ -77,6 +78,7 @@ pub enum State { Idle, Processing(Uuid, Processing), Snapshoting, + Dumping, } impl<'a> BytesEncode<'a> for NextIdCodec { @@ -227,7 +229,7 @@ impl UpdateStore { match res { Ok(Some(_)) => (), Ok(None) => break, - Err(e) => eprintln!("error while processing update: {}", e), + Err(e) => error!("error while processing update: {}", e), } } // the ownership on the arc has been taken, we need to exit. 
@@ -520,7 +522,7 @@ impl UpdateStore { pub fn dump(&self, uuids: &HashSet<(String, Uuid)>, path: PathBuf) -> anyhow::Result<()> { use std::io::prelude::*; let state_lock = self.state.write(); - state_lock.swap(State::Snapshoting); // TODO: TAMO rename the state somehow + state_lock.swap(State::Dumping); let txn = self.env.write_txn()?; diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index a39dd54e9..4230e19f8 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -82,9 +82,7 @@ async fn reset_all_settings() { assert_eq!(response["searchableAttributes"], json!(["bar"])); assert_eq!(response["stopWords"], json!(["the"])); - eprintln!("BEFORE"); index.delete_settings().await; - eprintln!("AFTER"); index.wait_update_id(1).await; let (response, code) = index.settings().await; From 956012da95c78d83a8832e8e2e4dc27d506ee58c Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Wed, 5 May 2021 19:06:07 +0200 Subject: [PATCH 10/54] fix dump lock --- .../index_controller/update_actor/actor.rs | 24 +++-------------- .../update_actor/update_store.rs | 26 ++++++++++++++++--- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index fe4458acd..54d068f14 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -239,28 +239,12 @@ where let index_handle = self.index_handle.clone(); let update_store = self.store.clone(); tokio::task::spawn_blocking(move || -> anyhow::Result<()> { - update_store.dump(&uuids, path.to_path_buf())?; - - // Perform the dump of each index concurently. 
Only a third of the capabilities of - // the index actor at a time not to put too much pressure on the index actor - let path = &path; - let handle = &index_handle; - - let mut stream = futures::stream::iter(uuids.iter()) - .map(|(uid, uuid)| handle.dump(uid.clone(), *uuid, path.clone())) - .buffer_unordered(CONCURRENT_INDEX_MSG / 3); - - Handle::current().block_on(async { - while let Some(res) = stream.next().await { - res?; - } - Ok(()) - }) + update_store.dump(&uuids, path.to_path_buf(), index_handle)?; + Ok(()) }) .await - .map_err(|e| UpdateError::Error(e.into()))? - .map_err(|e| UpdateError::Error(e.into()))?; - + .map_err(|e| UpdateError::Error(e.into()))? + .map_err(|e| UpdateError::Error(e.into()))?; Ok(()) } diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index f3d7dfd0a..524fefe84 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -16,10 +16,11 @@ use parking_lot::{Mutex, MutexGuard}; use tokio::runtime::Handle; use tokio::sync::mpsc; use uuid::Uuid; +use futures::StreamExt; use super::UpdateMeta; use crate::helpers::EnvSizer; -use crate::index_controller::{IndexActorHandle, updates::*}; +use crate::index_controller::{IndexActorHandle, updates::*, index_actor::CONCURRENT_INDEX_MSG}; #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; @@ -519,7 +520,12 @@ impl UpdateStore { Ok(()) } - pub fn dump(&self, uuids: &HashSet<(String, Uuid)>, path: PathBuf) -> anyhow::Result<()> { + pub fn dump( + &self, + uuids: &HashSet<(String, Uuid)>, + path: PathBuf, + handle: impl IndexActorHandle + ) -> anyhow::Result<()> { use std::io::prelude::*; let state_lock = self.state.write(); state_lock.swap(State::Dumping); @@ -555,7 +561,21 @@ impl UpdateStore { } } - Ok(()) + + // Perform the dump of each index concurently. 
Only a third of the capabilities of + // the index actor at a time not to put too much pressure on the index actor + let path = &path; + + let mut stream = futures::stream::iter(uuids.iter()) + .map(|(uid, uuid)| handle.dump(uid.clone(), *uuid, path.clone())) + .buffer_unordered(CONCURRENT_INDEX_MSG / 3); + + Handle::current().block_on(async { + while let Some(res) = stream.next().await { + res?; + } + Ok(()) + }) } pub fn get_info(&self) -> anyhow::Result { From 26dcb9e66d5311feea92302e9cb0b57ac0c590f5 Mon Sep 17 00:00:00 2001 From: tamo Date: Thu, 6 May 2021 11:57:42 +0200 Subject: [PATCH 11/54] bump milli version and fix a performance issue for large dumps --- Cargo.lock | 6 ++++-- meilisearch-http/Cargo.toml | 2 +- .../src/index_controller/index_actor/actor.rs | 8 ++------ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1c109a79..26c53663a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 3 + [[package]] name = "actix-codec" version = "0.4.0" @@ -1840,8 +1842,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.2.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.2.0#792225eaffce6b3682f9b30b7370b6a547c4757e" +version = "0.2.1" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.2.1#25f75d4d03732131e6edcf20f4d126210b159d43" dependencies = [ "anyhow", "bstr", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 7ac3ecb38..c9f8d63b7 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -51,7 +51,7 @@ main_error = "0.1.0" meilisearch-error = { path = "../meilisearch-error" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.2" } memmap = "0.7.0" -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.2.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.2.1" } mime = "0.3.16" once_cell = "1.5.2" oxidized-json-checker = "0.3.2" diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index ca23663b7..1f0091265 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -333,16 +333,12 @@ impl IndexActor { // Get write txn to wait for ongoing write transaction before dump. let txn = index.write_txn()?; - let documents_ids = index.documents_ids(&txn)?; - // TODO: TAMO: calling this function here can consume **a lot** of RAM, we should - // use some kind of iterators -> waiting for a milli release - let documents = index.documents(&txn, documents_ids)?; - let fields_ids_map = index.fields_ids_map(&txn)?; // we want to save **all** the fields in the dump. let fields_to_dump: Vec = fields_ids_map.iter().map(|(id, _)| id).collect(); - for (_doc_id, document) in documents { + for document in index.all_documents(&txn)? 
{ + let (_doc_id, document) = document?; let json = milli::obkv_to_json(&fields_to_dump, &fields_ids_map, document)?; file.write_all(serde_json::to_string(&json)?.as_bytes())?; file.write_all(b"\n")?; From 5f5402a3abd9fc2bbedf1e3af980eb050a035c3a Mon Sep 17 00:00:00 2001 From: tamo Date: Thu, 6 May 2021 18:12:57 +0200 Subject: [PATCH 12/54] provide a way to access the internal content path of all processing State --- .../src/index_controller/updates.rs | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/meilisearch-http/src/index_controller/updates.rs b/meilisearch-http/src/index_controller/updates.rs index a129a25a0..6b5ef345d 100644 --- a/meilisearch-http/src/index_controller/updates.rs +++ b/meilisearch-http/src/index_controller/updates.rs @@ -72,6 +72,10 @@ impl Enqueued { pub fn content_path(&self) -> Option<&Path> { self.content.as_deref() } + + pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { + self.content.as_mut() + } } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -87,6 +91,14 @@ impl Processed { pub fn id(&self) -> u64 { self.from.id() } + + pub fn content_path(&self) -> Option<&Path> { + self.from.content_path() + } + + pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { + self.from.content_path_mut() + } } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -106,6 +118,14 @@ impl Processing { self.from.meta() } + pub fn content_path(&self) -> Option<&Path> { + self.from.content_path() + } + + pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { + self.from.content_path_mut() + } + pub fn process(self, success: UpdateResult) -> Processed { Processed { success, @@ -135,6 +155,14 @@ impl Aborted { pub fn id(&self) -> u64 { self.from.id() } + + pub fn content_path(&self) -> Option<&Path> { + self.from.content_path() + } + + pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { + self.from.content_path_mut() + } } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -150,6 +178,14 @@ impl Failed { pub 
fn id(&self) -> u64 { self.from.id() } + + pub fn content_path(&self) -> Option<&Path> { + self.from.content_path() + } + + pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { + self.from.content_path_mut() + } } #[derive(Debug, Serialize, Deserialize)] @@ -179,6 +215,26 @@ impl UpdateStatus { _ => None, } } + + pub fn content_path(&self) -> Option<&Path> { + match self { + UpdateStatus::Processing(u) => u.content_path(), + UpdateStatus::Processed(u) => u.content_path(), + UpdateStatus::Aborted(u) => u.content_path(), + UpdateStatus::Failed(u) => u.content_path(), + UpdateStatus::Enqueued(u) => u.content_path(), + } + } + + pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { + match self { + UpdateStatus::Processing(u) => u.content_path_mut(), + UpdateStatus::Processed(u) => u.content_path_mut(), + UpdateStatus::Aborted(u) => u.content_path_mut(), + UpdateStatus::Failed(u) => u.content_path_mut(), + UpdateStatus::Enqueued(u) => u.content_path_mut(), + } + } } impl From for UpdateStatus { From 40ced3ff8d1eb51358c0d134129866e7bfed7000 Mon Sep 17 00:00:00 2001 From: tamo Date: Thu, 6 May 2021 18:44:16 +0200 Subject: [PATCH 13/54] first working version --- .../src/index_controller/dump_actor/mod.rs | 29 ++++++++++-- .../src/index_controller/dump_actor/v1.rs | 4 +- .../src/index_controller/dump_actor/v2.rs | 14 ++++-- .../src/index_controller/index_actor/mod.rs | 2 +- .../update_actor/update_store.rs | 46 +++++++++++++------ 5 files changed, 68 insertions(+), 27 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index f57c27c59..eb2bc4684 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -59,10 +59,11 @@ impl DumpVersion { size: usize, dump_path: &Path, index_path: &Path, + primary_key: Option<&str>, ) -> anyhow::Result<()> { match self { - Self::V1 => v1::import_index(size, 
dump_path, index_path), - Self::V2 => v2::import_index(size, dump_path, index_path), + Self::V1 => v1::import_index(size, dump_path, index_path, primary_key), + Self::V2 => v2::import_index(size, dump_path, index_path, primary_key), } } } @@ -206,7 +207,26 @@ pub fn load_dump( // this cannot fail since we created all the missing uuid in the previous loop let uuid = uuid_resolver.get_uuid(idx.uid)?.unwrap(); let index_path = db_path.join(&format!("indexes/index-{}", uuid)); - // let update_path = db_path.join(&format!("updates/updates-{}", uuid)); // TODO: add the update db + // let update_path = db_path.join(&format!("updates")); + + info!("importing the updates"); + use crate::index_controller::update_actor::UpdateStore; + use std::io::BufRead; + + let update_path = db_path.join("updates"); + let options = EnvOpenOptions::new(); + // create an UpdateStore to import the updates + std::fs::create_dir_all(&update_path)?; + let (update_store, _) = UpdateStore::create(options, update_path)?; + let file = File::open(&dump_path.join("updates.jsonl"))?; + let reader = std::io::BufReader::new(file); + + let mut wtxn = update_store.env.write_txn()?; + for update in reader.lines() { + let update = serde_json::from_str(&update?)?; + update_store.register_raw_updates(&mut wtxn, update, uuid)?; + } + wtxn.commit()?; info!( "Importing dump from {} into {}...", @@ -215,11 +235,12 @@ pub fn load_dump( ); metadata .dump_version - .import_index(size, &dump_path, &index_path) + .import_index(size, &dump_path, &index_path, idx.meta.primary_key.as_ref().map(|s| s.as_ref())) .unwrap(); info!("Dump importation from {} succeed", dump_path.display()); } + info!("Dump importation from {} succeed", dump_path.display()); Ok(()) } diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs index f22120849..d20723e8c 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ 
b/meilisearch-http/src/index_controller/dump_actor/v1.rs @@ -78,7 +78,7 @@ fn import_settings(dir_path: &Path) -> anyhow::Result { } -pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { +pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { info!("Importing a dump from an old version of meilisearch with dump version 1"); std::fs::create_dir_all(&index_path)?; @@ -102,7 +102,7 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: IndexDocumentsMethod::ReplaceDocuments, Some(reader), update_builder, - None, + primary_key, )?; // at this point we should handle the updates, but since the update logic is not handled in diff --git a/meilisearch-http/src/index_controller/dump_actor/v2.rs b/meilisearch-http/src/index_controller/dump_actor/v2.rs index 5c5e5fb2d..301268233 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v2.rs @@ -1,5 +1,5 @@ use heed::EnvOpenOptions; -use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +use milli::{update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; use crate::index::Index; use crate::index_controller::Settings; use std::{fs::File, path::Path, sync::Arc}; @@ -14,7 +14,7 @@ fn import_settings(dir_path: &Path) -> anyhow::Result { Ok(metadata) } -pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> { +pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { std::fs::create_dir_all(&index_path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); @@ -26,17 +26,21 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow: let update_builder = UpdateBuilder::new(0); index.update_settings(&settings, update_builder)?; + // import the documents in the index let 
update_builder = UpdateBuilder::new(1); let file = File::open(&dump_path.join("documents.jsonl"))?; let reader = std::io::BufReader::new(file); - index.update_documents( + // TODO: TAMO: currently we ignore any error caused by the importation of the documents because + // if there is no documents nor primary key it'll throw an anyhow error, but we must remove + // this before the merge on main + let _ = index.update_documents( UpdateFormat::JsonStream, IndexDocumentsMethod::ReplaceDocuments, Some(reader), update_builder, - None, - )?; + primary_key, + ); // the last step: we extract the original milli::Index and close it Arc::try_unwrap(index.0) diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index cf6a81223..3b92b1078 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -31,7 +31,7 @@ pub type IndexResult = std::result::Result; pub struct IndexMeta { created_at: DateTime, pub updated_at: DateTime, - primary_key: Option, + pub primary_key: Option, } impl IndexMeta { diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index 524fefe84..745311f05 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -250,21 +250,31 @@ impl UpdateStore { .get(txn, &NextIdKey::Global)? .map(U64::get) .unwrap_or_default(); + + self.next_update_id + .put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?; + + let update_id = self.next_update_id_raw(txn, index_uuid)?; + + Ok((global_id, update_id)) + } + + /// Returns the next next update id for a given `index_uuid` without + /// incrementing the global update id. This is useful for the dumps. 
+ fn next_update_id_raw(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result { let update_id = self .next_update_id .get(txn, &NextIdKey::Index(index_uuid))? .map(U64::get) .unwrap_or_default(); - self.next_update_id - .put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?; self.next_update_id.put( txn, &NextIdKey::Index(index_uuid), &BEU64::new(update_id + 1), )?; - Ok((global_id, update_id)) + Ok(update_id) } /// Registers the update content in the pending store and the meta @@ -291,17 +301,27 @@ impl UpdateStore { Ok(meta) } - /// Push already processed updates in the UpdateStore. This is useful for the dumps - pub fn register_already_processed_update ( + /// Push already processed update in the UpdateStore without triggering the notification + /// process. This is useful for the dumps. + pub fn register_raw_updates ( &self, - result: UpdateStatus, + wtxn: &mut heed::RwTxn, + update: UpdateStatus, index_uuid: Uuid, ) -> heed::Result<()> { - // TODO: TAMO: load already processed updates - let mut wtxn = self.env.write_txn()?; - let (_global_id, update_id) = self.next_update_id(&mut wtxn, index_uuid)?; - self.updates.remap_key_type::().put(&mut wtxn, &(index_uuid, update_id), &result)?; - wtxn.commit() + // TODO: TAMO: since I don't want to store anything I currently generate a new global ID + // everytime I encounter an enqueued update, can we do better? + match update { + UpdateStatus::Enqueued(enqueued) => { + let (global_id, update_id) = self.next_update_id(wtxn, index_uuid)?; + self.pending_queue.remap_key_type::().put(wtxn, &(global_id, index_uuid, update_id), &enqueued)?; + } + _ => { + let update_id = self.next_update_id_raw(wtxn, index_uuid)?; + self.updates.remap_key_type::().put(wtxn, &(index_uuid, update_id), &update)?; + } + } + Ok(()) } /// Executes the user provided function on the next pending update (the one with the lowest id). 
@@ -542,9 +562,6 @@ impl UpdateStore { } } - // TODO: TAMO: the updates - // already processed updates seems to works, but I've not tried with currently running updates - let update_files_path = path.join("update_files"); create_dir_all(&update_files_path)?; @@ -561,7 +578,6 @@ impl UpdateStore { } } - // Perform the dump of each index concurently. Only a third of the capabilities of // the index actor at a time not to put too much pressure on the index actor let path = &path; From ef438852cd0579878607264983aef4ccd85e2a59 Mon Sep 17 00:00:00 2001 From: tamo Date: Thu, 6 May 2021 18:47:56 +0200 Subject: [PATCH 14/54] fix the v1 --- meilisearch-http/src/index_controller/dump_actor/v1.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs index d20723e8c..89c5b24c0 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v1.rs @@ -51,7 +51,7 @@ impl From for index_controller::Settings { warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored"); Some(String::from("words")) } - "attribute" | "exactness" => { + "exactness" => { error!("The criterion `{}` is not implemented currently and thus will be ignored", criterion); None } @@ -97,13 +97,14 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_ke let file = File::open(&dump_path.join("documents.jsonl"))?; let reader = std::io::BufReader::new(file); - index.update_documents( + // TODO: TAMO: waiting for milli. 
We should use the result + let _ = index.update_documents( UpdateFormat::JsonStream, IndexDocumentsMethod::ReplaceDocuments, Some(reader), update_builder, primary_key, - )?; + ); // at this point we should handle the updates, but since the update logic is not handled in // meilisearch we are just going to ignore this part From d7679904240fa5771c995cdd9ece72d3939c467c Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 10 May 2021 20:20:36 +0200 Subject: [PATCH 15/54] fix the import of the updates in the dump --- meilisearch-http/src/index/mod.rs | 9 --- meilisearch-http/src/index/updates.rs | 6 +- .../src/index_controller/dump_actor/mod.rs | 76 +++++++++++------- .../src/index_controller/dump_actor/v1.rs | 6 +- .../update_actor/update_store.rs | 77 ++++++++++++++----- .../src/index_controller/updates.rs | 2 +- .../tests/settings/get_settings.rs | 4 +- 7 files changed, 116 insertions(+), 64 deletions(-) diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index b0c145001..ceaa6103e 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -35,15 +35,6 @@ where Deserialize::deserialize(deserializer).map(Some) } -pub fn deserialize_wildcard<'de, I, D>(deserializer: D) -> Result>, D::Error> -where - D: Deserializer<'de>, - I: IntoIterator + Deserialize<'de> + Clone, -{ - Ok( as Deserialize>::deserialize(deserializer)? 
- .map(|item: I| (!item.clone().into_iter().any(|s| s == "*")).then(|| item))) -} - impl Index { pub fn settings(&self) -> anyhow::Result> { let txn = self.read_txn()?; diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 75d0dc3e6..67edc15d0 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -8,7 +8,7 @@ use log::info; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use serde::{Deserialize, Serialize}; -use super::{deserialize_some, deserialize_wildcard, Index}; +use super::{deserialize_some, Index}; use crate::index_controller::UpdateResult; @@ -23,14 +23,14 @@ pub struct Unchecked; pub struct Settings { #[serde( default, - deserialize_with = "deserialize_wildcard", + deserialize_with = "deserialize_some", skip_serializing_if = "Option::is_none" )] pub displayed_attributes: Option>>, #[serde( default, - deserialize_with = "deserialize_wildcard", + deserialize_with = "deserialize_some", skip_serializing_if = "Option::is_none" )] pub searchable_attributes: Option>>, diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index eb2bc4684..f79cd839b 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -1,33 +1,29 @@ +mod actor; +mod handle_impl; +mod message; mod v1; mod v2; -mod handle_impl; -mod actor; -mod message; -use std::{ - fs::File, - path::Path, - sync::Arc, -}; +use std::{fs::File, path::Path, sync::Arc}; -#[cfg(test)] -use mockall::automock; use anyhow::bail; -use thiserror::Error; use heed::EnvOpenOptions; use log::{error, info}; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +#[cfg(test)] +use mockall::automock; use serde::{Deserialize, Serialize}; use serde_json::json; use tempfile::TempDir; +use thiserror::Error; use super::IndexMetadata; use 
crate::helpers::compression; use crate::index::Index; -use crate::index_controller::uuid_resolver; +use crate::index_controller::{uuid_resolver, UpdateStatus}; -pub use handle_impl::*; pub use actor::DumpActor; +pub use handle_impl::*; pub use message::DumpMsg; pub type DumpResult = std::result::Result; @@ -80,7 +76,6 @@ pub trait DumpActorHandle { async fn dump_info(&self, uid: String) -> DumpResult; } - #[derive(Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Metadata { @@ -158,7 +153,6 @@ impl DumpInfo { } } - pub fn load_dump( db_path: impl AsRef, dump_path: impl AsRef, @@ -209,6 +203,22 @@ pub fn load_dump( let index_path = db_path.join(&format!("indexes/index-{}", uuid)); // let update_path = db_path.join(&format!("updates")); + info!( + "Importing dump from {} into {}...", + dump_path.display(), + index_path.display() + ); + metadata + .dump_version + .import_index( + size, + &dump_path, + &index_path, + idx.meta.primary_key.as_ref().map(|s| s.as_ref()), + ) + .unwrap(); + info!("Dump importation from {} succeed", dump_path.display()); + info!("importing the updates"); use crate::index_controller::update_actor::UpdateStore; use std::io::BufRead; @@ -217,29 +227,39 @@ pub fn load_dump( let options = EnvOpenOptions::new(); // create an UpdateStore to import the updates std::fs::create_dir_all(&update_path)?; - let (update_store, _) = UpdateStore::create(options, update_path)?; + let (update_store, _) = UpdateStore::create(options, &update_path)?; let file = File::open(&dump_path.join("updates.jsonl"))?; let reader = std::io::BufReader::new(file); let mut wtxn = update_store.env.write_txn()?; for update in reader.lines() { - let update = serde_json::from_str(&update?)?; + let mut update: UpdateStatus = serde_json::from_str(&update?)?; + if let Some(path) = update.content_path_mut() { + *path = update_path.join("update_files").join(&path).into(); + } update_store.register_raw_updates(&mut wtxn, update, uuid)?; } wtxn.commit()?; - - 
info!( - "Importing dump from {} into {}...", - dump_path.display(), - index_path.display() - ); - metadata - .dump_version - .import_index(size, &dump_path, &index_path, idx.meta.primary_key.as_ref().map(|s| s.as_ref())) - .unwrap(); - info!("Dump importation from {} succeed", dump_path.display()); } + // finally we can move all the unprocessed update file into our new DB + let update_path = tmp_dir_path.join("update_files"); + let files: Vec<_> = std::fs::read_dir(&db_path.join("updates"))? + .map(|file| file.unwrap().path()) + .collect(); + let db_update_path = db_path.join("updates/update_files"); + eprintln!("path {:?} exists: {:?}", update_path, update_path.exists()); + eprintln!( + "path {:?} exists: {:?}", + db_update_path, + db_update_path.exists() + ); + let _ = std::fs::remove_dir_all(db_update_path); + std::fs::rename( + tmp_dir_path.join("update_files"), + db_path.join("updates/update_files"), + ) + .unwrap(); info!("Dump importation from {} succeed", dump_path.display()); Ok(()) diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs index 89c5b24c0..92f8bf712 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v1.rs @@ -3,7 +3,7 @@ use std::collections::{BTreeMap, BTreeSet}; use log::warn; use serde::{Deserialize, Serialize}; use crate::index_controller; -use crate::index::{deserialize_wildcard, deserialize_some}; +use crate::index::deserialize_some; use super::*; /// This is the settings used in the last version of meilisearch exporting dump in V1 @@ -14,9 +14,9 @@ struct Settings { pub ranking_rules: Option>>, #[serde(default, deserialize_with = "deserialize_some")] pub distinct_attribute: Option>, - #[serde(default, deserialize_with = "deserialize_wildcard")] + #[serde(default, deserialize_with = "deserialize_some")] pub searchable_attributes: Option>>, - #[serde(default, deserialize_with = 
"deserialize_wildcard")] + #[serde(default, deserialize_with = "deserialize_some")] pub displayed_attributes: Option>>, #[serde(default, deserialize_with = "deserialize_some")] pub stop_words: Option>>, diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index 745311f05..07dfdf273 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -1,13 +1,14 @@ -use std::{borrow::Cow, path::PathBuf}; use std::collections::{BTreeMap, HashSet}; use std::convert::TryInto; use std::fs::{copy, create_dir_all, remove_file, File}; use std::mem::size_of; use std::path::Path; use std::sync::Arc; +use std::{borrow::Cow, path::PathBuf}; use anyhow::Context; use arc_swap::ArcSwap; +use futures::StreamExt; use heed::types::{ByteSlice, OwnedType, SerdeJson}; use heed::zerocopy::U64; use heed::{BytesDecode, BytesEncode, CompactionOption, Database, Env, EnvOpenOptions}; @@ -16,11 +17,10 @@ use parking_lot::{Mutex, MutexGuard}; use tokio::runtime::Handle; use tokio::sync::mpsc; use uuid::Uuid; -use futures::StreamExt; use super::UpdateMeta; use crate::helpers::EnvSizer; -use crate::index_controller::{IndexActorHandle, updates::*, index_actor::CONCURRENT_INDEX_MSG}; +use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle}; #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; @@ -180,7 +180,10 @@ pub struct UpdateStore { } impl UpdateStore { - pub fn create(mut options: EnvOpenOptions, path: impl AsRef) -> anyhow::Result<(Self, mpsc::Receiver<()>)> { + pub fn create( + mut options: EnvOpenOptions, + path: impl AsRef, + ) -> anyhow::Result<(Self, mpsc::Receiver<()>)> { options.max_dbs(5); let env = options.open(path)?; @@ -194,7 +197,17 @@ impl UpdateStore { // Send a first notification to trigger the process. 
let _ = notification_sender.send(()); - Ok((Self { env, pending_queue, next_update_id, updates, state, notification_sender }, notification_receiver)) + Ok(( + Self { + env, + pending_queue, + next_update_id, + updates, + state, + notification_sender, + }, + notification_receiver, + )) } pub fn open( @@ -208,7 +221,8 @@ impl UpdateStore { // Init update loop to perform any pending updates at launch. // Since we just launched the update store, and we still own the receiving end of the // channel, this call is guaranteed to succeed. - update_store.notification_sender + update_store + .notification_sender .try_send(()) .expect("Failed to init update store"); @@ -303,22 +317,28 @@ impl UpdateStore { /// Push already processed update in the UpdateStore without triggering the notification /// process. This is useful for the dumps. - pub fn register_raw_updates ( + pub fn register_raw_updates( &self, wtxn: &mut heed::RwTxn, update: UpdateStatus, index_uuid: Uuid, ) -> heed::Result<()> { - // TODO: TAMO: since I don't want to store anything I currently generate a new global ID - // everytime I encounter an enqueued update, can we do better? 
match update { UpdateStatus::Enqueued(enqueued) => { - let (global_id, update_id) = self.next_update_id(wtxn, index_uuid)?; - self.pending_queue.remap_key_type::().put(wtxn, &(global_id, index_uuid, update_id), &enqueued)?; + let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?; + self.pending_queue.remap_key_type::().put( + wtxn, + &(global_id, index_uuid, enqueued.id()), + &enqueued, + )?; } _ => { - let update_id = self.next_update_id_raw(wtxn, index_uuid)?; - self.updates.remap_key_type::().put(wtxn, &(index_uuid, update_id), &update)?; + let _update_id = self.next_update_id_raw(wtxn, index_uuid)?; + self.updates.remap_key_type::().put( + wtxn, + &(index_uuid, update.id()), + &update, + )?; } } Ok(()) @@ -544,20 +564,39 @@ impl UpdateStore { &self, uuids: &HashSet<(String, Uuid)>, path: PathBuf, - handle: impl IndexActorHandle - ) -> anyhow::Result<()> { + handle: impl IndexActorHandle, + ) -> anyhow::Result<()> { use std::io::prelude::*; let state_lock = self.state.write(); state_lock.swap(State::Dumping); let txn = self.env.write_txn()?; - for (uid, uuid) in uuids.iter() { - let file = File::create(path.join(uid).join("updates.jsonl"))?; + for (index_uid, index_uuid) in uuids.iter() { + let file = File::create(path.join(index_uid).join("updates.jsonl"))?; let mut file = std::io::BufWriter::new(file); - for update in &self.list(*uuid)? 
{ - serde_json::to_writer(&mut file, update)?; + let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); + for entry in pendings { + let ((_, uuid, _), pending) = entry?; + if &uuid == index_uuid { + let mut update: UpdateStatus = pending.decode()?.into(); + if let Some(path) = update.content_path_mut() { + *path = path.file_name().expect("update path can't be empty").into(); + } + serde_json::to_writer(&mut file, &update)?; + file.write_all(b"\n")?; + } + } + + let updates = self.updates.prefix_iter(&txn, index_uuid.as_bytes())?; + for entry in updates { + let (_, update) = entry?; + let mut update = update.clone(); + if let Some(path) = update.content_path_mut() { + *path = path.file_name().expect("update path can't be empty").into(); + } + serde_json::to_writer(&mut file, &update)?; file.write_all(b"\n")?; } } diff --git a/meilisearch-http/src/index_controller/updates.rs b/meilisearch-http/src/index_controller/updates.rs index 6b5ef345d..31f0005f8 100644 --- a/meilisearch-http/src/index_controller/updates.rs +++ b/meilisearch-http/src/index_controller/updates.rs @@ -188,7 +188,7 @@ impl Failed { } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Clone)] #[serde(tag = "status", rename_all = "camelCase")] pub enum UpdateStatus { Processing(Processing), diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index 4230e19f8..ab688076d 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -73,7 +73,7 @@ async fn reset_all_settings() { let server = Server::new().await; let index = server.index("test"); index - .update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"], "stopWords": ["the"] })) + .update_settings(json!({"displayedAttributes": ["foo"], "searchableAttributes": ["bar"], "stopWords": ["the"], "attributesForFaceting": { "toto": "string" } })) .await; 
index.wait_update_id(0).await; let (response, code) = index.settings().await; @@ -81,6 +81,7 @@ async fn reset_all_settings() { assert_eq!(response["displayedAttributes"], json!(["foo"])); assert_eq!(response["searchableAttributes"], json!(["bar"])); assert_eq!(response["stopWords"], json!(["the"])); + assert_eq!(response["attributesForFaceting"], json!({"toto": "string"})); index.delete_settings().await; index.wait_update_id(1).await; @@ -90,6 +91,7 @@ async fn reset_all_settings() { assert_eq!(response["displayedAttributes"], json!(["*"])); assert_eq!(response["searchableAttributes"], json!(["*"])); assert_eq!(response["stopWords"], json!([])); + assert_eq!(response["attributesForFaceting"], json!({})); } #[actix_rt::test] From 7d748fa3841287131b6252479bb53d336a611ed8 Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 10 May 2021 20:48:06 +0200 Subject: [PATCH 16/54] integrate the new Settings in the dumps --- meilisearch-http/src/index/mod.rs | 2 +- meilisearch-http/src/index/updates.rs | 9 +++++++-- .../src/index_controller/dump_actor/mod.rs | 3 --- .../src/index_controller/dump_actor/v1.rs | 11 ++++++----- .../src/index_controller/dump_actor/v2.rs | 4 ++-- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index ceaa6103e..d3f30bf2e 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -8,7 +8,7 @@ use serde_json::{Map, Value}; use crate::helpers::EnvSizer; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; -pub use updates::{Facets, Settings, Checked, Unchecked, UpdateResult}; +pub use updates::{Facets, Settings, Checked, Unchecked}; use serde::{de::Deserializer, Deserialize}; mod search; diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 67edc15d0..0762c8550 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -8,9 +8,10 @@ use log::info; 
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use serde::{Deserialize, Serialize}; -use super::{deserialize_some, Index}; use crate::index_controller::UpdateResult; +use super::{deserialize_some, Index}; + #[derive(Clone, Default, Debug)] pub struct Checked; @@ -35,7 +36,11 @@ pub struct Settings { )] pub searchable_attributes: Option>>, - #[serde(default)] + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] pub attributes_for_faceting: Option>>, #[serde( diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index f79cd839b..a8409f623 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -244,9 +244,6 @@ pub fn load_dump( // finally we can move all the unprocessed update file into our new DB let update_path = tmp_dir_path.join("update_files"); - let files: Vec<_> = std::fs::read_dir(&db_path.join("updates"))? 
- .map(|file| file.unwrap().path()) - .collect(); let db_update_path = db_path.join("updates/update_files"); eprintln!("path {:?} exists: {:?}", update_path, update_path.exists()); eprintln!( diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs index 92f8bf712..33fab6930 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v1.rs @@ -1,8 +1,8 @@ -use std::collections::{BTreeMap, BTreeSet}; +use std::{collections::{BTreeMap, BTreeSet}, marker::PhantomData}; use log::warn; use serde::{Deserialize, Serialize}; -use crate::index_controller; +use crate::{index::Unchecked, index_controller}; use crate::index::deserialize_some; use super::*; @@ -27,7 +27,7 @@ struct Settings { } /// we need to **always** be able to convert the old settings to the settings currently being used -impl From for index_controller::Settings { +impl From for index_controller::Settings { fn from(settings: Settings) -> Self { if settings.synonyms.flatten().is_some() { error!("`synonyms` are not yet implemented and thus will be ignored"); @@ -63,6 +63,7 @@ impl From for index_controller::Settings { }).collect())), // we need to convert the old `Vec` into a `BTreeSet` stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())), + _kind: PhantomData, } } } @@ -89,9 +90,9 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_ke // extract `settings.json` file and import content let settings = import_settings(&dump_path)?; - let settings: index_controller::Settings = settings.into(); + let settings: index_controller::Settings = settings.into(); let update_builder = UpdateBuilder::new(0); - index.update_settings(&settings, update_builder)?; + index.update_settings(&settings.check(), update_builder)?; let update_builder = UpdateBuilder::new(1); let file = File::open(&dump_path.join("documents.jsonl"))?; diff --git 
a/meilisearch-http/src/index_controller/dump_actor/v2.rs b/meilisearch-http/src/index_controller/dump_actor/v2.rs index 301268233..d8f43fc58 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v2.rs @@ -1,11 +1,11 @@ use heed::EnvOpenOptions; use milli::{update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; -use crate::index::Index; +use crate::index::{Checked, Index}; use crate::index_controller::Settings; use std::{fs::File, path::Path, sync::Arc}; /// Extract Settings from `settings.json` file present at provided `dir_path` -fn import_settings(dir_path: &Path) -> anyhow::Result { +fn import_settings(dir_path: &Path) -> anyhow::Result> { let path = dir_path.join("settings.json"); let file = File::open(path)?; let reader = std::io::BufReader::new(file); From 8b7735c20a1779059a47ab58242b9f67320f0e46 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 11 May 2021 00:20:55 +0200 Subject: [PATCH 17/54] move the import of the updates in the v2 and ignore the v1 for now --- .../src/index_controller/dump_actor/mod.rs | 39 +++++-------------- .../src/index_controller/dump_actor/v1.rs | 3 +- .../src/index_controller/dump_actor/v2.rs | 32 ++++++++++++++- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index a8409f623..d416d7d92 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -16,11 +16,12 @@ use serde::{Deserialize, Serialize}; use serde_json::json; use tempfile::TempDir; use thiserror::Error; +use uuid::Uuid; use super::IndexMetadata; use crate::helpers::compression; use crate::index::Index; -use crate::index_controller::{uuid_resolver, UpdateStatus}; +use crate::index_controller::uuid_resolver; pub use actor::DumpActor; pub use handle_impl::*; @@ -53,13 +54,14 @@ impl DumpVersion { pub 
fn import_index( self, size: usize, + uuid: Uuid, dump_path: &Path, - index_path: &Path, + db_path: &Path, primary_key: Option<&str>, ) -> anyhow::Result<()> { match self { - Self::V1 => v1::import_index(size, dump_path, index_path, primary_key), - Self::V2 => v2::import_index(size, dump_path, index_path, primary_key), + Self::V1 => v1::import_index(size, uuid, dump_path, db_path, primary_key), + Self::V2 => v2::import_index(size, uuid, dump_path, db_path, primary_key), } } } @@ -200,46 +202,23 @@ pub fn load_dump( let dump_path = tmp_dir_path.join(&idx.uid); // this cannot fail since we created all the missing uuid in the previous loop let uuid = uuid_resolver.get_uuid(idx.uid)?.unwrap(); - let index_path = db_path.join(&format!("indexes/index-{}", uuid)); - // let update_path = db_path.join(&format!("updates")); info!( "Importing dump from {} into {}...", dump_path.display(), - index_path.display() + db_path.display() ); metadata .dump_version .import_index( size, + uuid, &dump_path, - &index_path, + &db_path, idx.meta.primary_key.as_ref().map(|s| s.as_ref()), ) .unwrap(); info!("Dump importation from {} succeed", dump_path.display()); - - info!("importing the updates"); - use crate::index_controller::update_actor::UpdateStore; - use std::io::BufRead; - - let update_path = db_path.join("updates"); - let options = EnvOpenOptions::new(); - // create an UpdateStore to import the updates - std::fs::create_dir_all(&update_path)?; - let (update_store, _) = UpdateStore::create(options, &update_path)?; - let file = File::open(&dump_path.join("updates.jsonl"))?; - let reader = std::io::BufReader::new(file); - - let mut wtxn = update_store.env.write_txn()?; - for update in reader.lines() { - let mut update: UpdateStatus = serde_json::from_str(&update?)?; - if let Some(path) = update.content_path_mut() { - *path = update_path.join("update_files").join(&path).into(); - } - update_store.register_raw_updates(&mut wtxn, update, uuid)?; - } - wtxn.commit()?; } // finally we can 
move all the unprocessed update file into our new DB diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs index 33fab6930..fad48dd8f 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v1.rs @@ -79,7 +79,8 @@ fn import_settings(dir_path: &Path) -> anyhow::Result { } -pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { +pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { + let index_path = db_path.join(&format!("indexes/index-{}", uuid)); info!("Importing a dump from an old version of meilisearch with dump version 1"); std::fs::create_dir_all(&index_path)?; diff --git a/meilisearch-http/src/index_controller/dump_actor/v2.rs b/meilisearch-http/src/index_controller/dump_actor/v2.rs index d8f43fc58..969442296 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v2.rs @@ -1,4 +1,8 @@ use heed::EnvOpenOptions; +use log::info; +use uuid::Uuid; +use crate::index_controller::{UpdateStatus, update_actor::UpdateStore}; +use std::io::BufRead; use milli::{update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; use crate::index::{Checked, Index}; use crate::index_controller::Settings; @@ -14,13 +18,15 @@ fn import_settings(dir_path: &Path) -> anyhow::Result> { Ok(metadata) } -pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { +pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { + let index_path = db_path.join(&format!("indexes/index-{}", uuid)); std::fs::create_dir_all(&index_path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); let index = 
milli::Index::new(options, index_path)?; let index = Index(Arc::new(index)); + info!("importing the settings..."); // extract `settings.json` file and import content let settings = import_settings(&dump_path)?; let update_builder = UpdateBuilder::new(0); @@ -31,6 +37,7 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_ke let file = File::open(&dump_path.join("documents.jsonl"))?; let reader = std::io::BufReader::new(file); + info!("importing the documents..."); // TODO: TAMO: currently we ignore any error caused by the importation of the documents because // if there is no documents nor primary key it'll throw an anyhow error, but we must remove // this before the merge on main @@ -49,6 +56,27 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_ke .prepare_for_closing() .wait(); - Ok(()) + info!("importing the updates..."); + import_updates(uuid, dump_path, db_path) } +fn import_updates(uuid: Uuid, dump_path: &Path, db_path: &Path) -> anyhow::Result<()> { + let update_path = db_path.join("updates"); + let options = EnvOpenOptions::new(); + // create an UpdateStore to import the updates + std::fs::create_dir_all(&update_path)?; + let (update_store, _) = UpdateStore::create(options, &update_path)?; + let file = File::open(&dump_path.join("updates.jsonl"))?; + let reader = std::io::BufReader::new(file); + + let mut wtxn = update_store.env.write_txn()?; + for update in reader.lines() { + let mut update: UpdateStatus = serde_json::from_str(&update?)?; + if let Some(path) = update.content_path_mut() { + *path = update_path.join("update_files").join(&path).into(); + } + update_store.register_raw_updates(&mut wtxn, update, uuid)?; + } + wtxn.commit()?; + Ok(()) +} From 92a7c8cd176a7cbcf4ab7f20be3341505333a9d1 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 11 May 2021 00:27:22 +0200 Subject: [PATCH 18/54] make clippy happy --- meilisearch-http/src/index_controller/dump_actor/v1.rs | 8 ++++---- 
meilisearch-http/src/index_controller/dump_actor/v2.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs index fad48dd8f..6844ea241 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v1.rs @@ -86,7 +86,7 @@ pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, p std::fs::create_dir_all(&index_path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); - let index = milli::Index::new(options.clone(), index_path)?; + let index = milli::Index::new(options, index_path)?; let index = Index(Arc::new(index)); // extract `settings.json` file and import content @@ -108,9 +108,6 @@ pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, p primary_key, ); - // at this point we should handle the updates, but since the update logic is not handled in - // meilisearch we are just going to ignore this part - // the last step: we extract the original milli::Index and close it Arc::try_unwrap(index.0) .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") @@ -118,5 +115,8 @@ pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, p .prepare_for_closing() .wait(); + // at this point we should handle the import of the updates, but since the update logic is not handled in + // meilisearch we are just going to ignore this part + Ok(()) } diff --git a/meilisearch-http/src/index_controller/dump_actor/v2.rs b/meilisearch-http/src/index_controller/dump_actor/v2.rs index 969442296..4f39f88bf 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v2.rs @@ -73,7 +73,7 @@ fn import_updates(uuid: Uuid, dump_path: &Path, db_path: &Path) -> anyhow::Resul for update in reader.lines() { let mut update: UpdateStatus 
= serde_json::from_str(&update?)?; if let Some(path) = update.content_path_mut() { - *path = update_path.join("update_files").join(&path).into(); + *path = update_path.join("update_files").join(&path); } update_store.register_raw_updates(&mut wtxn, update, uuid)?; } From 384afb3455212f4cf375d23a5d088fd24296fc36 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 11 May 2021 11:47:04 +0200 Subject: [PATCH 19/54] fix the way we return the settings --- meilisearch-http/src/index/mod.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index d3f30bf2e..f26cc4283 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -41,13 +41,11 @@ impl Index { let displayed_attributes = self .displayed_fields(&txn)? - .map(|fields| fields.into_iter().map(String::from).collect()) - .unwrap_or_else(|| vec!["*".to_string()]); + .map(|fields| fields.into_iter().map(String::from).collect()); let searchable_attributes = self .searchable_fields(&txn)? - .map(|fields| fields.into_iter().map(String::from).collect()) - .unwrap_or_else(|| vec!["*".to_string()]); + .map(|fields| fields.into_iter().map(String::from).collect()); let faceted_attributes = self .faceted_fields(&txn)? 
@@ -71,8 +69,8 @@ impl Index { let distinct_attribute = self.distinct_attribute(&txn)?.map(String::from); Ok(Settings { - displayed_attributes: Some(Some(displayed_attributes)), - searchable_attributes: Some(Some(searchable_attributes)), + displayed_attributes: Some(displayed_attributes), + searchable_attributes: Some(searchable_attributes), attributes_for_faceting: Some(Some(faceted_attributes)), ranking_rules: Some(Some(criteria)), stop_words: Some(Some(stop_words)), From 9e798fea75bcd8f1cee4b98b014f61f49df28715 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 11 May 2021 13:03:47 +0200 Subject: [PATCH 20/54] fix the import of dump without unprocessing updates --- .../src/index_controller/dump_actor/mod.rs | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index d416d7d92..7d2e5a951 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -208,34 +208,27 @@ pub fn load_dump( dump_path.display(), db_path.display() ); - metadata - .dump_version - .import_index( - size, - uuid, - &dump_path, - &db_path, - idx.meta.primary_key.as_ref().map(|s| s.as_ref()), - ) - .unwrap(); + metadata.dump_version.import_index( + size, + uuid, + &dump_path, + &db_path, + idx.meta.primary_key.as_ref().map(|s| s.as_ref()), + )?; info!("Dump importation from {} succeed", dump_path.display()); } // finally we can move all the unprocessed update file into our new DB + // this directory may not exists let update_path = tmp_dir_path.join("update_files"); let db_update_path = db_path.join("updates/update_files"); - eprintln!("path {:?} exists: {:?}", update_path, update_path.exists()); - eprintln!( - "path {:?} exists: {:?}", - db_update_path, - db_update_path.exists() - ); - let _ = std::fs::remove_dir_all(db_update_path); - std::fs::rename( - 
tmp_dir_path.join("update_files"), - db_path.join("updates/update_files"), - ) - .unwrap(); + if update_path.exists() { + let _ = std::fs::remove_dir_all(db_update_path); + std::fs::rename( + tmp_dir_path.join("update_files"), + db_path.join("updates/update_files"), + )?; + } info!("Dump importation from {} succeed", dump_path.display()); Ok(()) From c30b32e173548cd02b0cddf683ff6e41c415a65b Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 11 May 2021 13:21:36 +0200 Subject: [PATCH 21/54] add the criterion attribute when importing dumps from the v1 --- meilisearch-http/src/index_controller/dump_actor/v1.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs index 6844ea241..6f199193c 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v1.rs @@ -45,7 +45,7 @@ impl From for index_controller::Settings { // we need to convert the old `Vec` into a `BTreeSet` ranking_rules: settings.ranking_rules.map(|o| o.map(|vec| vec.into_iter().filter_map(|criterion| { match criterion.as_str() { - "words" | "typo" | "proximity" => Some(criterion), + "words" | "typo" | "proximity" | "attribute" => Some(criterion), s if s.starts_with("asc") || s.starts_with("desc") => Some(criterion), "wordsPosition" => { warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored"); From 6d837e3e07a68ba561e25fb2986cde803b889c29 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 11 May 2021 17:34:34 +0200 Subject: [PATCH 22/54] the route to create a dump must return a 202 --- meilisearch-http/src/routes/dump.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index e6be4ca93..47c081e6f 100644 --- a/meilisearch-http/src/routes/dump.rs +++ 
b/meilisearch-http/src/routes/dump.rs @@ -17,7 +17,7 @@ async fn create_dump( ) -> Result { let res = data.create_dump().await?; - Ok(HttpResponse::Ok().json(res)) + Ok(HttpResponse::Accepted().json(res)) } #[derive(Debug, Serialize)] From 295f496e8a5b75ba27cb6f921f43eaea7a484fb9 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Wed, 12 May 2021 16:21:37 +0200 Subject: [PATCH 23/54] atomic index dump load --- meilisearch-http/src/index/updates.rs | 69 +++++++++++-------- .../src/index_controller/dump_actor/v2.rs | 19 +++-- 2 files changed, 54 insertions(+), 34 deletions(-) diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 0762c8550..b9c772ee2 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -132,17 +132,30 @@ impl Index { content: Option, update_builder: UpdateBuilder, primary_key: Option<&str>, + ) -> anyhow::Result { + let mut txn = self.write_txn()?; + let result = self.update_documents_txn(&mut txn, format, method, content, update_builder, primary_key)?; + txn.commit()?; + Ok(result) + } + + pub fn update_documents_txn<'a, 'b>( + &'a self, + txn: &mut heed::RwTxn<'a, 'b>, + format: UpdateFormat, + method: IndexDocumentsMethod, + content: Option, + update_builder: UpdateBuilder, + primary_key: Option<&str>, ) -> anyhow::Result { info!("performing document addition"); - // We must use the write transaction of the update here. - let mut wtxn = self.write_txn()?; // Set the primary key if not set already, ignore if already set. 
- if let (None, Some(ref primary_key)) = (self.primary_key(&wtxn)?, primary_key) { - self.put_primary_key(&mut wtxn, primary_key)?; + if let (None, Some(ref primary_key)) = (self.primary_key(txn)?, primary_key) { + self.put_primary_key(txn, primary_key)?; } - let mut builder = update_builder.index_documents(&mut wtxn, self); + let mut builder = update_builder.index_documents(txn, self); builder.update_format(format); builder.index_documents_method(method); @@ -150,19 +163,15 @@ impl Index { |indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step); let gzipped = false; - let result = match content { - Some(content) if gzipped => builder.execute(GzDecoder::new(content), indexing_callback), - Some(content) => builder.execute(content, indexing_callback), - None => builder.execute(std::io::empty(), indexing_callback), + let addition = match content { + Some(content) if gzipped => builder.execute(GzDecoder::new(content), indexing_callback)?, + Some(content) => builder.execute(content, indexing_callback)?, + None => builder.execute(std::io::empty(), indexing_callback)?, }; - info!("document addition done: {:?}", result); + info!("document addition done: {:?}", addition); - result.and_then(|addition_result| { - wtxn.commit() - .and(Ok(UpdateResult::DocumentsAddition(addition_result))) - .map_err(Into::into) - }) + Ok(UpdateResult::DocumentsAddition(addition)) } pub fn clear_documents(&self, update_builder: UpdateBuilder) -> anyhow::Result { @@ -179,14 +188,14 @@ impl Index { } } - pub fn update_settings( - &self, + pub fn update_settings_txn<'a, 'b>( + &'a self, + txn: &mut heed::RwTxn<'a, 'b>, settings: &Settings, update_builder: UpdateBuilder, ) -> anyhow::Result { // We must use the write transaction of the update here. 
- let mut wtxn = self.write_txn()?; - let mut builder = update_builder.settings(&mut wtxn, self); + let mut builder = update_builder.settings(txn, self); if let Some(ref names) = settings.searchable_attributes { match names { @@ -228,16 +237,20 @@ impl Index { } } - let result = builder - .execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step)); + builder.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step))?; - match result { - Ok(()) => wtxn - .commit() - .and(Ok(UpdateResult::Other)) - .map_err(Into::into), - Err(e) => Err(e), - } + Ok(UpdateResult::Other) + } + + pub fn update_settings( + &self, + settings: &Settings, + update_builder: UpdateBuilder, + ) -> anyhow::Result { + let mut txn = self.write_txn()?; + let result = self.update_settings_txn(&mut txn, settings, update_builder)?; + txn.commit()?; + Ok(result) } pub fn delete_documents( diff --git a/meilisearch-http/src/index_controller/dump_actor/v2.rs b/meilisearch-http/src/index_controller/dump_actor/v2.rs index 4f39f88bf..eeda78e8a 100644 --- a/meilisearch-http/src/index_controller/dump_actor/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/v2.rs @@ -1,7 +1,7 @@ use heed::EnvOpenOptions; use log::info; use uuid::Uuid; -use crate::index_controller::{UpdateStatus, update_actor::UpdateStore}; +use crate::{index::Unchecked, index_controller::{UpdateStatus, update_actor::UpdateStore}}; use std::io::BufRead; use milli::{update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; use crate::index::{Checked, Index}; @@ -13,9 +13,11 @@ fn import_settings(dir_path: &Path) -> anyhow::Result> { let path = dir_path.join("settings.json"); let file = File::open(path)?; let reader = std::io::BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; + let metadata: Settings = serde_json::from_reader(reader)?; - Ok(metadata) + println!("Meta: {:?}", metadata); + + Ok(metadata.check()) } pub fn import_index(size: usize, uuid: 
Uuid, dump_path: &Path, db_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { @@ -26,11 +28,13 @@ pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, p let index = milli::Index::new(options, index_path)?; let index = Index(Arc::new(index)); + let mut txn = index.write_txn()?; + info!("importing the settings..."); // extract `settings.json` file and import content let settings = import_settings(&dump_path)?; let update_builder = UpdateBuilder::new(0); - index.update_settings(&settings, update_builder)?; + index.update_settings_txn(&mut txn, &settings, update_builder)?; // import the documents in the index let update_builder = UpdateBuilder::new(1); @@ -41,13 +45,16 @@ pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, p // TODO: TAMO: currently we ignore any error caused by the importation of the documents because // if there is no documents nor primary key it'll throw an anyhow error, but we must remove // this before the merge on main - let _ = index.update_documents( + index.update_documents_txn( + &mut txn, UpdateFormat::JsonStream, IndexDocumentsMethod::ReplaceDocuments, Some(reader), update_builder, primary_key, - ); + )?; + + txn.commit()?; // the last step: we extract the original milli::Index and close it Arc::try_unwrap(index.0) From e0e23636c64e6fce23d6dfacfbd089ef1833134e Mon Sep 17 00:00:00 2001 From: tamo Date: Wed, 12 May 2021 17:04:24 +0200 Subject: [PATCH 24/54] fix the serializer + reformat the file --- meilisearch-http/src/index/updates.rs | 35 ++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index b9c772ee2..0f4bf3589 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -1,17 +1,24 @@ use std::collections::{BTreeSet, HashMap}; use std::io; -use std::num::NonZeroUsize; use std::marker::PhantomData; +use 
std::num::NonZeroUsize; use flate2::read::GzDecoder; use log::info; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Serialize, Serializer}; use crate::index_controller::UpdateResult; use super::{deserialize_some, Index}; +fn serialize_with_wildcard(field: &Option>>, s: S) -> Result +where + S: Serializer, +{ + let wildcard = vec!["*".to_string()]; + s.serialize_some(&field.as_ref().map(|o| o.as_ref().unwrap_or(&wildcard))) +} #[derive(Clone, Default, Debug)] pub struct Checked; @@ -25,6 +32,7 @@ pub struct Settings { #[serde( default, deserialize_with = "deserialize_some", + serialize_with = "serialize_with_wildcard", skip_serializing_if = "Option::is_none" )] pub displayed_attributes: Option>>, @@ -32,6 +40,7 @@ pub struct Settings { #[serde( default, deserialize_with = "deserialize_some", + serialize_with = "serialize_with_wildcard", skip_serializing_if = "Option::is_none" )] pub searchable_attributes: Option>>, @@ -134,7 +143,14 @@ impl Index { primary_key: Option<&str>, ) -> anyhow::Result { let mut txn = self.write_txn()?; - let result = self.update_documents_txn(&mut txn, format, method, content, update_builder, primary_key)?; + let result = self.update_documents_txn( + &mut txn, + format, + method, + content, + update_builder, + primary_key, + )?; txn.commit()?; Ok(result) } @@ -164,7 +180,9 @@ impl Index { let gzipped = false; let addition = match content { - Some(content) if gzipped => builder.execute(GzDecoder::new(content), indexing_callback)?, + Some(content) if gzipped => { + builder.execute(GzDecoder::new(content), indexing_callback)? 
+ } Some(content) => builder.execute(content, indexing_callback)?, None => builder.execute(std::io::empty(), indexing_callback)?, }; @@ -237,7 +255,9 @@ impl Index { } } - builder.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step))?; + builder.execute(|indexing_step, update_id| { + info!("update {}: {:?}", update_id, indexing_step) + })?; Ok(UpdateResult::Other) } @@ -299,7 +319,10 @@ mod test { let checked = settings.clone().check(); assert_eq!(settings.displayed_attributes, checked.displayed_attributes); - assert_eq!(settings.searchable_attributes, checked.searchable_attributes); + assert_eq!( + settings.searchable_attributes, + checked.searchable_attributes + ); // test wildcard // test no changes From 8a11c6c4291b1bcaa3ada62f15080d65e793a8b0 Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 24 May 2021 12:35:46 +0200 Subject: [PATCH 25/54] Implements the legacy behaviour of the dump When asked if a dump exists we check if it's the current dump, and if it's not then we check on the filesystem for any file matching our `uid.dump` --- .../src/index_controller/dump_actor/actor.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index b41ddadcf..82a38cf96 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -182,12 +182,20 @@ where async fn handle_dump_info(&self, uid: String) -> DumpResult { match &*self.dump_info.lock().await { - None => Err(DumpError::DumpDoesNotExist(uid)), - Some(DumpInfo { uid: ref s, .. }) if &uid != s => Err(DumpError::DumpDoesNotExist(uid)), + None => self.dump_from_fs(uid).await, + Some(DumpInfo { uid: ref s, .. 
}) if &uid != s => self.dump_from_fs(uid).await, Some(info) => Ok(info.clone()), } } + async fn dump_from_fs(&self, uid: String) -> DumpResult { + self.dump_path + .join(format!("{}.dump", &uid)) + .exists() + .then(|| DumpInfo::new(uid.clone(), DumpStatus::Done)) + .ok_or(DumpError::DumpDoesNotExist(uid)) + } + async fn is_running(&self) -> bool { matches!( *self.dump_info.lock().await, From 529f7962f46873fcdce04125e15da5c485e1a929 Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 24 May 2021 15:42:12 +0200 Subject: [PATCH 26/54] handle parallel requests for the dump actor --- .../src/index_controller/dump_actor/actor.rs | 47 ++++++++++++++----- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 82a38cf96..fac67cbc0 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -1,7 +1,9 @@ use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus}; use crate::helpers::compression; use crate::index_controller::{index_actor, update_actor, uuid_resolver, IndexMetadata}; +use async_stream::stream; use chrono::Utc; +use futures::stream::StreamExt; use log::{error, info, warn}; use std::{ collections::HashSet, @@ -11,8 +13,10 @@ use std::{ use tokio::sync::{mpsc, Mutex}; use uuid::Uuid; +pub const CONCURRENT_DUMP_MSG: usize = 10; + pub struct DumpActor { - inbox: mpsc::Receiver, + inbox: Option>, inner: InnerDump, } @@ -44,7 +48,7 @@ where dump_path: impl AsRef, ) -> Self { Self { - inbox, + inbox: Some(inbox), inner: InnerDump { uuid_resolver, index, @@ -56,24 +60,41 @@ where } pub async fn run(mut self) { - use DumpMsg::*; - info!("Started dump actor."); - loop { - match self.inbox.recv().await { - Some(CreateDump { ret }) => { - let _ = ret.send(self.inner.clone().handle_create_dump().await); + let mut inbox = self + .inbox + .take() + .expect("Dump Actor 
must have a inbox at this point."); + + let stream = stream! { + loop { + match inbox.recv().await { + Some(msg) => yield msg, + None => break, } - Some(DumpInfo { ret, uid }) => { - let _ = ret.send(self.inner.handle_dump_info(uid).await); - } - None => break, } - } + }; + + stream + .for_each_concurrent(Some(CONCURRENT_DUMP_MSG), |msg| self.handle_message(msg)) + .await; error!("Dump actor stopped."); } + + async fn handle_message(&self, msg: DumpMsg) { + use DumpMsg::*; + + match msg { + CreateDump { ret } => { + let _ = ret.send(self.inner.clone().handle_create_dump().await); + } + DumpInfo { ret, uid } => { + let _ = ret.send(self.inner.handle_dump_info(uid).await); + } + } + } } impl InnerDump From dcf29e10816ef4b8e48677062206fcfaa889b878 Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 24 May 2021 17:33:42 +0200 Subject: [PATCH 27/54] fix the error handling in case there is a panic while creating a dump --- .../src/index_controller/dump_actor/actor.rs | 59 +++++++++++-------- .../src/index_controller/dump_actor/mod.rs | 5 +- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index fac67cbc0..c10cd90b8 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -10,7 +10,7 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -use tokio::sync::{mpsc, Mutex}; +use tokio::sync::{mpsc, oneshot, Mutex}; use uuid::Uuid; pub const CONCURRENT_DUMP_MSG: usize = 10; @@ -88,7 +88,7 @@ where match msg { CreateDump { ret } => { - let _ = ret.send(self.inner.clone().handle_create_dump().await); + let _ = self.inner.clone().handle_create_dump(ret).await; } DumpInfo { ret, uid } => { let _ = ret.send(self.inner.handle_dump_info(uid).await); @@ -103,38 +103,45 @@ where Index: index_actor::IndexActorHandle + Send + Sync + Clone + 'static, Update: 
update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, { - async fn handle_create_dump(self) -> DumpResult { + async fn handle_create_dump(self, ret: oneshot::Sender>) { if self.is_running().await { - return Err(DumpError::DumpAlreadyRunning); + ret.send(Err(DumpError::DumpAlreadyRunning)) + .expect("Dump actor is dead"); + return; } let uid = generate_uid(); let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress); *self.dump_info.lock().await = Some(info.clone()); - let this = self.clone(); + ret.send(Ok(info)).expect("Dump actor is dead"); - tokio::task::spawn(async move { - match this.perform_dump(uid).await { - Ok(()) => { - if let Some(ref mut info) = *self.dump_info.lock().await { - info.done(); - } else { - warn!("dump actor was in an inconsistant state"); - } - info!("Dump succeed"); - } - Err(e) => { - if let Some(ref mut info) = *self.dump_info.lock().await { - info.with_error(e.to_string()); - } else { - warn!("dump actor was in an inconsistant state"); - } - error!("Dump failed: {}", e); - } - }; - }); + let dump_info = self.dump_info.clone(); + let cloned_uid = uid.clone(); - Ok(info) + let task_result = tokio::task::spawn(self.clone().perform_dump(cloned_uid)).await; + + match task_result { + Ok(Ok(())) => { + if let Some(ref mut info) = *dump_info.lock().await { + info.done(); + } else { + warn!("dump actor was in an inconsistant state"); + } + info!("Dump succeed"); + } + Ok(Err(e)) => { + if let Some(ref mut info) = *dump_info.lock().await { + info.with_error(e.to_string()); + } else { + warn!("dump actor was in an inconsistant state"); + } + error!("Dump failed: {}", e); + } + Err(_) => { + error!("Dump panicked. 
Dump status set to failed"); + *dump_info.lock().await = Some(DumpInfo::new(uid, DumpStatus::Failed)); + } + }; } async fn perform_dump(self, uid: String) -> anyhow::Result<()> { diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index 7d2e5a951..ea0d7adbd 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -13,7 +13,6 @@ use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; #[cfg(test)] use mockall::automock; use serde::{Deserialize, Serialize}; -use serde_json::json; use tempfile::TempDir; use thiserror::Error; use uuid::Uuid; @@ -129,7 +128,7 @@ pub struct DumpInfo { pub uid: String, pub status: DumpStatus, #[serde(skip_serializing_if = "Option::is_none", flatten)] - pub error: Option, + pub error: Option, } impl DumpInfo { @@ -143,7 +142,7 @@ impl DumpInfo { pub fn with_error(&mut self, error: String) { self.status = DumpStatus::Failed; - self.error = Some(json!(error)); + self.error = Some(error); } pub fn done(&mut self) { From 912f0286b332d7b3e1a1840386fcbddc989665a2 Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 24 May 2021 18:06:20 +0200 Subject: [PATCH 28/54] remove the dump_inner trickery --- .../src/index_controller/dump_actor/actor.rs | 196 +++++++++--------- 1 file changed, 103 insertions(+), 93 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index c10cd90b8..39d095e9f 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -17,16 +17,11 @@ pub const CONCURRENT_DUMP_MSG: usize = 10; pub struct DumpActor { inbox: Option>, - inner: InnerDump, -} - -#[derive(Clone)] -struct InnerDump { - pub uuid_resolver: UuidResolver, - pub index: Index, - pub update: Update, - pub dump_path: PathBuf, - pub dump_info: 
Arc>>, + uuid_resolver: UuidResolver, + index: Index, + update: Update, + dump_path: PathBuf, + dump_info: Arc>>, } /// Generate uid from creation date @@ -49,13 +44,11 @@ where ) -> Self { Self { inbox: Some(inbox), - inner: InnerDump { - uuid_resolver, - index, - update, - dump_path: dump_path.as_ref().into(), - dump_info: Arc::new(Mutex::new(None)), - }, + uuid_resolver, + index, + update, + dump_path: dump_path.as_ref().into(), + dump_info: Arc::new(Mutex::new(None)), } } @@ -88,22 +81,15 @@ where match msg { CreateDump { ret } => { - let _ = self.inner.clone().handle_create_dump(ret).await; + let _ = self.handle_create_dump(ret).await; } DumpInfo { ret, uid } => { - let _ = ret.send(self.inner.handle_dump_info(uid).await); + let _ = ret.send(self.handle_dump_info(uid).await); } } } -} -impl InnerDump -where - UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static, - Index: index_actor::IndexActorHandle + Send + Sync + Clone + 'static, - Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, -{ - async fn handle_create_dump(self, ret: oneshot::Sender>) { + async fn handle_create_dump(&self, ret: oneshot::Sender>) { if self.is_running().await { ret.send(Err(DumpError::DumpAlreadyRunning)) .expect("Dump actor is dead"); @@ -116,9 +102,15 @@ where ret.send(Ok(info)).expect("Dump actor is dead"); let dump_info = self.dump_info.clone(); - let cloned_uid = uid.clone(); - let task_result = tokio::task::spawn(self.clone().perform_dump(cloned_uid)).await; + let task_result = tokio::task::spawn(perform_dump( + self.dump_path.clone(), + self.uuid_resolver.clone(), + self.index.clone(), + self.update.clone(), + uid.clone(), + )) + .await; match task_result { Ok(Ok(())) => { @@ -144,70 +136,6 @@ where }; } - async fn perform_dump(self, uid: String) -> anyhow::Result<()> { - info!("Performing dump."); - - let dump_dir = self.dump_path.clone(); - tokio::fs::create_dir_all(&dump_dir).await?; - let temp_dump_dir = - 
tokio::task::spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let uuids = self.uuid_resolver.list().await?; - // maybe we could just keep the vec as-is - let uuids: HashSet<(String, Uuid)> = uuids.into_iter().collect(); - - if uuids.is_empty() { - return Ok(()); - } - - let indexes = self.list_indexes().await?; - - // we create one directory by index - for meta in indexes.iter() { - tokio::fs::create_dir(temp_dump_path.join(&meta.uid)).await?; - } - - let metadata = super::Metadata::new(indexes, env!("CARGO_PKG_VERSION").to_string()); - metadata.to_path(&temp_dump_path).await?; - - self.update.dump(uuids, temp_dump_path.clone()).await?; - - let dump_dir = self.dump_path.clone(); - let dump_path = self.dump_path.join(format!("{}.dump", uid)); - let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { - let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?; - let temp_dump_file_path = temp_dump_file.path().to_owned(); - compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; - temp_dump_file.persist(&dump_path)?; - Ok(dump_path) - }) - .await??; - - info!("Created dump in {:?}.", dump_path); - - Ok(()) - } - - async fn list_indexes(&self) -> anyhow::Result> { - let uuids = self.uuid_resolver.list().await?; - - let mut ret = Vec::new(); - - for (uid, uuid) in uuids { - let meta = self.index.get_index_meta(uuid).await?; - let meta = IndexMetadata { - uuid, - name: uid.clone(), - uid, - meta, - }; - ret.push(meta); - } - - Ok(ret) - } - async fn handle_dump_info(&self, uid: String) -> DumpResult { match &*self.dump_info.lock().await { None => self.dump_from_fs(uid).await, @@ -234,3 +162,85 @@ where ) } } + +async fn perform_dump( + dump_path: PathBuf, + uuid_resolver: UuidResolver, + index: Index, + update: Update, + uid: String, +) -> anyhow::Result<()> +where + UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static, + Index: 
index_actor::IndexActorHandle + Send + Sync + Clone + 'static, + Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, +{ + info!("Performing dump."); + + let dump_dir = dump_path.clone(); + tokio::fs::create_dir_all(&dump_dir).await?; + let temp_dump_dir = + tokio::task::spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); + + let uuids = uuid_resolver.list().await?; + // maybe we could just keep the vec as-is + let uuids: HashSet<(String, Uuid)> = uuids.into_iter().collect(); + + if uuids.is_empty() { + return Ok(()); + } + + let indexes = list_indexes(&uuid_resolver, &index).await?; + + // we create one directory by index + for meta in indexes.iter() { + tokio::fs::create_dir(temp_dump_path.join(&meta.uid)).await?; + } + + let metadata = super::Metadata::new(indexes, env!("CARGO_PKG_VERSION").to_string()); + metadata.to_path(&temp_dump_path).await?; + + update.dump(uuids, temp_dump_path.clone()).await?; + + let dump_dir = dump_path.clone(); + let dump_path = dump_path.join(format!("{}.dump", uid)); + let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { + let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?; + let temp_dump_file_path = temp_dump_file.path().to_owned(); + compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; + temp_dump_file.persist(&dump_path)?; + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + + Ok(()) +} + +async fn list_indexes( + uuid_resolver: &UuidResolver, + index: &Index, +) -> anyhow::Result> +where + UuidResolver: uuid_resolver::UuidResolverHandle, + Index: index_actor::IndexActorHandle, +{ + let uuids = uuid_resolver.list().await?; + + let mut ret = Vec::new(); + + for (uid, uuid) in uuids { + let meta = index.get_index_meta(uuid).await?; + let meta = IndexMetadata { + uuid, + name: uid.clone(), + uid, + meta, + }; + ret.push(meta); + } + + Ok(ret) +} From 
49a0e8aa19b5bdf27e3093f3075f461b6047b173 Mon Sep 17 00:00:00 2001 From: tamo Date: Mon, 24 May 2021 18:19:34 +0200 Subject: [PATCH 29/54] use a RwLock instead of a Mutex --- .../src/index_controller/dump_actor/actor.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 39d095e9f..248526723 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -10,7 +10,7 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -use tokio::sync::{mpsc, oneshot, Mutex}; +use tokio::sync::{mpsc, oneshot, RwLock}; use uuid::Uuid; pub const CONCURRENT_DUMP_MSG: usize = 10; @@ -21,7 +21,7 @@ pub struct DumpActor { index: Index, update: Update, dump_path: PathBuf, - dump_info: Arc>>, + dump_info: Arc>>, } /// Generate uid from creation date @@ -48,7 +48,7 @@ where index, update, dump_path: dump_path.as_ref().into(), - dump_info: Arc::new(Mutex::new(None)), + dump_info: Arc::new(RwLock::new(None)), } } @@ -97,7 +97,7 @@ where } let uid = generate_uid(); let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress); - *self.dump_info.lock().await = Some(info.clone()); + *self.dump_info.write().await = Some(info.clone()); ret.send(Ok(info)).expect("Dump actor is dead"); @@ -114,7 +114,7 @@ where match task_result { Ok(Ok(())) => { - if let Some(ref mut info) = *dump_info.lock().await { + if let Some(ref mut info) = *dump_info.write().await { info.done(); } else { warn!("dump actor was in an inconsistant state"); @@ -122,7 +122,7 @@ where info!("Dump succeed"); } Ok(Err(e)) => { - if let Some(ref mut info) = *dump_info.lock().await { + if let Some(ref mut info) = *dump_info.write().await { info.with_error(e.to_string()); } else { warn!("dump actor was in an inconsistant state"); @@ -131,13 +131,13 @@ where } Err(_) => { error!("Dump panicked. 
Dump status set to failed"); - *dump_info.lock().await = Some(DumpInfo::new(uid, DumpStatus::Failed)); + *dump_info.write().await = Some(DumpInfo::new(uid, DumpStatus::Failed)); } }; } async fn handle_dump_info(&self, uid: String) -> DumpResult { - match &*self.dump_info.lock().await { + match &*self.dump_info.read().await { None => self.dump_from_fs(uid).await, Some(DumpInfo { uid: ref s, .. }) if &uid != s => self.dump_from_fs(uid).await, Some(info) => Ok(info.clone()), @@ -154,7 +154,7 @@ where async fn is_running(&self) -> bool { matches!( - *self.dump_info.lock().await, + *self.dump_info.read().await, Some(DumpInfo { status: DumpStatus::InProgress, .. From 991d8e1ec618b8e4aa7861041dfad182be4c7b52 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 25 May 2021 10:48:57 +0200 Subject: [PATCH 30/54] fix the error printing --- meilisearch-http/src/index_controller/dump_actor/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index ea0d7adbd..1508f8eb7 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -127,7 +127,7 @@ pub enum DumpStatus { pub struct DumpInfo { pub uid: String, pub status: DumpStatus, - #[serde(skip_serializing_if = "Option::is_none", flatten)] + #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, } From fe260f13309897655de5c0cea3506016a4658b20 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 25 May 2021 15:13:47 +0200 Subject: [PATCH 31/54] Update meilisearch-http/src/index_controller/dump_actor/actor.rs Co-authored-by: marin --- meilisearch-http/src/index_controller/dump_actor/actor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 248526723..981b4236d 100644 --- 
a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -117,7 +117,7 @@ where if let Some(ref mut info) = *dump_info.write().await { info.done(); } else { - warn!("dump actor was in an inconsistant state"); + warn!("Dump actor is in an inconsistent state"); } info!("Dump succeed"); } From 1a6dcec83a517475869700c2b42fee280adaf2fc Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 25 May 2021 15:23:13 +0200 Subject: [PATCH 32/54] crash when the actor have no inbox --- .../src/index_controller/dump_actor/actor.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 248526723..12afb4558 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -4,7 +4,7 @@ use crate::index_controller::{index_actor, update_actor, uuid_resolver, IndexMet use async_stream::stream; use chrono::Utc; use futures::stream::StreamExt; -use log::{error, info, warn}; +use log::{error, info}; use std::{ collections::HashSet, path::{Path, PathBuf}, @@ -114,19 +114,11 @@ where match task_result { Ok(Ok(())) => { - if let Some(ref mut info) = *dump_info.write().await { - info.done(); - } else { - warn!("dump actor was in an inconsistant state"); - } + (*dump_info.write().await).as_mut().expect("Dump actor should have an inbox").done(); info!("Dump succeed"); } Ok(Err(e)) => { - if let Some(ref mut info) = *dump_info.write().await { - info.with_error(e.to_string()); - } else { - warn!("dump actor was in an inconsistant state"); - } + (*dump_info.write().await).as_mut().expect("Dump actor should have an inbox").with_error(e.to_string()); error!("Dump failed: {}", e); } Err(_) => { From 89846d1656183d1f2523dd228f6f6a921e9e084d Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 25 May 2021 15:47:57 +0200 Subject: 
[PATCH 33/54] improve panic message --- meilisearch-http/src/index_controller/dump_actor/actor.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 12afb4558..8e1e48ebe 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -114,11 +114,11 @@ where match task_result { Ok(Ok(())) => { - (*dump_info.write().await).as_mut().expect("Dump actor should have an inbox").done(); + (*dump_info.write().await).as_mut().expect("Inconsistent dump service state").done(); info!("Dump succeed"); } Ok(Err(e)) => { - (*dump_info.write().await).as_mut().expect("Dump actor should have an inbox").with_error(e.to_string()); + (*dump_info.write().await).as_mut().expect("Inconsistent dump service state").with_error(e.to_string()); error!("Dump failed: {}", e); } Err(_) => { From 2185fb8367f66b69249308814aa65b551198a5a5 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Mon, 24 May 2021 16:05:43 +0200 Subject: [PATCH 34/54] dump uuid resolver --- meilisearch-http/src/index_controller/mod.rs | 6 ++-- .../index_controller/uuid_resolver/actor.rs | 17 ++++++--- .../uuid_resolver/handle_impl.rs | 8 +++++ .../index_controller/uuid_resolver/message.rs | 4 +++ .../src/index_controller/uuid_resolver/mod.rs | 35 ++++++++++++++----- .../index_controller/uuid_resolver/store.rs | 33 ++++++++++++++--- 6 files changed, 82 insertions(+), 21 deletions(-) diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index d1bb5e170..900482257 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -20,7 +20,7 @@ use dump_actor::DumpActorHandle; use index_actor::IndexActorHandle; use snapshot::{SnapshotService, load_snapshot}; use update_actor::UpdateActorHandle; -use 
uuid_resolver::{UuidError, UuidResolverHandle}; +use uuid_resolver::{UuidResolverError, UuidResolverHandle}; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use crate::option::Opt; @@ -176,7 +176,7 @@ impl IndexController { match self.uuid_resolver.get(uid).await { Ok(uuid) => Ok(perform_update(uuid).await?), - Err(UuidError::UnexistingIndex(name)) => { + Err(UuidResolverError::UnexistingIndex(name)) => { let uuid = Uuid::new_v4(); let status = perform_update(uuid).await?; // ignore if index creation fails now, since it may already have been created @@ -230,7 +230,7 @@ impl IndexController { match self.uuid_resolver.get(uid).await { Ok(uuid) => Ok(perform_udpate(uuid).await?), - Err(UuidError::UnexistingIndex(name)) if create => { + Err(UuidResolverError::UnexistingIndex(name)) if create => { let uuid = Uuid::new_v4(); let status = perform_udpate(uuid).await?; // ignore if index creation fails now, since it may already have been created diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs index 253326276..3592c3551 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs @@ -4,7 +4,7 @@ use log::{info, warn}; use tokio::sync::mpsc; use uuid::Uuid; -use super::{Result, UuidError, UuidResolveMsg, UuidStore}; +use super::{Result, UuidResolverError, UuidResolveMsg, UuidStore}; pub struct UuidResolverActor { inbox: mpsc::Receiver, @@ -44,6 +44,9 @@ impl UuidResolverActor { Some(GetSize { ret }) => { let _ = ret.send(self.handle_get_size().await); } + Some(DumpRequest { path, ret }) => { + let _ = ret.send(self.handle_dump(path).await); + } // all senders have been dropped, need to quit. 
None => break, } @@ -54,7 +57,7 @@ impl UuidResolverActor { async fn handle_create(&self, uid: String) -> Result { if !is_index_uid_valid(&uid) { - return Err(UuidError::BadlyFormatted(uid)); + return Err(UuidResolverError::BadlyFormatted(uid)); } self.store.create_uuid(uid, true).await } @@ -63,14 +66,14 @@ impl UuidResolverActor { self.store .get_uuid(uid.clone()) .await? - .ok_or(UuidError::UnexistingIndex(uid)) + .ok_or(UuidResolverError::UnexistingIndex(uid)) } async fn handle_delete(&self, uid: String) -> Result { self.store .delete(uid.clone()) .await? - .ok_or(UuidError::UnexistingIndex(uid)) + .ok_or(UuidResolverError::UnexistingIndex(uid)) } async fn handle_list(&self) -> Result> { @@ -82,9 +85,13 @@ impl UuidResolverActor { self.store.snapshot(path).await } + async fn handle_dump(&self, path: PathBuf) -> Result> { + self.store.dump(path).await + } + async fn handle_insert(&self, uid: String, uuid: Uuid) -> Result<()> { if !is_index_uid_valid(&uid) { - return Err(UuidError::BadlyFormatted(uid)); + return Err(UuidResolverError::BadlyFormatted(uid)); } self.store.insert(uid, uuid).await?; Ok(()) diff --git a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs index db4c482bd..981beb0f6 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs @@ -85,4 +85,12 @@ impl UuidResolverHandle for UuidResolverHandleImpl { .await .expect("Uuid resolver actor has been killed")?) } + async fn dump(&self, path: PathBuf) -> Result> { + let (ret, receiver) = oneshot::channel(); + let msg = UuidResolveMsg::DumpRequest { ret, path }; + let _ = self.sender.send(msg).await; + Ok(receiver + .await + .expect("Uuid resolver actor has been killed")?) 
+ } } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs b/meilisearch-http/src/index_controller/uuid_resolver/message.rs index a72bf0587..166347455 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/message.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/message.rs @@ -34,4 +34,8 @@ pub enum UuidResolveMsg { GetSize { ret: oneshot::Sender>, }, + DumpRequest { + path: PathBuf, + ret: oneshot::Sender>>, + } } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs index 0cbb2895b..b84025094 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs @@ -16,12 +16,12 @@ use store::UuidStore; #[cfg(test)] use mockall::automock; -pub use store::HeedUuidStore; pub use handle_impl::UuidResolverHandleImpl; +pub use store::HeedUuidStore; const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB -pub type Result = std::result::Result; +pub type Result = std::result::Result; #[async_trait::async_trait] #[cfg_attr(test, automock)] @@ -33,20 +33,37 @@ pub trait UuidResolverHandle { async fn list(&self) -> anyhow::Result>; async fn snapshot(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; + async fn dump(&self, path: PathBuf) -> Result>; } #[derive(Debug, Error)] -pub enum UuidError { +pub enum UuidResolverError { #[error("Name already exist.")] NameAlreadyExist, #[error("Index \"{0}\" doesn't exist.")] UnexistingIndex(String), - #[error("Error performing task: {0}")] - TokioTask(#[from] tokio::task::JoinError), - #[error("Database error: {0}")] - Heed(#[from] heed::Error), - #[error("Uuid error: {0}")] - Uuid(#[from] uuid::Error), #[error("Badly formatted index uid: {0}")] BadlyFormatted(String), + #[error("Internal error resolving index uid: {0}")] + Internal(String), } + +macro_rules! 
internal_error { + ($($other:path), *) => { + $( + impl From<$other> for UuidResolverError { + fn from(other: $other) -> Self { + Self::Internal(other.to_string()) + } + } + )* + } +} + +internal_error!( + heed::Error, + uuid::Error, + std::io::Error, + tokio::task::JoinError, + serde_json::Error +); diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index a781edcba..4fbaa37b4 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -1,5 +1,5 @@ -use std::collections::HashSet; -use std::fs::create_dir_all; +use std::{collections::HashSet, io::Write}; +use std::fs::{create_dir_all, File}; use std::path::{Path, PathBuf}; use heed::{ @@ -8,7 +8,7 @@ use heed::{ }; use uuid::Uuid; -use super::{Result, UuidError, UUID_STORE_SIZE}; +use super::{Result, UuidResolverError, UUID_STORE_SIZE}; use crate::helpers::EnvSizer; #[async_trait::async_trait] @@ -22,6 +22,7 @@ pub trait UuidStore { async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; async fn snapshot(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; + async fn dump(&self, path: PathBuf) -> Result>; } #[derive(Clone)] @@ -48,7 +49,7 @@ impl HeedUuidStore { match db.get(&txn, &name)? { Some(uuid) => { if err { - Err(UuidError::NameAlreadyExist) + Err(UuidResolverError::NameAlreadyExist) } else { let uuid = Uuid::from_slice(uuid)?; Ok(uuid) @@ -138,6 +139,25 @@ impl HeedUuidStore { pub fn get_size(&self) -> Result { Ok(self.env.size()) } + + pub fn dump(&self, path: PathBuf) -> Result> { + let dump_path = path.join("index_uuids"); + create_dir_all(&dump_path)?; + let dump_file_path = dump_path.join("data.jsonl"); + let mut dump_file = File::create(&dump_file_path)?; + let mut uuids = HashSet::new(); + + let txn = self.env.read_txn()?; + for entry in self.db.iter(&txn)? 
{ + let entry = entry?; + let uuid = Uuid::from_slice(entry.1)?; + uuids.insert(uuid); + serde_json::to_writer(&mut dump_file, &serde_json::json!({ "uid": entry.0, "uuid": uuid }))?; + dump_file.write(b"\n").unwrap(); + } + + Ok(uuids) + } } #[async_trait::async_trait] @@ -175,4 +195,9 @@ impl UuidStore for HeedUuidStore { async fn get_size(&self) -> Result { self.get_size() } + + async fn dump(&self, path: PathBuf) -> Result> { + let this = self.clone(); + tokio::task::spawn_blocking(move || this.dump(path)).await? + } } From 7ad553670fe0bedac02982dce047b76848761713 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Mon, 24 May 2021 17:20:44 +0200 Subject: [PATCH 35/54] index error handling --- .../src/index_controller/index_actor/actor.rs | 91 +++++++++---------- .../src/index_controller/index_actor/mod.rs | 20 +++- .../src/index_controller/index_actor/store.rs | 14 +-- 3 files changed, 63 insertions(+), 62 deletions(-) diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 1f0091265..0e2e63468 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -30,10 +30,14 @@ pub struct IndexActor { impl IndexActor { pub fn new(receiver: mpsc::Receiver, store: S) -> IndexResult { let options = IndexerOpts::default(); - let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?; + let update_handler = UpdateHandler::new(&options)?; let update_handler = Arc::new(update_handler); let receiver = Some(receiver); - Ok(Self { receiver, update_handler, store }) + Ok(Self { + receiver, + update_handler, + store, + }) } /// `run` poll the write_receiver and read_receiver concurrently, but while messages send @@ -122,7 +126,12 @@ impl IndexActor { Snapshot { uuid, path, ret } => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } - Dump { uid, uuid, path, ret } => { + Dump { + uid, + uuid, + 
path, + ret, + } => { let _ = ret.send(self.handle_dump(&uid, uuid, path).await); } GetStats { uuid, ret } => { @@ -146,9 +155,7 @@ impl IndexActor { primary_key: Option, ) -> IndexResult { let index = self.store.create(uuid, primary_key).await?; - let meta = spawn_blocking(move || IndexMeta::new(&index)) - .await - .map_err(|e| IndexError::Error(e.into()))??; + let meta = spawn_blocking(move || IndexMeta::new(&index)).await??; Ok(meta) } @@ -165,9 +172,9 @@ impl IndexActor { None => self.store.create(uuid, None).await?, }; - spawn_blocking(move || update_handler.handle_update(meta, data, index)) - .await - .map_err(|e| IndexError::Error(e.into())) + let result = + spawn_blocking(move || update_handler.handle_update(meta, data, index)).await?; + Ok(result) } async fn handle_settings(&self, uuid: Uuid) -> IndexResult> { @@ -176,9 +183,8 @@ impl IndexActor { .get(uuid) .await? .ok_or(IndexError::UnexistingIndex)?; - spawn_blocking(move || index.settings().map_err(IndexError::Error)) - .await - .map_err(|e| IndexError::Error(e.into()))? + let result = spawn_blocking(move || index.settings()).await??; + Ok(result) } async fn handle_fetch_documents( @@ -193,13 +199,11 @@ impl IndexActor { .get(uuid) .await? .ok_or(IndexError::UnexistingIndex)?; - spawn_blocking(move || { - index - .retrieve_documents(offset, limit, attributes_to_retrieve) - .map_err(IndexError::Error) - }) - .await - .map_err(|e| IndexError::Error(e.into()))? + let result = + spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve)) + .await??; + + Ok(result) } async fn handle_fetch_document( @@ -213,13 +217,12 @@ impl IndexActor { .get(uuid) .await? .ok_or(IndexError::UnexistingIndex)?; - spawn_blocking(move || { - index - .retrieve_document(doc_id, attributes_to_retrieve) - .map_err(IndexError::Error) - }) - .await - .map_err(|e| IndexError::Error(e.into()))? 
+ + let result = + spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve)) + .await??; + + Ok(result) } async fn handle_delete(&self, uuid: Uuid) -> IndexResult<()> { @@ -242,9 +245,7 @@ impl IndexActor { async fn handle_get_meta(&self, uuid: Uuid) -> IndexResult { match self.store.get(uuid).await? { Some(index) => { - let meta = spawn_blocking(move || IndexMeta::new(&index)) - .await - .map_err(|e| IndexError::Error(e.into()))??; + let meta = spawn_blocking(move || IndexMeta::new(&index)).await??; Ok(meta) } None => Err(IndexError::UnexistingIndex), @@ -262,7 +263,7 @@ impl IndexActor { .await? .ok_or(IndexError::UnexistingIndex)?; - spawn_blocking(move || match index_settings.primary_key { + let result = spawn_blocking(move || match index_settings.primary_key { Some(ref primary_key) => { let mut txn = index.write_txn()?; if index.primary_key(&txn)?.is_some() { @@ -278,23 +279,22 @@ impl IndexActor { Ok(meta) } }) - .await - .map_err(|e| IndexError::Error(e.into()))? + .await??; + + Ok(result) } async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> { use tokio::fs::create_dir_all; path.push("indexes"); - create_dir_all(&path) - .await - .map_err(|e| IndexError::Error(e.into()))?; + create_dir_all(&path).await?; if let Some(index) = self.store.get(uuid).await? { let mut index_path = path.join(format!("index-{}", uuid)); - create_dir_all(&index_path) - .await - .map_err(|e| IndexError::Error(e.into()))?; + + create_dir_all(&index_path).await?; + index_path.push("data.mdb"); spawn_blocking(move || -> anyhow::Result<()> { // Get write txn to wait for ongoing write transaction before snapshot. @@ -304,9 +304,7 @@ impl IndexActor { .copy_to_path(index_path, CompactionOption::Enabled)?; Ok(()) }) - .await - .map_err(|e| IndexError::Error(e.into()))? 
- .map_err(IndexError::Error)?; + .await??; } Ok(()) @@ -318,9 +316,7 @@ impl IndexActor { use std::io::prelude::*; use tokio::fs::create_dir_all; - create_dir_all(&path) - .await - .map_err(|e| IndexError::Error(e.into()))?; + create_dir_all(&path).await?; if let Some(index) = self.store.get(uuid).await? { let documents_path = path.join(uid).join("documents.jsonl"); @@ -354,9 +350,7 @@ impl IndexActor { Ok(()) }) - .await - .map_err(|e| IndexError::Error(e.into()))? - .map_err(IndexError::Error)?; + .await??; } Ok(()) @@ -379,7 +373,6 @@ impl IndexActor { fields_distribution: index.fields_distribution(&rtxn)?, }) }) - .await - .map_err(|e| IndexError::Error(e.into()))? + .await? } } diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index 3b92b1078..fd1d59e8f 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -50,18 +50,30 @@ impl IndexMeta { #[derive(Error, Debug)] pub enum IndexError { - #[error("error with index: {0}")] - Error(#[from] anyhow::Error), #[error("index already exists")] IndexAlreadyExists, #[error("Index doesn't exists")] UnexistingIndex, - #[error("Heed error: {0}")] - HeedError(#[from] heed::Error), #[error("Existing primary key")] ExistingPrimaryKey, + #[error("Internal Index Error: {0}")] + Internal(String) } +macro_rules! 
internal_error { + ($($other:path), *) => { + $( + impl From<$other> for IndexError { + fn from(other: $other) -> Self { + Self::Internal(other.to_string()) + } + } + )* + } +} + +internal_error!(anyhow::Error, heed::Error, tokio::task::JoinError, std::io::Error); + #[async_trait::async_trait] #[cfg_attr(test, automock)] pub trait IndexActorHandle { diff --git a/meilisearch-http/src/index_controller/index_actor/store.rs b/meilisearch-http/src/index_controller/index_actor/store.rs index 44f076f2f..3dee166a9 100644 --- a/meilisearch-http/src/index_controller/index_actor/store.rs +++ b/meilisearch-http/src/index_controller/index_actor/store.rs @@ -56,8 +56,7 @@ impl IndexStore for MapIndexStore { } Ok(index) }) - .await - .map_err(|e| IndexError::Error(e.into()))??; + .await??; self.index_store.write().await.insert(uuid, index.clone()); @@ -78,8 +77,7 @@ impl IndexStore for MapIndexStore { let index_size = self.index_size; let index = spawn_blocking(move || open_index(path, index_size)) - .await - .map_err(|e| IndexError::Error(e.into()))??; + .await??; self.index_store.write().await.insert(uuid, index.clone()); Ok(Some(index)) } @@ -88,18 +86,16 @@ impl IndexStore for MapIndexStore { async fn delete(&self, uuid: Uuid) -> IndexResult> { let db_path = self.path.join(format!("index-{}", uuid)); - fs::remove_dir_all(db_path) - .await - .map_err(|e| IndexError::Error(e.into()))?; + fs::remove_dir_all(db_path).await?; let index = self.index_store.write().await.remove(&uuid); Ok(index) } } fn open_index(path: impl AsRef, size: usize) -> IndexResult { - std::fs::create_dir_all(&path).map_err(|e| IndexError::Error(e.into()))?; + std::fs::create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); - let index = milli::Index::new(options, &path).map_err(IndexError::Error)?; + let index = milli::Index::new(options, &path)?; Ok(Index(Arc::new(index))) } From 4acbe8e473bac6e506ca71228a0d405551934e2f Mon Sep 17 00:00:00 2001 From: Marin Postma Date: 
Mon, 24 May 2021 18:16:35 +0200 Subject: [PATCH 36/54] implement index dump --- meilisearch-http/src/index/mod.rs | 61 ++++++++++++++++++- .../dump_actor/handle_impl.rs | 2 +- .../src/index_controller/index_actor/actor.rs | 58 ++++-------------- .../index_actor/handle_impl.rs | 4 +- .../index_controller/index_actor/message.rs | 1 - .../src/index_controller/index_actor/mod.rs | 2 +- .../update_actor/update_store.rs | 2 +- .../index_controller/uuid_resolver/store.rs | 4 +- 8 files changed, 79 insertions(+), 55 deletions(-) diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index f26cc4283..c4bf19856 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,8 +1,11 @@ -use std::{collections::{BTreeSet, HashSet}, marker::PhantomData}; +use std::{collections::{BTreeSet, HashSet}, io::Write, marker::PhantomData, path::{Path, PathBuf}}; use std::ops::Deref; use std::sync::Arc; +use std::fs::File; use anyhow::{bail, Context}; +use heed::RoTxn; +use indexmap::IndexMap; use milli::obkv_to_json; use serde_json::{Map, Value}; @@ -38,7 +41,10 @@ where impl Index { pub fn settings(&self) -> anyhow::Result> { let txn = self.read_txn()?; + self.settings_txn(&txn) + } + pub fn settings_txn(&self, txn: &RoTxn) -> anyhow::Result> { let displayed_attributes = self .displayed_fields(&txn)? .map(|fields| fields.into_iter().map(String::from).collect()); @@ -161,4 +167,57 @@ impl Index { displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid)); Ok(displayed_fields_ids) } + + pub fn dump(&self, path: PathBuf) -> anyhow::Result<()> { + // acquire write txn make sure any ongoing write is finnished before we start. 
+ let txn = self.env.write_txn()?; + + self.dump_documents(&txn, &path)?; + self.dump_meta(&txn, &path)?; + + Ok(()) + } + + fn dump_documents(&self, txn: &RoTxn, path: impl AsRef) -> anyhow::Result<()> { + println!("dumping documents"); + let document_file_path = path.as_ref().join("documents.jsonl"); + let mut document_file = File::create(&document_file_path)?; + + let documents = self.all_documents(txn)?; + let fields_ids_map = self.fields_ids_map(txn)?; + + // dump documents + let mut json_map = IndexMap::new(); + for document in documents { + let (_, reader) = document?; + + for (fid, bytes) in reader.iter() { + if let Some(name) = fields_ids_map.name(fid) { + json_map.insert(name, serde_json::from_slice::(bytes)?); + } + } + + serde_json::to_writer(&mut document_file, &json_map)?; + document_file.write(b"\n")?; + + json_map.clear(); + } + + Ok(()) + } + + fn dump_meta(&self, txn: &RoTxn, path: impl AsRef) -> anyhow::Result<()> { + println!("dumping settings"); + let meta_file_path = path.as_ref().join("meta.json"); + let mut meta_file = File::create(&meta_file_path)?; + + let settings = self.settings_txn(txn)?; + let json = serde_json::json!({ + "settings": settings, + }); + + serde_json::to_writer(&mut meta_file, &json)?; + + Ok(()) + } } diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs index 601c97c01..575119410 100644 --- a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs @@ -1,4 +1,4 @@ -use std::path::{Path}; +use std::path::Path; use actix_web::web::Bytes; use tokio::sync::{mpsc, oneshot}; use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult}; diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 0e2e63468..f6f7cdc28 100644 --- 
a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -6,7 +6,7 @@ use async_stream::stream; use futures::stream::StreamExt; use heed::CompactionOption; use log::debug; -use tokio::sync::mpsc; +use tokio::{fs, sync::mpsc}; use tokio::task::spawn_blocking; use uuid::Uuid; @@ -126,13 +126,8 @@ impl IndexActor { Snapshot { uuid, path, ret } => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } - Dump { - uid, - uuid, - path, - ret, - } => { - let _ = ret.send(self.handle_dump(&uid, uuid, path).await); + Dump { uuid, path, ret } => { + let _ = ret.send(self.handle_dump(uuid, path).await); } GetStats { uuid, ret } => { let _ = ret.send(self.handle_get_stats(uuid).await); @@ -312,46 +307,17 @@ impl IndexActor { /// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the /// documents and all the settings. - async fn handle_dump(&self, uid: &str, uuid: Uuid, path: PathBuf) -> IndexResult<()> { - use std::io::prelude::*; - use tokio::fs::create_dir_all; + async fn handle_dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> { + let index = self + .store + .get(uuid) + .await? + .ok_or(IndexError::UnexistingIndex)?; - create_dir_all(&path).await?; + let path = path.join(format!("indexes/index-{}/", uuid)); + fs::create_dir_all(&path).await?; - if let Some(index) = self.store.get(uuid).await? { - let documents_path = path.join(uid).join("documents.jsonl"); - let settings_path = path.join(uid).join("settings.json"); - - spawn_blocking(move || -> anyhow::Result<()> { - // first we dump all the documents - let file = File::create(documents_path)?; - let mut file = std::io::BufWriter::new(file); - - // Get write txn to wait for ongoing write transaction before dump. - let txn = index.write_txn()?; - let fields_ids_map = index.fields_ids_map(&txn)?; - // we want to save **all** the fields in the dump. 
- let fields_to_dump: Vec = fields_ids_map.iter().map(|(id, _)| id).collect(); - - for document in index.all_documents(&txn)? { - let (_doc_id, document) = document?; - let json = milli::obkv_to_json(&fields_to_dump, &fields_ids_map, document)?; - file.write_all(serde_json::to_string(&json)?.as_bytes())?; - file.write_all(b"\n")?; - } - - // then we dump all the settings - let file = File::create(settings_path)?; - let mut file = std::io::BufWriter::new(file); - let settings = index.settings()?; - - file.write_all(serde_json::to_string(&settings)?.as_bytes())?; - file.write_all(b"\n")?; - - Ok(()) - }) - .await??; - } + tokio::task::spawn_blocking(move || index.dump(path)).await??; Ok(()) } diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index 64b63e5f0..26aa189d0 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -136,9 +136,9 @@ impl IndexActorHandle for IndexActorHandleImpl { Ok(receiver.await.expect("IndexActor has been killed")?) } - async fn dump(&self, uid: String, uuid: Uuid, path: PathBuf) -> IndexResult<()> { + async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> { let (ret, receiver) = oneshot::channel(); - let msg = IndexMsg::Dump { uid, uuid, path, ret }; + let msg = IndexMsg::Dump { uuid, path, ret }; let _ = self.sender.send(msg).await; Ok(receiver.await.expect("IndexActor has been killed")?) 
} diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 37faa1e31..714a30ecc 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -61,7 +61,6 @@ pub enum IndexMsg { ret: oneshot::Sender>, }, Dump { - uid: String, uuid: Uuid, path: PathBuf, ret: oneshot::Sender>, diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index fd1d59e8f..dbea5151d 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -109,7 +109,7 @@ pub trait IndexActorHandle { index_settings: IndexSettings, ) -> IndexResult; async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>; - async fn dump(&self, uid: String, uuid: Uuid, path: PathBuf) -> IndexResult<()>; + async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>; async fn get_index_stats(&self, uuid: Uuid) -> IndexResult; } diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index f91a2740c..d22be0bd4 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -642,7 +642,7 @@ impl UpdateStore { let path = &path; let mut stream = futures::stream::iter(uuids.iter()) - .map(|(uid, uuid)| handle.dump(uid.clone(), *uuid, path.clone())) + .map(|(uid, uuid)| handle.dump(*uuid, path.clone())) .buffer_unordered(CONCURRENT_INDEX_MSG / 3); Handle::current().block_on(async { diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 4fbaa37b4..b497116cb 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs 
+++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -152,8 +152,8 @@ impl HeedUuidStore { let entry = entry?; let uuid = Uuid::from_slice(entry.1)?; uuids.insert(uuid); - serde_json::to_writer(&mut dump_file, &serde_json::json!({ "uid": entry.0, "uuid": uuid }))?; - dump_file.write(b"\n").unwrap(); + serde_json::to_writer(&mut dump_file, &serde_json::json!({ "uid": entry.0, "uuid": uuid + }))?; dump_file.write(b"\n").unwrap(); } Ok(uuids) From 464639aa0f331e1720711358b1e8e9e055822eaa Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Tue, 25 May 2021 09:46:11 +0200 Subject: [PATCH 37/54] udpate actor error improvements --- meilisearch-http/src/index_controller/mod.rs | 8 +- .../index_controller/update_actor/actor.rs | 87 +++++++------------ .../src/index_controller/update_actor/mod.rs | 30 ++++++- 3 files changed, 58 insertions(+), 67 deletions(-) diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 900482257..61bc71114 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -158,13 +158,7 @@ impl IndexController { // prevent dead_locking between the update_handle::update that waits for the update to be // registered and the update_actor that waits for the the payload to be sent to it. 
tokio::task::spawn_local(async move { - payload - .map(|bytes| { - bytes.map_err(|e| { - Box::new(e) as Box - }) - }) - .for_each(|r| async { + payload.for_each(|r| async { let _ = sender.send(r).await; }) .await diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index f576ce7a8..27906a1a8 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -11,7 +11,7 @@ use tokio::sync::mpsc; use uuid::Uuid; use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo}; -use crate::index_controller::index_actor::{IndexActorHandle}; +use crate::index_controller::index_actor::IndexActorHandle; use crate::index_controller::{UpdateMeta, UpdateStatus}; pub struct UpdateActor { @@ -42,7 +42,12 @@ where let store = UpdateStore::open(options, &path, index_handle.clone())?; std::fs::create_dir_all(path.join("update_files"))?; assert!(path.exists()); - Ok(Self { path, store, inbox, index_handle }) + Ok(Self { + path, + store, + inbox, + index_handle, + }) } pub async fn run(mut self) { @@ -90,9 +95,7 @@ where mut payload: mpsc::Receiver>, ) -> Result { let file_path = match meta { - UpdateMeta::DocumentsAddition { .. } - | UpdateMeta::DeleteDocuments => { - + UpdateMeta::DocumentsAddition { .. 
} | UpdateMeta::DeleteDocuments => { let update_file_id = uuid::Uuid::new_v4(); let path = self .path @@ -102,39 +105,26 @@ where .write(true) .create(true) .open(&path) - .await - .map_err(|e| UpdateError::Error(Box::new(e)))?; + .await?; let mut file_len = 0; while let Some(bytes) = payload.recv().await { - match bytes { - Ok(bytes) => { - file_len += bytes.as_ref().len(); - file.write_all(bytes.as_ref()) - .await - .map_err(|e| UpdateError::Error(Box::new(e)))?; - } - Err(e) => { - return Err(UpdateError::Error(e)); - } - } + let bytes = bytes?; + file_len += bytes.as_ref().len(); + file.write_all(bytes.as_ref()).await?; } if file_len != 0 { - file.flush() - .await - .map_err(|e| UpdateError::Error(Box::new(e)))?; + file.flush().await?; let file = file.into_std().await; Some((file, path)) } else { // empty update, delete the empty file. - fs::remove_file(&path) - .await - .map_err(|e| UpdateError::Error(Box::new(e)))?; + fs::remove_file(&path).await?; None } } - _ => None + _ => None, }; let update_store = self.store.clone(); @@ -145,17 +135,15 @@ where // If the payload is empty, ignore the check. let path = if let Some((mut file, path)) = file_path { // set the file back to the beginning - file.seek(SeekFrom::Start(0)).map_err(|e| UpdateError::Error(Box::new(e)))?; + file.seek(SeekFrom::Start(0))?; // Check that the json payload is valid: let reader = BufReader::new(&mut file); let mut checker = JsonChecker::new(reader); if copy(&mut checker, &mut sink()).is_err() || checker.finish().is_err() { // The json file is invalid, we use Serde to get a nice error message: - file.seek(SeekFrom::Start(0)) - .map_err(|e| UpdateError::Error(Box::new(e)))?; - let _: serde_json::Value = serde_json::from_reader(file) - .map_err(|e| UpdateError::Error(Box::new(e)))?; + file.seek(SeekFrom::Start(0))?; + let _: serde_json::Value = serde_json::from_reader(file)?; } Some(path) } else { @@ -163,32 +151,27 @@ where }; // The payload is valid, we can register it to the update store. 
- update_store + let status = update_store .register_update(meta, path, uuid) - .map(UpdateStatus::Enqueued) - .map_err(|e| UpdateError::Error(Box::new(e))) + .map(UpdateStatus::Enqueued)?; + Ok(status) }) - .await - .map_err(|e| UpdateError::Error(Box::new(e)))? + .await? } async fn handle_list_updates(&self, uuid: Uuid) -> Result> { let update_store = self.store.clone(); tokio::task::spawn_blocking(move || { - let result = update_store - .list(uuid) - .map_err(|e| UpdateError::Error(e.into()))?; + let result = update_store.list(uuid)?; Ok(result) }) - .await - .map_err(|e| UpdateError::Error(Box::new(e)))? + .await? } async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result { let store = self.store.clone(); let result = store - .meta(uuid, id) - .map_err(|e| UpdateError::Error(Box::new(e)))? + .meta(uuid, id)? .ok_or(UpdateError::UnexistingUpdate(id))?; Ok(result) } @@ -196,10 +179,7 @@ where async fn handle_delete(&self, uuid: Uuid) -> Result<()> { let store = self.store.clone(); - tokio::task::spawn_blocking(move || store.delete_all(uuid)) - .await - .map_err(|e| UpdateError::Error(e.into()))? - .map_err(|e| UpdateError::Error(e.into()))?; + tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??; Ok(()) } @@ -208,10 +188,8 @@ where let index_handle = self.index_handle.clone(); let update_store = self.store.clone(); - tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle)) - .await - .map_err(|e| UpdateError::Error(e.into()))? - .map_err(|e| UpdateError::Error(e.into()))?; + tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle)) + .await??; Ok(()) } @@ -223,9 +201,8 @@ where update_store.dump(&uuids, path.to_path_buf(), index_handle)?; Ok(()) }) - .await - .map_err(|e| UpdateError::Error(e.into()))? 
- .map_err(|e| UpdateError::Error(e.into()))?; + .await??; + Ok(()) } @@ -235,9 +212,7 @@ where let info = update_store.get_info()?; Ok(info) }) - .await - .map_err(|e| UpdateError::Error(e.into()))? - .map_err(|e| UpdateError::Error(e.into()))?; + .await??; Ok(info) } diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index 05b793e45..a0c498e92 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -5,6 +5,7 @@ mod update_store; use std::{collections::HashSet, path::PathBuf}; +use actix_http::error::PayloadError; use thiserror::Error; use tokio::sync::mpsc; use uuid::Uuid; @@ -14,23 +15,44 @@ use crate::index_controller::{UpdateMeta, UpdateStatus}; use actor::UpdateActor; use message::UpdateMsg; -pub use update_store::{UpdateStore, UpdateStoreInfo}; pub use handle_impl::UpdateActorHandleImpl; +pub use update_store::{UpdateStore, UpdateStoreInfo}; pub type Result = std::result::Result; -type PayloadData = std::result::Result>; +type PayloadData = std::result::Result; #[cfg(test)] use mockall::automock; #[derive(Debug, Error)] pub enum UpdateError { - #[error("error with update: {0}")] - Error(Box), #[error("Update {0} doesn't exist.")] UnexistingUpdate(u64), + #[error("Internal error processing update: {0}")] + Internal(String), } +macro_rules! 
internal_error { + ($($other:path), *) => { + $( + impl From<$other> for UpdateError { + fn from(other: $other) -> Self { + Self::Internal(other.to_string()) + } + } + )* + } +} + +internal_error!( + heed::Error, + std::io::Error, + serde_json::Error, + PayloadError, + tokio::task::JoinError, + anyhow::Error +); + #[async_trait::async_trait] #[cfg_attr(test, automock(type Data=Vec;))] pub trait UpdateActorHandle { From 3593ebb8aab919014433b3a0d62a95f593a25f56 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Tue, 25 May 2021 16:33:09 +0200 Subject: [PATCH 38/54] dump updates --- meilisearch-http/src/index/updates.rs | 1 + .../index_controller/update_actor/actor.rs | 3 +- .../update_actor/handle_impl.rs | 2 +- .../index_controller/update_actor/message.rs | 2 +- .../src/index_controller/update_actor/mod.rs | 6 +- .../update_actor/store/codec.rs | 86 +++++++++ .../update_actor/store/dump.rs | 146 +++++++++++++++ .../{update_store.rs => store/mod.rs} | 175 ++---------------- 8 files changed, 252 insertions(+), 169 deletions(-) create mode 100644 meilisearch-http/src/index_controller/update_actor/store/codec.rs create mode 100644 meilisearch-http/src/index_controller/update_actor/store/dump.rs rename meilisearch-http/src/index_controller/update_actor/{update_store.rs => store/mod.rs} (79%) diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 0f4bf3589..2b489451b 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -178,6 +178,7 @@ impl Index { let indexing_callback = |indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step); + let gzipped = false; let addition = match content { Some(content) if gzipped => { diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index 27906a1a8..4097f31aa 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ 
b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -194,9 +194,10 @@ where Ok(()) } - async fn handle_dump(&self, uuids: HashSet<(String, Uuid)>, path: PathBuf) -> Result<()> { + async fn handle_dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { let index_handle = self.index_handle.clone(); let update_store = self.store.clone(); + println!("starting dump"); tokio::task::spawn_blocking(move || -> anyhow::Result<()> { update_store.dump(&uuids, path.to_path_buf(), index_handle)?; Ok(()) diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index a497a3c5c..cc5ba9757 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -71,7 +71,7 @@ where receiver.await.expect("update actor killed.") } - async fn dump(&self, uuids: HashSet<(String, Uuid)>, path: PathBuf) -> Result<()> { + async fn dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::Dump { uuids, path, ret }; let _ = self.sender.send(msg).await; diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 4103ca121..37df2af32 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -32,7 +32,7 @@ pub enum UpdateMsg { ret: oneshot::Sender>, }, Dump { - uuids: HashSet<(String, Uuid)>, + uuids: HashSet, path: PathBuf, ret: oneshot::Sender>, }, diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index a0c498e92..8cd77e252 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -1,7 +1,7 @@ mod actor; mod 
handle_impl; mod message; -mod update_store; +mod store; use std::{collections::HashSet, path::PathBuf}; @@ -16,7 +16,7 @@ use actor::UpdateActor; use message::UpdateMsg; pub use handle_impl::UpdateActorHandleImpl; -pub use update_store::{UpdateStore, UpdateStoreInfo}; +pub use store::{UpdateStore, UpdateStoreInfo}; pub type Result = std::result::Result; type PayloadData = std::result::Result; @@ -62,7 +62,7 @@ pub trait UpdateActorHandle { async fn update_status(&self, uuid: Uuid, id: u64) -> Result; async fn delete(&self, uuid: Uuid) -> Result<()>; async fn snapshot(&self, uuid: HashSet, path: PathBuf) -> Result<()>; - async fn dump(&self, uuid: HashSet<(String, Uuid)>, path: PathBuf) -> Result<()>; + async fn dump(&self, uuids: HashSet, path: PathBuf) -> Result<()>; async fn get_info(&self) -> Result; async fn update( &self, diff --git a/meilisearch-http/src/index_controller/update_actor/store/codec.rs b/meilisearch-http/src/index_controller/update_actor/store/codec.rs new file mode 100644 index 000000000..e07b52eec --- /dev/null +++ b/meilisearch-http/src/index_controller/update_actor/store/codec.rs @@ -0,0 +1,86 @@ +use std::{borrow::Cow, convert::TryInto, mem::size_of}; + +use heed::{BytesDecode, BytesEncode}; +use uuid::Uuid; + +pub struct NextIdCodec; + +pub enum NextIdKey { + Global, + Index(Uuid), +} + +impl<'a> BytesEncode<'a> for NextIdCodec { + type EItem = NextIdKey; + + fn bytes_encode(item: &'a Self::EItem) -> Option> { + match item { + NextIdKey::Global => Some(Cow::Borrowed(b"__global__")), + NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())), + } + } +} + +pub struct PendingKeyCodec; + +impl<'a> BytesEncode<'a> for PendingKeyCodec { + type EItem = (u64, Uuid, u64); + + fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option> { + let mut bytes = Vec::with_capacity(size_of::()); + bytes.extend_from_slice(&global_id.to_be_bytes()); + bytes.extend_from_slice(uuid.as_bytes()); + 
bytes.extend_from_slice(&update_id.to_be_bytes()); + Some(Cow::Owned(bytes)) + } +} + +impl<'a> BytesDecode<'a> for PendingKeyCodec { + type DItem = (u64, Uuid, u64); + + fn bytes_decode(bytes: &'a [u8]) -> Option { + let global_id_bytes = bytes.get(0..size_of::())?.try_into().ok()?; + let global_id = u64::from_be_bytes(global_id_bytes); + + let uuid_bytes = bytes + .get(size_of::()..(size_of::() + size_of::()))? + .try_into() + .ok()?; + let uuid = Uuid::from_bytes(uuid_bytes); + + let update_id_bytes = bytes + .get((size_of::() + size_of::())..)? + .try_into() + .ok()?; + let update_id = u64::from_be_bytes(update_id_bytes); + + Some((global_id, uuid, update_id)) + } +} + +pub struct UpdateKeyCodec; + +impl<'a> BytesEncode<'a> for UpdateKeyCodec { + type EItem = (Uuid, u64); + + fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option> { + let mut bytes = Vec::with_capacity(size_of::()); + bytes.extend_from_slice(uuid.as_bytes()); + bytes.extend_from_slice(&update_id.to_be_bytes()); + Some(Cow::Owned(bytes)) + } +} + +impl<'a> BytesDecode<'a> for UpdateKeyCodec { + type DItem = (Uuid, u64); + + fn bytes_decode(bytes: &'a [u8]) -> Option { + let uuid_bytes = bytes.get(0..size_of::())?.try_into().ok()?; + let uuid = Uuid::from_bytes(uuid_bytes); + + let update_id_bytes = bytes.get(size_of::()..)?.try_into().ok()?; + let update_id = u64::from_be_bytes(update_id_bytes); + + Some((uuid, update_id)) + } +} diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs new file mode 100644 index 000000000..8b75f9e5d --- /dev/null +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -0,0 +1,146 @@ +use std::{ + collections::HashSet, + fs::{copy, create_dir_all, File}, + io::Write, + path::{Path, PathBuf}, +}; + +use anyhow::Context; +use heed::RoTxn; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::{State, codec::UpdateKeyCodec}; +use 
super::UpdateStore; +use crate::index_controller::{index_actor::IndexActorHandle, UpdateStatus}; + +#[derive(Serialize, Deserialize)] +struct UpdateEntry { + uuid: Uuid, + update: UpdateStatus, +} + +impl UpdateStore { + pub fn dump( + &self, + uuids: &HashSet, + path: PathBuf, + handle: impl IndexActorHandle, + ) -> anyhow::Result<()> { + let state_lock = self.state.write(); + state_lock.swap(State::Dumping); + + // txn must *always* be acquired after state lock, or it will dead lock. + let txn = self.env.write_txn()?; + + let dump_path = path.join("updates"); + create_dir_all(&dump_path)?; + + self.dump_updates(&txn, uuids, &dump_path)?; + + let fut = dump_indexes(uuids, handle, &path); + tokio::runtime::Handle::current().block_on(fut)?; + + state_lock.swap(State::Idle); + + Ok(()) + } + + fn dump_updates( + &self, + txn: &RoTxn, + uuids: &HashSet, + path: impl AsRef, + ) -> anyhow::Result<()> { + let dump_data_path = path.as_ref().join("data.jsonl"); + let mut dump_data_file = File::create(dump_data_path)?; + + let update_files_path = path.as_ref().join("update_files"); + create_dir_all(&update_files_path)?; + + self.dump_pending(&txn, uuids, &mut dump_data_file, &update_files_path)?; + self.dump_completed(&txn, uuids, &mut dump_data_file)?; + + Ok(()) + } + + fn dump_pending( + &self, + txn: &RoTxn, + uuids: &HashSet, + mut file: &mut File, + update_files_path: impl AsRef, + ) -> anyhow::Result<()> { + let pendings = self.pending_queue.iter(txn)?.lazily_decode_data(); + + for pending in pendings { + let ((_, uuid, _), data) = pending?; + if uuids.contains(&uuid) { + let mut update = data.decode()?; + + if let Some(content) = update.content.take() { + update.content = Some(dump_update_file(content, &update_files_path)?); + } + + let update_json = UpdateEntry { + uuid, + update: update.into(), + }; + + serde_json::to_writer(&mut file, &update_json)?; + file.write(b"\n")?; + } + } + + Ok(()) + } + + fn dump_completed( + &self, + txn: &RoTxn, + uuids: &HashSet, + 
mut file: &mut File, + ) -> anyhow::Result<()> { + let updates = self + .updates + .iter(txn)? + .remap_key_type::() + .lazily_decode_data(); + + for update in updates { + let ((uuid, _), data) = update?; + if uuids.contains(&uuid) { + let update = data.decode()?.into(); + + let update_json = UpdateEntry { uuid, update }; + + serde_json::to_writer(&mut file, &update_json)?; + file.write(b"\n")?; + } + } + + Ok(()) + } +} + +async fn dump_indexes(uuids: &HashSet, handle: impl IndexActorHandle, path: impl AsRef)-> anyhow::Result<()> { + for uuid in uuids { + handle.dump(*uuid, path.as_ref().to_owned()).await?; + } + + Ok(()) +} + +fn dump_update_file( + file_path: impl AsRef, + dump_path: impl AsRef, +) -> anyhow::Result { + let filename: PathBuf = file_path + .as_ref() + .file_name() + .context("invalid update file name")? + .into(); + let dump_file_path = dump_path.as_ref().join(&filename); + copy(file_path, dump_file_path)?; + Ok(filename) +} diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs similarity index 79% rename from meilisearch-http/src/index_controller/update_actor/update_store.rs rename to meilisearch-http/src/index_controller/update_actor/store/mod.rs index d22be0bd4..52bd8d62a 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -1,23 +1,25 @@ +mod dump; +mod codec; + use std::collections::{BTreeMap, HashSet}; -use std::convert::TryInto; use std::fs::{copy, create_dir_all, remove_file, File}; -use std::mem::size_of; use std::path::Path; use std::sync::Arc; -use std::{borrow::Cow, path::PathBuf}; use anyhow::Context; use arc_swap::ArcSwap; use futures::StreamExt; use heed::types::{ByteSlice, OwnedType, SerdeJson}; use heed::zerocopy::U64; -use heed::{BytesDecode, BytesEncode, CompactionOption, Database, Env, EnvOpenOptions}; +use heed::{CompactionOption, 
Database, Env, EnvOpenOptions}; use log::error; use parking_lot::{Mutex, MutexGuard}; use tokio::runtime::Handle; use tokio::sync::mpsc; use uuid::Uuid; +use codec::*; + use super::UpdateMeta; use crate::{helpers::EnvSizer, index_controller::index_actor::IndexResult}; use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle}; @@ -25,13 +27,6 @@ use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, Ind #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; -struct NextIdCodec; - -enum NextIdKey { - Global, - Index(Uuid), -} - pub struct UpdateStoreInfo { /// Size of the update store in bytes. pub size: u64, @@ -45,13 +40,13 @@ pub struct StateLock { data: ArcSwap, } -struct StateLockGuard<'a> { +pub struct StateLockGuard<'a> { _lock: MutexGuard<'a, ()>, state: &'a StateLock, } impl StateLockGuard<'_> { - fn swap(&self, state: State) -> Arc { + pub fn swap(&self, state: State) -> Arc { self.state.data.swap(Arc::new(state)) } } @@ -63,11 +58,11 @@ impl StateLock { Self { lock, data } } - fn read(&self) -> Arc { + pub fn read(&self) -> Arc { self.data.load().clone() } - fn write(&self) -> StateLockGuard { + pub fn write(&self) -> StateLockGuard { let _lock = self.lock.lock(); let state = &self; StateLockGuard { _lock, state } @@ -82,81 +77,6 @@ pub enum State { Dumping, } -impl<'a> BytesEncode<'a> for NextIdCodec { - type EItem = NextIdKey; - - fn bytes_encode(item: &'a Self::EItem) -> Option> { - match item { - NextIdKey::Global => Some(Cow::Borrowed(b"__global__")), - NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())), - } - } -} - -struct PendingKeyCodec; - -impl<'a> BytesEncode<'a> for PendingKeyCodec { - type EItem = (u64, Uuid, u64); - - fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option> { - let mut bytes = Vec::with_capacity(size_of::()); - bytes.extend_from_slice(&global_id.to_be_bytes()); - bytes.extend_from_slice(uuid.as_bytes()); - 
bytes.extend_from_slice(&update_id.to_be_bytes()); - Some(Cow::Owned(bytes)) - } -} - -impl<'a> BytesDecode<'a> for PendingKeyCodec { - type DItem = (u64, Uuid, u64); - - fn bytes_decode(bytes: &'a [u8]) -> Option { - let global_id_bytes = bytes.get(0..size_of::())?.try_into().ok()?; - let global_id = u64::from_be_bytes(global_id_bytes); - - let uuid_bytes = bytes - .get(size_of::()..(size_of::() + size_of::()))? - .try_into() - .ok()?; - let uuid = Uuid::from_bytes(uuid_bytes); - - let update_id_bytes = bytes - .get((size_of::() + size_of::())..)? - .try_into() - .ok()?; - let update_id = u64::from_be_bytes(update_id_bytes); - - Some((global_id, uuid, update_id)) - } -} - -struct UpdateKeyCodec; - -impl<'a> BytesEncode<'a> for UpdateKeyCodec { - type EItem = (Uuid, u64); - - fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option> { - let mut bytes = Vec::with_capacity(size_of::()); - bytes.extend_from_slice(uuid.as_bytes()); - bytes.extend_from_slice(&update_id.to_be_bytes()); - Some(Cow::Owned(bytes)) - } -} - -impl<'a> BytesDecode<'a> for UpdateKeyCodec { - type DItem = (Uuid, u64); - - fn bytes_decode(bytes: &'a [u8]) -> Option { - let uuid_bytes = bytes.get(0..size_of::())?.try_into().ok()?; - let uuid = Uuid::from_bytes(uuid_bytes); - - let update_id_bytes = bytes.get(size_of::()..)?.try_into().ok()?; - let update_id = u64::from_be_bytes(update_id_bytes); - - Some((uuid, update_id)) - } -} - #[derive(Clone)] pub struct UpdateStore { pub env: Env, @@ -174,7 +94,7 @@ pub struct UpdateStore { /// | 16-bytes | 8-bytes | updates: Database>, /// Indicates the current state of the update store, - state: Arc, + pub state: Arc, /// Wake up the loop when a new event occurs. notification_sender: mpsc::Sender<()>, } @@ -364,6 +284,7 @@ impl UpdateStore { let processing = pending.processing(); // Acquire the state lock and set the current state to processing. + // txn must *always* be acquired after state lock, or it will dead lock. 
let state = self.state.write(); state.swap(State::Processing(index_uuid, processing.clone())); @@ -580,78 +501,6 @@ impl UpdateStore { Ok(()) } - pub fn dump( - &self, - uuids: &HashSet<(String, Uuid)>, - path: PathBuf, - handle: impl IndexActorHandle, - ) -> anyhow::Result<()> { - use std::io::prelude::*; - let state_lock = self.state.write(); - state_lock.swap(State::Dumping); - - let txn = self.env.write_txn()?; - - for (index_uid, index_uuid) in uuids.iter() { - let file = File::create(path.join(index_uid).join("updates.jsonl"))?; - let mut file = std::io::BufWriter::new(file); - - let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); - for entry in pendings { - let ((_, uuid, _), pending) = entry?; - if &uuid == index_uuid { - let mut update: UpdateStatus = pending.decode()?.into(); - if let Some(path) = update.content_path_mut() { - *path = path.file_name().expect("update path can't be empty").into(); - } - serde_json::to_writer(&mut file, &update)?; - file.write_all(b"\n")?; - } - } - - let updates = self.updates.prefix_iter(&txn, index_uuid.as_bytes())?; - for entry in updates { - let (_, update) = entry?; - let mut update = update.clone(); - if let Some(path) = update.content_path_mut() { - *path = path.file_name().expect("update path can't be empty").into(); - } - serde_json::to_writer(&mut file, &update)?; - file.write_all(b"\n")?; - } - } - - let update_files_path = path.join("update_files"); - create_dir_all(&update_files_path)?; - - let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); - - for entry in pendings { - let ((_, uuid, _), pending) = entry?; - if uuids.iter().any(|(_, id)| id == &uuid) { - if let Some(path) = pending.decode()?.content_path() { - let name = path.file_name().unwrap(); - let to = update_files_path.join(name); - copy(path, to)?; - } - } - } - - // Perform the dump of each index concurently. 
Only a third of the capabilities of - // the index actor at a time not to put too much pressure on the index actor - let path = &path; - - let mut stream = futures::stream::iter(uuids.iter()) - .map(|(uid, uuid)| handle.dump(*uuid, path.clone())) - .buffer_unordered(CONCURRENT_INDEX_MSG / 3); - - Handle::current().block_on(async { - while let Some(res) = stream.next().await { - res?; - } - Ok(()) - }) - } pub fn get_info(&self) -> anyhow::Result { let mut size = self.env.size(); From 9278a6fe5906f0246d6c4c4d4bf07873ab2763c9 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Tue, 25 May 2021 18:14:11 +0200 Subject: [PATCH 39/54] integrate in dump actor --- .../src/index_controller/dump_actor/actor.rs | 37 ++++--------------- 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 8e1e48ebe..2d931dcbd 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -106,7 +106,6 @@ where let task_result = tokio::task::spawn(perform_dump( self.dump_path.clone(), self.uuid_resolver.clone(), - self.index.clone(), self.update.clone(), uid.clone(), )) @@ -155,50 +154,28 @@ where } } -async fn perform_dump( +async fn perform_dump( dump_path: PathBuf, uuid_resolver: UuidResolver, - index: Index, - update: Update, + update_handle: Update, uid: String, ) -> anyhow::Result<()> where UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static, - Index: index_actor::IndexActorHandle + Send + Sync + Clone + 'static, Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, { info!("Performing dump."); - let dump_dir = dump_path.clone(); - tokio::fs::create_dir_all(&dump_dir).await?; - let temp_dump_dir = - tokio::task::spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); + let 
dump_path_clone = dump_path.clone(); + let temp_dump_path = tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(dump_path_clone)).await??; - let uuids = uuid_resolver.list().await?; - // maybe we could just keep the vec as-is - let uuids: HashSet<(String, Uuid)> = uuids.into_iter().collect(); + let uuids = uuid_resolver.dump(temp_dump_path.path().to_owned()).await?; - if uuids.is_empty() { - return Ok(()); - } + update_handle.dump(uuids, temp_dump_path.path().to_owned()).await?; - let indexes = list_indexes(&uuid_resolver, &index).await?; - - // we create one directory by index - for meta in indexes.iter() { - tokio::fs::create_dir(temp_dump_path.join(&meta.uid)).await?; - } - - let metadata = super::Metadata::new(indexes, env!("CARGO_PKG_VERSION").to_string()); - metadata.to_path(&temp_dump_path).await?; - - update.dump(uuids, temp_dump_path.clone()).await?; - - let dump_dir = dump_path.clone(); let dump_path = dump_path.join(format!("{}.dump", uid)); let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { - let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?; + let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; let temp_dump_file_path = temp_dump_file.path().to_owned(); compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; temp_dump_file.persist(&dump_path)?; From e818c33fec0cb8e8af281335dd40150bca1ad1ce Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Wed, 26 May 2021 20:42:09 +0200 Subject: [PATCH 40/54] implement load uuid_resolver --- .../src/index_controller/dump_actor/actor.rs | 68 +++---- .../dump_actor/handle_impl.rs | 6 +- .../dump_actor/loaders/mod.rs | 2 + .../index_controller/dump_actor/loaders/v1.rs | 137 ++++++++++++++ .../index_controller/dump_actor/loaders/v2.rs | 179 ++++++++++++++++++ .../src/index_controller/dump_actor/mod.rs | 148 +++------------ .../src/index_controller/dump_actor/v1.rs | 122 ------------ .../src/index_controller/dump_actor/v2.rs | 89 --------- 
meilisearch-http/src/index_controller/mod.rs | 30 +-- .../src/index_controller/update_actor/mod.rs | 2 +- .../update_actor/store/dump.rs | 2 +- .../update_actor/store/mod.rs | 6 +- .../src/index_controller/uuid_resolver/mod.rs | 2 +- .../index_controller/uuid_resolver/store.rs | 56 +++++- 14 files changed, 438 insertions(+), 411 deletions(-) create mode 100644 meilisearch-http/src/index_controller/dump_actor/loaders/mod.rs create mode 100644 meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs create mode 100644 meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs delete mode 100644 meilisearch-http/src/index_controller/dump_actor/v1.rs delete mode 100644 meilisearch-http/src/index_controller/dump_actor/v2.rs diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 2d931dcbd..31378f89c 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -1,27 +1,26 @@ use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus}; use crate::helpers::compression; -use crate::index_controller::{index_actor, update_actor, uuid_resolver, IndexMetadata}; +use crate::index_controller::{update_actor, uuid_resolver}; use async_stream::stream; use chrono::Utc; use futures::stream::StreamExt; use log::{error, info}; use std::{ - collections::HashSet, path::{Path, PathBuf}, sync::Arc, }; -use tokio::sync::{mpsc, oneshot, RwLock}; -use uuid::Uuid; +use tokio::{fs::create_dir_all, sync::{mpsc, oneshot, RwLock}}; pub const CONCURRENT_DUMP_MSG: usize = 10; -pub struct DumpActor { +pub struct DumpActor { inbox: Option>, uuid_resolver: UuidResolver, - index: Index, update: Update, dump_path: PathBuf, dump_info: Arc>>, + _update_db_size: u64, + _index_db_size: u64, } /// Generate uid from creation date @@ -29,26 +28,27 @@ fn generate_uid() -> String { Utc::now().format("%Y%m%d-%H%M%S%3f").to_string() } -impl 
DumpActor +impl DumpActor where UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static, - Index: index_actor::IndexActorHandle + Send + Sync + Clone + 'static, Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, { pub fn new( inbox: mpsc::Receiver, uuid_resolver: UuidResolver, - index: Index, update: Update, dump_path: impl AsRef, + _index_db_size: u64, + _update_db_size: u64, ) -> Self { Self { inbox: Some(inbox), uuid_resolver, - index, update, dump_path: dump_path.as_ref().into(), dump_info: Arc::new(RwLock::new(None)), + _index_db_size, + _update_db_size, } } @@ -155,7 +155,7 @@ where } async fn perform_dump( - dump_path: PathBuf, + path: PathBuf, uuid_resolver: UuidResolver, update_handle: Update, uid: String, @@ -166,19 +166,23 @@ where { info!("Performing dump."); - let dump_path_clone = dump_path.clone(); - let temp_dump_path = tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(dump_path_clone)).await??; + create_dir_all(&path).await?; - let uuids = uuid_resolver.dump(temp_dump_path.path().to_owned()).await?; + let path_clone = path.clone(); + let temp_dump_dir = tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); - update_handle.dump(uuids, temp_dump_path.path().to_owned()).await?; + let uuids = uuid_resolver.dump(temp_dump_path.clone()).await?; + + update_handle.dump(uuids, temp_dump_path.clone()).await?; - let dump_path = dump_path.join(format!("{}.dump", uid)); let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { - let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; - let temp_dump_file_path = temp_dump_file.path().to_owned(); - compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?; + let temp_dump_file = tempfile::NamedTempFile::new_in(&path)?; + compression::to_tar_gz(temp_dump_path, temp_dump_file.path())?; + + let dump_path = path.join(format!("{}.dump", uid)); 
temp_dump_file.persist(&dump_path)?; + Ok(dump_path) }) .await??; @@ -187,29 +191,3 @@ where Ok(()) } - -async fn list_indexes( - uuid_resolver: &UuidResolver, - index: &Index, -) -> anyhow::Result> -where - UuidResolver: uuid_resolver::UuidResolverHandle, - Index: index_actor::IndexActorHandle, -{ - let uuids = uuid_resolver.list().await?; - - let mut ret = Vec::new(); - - for (uid, uuid) in uuids { - let meta = index.get_index_meta(uuid).await?; - let meta = IndexMetadata { - uuid, - name: uid.clone(), - uid, - meta, - }; - ret.push(meta); - } - - Ok(ret) -} diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs index 575119410..ff663798f 100644 --- a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs @@ -29,13 +29,15 @@ impl DumpActorHandleImpl { pub fn new( path: impl AsRef, uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl, - index: crate::index_controller::index_actor::IndexActorHandleImpl, update: crate::index_controller::update_actor::UpdateActorHandleImpl, + index_db_size: u64, + update_db_size: u64, ) -> anyhow::Result { let (sender, receiver) = mpsc::channel(10); - let actor = DumpActor::new(receiver, uuid_resolver, index, update, path); + let actor = DumpActor::new(receiver, uuid_resolver, update, path, index_db_size, update_db_size); tokio::task::spawn(actor.run()); + Ok(Self { sender }) } } diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/mod.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/mod.rs new file mode 100644 index 000000000..ae6adc7cf --- /dev/null +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/mod.rs @@ -0,0 +1,2 @@ +pub mod v1; +pub mod v2; diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs new file mode 
100644 index 000000000..76207ff7b --- /dev/null +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -0,0 +1,137 @@ +use std::path::Path; + +use serde::{Deserialize, Serialize}; + +use crate::index_controller::IndexMetadata; + +#[derive(Serialize, Deserialize, Debug)] +pub struct MetadataV1 { + db_version: String, + indexes: Vec, +} + +impl MetadataV1 { + pub fn load_dump(self, _src: impl AsRef, _dst: impl AsRef) -> anyhow::Result<()> { + todo!("implement load v1") + } +} + +// This is the settings used in the last version of meilisearch exporting dump in V1 +//#[derive(Default, Clone, Serialize, Deserialize, Debug)] +//#[serde(rename_all = "camelCase", deny_unknown_fields)] +//struct Settings { + //#[serde(default, deserialize_with = "deserialize_some")] + //pub ranking_rules: Option>>, + //#[serde(default, deserialize_with = "deserialize_some")] + //pub distinct_attribute: Option>, + //#[serde(default, deserialize_with = "deserialize_some")] + //pub searchable_attributes: Option>>, + //#[serde(default, deserialize_with = "deserialize_some")] + //pub displayed_attributes: Option>>, + //#[serde(default, deserialize_with = "deserialize_some")] + //pub stop_words: Option>>, + //#[serde(default, deserialize_with = "deserialize_some")] + //pub synonyms: Option>>>, + //#[serde(default, deserialize_with = "deserialize_some")] + //pub attributes_for_faceting: Option>>, +//} + +///// we need to **always** be able to convert the old settings to the settings currently being used +//impl From for index_controller::Settings { + //fn from(settings: Settings) -> Self { + //if settings.synonyms.flatten().is_some() { + //error!("`synonyms` are not yet implemented and thus will be ignored"); + //} + //Self { + //distinct_attribute: settings.distinct_attribute, + //// we need to convert the old `Vec` into a `BTreeSet` + //displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())), + //searchable_attributes: 
settings.searchable_attributes, + //// we previously had a `Vec` but now we have a `HashMap` + //// representing the name of the faceted field + the type of the field. Since the type + //// was not known in the V1 of the dump we are just going to assume everything is a + //// String + //attributes_for_faceting: settings.attributes_for_faceting.map(|o| o.map(|vec| vec.into_iter().map(|key| (key, String::from("string"))).collect())), + //// we need to convert the old `Vec` into a `BTreeSet` + //ranking_rules: settings.ranking_rules.map(|o| o.map(|vec| vec.into_iter().filter_map(|criterion| { + //match criterion.as_str() { + //"words" | "typo" | "proximity" | "attribute" => Some(criterion), + //s if s.starts_with("asc") || s.starts_with("desc") => Some(criterion), + //"wordsPosition" => { + //warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored"); + //Some(String::from("words")) + //} + //"exactness" => { + //error!("The criterion `{}` is not implemented currently and thus will be ignored", criterion); + //None + //} + //s => { + //error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); + //None + //} + //} + //}).collect())), + //// we need to convert the old `Vec` into a `BTreeSet` + //stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())), + //_kind: PhantomData, + //} + //} +//} + +///// Extract Settings from `settings.json` file present at provided `dir_path` +//fn import_settings(dir_path: &Path) -> anyhow::Result { + //let path = dir_path.join("settings.json"); + //let file = File::open(path)?; + //let reader = std::io::BufReader::new(file); + //let metadata = serde_json::from_reader(reader)?; + + //Ok(metadata) +//} + +//pub fn import_dump( + //size: usize, + //uuid: Uuid, + //dump_path: &Path, + //db_path: &Path, + //primary_key: Option<&str>, +//) -> anyhow::Result<()> { + //let index_path = db_path.join(&format!("indexes/index-{}", 
uuid)); + //info!("Importing a dump from an old version of meilisearch with dump version 1"); + + //std::fs::create_dir_all(&index_path)?; + //let mut options = EnvOpenOptions::new(); + //options.map_size(size); + //let index = milli::Index::new(options, index_path)?; + //let index = Index(Arc::new(index)); + + //// extract `settings.json` file and import content + //let settings = import_settings(&dump_path)?; + //let settings: index_controller::Settings = settings.into(); + //let update_builder = UpdateBuilder::new(0); + //index.update_settings(&settings.check(), update_builder)?; + + //let update_builder = UpdateBuilder::new(1); + //let file = File::open(&dump_path.join("documents.jsonl"))?; + //let reader = std::io::BufReader::new(file); + + //// TODO: TAMO: waiting for milli. We should use the result + //let _ = index.update_documents( + //UpdateFormat::JsonStream, + //IndexDocumentsMethod::ReplaceDocuments, + //Some(reader), + //update_builder, + //primary_key, + //); + + //// the last step: we extract the original milli::Index and close it + //Arc::try_unwrap(index.0) + //.map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") + //.unwrap() + //.prepare_for_closing() + //.wait(); + + //// at this point we should handle the import of the updates, but since the update logic is not handled in + //// meilisearch we are just going to ignore this part + + //Ok(()) +//} diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs new file mode 100644 index 000000000..ee7044fd1 --- /dev/null +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs @@ -0,0 +1,179 @@ +use std::{fs::File, io::BufReader, marker::PhantomData, path::Path}; + +use anyhow::Context; +use chrono::{DateTime, Utc}; +use log::info; +use serde::{Deserialize, Serialize}; + +use crate::index_controller::uuid_resolver::store::UuidStore; + +#[derive(Serialize, Deserialize, 
Debug)] +pub struct MetadataV2 { + db_version: String, + index_db_size: usize, + update_db_size: usize, + dump_date: DateTime, + _pth: PhantomData, +} + +impl MetadataV2 +where U: UuidStore, +{ + pub fn load_dump(self, src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { + info!( + "Loading dump from {}, dump database version: {}, dump version: V2", + self.dump_date, self.db_version + ); + // get dir in which to load the db: + let dst_dir = dst + .as_ref() + .parent() + .with_context(|| format!("Invalid db path: {}", dst.as_ref().display()))?; + + let tmp_dst = tempfile::tempdir_in(dst_dir)?; + + self.load_index_resolver(&src, tmp_dst.path())?; + load_updates(&src, tmp_dst.path())?; + load_indexes(&src, tmp_dst.path())?; + Ok(()) + } + + fn load_index_resolver( + &self, + src: impl AsRef, + dst: impl AsRef, + ) -> anyhow::Result<()> { + info!("Loading index database."); + let uuid_resolver_path = dst.as_ref().join("uuid_resolver/"); + std::fs::create_dir_all(&uuid_resolver_path)?; + + U::load_dump(src.as_ref(), dst.as_ref())?; + + Ok(()) + } +} + + +fn load_updates(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { + info!("Loading updates."); + todo!() +} + +fn load_indexes(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { + info!("Loading indexes"); + todo!() +} + +// Extract Settings from `settings.json` file present at provided `dir_path` +//fn import_settings(dir_path: &Path) -> anyhow::Result> { +//let path = dir_path.join("settings.json"); +//let file = File::open(path)?; +//let reader = BufReader::new(file); +//let metadata: Settings = serde_json::from_reader(reader)?; + +//Ok(metadata.check()) +//} + +//pub fn import_dump( +//_db_size: usize, +//update_db_size: usize, +//_uuid: Uuid, +//dump_path: impl AsRef, +//db_path: impl AsRef, +//_primary_key: Option<&str>, +//) -> anyhow::Result<()> { +//info!("Dump import started."); +//info!("Importing outstanding updates..."); + +//import_updates(&dump_path, &db_path, update_db_size)?; + 
+//info!("done importing updates"); + +//Ok(()) +////let index_path = db_path.join(&format!("indexes/index-{}", uuid)); +////std::fs::create_dir_all(&index_path)?; +////let mut options = EnvOpenOptions::new(); +////options.map_size(size); +////let index = milli::Index::new(options, index_path)?; +////let index = Index(Arc::new(index)); + +////let mut txn = index.write_txn()?; + +////info!("importing the settings..."); +////// extract `settings.json` file and import content +////let settings = import_settings(&dump_path)?; +////let update_builder = UpdateBuilder::new(0); +////index.update_settings_txn(&mut txn, &settings, update_builder)?; + +////// import the documents in the index +////let update_builder = UpdateBuilder::new(1); +////let file = File::open(&dump_path.join("documents.jsonl"))?; +////let reader = std::io::BufReader::new(file); + +////info!("importing the documents..."); +////// TODO: TAMO: currently we ignore any error caused by the importation of the documents because +////// if there is no documents nor primary key it'll throw an anyhow error, but we must remove +////// this before the merge on main +////index.update_documents_txn( +////&mut txn, +////UpdateFormat::JsonStream, +////IndexDocumentsMethod::ReplaceDocuments, +////Some(reader), +////update_builder, +////primary_key, +////)?; + +////txn.commit()?; + +////// the last step: we extract the original milli::Index and close it +////Arc::try_unwrap(index.0) +////.map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") +////.unwrap() +////.prepare_for_closing() +////.wait(); + +////info!("importing the updates..."); +////import_updates(dump_path, db_path) +//} + +//fn import_updates( +//src_path: impl AsRef, +//dst_path: impl AsRef, +//_update_db_size: usize +//) -> anyhow::Result<()> { +//let dst_update_path = dst_path.as_ref().join("updates"); +//std::fs::create_dir_all(&dst_update_path)?; + +//let dst_update_files_path = dst_update_path.join("update_files"); 
+//std::fs::create_dir_all(&dst_update_files_path)?; + +//let options = EnvOpenOptions::new(); +//let (update_store, _) = UpdateStore::create(options, &dst_update_path)?; + +//let src_update_path = src_path.as_ref().join("updates"); +//let src_update_files_path = src_update_path.join("update_files"); +//let update_data = File::open(&src_update_path.join("data.jsonl"))?; +//let mut update_data = BufReader::new(update_data); + +//let mut wtxn = update_store.env.write_txn()?; +//let mut line = String::new(); +//loop { +//match update_data.read_line(&mut line) { +//Ok(_) => { +//let UpdateEntry { uuid, mut update } = serde_json::from_str(&line)?; + +//if let Some(path) = update.content_path_mut() { +//let dst_file_path = dst_update_files_path.join(&path); +//let src_file_path = src_update_files_path.join(&path); +//*path = dst_update_files_path.join(&path); +//std::fs::copy(src_file_path, dst_file_path)?; +//} + +//update_store.register_raw_updates(&mut wtxn, update, uuid)?; +//} +//_ => break, +//} +//} +//wtxn.commit()?; +//Ok(()) +//} diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index 1508f8eb7..f0eeb1be3 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -1,26 +1,18 @@ mod actor; mod handle_impl; mod message; -mod v1; -mod v2; +mod loaders; -use std::{fs::File, path::Path, sync::Arc}; +use std::{fs::File, path::Path}; -use anyhow::bail; -use heed::EnvOpenOptions; -use log::{error, info}; -use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +use log::error; #[cfg(test)] use mockall::automock; use serde::{Deserialize, Serialize}; -use tempfile::TempDir; use thiserror::Error; -use uuid::Uuid; -use super::IndexMetadata; -use crate::helpers::compression; -use crate::index::Index; -use crate::index_controller::uuid_resolver; +use loaders::v1::MetadataV1; +use loaders::v2::MetadataV2; pub 
use actor::DumpActor; pub use handle_impl::*; @@ -40,31 +32,6 @@ pub enum DumpError { DumpDoesNotExist(String), } -#[derive(Debug, Serialize, Deserialize, Copy, Clone)] -enum DumpVersion { - V1, - V2, -} - -impl DumpVersion { - const CURRENT: Self = Self::V2; - - /// Select the good importation function from the `DumpVersion` of metadata - pub fn import_index( - self, - size: usize, - uuid: Uuid, - dump_path: &Path, - db_path: &Path, - primary_key: Option<&str>, - ) -> anyhow::Result<()> { - match self { - Self::V1 => v1::import_index(size, uuid, dump_path, db_path, primary_key), - Self::V2 => v2::import_index(size, uuid, dump_path, db_path, primary_key), - } - } -} - #[async_trait::async_trait] #[cfg_attr(test, automock)] pub trait DumpActorHandle { @@ -78,23 +45,19 @@ pub trait DumpActorHandle { } #[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Metadata { - indexes: Vec, - db_version: String, - dump_version: DumpVersion, +#[serde(rename_all = "camelCase", tag = "dump_version")] +pub enum Metadata { + V1 { + #[serde(flatten)] + meta: MetadataV1, + }, + V2 { + #[serde(flatten)] + meta: MetadataV2, + }, } impl Metadata { - /// Create a Metadata with the current dump version of meilisearch. 
- pub fn new(indexes: Vec, db_version: String) -> Self { - Metadata { - indexes, - db_version, - dump_version: DumpVersion::CURRENT, - } - } - /// Extract Metadata from `metadata.json` file present at provided `dir_path` fn from_path(dir_path: &Path) -> anyhow::Result { let path = dir_path.join("metadata.json"); @@ -155,80 +118,19 @@ impl DumpInfo { } pub fn load_dump( - db_path: impl AsRef, - dump_path: impl AsRef, - size: usize, + dst_path: impl AsRef, + src_path: impl AsRef, + _index_db_size: u64, + _update_db_size: u64, ) -> anyhow::Result<()> { - info!("Importing dump from {}...", dump_path.as_ref().display()); - let db_path = db_path.as_ref(); - let dump_path = dump_path.as_ref(); - let uuid_resolver = uuid_resolver::HeedUuidStore::new(&db_path)?; + let meta_path = src_path.as_ref().join("metadat.json"); + let mut meta_file = File::open(&meta_path)?; + let meta: Metadata = serde_json::from_reader(&mut meta_file)?; - // extract the dump in a temporary directory - let tmp_dir = TempDir::new_in(db_path)?; - let tmp_dir_path = tmp_dir.path(); - compression::from_tar_gz(dump_path, tmp_dir_path)?; - - // read dump metadata - let metadata = Metadata::from_path(&tmp_dir_path)?; - - // remove indexes which have same `uuid` than indexes to import and create empty indexes - let existing_index_uids = uuid_resolver.list()?; - - info!("Deleting indexes already present in the db and provided in the dump..."); - for idx in &metadata.indexes { - if let Some((_, uuid)) = existing_index_uids.iter().find(|(s, _)| s == &idx.uid) { - // if we find the index in the `uuid_resolver` it's supposed to exist on the file system - // and we want to delete it - let path = db_path.join(&format!("indexes/index-{}", uuid)); - info!("Deleting {}", path.display()); - use std::io::ErrorKind::*; - match std::fs::remove_dir_all(path) { - Ok(()) => (), - // if an index was present in the metadata but missing of the fs we can ignore the - // problem because we are going to create it later - Err(e) 
if e.kind() == NotFound => (), - Err(e) => bail!(e), - } - } else { - // if the index does not exist in the `uuid_resolver` we create it - uuid_resolver.create_uuid(idx.uid.clone(), false)?; - } + match meta { + Metadata::V1 { meta } => meta.load_dump(src_path, dst_path)?, + Metadata::V2 { meta } => meta.load_dump(src_path, dst_path)?, } - // import each indexes content - for idx in metadata.indexes { - let dump_path = tmp_dir_path.join(&idx.uid); - // this cannot fail since we created all the missing uuid in the previous loop - let uuid = uuid_resolver.get_uuid(idx.uid)?.unwrap(); - - info!( - "Importing dump from {} into {}...", - dump_path.display(), - db_path.display() - ); - metadata.dump_version.import_index( - size, - uuid, - &dump_path, - &db_path, - idx.meta.primary_key.as_ref().map(|s| s.as_ref()), - )?; - info!("Dump importation from {} succeed", dump_path.display()); - } - - // finally we can move all the unprocessed update file into our new DB - // this directory may not exists - let update_path = tmp_dir_path.join("update_files"); - let db_update_path = db_path.join("updates/update_files"); - if update_path.exists() { - let _ = std::fs::remove_dir_all(db_update_path); - std::fs::rename( - tmp_dir_path.join("update_files"), - db_path.join("updates/update_files"), - )?; - } - - info!("Dump importation from {} succeed", dump_path.display()); Ok(()) } diff --git a/meilisearch-http/src/index_controller/dump_actor/v1.rs b/meilisearch-http/src/index_controller/dump_actor/v1.rs deleted file mode 100644 index 6f199193c..000000000 --- a/meilisearch-http/src/index_controller/dump_actor/v1.rs +++ /dev/null @@ -1,122 +0,0 @@ -use std::{collections::{BTreeMap, BTreeSet}, marker::PhantomData}; - -use log::warn; -use serde::{Deserialize, Serialize}; -use crate::{index::Unchecked, index_controller}; -use crate::index::deserialize_some; -use super::*; - -/// This is the settings used in the last version of meilisearch exporting dump in V1 -#[derive(Default, Clone, 
Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase", deny_unknown_fields)] -struct Settings { - #[serde(default, deserialize_with = "deserialize_some")] - pub ranking_rules: Option>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub distinct_attribute: Option>, - #[serde(default, deserialize_with = "deserialize_some")] - pub searchable_attributes: Option>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub displayed_attributes: Option>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub stop_words: Option>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub synonyms: Option>>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub attributes_for_faceting: Option>>, -} - -/// we need to **always** be able to convert the old settings to the settings currently being used -impl From for index_controller::Settings { - fn from(settings: Settings) -> Self { - if settings.synonyms.flatten().is_some() { - error!("`synonyms` are not yet implemented and thus will be ignored"); - } - Self { - distinct_attribute: settings.distinct_attribute, - // we need to convert the old `Vec` into a `BTreeSet` - displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())), - searchable_attributes: settings.searchable_attributes, - // we previously had a `Vec` but now we have a `HashMap` - // representing the name of the faceted field + the type of the field. 
Since the type - // was not known in the V1 of the dump we are just going to assume everything is a - // String - attributes_for_faceting: settings.attributes_for_faceting.map(|o| o.map(|vec| vec.into_iter().map(|key| (key, String::from("string"))).collect())), - // we need to convert the old `Vec` into a `BTreeSet` - ranking_rules: settings.ranking_rules.map(|o| o.map(|vec| vec.into_iter().filter_map(|criterion| { - match criterion.as_str() { - "words" | "typo" | "proximity" | "attribute" => Some(criterion), - s if s.starts_with("asc") || s.starts_with("desc") => Some(criterion), - "wordsPosition" => { - warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored"); - Some(String::from("words")) - } - "exactness" => { - error!("The criterion `{}` is not implemented currently and thus will be ignored", criterion); - None - } - s => { - error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); - None - } - } - }).collect())), - // we need to convert the old `Vec` into a `BTreeSet` - stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())), - _kind: PhantomData, - } - } -} - -/// Extract Settings from `settings.json` file present at provided `dir_path` -fn import_settings(dir_path: &Path) -> anyhow::Result { - let path = dir_path.join("settings.json"); - let file = File::open(path)?; - let reader = std::io::BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; - - Ok(metadata) -} - - -pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { - let index_path = db_path.join(&format!("indexes/index-{}", uuid)); - info!("Importing a dump from an old version of meilisearch with dump version 1"); - - std::fs::create_dir_all(&index_path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(size); - let index = milli::Index::new(options, index_path)?; - let index 
= Index(Arc::new(index)); - - // extract `settings.json` file and import content - let settings = import_settings(&dump_path)?; - let settings: index_controller::Settings = settings.into(); - let update_builder = UpdateBuilder::new(0); - index.update_settings(&settings.check(), update_builder)?; - - let update_builder = UpdateBuilder::new(1); - let file = File::open(&dump_path.join("documents.jsonl"))?; - let reader = std::io::BufReader::new(file); - - // TODO: TAMO: waiting for milli. We should use the result - let _ = index.update_documents( - UpdateFormat::JsonStream, - IndexDocumentsMethod::ReplaceDocuments, - Some(reader), - update_builder, - primary_key, - ); - - // the last step: we extract the original milli::Index and close it - Arc::try_unwrap(index.0) - .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") - .unwrap() - .prepare_for_closing() - .wait(); - - // at this point we should handle the import of the updates, but since the update logic is not handled in - // meilisearch we are just going to ignore this part - - Ok(()) -} diff --git a/meilisearch-http/src/index_controller/dump_actor/v2.rs b/meilisearch-http/src/index_controller/dump_actor/v2.rs deleted file mode 100644 index eeda78e8a..000000000 --- a/meilisearch-http/src/index_controller/dump_actor/v2.rs +++ /dev/null @@ -1,89 +0,0 @@ -use heed::EnvOpenOptions; -use log::info; -use uuid::Uuid; -use crate::{index::Unchecked, index_controller::{UpdateStatus, update_actor::UpdateStore}}; -use std::io::BufRead; -use milli::{update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; -use crate::index::{Checked, Index}; -use crate::index_controller::Settings; -use std::{fs::File, path::Path, sync::Arc}; - -/// Extract Settings from `settings.json` file present at provided `dir_path` -fn import_settings(dir_path: &Path) -> anyhow::Result> { - let path = dir_path.join("settings.json"); - let file = File::open(path)?; - let reader = std::io::BufReader::new(file); - 
let metadata: Settings = serde_json::from_reader(reader)?; - - println!("Meta: {:?}", metadata); - - Ok(metadata.check()) -} - -pub fn import_index(size: usize, uuid: Uuid, dump_path: &Path, db_path: &Path, primary_key: Option<&str>) -> anyhow::Result<()> { - let index_path = db_path.join(&format!("indexes/index-{}", uuid)); - std::fs::create_dir_all(&index_path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(size); - let index = milli::Index::new(options, index_path)?; - let index = Index(Arc::new(index)); - - let mut txn = index.write_txn()?; - - info!("importing the settings..."); - // extract `settings.json` file and import content - let settings = import_settings(&dump_path)?; - let update_builder = UpdateBuilder::new(0); - index.update_settings_txn(&mut txn, &settings, update_builder)?; - - // import the documents in the index - let update_builder = UpdateBuilder::new(1); - let file = File::open(&dump_path.join("documents.jsonl"))?; - let reader = std::io::BufReader::new(file); - - info!("importing the documents..."); - // TODO: TAMO: currently we ignore any error caused by the importation of the documents because - // if there is no documents nor primary key it'll throw an anyhow error, but we must remove - // this before the merge on main - index.update_documents_txn( - &mut txn, - UpdateFormat::JsonStream, - IndexDocumentsMethod::ReplaceDocuments, - Some(reader), - update_builder, - primary_key, - )?; - - txn.commit()?; - - // the last step: we extract the original milli::Index and close it - Arc::try_unwrap(index.0) - .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") - .unwrap() - .prepare_for_closing() - .wait(); - - info!("importing the updates..."); - import_updates(uuid, dump_path, db_path) -} - -fn import_updates(uuid: Uuid, dump_path: &Path, db_path: &Path) -> anyhow::Result<()> { - let update_path = db_path.join("updates"); - let options = EnvOpenOptions::new(); - // create an UpdateStore to 
import the updates - std::fs::create_dir_all(&update_path)?; - let (update_store, _) = UpdateStore::create(options, &update_path)?; - let file = File::open(&dump_path.join("updates.jsonl"))?; - let reader = std::io::BufReader::new(file); - - let mut wtxn = update_store.env.write_txn()?; - for update in reader.lines() { - let mut update: UpdateStatus = serde_json::from_str(&update?)?; - if let Some(path) = update.content_path_mut() { - *path = update_path.join("update_files").join(&path); - } - update_store.register_raw_updates(&mut wtxn, update, uuid)?; - } - wtxn.commit()?; - Ok(()) -} diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 61bc71114..4e40a9873 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -14,22 +14,20 @@ use tokio::sync::mpsc; use tokio::time::sleep; use uuid::Uuid; -pub use updates::*; -pub use dump_actor::{DumpInfo, DumpStatus}; use dump_actor::DumpActorHandle; +pub use dump_actor::{DumpInfo, DumpStatus}; use index_actor::IndexActorHandle; -use snapshot::{SnapshotService, load_snapshot}; +use snapshot::{load_snapshot, SnapshotService}; use update_actor::UpdateActorHandle; +pub use updates::*; use uuid_resolver::{UuidResolverError, UuidResolverHandle}; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use crate::option::Opt; -use dump_actor::load_dump; - +mod dump_actor; mod index_actor; mod snapshot; -mod dump_actor; mod update_actor; mod update_handler; mod updates; @@ -94,13 +92,8 @@ impl IndexController { options.ignore_snapshot_if_db_exists, options.ignore_missing_snapshot, )?; - } else if let Some(ref path) = options.import_dump { - load_dump( - &options.db_path, - path, - index_size, - )?; - + } else if let Some(ref _path) = options.import_dump { + todo!("implement load dump") } std::fs::create_dir_all(&path)?; @@ -112,7 +105,13 @@ impl IndexController { &path, update_store_size, )?; - let 
dump_handle = dump_actor::DumpActorHandleImpl::new(&options.dumps_dir, uuid_resolver.clone(), index_handle.clone(), update_handle.clone())?; + let dump_handle = dump_actor::DumpActorHandleImpl::new( + &options.dumps_dir, + uuid_resolver.clone(), + update_handle.clone(), + options.max_mdb_size.get_bytes(), + options.max_udb_size.get_bytes(), + )?; if options.schedule_snapshot { let snapshot_service = SnapshotService::new( @@ -158,7 +157,8 @@ impl IndexController { // prevent dead_locking between the update_handle::update that waits for the update to be // registered and the update_actor that waits for the the payload to be sent to it. tokio::task::spawn_local(async move { - payload.for_each(|r| async { + payload + .for_each(|r| async { let _ = sender.send(r).await; }) .await diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index 8cd77e252..ba89eebe3 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -1,7 +1,7 @@ mod actor; mod handle_impl; mod message; -mod store; +pub mod store; use std::{collections::HashSet, path::PathBuf}; diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index 8b75f9e5d..82b8d0136 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -15,7 +15,7 @@ use super::UpdateStore; use crate::index_controller::{index_actor::IndexActorHandle, UpdateStatus}; #[derive(Serialize, Deserialize)] -struct UpdateEntry { +pub struct UpdateEntry { uuid: Uuid, update: UpdateStatus, } diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index 52bd8d62a..58ac24720 100644 --- 
a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -1,4 +1,4 @@ -mod dump; +pub mod dump; mod codec; use std::collections::{BTreeMap, HashSet}; @@ -115,7 +115,6 @@ impl UpdateStore { let (notification_sender, notification_receiver) = mpsc::channel(10); // Send a first notification to trigger the process. - let _ = notification_sender.send(()); Ok(( Self { @@ -138,6 +137,9 @@ impl UpdateStore { let (update_store, mut notification_receiver) = Self::create(options, path)?; let update_store = Arc::new(update_store); + // trigger the update loop + let _ = update_store.notification_sender.send(()); + // Init update loop to perform any pending updates at launch. // Since we just launched the update store, and we still own the receiving end of the // channel, this call is guaranteed to succeed. diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs index b84025094..5bddadf02 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs @@ -1,7 +1,7 @@ mod actor; mod handle_impl; mod message; -mod store; +pub mod store; use std::collections::HashSet; use std::path::PathBuf; diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index b497116cb..0c6b66ddf 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, io::Write}; +use std::{collections::HashSet, io::{BufReader, BufRead, Write}}; use std::fs::{create_dir_all, File}; use std::path::{Path, PathBuf}; @@ -7,12 +7,19 @@ use heed::{ CompactionOption, Database, Env, EnvOpenOptions, }; use uuid::Uuid; +use serde::{Serialize, Deserialize}; use super::{Result, UuidResolverError, 
UUID_STORE_SIZE}; use crate::helpers::EnvSizer; +#[derive(Serialize, Deserialize)] +struct DumpEntry { + uuid: Uuid, + uid: String, +} + #[async_trait::async_trait] -pub trait UuidStore { +pub trait UuidStore: Sized { // Create a new entry for `name`. Return an error if `err` and the entry already exists, return // the uuid otherwise. async fn create_uuid(&self, uid: String, err: bool) -> Result; @@ -23,6 +30,7 @@ pub trait UuidStore { async fn snapshot(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; async fn dump(&self, path: PathBuf) -> Result>; + fn load_dump(src: &Path, dst: &Path) -> Result<()>; } #[derive(Clone)] @@ -62,11 +70,7 @@ impl HeedUuidStore { Ok(uuid) } } - } - - pub fn get_uuid(&self, name: String) -> Result> { - let env = self.env.clone(); - let db = self.db; + } pub fn get_uuid(&self, name: String) -> Result> { let env = self.env.clone(); let db = self.db; let txn = env.read_txn()?; match db.get(&txn, &name)? { Some(uuid) => { @@ -149,11 +153,14 @@ impl HeedUuidStore { let txn = self.env.read_txn()?; for entry in self.db.iter(&txn)? { - let entry = entry?; + let (uid, uuid) = entry?; let uuid = Uuid::from_slice(entry.1)?; uuids.insert(uuid); - serde_json::to_writer(&mut dump_file, &serde_json::json!({ "uid": entry.0, "uuid": uuid - }))?; dump_file.write(b"\n").unwrap(); + let entry = DumpEntry { + uuid, uid + }; + serde_json::to_writer(&mut dump_file, &entry)?; + dump_file.write(b"\n").unwrap(); } Ok(uuids) @@ -200,4 +207,33 @@ impl UuidStore for HeedUuidStore { let this = self.clone(); tokio::task::spawn_blocking(move || this.dump(path)).await? 
} + + async fn load_dump(src: &Path, dst: &Path) -> Result<()> { + let uuid_resolver_path = dst.join("uuid_resolver/"); + std::fs::create_dir_all(&uuid_resolver_path)?; + + let src_indexes = src.join("index_uuids/data.jsonl"); + let indexes = File::Open(&src_indexes)?; + let mut indexes = BufReader::new(indexes); + let mut line = String::new(); + + let db = Self::new(dst)?; + let mut txn = db.env.write_txn()?; + + loop { + match indexes.read_line(&mut line) { + Ok(0) => break, + Ok(_) => { + let DumpEntry { uuid, uid } = serde_json::from_str(&line)?; + db.db.put(&mut txn, &uid, uuid.as_bytes())?; + } + Err(e) => Err(e)?, + } + + line.clear(); + } + txn.commit()?; + + Ok(()) + } } From b924e897f1096b0ce799c601ecdc5926e7a0424a Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Wed, 26 May 2021 22:52:06 +0200 Subject: [PATCH 41/54] load index dump --- meilisearch-http/src/index/dump.rs | 120 ++++++++++++ meilisearch-http/src/index/mod.rs | 69 ++----- .../update_handler.rs | 2 +- .../index_controller/dump_actor/loaders/v2.rs | 174 +++--------------- .../src/index_controller/dump_actor/mod.rs | 19 +- .../src/index_controller/index_actor/actor.rs | 4 +- .../src/index_controller/index_actor/store.rs | 14 +- meilisearch-http/src/index_controller/mod.rs | 1 - .../update_actor/store/dump.rs | 53 +++++- .../update_actor/store/mod.rs | 7 +- .../index_controller/uuid_resolver/store.rs | 77 ++++---- 11 files changed, 261 insertions(+), 279 deletions(-) create mode 100644 meilisearch-http/src/index/dump.rs rename meilisearch-http/src/{index_controller => index}/update_handler.rs (97%) diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs new file mode 100644 index 000000000..35f5159e5 --- /dev/null +++ b/meilisearch-http/src/index/dump.rs @@ -0,0 +1,120 @@ +use std::{fs::{create_dir_all, File}, path::Path, sync::Arc}; + +use anyhow::Context; +use heed::RoTxn; +use indexmap::IndexMap; +use milli::update::{IndexDocumentsMethod, 
UpdateFormat::JsonStream}; +use serde::{Deserialize, Serialize}; +use anyhow::bail; + +use crate::option::IndexerOpts; + +use super::update_handler::UpdateHandler; +use super::{Checked, Index, Settings}; + +#[derive(Serialize, Deserialize)] +struct DumpMeta { + settings: Settings, + primary_key: Option, +} + +const META_FILE_NAME: &'static str = "meta.json"; +const DATA_FILE_NAME: &'static str = "documents.jsonl"; + +impl Index { + pub fn dump(&self, path: impl AsRef) -> anyhow::Result<()> { + // acquire write txn make sure any ongoing write is finnished before we start. + let txn = self.env.write_txn()?; + + self.dump_documents(&txn, &path)?; + self.dump_meta(&txn, &path)?; + + Ok(()) + } + + fn dump_documents(&self, txn: &RoTxn, path: impl AsRef) -> anyhow::Result<()> { + println!("dumping documents"); + let document_file_path = path.as_ref().join(DATA_FILE_NAME); + let mut document_file = File::create(&document_file_path)?; + + let documents = self.all_documents(txn)?; + let fields_ids_map = self.fields_ids_map(txn)?; + + // dump documents + let mut json_map = IndexMap::new(); + for document in documents { + let (_, reader) = document?; + + for (fid, bytes) in reader.iter() { + if let Some(name) = fields_ids_map.name(fid) { + json_map.insert(name, serde_json::from_slice::(bytes)?); + } + } + + serde_json::to_writer(&mut document_file, &json_map)?; + std::io::Write::write(&mut document_file, b"\n")?; + + json_map.clear(); + } + + Ok(()) + } + + fn dump_meta(&self, txn: &RoTxn, path: impl AsRef) -> anyhow::Result<()> { + println!("dumping settings"); + let meta_file_path = path.as_ref().join(META_FILE_NAME); + let mut meta_file = File::create(&meta_file_path)?; + + let settings = self.settings_txn(txn)?; + let primary_key = self.primary_key(txn)?.map(String::from); + let meta = DumpMeta { settings, primary_key }; + + serde_json::to_writer(&mut meta_file, &meta)?; + + Ok(()) + } + + pub fn load_dump( + src: impl AsRef, + dst: impl AsRef, + size: u64, + 
indexing_options: &IndexerOpts, + ) -> anyhow::Result<()> { + let dir_name = src + .as_ref() + .file_name() + .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; + let dst_dir_path = dst.as_ref().join(dir_name); + create_dir_all(&dst_dir_path)?; + + let meta_path = src.as_ref().join(META_FILE_NAME); + let mut meta_file = File::open(meta_path)?; + let DumpMeta { settings, primary_key } = serde_json::from_reader(&mut meta_file)?; + let index = Self::open(&dst_dir_path, size as usize)?; + let mut txn = index.write_txn()?; + + let handler = UpdateHandler::new(&indexing_options)?; + + index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?; + + let document_file_path = src.as_ref().join(DATA_FILE_NAME); + let document_file = File::open(&document_file_path)?; + index.update_documents_txn( + &mut txn, + JsonStream, + IndexDocumentsMethod::UpdateDocuments, + Some(document_file), + handler.update_builder(0), + primary_key.as_deref(), + )?; + + txn.commit()?; + + match Arc::try_unwrap(index.0) { + Ok(inner) => inner.prepare_for_closing().wait(), + Err(_) => bail!("Could not close index properly."), + } + + Ok(()) + } +} diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index c4bf19856..331db07c4 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,11 +1,9 @@ -use std::{collections::{BTreeSet, HashSet}, io::Write, marker::PhantomData, path::{Path, PathBuf}}; +use std::{collections::{BTreeSet, HashSet}, marker::PhantomData, path::Path}; use std::ops::Deref; use std::sync::Arc; -use std::fs::File; use anyhow::{bail, Context}; -use heed::RoTxn; -use indexmap::IndexMap; +use heed::{EnvOpenOptions, RoTxn}; use milli::obkv_to_json; use serde_json::{Map, Value}; @@ -16,6 +14,8 @@ use serde::{de::Deserializer, Deserialize}; mod search; mod updates; +mod dump; +pub mod update_handler; pub type Document = Map; @@ -39,6 +39,14 @@ where } impl Index { + pub fn open(path: 
impl AsRef, size: usize) -> anyhow::Result { + std::fs::create_dir_all(&path)?; + let mut options = EnvOpenOptions::new(); + options.map_size(size); + let index = milli::Index::new(options, &path)?; + Ok(Index(Arc::new(index))) + } + pub fn settings(&self) -> anyhow::Result> { let txn = self.read_txn()?; self.settings_txn(&txn) @@ -167,57 +175,4 @@ impl Index { displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid)); Ok(displayed_fields_ids) } - - pub fn dump(&self, path: PathBuf) -> anyhow::Result<()> { - // acquire write txn make sure any ongoing write is finnished before we start. - let txn = self.env.write_txn()?; - - self.dump_documents(&txn, &path)?; - self.dump_meta(&txn, &path)?; - - Ok(()) - } - - fn dump_documents(&self, txn: &RoTxn, path: impl AsRef) -> anyhow::Result<()> { - println!("dumping documents"); - let document_file_path = path.as_ref().join("documents.jsonl"); - let mut document_file = File::create(&document_file_path)?; - - let documents = self.all_documents(txn)?; - let fields_ids_map = self.fields_ids_map(txn)?; - - // dump documents - let mut json_map = IndexMap::new(); - for document in documents { - let (_, reader) = document?; - - for (fid, bytes) in reader.iter() { - if let Some(name) = fields_ids_map.name(fid) { - json_map.insert(name, serde_json::from_slice::(bytes)?); - } - } - - serde_json::to_writer(&mut document_file, &json_map)?; - document_file.write(b"\n")?; - - json_map.clear(); - } - - Ok(()) - } - - fn dump_meta(&self, txn: &RoTxn, path: impl AsRef) -> anyhow::Result<()> { - println!("dumping settings"); - let meta_file_path = path.as_ref().join("meta.json"); - let mut meta_file = File::create(&meta_file_path)?; - - let settings = self.settings_txn(txn)?; - let json = serde_json::json!({ - "settings": settings, - }); - - serde_json::to_writer(&mut meta_file, &json)?; - - Ok(()) - } } diff --git a/meilisearch-http/src/index_controller/update_handler.rs b/meilisearch-http/src/index/update_handler.rs 
similarity index 97% rename from meilisearch-http/src/index_controller/update_handler.rs rename to meilisearch-http/src/index/update_handler.rs index d0086aadd..6a303b4ce 100644 --- a/meilisearch-http/src/index_controller/update_handler.rs +++ b/meilisearch-http/src/index/update_handler.rs @@ -38,7 +38,7 @@ impl UpdateHandler { }) } - fn update_builder(&self, update_id: u64) -> UpdateBuilder { + pub fn update_builder(&self, update_id: u64) -> UpdateBuilder { // We prepare the update by using the update builder. let mut update_builder = UpdateBuilder::new(update_id); if let Some(max_nb_chunks) = self.max_nb_chunks { diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs index ee7044fd1..ab4aa8cff 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs @@ -1,25 +1,27 @@ -use std::{fs::File, io::BufReader, marker::PhantomData, path::Path}; +use std::path::Path; use anyhow::Context; use chrono::{DateTime, Utc}; use log::info; use serde::{Deserialize, Serialize}; -use crate::index_controller::uuid_resolver::store::UuidStore; +use crate::{index::Index, index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}, option::IndexerOpts}; #[derive(Serialize, Deserialize, Debug)] -pub struct MetadataV2 { +pub struct MetadataV2 { db_version: String, - index_db_size: usize, - update_db_size: usize, + index_db_size: u64, + update_db_size: u64, dump_date: DateTime, - _pth: PhantomData, } -impl MetadataV2 -where U: UuidStore, -{ - pub fn load_dump(self, src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { +impl MetadataV2 { + pub fn load_dump( + self, + src: impl AsRef, + dst: impl AsRef, + indexing_options: &IndexerOpts, + ) -> anyhow::Result<()> { info!( "Loading dump from {}, dump database version: {}, dump version: V2", self.dump_date, self.db_version @@ -32,148 +34,26 @@ where U: 
UuidStore, let tmp_dst = tempfile::tempdir_in(dst_dir)?; - self.load_index_resolver(&src, tmp_dst.path())?; - load_updates(&src, tmp_dst.path())?; - load_indexes(&src, tmp_dst.path())?; - Ok(()) - } - - fn load_index_resolver( - &self, - src: impl AsRef, - dst: impl AsRef, - ) -> anyhow::Result<()> { info!("Loading index database."); let uuid_resolver_path = dst.as_ref().join("uuid_resolver/"); std::fs::create_dir_all(&uuid_resolver_path)?; + HeedUuidStore::load_dump(src.as_ref(), tmp_dst.as_ref())?; - U::load_dump(src.as_ref(), dst.as_ref())?; + info!("Loading updates."); + UpdateStore::load_dump(&src, &tmp_dst.as_ref(), self.update_db_size)?; + + info!("Loading indexes"); + let indexes_path = src.as_ref().join("indexes"); + let indexes = indexes_path.read_dir()?; + for index in indexes { + let index = index?; + Index::load_dump(&index.path(), &dst, self.index_db_size, indexing_options)?; + } + + // Persist and atomically rename the db + let persisted_dump = tmp_dst.into_path(); + std::fs::rename(&persisted_dump, &dst)?; Ok(()) } } - - -fn load_updates(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - info!("Loading updates."); - todo!() -} - -fn load_indexes(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - info!("Loading indexes"); - todo!() -} - -// Extract Settings from `settings.json` file present at provided `dir_path` -//fn import_settings(dir_path: &Path) -> anyhow::Result> { -//let path = dir_path.join("settings.json"); -//let file = File::open(path)?; -//let reader = BufReader::new(file); -//let metadata: Settings = serde_json::from_reader(reader)?; - -//Ok(metadata.check()) -//} - -//pub fn import_dump( -//_db_size: usize, -//update_db_size: usize, -//_uuid: Uuid, -//dump_path: impl AsRef, -//db_path: impl AsRef, -//_primary_key: Option<&str>, -//) -> anyhow::Result<()> { -//info!("Dump import started."); -//info!("Importing outstanding updates..."); - -//import_updates(&dump_path, &db_path, update_db_size)?; - -//info!("done 
importing updates"); - -//Ok(()) -////let index_path = db_path.join(&format!("indexes/index-{}", uuid)); -////std::fs::create_dir_all(&index_path)?; -////let mut options = EnvOpenOptions::new(); -////options.map_size(size); -////let index = milli::Index::new(options, index_path)?; -////let index = Index(Arc::new(index)); - -////let mut txn = index.write_txn()?; - -////info!("importing the settings..."); -////// extract `settings.json` file and import content -////let settings = import_settings(&dump_path)?; -////let update_builder = UpdateBuilder::new(0); -////index.update_settings_txn(&mut txn, &settings, update_builder)?; - -////// import the documents in the index -////let update_builder = UpdateBuilder::new(1); -////let file = File::open(&dump_path.join("documents.jsonl"))?; -////let reader = std::io::BufReader::new(file); - -////info!("importing the documents..."); -////// TODO: TAMO: currently we ignore any error caused by the importation of the documents because -////// if there is no documents nor primary key it'll throw an anyhow error, but we must remove -////// this before the merge on main -////index.update_documents_txn( -////&mut txn, -////UpdateFormat::JsonStream, -////IndexDocumentsMethod::ReplaceDocuments, -////Some(reader), -////update_builder, -////primary_key, -////)?; - -////txn.commit()?; - -////// the last step: we extract the original milli::Index and close it -////Arc::try_unwrap(index.0) -////.map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") -////.unwrap() -////.prepare_for_closing() -////.wait(); - -////info!("importing the updates..."); -////import_updates(dump_path, db_path) -//} - -//fn import_updates( -//src_path: impl AsRef, -//dst_path: impl AsRef, -//_update_db_size: usize -//) -> anyhow::Result<()> { -//let dst_update_path = dst_path.as_ref().join("updates"); -//std::fs::create_dir_all(&dst_update_path)?; - -//let dst_update_files_path = dst_update_path.join("update_files"); 
-//std::fs::create_dir_all(&dst_update_files_path)?; - -//let options = EnvOpenOptions::new(); -//let (update_store, _) = UpdateStore::create(options, &dst_update_path)?; - -//let src_update_path = src_path.as_ref().join("updates"); -//let src_update_files_path = src_update_path.join("update_files"); -//let update_data = File::open(&src_update_path.join("data.jsonl"))?; -//let mut update_data = BufReader::new(update_data); - -//let mut wtxn = update_store.env.write_txn()?; -//let mut line = String::new(); -//loop { -//match update_data.read_line(&mut line) { -//Ok(_) => { -//let UpdateEntry { uuid, mut update } = serde_json::from_str(&line)?; - -//if let Some(path) = update.content_path_mut() { -//let dst_file_path = dst_update_files_path.join(&path); -//let src_file_path = src_update_files_path.join(&path); -//*path = dst_update_files_path.join(&path); -//std::fs::copy(src_file_path, dst_file_path)?; -//} - -//update_store.register_raw_updates(&mut wtxn, update, uuid)?; -//} -//_ => break, -//} -//} -//wtxn.commit()?; -//Ok(()) -//} diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index f0eeb1be3..6d661d75c 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -1,8 +1,3 @@ -mod actor; -mod handle_impl; -mod message; -mod loaders; - use std::{fs::File, path::Path}; use log::error; @@ -18,6 +13,15 @@ pub use actor::DumpActor; pub use handle_impl::*; pub use message::DumpMsg; +use crate::option::IndexerOpts; + +use super::uuid_resolver::store::UuidStore; + +mod actor; +mod handle_impl; +mod loaders; +mod message; + pub type DumpResult = std::result::Result; #[derive(Error, Debug)] @@ -117,11 +121,12 @@ impl DumpInfo { } } -pub fn load_dump( +pub fn load_dump( dst_path: impl AsRef, src_path: impl AsRef, _index_db_size: u64, _update_db_size: u64, + indexer_opts: &IndexerOpts, ) -> anyhow::Result<()> { let 
meta_path = src_path.as_ref().join("metadat.json"); let mut meta_file = File::open(&meta_path)?; @@ -129,7 +134,7 @@ pub fn load_dump( match meta { Metadata::V1 { meta } => meta.load_dump(src_path, dst_path)?, - Metadata::V2 { meta } => meta.load_dump(src_path, dst_path)?, + Metadata::V2 { meta } => meta.load_dump(src_path.as_ref(), dst_path.as_ref(), indexer_opts)?, } Ok(()) diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index f6f7cdc28..2f136c011 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -10,9 +10,9 @@ use tokio::{fs, sync::mpsc}; use tokio::task::spawn_blocking; use uuid::Uuid; -use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; +use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings, update_handler::UpdateHandler}; use crate::index_controller::{ - get_arc_ownership_blocking, update_handler::UpdateHandler, Failed, IndexStats, Processed, + get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing, }; use crate::option::IndexerOpts; diff --git a/meilisearch-http/src/index_controller/index_actor/store.rs b/meilisearch-http/src/index_controller/index_actor/store.rs index 3dee166a9..11791be48 100644 --- a/meilisearch-http/src/index_controller/index_actor/store.rs +++ b/meilisearch-http/src/index_controller/index_actor/store.rs @@ -2,7 +2,6 @@ use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::Arc; -use heed::EnvOpenOptions; use tokio::fs; use tokio::sync::RwLock; use tokio::task::spawn_blocking; @@ -48,7 +47,7 @@ impl IndexStore for MapIndexStore { let index_size = self.index_size; let index = spawn_blocking(move || -> IndexResult { - let index = open_index(&path, index_size)?; + let index = Index::open(path, index_size)?; if let Some(primary_key) = primary_key { let mut txn = index.write_txn()?; 
index.put_primary_key(&mut txn, &primary_key)?; @@ -76,8 +75,7 @@ impl IndexStore for MapIndexStore { } let index_size = self.index_size; - let index = spawn_blocking(move || open_index(path, index_size)) - .await??; + let index = spawn_blocking(move || Index::open(path, index_size)).await??; self.index_store.write().await.insert(uuid, index.clone()); Ok(Some(index)) } @@ -91,11 +89,3 @@ impl IndexStore for MapIndexStore { Ok(index) } } - -fn open_index(path: impl AsRef, size: usize) -> IndexResult { - std::fs::create_dir_all(&path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(size); - let index = milli::Index::new(options, &path)?; - Ok(Index(Arc::new(index))) -} diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 4e40a9873..69415a1cd 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -29,7 +29,6 @@ mod dump_actor; mod index_actor; mod snapshot; mod update_actor; -mod update_handler; mod updates; mod uuid_resolver; diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index 82b8d0136..1f36931d1 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -1,12 +1,7 @@ -use std::{ - collections::HashSet, - fs::{copy, create_dir_all, File}, - io::Write, - path::{Path, PathBuf}, -}; +use std::{collections::HashSet, fs::{copy, create_dir_all, File}, io::{BufRead, BufReader, Write}, path::{Path, PathBuf}}; use anyhow::Context; -use heed::RoTxn; +use heed::{EnvOpenOptions, RoTxn}; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -15,7 +10,7 @@ use super::UpdateStore; use crate::index_controller::{index_actor::IndexActorHandle, UpdateStatus}; #[derive(Serialize, Deserialize)] -pub struct UpdateEntry { +struct UpdateEntry { uuid: Uuid, update: 
UpdateStatus, } @@ -121,6 +116,48 @@ impl UpdateStore { Ok(()) } + + pub fn load_dump(src: impl AsRef, dst: impl AsRef, db_size: u64) -> anyhow::Result<()> { + let dst_updates_path = dst.as_ref().join("updates/"); + create_dir_all(&dst_updates_path)?; + let dst_update_files_path = dst_updates_path.join("update_files/"); + create_dir_all(&dst_update_files_path)?; + + let mut options = EnvOpenOptions::new(); + options.map_size(db_size as usize); + let (store, _) = UpdateStore::new(options, &dst_updates_path)?; + + let src_update_path = src.as_ref().join("updates"); + let src_update_files_path = src_update_path.join("update_files"); + let update_data = File::open(&src_update_path.join("data.jsonl"))?; + let mut update_data = BufReader::new(update_data); + + let mut wtxn = store.env.write_txn()?; + let mut line = String::new(); + loop { + match update_data.read_line(&mut line) { + Ok(0) => break, + Ok(_) => { + let UpdateEntry { uuid, mut update } = serde_json::from_str(&line)?; + + if let Some(path) = update.content_path_mut() { + let dst_file_path = dst_update_files_path.join(&path); + let src_file_path = src_update_files_path.join(&path); + *path = dst_update_files_path.join(&path); + std::fs::copy(src_file_path, dst_file_path)?; + } + + store.register_raw_updates(&mut wtxn, update, uuid)?; + } + _ => break, + } + + line.clear(); + } + wtxn.commit()?; + + Ok(()) + } } async fn dump_indexes(uuids: &HashSet, handle: impl IndexActorHandle, path: impl AsRef)-> anyhow::Result<()> { diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index 58ac24720..661b712ac 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -100,7 +100,7 @@ pub struct UpdateStore { } impl UpdateStore { - pub fn create( + fn new( mut options: EnvOpenOptions, path: impl AsRef, ) -> anyhow::Result<(Self, 
mpsc::Receiver<()>)> { @@ -114,7 +114,6 @@ impl UpdateStore { let state = Arc::new(StateLock::from_state(State::Idle)); let (notification_sender, notification_receiver) = mpsc::channel(10); - // Send a first notification to trigger the process. Ok(( Self { @@ -134,10 +133,10 @@ impl UpdateStore { path: impl AsRef, index_handle: impl IndexActorHandle + Clone + Sync + Send + 'static, ) -> anyhow::Result> { - let (update_store, mut notification_receiver) = Self::create(options, path)?; + let (update_store, mut notification_receiver) = Self::new(options, path)?; let update_store = Arc::new(update_store); - // trigger the update loop + // Send a first notification to trigger the process. let _ = update_store.notification_sender.send(()); // Init update loop to perform any pending updates at launch. diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 0c6b66ddf..876c2454c 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -30,7 +30,6 @@ pub trait UuidStore: Sized { async fn snapshot(&self, path: PathBuf) -> Result>; async fn get_size(&self) -> Result; async fn dump(&self, path: PathBuf) -> Result>; - fn load_dump(src: &Path, dst: &Path) -> Result<()>; } #[derive(Clone)] @@ -46,14 +45,7 @@ impl HeedUuidStore { let mut options = EnvOpenOptions::new(); options.map_size(UUID_STORE_SIZE); // 1GB let env = options.open(path)?; - let db = env.create_database(None)?; - Ok(Self { env, db }) - } - - pub fn create_uuid(&self, name: String, err: bool) -> Result { - let env = self.env.clone(); - let db = self.db; - let mut txn = env.write_txn()?; + let db = env.create_database(None)?; Ok(Self { env, db }) } pub fn create_uuid(&self, name: String, err: bool) -> Result { let env = self.env.clone(); let db = self.db; let mut txn = env.write_txn()?; match db.get(&txn, &name)? 
{ Some(uuid) => { if err { @@ -154,17 +146,51 @@ impl HeedUuidStore { let txn = self.env.read_txn()?; for entry in self.db.iter(&txn)? { let (uid, uuid) = entry?; - let uuid = Uuid::from_slice(entry.1)?; - uuids.insert(uuid); + let uid = uid.to_string(); + let uuid = Uuid::from_slice(uuid)?; + let entry = DumpEntry { uuid, uid }; serde_json::to_writer(&mut dump_file, &entry)?; dump_file.write(b"\n").unwrap(); + + uuids.insert(uuid); } Ok(uuids) } + + pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { + let uuid_resolver_path = dst.as_ref().join("uuid_resolver/"); + std::fs::create_dir_all(&uuid_resolver_path)?; + + let src_indexes = src.as_ref().join("index_uuids/data.jsonl"); + let indexes = File::open(&src_indexes)?; + let mut indexes = BufReader::new(indexes); + let mut line = String::new(); + + let db = Self::new(dst)?; + let mut txn = db.env.write_txn()?; + + loop { + match indexes.read_line(&mut line) { + Ok(0) => break, + Ok(_) => { + let DumpEntry { uuid, uid } = serde_json::from_str(&line)?; + db.db.put(&mut txn, &uid, uuid.as_bytes())?; + } + Err(e) => Err(e)?, + } + + line.clear(); + } + txn.commit()?; + + db.env.prepare_for_closing().wait(); + + Ok(()) + } } #[async_trait::async_trait] @@ -207,33 +233,4 @@ impl UuidStore for HeedUuidStore { let this = self.clone(); tokio::task::spawn_blocking(move || this.dump(path)).await? 
} - - async fn load_dump(src: &Path, dst: &Path) -> Result<()> { - let uuid_resolver_path = dst.join("uuid_resolver/"); - std::fs::create_dir_all(&uuid_resolver_path)?; - - let src_indexes = src.join("index_uuids/data.jsonl"); - let indexes = File::Open(&src_indexes)?; - let mut indexes = BufReader::new(indexes); - let mut line = String::new(); - - let db = Self::new(dst)?; - let mut txn = db.env.write_txn()?; - - loop { - match indexes.read_line(&mut line) { - Ok(0) => break, - Ok(_) => { - let DumpEntry { uuid, uid } = serde_json::from_str(&line)?; - db.db.put(&mut txn, &uid, uuid.as_bytes())?; - } - Err(e) => Err(e)?, - } - - line.clear(); - } - txn.commit()?; - - Ok(()) - } } From c47369839bc73b4262b1aa79fb430cc462b65812 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Thu, 27 May 2021 10:51:19 +0200 Subject: [PATCH 42/54] dump meta --- .../src/index_controller/dump_actor/actor.rs | 123 ++++++++++-------- .../index_controller/dump_actor/loaders/v2.rs | 9 ++ .../src/index_controller/dump_actor/mod.rs | 4 + 3 files changed, 82 insertions(+), 54 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 31378f89c..1abceef47 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -1,17 +1,17 @@ use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus}; -use crate::helpers::compression; +use crate::{helpers::compression, index_controller::dump_actor::Metadata}; use crate::index_controller::{update_actor, uuid_resolver}; use async_stream::stream; use chrono::Utc; use futures::stream::StreamExt; use log::{error, info}; -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; +use update_actor::UpdateActorHandle; +use uuid_resolver::UuidResolverHandle; +use std::{fs::File, path::{Path, PathBuf}, sync::Arc}; use tokio::{fs::create_dir_all, sync::{mpsc, oneshot, RwLock}}; pub const 
CONCURRENT_DUMP_MSG: usize = 10; +const META_FILE_NAME: &'static str = "metadata.json"; pub struct DumpActor { inbox: Option>, @@ -19,8 +19,8 @@ pub struct DumpActor { update: Update, dump_path: PathBuf, dump_info: Arc>>, - _update_db_size: u64, - _index_db_size: u64, + update_db_size: u64, + index_db_size: u64, } /// Generate uid from creation date @@ -30,16 +30,16 @@ fn generate_uid() -> String { impl DumpActor where - UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static, - Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, + UuidResolver: UuidResolverHandle + Send + Sync + Clone + 'static, + Update: UpdateActorHandle + Send + Sync + Clone + 'static, { pub fn new( inbox: mpsc::Receiver, uuid_resolver: UuidResolver, update: Update, dump_path: impl AsRef, - _index_db_size: u64, - _update_db_size: u64, + index_db_size: u64, + update_db_size: u64, ) -> Self { Self { inbox: Some(inbox), @@ -47,8 +47,8 @@ where update, dump_path: dump_path.as_ref().into(), dump_info: Arc::new(RwLock::new(None)), - _index_db_size, - _update_db_size, + index_db_size, + update_db_size, } } @@ -103,13 +103,16 @@ where let dump_info = self.dump_info.clone(); - let task_result = tokio::task::spawn(perform_dump( - self.dump_path.clone(), - self.uuid_resolver.clone(), - self.update.clone(), - uid.clone(), - )) - .await; + let task = DumpTask { + path: self.dump_path.clone(), + uuid_resolver: self.uuid_resolver.clone(), + update_handle: self.update.clone(), + uid: uid.clone(), + update_db_size: self.update_db_size, + index_db_size: self.index_db_size, + }; + + let task_result = tokio::task::spawn(task.run()).await; match task_result { Ok(Ok(())) => { @@ -152,42 +155,54 @@ where }) ) } + } -async fn perform_dump( +struct DumpTask { path: PathBuf, - uuid_resolver: UuidResolver, - update_handle: Update, + uuid_resolver: U, + update_handle: P, uid: String, -) -> anyhow::Result<()> -where - UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync 
+ Clone + 'static, - Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static, -{ - info!("Performing dump."); - - create_dir_all(&path).await?; - - let path_clone = path.clone(); - let temp_dump_dir = tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let uuids = uuid_resolver.dump(temp_dump_path.clone()).await?; - - update_handle.dump(uuids, temp_dump_path.clone()).await?; - - let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { - let temp_dump_file = tempfile::NamedTempFile::new_in(&path)?; - compression::to_tar_gz(temp_dump_path, temp_dump_file.path())?; - - let dump_path = path.join(format!("{}.dump", uid)); - temp_dump_file.persist(&dump_path)?; - - Ok(dump_path) - }) - .await??; - - info!("Created dump in {:?}.", dump_path); - - Ok(()) + update_db_size: u64, + index_db_size: u64, +} + +impl DumpTask +where + U: UuidResolverHandle + Send + Sync + Clone + 'static, + P: UpdateActorHandle + Send + Sync + Clone + 'static, +{ + async fn run(self) -> anyhow::Result<()> { + info!("Performing dump."); + + create_dir_all(&self.path).await?; + + let path_clone = self.path.clone(); + let temp_dump_dir = tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); + + let meta = Metadata::new_v2(self.index_db_size, self.update_db_size); + let meta_path = temp_dump_path.join(META_FILE_NAME); + let mut meta_file = File::create(&meta_path)?; + serde_json::to_writer(&mut meta_file, &meta)?; + + let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?; + + self.update_handle.dump(uuids, temp_dump_path.clone()).await?; + + let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { + let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; + compression::to_tar_gz(temp_dump_path, temp_dump_file.path())?; + + let dump_path = 
self.path.join(format!("{}.dump", self.uid)); + temp_dump_file.persist(&dump_path)?; + + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + + Ok(()) + } } diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs index ab4aa8cff..b9f89ebbf 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs @@ -16,6 +16,15 @@ pub struct MetadataV2 { } impl MetadataV2 { + pub fn new(index_db_size: u64, update_db_size: u64) -> Self { + Self { + db_version: env!("CARGO_PKG_VERSION").to_string(), + index_db_size, + update_db_size, + dump_date: Utc::now(), + } + } + pub fn load_dump( self, src: impl AsRef, diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index 6d661d75c..b54783f75 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -62,6 +62,10 @@ pub enum Metadata { } impl Metadata { + pub fn new_v2(index_db_size: u64, update_db_size: u64) -> Self { + let meta = MetadataV2::new(index_db_size, update_db_size); + Self::V2 { meta } + } /// Extract Metadata from `metadata.json` file present at provided `dir_path` fn from_path(dir_path: &Path) -> anyhow::Result { let path = dir_path.join("metadata.json"); From b258f4f394270ba3bc998a7f13d42312cae4675b Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Thu, 27 May 2021 14:30:20 +0200 Subject: [PATCH 43/54] fix dump import --- meilisearch-http/src/index/dump.rs | 12 +- meilisearch-http/src/index/updates.rs | 22 ++++ .../src/index_controller/dump_actor/actor.rs | 63 ++--------- .../index_controller/dump_actor/loaders/v2.rs | 17 ++- .../src/index_controller/dump_actor/mod.rs | 105 +++++++++++++----- meilisearch-http/src/index_controller/mod.rs | 12 +- 
.../index_controller/uuid_resolver/store.rs | 1 + 7 files changed, 133 insertions(+), 99 deletions(-) diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs index 35f5159e5..9dbb14fbd 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-http/src/index/dump.rs @@ -9,12 +9,11 @@ use anyhow::bail; use crate::option::IndexerOpts; -use super::update_handler::UpdateHandler; -use super::{Checked, Index, Settings}; +use super::{Unchecked, Index, Settings, update_handler::UpdateHandler}; #[derive(Serialize, Deserialize)] struct DumpMeta { - settings: Settings, + settings: Settings, primary_key: Option, } @@ -33,7 +32,6 @@ impl Index { } fn dump_documents(&self, txn: &RoTxn, path: impl AsRef) -> anyhow::Result<()> { - println!("dumping documents"); let document_file_path = path.as_ref().join(DATA_FILE_NAME); let mut document_file = File::create(&document_file_path)?; @@ -61,11 +59,10 @@ impl Index { } fn dump_meta(&self, txn: &RoTxn, path: impl AsRef) -> anyhow::Result<()> { - println!("dumping settings"); let meta_file_path = path.as_ref().join(META_FILE_NAME); let mut meta_file = File::create(&meta_file_path)?; - let settings = self.settings_txn(txn)?; + let settings = self.settings_txn(txn)?.into_unchecked(); let primary_key = self.primary_key(txn)?.map(String::from); let meta = DumpMeta { settings, primary_key }; @@ -84,12 +81,13 @@ impl Index { .as_ref() .file_name() .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; - let dst_dir_path = dst.as_ref().join(dir_name); + let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); create_dir_all(&dst_dir_path)?; let meta_path = src.as_ref().join(META_FILE_NAME); let mut meta_file = File::open(meta_path)?; let DumpMeta { settings, primary_key } = serde_json::from_reader(&mut meta_file)?; + let settings = settings.check(); let index = Self::open(&dst_dir_path, size as usize)?; let mut txn = index.write_txn()?; diff --git 
a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 2b489451b..053ca6a60 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -87,6 +87,28 @@ impl Settings { _kind: PhantomData, } } + + pub fn into_unchecked(self) -> Settings { + let Self { + displayed_attributes, + searchable_attributes, + attributes_for_faceting, + ranking_rules, + stop_words, + distinct_attribute, + .. + } = self; + + Settings { + displayed_attributes, + searchable_attributes, + attributes_for_faceting, + ranking_rules, + stop_words, + distinct_attribute, + _kind: PhantomData, + } + } } impl Settings { diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 1abceef47..b93d6f42d 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -1,17 +1,18 @@ -use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus}; -use crate::{helpers::compression, index_controller::dump_actor::Metadata}; -use crate::index_controller::{update_actor, uuid_resolver}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + use async_stream::stream; use chrono::Utc; use futures::stream::StreamExt; use log::{error, info}; use update_actor::UpdateActorHandle; use uuid_resolver::UuidResolverHandle; -use std::{fs::File, path::{Path, PathBuf}, sync::Arc}; -use tokio::{fs::create_dir_all, sync::{mpsc, oneshot, RwLock}}; +use tokio::sync::{mpsc, oneshot, RwLock}; + +use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus, DumpTask}; +use crate::index_controller::{update_actor, uuid_resolver}; pub const CONCURRENT_DUMP_MSG: usize = 10; -const META_FILE_NAME: &'static str = "metadata.json"; pub struct DumpActor { inbox: Option>, @@ -155,54 +156,4 @@ where }) ) } - -} - -struct DumpTask { - path: PathBuf, - uuid_resolver: U, - update_handle: P, - uid: String, - update_db_size: 
u64, - index_db_size: u64, -} - -impl DumpTask -where - U: UuidResolverHandle + Send + Sync + Clone + 'static, - P: UpdateActorHandle + Send + Sync + Clone + 'static, -{ - async fn run(self) -> anyhow::Result<()> { - info!("Performing dump."); - - create_dir_all(&self.path).await?; - - let path_clone = self.path.clone(); - let temp_dump_dir = tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let meta = Metadata::new_v2(self.index_db_size, self.update_db_size); - let meta_path = temp_dump_path.join(META_FILE_NAME); - let mut meta_file = File::create(&meta_path)?; - serde_json::to_writer(&mut meta_file, &meta)?; - - let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?; - - self.update_handle.dump(uuids, temp_dump_path.clone()).await?; - - let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { - let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; - compression::to_tar_gz(temp_dump_path, temp_dump_file.path())?; - - let dump_path = self.path.join(format!("{}.dump", self.uid)); - temp_dump_file.persist(&dump_path)?; - - Ok(dump_path) - }) - .await??; - - info!("Created dump in {:?}.", dump_path); - - Ok(()) - } } diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs index b9f89ebbf..def47fecb 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs @@ -2,7 +2,7 @@ use std::path::Path; use anyhow::Context; use chrono::{DateTime, Utc}; -use log::info; +use log::{info, warn}; use serde::{Deserialize, Serialize}; use crate::{index::Index, index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}, option::IndexerOpts}; @@ -29,6 +29,8 @@ impl MetadataV2 { self, src: impl AsRef, dst: impl AsRef, + _index_db_size: u64, + _update_db_size: u64, 
indexing_options: &IndexerOpts, ) -> anyhow::Result<()> { info!( @@ -44,23 +46,26 @@ impl MetadataV2 { let tmp_dst = tempfile::tempdir_in(dst_dir)?; info!("Loading index database."); - let uuid_resolver_path = dst.as_ref().join("uuid_resolver/"); - std::fs::create_dir_all(&uuid_resolver_path)?; - HeedUuidStore::load_dump(src.as_ref(), tmp_dst.as_ref())?; + HeedUuidStore::load_dump(src.as_ref(), &tmp_dst)?; info!("Loading updates."); - UpdateStore::load_dump(&src, &tmp_dst.as_ref(), self.update_db_size)?; + UpdateStore::load_dump(&src, &tmp_dst, self.update_db_size)?; info!("Loading indexes"); let indexes_path = src.as_ref().join("indexes"); let indexes = indexes_path.read_dir()?; for index in indexes { let index = index?; - Index::load_dump(&index.path(), &dst, self.index_db_size, indexing_options)?; + Index::load_dump(&index.path(), &tmp_dst, self.index_db_size, indexing_options)?; } // Persist and atomically rename the db let persisted_dump = tmp_dst.into_path(); + if dst.as_ref().exists() { + warn!("Overwriting database at {}", dst.as_ref().display()); + std::fs::remove_dir_all(&dst)?; + } + std::fs::rename(&persisted_dump, &dst)?; Ok(()) diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index b54783f75..2b7d8a3e0 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -1,6 +1,7 @@ -use std::{fs::File, path::Path}; +use std::fs::File; +use std::path::{Path, PathBuf}; -use log::error; +use log::{error, info}; #[cfg(test)] use mockall::automock; use serde::{Deserialize, Serialize}; @@ -12,16 +13,18 @@ use loaders::v2::MetadataV2; pub use actor::DumpActor; pub use handle_impl::*; pub use message::DumpMsg; +use tokio::fs::create_dir_all; -use crate::option::IndexerOpts; - -use super::uuid_resolver::store::UuidStore; +use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; +use 
crate::{helpers::compression, option::IndexerOpts}; mod actor; mod handle_impl; mod loaders; mod message; +const META_FILE_NAME: &'static str = "metadata.json"; + pub type DumpResult = std::result::Result; #[derive(Error, Debug)] @@ -66,23 +69,6 @@ impl Metadata { let meta = MetadataV2::new(index_db_size, update_db_size); Self::V2 { meta } } - /// Extract Metadata from `metadata.json` file present at provided `dir_path` - fn from_path(dir_path: &Path) -> anyhow::Result { - let path = dir_path.join("metadata.json"); - let file = File::open(path)?; - let reader = std::io::BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; - - Ok(metadata) - } - - /// Write Metadata in `metadata.json` file at provided `dir_path` - pub async fn to_path(&self, dir_path: &Path) -> anyhow::Result<()> { - let path = dir_path.join("metadata.json"); - tokio::fs::write(path, serde_json::to_string(self)?).await?; - - Ok(()) - } } #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] @@ -125,21 +111,84 @@ impl DumpInfo { } } -pub fn load_dump( +pub fn load_dump( dst_path: impl AsRef, src_path: impl AsRef, - _index_db_size: u64, - _update_db_size: u64, + index_db_size: u64, + update_db_size: u64, indexer_opts: &IndexerOpts, ) -> anyhow::Result<()> { - let meta_path = src_path.as_ref().join("metadat.json"); + let tmp_src = tempfile::tempdir_in(".")?; + let tmp_src_path = tmp_src.path(); + + compression::from_tar_gz(&src_path, tmp_src_path)?; + + let meta_path = tmp_src_path.join(META_FILE_NAME); let mut meta_file = File::open(&meta_path)?; let meta: Metadata = serde_json::from_reader(&mut meta_file)?; match meta { - Metadata::V1 { meta } => meta.load_dump(src_path, dst_path)?, - Metadata::V2 { meta } => meta.load_dump(src_path.as_ref(), dst_path.as_ref(), indexer_opts)?, + Metadata::V1 { meta } => meta.load_dump(&tmp_src_path, dst_path)?, + Metadata::V2 { meta } => meta.load_dump( + &tmp_src_path, + dst_path.as_ref(), + index_db_size, + update_db_size, + indexer_opts, + 
)?, } Ok(()) } + +struct DumpTask { + path: PathBuf, + uuid_resolver: U, + update_handle: P, + uid: String, + update_db_size: u64, + index_db_size: u64, +} + +impl DumpTask +where + U: UuidResolverHandle + Send + Sync + Clone + 'static, + P: UpdateActorHandle + Send + Sync + Clone + 'static, +{ + async fn run(self) -> anyhow::Result<()> { + info!("Performing dump."); + + create_dir_all(&self.path).await?; + + let path_clone = self.path.clone(); + let temp_dump_dir = + tokio::task::spawn_blocking(|| tempfile::TempDir::new_in(path_clone)).await??; + let temp_dump_path = temp_dump_dir.path().to_owned(); + + let meta = Metadata::new_v2(self.index_db_size, self.update_db_size); + let meta_path = temp_dump_path.join(META_FILE_NAME); + let mut meta_file = File::create(&meta_path)?; + serde_json::to_writer(&mut meta_file, &meta)?; + + let uuids = self.uuid_resolver.dump(temp_dump_path.clone()).await?; + + self.update_handle + .dump(uuids, temp_dump_path.clone()) + .await?; + + let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result { + let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; + compression::to_tar_gz(temp_dump_path, temp_dump_file.path())?; + + let dump_path = self.path.join(format!("{}.dump", self.uid)); + temp_dump_file.persist(&dump_path)?; + + Ok(dump_path) + }) + .await??; + + info!("Created dump in {:?}.", dump_path); + + Ok(()) + } +} diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 69415a1cd..18ba6dee3 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -25,6 +25,8 @@ use uuid_resolver::{UuidResolverError, UuidResolverHandle}; use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use crate::option::Opt; +use self::dump_actor::load_dump; + mod dump_actor; mod index_actor; mod snapshot; @@ -91,8 +93,14 @@ impl IndexController { options.ignore_snapshot_if_db_exists, 
options.ignore_missing_snapshot, )?; - } else if let Some(ref _path) = options.import_dump { - todo!("implement load dump") + } else if let Some(ref src_path) = options.import_dump { + load_dump( + &options.db_path, + src_path, + options.max_mdb_size.get_bytes(), + options.max_udb_size.get_bytes(), + &options.indexer_options, + )?; } std::fs::create_dir_all(&path)?; diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 876c2454c..2fd9ff301 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -178,6 +178,7 @@ impl HeedUuidStore { Ok(0) => break, Ok(_) => { let DumpEntry { uuid, uid } = serde_json::from_str(&line)?; + println!("importing {} {}", uid, uuid); db.db.put(&mut txn, &uid, uuid.as_bytes())?; } Err(e) => Err(e)?, From 1cb64caae46b4b5988f9f5c3fd8dec5d32dba068 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Sat, 29 May 2021 00:08:17 +0200 Subject: [PATCH 44/54] dump content is now only uuid --- .../index_controller/update_actor/actor.rs | 8 +- .../update_actor/store/dump.rs | 77 +++++++++---------- .../update_actor/store/mod.rs | 50 +++++++----- .../src/index_controller/updates.rs | 67 +--------------- 4 files changed, 75 insertions(+), 127 deletions(-) diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index 4097f31aa..40bba4e2b 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -117,7 +117,7 @@ where if file_len != 0 { file.flush().await?; let file = file.into_std().await; - Some((file, path)) + Some((file, update_file_id)) } else { // empty update, delete the empty file. 
fs::remove_file(&path).await?; @@ -133,7 +133,7 @@ where use std::io::{copy, sink, BufReader, Seek}; // If the payload is empty, ignore the check. - let path = if let Some((mut file, path)) = file_path { + let update_uuid = if let Some((mut file, uuid)) = file_path { // set the file back to the beginning file.seek(SeekFrom::Start(0))?; // Check that the json payload is valid: @@ -145,14 +145,14 @@ where file.seek(SeekFrom::Start(0))?; let _: serde_json::Value = serde_json::from_reader(file)?; } - Some(path) + Some(uuid) } else { None }; // The payload is valid, we can register it to the update store. let status = update_store - .register_update(meta, path, uuid) + .register_update(meta, update_uuid, uuid) .map(UpdateStatus::Enqueued)?; Ok(status) }) diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index 1f36931d1..ec7aeea87 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -1,12 +1,17 @@ -use std::{collections::HashSet, fs::{copy, create_dir_all, File}, io::{BufRead, BufReader, Write}, path::{Path, PathBuf}}; +use std::{ + collections::HashSet, + fs::{create_dir_all, File}, + io::{BufRead, BufReader, Write}, + path::{Path, PathBuf}, +}; use anyhow::Context; use heed::{EnvOpenOptions, RoTxn}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::{State, codec::UpdateKeyCodec}; use super::UpdateStore; +use super::{codec::UpdateKeyCodec, State}; use crate::index_controller::{index_actor::IndexActorHandle, UpdateStatus}; #[derive(Serialize, Deserialize)] @@ -50,10 +55,10 @@ impl UpdateStore { let dump_data_path = path.as_ref().join("data.jsonl"); let mut dump_data_file = File::create(dump_data_path)?; - let update_files_path = path.as_ref().join("update_files"); + let update_files_path = path.as_ref().join(super::UPDATE_DIR); create_dir_all(&update_files_path)?; - 
self.dump_pending(&txn, uuids, &mut dump_data_file, &update_files_path)?; + self.dump_pending(&txn, uuids, &mut dump_data_file, &path)?; self.dump_completed(&txn, uuids, &mut dump_data_file)?; Ok(()) @@ -64,19 +69,24 @@ impl UpdateStore { txn: &RoTxn, uuids: &HashSet, mut file: &mut File, - update_files_path: impl AsRef, + dst_update_files: impl AsRef, ) -> anyhow::Result<()> { let pendings = self.pending_queue.iter(txn)?.lazily_decode_data(); for pending in pendings { let ((_, uuid, _), data) = pending?; if uuids.contains(&uuid) { - let mut update = data.decode()?; + let update = data.decode()?; - if let Some(content) = update.content.take() { - update.content = Some(dump_update_file(content, &update_files_path)?); + if let Some(ref update_uuid) = update.content { + let src = dbg!(super::update_uuid_to_file_path(&self.path, *update_uuid)); + let dst = dbg!(super::update_uuid_to_file_path(&dst_update_files, *update_uuid)); + assert!(src.exists()); + dbg!(std::fs::copy(src, dst))?; } + println!("copied files"); + let update_json = UpdateEntry { uuid, update: update.into(), @@ -117,18 +127,20 @@ impl UpdateStore { Ok(()) } - pub fn load_dump(src: impl AsRef, dst: impl AsRef, db_size: u64) -> anyhow::Result<()> { - let dst_updates_path = dst.as_ref().join("updates/"); - create_dir_all(&dst_updates_path)?; - let dst_update_files_path = dst_updates_path.join("update_files/"); - create_dir_all(&dst_update_files_path)?; + pub fn load_dump( + src: impl AsRef, + dst: impl AsRef, + db_size: u64, + ) -> anyhow::Result<()> { + let dst_update_path = dst.as_ref().join("updates/"); + create_dir_all(&dst_update_path)?; + let mut options = EnvOpenOptions::new(); options.map_size(db_size as usize); - let (store, _) = UpdateStore::new(options, &dst_updates_path)?; + let (store, _) = UpdateStore::new(options, &dst_update_path)?; let src_update_path = src.as_ref().join("updates"); - let src_update_files_path = src_update_path.join("update_files"); let update_data = 
File::open(&src_update_path.join("data.jsonl"))?; let mut update_data = BufReader::new(update_data); @@ -138,15 +150,7 @@ impl UpdateStore { match update_data.read_line(&mut line) { Ok(0) => break, Ok(_) => { - let UpdateEntry { uuid, mut update } = serde_json::from_str(&line)?; - - if let Some(path) = update.content_path_mut() { - let dst_file_path = dst_update_files_path.join(&path); - let src_file_path = src_update_files_path.join(&path); - *path = dst_update_files_path.join(&path); - std::fs::copy(src_file_path, dst_file_path)?; - } - + let UpdateEntry { uuid, update } = serde_json::from_str(&line)?; store.register_raw_updates(&mut wtxn, update, uuid)?; } _ => break, @@ -154,30 +158,25 @@ impl UpdateStore { line.clear(); } + + let dst_update_files_path = dst_update_path.join("update_files/"); + let src_update_files_path = src_update_path.join("update_files/"); + std::fs::copy(src_update_files_path, dst_update_files_path)?; + wtxn.commit()?; Ok(()) } } -async fn dump_indexes(uuids: &HashSet, handle: impl IndexActorHandle, path: impl AsRef)-> anyhow::Result<()> { +async fn dump_indexes( + uuids: &HashSet, + handle: impl IndexActorHandle, + path: impl AsRef, +) -> anyhow::Result<()> { for uuid in uuids { handle.dump(*uuid, path.as_ref().to_owned()).await?; } Ok(()) } - -fn dump_update_file( - file_path: impl AsRef, - dump_path: impl AsRef, -) -> anyhow::Result { - let filename: PathBuf = file_path - .as_ref() - .file_name() - .context("invalid update file name")? 
- .into(); - let dump_file_path = dump_path.as_ref().join(&filename); - copy(file_path, dump_file_path)?; - Ok(filename) -} diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index 661b712ac..6910d5144 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -1,12 +1,11 @@ pub mod dump; mod codec; -use std::collections::{BTreeMap, HashSet}; +use std::{collections::{BTreeMap, HashSet}, path::PathBuf}; use std::fs::{copy, create_dir_all, remove_file, File}; use std::path::Path; use std::sync::Arc; -use anyhow::Context; use arc_swap::ArcSwap; use futures::StreamExt; use heed::types::{ByteSlice, OwnedType, SerdeJson}; @@ -27,6 +26,8 @@ use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, Ind #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; +const UPDATE_DIR: &'static str = "update_files"; + pub struct UpdateStoreInfo { /// Size of the update store in bytes. pub size: u64, @@ -97,6 +98,7 @@ pub struct UpdateStore { pub state: Arc, /// Wake up the loop when a new event occurs. 
notification_sender: mpsc::Sender<()>, + path: PathBuf, } impl UpdateStore { @@ -106,7 +108,7 @@ impl UpdateStore { ) -> anyhow::Result<(Self, mpsc::Receiver<()>)> { options.max_dbs(5); - let env = options.open(path)?; + let env = options.open(&path)?; let pending_queue = env.create_database(Some("pending-queue"))?; let next_update_id = env.create_database(Some("next-update-id"))?; let updates = env.create_database(Some("updates"))?; @@ -123,6 +125,7 @@ impl UpdateStore { updates, state, notification_sender, + path: path.as_ref().to_owned(), }, notification_receiver, )) @@ -165,7 +168,7 @@ impl UpdateStore { match res { Ok(Some(_)) => (), Ok(None) => break, - Err(e) => error!("error while processing update: {}", e), + Err(e) => panic!("error while processing update: {}", e), } } // the ownership on the arc has been taken, we need to exit. @@ -217,13 +220,13 @@ impl UpdateStore { pub fn register_update( &self, meta: UpdateMeta, - content: Option>, + content: Option, index_uuid: Uuid, ) -> heed::Result { let mut txn = self.env.write_txn()?; let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?; - let meta = Enqueued::new(meta, update_id, content.map(|p| p.as_ref().to_owned())); + let meta = Enqueued::new(meta, update_id, content); self.pending_queue .put(&mut txn, &(global_id, index_uuid, update_id), &meta)?; @@ -290,9 +293,9 @@ impl UpdateStore { state.swap(State::Processing(index_uuid, processing.clone())); let file = match content_path { - Some(ref path) => { - let file = File::open(path) - .with_context(|| format!("file at path: {:?}", &content_path))?; + Some(uuid) => { + let path = update_uuid_to_file_path(&self.path, uuid); + let file = File::open(path)?; Some(file) } None => None, @@ -308,7 +311,8 @@ impl UpdateStore { self.pending_queue .delete(&mut wtxn, &(global_id, index_uuid, update_id))?; - if let Some(path) = content_path { + if let Some(uuid) = content_path { + let path = update_uuid_to_file_path(&self.path, uuid); 
remove_file(&path)?; } @@ -408,7 +412,7 @@ impl UpdateStore { pub fn delete_all(&self, index_uuid: Uuid) -> anyhow::Result<()> { let mut txn = self.env.write_txn()?; // Contains all the content file paths that we need to be removed if the deletion was successful. - let mut paths_to_remove = Vec::new(); + let mut uuids_to_remove = Vec::new(); let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data(); @@ -416,8 +420,8 @@ impl UpdateStore { if uuid == index_uuid { pendings.del_current()?; let mut pending = pending.decode()?; - if let Some(path) = pending.content.take() { - paths_to_remove.push(path); + if let Some(update_uuid) = pending.content.take() { + uuids_to_remove.push(update_uuid); } } } @@ -437,7 +441,9 @@ impl UpdateStore { txn.commit()?; - paths_to_remove.iter().for_each(|path| { + uuids_to_remove.iter() + .map(|uuid| update_uuid_to_file_path(&self.path, *uuid)) + .for_each(|path| { let _ = remove_file(path); }); @@ -468,7 +474,7 @@ impl UpdateStore { // create db snapshot self.env.copy_to_path(&db_path, CompactionOption::Enabled)?; - let update_files_path = update_path.join("update_files"); + let update_files_path = update_path.join(UPDATE_DIR); create_dir_all(&update_files_path)?; let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); @@ -476,10 +482,9 @@ impl UpdateStore { for entry in pendings { let ((_, uuid, _), pending) = entry?; if uuids.contains(&uuid) { - if let Some(path) = pending.decode()?.content_path() { - let name = path.file_name().unwrap(); - let to = update_files_path.join(name); - copy(path, to)?; + if let Enqueued { content: Some(uuid), .. } = pending.decode()? { + let path = update_uuid_to_file_path(&self.path, uuid); + copy(path, &update_files_path)?; } } } @@ -508,7 +513,8 @@ impl UpdateStore { let txn = self.env.read_txn()?; for entry in self.pending_queue.iter(&txn)? { let (_, pending) = entry?; - if let Some(path) = pending.content_path() { + if let Enqueued { content: Some(uuid), .. 
} = pending { + let path = update_uuid_to_file_path(&self.path, uuid); size += File::open(path)?.metadata()?.len(); } } @@ -521,6 +527,10 @@ impl UpdateStore { } } +fn update_uuid_to_file_path(root: impl AsRef, uuid: Uuid) -> PathBuf { + root.as_ref().join(UPDATE_DIR).join(format!("update_{}", uuid)) +} + #[cfg(test)] mod test { use super::*; diff --git a/meilisearch-http/src/index_controller/updates.rs b/meilisearch-http/src/index_controller/updates.rs index 31f0005f8..0aacf9b6c 100644 --- a/meilisearch-http/src/index_controller/updates.rs +++ b/meilisearch-http/src/index_controller/updates.rs @@ -1,8 +1,7 @@ -use std::path::{Path, PathBuf}; - use chrono::{DateTime, Utc}; use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateFormat}; use serde::{Deserialize, Serialize}; +use uuid::Uuid; use crate::index::{Checked, Settings}; @@ -34,11 +33,11 @@ pub struct Enqueued { pub update_id: u64, pub meta: UpdateMeta, pub enqueued_at: DateTime, - pub content: Option, + pub content: Option, } impl Enqueued { - pub fn new(meta: UpdateMeta, update_id: u64, content: Option) -> Self { + pub fn new(meta: UpdateMeta, update_id: u64, content: Option) -> Self { Self { enqueued_at: Utc::now(), meta, @@ -68,14 +67,6 @@ impl Enqueued { pub fn id(&self) -> u64 { self.update_id } - - pub fn content_path(&self) -> Option<&Path> { - self.content.as_deref() - } - - pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { - self.content.as_mut() - } } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -91,14 +82,6 @@ impl Processed { pub fn id(&self) -> u64 { self.from.id() } - - pub fn content_path(&self) -> Option<&Path> { - self.from.content_path() - } - - pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { - self.from.content_path_mut() - } } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -118,14 +101,6 @@ impl Processing { self.from.meta() } - pub fn content_path(&self) -> Option<&Path> { - self.from.content_path() - } - - pub fn content_path_mut(&mut 
self) -> Option<&mut PathBuf> { - self.from.content_path_mut() - } - pub fn process(self, success: UpdateResult) -> Processed { Processed { success, @@ -155,14 +130,6 @@ impl Aborted { pub fn id(&self) -> u64 { self.from.id() } - - pub fn content_path(&self) -> Option<&Path> { - self.from.content_path() - } - - pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { - self.from.content_path_mut() - } } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -178,14 +145,6 @@ impl Failed { pub fn id(&self) -> u64 { self.from.id() } - - pub fn content_path(&self) -> Option<&Path> { - self.from.content_path() - } - - pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { - self.from.content_path_mut() - } } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -215,26 +174,6 @@ impl UpdateStatus { _ => None, } } - - pub fn content_path(&self) -> Option<&Path> { - match self { - UpdateStatus::Processing(u) => u.content_path(), - UpdateStatus::Processed(u) => u.content_path(), - UpdateStatus::Aborted(u) => u.content_path(), - UpdateStatus::Failed(u) => u.content_path(), - UpdateStatus::Enqueued(u) => u.content_path(), - } - } - - pub fn content_path_mut(&mut self) -> Option<&mut PathBuf> { - match self { - UpdateStatus::Processing(u) => u.content_path_mut(), - UpdateStatus::Processed(u) => u.content_path_mut(), - UpdateStatus::Aborted(u) => u.content_path_mut(), - UpdateStatus::Failed(u) => u.content_path_mut(), - UpdateStatus::Enqueued(u) => u.content_path_mut(), - } - } } impl From for UpdateStatus { From 39c16c0fe4c05241036fa952ff7c7d3e166d2de2 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Sun, 30 May 2021 12:35:17 +0200 Subject: [PATCH 45/54] fix dump import --- .../update_actor/store/dump.rs | 26 +++++++++++-------- .../update_actor/store/mod.rs | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index 
ec7aeea87..fb4b7d5ac 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -12,7 +12,7 @@ use uuid::Uuid; use super::UpdateStore; use super::{codec::UpdateKeyCodec, State}; -use crate::index_controller::{index_actor::IndexActorHandle, UpdateStatus}; +use crate::index_controller::{Enqueued, UpdateStatus, index_actor::IndexActorHandle, update_actor::store::update_uuid_to_file_path}; #[derive(Serialize, Deserialize)] struct UpdateEntry { @@ -69,7 +69,7 @@ impl UpdateStore { txn: &RoTxn, uuids: &HashSet, mut file: &mut File, - dst_update_files: impl AsRef, + dst_path: impl AsRef, ) -> anyhow::Result<()> { let pendings = self.pending_queue.iter(txn)?.lazily_decode_data(); @@ -79,10 +79,9 @@ impl UpdateStore { let update = data.decode()?; if let Some(ref update_uuid) = update.content { - let src = dbg!(super::update_uuid_to_file_path(&self.path, *update_uuid)); - let dst = dbg!(super::update_uuid_to_file_path(&dst_update_files, *update_uuid)); - assert!(src.exists()); - dbg!(std::fs::copy(src, dst))?; + let src = super::update_uuid_to_file_path(&self.path, *update_uuid); + let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid); + std::fs::copy(src, dst)?; } println!("copied files"); @@ -144,6 +143,8 @@ impl UpdateStore { let update_data = File::open(&src_update_path.join("data.jsonl"))?; let mut update_data = BufReader::new(update_data); + std::fs::create_dir_all(dst_update_path.join("update_files/"))?; + let mut wtxn = store.env.write_txn()?; let mut line = String::new(); loop { @@ -151,7 +152,14 @@ impl UpdateStore { Ok(0) => break, Ok(_) => { let UpdateEntry { uuid, update } = serde_json::from_str(&line)?; - store.register_raw_updates(&mut wtxn, update, uuid)?; + store.register_raw_updates(&mut wtxn, &update, uuid)?; + + // Copy ascociated update path if it exists + if let UpdateStatus::Enqueued(Enqueued { content: Some(uuid), .. 
}) = update { + let src = update_uuid_to_file_path(&src_update_path, uuid); + let dst = update_uuid_to_file_path(&dst_update_path, uuid); + std::fs::copy(src, dst)?; + } } _ => break, } @@ -159,10 +167,6 @@ impl UpdateStore { line.clear(); } - let dst_update_files_path = dst_update_path.join("update_files/"); - let src_update_files_path = src_update_path.join("update_files/"); - std::fs::copy(src_update_files_path, dst_update_files_path)?; - wtxn.commit()?; Ok(()) diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index 6910d5144..6e8f87a79 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -244,7 +244,7 @@ impl UpdateStore { pub fn register_raw_updates( &self, wtxn: &mut heed::RwTxn, - update: UpdateStatus, + update: &UpdateStatus, index_uuid: Uuid, ) -> heed::Result<()> { match update { From 33c6c4f0eed2d7e8038ce54ac564e83ceb75c561 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Sun, 30 May 2021 15:55:17 +0200 Subject: [PATCH 46/54] add timestamos to dump info --- meilisearch-http/src/index/updates.rs | 5 +++-- meilisearch-http/src/index_controller/dump_actor/actor.rs | 4 +++- meilisearch-http/src/index_controller/dump_actor/mod.rs | 8 ++++++++ .../src/index_controller/update_actor/store/dump.rs | 3 --- .../src/index_controller/update_actor/store/mod.rs | 2 +- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 053ca6a60..566356d5f 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -197,9 +197,10 @@ impl Index { builder.update_format(format); builder.index_documents_method(method); - let indexing_callback = - |indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step); + //let indexing_callback = + //|indexing_step, 
update_id| info!("update {}: {:?}", update_id, indexing_step); + let indexing_callback = |_, _| (); let gzipped = false; let addition = match content { diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index b93d6f42d..ff4c39f6d 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -97,7 +97,9 @@ where return; } let uid = generate_uid(); + let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress); + *self.dump_info.write().await = Some(info.clone()); ret.send(Ok(info)).expect("Dump actor is dead"); @@ -126,7 +128,7 @@ where } Err(_) => { error!("Dump panicked. Dump status set to failed"); - *dump_info.write().await = Some(DumpInfo::new(uid, DumpStatus::Failed)); + (*dump_info.write().await).as_mut().expect("Inconsistent dump service state").with_error("Unexpected error while performing dump.".to_string()); } }; } diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index 2b7d8a3e0..b236122bd 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -1,6 +1,7 @@ use std::fs::File; use std::path::{Path, PathBuf}; +use chrono::{DateTime, Utc}; use log::{error, info}; #[cfg(test)] use mockall::automock; @@ -86,6 +87,9 @@ pub struct DumpInfo { pub status: DumpStatus, #[serde(skip_serializing_if = "Option::is_none")] pub error: Option, + started_at: DateTime, + #[serde(skip_serializing_if = "Option::is_none")] + finished_at: Option>, } impl DumpInfo { @@ -94,15 +98,19 @@ impl DumpInfo { uid, status, error: None, + started_at: Utc::now(), + finished_at: None, } } pub fn with_error(&mut self, error: String) { self.status = DumpStatus::Failed; + self.finished_at = Some(Utc::now()); self.error = Some(error); } pub fn done(&mut self) { + self.finished_at = 
Some(Utc::now()); self.status = DumpStatus::Done; } diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index fb4b7d5ac..fad8974f3 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -5,7 +5,6 @@ use std::{ path::{Path, PathBuf}, }; -use anyhow::Context; use heed::{EnvOpenOptions, RoTxn}; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -84,8 +83,6 @@ impl UpdateStore { std::fs::copy(src, dst)?; } - println!("copied files"); - let update_json = UpdateEntry { uuid, update: update.into(), diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index 6e8f87a79..29ccd4f34 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -168,7 +168,7 @@ impl UpdateStore { match res { Ok(Some(_)) => (), Ok(None) => break, - Err(e) => panic!("error while processing update: {}", e), + Err(e) => error!("error while processing update: {}", e), } } // the ownership on the arc has been taken, we need to exit. 
From bc5a5e37ea8127ffbfcd3dcec8d56cb9d28068f8 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Mon, 31 May 2021 10:42:31 +0200 Subject: [PATCH 47/54] fix dump v1 --- .../index_controller/dump_actor/loaders/v1.rs | 265 ++++++++++-------- .../index_controller/dump_actor/loaders/v2.rs | 27 +- .../src/index_controller/dump_actor/mod.rs | 38 ++- .../index_controller/uuid_resolver/store.rs | 25 +- 4 files changed, 201 insertions(+), 154 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs index 76207ff7b..471e66d17 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -1,137 +1,178 @@ -use std::path::Path; +use std::{ + collections::{BTreeMap, BTreeSet}, + fs::File, + marker::PhantomData, + path::Path, + sync::Arc, +}; +use heed::EnvOpenOptions; +use log::{error, info, warn}; +use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use serde::{Deserialize, Serialize}; +use uuid::Uuid; -use crate::index_controller::IndexMetadata; +use crate::{index::deserialize_some, index_controller::uuid_resolver::HeedUuidStore}; +use crate::{ + index::{Index, Unchecked}, + index_controller::{self, IndexMetadata}, +}; #[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] pub struct MetadataV1 { db_version: String, indexes: Vec, } impl MetadataV1 { - pub fn load_dump(self, _src: impl AsRef, _dst: impl AsRef) -> anyhow::Result<()> { - todo!("implement load v1") + pub fn load_dump( + self, + src: impl AsRef, + dst: impl AsRef, + size: usize, + ) -> anyhow::Result<()> { + info!( + "Loading dump, dump database version: {}, dump version: V1", + self.db_version + ); + + dbg!("here"); + + let uuid_store = HeedUuidStore::new(&dst)?; + dbg!("here"); + for index in self.indexes { + let uuid = Uuid::new_v4(); + uuid_store.insert(index.uid.clone(), uuid)?; + let src = 
src.as_ref().join(index.uid); + load_index(&src, &dst, uuid, index.meta.primary_key.as_deref(), size)?; + } + + Ok(()) } } -// This is the settings used in the last version of meilisearch exporting dump in V1 -//#[derive(Default, Clone, Serialize, Deserialize, Debug)] -//#[serde(rename_all = "camelCase", deny_unknown_fields)] -//struct Settings { - //#[serde(default, deserialize_with = "deserialize_some")] - //pub ranking_rules: Option>>, - //#[serde(default, deserialize_with = "deserialize_some")] - //pub distinct_attribute: Option>, - //#[serde(default, deserialize_with = "deserialize_some")] - //pub searchable_attributes: Option>>, - //#[serde(default, deserialize_with = "deserialize_some")] - //pub displayed_attributes: Option>>, - //#[serde(default, deserialize_with = "deserialize_some")] - //pub stop_words: Option>>, - //#[serde(default, deserialize_with = "deserialize_some")] - //pub synonyms: Option>>>, - //#[serde(default, deserialize_with = "deserialize_some")] - //pub attributes_for_faceting: Option>>, -//} +//This is the settings used in the last version of meilisearch exporting dump in V1 +#[derive(Default, Clone, Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +struct Settings { + #[serde(default, deserialize_with = "deserialize_some")] + pub ranking_rules: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub distinct_attribute: Option>, + #[serde(default, deserialize_with = "deserialize_some")] + pub searchable_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub displayed_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub stop_words: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub synonyms: Option>>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub attributes_for_faceting: Option>>, +} -///// we need to **always** be able to convert the old settings to the settings currently 
being used -//impl From for index_controller::Settings { - //fn from(settings: Settings) -> Self { - //if settings.synonyms.flatten().is_some() { - //error!("`synonyms` are not yet implemented and thus will be ignored"); - //} - //Self { - //distinct_attribute: settings.distinct_attribute, - //// we need to convert the old `Vec` into a `BTreeSet` - //displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())), - //searchable_attributes: settings.searchable_attributes, - //// we previously had a `Vec` but now we have a `HashMap` - //// representing the name of the faceted field + the type of the field. Since the type - //// was not known in the V1 of the dump we are just going to assume everything is a - //// String - //attributes_for_faceting: settings.attributes_for_faceting.map(|o| o.map(|vec| vec.into_iter().map(|key| (key, String::from("string"))).collect())), - //// we need to convert the old `Vec` into a `BTreeSet` - //ranking_rules: settings.ranking_rules.map(|o| o.map(|vec| vec.into_iter().filter_map(|criterion| { - //match criterion.as_str() { - //"words" | "typo" | "proximity" | "attribute" => Some(criterion), - //s if s.starts_with("asc") || s.starts_with("desc") => Some(criterion), - //"wordsPosition" => { - //warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored"); - //Some(String::from("words")) - //} - //"exactness" => { - //error!("The criterion `{}` is not implemented currently and thus will be ignored", criterion); - //None - //} - //s => { - //error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); - //None - //} - //} - //}).collect())), - //// we need to convert the old `Vec` into a `BTreeSet` - //stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())), - //_kind: PhantomData, - //} - //} -//} +impl std::ops::Deref for Settings { + type Target = Option>>; -///// Extract Settings from 
`settings.json` file present at provided `dir_path` -//fn import_settings(dir_path: &Path) -> anyhow::Result { - //let path = dir_path.join("settings.json"); - //let file = File::open(path)?; - //let reader = std::io::BufReader::new(file); - //let metadata = serde_json::from_reader(reader)?; + fn deref(&self) -> &Self::Target { + &self.stop_words + } +} - //Ok(metadata) -//} +fn load_index( + src: impl AsRef, + dst: impl AsRef, + uuid: Uuid, + primary_key: Option<&str>, + size: usize, +) -> anyhow::Result<()> { + let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); -//pub fn import_dump( - //size: usize, - //uuid: Uuid, - //dump_path: &Path, - //db_path: &Path, - //primary_key: Option<&str>, -//) -> anyhow::Result<()> { - //let index_path = db_path.join(&format!("indexes/index-{}", uuid)); - //info!("Importing a dump from an old version of meilisearch with dump version 1"); + std::fs::create_dir_all(&index_path)?; + let mut options = EnvOpenOptions::new(); + options.map_size(size); + let index = milli::Index::new(options, index_path)?; + let index = Index(Arc::new(index)); - //std::fs::create_dir_all(&index_path)?; - //let mut options = EnvOpenOptions::new(); - //options.map_size(size); - //let index = milli::Index::new(options, index_path)?; - //let index = Index(Arc::new(index)); + // extract `settings.json` file and import content + let settings = import_settings(&src)?; + let settings: index_controller::Settings = settings.into(); + let update_builder = UpdateBuilder::new(0); + index.update_settings(&settings.check(), update_builder)?; - //// extract `settings.json` file and import content - //let settings = import_settings(&dump_path)?; - //let settings: index_controller::Settings = settings.into(); - //let update_builder = UpdateBuilder::new(0); - //index.update_settings(&settings.check(), update_builder)?; + let update_builder = UpdateBuilder::new(0); + let file = File::open(&src.as_ref().join("documents.jsonl"))?; + let reader = 
std::io::BufReader::new(file); - //let update_builder = UpdateBuilder::new(1); - //let file = File::open(&dump_path.join("documents.jsonl"))?; - //let reader = std::io::BufReader::new(file); + index.update_documents( + UpdateFormat::JsonStream, + IndexDocumentsMethod::ReplaceDocuments, + Some(reader), + update_builder, + primary_key, + )?; - //// TODO: TAMO: waiting for milli. We should use the result - //let _ = index.update_documents( - //UpdateFormat::JsonStream, - //IndexDocumentsMethod::ReplaceDocuments, - //Some(reader), - //update_builder, - //primary_key, - //); + // the last step: we extract the original milli::Index and close it + Arc::try_unwrap(index.0) + .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") + .unwrap() + .prepare_for_closing() + .wait(); - //// the last step: we extract the original milli::Index and close it - //Arc::try_unwrap(index.0) - //.map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") - //.unwrap() - //.prepare_for_closing() - //.wait(); + // Ignore updates in v1. 
- //// at this point we should handle the import of the updates, but since the update logic is not handled in - //// meilisearch we are just going to ignore this part + Ok(()) +} - //Ok(()) -//} +/// we need to **always** be able to convert the old settings to the settings currently being used +impl From for index_controller::Settings { + fn from(settings: Settings) -> Self { + if settings.synonyms.flatten().is_some() { + error!("`synonyms` are not yet implemented and thus will be ignored"); + } + Self { + distinct_attribute: settings.distinct_attribute, + // we need to convert the old `Vec` into a `BTreeSet` + displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())), + searchable_attributes: settings.searchable_attributes, + // we previously had a `Vec` but now we have a `HashMap` + // representing the name of the faceted field + the type of the field. Since the type + // was not known in the V1 of the dump we are just going to assume everything is a + // String + attributes_for_faceting: settings.attributes_for_faceting.map(|o| o.map(|vec| vec.into_iter().map(|key| (key, String::from("string"))).collect())), + // we need to convert the old `Vec` into a `BTreeSet` + ranking_rules: settings.ranking_rules.map(|o| o.map(|vec| vec.into_iter().filter_map(|criterion| { + match criterion.as_str() { + "words" | "typo" | "proximity" | "attribute" => Some(criterion), + s if s.starts_with("asc") || s.starts_with("desc") => Some(criterion), + "wordsPosition" => { + warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored"); + Some(String::from("words")) + } + "exactness" => { + error!("The criterion `{}` is not implemented currently and thus will be ignored", criterion); + None + } + s => { + error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); + None + } + } + }).collect())), + // we need to convert the old `Vec` into a `BTreeSet` + 
stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())), + _kind: PhantomData, + } + } +} + +/// Extract Settings from `settings.json` file present at provided `dir_path` +fn import_settings(dir_path: impl AsRef) -> anyhow::Result { + let path = dbg!(dir_path.as_ref().join("settings.json")); + let file = File::open(path)?; + let reader = std::io::BufReader::new(file); + let metadata = serde_json::from_reader(reader)?; + + Ok(metadata) +} diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs index def47fecb..c0fe0abe6 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs @@ -1,13 +1,13 @@ use std::path::Path; -use anyhow::Context; use chrono::{DateTime, Utc}; -use log::{info, warn}; +use log::info; use serde::{Deserialize, Serialize}; use crate::{index::Index, index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}, option::IndexerOpts}; #[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] pub struct MetadataV2 { db_version: String, index_db_size: u64, @@ -29,6 +29,7 @@ impl MetadataV2 { self, src: impl AsRef, dst: impl AsRef, + // TODO: use these variable to test if loading the index is possible. 
_index_db_size: u64, _update_db_size: u64, indexing_options: &IndexerOpts, @@ -37,37 +38,21 @@ impl MetadataV2 { "Loading dump from {}, dump database version: {}, dump version: V2", self.dump_date, self.db_version ); - // get dir in which to load the db: - let dst_dir = dst - .as_ref() - .parent() - .with_context(|| format!("Invalid db path: {}", dst.as_ref().display()))?; - - let tmp_dst = tempfile::tempdir_in(dst_dir)?; info!("Loading index database."); - HeedUuidStore::load_dump(src.as_ref(), &tmp_dst)?; + HeedUuidStore::load_dump(src.as_ref(), &dst)?; info!("Loading updates."); - UpdateStore::load_dump(&src, &tmp_dst, self.update_db_size)?; + UpdateStore::load_dump(&src, &dst, self.update_db_size)?; info!("Loading indexes"); let indexes_path = src.as_ref().join("indexes"); let indexes = indexes_path.read_dir()?; for index in indexes { let index = index?; - Index::load_dump(&index.path(), &tmp_dst, self.index_db_size, indexing_options)?; + Index::load_dump(&index.path(), &dst, self.index_db_size, indexing_options)?; } - // Persist and atomically rename the db - let persisted_dump = tmp_dst.into_path(); - if dst.as_ref().exists() { - warn!("Overwriting database at {}", dst.as_ref().display()); - std::fs::remove_dir_all(&dst)?; - } - - std::fs::rename(&persisted_dump, &dst)?; - Ok(()) } } diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index b236122bd..e1998f876 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -2,11 +2,12 @@ use std::fs::File; use std::path::{Path, PathBuf}; use chrono::{DateTime, Utc}; -use log::{error, info}; +use log::{error, info, warn}; #[cfg(test)] use mockall::automock; use serde::{Deserialize, Serialize}; use thiserror::Error; +use anyhow::Context; use loaders::v1::MetadataV1; use loaders::v2::MetadataV2; @@ -53,22 +54,16 @@ pub trait DumpActorHandle { } #[derive(Debug, 
Serialize, Deserialize)] -#[serde(rename_all = "camelCase", tag = "dump_version")] +#[serde(tag = "dumpVersion")] pub enum Metadata { - V1 { - #[serde(flatten)] - meta: MetadataV1, - }, - V2 { - #[serde(flatten)] - meta: MetadataV2, - }, + V1(MetadataV1), + V2(MetadataV2), } impl Metadata { pub fn new_v2(index_db_size: u64, update_db_size: u64) -> Self { let meta = MetadataV2::new(index_db_size, update_db_size); - Self::V2 { meta } + Self::V2(meta) } } @@ -135,16 +130,31 @@ pub fn load_dump( let mut meta_file = File::open(&meta_path)?; let meta: Metadata = serde_json::from_reader(&mut meta_file)?; + let dst_dir = dst_path + .as_ref() + .parent() + .with_context(|| format!("Invalid db path: {}", dst_path.as_ref().display()))?; + + let tmp_dst = tempfile::tempdir_in(dst_dir)?; + match meta { - Metadata::V1 { meta } => meta.load_dump(&tmp_src_path, dst_path)?, - Metadata::V2 { meta } => meta.load_dump( + Metadata::V1(meta) => meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size as usize)?, + Metadata::V2(meta) => meta.load_dump( &tmp_src_path, - dst_path.as_ref(), + tmp_dst.path(), index_db_size, update_db_size, indexer_opts, )?, } + // Persist and atomically rename the db + let persisted_dump = tmp_dst.into_path(); + if dst_path.as_ref().exists() { + warn!("Overwriting database at {}", dst_path.as_ref().display()); + std::fs::remove_dir_all(&dst_path)?; + } + + std::fs::rename(&persisted_dump, &dst_path)?; Ok(()) } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 2fd9ff301..e666a536e 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -1,13 +1,16 @@ -use std::{collections::HashSet, io::{BufReader, BufRead, Write}}; use std::fs::{create_dir_all, File}; use std::path::{Path, PathBuf}; +use std::{ + collections::HashSet, + io::{BufRead, BufReader, Write}, +}; use heed::{ 
types::{ByteSlice, Str}, CompactionOption, Database, Env, EnvOpenOptions, }; +use serde::{Deserialize, Serialize}; use uuid::Uuid; -use serde::{Serialize, Deserialize}; use super::{Result, UuidResolverError, UUID_STORE_SIZE}; use crate::helpers::EnvSizer; @@ -45,7 +48,14 @@ impl HeedUuidStore { let mut options = EnvOpenOptions::new(); options.map_size(UUID_STORE_SIZE); // 1GB let env = options.open(path)?; - let db = env.create_database(None)?; Ok(Self { env, db }) } pub fn create_uuid(&self, name: String, err: bool) -> Result { let env = self.env.clone(); let db = self.db; let mut txn = env.write_txn()?; + let db = env.create_database(None)?; + Ok(Self { env, db }) + } + + pub fn create_uuid(&self, name: String, err: bool) -> Result { + let env = self.env.clone(); + let db = self.db; + let mut txn = env.write_txn()?; match db.get(&txn, &name)? { Some(uuid) => { if err { @@ -62,7 +72,10 @@ impl HeedUuidStore { Ok(uuid) } } - } pub fn get_uuid(&self, name: String) -> Result> { let env = self.env.clone(); let db = self.db; + } + pub fn get_uuid(&self, name: String) -> Result> { + let env = self.env.clone(); + let db = self.db; let txn = env.read_txn()?; match db.get(&txn, &name)? 
{ Some(uuid) => { @@ -149,9 +162,7 @@ impl HeedUuidStore { let uid = uid.to_string(); let uuid = Uuid::from_slice(uuid)?; - let entry = DumpEntry { - uuid, uid - }; + let entry = DumpEntry { uuid, uid }; serde_json::to_writer(&mut dump_file, &entry)?; dump_file.write(b"\n").unwrap(); From b3c8f0e1f6156e38a9e74c509cbbfc22f5a0d164 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Mon, 31 May 2021 10:58:51 +0200 Subject: [PATCH 48/54] fix empty index error --- meilisearch-http/src/index/dump.rs | 40 ++++++++++++------- .../index_controller/dump_actor/loaders/v1.rs | 28 ++++++------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs index 9dbb14fbd..eb1d27a4e 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-http/src/index/dump.rs @@ -1,15 +1,15 @@ -use std::{fs::{create_dir_all, File}, path::Path, sync::Arc}; +use std::{fs::{create_dir_all, File}, io::{BufRead, BufReader}, path::Path, sync::Arc}; +use anyhow::bail; use anyhow::Context; use heed::RoTxn; use indexmap::IndexMap; use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream}; use serde::{Deserialize, Serialize}; -use anyhow::bail; use crate::option::IndexerOpts; -use super::{Unchecked, Index, Settings, update_handler::UpdateHandler}; +use super::{update_handler::UpdateHandler, Index, Settings, Unchecked}; #[derive(Serialize, Deserialize)] struct DumpMeta { @@ -64,7 +64,10 @@ impl Index { let settings = self.settings_txn(txn)?.into_unchecked(); let primary_key = self.primary_key(txn)?.map(String::from); - let meta = DumpMeta { settings, primary_key }; + let meta = DumpMeta { + settings, + primary_key, + }; serde_json::to_writer(&mut meta_file, &meta)?; @@ -86,7 +89,10 @@ impl Index { let meta_path = src.as_ref().join(META_FILE_NAME); let mut meta_file = File::open(meta_path)?; - let DumpMeta { settings, primary_key } = serde_json::from_reader(&mut meta_file)?; + let DumpMeta { + settings, + 
primary_key, + } = serde_json::from_reader(&mut meta_file)?; let settings = settings.check(); let index = Self::open(&dst_dir_path, size as usize)?; let mut txn = index.write_txn()?; @@ -96,15 +102,21 @@ impl Index { index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?; let document_file_path = src.as_ref().join(DATA_FILE_NAME); - let document_file = File::open(&document_file_path)?; - index.update_documents_txn( - &mut txn, - JsonStream, - IndexDocumentsMethod::UpdateDocuments, - Some(document_file), - handler.update_builder(0), - primary_key.as_deref(), - )?; + let reader = File::open(&document_file_path)?; + let mut reader = BufReader::new(reader); + reader.fill_buf()?; + // If the document file is empty, we don't perform the document addition, to prevent + // a primary key error to be thrown. + if !reader.buffer().is_empty() { + index.update_documents_txn( + &mut txn, + JsonStream, + IndexDocumentsMethod::UpdateDocuments, + Some(reader), + handler.update_builder(0), + primary_key.as_deref(), + )?; + } txn.commit()?; diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs index 471e66d17..ed268f1f7 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -1,10 +1,4 @@ -use std::{ - collections::{BTreeMap, BTreeSet}, - fs::File, - marker::PhantomData, - path::Path, - sync::Arc, -}; +use std::{collections::{BTreeMap, BTreeSet}, fs::File, io::BufRead, marker::PhantomData, path::Path, sync::Arc}; use heed::EnvOpenOptions; use log::{error, info, warn}; @@ -103,15 +97,17 @@ fn load_index( let update_builder = UpdateBuilder::new(0); let file = File::open(&src.as_ref().join("documents.jsonl"))?; - let reader = std::io::BufReader::new(file); - - index.update_documents( - UpdateFormat::JsonStream, - IndexDocumentsMethod::ReplaceDocuments, - Some(reader), - update_builder, - 
primary_key, - )?; + let mut reader = std::io::BufReader::new(file); + reader.fill_buf()?; + if !reader.buffer().is_empty() { + index.update_documents( + UpdateFormat::JsonStream, + IndexDocumentsMethod::ReplaceDocuments, + Some(reader), + update_builder, + primary_key, + )?; + } // the last step: we extract the original milli::Index and close it Arc::try_unwrap(index.0) From 10fc870684faa6e7a6014391b450898012f9854b Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Mon, 31 May 2021 15:34:03 +0200 Subject: [PATCH 49/54] improve dump info reports --- .../src/index_controller/dump_actor/actor.rs | 68 +++++++++---------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index ff4c39f6d..5ac5ca9b9 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -1,9 +1,9 @@ -use std::path::{Path, PathBuf}; +use std::{collections::HashMap, path::{Path, PathBuf}}; use std::sync::Arc; use async_stream::stream; use chrono::Utc; -use futures::stream::StreamExt; +use futures::{lock::Mutex, stream::StreamExt}; use log::{error, info}; use update_actor::UpdateActorHandle; use uuid_resolver::UuidResolverHandle; @@ -19,7 +19,8 @@ pub struct DumpActor { uuid_resolver: UuidResolver, update: Update, dump_path: PathBuf, - dump_info: Arc>>, + lock: Arc>, + dump_infos: Arc>>, update_db_size: u64, index_db_size: u64, } @@ -42,12 +43,15 @@ where index_db_size: u64, update_db_size: u64, ) -> Self { + let dump_infos = Arc::new(RwLock::new(HashMap::new())); + let lock = Arc::new(Mutex::new(())); Self { inbox: Some(inbox), uuid_resolver, update, dump_path: dump_path.as_ref().into(), - dump_info: Arc::new(RwLock::new(None)), + dump_infos, + lock, index_db_size, update_db_size, } @@ -91,21 +95,22 @@ where } async fn handle_create_dump(&self, ret: oneshot::Sender>) { - if self.is_running().await 
{ - ret.send(Err(DumpError::DumpAlreadyRunning)) - .expect("Dump actor is dead"); - return; - } let uid = generate_uid(); - let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress); - *self.dump_info.write().await = Some(info.clone()); + let _lock = match self.lock.try_lock() { + Some(lock) => lock, + None => { + ret.send(Err(DumpError::DumpAlreadyRunning)) + .expect("Dump actor is dead"); + return; + } + }; + + self.dump_infos.write().await.insert(uid.clone(), info.clone()); ret.send(Ok(info)).expect("Dump actor is dead"); - let dump_info = self.dump_info.clone(); - let task = DumpTask { path: self.dump_path.clone(), uuid_resolver: self.uuid_resolver.clone(), @@ -117,45 +122,34 @@ where let task_result = tokio::task::spawn(task.run()).await; + let mut dump_infos = self.dump_infos + .write() + .await; + let dump_infos = + dump_infos + .get_mut(&uid) + .expect("dump entry deleted while lock was acquired"); + match task_result { Ok(Ok(())) => { - (*dump_info.write().await).as_mut().expect("Inconsistent dump service state").done(); + dump_infos.done(); info!("Dump succeed"); } Ok(Err(e)) => { - (*dump_info.write().await).as_mut().expect("Inconsistent dump service state").with_error(e.to_string()); + dump_infos.with_error(e.to_string()); error!("Dump failed: {}", e); } Err(_) => { + dump_infos.with_error("Unexpected error while performing dump.".to_string()); error!("Dump panicked. Dump status set to failed"); - (*dump_info.write().await).as_mut().expect("Inconsistent dump service state").with_error("Unexpected error while performing dump.".to_string()); } }; } async fn handle_dump_info(&self, uid: String) -> DumpResult { - match &*self.dump_info.read().await { - None => self.dump_from_fs(uid).await, - Some(DumpInfo { uid: ref s, .. 
}) if &uid != s => self.dump_from_fs(uid).await, + match self.dump_infos.read().await.get(&uid) { Some(info) => Ok(info.clone()), + _ => Err(DumpError::DumpDoesNotExist(uid)), } } - - async fn dump_from_fs(&self, uid: String) -> DumpResult { - self.dump_path - .join(format!("{}.dump", &uid)) - .exists() - .then(|| DumpInfo::new(uid.clone(), DumpStatus::Done)) - .ok_or(DumpError::DumpDoesNotExist(uid)) - } - - async fn is_running(&self) -> bool { - matches!( - *self.dump_info.read().await, - Some(DumpInfo { - status: DumpStatus::InProgress, - .. - }) - ) - } } From 1c4f0b2ccff258fe797414c0a087d4e120024361 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Mon, 31 May 2021 16:03:39 +0200 Subject: [PATCH 50/54] clippy, fmt & tests --- meilisearch-http/build.rs | 8 ++- meilisearch-http/src/data/mod.rs | 10 +++- meilisearch-http/src/error.rs | 4 +- .../src/helpers/authentication.rs | 33 +++++------- meilisearch-http/src/index/dump.rs | 11 ++-- meilisearch-http/src/index/mod.rs | 12 +++-- meilisearch-http/src/index/search.rs | 51 +++++++++---------- meilisearch-http/src/index/updates.rs | 2 +- .../src/index_controller/dump_actor/actor.rs | 19 ++++--- .../dump_actor/handle_impl.rs | 15 ++++-- .../index_controller/dump_actor/loaders/v1.rs | 9 +++- .../index_controller/dump_actor/loaders/v2.rs | 6 ++- .../index_controller/dump_actor/message.rs | 4 +- .../src/index_controller/dump_actor/mod.rs | 8 +-- .../src/index_controller/index_actor/actor.rs | 9 ++-- .../index_actor/handle_impl.rs | 5 +- .../index_controller/index_actor/message.rs | 2 +- .../src/index_controller/index_actor/mod.rs | 21 +++++--- .../src/index_controller/snapshot.rs | 6 +-- .../update_actor/store/dump.rs | 18 ++++--- .../update_actor/store/mod.rs | 37 +++++++++----- .../index_controller/uuid_resolver/actor.rs | 2 +- .../index_controller/uuid_resolver/message.rs | 2 +- .../index_controller/uuid_resolver/store.rs | 4 +- meilisearch-http/src/lib.rs | 10 ++-- meilisearch-http/src/routes/dump.rs | 11 ++-- 
meilisearch-http/src/routes/index.rs | 2 +- meilisearch-http/src/routes/mod.rs | 2 +- meilisearch-http/src/routes/settings/mod.rs | 2 +- meilisearch-http/tests/common/index.rs | 2 +- meilisearch-http/tests/common/server.rs | 2 +- 31 files changed, 196 insertions(+), 133 deletions(-) diff --git a/meilisearch-http/build.rs b/meilisearch-http/build.rs index 5dbde1477..557e04fe7 100644 --- a/meilisearch-http/build.rs +++ b/meilisearch-http/build.rs @@ -50,7 +50,7 @@ mod mini_dashboard { sha1_file.read_to_string(&mut sha1)?; if sha1 == meta["sha1"].as_str().unwrap() { // Nothing to do. - return Ok(()) + return Ok(()); } } @@ -62,7 +62,11 @@ mod mini_dashboard { hasher.update(&dashboard_assets_bytes); let sha1 = hex::encode(hasher.finalize()); - assert_eq!(meta["sha1"].as_str().unwrap(), sha1, "Downloaded mini-dashboard shasum differs from the one specified in the Cargo.toml"); + assert_eq!( + meta["sha1"].as_str().unwrap(), + sha1, + "Downloaded mini-dashboard shasum differs from the one specified in the Cargo.toml" + ); create_dir_all(&dashboard_dir)?; let cursor = Cursor::new(&dashboard_assets_bytes); diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index 008065d74..9f8a688bc 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -4,7 +4,9 @@ use std::sync::Arc; use sha2::Digest; use crate::index::{Checked, Settings}; -use crate::index_controller::{IndexController, IndexStats, Stats, DumpInfo, IndexMetadata, IndexSettings}; +use crate::index_controller::{ + DumpInfo, IndexController, IndexMetadata, IndexSettings, IndexStats, Stats, +}; use crate::option::Opt; pub mod search; @@ -67,7 +69,11 @@ impl Data { api_keys.generate_missing_api_keys(); - let inner = DataInner { index_controller, api_keys, options }; + let inner = DataInner { + index_controller, + api_keys, + options, + }; let inner = Arc::new(inner); Ok(Data { inner }) diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs 
index 6489716ca..07bd96fb9 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -299,7 +299,7 @@ impl From for Error { JsonPayloadError::Payload(err) => { Error::BadRequest(format!("Problem while decoding the request: {}", err)) } - e => Error::Internal(format!("Unexpected Json error: {}", e)) + e => Error::Internal(format!("Unexpected Json error: {}", e)), } } } @@ -310,7 +310,7 @@ impl From for Error { QueryPayloadError::Deserialize(err) => { Error::BadRequest(format!("Invalid query parameters: {}", err)) } - e => Error::Internal(format!("Unexpected query payload error: {}", e)) + e => Error::Internal(format!("Unexpected query payload error: {}", e)), } } } diff --git a/meilisearch-http/src/helpers/authentication.rs b/meilisearch-http/src/helpers/authentication.rs index a1a0c431e..54d5488f4 100644 --- a/meilisearch-http/src/helpers/authentication.rs +++ b/meilisearch-http/src/helpers/authentication.rs @@ -1,16 +1,16 @@ use std::pin::Pin; use std::task::{Context, Poll}; +use actix_web::body::Body; use actix_web::dev::{Service, ServiceRequest, ServiceResponse, Transform}; use actix_web::web; -use actix_web::body::Body; -use futures::ready; -use futures::future::{ok, Future, Ready}; use actix_web::ResponseError as _; +use futures::future::{ok, Future, Ready}; +use futures::ready; use pin_project::pin_project; -use crate::Data; use crate::error::{Error, ResponseError}; +use crate::Data; #[derive(Clone, Copy)] pub enum Authentication { @@ -59,19 +59,15 @@ where let data = req.app_data::>().unwrap(); if data.api_keys().master.is_none() { - return AuthenticationFuture::Authenticated(self.service.call(req)) + return AuthenticationFuture::Authenticated(self.service.call(req)); } let auth_header = match req.headers().get("X-Meili-API-Key") { Some(auth) => match auth.to_str() { Ok(auth) => auth, - Err(_) => { - return AuthenticationFuture::NoHeader(Some(req)) - } + Err(_) => return AuthenticationFuture::NoHeader(Some(req)), }, - None => { - 
return AuthenticationFuture::NoHeader(Some(req)) - } + None => return AuthenticationFuture::NoHeader(Some(req)), }; let authenticated = match self.acl { @@ -111,15 +107,13 @@ where { type Output = Result, actix_web::Error>; - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) ->Poll { + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.project(); match this { - AuthProj::Authenticated(fut) => { - match ready!(fut.poll(cx)) { - Ok(resp) => Poll::Ready(Ok(resp)), - Err(e) => Poll::Ready(Err(e)), - } - } + AuthProj::Authenticated(fut) => match ready!(fut.poll(cx)) { + Ok(resp) => Poll::Ready(Ok(resp)), + Err(e) => Poll::Ready(Err(e)), + }, AuthProj::NoHeader(req) => { match req.take() { Some(req) => { @@ -135,7 +129,8 @@ where AuthProj::Refused(req) => { match req.take() { Some(req) => { - let bad_token = req.headers() + let bad_token = req + .headers() .get("X-Meili-API-Key") .map(|h| h.to_str().map(String::from).unwrap_or_default()) .unwrap_or_default(); diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs index eb1d27a4e..dd29aa50a 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-http/src/index/dump.rs @@ -1,4 +1,9 @@ -use std::{fs::{create_dir_all, File}, io::{BufRead, BufReader}, path::Path, sync::Arc}; +use std::{ + fs::{create_dir_all, File}, + io::{BufRead, BufReader}, + path::Path, + sync::Arc, +}; use anyhow::bail; use anyhow::Context; @@ -17,8 +22,8 @@ struct DumpMeta { primary_key: Option, } -const META_FILE_NAME: &'static str = "meta.json"; -const DATA_FILE_NAME: &'static str = "documents.jsonl"; +const META_FILE_NAME: &str = "meta.json"; +const DATA_FILE_NAME: &str = "documents.jsonl"; impl Index { pub fn dump(&self, path: impl AsRef) -> anyhow::Result<()> { diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index 331db07c4..7d9603e9e 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,6 +1,10 @@ -use 
std::{collections::{BTreeSet, HashSet}, marker::PhantomData, path::Path}; use std::ops::Deref; use std::sync::Arc; +use std::{ + collections::{BTreeSet, HashSet}, + marker::PhantomData, + path::Path, +}; use anyhow::{bail, Context}; use heed::{EnvOpenOptions, RoTxn}; @@ -9,13 +13,13 @@ use serde_json::{Map, Value}; use crate::helpers::EnvSizer; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; -pub use updates::{Facets, Settings, Checked, Unchecked}; use serde::{de::Deserializer, Deserialize}; +pub use updates::{Checked, Facets, Settings, Unchecked}; -mod search; -mod updates; mod dump; +mod search; pub mod update_handler; +mod updates; pub type Document = Map; diff --git a/meilisearch-http/src/index/search.rs b/meilisearch-http/src/index/search.rs index 0ff6c1bc3..bf559eb91 100644 --- a/meilisearch-http/src/index/search.rs +++ b/meilisearch-http/src/index/search.rs @@ -90,7 +90,8 @@ impl Index { let mut documents = Vec::new(); let fields_ids_map = self.fields_ids_map(&rtxn).unwrap(); - let displayed_ids = self.displayed_fields_ids(&rtxn)? + let displayed_ids = self + .displayed_fields_ids(&rtxn)? .map(|fields| fields.into_iter().collect::>()) .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); @@ -156,10 +157,8 @@ impl Index { }; let stop_words = fst::Set::default(); - let highlighter = Highlighter::new( - &stop_words, - (String::from(""), String::from("")), - ); + let highlighter = + Highlighter::new(&stop_words, (String::from(""), String::from(""))); for (_id, obkv) in self.documents(&rtxn, documents_ids)? 
{ let document = make_document(&all_attributes, &fields_ids_map, obkv)?; @@ -384,17 +383,16 @@ mod test { #[test] fn no_formatted() { let stop_words = fst::Set::default(); - let highlighter = Highlighter::new( - &stop_words, - (String::from(""), String::from("")), - ); + let highlighter = + Highlighter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); let id = fields.insert("test").unwrap(); let mut buf = Vec::new(); let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()).unwrap(); + obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) + .unwrap(); obkv.finish().unwrap(); let obkv = obkv::KvReader::new(&buf); @@ -410,8 +408,9 @@ mod test { &highlighter, &matching_words, &all_formatted, - &to_highlight_ids - ).unwrap(); + &to_highlight_ids, + ) + .unwrap(); assert!(value.is_empty()); } @@ -419,17 +418,16 @@ mod test { #[test] fn formatted_no_highlight() { let stop_words = fst::Set::default(); - let highlighter = Highlighter::new( - &stop_words, - (String::from(""), String::from("")), - ); + let highlighter = + Highlighter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); let id = fields.insert("test").unwrap(); let mut buf = Vec::new(); let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()).unwrap(); + obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) + .unwrap(); obkv.finish().unwrap(); let obkv = obkv::KvReader::new(&buf); @@ -445,8 +443,9 @@ mod test { &highlighter, &matching_words, &all_formatted, - &to_highlight_ids - ).unwrap(); + &to_highlight_ids, + ) + .unwrap(); assert_eq!(value["test"], "hello"); } @@ -454,17 +453,16 @@ mod test { #[test] fn formatted_with_highlight() { let stop_words = fst::Set::default(); - let highlighter = Highlighter::new( - &stop_words, - (String::from(""), String::from("")), - ); + 
let highlighter = + Highlighter::new(&stop_words, (String::from(""), String::from(""))); let mut fields = FieldsIdsMap::new(); let id = fields.insert("test").unwrap(); let mut buf = Vec::new(); let mut obkv = obkv::KvWriter::new(&mut buf); - obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()).unwrap(); + obkv.insert(id, Value::String("hello".into()).to_string().as_bytes()) + .unwrap(); obkv.finish().unwrap(); let obkv = obkv::KvReader::new(&buf); @@ -480,8 +478,9 @@ mod test { &highlighter, &matching_words, &all_formatted, - &to_highlight_ids - ).unwrap(); + &to_highlight_ids, + ) + .unwrap(); assert_eq!(value["test"], "hello"); } diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 566356d5f..5ef6d854e 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -198,7 +198,7 @@ impl Index { builder.index_documents_method(method); //let indexing_callback = - //|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step); + //|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step); let indexing_callback = |_, _| (); diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 5ac5ca9b9..8ea2e1f6d 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -1,13 +1,16 @@ -use std::{collections::HashMap, path::{Path, PathBuf}}; use std::sync::Arc; +use std::{ + collections::HashMap, + path::{Path, PathBuf}, +}; use async_stream::stream; use chrono::Utc; use futures::{lock::Mutex, stream::StreamExt}; use log::{error, info}; +use tokio::sync::{mpsc, oneshot, RwLock}; use update_actor::UpdateActorHandle; use uuid_resolver::UuidResolverHandle; -use tokio::sync::{mpsc, oneshot, RwLock}; use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus, DumpTask}; use 
crate::index_controller::{update_actor, uuid_resolver}; @@ -107,7 +110,10 @@ where } }; - self.dump_infos.write().await.insert(uid.clone(), info.clone()); + self.dump_infos + .write() + .await + .insert(uid.clone(), info.clone()); ret.send(Ok(info)).expect("Dump actor is dead"); @@ -122,11 +128,8 @@ where let task_result = tokio::task::spawn(task.run()).await; - let mut dump_infos = self.dump_infos - .write() - .await; - let dump_infos = - dump_infos + let mut dump_infos = self.dump_infos.write().await; + let dump_infos = dump_infos .get_mut(&uid) .expect("dump entry deleted while lock was acquired"); diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs index ff663798f..3d8665e62 100644 --- a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs @@ -1,7 +1,7 @@ -use std::path::Path; -use actix_web::web::Bytes; -use tokio::sync::{mpsc, oneshot}; use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult}; +use actix_web::web::Bytes; +use std::path::Path; +use tokio::sync::{mpsc, oneshot}; #[derive(Clone)] pub struct DumpActorHandleImpl { @@ -34,7 +34,14 @@ impl DumpActorHandleImpl { update_db_size: u64, ) -> anyhow::Result { let (sender, receiver) = mpsc::channel(10); - let actor = DumpActor::new(receiver, uuid_resolver, update, path, index_db_size, update_db_size); + let actor = DumpActor::new( + receiver, + uuid_resolver, + update, + path, + index_db_size, + update_db_size, + ); tokio::task::spawn(actor.run()); diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs index ed268f1f7..70c89664b 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -1,4 +1,11 @@ -use std::{collections::{BTreeMap, BTreeSet}, 
fs::File, io::BufRead, marker::PhantomData, path::Path, sync::Arc}; +use std::{ + collections::{BTreeMap, BTreeSet}, + fs::File, + io::BufRead, + marker::PhantomData, + path::Path, + sync::Arc, +}; use heed::EnvOpenOptions; use log::{error, info, warn}; diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs index c0fe0abe6..96001902d 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs @@ -4,7 +4,11 @@ use chrono::{DateTime, Utc}; use log::info; use serde::{Deserialize, Serialize}; -use crate::{index::Index, index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}, option::IndexerOpts}; +use crate::{ + index::Index, + index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}, + option::IndexerOpts, +}; #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] diff --git a/meilisearch-http/src/index_controller/dump_actor/message.rs b/meilisearch-http/src/index_controller/dump_actor/message.rs index 14409afbb..dff9f5954 100644 --- a/meilisearch-http/src/index_controller/dump_actor/message.rs +++ b/meilisearch-http/src/index_controller/dump_actor/message.rs @@ -1,7 +1,6 @@ use tokio::sync::oneshot; -use super::{DumpResult, DumpInfo}; - +use super::{DumpInfo, DumpResult}; pub enum DumpMsg { CreateDump { @@ -12,4 +11,3 @@ pub enum DumpMsg { ret: oneshot::Sender>, }, } - diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index e1998f876..dde04bc12 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -1,13 +1,13 @@ use std::fs::File; use std::path::{Path, PathBuf}; +use anyhow::Context; use chrono::{DateTime, Utc}; use log::{error, info, warn}; #[cfg(test)] use mockall::automock; use 
serde::{Deserialize, Serialize}; use thiserror::Error; -use anyhow::Context; use loaders::v1::MetadataV1; use loaders::v2::MetadataV2; @@ -25,7 +25,7 @@ mod handle_impl; mod loaders; mod message; -const META_FILE_NAME: &'static str = "metadata.json"; +const META_FILE_NAME: &str = "metadata.json"; pub type DumpResult = std::result::Result; @@ -138,7 +138,9 @@ pub fn load_dump( let tmp_dst = tempfile::tempdir_in(dst_dir)?; match meta { - Metadata::V1(meta) => meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size as usize)?, + Metadata::V1(meta) => { + meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size as usize)? + } Metadata::V2(meta) => meta.load_dump( &tmp_src_path, tmp_dst.path(), diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index 2f136c011..31e2a58d4 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -6,14 +6,15 @@ use async_stream::stream; use futures::stream::StreamExt; use heed::CompactionOption; use log::debug; -use tokio::{fs, sync::mpsc}; use tokio::task::spawn_blocking; +use tokio::{fs, sync::mpsc}; use uuid::Uuid; -use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings, update_handler::UpdateHandler}; +use crate::index::{ + update_handler::UpdateHandler, Checked, Document, SearchQuery, SearchResult, Settings, +}; use crate::index_controller::{ - get_arc_ownership_blocking, Failed, IndexStats, Processed, - Processing, + get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing, }; use crate::option::IndexerOpts; diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index 26aa189d0..6bf83c647 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -3,7 
+3,10 @@ use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; -use crate::{index::Checked, index_controller::{IndexSettings, IndexStats, Processing}}; +use crate::{ + index::Checked, + index_controller::{IndexSettings, IndexStats, Processing}, +}; use crate::{ index::{Document, SearchQuery, SearchResult, Settings}, index_controller::{Failed, Processed}, diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 714a30ecc..377b2c333 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use tokio::sync::oneshot; use uuid::Uuid; -use crate::index::{Document, SearchQuery, SearchResult, Settings, Checked}; +use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings}; use crate::index_controller::{Failed, IndexStats, Processed, Processing}; use super::{IndexMeta, IndexResult, IndexSettings}; diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index dbea5151d..1ddc0199e 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -15,7 +15,7 @@ use message::IndexMsg; use store::{IndexStore, MapIndexStore}; use crate::index::{Checked, Document, Index, SearchQuery, SearchResult, Settings}; -use crate::index_controller::{Failed, Processed, Processing, IndexStats}; +use crate::index_controller::{Failed, IndexStats, Processed, Processing}; use super::IndexSettings; @@ -44,7 +44,11 @@ impl IndexMeta { let created_at = index.created_at(&txn)?; let updated_at = index.updated_at(&txn)?; let primary_key = index.primary_key(&txn)?.map(String::from); - Ok(Self { created_at, updated_at, primary_key }) + Ok(Self { + created_at, + updated_at, + primary_key, + }) } } @@ -57,7 +61,7 @@ pub 
enum IndexError { #[error("Existing primary key")] ExistingPrimaryKey, #[error("Internal Index Error: {0}")] - Internal(String) + Internal(String), } macro_rules! internal_error { @@ -72,7 +76,12 @@ macro_rules! internal_error { } } -internal_error!(anyhow::Error, heed::Error, tokio::task::JoinError, std::io::Error); +internal_error!( + anyhow::Error, + heed::Error, + tokio::task::JoinError, + std::io::Error +); #[async_trait::async_trait] #[cfg_attr(test, automock)] @@ -190,8 +199,8 @@ mod test { self.as_ref().snapshot(uuid, path).await } - async fn dump(&self, uid: String, uuid: Uuid, path: PathBuf) -> IndexResult<()> { - self.as_ref().dump(uid, uuid, path).await + async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> { + self.as_ref().dump(uuid, path).await } async fn get_index_stats(&self, uuid: Uuid) -> IndexResult { diff --git a/meilisearch-http/src/index_controller/snapshot.rs b/meilisearch-http/src/index_controller/snapshot.rs index 2a456eb26..daef7d582 100644 --- a/meilisearch-http/src/index_controller/snapshot.rs +++ b/meilisearch-http/src/index_controller/snapshot.rs @@ -144,7 +144,7 @@ mod test { use crate::index_controller::update_actor::{ MockUpdateActorHandle, UpdateActorHandleImpl, UpdateError, }; - use crate::index_controller::uuid_resolver::{MockUuidResolverHandle, UuidError}; + use crate::index_controller::uuid_resolver::{MockUuidResolverHandle, UuidResolverError}; #[actix_rt::test] async fn test_normal() { @@ -193,7 +193,7 @@ mod test { .expect_snapshot() .times(1) // abitrary error - .returning(|_| Box::pin(err(UuidError::NameAlreadyExist))); + .returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist))); let update_handle = MockUpdateActorHandle::new(); @@ -248,7 +248,7 @@ mod test { // we expect the funtion to be called between 2 and 3 time in the given interval. 
.times(2..4) // abitrary error, to short-circuit the function - .returning(move |_| Box::pin(err(UuidError::NameAlreadyExist))); + .returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist))); let update_handle = MockUpdateActorHandle::new(); diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index fad8974f3..6dfb300e2 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -11,7 +11,10 @@ use uuid::Uuid; use super::UpdateStore; use super::{codec::UpdateKeyCodec, State}; -use crate::index_controller::{Enqueued, UpdateStatus, index_actor::IndexActorHandle, update_actor::store::update_uuid_to_file_path}; +use crate::index_controller::{ + index_actor::IndexActorHandle, update_actor::store::update_uuid_to_file_path, Enqueued, + UpdateStatus, +}; #[derive(Serialize, Deserialize)] struct UpdateEntry { @@ -89,7 +92,7 @@ impl UpdateStore { }; serde_json::to_writer(&mut file, &update_json)?; - file.write(b"\n")?; + file.write_all(b"\n")?; } } @@ -111,12 +114,12 @@ impl UpdateStore { for update in updates { let ((uuid, _), data) = update?; if uuids.contains(&uuid) { - let update = data.decode()?.into(); + let update = data.decode()?; let update_json = UpdateEntry { uuid, update }; serde_json::to_writer(&mut file, &update_json)?; - file.write(b"\n")?; + file.write_all(b"\n")?; } } @@ -131,7 +134,6 @@ impl UpdateStore { let dst_update_path = dst.as_ref().join("updates/"); create_dir_all(&dst_update_path)?; - let mut options = EnvOpenOptions::new(); options.map_size(db_size as usize); let (store, _) = UpdateStore::new(options, &dst_update_path)?; @@ -152,7 +154,11 @@ impl UpdateStore { store.register_raw_updates(&mut wtxn, &update, uuid)?; // Copy ascociated update path if it exists - if let UpdateStatus::Enqueued(Enqueued { content: Some(uuid), .. 
}) = update { + if let UpdateStatus::Enqueued(Enqueued { + content: Some(uuid), + .. + }) = update + { let src = update_uuid_to_file_path(&src_update_path, uuid); let dst = update_uuid_to_file_path(&dst_update_path, uuid); std::fs::copy(src, dst)?; diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index 29ccd4f34..006549fb6 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -1,10 +1,13 @@ -pub mod dump; mod codec; +pub mod dump; -use std::{collections::{BTreeMap, HashSet}, path::PathBuf}; use std::fs::{copy, create_dir_all, remove_file, File}; use std::path::Path; use std::sync::Arc; +use std::{ + collections::{BTreeMap, HashSet}, + path::PathBuf, +}; use arc_swap::ArcSwap; use futures::StreamExt; @@ -20,13 +23,13 @@ use uuid::Uuid; use codec::*; use super::UpdateMeta; -use crate::{helpers::EnvSizer, index_controller::index_actor::IndexResult}; use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle}; +use crate::{helpers::EnvSizer, index_controller::index_actor::IndexResult}; #[allow(clippy::upper_case_acronyms)] type BEU64 = U64; -const UPDATE_DIR: &'static str = "update_files"; +const UPDATE_DIR: &str = "update_files"; pub struct UpdateStoreInfo { /// Size of the update store in bytes. @@ -441,11 +444,12 @@ impl UpdateStore { txn.commit()?; - uuids_to_remove.iter() + uuids_to_remove + .iter() .map(|uuid| update_uuid_to_file_path(&self.path, *uuid)) .for_each(|path| { - let _ = remove_file(path); - }); + let _ = remove_file(path); + }); // We don't care about the currently processing update, since it will be removed by itself // once its done processing, and we can't abort a running update. 
@@ -482,7 +486,11 @@ impl UpdateStore { for entry in pendings { let ((_, uuid, _), pending) = entry?; if uuids.contains(&uuid) { - if let Enqueued { content: Some(uuid), .. } = pending.decode()? { + if let Enqueued { + content: Some(uuid), + .. + } = pending.decode()? + { let path = update_uuid_to_file_path(&self.path, uuid); copy(path, &update_files_path)?; } @@ -507,13 +515,16 @@ impl UpdateStore { Ok(()) } - pub fn get_info(&self) -> anyhow::Result { let mut size = self.env.size(); let txn = self.env.read_txn()?; for entry in self.pending_queue.iter(&txn)? { let (_, pending) = entry?; - if let Enqueued { content: Some(uuid), .. } = pending { + if let Enqueued { + content: Some(uuid), + .. + } = pending + { let path = update_uuid_to_file_path(&self.path, uuid); size += File::open(path)?.metadata()?.len(); } @@ -528,7 +539,9 @@ impl UpdateStore { } fn update_uuid_to_file_path(root: impl AsRef, uuid: Uuid) -> PathBuf { - root.as_ref().join(UPDATE_DIR).join(format!("update_{}", uuid)) + root.as_ref() + .join(UPDATE_DIR) + .join(format!("update_{}", uuid)) } #[cfg(test)] @@ -577,7 +590,7 @@ mod test { let store_clone = update_store.clone(); tokio::task::spawn_blocking(move || { store_clone - .register_update(meta, Some("here"), uuid) + .register_update(meta, None, uuid) .unwrap(); }) .await diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs index 3592c3551..0211cef25 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs @@ -4,7 +4,7 @@ use log::{info, warn}; use tokio::sync::mpsc; use uuid::Uuid; -use super::{Result, UuidResolverError, UuidResolveMsg, UuidStore}; +use super::{Result, UuidResolveMsg, UuidResolverError, UuidStore}; pub struct UuidResolverActor { inbox: mpsc::Receiver, diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs 
b/meilisearch-http/src/index_controller/uuid_resolver/message.rs index 166347455..2092c67fd 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/message.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/message.rs @@ -37,5 +37,5 @@ pub enum UuidResolveMsg { DumpRequest { path: PathBuf, ret: oneshot::Sender>>, - } + }, } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index e666a536e..6289cefcd 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -164,7 +164,7 @@ impl HeedUuidStore { let entry = DumpEntry { uuid, uid }; serde_json::to_writer(&mut dump_file, &entry)?; - dump_file.write(b"\n").unwrap(); + dump_file.write_all(b"\n").unwrap(); uuids.insert(uuid); } @@ -192,7 +192,7 @@ impl HeedUuidStore { println!("importing {} {}", uid, uuid); db.db.put(&mut txn, &uid, uuid.as_bytes())?; } - Err(e) => Err(e)?, + Err(e) => return Err(e.into()), } line.clear(); diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index e19037482..26b6a784c 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -62,11 +62,11 @@ macro_rules! 
create_app { app.wrap( Cors::default() - .send_wildcard() - .allowed_headers(vec!["content-type", "x-meili-api-key"]) - .allow_any_origin() - .allow_any_method() - .max_age(86_400) // 24h + .send_wildcard() + .allowed_headers(vec!["content-type", "x-meili-api-key"]) + .allow_any_origin() + .allow_any_method() + .max_age(86_400), // 24h ) .wrap(middleware::Logger::default()) .wrap(middleware::Compress::default()) diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 47c081e6f..370eef509 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -1,20 +1,17 @@ -use actix_web::{post, get, web}; use actix_web::HttpResponse; -use serde::{Serialize, Deserialize}; +use actix_web::{get, post, web}; +use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::helpers::Authentication; use crate::Data; pub fn services(cfg: &mut web::ServiceConfig) { - cfg.service(create_dump) - .service(get_dump_status); + cfg.service(create_dump).service(get_dump_status); } #[post("/dumps", wrap = "Authentication::Private")] -async fn create_dump( - data: web::Data, -) -> Result { +async fn create_dump(data: web::Data) -> Result { let res = data.create_dump().await?; Ok(HttpResponse::Accepted().json(res)) diff --git a/meilisearch-http/src/routes/index.rs b/meilisearch-http/src/routes/index.rs index 62717c90d..4dfe90abf 100644 --- a/meilisearch-http/src/routes/index.rs +++ b/meilisearch-http/src/routes/index.rs @@ -1,7 +1,7 @@ use actix_web::{delete, get, post, put}; use actix_web::{web, HttpResponse}; use chrono::{DateTime, Utc}; -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; use crate::error::ResponseError; use crate::helpers::Authentication; diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 999c4f881..a550064ba 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -2,6 +2,7 @@ use 
actix_web::{get, HttpResponse}; use serde::{Deserialize, Serialize}; pub mod document; +pub mod dump; pub mod health; pub mod index; pub mod key; @@ -9,7 +10,6 @@ pub mod search; pub mod settings; pub mod stats; pub mod synonym; -pub mod dump; #[derive(Deserialize)] pub struct IndexParam { diff --git a/meilisearch-http/src/routes/settings/mod.rs b/meilisearch-http/src/routes/settings/mod.rs index 03f1ee95c..8ede56046 100644 --- a/meilisearch-http/src/routes/settings/mod.rs +++ b/meilisearch-http/src/routes/settings/mod.rs @@ -1,9 +1,9 @@ use actix_web::{delete, get, post, web, HttpResponse}; -use crate::{error::ResponseError, index::Unchecked}; use crate::helpers::Authentication; use crate::index::Settings; use crate::Data; +use crate::{error::ResponseError, index::Unchecked}; #[macro_export] macro_rules! make_setting_route { diff --git a/meilisearch-http/tests/common/index.rs b/meilisearch-http/tests/common/index.rs index adb7fef3e..7d98d0733 100644 --- a/meilisearch-http/tests/common/index.rs +++ b/meilisearch-http/tests/common/index.rs @@ -47,7 +47,7 @@ impl Index<'_> { update_id as u64 } - pub async fn create(& self, primary_key: Option<&str>) -> (Value, StatusCode) { + pub async fn create(&self, primary_key: Option<&str>) -> (Value, StatusCode) { let body = json!({ "uid": self.uid, "primaryKey": primary_key, diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 100722ec4..3c50110c3 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -44,7 +44,7 @@ impl Server { } /// Returns a view to an index. There is no guarantee that the index exists. 
- pub fn index(& self, uid: impl AsRef) -> Index<'_> { + pub fn index(&self, uid: impl AsRef) -> Index<'_> { Index { uid: encode(uid.as_ref()), service: &self.service, From 6609f9e3bec9ea833d0a2e70d2ef5f51565bd994 Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Mon, 31 May 2021 16:40:59 +0200 Subject: [PATCH 51/54] review edits --- meilisearch-http/src/index/dump.rs | 19 +++--- meilisearch-http/src/index/mod.rs | 13 ++-- meilisearch-http/src/index/updates.rs | 6 +- .../src/index_controller/dump_actor/actor.rs | 14 ++-- .../dump_actor/handle_impl.rs | 10 +-- .../index_controller/dump_actor/loaders/v1.rs | 64 +++++++++++-------- .../index_controller/dump_actor/loaders/v2.rs | 25 ++++---- .../src/index_controller/dump_actor/mod.rs | 14 ++-- meilisearch-http/src/index_controller/mod.rs | 8 +-- .../index_controller/update_actor/actor.rs | 2 +- .../update_actor/store/dump.rs | 2 +- .../update_actor/store/mod.rs | 4 +- .../index_controller/uuid_resolver/store.rs | 24 ++++--- 13 files changed, 100 insertions(+), 105 deletions(-) diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs index dd29aa50a..247c02085 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-http/src/index/dump.rs @@ -1,12 +1,9 @@ -use std::{ - fs::{create_dir_all, File}, - io::{BufRead, BufReader}, - path::Path, - sync::Arc, -}; +use std::fs::{create_dir_all, File}; +use std::io::{BufRead, BufReader, Write}; +use std::path::Path; +use std::sync::Arc; -use anyhow::bail; -use anyhow::Context; +use anyhow::{bail, Context}; use heed::RoTxn; use indexmap::IndexMap; use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream}; @@ -55,7 +52,7 @@ impl Index { } serde_json::to_writer(&mut document_file, &json_map)?; - std::io::Write::write(&mut document_file, b"\n")?; + document_file.write_all(b"\n")?; json_map.clear(); } @@ -82,7 +79,7 @@ impl Index { pub fn load_dump( src: impl AsRef, dst: impl AsRef, - size: u64, + size: usize, indexing_options: 
&IndexerOpts, ) -> anyhow::Result<()> { let dir_name = src @@ -99,7 +96,7 @@ impl Index { primary_key, } = serde_json::from_reader(&mut meta_file)?; let settings = settings.check(); - let index = Self::open(&dst_dir_path, size as usize)?; + let index = Self::open(&dst_dir_path, size)?; let mut txn = index.write_txn()?; let handler = UpdateHandler::new(&indexing_options)?; diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index 7d9603e9e..790ac58f0 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,10 +1,9 @@ +use std::collections::{BTreeSet, HashSet}; +use std::fs::create_dir_all; +use std::marker::PhantomData; use std::ops::Deref; +use std::path::Path; use std::sync::Arc; -use std::{ - collections::{BTreeSet, HashSet}, - marker::PhantomData, - path::Path, -}; use anyhow::{bail, Context}; use heed::{EnvOpenOptions, RoTxn}; @@ -44,7 +43,7 @@ where impl Index { pub fn open(path: impl AsRef, size: usize) -> anyhow::Result { - std::fs::create_dir_all(&path)?; + create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); let index = milli::Index::new(options, &path)?; @@ -113,8 +112,6 @@ impl Index { let mut documents = Vec::new(); - println!("fields to display: {:?}", fields_to_display); - for entry in iter { let (_id, obkv) = entry?; let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?; diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 5ef6d854e..046823fb7 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -197,10 +197,8 @@ impl Index { builder.update_format(format); builder.index_documents_method(method); - //let indexing_callback = - //|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step); - - let indexing_callback = |_, _| (); + let indexing_callback = + |indexing_step, update_id| info!("update {}: {:?}", update_id, 
indexing_step); let gzipped = false; let addition = match content { diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 8ea2e1f6d..c78079de6 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -1,8 +1,6 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::{ - collections::HashMap, - path::{Path, PathBuf}, -}; use async_stream::stream; use chrono::Utc; @@ -24,8 +22,8 @@ pub struct DumpActor { dump_path: PathBuf, lock: Arc>, dump_infos: Arc>>, - update_db_size: u64, - index_db_size: u64, + update_db_size: usize, + index_db_size: usize, } /// Generate uid from creation date @@ -43,8 +41,8 @@ where uuid_resolver: UuidResolver, update: Update, dump_path: impl AsRef, - index_db_size: u64, - update_db_size: u64, + index_db_size: usize, + update_db_size: usize, ) -> Self { let dump_infos = Arc::new(RwLock::new(HashMap::new())); let lock = Arc::new(Mutex::new(())); diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs index 3d8665e62..ab91aeae6 100644 --- a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs @@ -1,8 +1,10 @@ -use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult}; -use actix_web::web::Bytes; use std::path::Path; + +use actix_web::web::Bytes; use tokio::sync::{mpsc, oneshot}; +use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult}; + #[derive(Clone)] pub struct DumpActorHandleImpl { sender: mpsc::Sender, @@ -30,8 +32,8 @@ impl DumpActorHandleImpl { path: impl AsRef, uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl, update: crate::index_controller::update_actor::UpdateActorHandleImpl, - index_db_size: u64, - 
update_db_size: u64, + index_db_size: usize, + update_db_size: usize, ) -> anyhow::Result { let (sender, receiver) = mpsc::channel(10); let actor = DumpActor::new( diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs index 70c89664b..89893998e 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -1,22 +1,20 @@ -use std::{ - collections::{BTreeMap, BTreeSet}, - fs::File, - io::BufRead, - marker::PhantomData, - path::Path, - sync::Arc, -}; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs::{create_dir_all, File}; +use std::io::BufRead; +use std::marker::PhantomData; +use std::path::Path; +use std::sync::Arc; use heed::EnvOpenOptions; use log::{error, info, warn}; -use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +use milli::update::{IndexDocumentsMethod, UpdateFormat}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use crate::{index::deserialize_some, index_controller::uuid_resolver::HeedUuidStore}; +use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}; use crate::{ - index::{Index, Unchecked}, - index_controller::{self, IndexMetadata}, + index::{deserialize_some, update_handler::UpdateHandler, Index, Unchecked}, + option::IndexerOpts, }; #[derive(Serialize, Deserialize, Debug)] @@ -32,28 +30,33 @@ impl MetadataV1 { src: impl AsRef, dst: impl AsRef, size: usize, + indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { info!( "Loading dump, dump database version: {}, dump version: V1", self.db_version ); - dbg!("here"); - let uuid_store = HeedUuidStore::new(&dst)?; - dbg!("here"); for index in self.indexes { let uuid = Uuid::new_v4(); uuid_store.insert(index.uid.clone(), uuid)?; let src = src.as_ref().join(index.uid); - load_index(&src, &dst, uuid, index.meta.primary_key.as_deref(), size)?; + load_index( + &src, + &dst, + 
uuid, + index.meta.primary_key.as_deref(), + size, + indexer_options, + )?; } Ok(()) } } -//This is the settings used in the last version of meilisearch exporting dump in V1 +// These are the settings used in legacy meilisearch (, size: usize, + indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); - std::fs::create_dir_all(&index_path)?; + create_dir_all(&index_path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); let index = milli::Index::new(options, index_path)?; @@ -99,31 +103,37 @@ fn load_index( // extract `settings.json` file and import content let settings = import_settings(&src)?; let settings: index_controller::Settings = settings.into(); - let update_builder = UpdateBuilder::new(0); - index.update_settings(&settings.check(), update_builder)?; - let update_builder = UpdateBuilder::new(0); + let mut txn = index.write_txn()?; + + let handler = UpdateHandler::new(&indexer_options)?; + + index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?; + let file = File::open(&src.as_ref().join("documents.jsonl"))?; let mut reader = std::io::BufReader::new(file); reader.fill_buf()?; if !reader.buffer().is_empty() { - index.update_documents( + index.update_documents_txn( + &mut txn, UpdateFormat::JsonStream, IndexDocumentsMethod::ReplaceDocuments, Some(reader), - update_builder, + handler.update_builder(0), primary_key, )?; } - // the last step: we extract the original milli::Index and close it + txn.commit()?; + + // Finaly, we extract the original milli::Index and close it Arc::try_unwrap(index.0) - .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") + .map_err(|_e| "Couln't close index properly") .unwrap() .prepare_for_closing() .wait(); - // Ignore updates in v1. + // Updates are ignored in dumps V1. 
Ok(()) } @@ -172,7 +182,7 @@ impl From for index_controller::Settings { /// Extract Settings from `settings.json` file present at provided `dir_path` fn import_settings(dir_path: impl AsRef) -> anyhow::Result { - let path = dbg!(dir_path.as_ref().join("settings.json")); + let path = dir_path.as_ref().join("settings.json"); let file = File::open(path)?; let reader = std::io::BufReader::new(file); let metadata = serde_json::from_reader(reader)?; diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs index 96001902d..eddd8a3b7 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs @@ -4,23 +4,21 @@ use chrono::{DateTime, Utc}; use log::info; use serde::{Deserialize, Serialize}; -use crate::{ - index::Index, - index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}, - option::IndexerOpts, -}; +use crate::index::Index; +use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}; +use crate::option::IndexerOpts; #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct MetadataV2 { db_version: String, - index_db_size: u64, - update_db_size: u64, + index_db_size: usize, + update_db_size: usize, dump_date: DateTime, } impl MetadataV2 { - pub fn new(index_db_size: u64, update_db_size: u64) -> Self { + pub fn new(index_db_size: usize, update_db_size: usize) -> Self { Self { db_version: env!("CARGO_PKG_VERSION").to_string(), index_db_size, @@ -33,9 +31,8 @@ impl MetadataV2 { self, src: impl AsRef, dst: impl AsRef, - // TODO: use these variable to test if loading the index is possible. 
- _index_db_size: u64, - _update_db_size: u64, + index_db_size: usize, + update_db_size: usize, indexing_options: &IndexerOpts, ) -> anyhow::Result<()> { info!( @@ -47,14 +44,14 @@ impl MetadataV2 { HeedUuidStore::load_dump(src.as_ref(), &dst)?; info!("Loading updates."); - UpdateStore::load_dump(&src, &dst, self.update_db_size)?; + UpdateStore::load_dump(&src, &dst, update_db_size)?; - info!("Loading indexes"); + info!("Loading indexes."); let indexes_path = src.as_ref().join("indexes"); let indexes = indexes_path.read_dir()?; for index in indexes { let index = index?; - Index::load_dump(&index.path(), &dst, self.index_db_size, indexing_options)?; + Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?; } Ok(()) diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index dde04bc12..0bddaf7a3 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -8,6 +8,7 @@ use log::{error, info, warn}; use mockall::automock; use serde::{Deserialize, Serialize}; use thiserror::Error; +use tokio::fs::create_dir_all; use loaders::v1::MetadataV1; use loaders::v2::MetadataV2; @@ -15,7 +16,6 @@ use loaders::v2::MetadataV2; pub use actor::DumpActor; pub use handle_impl::*; pub use message::DumpMsg; -use tokio::fs::create_dir_all; use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; use crate::{helpers::compression, option::IndexerOpts}; @@ -61,7 +61,7 @@ pub enum Metadata { } impl Metadata { - pub fn new_v2(index_db_size: u64, update_db_size: u64) -> Self { + pub fn new_v2(index_db_size: usize, update_db_size: usize) -> Self { let meta = MetadataV2::new(index_db_size, update_db_size); Self::V2(meta) } @@ -117,8 +117,8 @@ impl DumpInfo { pub fn load_dump( dst_path: impl AsRef, src_path: impl AsRef, - index_db_size: u64, - update_db_size: u64, + index_db_size: usize, + update_db_size: usize, 
indexer_opts: &IndexerOpts, ) -> anyhow::Result<()> { let tmp_src = tempfile::tempdir_in(".")?; @@ -139,7 +139,7 @@ pub fn load_dump( match meta { Metadata::V1(meta) => { - meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size as usize)? + meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)? } Metadata::V2(meta) => meta.load_dump( &tmp_src_path, @@ -166,8 +166,8 @@ struct DumpTask { uuid_resolver: U, update_handle: P, uid: String, - update_db_size: u64, - index_db_size: u64, + update_db_size: usize, + index_db_size: usize, } impl DumpTask diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 18ba6dee3..0615bb731 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -97,8 +97,8 @@ impl IndexController { load_dump( &options.db_path, src_path, - options.max_mdb_size.get_bytes(), - options.max_udb_size.get_bytes(), + options.max_mdb_size.get_bytes() as usize, + options.max_udb_size.get_bytes() as usize, &options.indexer_options, )?; } @@ -116,8 +116,8 @@ impl IndexController { &options.dumps_dir, uuid_resolver.clone(), update_handle.clone(), - options.max_mdb_size.get_bytes(), - options.max_udb_size.get_bytes(), + options.max_mdb_size.get_bytes() as usize, + options.max_udb_size.get_bytes() as usize, )?; if options.schedule_snapshot { diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index 40bba4e2b..7779f2556 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -197,7 +197,7 @@ where async fn handle_dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { let index_handle = self.index_handle.clone(); let update_store = self.store.clone(); - println!("starting dump"); + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { update_store.dump(&uuids, 
path.to_path_buf(), index_handle)?; Ok(()) diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index 6dfb300e2..8f947e459 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -129,7 +129,7 @@ impl UpdateStore { pub fn load_dump( src: impl AsRef, dst: impl AsRef, - db_size: u64, + db_size: usize, ) -> anyhow::Result<()> { let dst_update_path = dst.as_ref().join("updates/"); create_dir_all(&dst_update_path)?; diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index 006549fb6..28204f4c0 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -589,9 +589,7 @@ mod test { let uuid = Uuid::new_v4(); let store_clone = update_store.clone(); tokio::task::spawn_blocking(move || { - store_clone - .register_update(meta, None, uuid) - .unwrap(); + store_clone.register_update(meta, None, uuid).unwrap(); }) .await .unwrap(); diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 6289cefcd..1d6ada269 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -1,14 +1,10 @@ +use std::collections::HashSet; use std::fs::{create_dir_all, File}; +use std::io::{BufRead, BufReader, Write}; use std::path::{Path, PathBuf}; -use std::{ - collections::HashSet, - io::{BufRead, BufReader, Write}, -}; -use heed::{ - types::{ByteSlice, Str}, - CompactionOption, Database, Env, EnvOpenOptions, -}; +use heed::types::{ByteSlice, Str}; +use heed::{CompactionOption, Database, Env, EnvOpenOptions}; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -21,6 
+17,8 @@ struct DumpEntry { uid: String, } +const UUIDS_DB_PATH: &str = "index_uuids"; + #[async_trait::async_trait] pub trait UuidStore: Sized { // Create a new entry for `name`. Return an error if `err` and the entry already exists, return @@ -43,7 +41,7 @@ pub struct HeedUuidStore { impl HeedUuidStore { pub fn new(path: impl AsRef) -> anyhow::Result { - let path = path.as_ref().join("index_uuids"); + let path = path.as_ref().join(UUIDS_DB_PATH); create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(UUID_STORE_SIZE); // 1GB @@ -137,7 +135,7 @@ impl HeedUuidStore { // only perform snapshot if there are indexes if !entries.is_empty() { - path.push("index_uuids"); + path.push(UUIDS_DB_PATH); create_dir_all(&path).unwrap(); path.push("data.mdb"); env.copy_to_path(path, CompactionOption::Enabled)?; @@ -150,7 +148,7 @@ impl HeedUuidStore { } pub fn dump(&self, path: PathBuf) -> Result> { - let dump_path = path.join("index_uuids"); + let dump_path = path.join(UUIDS_DB_PATH); create_dir_all(&dump_path)?; let dump_file_path = dump_path.join("data.jsonl"); let mut dump_file = File::create(&dump_file_path)?; @@ -173,10 +171,10 @@ impl HeedUuidStore { } pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - let uuid_resolver_path = dst.as_ref().join("uuid_resolver/"); + let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH); std::fs::create_dir_all(&uuid_resolver_path)?; - let src_indexes = src.as_ref().join("index_uuids/data.jsonl"); + let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl"); let indexes = File::open(&src_indexes)?; let mut indexes = BufReader::new(indexes); let mut line = String::new(); From df6ba0e8246c23e18ad6aede46035c71de0b742f Mon Sep 17 00:00:00 2001 From: marin Date: Tue, 1 Jun 2021 11:18:37 +0200 Subject: [PATCH 52/54] Apply suggestions from code review Co-authored-by: Irevoire --- meilisearch-http/src/index/dump.rs | 2 +- .../src/index_controller/dump_actor/loaders/v1.rs | 10 
+--------- .../src/index_controller/dump_actor/mod.rs | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs index 247c02085..13e6cbc02 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-http/src/index/dump.rs @@ -24,7 +24,7 @@ const DATA_FILE_NAME: &str = "documents.jsonl"; impl Index { pub fn dump(&self, path: impl AsRef) -> anyhow::Result<()> { - // acquire write txn make sure any ongoing write is finnished before we start. + // acquire write txn make sure any ongoing write is finished before we start. let txn = self.env.write_txn()?; self.dump_documents(&txn, &path)?; diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs index 89893998e..decd67f87 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -76,14 +76,6 @@ struct Settings { pub attributes_for_faceting: Option>>, } -impl std::ops::Deref for Settings { - type Target = Option>>; - - fn deref(&self) -> &Self::Target { - &self.stop_words - } -} - fn load_index( src: impl AsRef, dst: impl AsRef, @@ -128,7 +120,7 @@ fn load_index( // Finaly, we extract the original milli::Index and close it Arc::try_unwrap(index.0) - .map_err(|_e| "Couln't close index properly") + .map_err(|_e| "Couldn't close the index properly") .unwrap() .prepare_for_closing() .wait(); diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index 0bddaf7a3..66f081e87 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -200,7 +200,7 @@ where let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; compression::to_tar_gz(temp_dump_path, temp_dump_file.path())?; - let dump_path = 
self.path.join(format!("{}.dump", self.uid)); + let dump_path = self.path.join(self.uid).with_extension("dump"); temp_dump_file.persist(&dump_path)?; Ok(dump_path) From d0552e765e8fecda614c6ded7b3505d3cc62fbc9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 1 Jun 2021 20:15:51 +0200 Subject: [PATCH 53/54] forbid deserialization of Setting --- meilisearch-http/src/index/update_handler.rs | 2 +- meilisearch-http/src/index/updates.rs | 5 +++-- meilisearch-http/src/index_controller/mod.rs | 2 +- meilisearch-http/src/index_controller/updates.rs | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/src/index/update_handler.rs b/meilisearch-http/src/index/update_handler.rs index 6a303b4ce..8a127168e 100644 --- a/meilisearch-http/src/index/update_handler.rs +++ b/meilisearch-http/src/index/update_handler.rs @@ -82,7 +82,7 @@ impl UpdateHandler { ), ClearDocuments => index.clear_documents(update_builder), DeleteDocuments => index.delete_documents(content, update_builder), - Settings(settings) => index.update_settings(settings, update_builder), + Settings(settings) => index.update_settings(&settings.clone().check(), update_builder), }; match result { diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 046823fb7..b4869fa42 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -20,14 +20,15 @@ where s.serialize_some(&field.as_ref().map(|o| o.as_ref().unwrap_or(&wildcard))) } -#[derive(Clone, Default, Debug)] +#[derive(Clone, Default, Debug, Serialize)] pub struct Checked; -#[derive(Clone, Default, Debug)] +#[derive(Clone, Default, Debug, Serialize, Deserialize)] pub struct Unchecked; #[derive(Debug, Clone, Default, Serialize, Deserialize)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))] pub struct Settings { #[serde( default, diff --git 
a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 0615bb731..f562d2185 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -223,7 +223,7 @@ impl IndexController { create: bool, ) -> anyhow::Result { let perform_udpate = |uuid| async move { - let meta = UpdateMeta::Settings(settings); + let meta = UpdateMeta::Settings(settings.into_unchecked()); // Nothing so send, drop the sender right away, as not to block the update actor. let (_, receiver) = mpsc::channel(1); self.update_handle.update(meta, receiver, uuid).await diff --git a/meilisearch-http/src/index_controller/updates.rs b/meilisearch-http/src/index_controller/updates.rs index 0aacf9b6c..303289df3 100644 --- a/meilisearch-http/src/index_controller/updates.rs +++ b/meilisearch-http/src/index_controller/updates.rs @@ -3,7 +3,7 @@ use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateFormat}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use crate::index::{Checked, Settings}; +use crate::index::{Unchecked, Settings}; pub type UpdateError = String; @@ -24,7 +24,7 @@ pub enum UpdateMeta { }, ClearDocuments, DeleteDocuments, - Settings(Settings), + Settings(Settings), } #[derive(Debug, Serialize, Deserialize, Clone)] From 2d7785ae0c5f8846d89b52d0ccb9e74b30277a60 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 1 Jun 2021 20:27:12 +0200 Subject: [PATCH 54/54] remove the dump_batch_size option from the CLI --- meilisearch-http/src/option.rs | 5 ----- meilisearch-http/tests/common/server.rs | 1 - 2 files changed, 6 deletions(-) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 87238c4d7..eb81ab9fd 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -202,11 +202,6 @@ pub struct Opt { #[structopt(long, conflicts_with = "import-snapshot")] pub import_dump: Option, - /// The batch size used in the importation process, the 
bigger it is the faster the dump is created. - /// This options is now deprecated and will be ignored - #[structopt(long, env = "MEILI_DUMP_BATCH_SIZE", default_value = "1024")] - pub dump_batch_size: usize, - #[structopt(flatten)] pub indexer_options: IndexerOpts, } diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 3c50110c3..0fb801d7f 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -68,7 +68,6 @@ pub fn default_settings(dir: impl AsRef) -> Opt { Opt { db_path: dir.as_ref().join("db"), dumps_dir: dir.as_ref().join("dump"), - dump_batch_size: 16, http_addr: "127.0.0.1:7700".to_owned(), master_key: None, env: "development".to_owned(),