restore snapshots

This commit is contained in:
mpostma 2021-09-27 16:48:03 +02:00
parent b9d189bf12
commit 90018755c5
15 changed files with 397 additions and 317 deletions

View File

@ -1,4 +1,4 @@
pub mod compression; //pub mod compression;
mod env; mod env;
pub use env::EnvSizer; pub use env::EnvSizer;

View File

@ -1,4 +1,4 @@
use std::env; use std::{env, path::Path, time::Duration};
use actix_web::HttpServer; use actix_web::HttpServer;
use meilisearch_http::{create_app, Opt}; use meilisearch_http::{create_app, Opt};
@ -12,6 +12,7 @@ use meilisearch_http::analytics;
#[global_allocator] #[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
/// does all the setup before meilisearch is launched
fn setup(opt: &Opt) -> anyhow::Result<()> { fn setup(opt: &Opt) -> anyhow::Result<()> {
let mut log_builder = env_logger::Builder::new(); let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters(&opt.log_level); log_builder.parse_filters(&opt.log_level);
@ -22,12 +23,19 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
log_builder.init(); log_builder.init();
Ok(())
}
/// Cleans and setup the temporary file folder in the database directory. This must be done after
/// the meilisearch instance has been created, to not interfere with the snapshot and dump loading.
fn setup_temp_dir(db_path: impl AsRef<Path>) -> anyhow::Result<()> {
// Set the tempfile directory in the current db path, to avoid cross device references. Also // Set the tempfile directory in the current db path, to avoid cross device references. Also
// remove the previous outstanding files found there // remove the previous outstanding files found there
// //
// TODO: if two processes open the same db, one might delete the other tmpdir. Need to make // TODO: if two processes open the same db, one might delete the other tmpdir. Need to make
// sure that no one is using it before deleting it. // sure that no one is using it before deleting it.
let temp_path = opt.db_path.join("tmp"); let temp_path = db_path.as_ref().join("tmp");
// Ignore error if tempdir doesn't exist // Ignore error if tempdir doesn't exist
let _ = std::fs::remove_dir_all(&temp_path); let _ = std::fs::remove_dir_all(&temp_path);
std::fs::create_dir_all(&temp_path)?; std::fs::create_dir_all(&temp_path)?;
@ -48,15 +56,21 @@ fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot) .set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
.set_dump_dst(opt.dumps_dir.clone()) .set_dump_dst(opt.dumps_dir.clone())
.set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec))
.set_snapshot_dir(opt.snapshot_dir.clone()); .set_snapshot_dir(opt.snapshot_dir.clone());
if let Some(ref path) = opt.import_snapshot { if let Some(ref path) = opt.import_snapshot {
meilisearch.set_import_snapshot(path.clone()); meilisearch.set_import_snapshot(path.clone());
} }
if let Some(ref path) = opt.import_dump { if let Some(ref path) = opt.import_dump {
meilisearch.set_dump_src(path.clone()); meilisearch.set_dump_src(path.clone());
} }
if opt.schedule_snapshot {
meilisearch.set_schedule_snapshot();
}
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone()) meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
} }
@ -78,6 +92,8 @@ async fn main() -> anyhow::Result<()> {
let meilisearch = setup_meilisearch(&opt)?; let meilisearch = setup_meilisearch(&opt)?;
setup_temp_dir(&opt.db_path)?;
#[cfg(all(not(debug_assertions), feature = "analytics"))] #[cfg(all(not(debug_assertions), feature = "analytics"))]
if !opt.no_analytics { if !opt.no_analytics {
let analytics_data = meilisearch.clone(); let analytics_data = meilisearch.clone();

View File

@ -16,11 +16,11 @@ pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Resul
Ok(()) Ok(())
} }
pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> { //pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
let f = File::open(&src)?; //let f = File::open(&src)?;
let gz = GzDecoder::new(f); //let gz = GzDecoder::new(f);
let mut ar = Archive::new(gz); //let mut ar = Archive::new(gz);
create_dir_all(&dest)?; //create_dir_all(&dest)?;
ar.unpack(&dest)?; //ar.unpack(&dest)?;
Ok(()) //Ok(())
} //}

View File

@ -71,11 +71,15 @@ impl IndexMeta {
} }
} }
#[derive(Clone)] #[derive(Clone, derivative::Derivative)]
#[derivative(Debug)]
pub struct Index { pub struct Index {
pub uuid: Uuid, pub uuid: Uuid,
#[derivative(Debug="ignore")]
pub inner: Arc<milli::Index>, pub inner: Arc<milli::Index>,
#[derivative(Debug="ignore")]
update_file_store: Arc<UpdateFileStore>, update_file_store: Arc<UpdateFileStore>,
#[derivative(Debug="ignore")]
update_handler: Arc<UpdateHandler>, update_handler: Arc<UpdateHandler>,
} }
@ -258,4 +262,13 @@ impl Index {
displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid)); displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
Ok(displayed_fields_ids) Ok(displayed_fields_ids)
} }
pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
let mut dst = path.as_ref().join(format!("indexes/{}/", self.uuid));
create_dir_all(&dst)?;
dst.push("data.mdb");
let _txn = self.write_txn()?;
self.inner.env.copy_to_path(dst, heed::CompactionOption::Enabled)?;
Ok(())
}
} }

View File

@ -57,7 +57,7 @@ impl IndexStore for MapIndexStore {
if let Some(index) = lock.get(&uuid) { if let Some(index) = lock.get(&uuid) {
return Ok(index.clone()); return Ok(index.clone());
} }
let path = self.path.join(format!("index-{}", uuid)); let path = self.path.join(format!("{}", uuid));
if path.exists() { if path.exists() {
return Err(IndexResolverError::IndexAlreadyExists); return Err(IndexResolverError::IndexAlreadyExists);
} }
@ -92,7 +92,7 @@ impl IndexStore for MapIndexStore {
None => { None => {
// drop the guard here so we can perform the write after without deadlocking; // drop the guard here so we can perform the write after without deadlocking;
drop(guard); drop(guard);
let path = self.path.join(format!("index-{}", uuid)); let path = self.path.join(format!("{}", uuid));
if !path.exists() { if !path.exists() {
return Ok(None); return Ok(None);
} }
@ -108,7 +108,7 @@ impl IndexStore for MapIndexStore {
} }
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> { async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> {
let db_path = self.path.join(format!("index-{}", uuid)); let db_path = self.path.join(format!("{}", uuid));
fs::remove_dir_all(db_path).await?; fs::remove_dir_all(db_path).await?;
let index = self.index_store.write().await.remove(&uuid); let index = self.index_store.write().await.remove(&uuid);
Ok(index) Ok(index)

View File

@ -45,10 +45,18 @@ where U: UuidStore,
pub async fn get_size(&self) -> Result<u64> { pub async fn get_size(&self) -> Result<u64> {
todo!() todo!()
//Ok(self.index_store.get_size()? + self.index_uuid_store.get_size().await?)
} }
pub async fn perform_snapshot(&self, _path: impl AsRef<Path>) -> Result<()> { pub async fn snapshot(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
todo!() let uuids = self.index_uuid_store.snapshot(path.as_ref().to_owned()).await?;
let mut indexes = Vec::new();
for uuid in uuids {
indexes.push(self.get_index_by_uuid(uuid).await?);
}
Ok(indexes)
} }
pub async fn create_index(&self, uid: String, primary_key: Option<String>) -> Result<(Uuid, Index)> { pub async fn create_index(&self, uid: String, primary_key: Option<String>) -> Result<(Uuid, Index)> {

View File

@ -46,8 +46,9 @@ impl HeedUuidStore {
create_dir_all(&path)?; create_dir_all(&path)?;
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(UUID_STORE_SIZE); // 1GB options.map_size(UUID_STORE_SIZE); // 1GB
options.max_dbs(1);
let env = options.open(path)?; let env = options.open(path)?;
let db = env.create_database(None)?; let db = env.create_database(Some("uuids"))?;
Ok(Self { env, db }) Ok(Self { env, db })
} }

View File

@ -20,6 +20,7 @@ use snapshot::load_snapshot;
use crate::index::{Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked}; use crate::index::{Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked};
use crate::index_controller::index_resolver::create_index_resolver; use crate::index_controller::index_resolver::create_index_resolver;
use crate::index_controller::snapshot::SnapshotService;
use crate::options::IndexerOpts; use crate::options::IndexerOpts;
use error::Result; use error::Result;
use crate::index::error::Result as IndexResult; use crate::index::error::Result as IndexResult;
@ -75,7 +76,7 @@ pub struct IndexSettings {
#[derive(Clone)] #[derive(Clone)]
pub struct IndexController { pub struct IndexController {
index_resolver: Arc<HardStateIndexResolver>, index_resolver: Arc<HardStateIndexResolver>,
update_handle: updates::UpdateSender, update_sender: updates::UpdateSender,
dump_handle: dump_actor::DumpActorHandleImpl, dump_handle: dump_actor::DumpActorHandleImpl,
} }
@ -113,8 +114,10 @@ pub struct IndexControllerBuilder {
max_update_store_size: Option<usize>, max_update_store_size: Option<usize>,
snapshot_dir: Option<PathBuf>, snapshot_dir: Option<PathBuf>,
import_snapshot: Option<PathBuf>, import_snapshot: Option<PathBuf>,
snapshot_interval: Option<Duration>,
ignore_snapshot_if_db_exists: bool, ignore_snapshot_if_db_exists: bool,
ignore_missing_snapshot: bool, ignore_missing_snapshot: bool,
schedule_snapshot: bool,
dump_src: Option<PathBuf>, dump_src: Option<PathBuf>,
dump_dst: Option<PathBuf>, dump_dst: Option<PathBuf>,
} }
@ -155,36 +158,36 @@ impl IndexControllerBuilder {
let index_resolver = Arc::new(create_index_resolver(&db_path, index_size, &indexer_options)?); let index_resolver = Arc::new(create_index_resolver(&db_path, index_size, &indexer_options)?);
#[allow(unreachable_code)] #[allow(unreachable_code)]
let update_handle = updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?; let update_sender = updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?;
let dump_path = self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; let dump_path = self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?;
let dump_handle = dump_actor::DumpActorHandleImpl::new( let dump_handle = dump_actor::DumpActorHandleImpl::new(
dump_path, dump_path,
index_resolver.clone(), index_resolver.clone(),
update_handle.clone(), update_sender.clone(),
index_size, index_size,
update_store_size, update_store_size,
)?; )?;
//if options.schedule_snapshot { if self.schedule_snapshot {
//let snapshot_service = SnapshotService::new( let snapshot_service = SnapshotService::new(
//uuid_resolver.clone(), index_resolver.clone(),
//update_handle.clone(), update_sender.clone(),
//Duration::from_secs(options.snapshot_interval_sec), self.snapshot_interval.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?,
//options.snapshot_dir.clone(), self.snapshot_dir.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?,
//options db_path
//.db_path .as_ref()
//.file_name() .file_name()
//.map(|n| n.to_owned().into_string().expect("invalid path")) .map(|n| n.to_owned().into_string().expect("invalid path"))
//.unwrap_or_else(|| String::from("data.ms")), .unwrap_or_else(|| String::from("data.ms")),
//); );
//tokio::task::spawn(snapshot_service.run()); tokio::task::spawn(snapshot_service.run());
//} }
Ok(IndexController { Ok(IndexController {
index_resolver, index_resolver,
update_handle, update_sender,
dump_handle, dump_handle,
}) })
} }
@ -238,6 +241,18 @@ impl IndexControllerBuilder {
self.import_snapshot.replace(import_snapshot); self.import_snapshot.replace(import_snapshot);
self self
} }
/// Set the index controller builder's snapshot interval sec.
pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self {
self.snapshot_interval = Some(snapshot_interval);
self
}
/// Set the index controller builder's schedule snapshot.
pub fn set_schedule_snapshot(&mut self) -> &mut Self {
self.schedule_snapshot = true;
self
}
} }
impl IndexController { impl IndexController {
@ -248,12 +263,12 @@ impl IndexController {
pub async fn register_update(&self, uid: String, update: Update) -> Result<UpdateStatus> { pub async fn register_update(&self, uid: String, update: Update) -> Result<UpdateStatus> {
match self.index_resolver.get_uuid(uid).await { match self.index_resolver.get_uuid(uid).await {
Ok(uuid) => { Ok(uuid) => {
let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?;
Ok(update_result) Ok(update_result)
} }
Err(IndexResolverError::UnexistingIndex(name)) => { Err(IndexResolverError::UnexistingIndex(name)) => {
let (uuid, _) = self.index_resolver.create_index(name, None).await?; let (uuid, _) = self.index_resolver.create_index(name, None).await?;
let update_result = UpdateMsg::update(&self.update_handle, uuid, update).await?; let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?;
// ignore if index creation fails now, since it may already have been created // ignore if index creation fails now, since it may already have been created
Ok(update_result) Ok(update_result)
@ -389,13 +404,13 @@ impl IndexController {
pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> { pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> {
let uuid = self.index_resolver.get_uuid(uid).await?; let uuid = self.index_resolver.get_uuid(uid).await?;
let result = UpdateMsg::get_update(&self.update_handle, uuid, id).await?; let result = UpdateMsg::get_update(&self.update_sender, uuid, id).await?;
Ok(result) Ok(result)
} }
pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> { pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> {
let uuid = self.index_resolver.get_uuid(uid).await?; let uuid = self.index_resolver.get_uuid(uid).await?;
let result = UpdateMsg::list_updates(&self.update_handle, uuid).await?; let result = UpdateMsg::list_updates(&self.update_sender, uuid).await?;
Ok(result) Ok(result)
} }
@ -490,7 +505,7 @@ impl IndexController {
} }
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> { pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
let update_infos = UpdateMsg::get_info(&self.update_handle).await?; let update_infos = UpdateMsg::get_info(&self.update_sender).await?;
let index = self.index_resolver.get_index(uid).await?; let index = self.index_resolver.get_index(uid).await?;
let uuid = index.uuid; let uuid = index.uuid;
let mut stats = spawn_blocking(move || index.stats()).await??; let mut stats = spawn_blocking(move || index.stats()).await??;
@ -500,7 +515,7 @@ impl IndexController {
} }
pub async fn get_all_stats(&self) -> Result<Stats> { pub async fn get_all_stats(&self) -> Result<Stats> {
let update_infos = UpdateMsg::get_info(&self.update_handle).await?; let update_infos = UpdateMsg::get_info(&self.update_sender).await?;
let mut database_size = self.get_uuids_size().await? + update_infos.size; let mut database_size = self.get_uuids_size().await? + update_infos.size;
let mut last_update: Option<DateTime<_>> = None; let mut last_update: Option<DateTime<_>> = None;
let mut indexes = BTreeMap::new(); let mut indexes = BTreeMap::new();

View File

@ -1,88 +1,94 @@
use std::path::Path; use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use anyhow::bail; use anyhow::bail;
use log::{error, info, trace};
use tokio::task::spawn_blocking;
use tokio::time::sleep;
use tokio::fs;
//pub struct SnapshotService<U, R> { use crate::index_controller::updates::UpdateMsg;
//uuid_resolver_handle: R,
//update_handle: U,
//snapshot_period: Duration,
//snapshot_path: PathBuf,
//db_name: String,
//}
//impl<U, R> SnapshotService<U, R> use super::updates::UpdateSender;
//where use super::index_resolver::HardStateIndexResolver;
//U: UpdateActorHandle,
//R: UuidResolverHandle,
//{
//pub fn new(
//uuid_resolver_handle: R,
//update_handle: U,
//snapshot_period: Duration,
//snapshot_path: PathBuf,
//db_name: String,
//) -> Self {
//Self {
//uuid_resolver_handle,
//update_handle,
//snapshot_period,
//snapshot_path,
//db_name,
//}
//}
//pub async fn run(self) { pub struct SnapshotService {
//info!( index_resolver: Arc<HardStateIndexResolver>,
//"Snapshot scheduled every {}s.", update_sender: UpdateSender,
//self.snapshot_period.as_secs() snapshot_period: Duration,
//); snapshot_path: PathBuf,
//loop { db_name: String,
//if let Err(e) = self.perform_snapshot().await { }
//error!("Error while performing snapshot: {}", e);
//}
//sleep(self.snapshot_period).await;
//}
//}
//async fn perform_snapshot(&self) -> anyhow::Result<()> { impl SnapshotService {
//trace!("Performing snapshot."); pub fn new(
index_resolver: Arc<HardStateIndexResolver>,
update_sender: UpdateSender,
snapshot_period: Duration,
snapshot_path: PathBuf,
db_name: String,
) -> Self {
Self {
index_resolver,
update_sender,
snapshot_period,
snapshot_path,
db_name,
}
}
//let snapshot_dir = self.snapshot_path.clone(); pub async fn run(self) {
//fs::create_dir_all(&snapshot_dir).await?; info!(
//let temp_snapshot_dir = "Snapshot scheduled every {}s.",
//spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??; self.snapshot_period.as_secs()
//let temp_snapshot_path = temp_snapshot_dir.path().to_owned(); );
loop {
if let Err(e) = self.perform_snapshot().await {
error!("Error while performing snapshot: {}", e);
}
sleep(self.snapshot_period).await;
}
}
//let uuids = self async fn perform_snapshot(&self) -> anyhow::Result<()> {
//.uuid_resolver_handle trace!("Performing snapshot.");
//.snapshot(temp_snapshot_path.clone())
//.await?;
//if uuids.is_empty() { let snapshot_dir = self.snapshot_path.clone();
//return Ok(()); fs::create_dir_all(&snapshot_dir).await?;
//} let temp_snapshot_dir =
spawn_blocking(move || tempfile::tempdir_in(snapshot_dir)).await??;
let temp_snapshot_path = temp_snapshot_dir.path().to_owned();
//self.update_handle let indexes = self
//.snapshot(uuids, temp_snapshot_path.clone()) .index_resolver
//.await?; .snapshot(temp_snapshot_path.clone())
//let snapshot_dir = self.snapshot_path.clone(); .await?;
//let snapshot_path = self
//.snapshot_path
//.join(format!("{}.snapshot", self.db_name));
//let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
//let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?;
//let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
//compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
//temp_snapshot_file.persist(&snapshot_path)?;
//Ok(snapshot_path)
//})
//.await??;
//trace!("Created snapshot in {:?}.", snapshot_path); if indexes.is_empty() {
return Ok(());
}
//Ok(()) UpdateMsg::snapshot(&self.update_sender, temp_snapshot_path.clone(), indexes).await?;
//}
//} let snapshot_dir = self.snapshot_path.clone();
let snapshot_path = self
.snapshot_path
.join(format!("{}.snapshot", self.db_name));
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
let temp_snapshot_file = tempfile::NamedTempFile::new_in(snapshot_dir)?;
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
temp_snapshot_file.persist(&snapshot_path)?;
Ok(snapshot_path)
})
.await??;
trace!("Created snapshot in {:?}.", snapshot_path);
Ok(())
}
}
pub fn load_snapshot( pub fn load_snapshot(
db_path: impl AsRef<Path>, db_path: impl AsRef<Path>,
@ -94,7 +100,7 @@ pub fn load_snapshot(
match crate::from_tar_gz(snapshot_path, &db_path) { match crate::from_tar_gz(snapshot_path, &db_path) {
Ok(()) => Ok(()), Ok(()) => Ok(()),
Err(e) => { Err(e) => {
// clean created db folder //clean created db folder
std::fs::remove_dir_all(&db_path)?; std::fs::remove_dir_all(&db_path)?;
Err(e) Err(e)
} }
@ -120,140 +126,140 @@ pub fn load_snapshot(
} }
} }
#[cfg(test)] //#[cfg(test)]
mod test { //mod test {
use std::iter::FromIterator; //use std::iter::FromIterator;
use std::{collections::HashSet, sync::Arc}; //use std::{collections::HashSet, sync::Arc};
use futures::future::{err, ok}; //use futures::future::{err, ok};
use rand::Rng; //use rand::Rng;
use tokio::time::timeout; //use tokio::time::timeout;
use uuid::Uuid; //use uuid::Uuid;
use super::*; //use super::*;
use crate::index_controller::index_actor::MockIndexActorHandle; //use crate::index_controller::index_actor::MockIndexActorHandle;
use crate::index_controller::updates::{ //use crate::index_controller::updates::{
error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl, //error::UpdateActorError, MockUpdateActorHandle, UpdateActorHandleImpl,
}; //};
use crate::index_controller::uuid_resolver::{ //use crate::index_controller::uuid_resolver::{
error::UuidResolverError, MockUuidResolverHandle, //error::UuidResolverError, MockUuidResolverHandle,
}; //};
#[actix_rt::test] //#[actix_rt::test]
async fn test_normal() { //async fn test_normal() {
let mut rng = rand::thread_rng(); //let mut rng = rand::thread_rng();
let uuids_num: usize = rng.gen_range(5..10); //let uuids_num: usize = rng.gen_range(5..10);
let uuids = (0..uuids_num) //let uuids = (0..uuids_num)
.map(|_| Uuid::new_v4()) //.map(|_| Uuid::new_v4())
.collect::<HashSet<_>>(); //.collect::<HashSet<_>>();
let mut uuid_resolver = MockUuidResolverHandle::new(); //let mut uuid_resolver = MockUuidResolverHandle::new();
let uuids_clone = uuids.clone(); //let uuids_clone = uuids.clone();
uuid_resolver //uuid_resolver
.expect_snapshot() //.expect_snapshot()
.times(1) //.times(1)
.returning(move |_| Box::pin(ok(uuids_clone.clone()))); //.returning(move |_| Box::pin(ok(uuids_clone.clone())));
let uuids_clone = uuids.clone(); //let uuids_clone = uuids.clone();
let mut index_handle = MockIndexActorHandle::new(); //let mut index_handle = MockIndexActorHandle::new();
index_handle //index_handle
.expect_snapshot() //.expect_snapshot()
.withf(move |uuid, _path| uuids_clone.contains(uuid)) //.withf(move |uuid, _path| uuids_clone.contains(uuid))
.times(uuids_num) //.times(uuids_num)
.returning(move |_, _| Box::pin(ok(()))); //.returning(move |_, _| Box::pin(ok(())));
let dir = tempfile::tempdir_in(".").unwrap(); //let dir = tempfile::tempdir_in(".").unwrap();
let handle = Arc::new(index_handle); //let handle = Arc::new(index_handle);
let update_handle = //let update_handle =
UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap(); //UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();
let snapshot_path = tempfile::tempdir_in(".").unwrap(); //let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new( //let snapshot_service = SnapshotService::new(
uuid_resolver, //uuid_resolver,
update_handle, //update_handle,
Duration::from_millis(100), //Duration::from_millis(100),
snapshot_path.path().to_owned(), //snapshot_path.path().to_owned(),
"data.ms".to_string(), //"data.ms".to_string(),
); //);
snapshot_service.perform_snapshot().await.unwrap(); //snapshot_service.perform_snapshot().await.unwrap();
} //}
#[actix_rt::test] //#[actix_rt::test]
async fn error_performing_uuid_snapshot() { //async fn error_performing_uuid_snapshot() {
let mut uuid_resolver = MockUuidResolverHandle::new(); //let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver //uuid_resolver
.expect_snapshot() //.expect_snapshot()
.times(1) //.times(1)
// abitrary error //abitrary error
.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist))); //.returning(|_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
let update_handle = MockUpdateActorHandle::new(); //let update_handle = MockUpdateActorHandle::new();
let snapshot_path = tempfile::tempdir_in(".").unwrap(); //let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new( //let snapshot_service = SnapshotService::new(
uuid_resolver, //uuid_resolver,
update_handle, //update_handle,
Duration::from_millis(100), //Duration::from_millis(100),
snapshot_path.path().to_owned(), //snapshot_path.path().to_owned(),
"data.ms".to_string(), //"data.ms".to_string(),
); //);
assert!(snapshot_service.perform_snapshot().await.is_err()); //assert!(snapshot_service.perform_snapshot().await.is_err());
// Nothing was written to the file //Nothing was written to the file
assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); //assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
} //}
#[actix_rt::test] //#[actix_rt::test]
async fn error_performing_index_snapshot() { //async fn error_performing_index_snapshot() {
let uuid = Uuid::new_v4(); //let uuid = Uuid::new_v4();
let mut uuid_resolver = MockUuidResolverHandle::new(); //let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver //uuid_resolver
.expect_snapshot() //.expect_snapshot()
.times(1) //.times(1)
.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid))))); //.returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid)))));
let mut update_handle = MockUpdateActorHandle::new(); //let mut update_handle = MockUpdateActorHandle::new();
update_handle //update_handle
.expect_snapshot() //.expect_snapshot()
// abitrary error //abitrary error
.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0)))); //.returning(|_, _| Box::pin(err(UpdateActorError::UnexistingUpdate(0))));
let snapshot_path = tempfile::tempdir_in(".").unwrap(); //let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new( //let snapshot_service = SnapshotService::new(
uuid_resolver, //uuid_resolver,
update_handle, //update_handle,
Duration::from_millis(100), //Duration::from_millis(100),
snapshot_path.path().to_owned(), //snapshot_path.path().to_owned(),
"data.ms".to_string(), //"data.ms".to_string(),
); //);
assert!(snapshot_service.perform_snapshot().await.is_err()); //assert!(snapshot_service.perform_snapshot().await.is_err());
// Nothing was written to the file //Nothing was written to the file
assert!(!snapshot_path.path().join("data.ms.snapshot").exists()); //assert!(!snapshot_path.path().join("data.ms.snapshot").exists());
} //}
#[actix_rt::test] //#[actix_rt::test]
async fn test_loop() { //async fn test_loop() {
let mut uuid_resolver = MockUuidResolverHandle::new(); //let mut uuid_resolver = MockUuidResolverHandle::new();
uuid_resolver //uuid_resolver
.expect_snapshot() //.expect_snapshot()
// we expect the funtion to be called between 2 and 3 time in the given interval. //we expect the funtion to be called between 2 and 3 time in the given interval.
.times(2..4) //.times(2..4)
// abitrary error, to short-circuit the function //abitrary error, to short-circuit the function
.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist))); //.returning(move |_| Box::pin(err(UuidResolverError::NameAlreadyExist)));
let update_handle = MockUpdateActorHandle::new(); //let update_handle = MockUpdateActorHandle::new();
let snapshot_path = tempfile::tempdir_in(".").unwrap(); //let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new( //let snapshot_service = SnapshotService::new(
uuid_resolver, //uuid_resolver,
update_handle, //update_handle,
Duration::from_millis(100), //Duration::from_millis(100),
snapshot_path.path().to_owned(), //snapshot_path.path().to_owned(),
"data.ms".to_string(), //"data.ms".to_string(),
); //);
let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await; //let _ = timeout(Duration::from_millis(300), snapshot_service.run()).await;
} //}
} //}

View File

@ -2,9 +2,13 @@ use std::fs::File;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::ops::{Deref, DerefMut}; use std::ops::{Deref, DerefMut};
//use milli::documents::DocumentBatchReader;
//use serde_json::Map;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
use uuid::Uuid; use uuid::Uuid;
const UPDATE_FILES_PATH: &str = "updates/updates_files";
use super::error::Result; use super::error::Result;
pub struct UpdateFile { pub struct UpdateFile {
@ -14,7 +18,6 @@ pub struct UpdateFile {
impl UpdateFile { impl UpdateFile {
pub fn persist(self) { pub fn persist(self) {
println!("persisting in {}", self.path.display());
self.file.persist(&self.path).unwrap(); self.file.persist(&self.path).unwrap();
} }
} }
@ -40,11 +43,14 @@ pub struct UpdateFileStore {
impl UpdateFileStore { impl UpdateFileStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> { pub fn new(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().join("updates/updates_files"); let path = path.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&path).unwrap(); std::fs::create_dir_all(&path).unwrap();
Ok(Self { path }) Ok(Self { path })
} }
/// Created a new temporary update file.
///
/// A call to persist is needed to persist in the database.
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
let file = NamedTempFile::new().unwrap(); let file = NamedTempFile::new().unwrap();
let uuid = Uuid::new_v4(); let uuid = Uuid::new_v4();
@ -54,10 +60,45 @@ impl UpdateFileStore {
Ok((uuid, update_file)) Ok((uuid, update_file))
} }
/// Returns a the file corresponding to the requested uuid.
pub fn get_update(&self, uuid: Uuid) -> Result<File> { pub fn get_update(&self, uuid: Uuid) -> Result<File> {
let path = self.path.join(uuid.to_string()); let path = self.path.join(uuid.to_string());
println!("reading in {}", path.display());
let file = File::open(path).unwrap(); let file = File::open(path).unwrap();
Ok(file) Ok(file)
} }
/// Copies the content of the update file poited to by uuid to dst directory.
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
let src = self.path.join(uuid.to_string());
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
std::fs::create_dir_all(&dst).unwrap();
dst.push(uuid.to_string());
std::fs::copy(src, dst).unwrap();
Ok(())
}
/// Peform a dump of the given update file uuid into the provided snapshot path.
pub fn dump(&self, _uuid: Uuid, _snapshot_path: impl AsRef<Path>) -> Result<()> {
todo!()
//let update_file_path = self.path.join(uuid.to_string());
//let snapshot_file_path: snapshot_path.as_ref().join(format!("update_files/uuid", uuid));
//let update_file = File::open(update_file_path).unwrap();
//let mut document_reader = DocumentBatchReader::from_reader(update_file).unwrap();
//let mut document_buffer = Map::new();
//// TODO: we need to find a way to do this more efficiently. (create a custom serializer to
//// jsonl for example...)
//while let Some((index, document)) = document_reader.next_document_with_index().unwrap() {
//for (field_id, content) in document.iter() {
//let field_name = index.get_by_left(&field_id).unwrap();
//let content = serde_json::from_slice(content).unwrap();
//document_buffer.insert(field_name.to_string(), content);
//}
//}
//Ok(())
}
} }

View File

@ -4,6 +4,8 @@ use std::path::PathBuf;
use tokio::sync::{mpsc, oneshot}; use tokio::sync::{mpsc, oneshot};
use uuid::Uuid; use uuid::Uuid;
use crate::index::Index;
use super::error::Result; use super::error::Result;
use super::{Update, UpdateStatus, UpdateStoreInfo}; use super::{Update, UpdateStatus, UpdateStoreInfo};
@ -28,7 +30,7 @@ pub enum UpdateMsg {
ret: oneshot::Sender<Result<()>>, ret: oneshot::Sender<Result<()>>,
}, },
Snapshot { Snapshot {
uuids: HashSet<Uuid>, indexes: Vec<Index>,
path: PathBuf, path: PathBuf,
ret: oneshot::Sender<Result<()>>, ret: oneshot::Sender<Result<()>>,
}, },
@ -43,17 +45,20 @@ pub enum UpdateMsg {
} }
impl UpdateMsg { impl UpdateMsg {
pub async fn snapshot(sender: &mpsc::Sender<Self>, path: PathBuf, indexes: Vec<Index>) -> Result<()> {
let (ret, rcv) = oneshot::channel();
let msg = Self::Snapshot { path, indexes, ret };
sender.send(msg).await?;
rcv.await?
}
pub async fn dump( pub async fn dump(
sender: &mpsc::Sender<Self>, sender: &mpsc::Sender<Self>,
uuids: HashSet<Uuid>, uuids: HashSet<Uuid>,
path: PathBuf, path: PathBuf,
) -> Result<()> { ) -> Result<()> {
let (ret, rcv) = oneshot::channel(); let (ret, rcv) = oneshot::channel();
let msg = Self::Dump { let msg = Self::Dump { path, uuids, ret };
path,
uuids,
ret,
};
sender.send(msg).await?; sender.send(msg).await?;
rcv.await? rcv.await?
} }
@ -63,11 +68,7 @@ impl UpdateMsg {
update: Update, update: Update,
) -> Result<UpdateStatus> { ) -> Result<UpdateStatus> {
let (ret, rcv) = oneshot::channel(); let (ret, rcv) = oneshot::channel();
let msg = Self::Update { let msg = Self::Update { uuid, update, ret };
uuid,
update,
ret,
};
sender.send(msg).await?; sender.send(msg).await?;
rcv.await? rcv.await?
} }
@ -78,11 +79,7 @@ impl UpdateMsg {
id: u64, id: u64,
) -> Result<UpdateStatus> { ) -> Result<UpdateStatus> {
let (ret, rcv) = oneshot::channel(); let (ret, rcv) = oneshot::channel();
let msg = Self::GetUpdate { let msg = Self::GetUpdate { uuid, id, ret };
uuid,
id,
ret,
};
sender.send(msg).await?; sender.send(msg).await?;
rcv.await? rcv.await?
} }
@ -92,21 +89,14 @@ impl UpdateMsg {
uuid: Uuid, uuid: Uuid,
) -> Result<Vec<UpdateStatus>> { ) -> Result<Vec<UpdateStatus>> {
let (ret, rcv) = oneshot::channel(); let (ret, rcv) = oneshot::channel();
let msg = Self::ListUpdates { let msg = Self::ListUpdates { uuid, ret };
uuid,
ret,
};
sender.send(msg).await?; sender.send(msg).await?;
rcv.await? rcv.await?
} }
pub async fn get_info( pub async fn get_info(sender: &mpsc::Sender<Self>) -> Result<UpdateStoreInfo> {
sender: &mpsc::Sender<Self>,
) -> Result<UpdateStoreInfo> {
let (ret, rcv) = oneshot::channel(); let (ret, rcv) = oneshot::channel();
let msg = Self::GetInfo { let msg = Self::GetInfo { ret };
ret,
};
sender.send(msg).await?; sender.send(msg).await?;
rcv.await? rcv.await?
} }

View File

@ -24,7 +24,7 @@ use uuid::Uuid;
use self::error::{Result, UpdateLoopError}; use self::error::{Result, UpdateLoopError};
pub use self::message::UpdateMsg; pub use self::message::UpdateMsg;
use self::store::{UpdateStore, UpdateStoreInfo}; use self::store::{UpdateStore, UpdateStoreInfo};
use crate::index::{Settings, Unchecked}; use crate::index::{Index, Settings, Unchecked};
use crate::index_controller::update_file_store::UpdateFileStore; use crate::index_controller::update_file_store::UpdateFileStore;
use status::UpdateStatus; use status::UpdateStatus;
@ -123,12 +123,11 @@ impl UpdateLoop {
let must_exit = Arc::new(AtomicBool::new(false)); let must_exit = Arc::new(AtomicBool::new(false));
let store = UpdateStore::open(options, &path, index_resolver.clone(), must_exit.clone())?; let update_file_store = UpdateFileStore::new(&path).unwrap();
let store = UpdateStore::open(options, &path, index_resolver.clone(), must_exit.clone(), update_file_store.clone())?;
let inbox = Some(inbox); let inbox = Some(inbox);
let update_file_store = UpdateFileStore::new(&path).unwrap();
Ok(Self { Ok(Self {
store, store,
inbox, inbox,
@ -179,8 +178,8 @@ impl UpdateLoop {
Delete { uuid, ret } => { Delete { uuid, ret } => {
let _ = ret.send(self.handle_delete(uuid).await); let _ = ret.send(self.handle_delete(uuid).await);
} }
Snapshot { uuids, path, ret } => { Snapshot { indexes, path, ret } => {
let _ = ret.send(self.handle_snapshot(uuids, path).await); let _ = ret.send(self.handle_snapshot(indexes, path).await);
} }
GetInfo { ret } => { GetInfo { ret } => {
let _ = ret.send(self.handle_get_info().await); let _ = ret.send(self.handle_get_info().await);
@ -270,15 +269,13 @@ impl UpdateLoop {
Ok(()) Ok(())
} }
async fn handle_snapshot(&self, _uuids: HashSet<Uuid>,_pathh: PathBuf) -> Result<()> { async fn handle_snapshot(&self, indexes: Vec<Index>, path: PathBuf) -> Result<()> {
todo!() let update_store = self.store.clone();
//let index_handle = self.index_resolver.clone();
//let update_store = self.store.clone();
//tokio::task::spawn_blocking(move || update_store.snapshot(&uuids, &path, index_handle)) tokio::task::spawn_blocking(move || update_store.snapshot(indexes, path))
//.await??; .await??;
//Ok(()) Ok(())
} }
async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> { async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {

View File

@ -45,16 +45,17 @@ impl UpdateStore {
uuids: &HashSet<Uuid>, uuids: &HashSet<Uuid>,
path: impl AsRef<Path>, path: impl AsRef<Path>,
) -> Result<()> { ) -> Result<()> {
let dump_data_path = path.as_ref().join("data.jsonl"); //let dump_data_path = path.as_ref().join("data.jsonl");
let mut dump_data_file = File::create(dump_data_path)?; //let mut dump_data_file = File::create(dump_data_path)?;
let update_files_path = path.as_ref().join(super::UPDATE_DIR); //let update_files_path = path.as_ref().join(super::UPDATE_DIR);
create_dir_all(&update_files_path)?; //create_dir_all(&update_files_path)?;
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?; //self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
self.dump_completed(txn, uuids, &mut dump_data_file)?; //self.dump_completed(txn, uuids, &mut dump_data_file)?;
Ok(()) //Ok(())
todo!()
} }
fn dump_pending( fn dump_pending(

View File

@ -22,6 +22,7 @@ use tokio::sync::mpsc;
use tokio::sync::mpsc::error::TrySendError; use tokio::sync::mpsc::error::TrySendError;
use tokio::time::timeout; use tokio::time::timeout;
use uuid::Uuid; use uuid::Uuid;
use rayon::prelude::*;
use codec::*; use codec::*;
@ -31,12 +32,11 @@ use super::status::{Enqueued, Processing};
use crate::EnvSizer; use crate::EnvSizer;
use crate::index_controller::update_files_path; use crate::index_controller::update_files_path;
use crate::index_controller::updates::*; use crate::index_controller::updates::*;
use crate::index::Index;
#[allow(clippy::upper_case_acronyms)] #[allow(clippy::upper_case_acronyms)]
type BEU64 = U64<heed::byteorder::BE>; type BEU64 = U64<heed::byteorder::BE>;
const UPDATE_DIR: &str = "update_files";
#[derive(Debug)] #[derive(Debug)]
pub struct UpdateStoreInfo { pub struct UpdateStoreInfo {
/// Size of the update store in bytes. /// Size of the update store in bytes.
@ -108,6 +108,7 @@ pub struct UpdateStore {
state: Arc<StateLock>, state: Arc<StateLock>,
/// Wake up the loop when a new event occurs. /// Wake up the loop when a new event occurs.
notification_sender: mpsc::Sender<()>, notification_sender: mpsc::Sender<()>,
update_file_store: UpdateFileStore,
path: PathBuf, path: PathBuf,
} }
@ -115,6 +116,7 @@ impl UpdateStore {
fn new( fn new(
mut options: EnvOpenOptions, mut options: EnvOpenOptions,
path: impl AsRef<Path>, path: impl AsRef<Path>,
update_file_store: UpdateFileStore,
) -> anyhow::Result<(Self, mpsc::Receiver<()>)> { ) -> anyhow::Result<(Self, mpsc::Receiver<()>)> {
options.max_dbs(5); options.max_dbs(5);
@ -138,6 +140,7 @@ impl UpdateStore {
state, state,
notification_sender, notification_sender,
path: path.as_ref().to_owned(), path: path.as_ref().to_owned(),
update_file_store,
}, },
notification_receiver, notification_receiver,
)) ))
@ -148,8 +151,9 @@ impl UpdateStore {
path: impl AsRef<Path>, path: impl AsRef<Path>,
index_resolver: Arc<HardStateIndexResolver>, index_resolver: Arc<HardStateIndexResolver>,
must_exit: Arc<AtomicBool>, must_exit: Arc<AtomicBool>,
update_file_store: UpdateFileStore,
) -> anyhow::Result<Arc<Self>> { ) -> anyhow::Result<Arc<Self>> {
let (update_store, mut notification_receiver) = Self::new(options, path)?; let (update_store, mut notification_receiver) = Self::new(options, path, update_file_store)?;
let update_store = Arc::new(update_store); let update_store = Arc::new(update_store);
// Send a first notification to trigger the process. // Send a first notification to trigger the process.
@ -482,13 +486,13 @@ impl UpdateStore {
pub fn snapshot( pub fn snapshot(
&self, &self,
_uuids: &HashSet<Uuid>, indexes: Vec<Index>,
path: impl AsRef<Path>, path: impl AsRef<Path>,
handle: Arc<HardStateIndexResolver>,
) -> Result<()> { ) -> Result<()> {
let state_lock = self.state.write(); let state_lock = self.state.write();
state_lock.swap(State::Snapshoting); state_lock.swap(State::Snapshoting);
let txn = self.env.write_txn()?; let txn = self.env.write_txn()?;
let update_path = path.as_ref().join("updates"); let update_path = path.as_ref().join("updates");
@ -501,42 +505,28 @@ impl UpdateStore {
// create db snapshot // create db snapshot
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?; self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
let update_files_path = update_path.join(UPDATE_DIR);
create_dir_all(&update_files_path)?;
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data(); let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
let uuids: HashSet<_> = indexes.iter().map(|i| i.uuid).collect();
for entry in pendings { for entry in pendings {
let ((_, _uuid, _), _pending) = entry?; let ((_, uuid, _), pending) = entry?;
//if uuids.contains(&uuid) { if uuids.contains(&uuid) {
//if let Enqueued { if let Enqueued {
//content: Some(uuid), meta: RegisterUpdate::DocumentAddition {
//.. content_uuid, ..
//} = pending.decode()? },
//{ ..
//let path = update_uuid_to_file_path(&self.path, uuid); } = pending.decode()?
//copy(path, &update_files_path)?; {
//} self.update_file_store.snapshot(content_uuid, &path).unwrap();
//} }
}
} }
let _path = &path.as_ref().to_path_buf(); let path = path.as_ref().to_owned();
let _handle = &handle; indexes.par_iter().try_for_each(|index| index.snapshot(&path)).unwrap();
// Perform the snapshot of each index concurently. Only a third of the capabilities of
// the index actor at a time not to put too much pressure on the index actor
todo!()
//let mut stream = futures::stream::iter(uuids.iter())
//.map(move |uuid| IndexMsg::snapshot(handle,*uuid, path.clone()))
//.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
//Handle::current().block_on(async { Ok(())
//while let Some(res) = stream.next().await {
//res?;
//}
//Ok(()) as Result<()>
//})?;
//Ok(())
} }
pub fn get_info(&self) -> Result<UpdateStoreInfo> { pub fn get_info(&self) -> Result<UpdateStoreInfo> {

View File

@ -7,6 +7,8 @@ pub mod index_controller;
pub use index_controller::{IndexController as MeiliSearch, updates::RegisterUpdate}; pub use index_controller::{IndexController as MeiliSearch, updates::RegisterUpdate};
mod compression;
use walkdir::WalkDir; use walkdir::WalkDir;
pub trait EnvSizer { pub trait EnvSizer {