rewrite update store

This commit is contained in:
Marin Postma 2021-04-22 10:14:29 +02:00
parent 51829ad85e
commit 4fe2a13c71
No known key found for this signature in database
GPG Key ID: D5241F0C0C865F30
28 changed files with 896 additions and 826 deletions

33
Cargo.lock generated
View File

@ -286,6 +286,12 @@ version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b" checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b"
[[package]]
name = "arc-swap"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4d7d63395147b81a9e570bcc6243aaf71c017bd666d4909cfef0085bdda8d73"
[[package]] [[package]]
name = "assert-json-diff" name = "assert-json-diff"
version = "1.0.1" version = "1.0.1"
@ -295,19 +301,6 @@ dependencies = [
"serde_json", "serde_json",
] ]
[[package]]
name = "async-compression"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b72c1f1154e234325b50864a349b9c8e56939e266a4c307c0f159812df2f9537"
dependencies = [
"flate2",
"futures-core",
"memchr",
"pin-project-lite 0.2.6",
"tokio 0.2.25",
]
[[package]] [[package]]
name = "async-stream" name = "async-stream"
version = "0.3.1" version = "0.3.1"
@ -775,16 +768,6 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "dashmap"
version = "4.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c"
dependencies = [
"cfg-if 1.0.0",
"num_cpus",
]
[[package]] [[package]]
name = "debugid" name = "debugid"
version = "0.7.2" version = "0.7.2"
@ -1751,17 +1734,15 @@ dependencies = [
"actix-web", "actix-web",
"actix-web-static-files", "actix-web-static-files",
"anyhow", "anyhow",
"arc-swap",
"assert-json-diff", "assert-json-diff",
"async-compression",
"async-stream", "async-stream",
"async-trait", "async-trait",
"byte-unit", "byte-unit",
"bytemuck",
"bytes 0.6.0", "bytes 0.6.0",
"cargo_toml", "cargo_toml",
"chrono", "chrono",
"crossbeam-channel", "crossbeam-channel",
"dashmap",
"either", "either",
"env_logger 0.8.3", "env_logger 0.8.3",
"flate2", "flate2",

View File

@ -28,15 +28,13 @@ actix-service = "2.0.0"
actix-web = { version = "=4.0.0-beta.6", features = ["rustls"] } actix-web = { version = "=4.0.0-beta.6", features = ["rustls"] }
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "6db8c3e", optional = true } actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "6db8c3e", optional = true }
anyhow = "1.0.36" anyhow = "1.0.36"
async-compression = { version = "0.3.6", features = ["gzip", "tokio-02"] }
async-stream = "0.3.0" async-stream = "0.3.0"
async-trait = "0.1.42" async-trait = "0.1.42"
arc-swap = "1.2.0"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
bytemuck = "1.5.1"
bytes = "0.6.0" bytes = "0.6.0"
chrono = { version = "0.4.19", features = ["serde"] } chrono = { version = "0.4.19", features = ["serde"] }
crossbeam-channel = "0.5.0" crossbeam-channel = "0.5.0"
dashmap = "4.0.2"
either = "1.6.1" either = "1.6.1"
env_logger = "0.8.2" env_logger = "0.8.2"
flate2 = "1.0.19" flate2 = "1.0.19"

View File

@ -8,7 +8,7 @@ use serde_json::{Map, Value};
use crate::helpers::EnvSizer; use crate::helpers::EnvSizer;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Facets, Settings, UpdateResult}; pub use updates::{Facets, Settings};
mod search; mod search;
mod updates; mod updates;
@ -59,9 +59,7 @@ impl Index {
}) })
.transpose()? .transpose()?
.unwrap_or_else(BTreeSet::new); .unwrap_or_else(BTreeSet::new);
let distinct_attribute = self let distinct_attribute = self.distinct_attribute(&txn)?.map(String::from);
.distinct_attribute(&txn)?
.map(String::from);
Ok(Settings { Ok(Settings {
displayed_attributes: Some(Some(displayed_attributes)), displayed_attributes: Some(Some(displayed_attributes)),

View File

@ -4,17 +4,11 @@ use std::num::NonZeroUsize;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
use log::info; use log::info;
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use serde::{de::Deserializer, Deserialize, Serialize}; use serde::{de::Deserializer, Deserialize, Serialize};
use super::Index; use super::Index;
use crate::index_controller::UpdateResult;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)] #[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
@ -91,7 +85,7 @@ impl Index {
&self, &self,
format: UpdateFormat, format: UpdateFormat,
method: IndexDocumentsMethod, method: IndexDocumentsMethod,
content: impl io::Read, content: Option<impl io::Read>,
update_builder: UpdateBuilder, update_builder: UpdateBuilder,
primary_key: Option<&str>, primary_key: Option<&str>,
) -> anyhow::Result<UpdateResult> { ) -> anyhow::Result<UpdateResult> {
@ -108,16 +102,15 @@ impl Index {
builder.update_format(format); builder.update_format(format);
builder.index_documents_method(method); builder.index_documents_method(method);
let gzipped = false; let indexing_callback =
let reader = if gzipped { |indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step);
Box::new(GzDecoder::new(content))
} else {
Box::new(content) as Box<dyn io::Read>
};
let result = builder.execute(reader, |indexing_step, update_id| { let gzipped = false;
info!("update {}: {:?}", update_id, indexing_step) let result = match content {
}); Some(content) if gzipped => builder.execute(GzDecoder::new(content), indexing_callback),
Some(content) => builder.execute(content, indexing_callback),
None => builder.execute(std::io::empty(), indexing_callback),
};
info!("document addition done: {:?}", result); info!("document addition done: {:?}", result);
@ -228,10 +221,13 @@ impl Index {
pub fn delete_documents( pub fn delete_documents(
&self, &self,
document_ids: impl io::Read, document_ids: Option<impl io::Read>,
update_builder: UpdateBuilder, update_builder: UpdateBuilder,
) -> anyhow::Result<UpdateResult> { ) -> anyhow::Result<UpdateResult> {
let ids: Vec<String> = serde_json::from_reader(document_ids)?; let ids = match document_ids {
Some(reader) => serde_json::from_reader(reader)?,
None => Vec::<String>::new(),
};
let mut txn = self.write_txn()?; let mut txn = self.write_txn()?;
let mut builder = update_builder.delete_documents(&mut txn, self)?; let mut builder = update_builder.delete_documents(&mut txn, self)?;

View File

@ -11,13 +11,13 @@ use tokio::task::spawn_blocking;
use uuid::Uuid; use uuid::Uuid;
use crate::index::{Document, SearchQuery, SearchResult, Settings}; use crate::index::{Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::update_handler::UpdateHandler;
use crate::index_controller::{ use crate::index_controller::{
get_arc_ownership_blocking, updates::Processing, IndexStats, UpdateMeta, get_arc_ownership_blocking, update_handler::UpdateHandler, Failed, IndexStats, Processed,
Processing,
}; };
use crate::option::IndexerOpts; use crate::option::IndexerOpts;
use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult}; use super::{IndexError, IndexMeta, IndexMsg, IndexResult, IndexSettings, IndexStore};
pub const CONCURRENT_INDEX_MSG: usize = 10; pub const CONCURRENT_INDEX_MSG: usize = 10;
@ -28,7 +28,7 @@ pub struct IndexActor<S> {
} }
impl<S: IndexStore + Sync + Send> IndexActor<S> { impl<S: IndexStore + Sync + Send> IndexActor<S> {
pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> Result<Self> { pub fn new(receiver: mpsc::Receiver<IndexMsg>, store: S) -> IndexResult<Self> {
let options = IndexerOpts::default(); let options = IndexerOpts::default();
let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?; let update_handler = UpdateHandler::new(&options).map_err(IndexError::Error)?;
let update_handler = Arc::new(update_handler); let update_handler = Arc::new(update_handler);
@ -40,9 +40,6 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
}) })
} }
/// `run` polls the write_receiver and read_receiver concurrently, but while messages sent
/// through the read channel are processed concurrently, the messages sent through the write
/// channel are processed one at a time.
pub async fn run(mut self) { pub async fn run(mut self) {
let mut receiver = self let mut receiver = self
.receiver .receiver
@ -145,7 +142,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
&self, &self,
uuid: Uuid, uuid: Uuid,
primary_key: Option<String>, primary_key: Option<String>,
) -> Result<IndexMeta> { ) -> IndexResult<IndexMeta> {
let index = self.store.create(uuid, primary_key).await?; let index = self.store.create(uuid, primary_key).await?;
let meta = spawn_blocking(move || IndexMeta::new(&index)) let meta = spawn_blocking(move || IndexMeta::new(&index))
.await .await
@ -156,9 +153,9 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
async fn handle_update( async fn handle_update(
&self, &self,
uuid: Uuid, uuid: Uuid,
meta: Processing<UpdateMeta>, meta: Processing,
data: File, data: Option<File>,
) -> Result<UpdateResult> { ) -> IndexResult<Result<Processed, Failed>> {
debug!("Processing update {}", meta.id()); debug!("Processing update {}", meta.id());
let update_handler = self.update_handler.clone(); let update_handler = self.update_handler.clone();
let index = match self.store.get(uuid).await? { let index = match self.store.get(uuid).await? {
@ -171,7 +168,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
.map_err(|e| IndexError::Error(e.into())) .map_err(|e| IndexError::Error(e.into()))
} }
async fn handle_settings(&self, uuid: Uuid) -> Result<Settings> { async fn handle_settings(&self, uuid: Uuid) -> IndexResult<Settings> {
let index = self let index = self
.store .store
.get(uuid) .get(uuid)
@ -188,7 +185,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
offset: usize, offset: usize,
limit: usize, limit: usize,
attributes_to_retrieve: Option<Vec<String>>, attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> { ) -> IndexResult<Vec<Document>> {
let index = self let index = self
.store .store
.get(uuid) .get(uuid)
@ -208,7 +205,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
uuid: Uuid, uuid: Uuid,
doc_id: String, doc_id: String,
attributes_to_retrieve: Option<Vec<String>>, attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> { ) -> IndexResult<Document> {
let index = self let index = self
.store .store
.get(uuid) .get(uuid)
@ -223,7 +220,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
.map_err(|e| IndexError::Error(e.into()))? .map_err(|e| IndexError::Error(e.into()))?
} }
async fn handle_delete(&self, uuid: Uuid) -> Result<()> { async fn handle_delete(&self, uuid: Uuid) -> IndexResult<()> {
let index = self.store.delete(uuid).await?; let index = self.store.delete(uuid).await?;
if let Some(index) = index { if let Some(index) = index {
@ -240,7 +237,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
Ok(()) Ok(())
} }
async fn handle_get_meta(&self, uuid: Uuid) -> Result<IndexMeta> { async fn handle_get_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
match self.store.get(uuid).await? { match self.store.get(uuid).await? {
Some(index) => { Some(index) => {
let meta = spawn_blocking(move || IndexMeta::new(&index)) let meta = spawn_blocking(move || IndexMeta::new(&index))
@ -256,7 +253,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
&self, &self,
uuid: Uuid, uuid: Uuid,
index_settings: IndexSettings, index_settings: IndexSettings,
) -> Result<IndexMeta> { ) -> IndexResult<IndexMeta> {
let index = self let index = self
.store .store
.get(uuid) .get(uuid)
@ -283,7 +280,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
.map_err(|e| IndexError::Error(e.into()))? .map_err(|e| IndexError::Error(e.into()))?
} }
async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> Result<()> { async fn handle_snapshot(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> {
use tokio::fs::create_dir_all; use tokio::fs::create_dir_all;
path.push("indexes"); path.push("indexes");
@ -313,7 +310,7 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
Ok(()) Ok(())
} }
async fn handle_get_stats(&self, uuid: Uuid) -> Result<IndexStats> { async fn handle_get_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
let index = self let index = self
.store .store
.get(uuid) .get(uuid)

View File

@ -3,14 +3,14 @@ use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot}; use tokio::sync::{mpsc, oneshot};
use uuid::Uuid; use uuid::Uuid;
use crate::index::{Document, SearchQuery, SearchResult, Settings}; use crate::index_controller::{IndexSettings, IndexStats, Processing};
use crate::index_controller::{updates::Processing, UpdateMeta}; use crate::{
use crate::index_controller::{IndexSettings, IndexStats}; index::{Document, SearchQuery, SearchResult, Settings},
index_controller::{Failed, Processed},
use super::{
IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore, Result, UpdateResult,
}; };
use super::{IndexActor, IndexActorHandle, IndexMeta, IndexMsg, IndexResult, MapIndexStore};
#[derive(Clone)] #[derive(Clone)]
pub struct IndexActorHandleImpl { pub struct IndexActorHandleImpl {
sender: mpsc::Sender<IndexMsg>, sender: mpsc::Sender<IndexMsg>,
@ -18,7 +18,11 @@ pub struct IndexActorHandleImpl {
#[async_trait::async_trait] #[async_trait::async_trait]
impl IndexActorHandle for IndexActorHandleImpl { impl IndexActorHandle for IndexActorHandleImpl {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> { async fn create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> IndexResult<IndexMeta> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::CreateIndex { let msg = IndexMsg::CreateIndex {
ret, ret,
@ -32,9 +36,9 @@ impl IndexActorHandle for IndexActorHandleImpl {
async fn update( async fn update(
&self, &self,
uuid: Uuid, uuid: Uuid,
meta: Processing<UpdateMeta>, meta: Processing,
data: std::fs::File, data: Option<std::fs::File>,
) -> anyhow::Result<UpdateResult> { ) -> anyhow::Result<Result<Processed, Failed>> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Update { let msg = IndexMsg::Update {
ret, ret,
@ -46,14 +50,14 @@ impl IndexActorHandle for IndexActorHandleImpl {
Ok(receiver.await.expect("IndexActor has been killed")?) Ok(receiver.await.expect("IndexActor has been killed")?)
} }
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> { async fn search(&self, uuid: Uuid, query: SearchQuery) -> IndexResult<SearchResult> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Search { uuid, query, ret }; let msg = IndexMsg::Search { uuid, query, ret };
let _ = self.sender.send(msg).await; let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?) Ok(receiver.await.expect("IndexActor has been killed")?)
} }
async fn settings(&self, uuid: Uuid) -> Result<Settings> { async fn settings(&self, uuid: Uuid) -> IndexResult<Settings> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Settings { uuid, ret }; let msg = IndexMsg::Settings { uuid, ret };
let _ = self.sender.send(msg).await; let _ = self.sender.send(msg).await;
@ -66,7 +70,7 @@ impl IndexActorHandle for IndexActorHandleImpl {
offset: usize, offset: usize,
limit: usize, limit: usize,
attributes_to_retrieve: Option<Vec<String>>, attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> { ) -> IndexResult<Vec<Document>> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Documents { let msg = IndexMsg::Documents {
uuid, uuid,
@ -84,7 +88,7 @@ impl IndexActorHandle for IndexActorHandleImpl {
uuid: Uuid, uuid: Uuid,
doc_id: String, doc_id: String,
attributes_to_retrieve: Option<Vec<String>>, attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> { ) -> IndexResult<Document> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Document { let msg = IndexMsg::Document {
uuid, uuid,
@ -96,21 +100,25 @@ impl IndexActorHandle for IndexActorHandleImpl {
Ok(receiver.await.expect("IndexActor has been killed")?) Ok(receiver.await.expect("IndexActor has been killed")?)
} }
async fn delete(&self, uuid: Uuid) -> Result<()> { async fn delete(&self, uuid: Uuid) -> IndexResult<()> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Delete { uuid, ret }; let msg = IndexMsg::Delete { uuid, ret };
let _ = self.sender.send(msg).await; let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?) Ok(receiver.await.expect("IndexActor has been killed")?)
} }
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> { async fn get_index_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetMeta { uuid, ret }; let msg = IndexMsg::GetMeta { uuid, ret };
let _ = self.sender.send(msg).await; let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?) Ok(receiver.await.expect("IndexActor has been killed")?)
} }
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> { async fn update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> IndexResult<IndexMeta> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::UpdateIndex { let msg = IndexMsg::UpdateIndex {
uuid, uuid,
@ -121,14 +129,14 @@ impl IndexActorHandle for IndexActorHandleImpl {
Ok(receiver.await.expect("IndexActor has been killed")?) Ok(receiver.await.expect("IndexActor has been killed")?)
} }
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> { async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Snapshot { uuid, path, ret }; let msg = IndexMsg::Snapshot { uuid, path, ret };
let _ = self.sender.send(msg).await; let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?) Ok(receiver.await.expect("IndexActor has been killed")?)
} }
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> { async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::GetStats { uuid, ret }; let msg = IndexMsg::GetStats { uuid, ret };
let _ = self.sender.send(msg).await; let _ = self.sender.send(msg).await;

View File

@ -4,21 +4,21 @@ use tokio::sync::oneshot;
use uuid::Uuid; use uuid::Uuid;
use crate::index::{Document, SearchQuery, SearchResult, Settings}; use crate::index::{Document, SearchQuery, SearchResult, Settings};
use crate::index_controller::{updates::Processing, IndexStats, UpdateMeta}; use crate::index_controller::{Failed, IndexStats, Processed, Processing};
use super::{IndexMeta, IndexSettings, Result, UpdateResult}; use super::{IndexMeta, IndexResult, IndexSettings};
pub enum IndexMsg { pub enum IndexMsg {
CreateIndex { CreateIndex {
uuid: Uuid, uuid: Uuid,
primary_key: Option<String>, primary_key: Option<String>,
ret: oneshot::Sender<Result<IndexMeta>>, ret: oneshot::Sender<IndexResult<IndexMeta>>,
}, },
Update { Update {
uuid: Uuid, uuid: Uuid,
meta: Processing<UpdateMeta>, meta: Processing,
data: std::fs::File, data: Option<std::fs::File>,
ret: oneshot::Sender<Result<UpdateResult>>, ret: oneshot::Sender<IndexResult<Result<Processed, Failed>>>,
}, },
Search { Search {
uuid: Uuid, uuid: Uuid,
@ -27,41 +27,41 @@ pub enum IndexMsg {
}, },
Settings { Settings {
uuid: Uuid, uuid: Uuid,
ret: oneshot::Sender<Result<Settings>>, ret: oneshot::Sender<IndexResult<Settings>>,
}, },
Documents { Documents {
uuid: Uuid, uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>, attributes_to_retrieve: Option<Vec<String>>,
offset: usize, offset: usize,
limit: usize, limit: usize,
ret: oneshot::Sender<Result<Vec<Document>>>, ret: oneshot::Sender<IndexResult<Vec<Document>>>,
}, },
Document { Document {
uuid: Uuid, uuid: Uuid,
attributes_to_retrieve: Option<Vec<String>>, attributes_to_retrieve: Option<Vec<String>>,
doc_id: String, doc_id: String,
ret: oneshot::Sender<Result<Document>>, ret: oneshot::Sender<IndexResult<Document>>,
}, },
Delete { Delete {
uuid: Uuid, uuid: Uuid,
ret: oneshot::Sender<Result<()>>, ret: oneshot::Sender<IndexResult<()>>,
}, },
GetMeta { GetMeta {
uuid: Uuid, uuid: Uuid,
ret: oneshot::Sender<Result<IndexMeta>>, ret: oneshot::Sender<IndexResult<IndexMeta>>,
}, },
UpdateIndex { UpdateIndex {
uuid: Uuid, uuid: Uuid,
index_settings: IndexSettings, index_settings: IndexSettings,
ret: oneshot::Sender<Result<IndexMeta>>, ret: oneshot::Sender<IndexResult<IndexMeta>>,
}, },
Snapshot { Snapshot {
uuid: Uuid, uuid: Uuid,
path: PathBuf, path: PathBuf,
ret: oneshot::Sender<Result<()>>, ret: oneshot::Sender<IndexResult<()>>,
}, },
GetStats { GetStats {
uuid: Uuid, uuid: Uuid,
ret: oneshot::Sender<Result<IndexStats>>, ret: oneshot::Sender<IndexResult<IndexStats>>,
}, },
} }

View File

@ -1,5 +1,4 @@
#[cfg(test)] use std::fs::File;
use std::sync::Arc;
use std::path::PathBuf; use std::path::PathBuf;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
@ -15,12 +14,8 @@ pub use handle_impl::IndexActorHandleImpl;
use message::IndexMsg; use message::IndexMsg;
use store::{IndexStore, MapIndexStore}; use store::{IndexStore, MapIndexStore};
use crate::index::UpdateResult as UResult;
use crate::index::{Document, Index, SearchQuery, SearchResult, Settings}; use crate::index::{Document, Index, SearchQuery, SearchResult, Settings};
use crate::index_controller::{ use crate::index_controller::{Failed, Processed, Processing, IndexStats};
updates::{Failed, Processed, Processing},
IndexStats, UpdateMeta,
};
use super::IndexSettings; use super::IndexSettings;
@ -29,8 +24,7 @@ mod handle_impl;
mod message; mod message;
mod store; mod store;
pub type Result<T> = std::result::Result<T, IndexError>; pub type IndexResult<T> = std::result::Result<T, IndexError>;
type UpdateResult = std::result::Result<Processed<UpdateMeta, UResult>, Failed<UpdateMeta, String>>;
#[derive(Debug, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
@ -41,12 +35,12 @@ pub struct IndexMeta {
} }
impl IndexMeta { impl IndexMeta {
fn new(index: &Index) -> Result<Self> { fn new(index: &Index) -> IndexResult<Self> {
let txn = index.read_txn()?; let txn = index.read_txn()?;
Self::new_txn(index, &txn) Self::new_txn(index, &txn)
} }
fn new_txn(index: &Index, txn: &heed::RoTxn) -> Result<Self> { fn new_txn(index: &Index, txn: &heed::RoTxn) -> IndexResult<Self> {
let created_at = index.created_at(&txn)?; let created_at = index.created_at(&txn)?;
let updated_at = index.updated_at(&txn)?; let updated_at = index.updated_at(&txn)?;
let primary_key = index.primary_key(&txn)?.map(String::from); let primary_key = index.primary_key(&txn)?.map(String::from);
@ -72,82 +66,19 @@ pub enum IndexError {
ExistingPrimaryKey, ExistingPrimaryKey,
} }
/// Test-only `IndexActorHandle` implementation for `Arc<MockIndexActorHandle>`.
///
/// Every method forwards its arguments unchanged to the `MockIndexActorHandle`
/// behind the `Arc` (borrowed with `as_ref()`), awaits the call and returns
/// its result as-is.
#[cfg(test)]
#[async_trait::async_trait]
impl IndexActorHandle for Arc<MockIndexActorHandle> {
// Delegates to the inner mock's `create_index`.
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta> {
self.as_ref().create_index(uuid, primary_key).await
}
// Delegates to the inner mock's `update`; `meta` and `data` are moved into the call.
async fn update(
&self,
uuid: Uuid,
meta: Processing<UpdateMeta>,
data: std::fs::File,
) -> anyhow::Result<UpdateResult> {
self.as_ref().update(uuid, meta, data).await
}
// Delegates to the inner mock's `search`.
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult> {
self.as_ref().search(uuid, query).await
}
// Delegates to the inner mock's `settings`.
async fn settings(&self, uuid: Uuid) -> Result<Settings> {
self.as_ref().settings(uuid).await
}
// Delegates to the inner mock's `documents`.
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>> {
self.as_ref().documents(uuid, offset, limit, attributes_to_retrieve).await
}
// Delegates to the inner mock's `document`.
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document> {
self.as_ref().document(uuid, doc_id, attributes_to_retrieve).await
}
// Delegates to the inner mock's `delete`.
async fn delete(&self, uuid: Uuid) -> Result<()> {
self.as_ref().delete(uuid).await
}
// Delegates to the inner mock's `get_index_meta`.
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta> {
self.as_ref().get_index_meta(uuid).await
}
// Delegates to the inner mock's `update_index`.
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta> {
self.as_ref().update_index(uuid, index_settings).await
}
// Delegates to the inner mock's `snapshot`.
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()> {
self.as_ref().snapshot(uuid, path).await
}
// Delegates to the inner mock's `get_index_stats`.
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats> {
self.as_ref().get_index_stats(uuid).await
}
}
#[async_trait::async_trait] #[async_trait::async_trait]
#[cfg_attr(test, automock)] #[cfg_attr(test, automock)]
pub trait IndexActorHandle { pub trait IndexActorHandle {
async fn create_index(&self, uuid: Uuid, primary_key: Option<String>) -> Result<IndexMeta>; async fn create_index(&self, uuid: Uuid, primary_key: Option<String>)
-> IndexResult<IndexMeta>;
async fn update( async fn update(
&self, &self,
uuid: Uuid, uuid: Uuid,
meta: Processing<UpdateMeta>, meta: Processing,
data: std::fs::File, data: Option<File>,
) -> anyhow::Result<UpdateResult>; ) -> anyhow::Result<Result<Processed, Failed>>;
async fn search(&self, uuid: Uuid, query: SearchQuery) -> Result<SearchResult>; async fn search(&self, uuid: Uuid, query: SearchQuery) -> IndexResult<SearchResult>;
async fn settings(&self, uuid: Uuid) -> Result<Settings>; async fn settings(&self, uuid: Uuid) -> IndexResult<Settings>;
async fn documents( async fn documents(
&self, &self,
@ -155,16 +86,103 @@ pub trait IndexActorHandle {
offset: usize, offset: usize,
limit: usize, limit: usize,
attributes_to_retrieve: Option<Vec<String>>, attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Vec<Document>>; ) -> IndexResult<Vec<Document>>;
async fn document( async fn document(
&self, &self,
uuid: Uuid, uuid: Uuid,
doc_id: String, doc_id: String,
attributes_to_retrieve: Option<Vec<String>>, attributes_to_retrieve: Option<Vec<String>>,
) -> Result<Document>; ) -> IndexResult<Document>;
async fn delete(&self, uuid: Uuid) -> Result<()>; async fn delete(&self, uuid: Uuid) -> IndexResult<()>;
async fn get_index_meta(&self, uuid: Uuid) -> Result<IndexMeta>; async fn get_index_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta>;
async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result<IndexMeta>; async fn update_index(
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; &self,
async fn get_index_stats(&self, uuid: Uuid) -> Result<IndexStats>; uuid: Uuid,
index_settings: IndexSettings,
) -> IndexResult<IndexMeta>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats>;
}
#[cfg(test)]
mod test {
use std::sync::Arc;
use super::*;
#[async_trait::async_trait]
/// Useful for passing around an `Arc<MockIndexActorHandle>` in tests.
///
/// Every method forwards its arguments unchanged to the `MockIndexActorHandle`
/// behind the `Arc` (borrowed with `as_ref()`), awaits the call and returns
/// its result as-is.
impl IndexActorHandle for Arc<MockIndexActorHandle> {
// Delegates to the inner mock's `create_index`.
async fn create_index(
&self,
uuid: Uuid,
primary_key: Option<String>,
) -> IndexResult<IndexMeta> {
self.as_ref().create_index(uuid, primary_key).await
}
// Delegates to the inner mock's `update`. Both the outer `anyhow::Result` and the
// inner `Result<Processed, Failed>` are returned untouched.
async fn update(
&self,
uuid: Uuid,
meta: Processing,
data: Option<std::fs::File>,
) -> anyhow::Result<Result<Processed, Failed>> {
self.as_ref().update(uuid, meta, data).await
}
// Delegates to the inner mock's `search`.
async fn search(&self, uuid: Uuid, query: SearchQuery) -> IndexResult<SearchResult> {
self.as_ref().search(uuid, query).await
}
// Delegates to the inner mock's `settings`.
async fn settings(&self, uuid: Uuid) -> IndexResult<Settings> {
self.as_ref().settings(uuid).await
}
// Delegates to the inner mock's `documents`.
async fn documents(
&self,
uuid: Uuid,
offset: usize,
limit: usize,
attributes_to_retrieve: Option<Vec<String>>,
) -> IndexResult<Vec<Document>> {
self.as_ref()
.documents(uuid, offset, limit, attributes_to_retrieve)
.await
}
// Delegates to the inner mock's `document`.
async fn document(
&self,
uuid: Uuid,
doc_id: String,
attributes_to_retrieve: Option<Vec<String>>,
) -> IndexResult<Document> {
self.as_ref()
.document(uuid, doc_id, attributes_to_retrieve)
.await
}
// Delegates to the inner mock's `delete`.
async fn delete(&self, uuid: Uuid) -> IndexResult<()> {
self.as_ref().delete(uuid).await
}
// Delegates to the inner mock's `get_index_meta`.
async fn get_index_meta(&self, uuid: Uuid) -> IndexResult<IndexMeta> {
self.as_ref().get_index_meta(uuid).await
}
// Delegates to the inner mock's `update_index`.
async fn update_index(
&self,
uuid: Uuid,
index_settings: IndexSettings,
) -> IndexResult<IndexMeta> {
self.as_ref().update_index(uuid, index_settings).await
}
// Delegates to the inner mock's `snapshot`.
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
self.as_ref().snapshot(uuid, path).await
}
// Delegates to the inner mock's `get_index_stats`.
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats> {
self.as_ref().get_index_stats(uuid).await
}
}
} }

View File

@ -8,16 +8,16 @@ use tokio::sync::RwLock;
use tokio::task::spawn_blocking; use tokio::task::spawn_blocking;
use uuid::Uuid; use uuid::Uuid;
use super::{IndexError, Result}; use super::{IndexError, IndexResult};
use crate::index::Index; use crate::index::Index;
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>; type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
#[async_trait::async_trait] #[async_trait::async_trait]
pub trait IndexStore { pub trait IndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>; async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> IndexResult<Index>;
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>; async fn get(&self, uuid: Uuid) -> IndexResult<Option<Index>>;
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>; async fn delete(&self, uuid: Uuid) -> IndexResult<Option<Index>>;
} }
pub struct MapIndexStore { pub struct MapIndexStore {
@ -40,14 +40,14 @@ impl MapIndexStore {
#[async_trait::async_trait] #[async_trait::async_trait]
impl IndexStore for MapIndexStore { impl IndexStore for MapIndexStore {
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> { async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> IndexResult<Index> {
let path = self.path.join(format!("index-{}", uuid)); let path = self.path.join(format!("index-{}", uuid));
if path.exists() { if path.exists() {
return Err(IndexError::IndexAlreadyExists); return Err(IndexError::IndexAlreadyExists);
} }
let index_size = self.index_size; let index_size = self.index_size;
let index = spawn_blocking(move || -> Result<Index> { let index = spawn_blocking(move || -> IndexResult<Index> {
let index = open_index(&path, index_size)?; let index = open_index(&path, index_size)?;
if let Some(primary_key) = primary_key { if let Some(primary_key) = primary_key {
let mut txn = index.write_txn()?; let mut txn = index.write_txn()?;
@ -64,7 +64,7 @@ impl IndexStore for MapIndexStore {
Ok(index) Ok(index)
} }
async fn get(&self, uuid: Uuid) -> Result<Option<Index>> { async fn get(&self, uuid: Uuid) -> IndexResult<Option<Index>> {
let guard = self.index_store.read().await; let guard = self.index_store.read().await;
match guard.get(&uuid) { match guard.get(&uuid) {
Some(index) => Ok(Some(index.clone())), Some(index) => Ok(Some(index.clone())),
@ -86,7 +86,7 @@ impl IndexStore for MapIndexStore {
} }
} }
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> { async fn delete(&self, uuid: Uuid) -> IndexResult<Option<Index>> {
let db_path = self.path.join(format!("index-{}", uuid)); let db_path = self.path.join(format!("index-{}", uuid));
fs::remove_dir_all(db_path) fs::remove_dir_all(db_path)
.await .await
@ -96,7 +96,7 @@ impl IndexStore for MapIndexStore {
} }
} }
fn open_index(path: impl AsRef<Path>, size: usize) -> Result<Index> { fn open_index(path: impl AsRef<Path>, size: usize) -> IndexResult<Index> {
std::fs::create_dir_all(&path).map_err(|e| IndexError::Error(e.into()))?; std::fs::create_dir_all(&path).map_err(|e| IndexError::Error(e.into()))?;
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(size); options.map_size(size);

View File

@ -8,23 +8,19 @@ use anyhow::bail;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use futures::stream::StreamExt; use futures::stream::StreamExt;
use log::info; use log::info;
use milli::update::{IndexDocumentsMethod, UpdateFormat};
use milli::FieldsDistribution; use milli::FieldsDistribution;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio::time::sleep; use tokio::time::sleep;
use uuid::Uuid; use uuid::Uuid;
pub use updates::*;
use index_actor::IndexActorHandle; use index_actor::IndexActorHandle;
use snapshot::load_snapshot; use snapshot::{SnapshotService, load_snapshot};
use snapshot::SnapshotService;
use update_actor::UpdateActorHandle; use update_actor::UpdateActorHandle;
pub use updates::{Failed, Processed, Processing}; use uuid_resolver::{UuidError, UuidResolverHandle};
use uuid_resolver::UuidError;
use uuid_resolver::UuidResolverHandle;
use crate::index::{Document, SearchQuery, SearchResult}; use crate::index::{Settings, Document, SearchQuery, SearchResult};
use crate::index::{Facets, Settings, UpdateResult};
use crate::option::Opt; use crate::option::Opt;
mod index_actor; mod index_actor;
@ -34,8 +30,6 @@ mod update_handler;
mod updates; mod updates;
mod uuid_resolver; mod uuid_resolver;
pub type UpdateStatus = updates::UpdateStatus<UpdateMeta, UpdateResult, String>;
#[derive(Debug, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct IndexMetadata { pub struct IndexMetadata {
@ -47,20 +41,6 @@ pub struct IndexMetadata {
pub meta: index_actor::IndexMeta, pub meta: index_actor::IndexMeta,
} }
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments,
Settings(Settings),
Facets(Facets),
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct IndexSettings { pub struct IndexSettings {
pub uid: Option<String>, pub uid: Option<String>,
@ -73,6 +53,9 @@ pub struct IndexStats {
#[serde(skip)] #[serde(skip)]
pub size: u64, pub size: u64,
pub number_of_documents: u64, pub number_of_documents: u64,
/// Whether the current index is performing an update. It is initially `None` when the
/// index returns it, since it is the `UpdateStore` that knows which index is currently indexing. It is
/// later set to either true or false, when we retrieve the information from the `UpdateStore`.
pub is_indexing: Option<bool>, pub is_indexing: Option<bool>,
pub fields_distribution: FieldsDistribution, pub fields_distribution: FieldsDistribution,
} }
@ -180,7 +163,8 @@ impl IndexController {
Err(UuidError::UnexistingIndex(name)) => { Err(UuidError::UnexistingIndex(name)) => {
let uuid = Uuid::new_v4(); let uuid = Uuid::new_v4();
let status = perform_update(uuid).await?; let status = perform_update(uuid).await?;
self.index_handle.create_index(uuid, None).await?; // ignore if index creation fails now, since it may already have been created
let _ = self.index_handle.create_index(uuid, None).await;
self.uuid_resolver.insert(name, uuid).await?; self.uuid_resolver.insert(name, uuid).await?;
Ok(status) Ok(status)
} }
@ -233,7 +217,8 @@ impl IndexController {
Err(UuidError::UnexistingIndex(name)) if create => { Err(UuidError::UnexistingIndex(name)) if create => {
let uuid = Uuid::new_v4(); let uuid = Uuid::new_v4();
let status = perform_udpate(uuid).await?; let status = perform_udpate(uuid).await?;
self.index_handle.create_index(uuid, None).await?; // ignore if index creation fails now, since it may already have been created
let _ = self.index_handle.create_index(uuid, None).await;
self.uuid_resolver.insert(name, uuid).await?; self.uuid_resolver.insert(name, uuid).await?;
Ok(status) Ok(status)
} }
@ -378,7 +363,8 @@ impl IndexController {
let uuid = self.uuid_resolver.get(uid).await?; let uuid = self.uuid_resolver.get(uid).await?;
let update_infos = self.update_handle.get_info().await?; let update_infos = self.update_handle.get_info().await?;
let mut stats = self.index_handle.get_index_stats(uuid).await?; let mut stats = self.index_handle.get_index_stats(uuid).await?;
stats.is_indexing = (Some(uuid) == update_infos.processing).into(); // Check if the currently indexing update is from out index.
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
Ok(stats) Ok(stats)
} }
@ -396,7 +382,7 @@ impl IndexController {
Some(last.max(index.meta.updated_at)) Some(last.max(index.meta.updated_at))
}); });
index_stats.is_indexing = (Some(index.uuid) == update_infos.processing).into(); index_stats.is_indexing = Some(Some(index.uuid) == update_infos.processing);
indexes.insert(index.uid, index_stats); indexes.insert(index.uid, index_stats);
} }

View File

@ -131,7 +131,8 @@ pub fn load_snapshot(
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::sync::Arc; use std::iter::FromIterator;
use std::{collections::HashSet, sync::Arc};
use futures::future::{err, ok}; use futures::future::{err, ok};
use rand::Rng; use rand::Rng;
@ -139,15 +140,19 @@ mod test {
use uuid::Uuid; use uuid::Uuid;
use super::*; use super::*;
use crate::index_controller::update_actor::{UpdateError, MockUpdateActorHandle, UpdateActorHandleImpl};
use crate::index_controller::index_actor::MockIndexActorHandle; use crate::index_controller::index_actor::MockIndexActorHandle;
use crate::index_controller::update_actor::{
MockUpdateActorHandle, UpdateActorHandleImpl, UpdateError,
};
use crate::index_controller::uuid_resolver::{MockUuidResolverHandle, UuidError}; use crate::index_controller::uuid_resolver::{MockUuidResolverHandle, UuidError};
#[actix_rt::test] #[actix_rt::test]
async fn test_normal() { async fn test_normal() {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
let uuids_num: usize = rng.gen_range(5, 10); let uuids_num: usize = rng.gen_range(5, 10);
let uuids = (0..uuids_num).map(|_| Uuid::new_v4()).collect::<Vec<_>>(); let uuids = (0..uuids_num)
.map(|_| Uuid::new_v4())
.collect::<HashSet<_>>();
let mut uuid_resolver = MockUuidResolverHandle::new(); let mut uuid_resolver = MockUuidResolverHandle::new();
let uuids_clone = uuids.clone(); let uuids_clone = uuids.clone();
@ -162,13 +167,12 @@ mod test {
.expect_snapshot() .expect_snapshot()
.withf(move |uuid, _path| uuids_clone.contains(uuid)) .withf(move |uuid, _path| uuids_clone.contains(uuid))
.times(uuids_num) .times(uuids_num)
.returning(move |_, _| { .returning(move |_, _| Box::pin(ok(())));
Box::pin(ok(()))
});
let dir = tempfile::tempdir_in(".").unwrap(); let dir = tempfile::tempdir_in(".").unwrap();
let handle = Arc::new(index_handle); let handle = Arc::new(index_handle);
let update_handle = UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap(); let update_handle =
UpdateActorHandleImpl::<Vec<u8>>::new(handle.clone(), dir.path(), 4096 * 100).unwrap();
let snapshot_path = tempfile::tempdir_in(".").unwrap(); let snapshot_path = tempfile::tempdir_in(".").unwrap();
let snapshot_service = SnapshotService::new( let snapshot_service = SnapshotService::new(
@ -214,7 +218,7 @@ mod test {
uuid_resolver uuid_resolver
.expect_snapshot() .expect_snapshot()
.times(1) .times(1)
.returning(move |_| Box::pin(ok(vec![uuid]))); .returning(move |_| Box::pin(ok(HashSet::from_iter(Some(uuid)))));
let mut update_handle = MockUpdateActorHandle::new(); let mut update_handle = MockUpdateActorHandle::new();
update_handle update_handle

View File

@ -1,14 +1,16 @@
use std::collections::HashSet;
use std::io::SeekFrom; use std::io::SeekFrom;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use futures::StreamExt;
use log::info; use log::info;
use oxidized_json_checker::JsonChecker; use oxidized_json_checker::JsonChecker;
use tokio::fs; use tokio::fs;
use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio::io::AsyncWriteExt;
use tokio::runtime::Handle;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use uuid::Uuid; use uuid::Uuid;
use futures::StreamExt;
use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo}; use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo};
use crate::index_controller::index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG}; use crate::index_controller::index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG};
@ -32,18 +34,14 @@ where
path: impl AsRef<Path>, path: impl AsRef<Path>,
index_handle: I, index_handle: I,
) -> anyhow::Result<Self> { ) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned().join("updates"); let path = path.as_ref().join("updates");
std::fs::create_dir_all(&path)?; std::fs::create_dir_all(&path)?;
let mut options = heed::EnvOpenOptions::new(); let mut options = heed::EnvOpenOptions::new();
options.map_size(update_db_size); options.map_size(update_db_size);
let handle = index_handle.clone(); let store = UpdateStore::open(options, &path, index_handle.clone())?;
let store = UpdateStore::open(options, &path, move |uuid, meta, file| {
futures::executor::block_on(handle.update(uuid, meta, file))
})
.map_err(|e| UpdateError::Error(e.into()))?;
std::fs::create_dir_all(path.join("update_files"))?; std::fs::create_dir_all(path.join("update_files"))?;
assert!(path.exists()); assert!(path.exists());
Ok(Self { Ok(Self {
@ -95,40 +93,54 @@ where
meta: UpdateMeta, meta: UpdateMeta,
mut payload: mpsc::Receiver<PayloadData<D>>, mut payload: mpsc::Receiver<PayloadData<D>>,
) -> Result<UpdateStatus> { ) -> Result<UpdateStatus> {
let update_file_id = uuid::Uuid::new_v4();
let path = self
.path
.join(format!("update_files/update_{}", update_file_id));
let mut file = fs::OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
while let Some(bytes) = payload.recv().await { let file_path = match meta {
match bytes { UpdateMeta::DocumentsAddition { .. }
Ok(bytes) => { | UpdateMeta::DeleteDocuments => {
file.write_all(bytes.as_ref())
let update_file_id = uuid::Uuid::new_v4();
let path = self
.path
.join(format!("update_files/update_{}", update_file_id));
let mut file = fs::OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let mut file_len = 0;
while let Some(bytes) = payload.recv().await {
match bytes {
Ok(bytes) => {
file_len += bytes.as_ref().len();
file.write_all(bytes.as_ref())
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
Err(e) => {
return Err(UpdateError::Error(e));
}
}
}
if file_len != 0 {
file.flush()
.await .await
.map_err(|e| UpdateError::Error(Box::new(e)))?; .map_err(|e| UpdateError::Error(Box::new(e)))?;
} let file = file.into_std().await;
Err(e) => { Some((file, path))
return Err(UpdateError::Error(e)); } else {
// empty update, delete the empty file.
fs::remove_file(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
None
} }
} }
} _ => None
};
file.flush()
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
file.seek(SeekFrom::Start(0))
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let mut file = file.into_std().await;
let update_store = self.store.clone(); let update_store = self.store.clone();
@ -136,12 +148,9 @@ where
use std::io::{copy, sink, BufReader, Seek}; use std::io::{copy, sink, BufReader, Seek};
// If the payload is empty, ignore the check. // If the payload is empty, ignore the check.
if file let path = if let Some((mut file, path)) = file_path {
.metadata() // set the file back to the beginning
.map_err(|e| UpdateError::Error(Box::new(e)))? file.seek(SeekFrom::Start(0)).map_err(|e| UpdateError::Error(Box::new(e)))?;
.len()
> 0
{
// Check that the json payload is valid: // Check that the json payload is valid:
let reader = BufReader::new(&mut file); let reader = BufReader::new(&mut file);
let mut checker = JsonChecker::new(reader); let mut checker = JsonChecker::new(reader);
@ -153,7 +162,10 @@ where
let _: serde_json::Value = serde_json::from_reader(file) let _: serde_json::Value = serde_json::from_reader(file)
.map_err(|e| UpdateError::Error(Box::new(e)))?; .map_err(|e| UpdateError::Error(Box::new(e)))?;
} }
} Some(path)
} else {
None
};
// The payload is valid, we can register it to the update store. // The payload is valid, we can register it to the update store.
update_store update_store
@ -197,17 +209,11 @@ where
Ok(()) Ok(())
} }
async fn handle_snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> { async fn handle_snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone(); let index_handle = self.index_handle.clone();
let update_store = self.store.clone(); let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> anyhow::Result<()> { tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
// acquire write lock to prevent further writes during snapshot update_store.snapshot(&uuids, &path)?;
// the update lock must be acquired BEFORE the write lock to prevent dead lock
let _lock = update_store.update_lock.lock();
let mut txn = update_store.env.write_txn()?;
// create db snapshot
update_store.snapshot(&mut txn, &path)?;
// Perform the snapshot of each index concurrently. Only a third of the capabilities of // Perform the snapshot of each index concurrently. Only a third of the capabilities of
// the index actor at a time not to put too much pressure on the index actor // the index actor at a time not to put too much pressure on the index actor
@ -218,7 +224,7 @@ where
.map(|&uuid| handle.snapshot(uuid, path.clone())) .map(|&uuid| handle.snapshot(uuid, path.clone()))
.buffer_unordered(CONCURRENT_INDEX_MSG / 3); .buffer_unordered(CONCURRENT_INDEX_MSG / 3);
futures::executor::block_on(async { Handle::current().block_on(async {
while let Some(res) = stream.next().await { while let Some(res) = stream.next().await {
res?; res?;
} }
@ -234,25 +240,14 @@ where
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> { async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
let update_store = self.store.clone(); let update_store = self.store.clone();
let processing = self.store.processing.clone();
let info = tokio::task::spawn_blocking(move || -> anyhow::Result<UpdateStoreInfo> { let info = tokio::task::spawn_blocking(move || -> anyhow::Result<UpdateStoreInfo> {
let txn = update_store.env.read_txn()?; let info = update_store.get_info()?;
let size = update_store.get_size(&txn)?;
let processing = processing
.read()
.as_ref()
.map(|(uuid, _)| uuid)
.cloned();
let info = UpdateStoreInfo {
size, processing
};
Ok(info) Ok(info)
}) })
.await .await
.map_err(|e| UpdateError::Error(e.into()))? .map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?; .map_err(|e| UpdateError::Error(e.into()))?;
Ok(info) Ok(info)
} }
} }

View File

@ -1,12 +1,13 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot}; use tokio::sync::{mpsc, oneshot};
use uuid::Uuid; use uuid::Uuid;
use crate::index_controller::IndexActorHandle; use crate::index_controller::{IndexActorHandle, UpdateStatus};
use super::{ use super::{
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStatus, UpdateStoreInfo PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStoreInfo,
}; };
#[derive(Clone)] #[derive(Clone)]
@ -63,7 +64,7 @@ where
receiver.await.expect("update actor killed.") receiver.await.expect("update actor killed.")
} }
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> { async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Snapshot { uuids, path, ret }; let msg = UpdateMsg::Snapshot { uuids, path, ret };
let _ = self.sender.send(msg).await; let _ = self.sender.send(msg).await;

View File

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::path::PathBuf; use std::path::PathBuf;
use tokio::sync::{mpsc, oneshot}; use tokio::sync::{mpsc, oneshot};
@ -26,7 +27,7 @@ pub enum UpdateMsg<D> {
ret: oneshot::Sender<Result<()>>, ret: oneshot::Sender<Result<()>>,
}, },
Snapshot { Snapshot {
uuids: Vec<Uuid>, uuids: HashSet<Uuid>,
path: PathBuf, path: PathBuf,
ret: oneshot::Sender<Result<()>>, ret: oneshot::Sender<Result<()>>,
}, },

View File

@ -3,22 +3,22 @@ mod handle_impl;
mod message; mod message;
mod update_store; mod update_store;
use std::path::PathBuf; use std::{collections::HashSet, path::PathBuf};
use thiserror::Error; use thiserror::Error;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use uuid::Uuid; use uuid::Uuid;
use crate::index::UpdateResult;
use crate::index_controller::{UpdateMeta, UpdateStatus}; use crate::index_controller::{UpdateMeta, UpdateStatus};
use actor::UpdateActor; use actor::UpdateActor;
use message::UpdateMsg; use message::UpdateMsg;
use update_store::UpdateStore;
pub use update_store::UpdateStoreInfo;
pub use handle_impl::UpdateActorHandleImpl; pub use handle_impl::UpdateActorHandleImpl;
pub type Result<T> = std::result::Result<T, UpdateError>; pub type Result<T> = std::result::Result<T, UpdateError>;
type UpdateStore = update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
type PayloadData<D> = std::result::Result<D, Box<dyn std::error::Error + Sync + Send + 'static>>; type PayloadData<D> = std::result::Result<D, Box<dyn std::error::Error + Sync + Send + 'static>>;
#[cfg(test)] #[cfg(test)]
@ -32,13 +32,6 @@ pub enum UpdateError {
UnexistingUpdate(u64), UnexistingUpdate(u64),
} }
pub struct UpdateStoreInfo {
/// Size of the update store in bytes.
pub size: u64,
/// Uuid of the currently processing update if it exists
pub processing: Option<Uuid>,
}
#[async_trait::async_trait] #[async_trait::async_trait]
#[cfg_attr(test, automock(type Data=Vec<u8>;))] #[cfg_attr(test, automock(type Data=Vec<u8>;))]
pub trait UpdateActorHandle { pub trait UpdateActorHandle {
@ -47,7 +40,7 @@ pub trait UpdateActorHandle {
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>; async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>; async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
async fn delete(&self, uuid: Uuid) -> Result<()>; async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()>; async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn get_info(&self) -> Result<UpdateStoreInfo>; async fn get_info(&self) -> Result<UpdateStoreInfo>;
async fn update( async fn update(
&self, &self,

View File

@ -6,9 +6,8 @@ use grenad::CompressionType;
use milli::update::UpdateBuilder; use milli::update::UpdateBuilder;
use rayon::ThreadPool; use rayon::ThreadPool;
use crate::index::UpdateResult;
use crate::index_controller::updates::{Failed, Processed, Processing};
use crate::index_controller::UpdateMeta; use crate::index_controller::UpdateMeta;
use crate::index_controller::{Failed, Processed, Processing};
use crate::option::IndexerOpts; use crate::option::IndexerOpts;
pub struct UpdateHandler { pub struct UpdateHandler {
@ -59,10 +58,10 @@ impl UpdateHandler {
pub fn handle_update( pub fn handle_update(
&self, &self,
meta: Processing<UpdateMeta>, meta: Processing,
content: File, content: Option<File>,
index: Index, index: Index,
) -> Result<Processed<UpdateMeta, UpdateResult>, Failed<UpdateMeta, String>> { ) -> Result<Processed, Failed> {
use UpdateMeta::*; use UpdateMeta::*;
let update_id = meta.id(); let update_id = meta.id();

View File

@ -1,87 +1,121 @@
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod, UpdateFormat};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] use crate::index::{Facets, Settings};
#[serde(rename_all = "camelCase")]
pub struct Enqueued<M> { pub type UpdateError = String;
pub update_id: u64,
pub meta: M, #[derive(Debug, Clone, Serialize, Deserialize)]
pub enqueued_at: DateTime<Utc>, pub enum UpdateResult {
DocumentsAddition(DocumentAdditionResult),
DocumentDeletion { deleted: u64 },
Other,
} }
impl<M> Enqueued<M> { #[derive(Debug, Clone, Serialize, Deserialize)]
pub fn new(meta: M, update_id: u64) -> Self { #[serde(tag = "type")]
pub enum UpdateMeta {
DocumentsAddition {
method: IndexDocumentsMethod,
format: UpdateFormat,
primary_key: Option<String>,
},
ClearDocuments,
DeleteDocuments,
Settings(Settings),
Facets(Facets),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Enqueued {
pub update_id: u64,
pub meta: UpdateMeta,
pub enqueued_at: DateTime<Utc>,
pub content: Option<PathBuf>,
}
impl Enqueued {
pub fn new(meta: UpdateMeta, update_id: u64, content: Option<PathBuf>) -> Self {
Self { Self {
enqueued_at: Utc::now(), enqueued_at: Utc::now(),
meta, meta,
update_id, update_id,
content,
} }
} }
pub fn processing(self) -> Processing<M> { pub fn processing(self) -> Processing {
Processing { Processing {
from: self, from: self,
started_processing_at: Utc::now(), started_processing_at: Utc::now(),
} }
} }
pub fn abort(self) -> Aborted<M> { pub fn abort(self) -> Aborted {
Aborted { Aborted {
from: self, from: self,
aborted_at: Utc::now(), aborted_at: Utc::now(),
} }
} }
pub fn meta(&self) -> &M { pub fn meta(&self) -> &UpdateMeta {
&self.meta &self.meta
} }
pub fn id(&self) -> u64 { pub fn id(&self) -> u64 {
self.update_id self.update_id
} }
pub fn content_path(&self) -> Option<&Path> {
self.content.as_deref()
}
} }
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct Processed<M, N> { pub struct Processed {
pub success: N, pub success: UpdateResult,
pub processed_at: DateTime<Utc>, pub processed_at: DateTime<Utc>,
#[serde(flatten)] #[serde(flatten)]
pub from: Processing<M>, pub from: Processing,
} }
impl<M, N> Processed<M, N> { impl Processed {
pub fn id(&self) -> u64 { pub fn id(&self) -> u64 {
self.from.id() self.from.id()
} }
} }
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct Processing<M> { pub struct Processing {
#[serde(flatten)] #[serde(flatten)]
pub from: Enqueued<M>, pub from: Enqueued,
pub started_processing_at: DateTime<Utc>, pub started_processing_at: DateTime<Utc>,
} }
impl<M> Processing<M> { impl Processing {
pub fn id(&self) -> u64 { pub fn id(&self) -> u64 {
self.from.id() self.from.id()
} }
pub fn meta(&self) -> &M { pub fn meta(&self) -> &UpdateMeta {
self.from.meta() self.from.meta()
} }
pub fn process<N>(self, meta: N) -> Processed<M, N> { pub fn process(self, success: UpdateResult) -> Processed {
Processed { Processed {
success: meta, success,
from: self, from: self,
processed_at: Utc::now(), processed_at: Utc::now(),
} }
} }
pub fn fail<E>(self, error: E) -> Failed<M, E> { pub fn fail(self, error: UpdateError) -> Failed {
Failed { Failed {
from: self, from: self,
error, error,
@ -90,46 +124,46 @@ impl<M> Processing<M> {
} }
} }
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct Aborted<M> { pub struct Aborted {
#[serde(flatten)] #[serde(flatten)]
from: Enqueued<M>, from: Enqueued,
aborted_at: DateTime<Utc>, aborted_at: DateTime<Utc>,
} }
impl<M> Aborted<M> { impl Aborted {
pub fn id(&self) -> u64 { pub fn id(&self) -> u64 {
self.from.id() self.from.id()
} }
} }
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Clone)] #[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct Failed<M, E> { pub struct Failed {
#[serde(flatten)] #[serde(flatten)]
from: Processing<M>, from: Processing,
error: E, error: UpdateError,
failed_at: DateTime<Utc>, failed_at: DateTime<Utc>,
} }
impl<M, E> Failed<M, E> { impl Failed {
pub fn id(&self) -> u64 { pub fn id(&self) -> u64 {
self.from.id() self.from.id()
} }
} }
#[derive(Debug, PartialEq, Eq, Hash, Serialize)] #[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "status", rename_all = "camelCase")] #[serde(tag = "status", rename_all = "camelCase")]
pub enum UpdateStatus<M, N, E> { pub enum UpdateStatus {
Processing(Processing<M>), Processing(Processing),
Enqueued(Enqueued<M>), Enqueued(Enqueued),
Processed(Processed<M, N>), Processed(Processed),
Aborted(Aborted<M>), Aborted(Aborted),
Failed(Failed<M, E>), Failed(Failed),
} }
impl<M, N, E> UpdateStatus<M, N, E> { impl UpdateStatus {
pub fn id(&self) -> u64 { pub fn id(&self) -> u64 {
match self { match self {
UpdateStatus::Processing(u) => u.id(), UpdateStatus::Processing(u) => u.id(),
@ -140,7 +174,7 @@ impl<M, N, E> UpdateStatus<M, N, E> {
} }
} }
pub fn processed(&self) -> Option<&Processed<M, N>> { pub fn processed(&self) -> Option<&Processed> {
match self { match self {
UpdateStatus::Processed(p) => Some(p), UpdateStatus::Processed(p) => Some(p),
_ => None, _ => None,
@ -148,32 +182,32 @@ impl<M, N, E> UpdateStatus<M, N, E> {
} }
} }
impl<M, N, E> From<Enqueued<M>> for UpdateStatus<M, N, E> { impl From<Enqueued> for UpdateStatus {
fn from(other: Enqueued<M>) -> Self { fn from(other: Enqueued) -> Self {
Self::Enqueued(other) Self::Enqueued(other)
} }
} }
impl<M, N, E> From<Aborted<M>> for UpdateStatus<M, N, E> { impl From<Aborted> for UpdateStatus {
fn from(other: Aborted<M>) -> Self { fn from(other: Aborted) -> Self {
Self::Aborted(other) Self::Aborted(other)
} }
} }
impl<M, N, E> From<Processed<M, N>> for UpdateStatus<M, N, E> { impl From<Processed> for UpdateStatus {
fn from(other: Processed<M, N>) -> Self { fn from(other: Processed) -> Self {
Self::Processed(other) Self::Processed(other)
} }
} }
impl<M, N, E> From<Processing<M>> for UpdateStatus<M, N, E> { impl From<Processing> for UpdateStatus {
fn from(other: Processing<M>) -> Self { fn from(other: Processing) -> Self {
Self::Processing(other) Self::Processing(other)
} }
} }
impl<M, N, E> From<Failed<M, E>> for UpdateStatus<M, N, E> { impl From<Failed> for UpdateStatus {
fn from(other: Failed<M, E>) -> Self { fn from(other: Failed) -> Self {
Self::Failed(other) Self::Failed(other)
} }
} }

View File

@ -1,4 +1,4 @@
use std::path::PathBuf; use std::{collections::HashSet, path::PathBuf};
use log::{info, warn}; use log::{info, warn};
use tokio::sync::mpsc; use tokio::sync::mpsc;
@ -78,7 +78,7 @@ impl<S: UuidStore> UuidResolverActor<S> {
Ok(result) Ok(result)
} }
async fn handle_snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>> { async fn handle_snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
self.store.snapshot(path).await self.store.snapshot(path).await
} }

View File

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot}; use tokio::sync::{mpsc, oneshot};
@ -67,7 +68,7 @@ impl UuidResolverHandle for UuidResolverHandleImpl {
.expect("Uuid resolver actor has been killed")?) .expect("Uuid resolver actor has been killed")?)
} }
async fn snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>> { async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
let (ret, receiver) = oneshot::channel(); let (ret, receiver) = oneshot::channel();
let msg = UuidResolveMsg::SnapshotRequest { path, ret }; let msg = UuidResolveMsg::SnapshotRequest { path, ret };
let _ = self.sender.send(msg).await; let _ = self.sender.send(msg).await;

View File

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::path::PathBuf; use std::path::PathBuf;
use tokio::sync::oneshot; use tokio::sync::oneshot;
@ -28,7 +29,7 @@ pub enum UuidResolveMsg {
}, },
SnapshotRequest { SnapshotRequest {
path: PathBuf, path: PathBuf,
ret: oneshot::Sender<Result<Vec<Uuid>>>, ret: oneshot::Sender<Result<HashSet<Uuid>>>,
}, },
GetSize { GetSize {
ret: oneshot::Sender<Result<u64>>, ret: oneshot::Sender<Result<u64>>,

View File

@ -3,6 +3,7 @@ mod handle_impl;
mod message; mod message;
mod store; mod store;
use std::collections::HashSet;
use std::path::PathBuf; use std::path::PathBuf;
use thiserror::Error; use thiserror::Error;
@ -29,7 +30,7 @@ pub trait UuidResolverHandle {
async fn create(&self, name: String) -> anyhow::Result<Uuid>; async fn create(&self, name: String) -> anyhow::Result<Uuid>;
async fn delete(&self, name: String) -> anyhow::Result<Uuid>; async fn delete(&self, name: String) -> anyhow::Result<Uuid>;
async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>>; async fn list(&self) -> anyhow::Result<Vec<(String, Uuid)>>;
async fn snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>>; async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>; async fn get_size(&self) -> Result<u64>;
} }

View File

@ -1,5 +1,6 @@
use std::fs::create_dir_all;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::collections::HashSet;
use std::fs::create_dir_all;
use heed::{ use heed::{
types::{ByteSlice, Str}, types::{ByteSlice, Str},
@ -19,7 +20,7 @@ pub trait UuidStore {
async fn delete(&self, uid: String) -> Result<Option<Uuid>>; async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
async fn list(&self) -> Result<Vec<(String, Uuid)>>; async fn list(&self) -> Result<Vec<(String, Uuid)>>;
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, path: PathBuf) -> Result<Vec<Uuid>>; async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
async fn get_size(&self) -> Result<u64>; async fn get_size(&self) -> Result<u64>;
} }
@ -129,17 +130,17 @@ impl UuidStore for HeedUuidStore {
.await? .await?
} }
async fn snapshot(&self, mut path: PathBuf) -> Result<Vec<Uuid>> { async fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
let env = self.env.clone(); let env = self.env.clone();
let db = self.db; let db = self.db;
tokio::task::spawn_blocking(move || { tokio::task::spawn_blocking(move || {
// Write transaction to acquire a lock on the database. // Write transaction to acquire a lock on the database.
let txn = env.write_txn()?; let txn = env.write_txn()?;
let mut entries = Vec::new(); let mut entries = HashSet::new();
for entry in db.iter(&txn)? { for entry in db.iter(&txn)? {
let (_, uuid) = entry?; let (_, uuid) = entry?;
let uuid = Uuid::from_slice(uuid)?; let uuid = Uuid::from_slice(uuid)?;
entries.push(uuid) entries.insert(uuid);
} }
// only perform snapshot if there are indexes // only perform snapshot if there are indexes

View File

@ -107,14 +107,11 @@ async fn get_all_documents(
path: web::Path<IndexParam>, path: web::Path<IndexParam>,
params: web::Query<BrowseQuery>, params: web::Query<BrowseQuery>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let attributes_to_retrieve = params let attributes_to_retrieve = params.attributes_to_retrieve.as_ref().and_then(|attrs| {
.attributes_to_retrieve
.as_ref()
.and_then(|attrs| {
let mut names = Vec::new(); let mut names = Vec::new();
for name in attrs.split(',').map(String::from) { for name in attrs.split(',').map(String::from) {
if name == "*" { if name == "*" {
return None return None;
} }
names.push(name); names.push(name);
} }

View File

@ -185,12 +185,9 @@ impl Index<'_> {
self.service.get(url).await self.service.get(url).await
} }
make_settings_test_routes!( make_settings_test_routes!(distinct_attribute);
distinct_attribute
);
} }
pub struct GetDocumentOptions; pub struct GetDocumentOptions;
#[derive(Debug, Default)] #[derive(Debug, Default)]

View File

@ -77,8 +77,8 @@ async fn document_addition_with_primary_key() {
"content": "foo", "content": "foo",
} }
]); ]);
let (_response, code) = index.add_documents(documents, Some("primary")).await; let (response, code) = index.add_documents(documents, Some("primary")).await;
assert_eq!(code, 202); assert_eq!(code, 202, "response: {}", response);
index.wait_update_id(0).await; index.wait_update_id(0).await;
@ -189,8 +189,8 @@ async fn replace_document() {
} }
]); ]);
let (_response, code) = index.add_documents(documents, None).await; let (response, code) = index.add_documents(documents, None).await;
assert_eq!(code, 202); assert_eq!(code, 202, "response: {}", response);
index.wait_update_id(0).await; index.wait_update_id(0).await;
@ -260,8 +260,8 @@ async fn update_document() {
} }
]); ]);
let (_response, code) = index.update_documents(documents, None).await; let (response, code) = index.update_documents(documents, None).await;
assert_eq!(code, 202); assert_eq!(code, 202, "response: {}", response);
index.wait_update_id(1).await; index.wait_update_id(1).await;

View File

@ -6,14 +6,18 @@ async fn set_and_reset_distinct_attribute() {
let server = Server::new().await; let server = Server::new().await;
let index = server.index("test"); let index = server.index("test");
let (_response, _code) = index.update_settings(json!({ "distinctAttribute": "test"})).await; let (_response, _code) = index
.update_settings(json!({ "distinctAttribute": "test"}))
.await;
index.wait_update_id(0).await; index.wait_update_id(0).await;
let (response, _) = index.settings().await; let (response, _) = index.settings().await;
assert_eq!(response["distinctAttribute"], "test"); assert_eq!(response["distinctAttribute"], "test");
index.update_settings(json!({ "distinctAttribute": null })).await; index
.update_settings(json!({ "distinctAttribute": null }))
.await;
index.wait_update_id(1).await; index.wait_update_id(1).await;

View File

@ -1,2 +1,2 @@
mod get_settings;
mod distinct; mod distinct;
mod get_settings;