mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 10:07:40 +08:00
Merge #1796
1796: Feature branch: Task store r=irevoire a=MarinPostma # Feature branch: Task Store ## Spec todo https://github.com/meilisearch/specifications/blob/develop/text/0060-refashion-updates-apis.md - [x] The update resource is renamed task. The names of existing API routes are also changed to reflect this change. - [x] Tasks are now also accessible as an independent resource of an index. GET - /tasks; GET - /tasks/:taskUid - [x] The task uid is not incremented by index anymore. The sequence is generated globally. - [x] A task_not_found error is introduced. - [x] The format of the task object is updated. - [x] updateId becomes uid. - [x] Attributes of an error appearing in a failed task are now contained in a dedicated error object. - [x] type is no longer an object. It now becomes a string containing the values of its name field previously defined in the type object. - [x] The possible values for the type field are reworked to be more clear and consistent with our naming rules. - [x] A details object is added to contain specific information related to a task payload that was previously displayed in the type nested object. Previous number key is renamed numberOfDocuments. - [x] An indexUid field is added to give information about the related index on which the task is performed. - [x] duration format has been updated to express an ISO 8601 duration. - [x] processed status changes to succeeded. - [x] startedProcessingAt is updated to startedAt. - [x] processedAt is updated to finishedAt. - [x] 202 Accepted requests previously returning an updateId are now returning a summarized task object. - [x] MEILI_MAX_UDB_SIZE env var is updated to MEILI_MAX_TASK_DB_SIZE. - [x] --max-udb-size cli option is updated to --max-task-db-size. - [x] task object lists are now returned under a results array. - [x] Each operation on an index (creation, update, deletion) is now asynchronous and represented by a task. 
## Todo tech - [x] Restore Snapshots - [x] Restore dumps of documents - [x] Implements the dump of updates - [x] Error handling - [x] Fix stats - [x] Restore the Analytics - [x] [Add the new analytics](https://github.com/meilisearch/specifications/pull/92/files) - [x] Fix tests - [x] ~Deleting tasks when index is deleted (see below)~ see #1891 instead - [x] Improve details for documents addition and deletion tasks - [ ] Add integration test - [ ] Test task store filtering - [x] Rename `UuidStore` to `IndexMetaStore`, and simplify the trait. - [x] Fix task store initialization: fill pending queue from hard state - [x] Synchronously return error when creating an index with an invalid index_uid and add test - [x] Task should be returned in decreasing uid + tests (on index task route) - [x] Summarized task view - [x] fix snapshot permissions ## Implementation ### Linked PRs - #1889 - #1891 - #1892 - #1902 - #1906 - #1911 - #1914 - #1915 - #1916 - #1918 - #1924 - #1925 - #1926 - #1930 - #1936 - #1937 - #1942 - #1944 - #1945 - #1946 - #1947 - #1950 - #1951 - #1957 - #1959 - #1960 - #1961 - #1962 - #1964 ### Linked PRs in milli: - https://github.com/meilisearch/milli/pull/414 - https://github.com/meilisearch/milli/pull/409 - https://github.com/meilisearch/milli/pull/406 - https://github.com/meilisearch/milli/pull/418 ### Issues - close #1687 - close #1786 - close #1940 - close #1948 - close #1949 - close #1932 - close #1956 ### Spec patches - https://github.com/meilisearch/specifications/pull/90 Co-authored-by: Marin Postma <postma.marin@protonmail.com>
This commit is contained in:
commit
fa196986c2
823
Cargo.lock
generated
823
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -4,7 +4,5 @@ members = [
|
||||
"meilisearch-error",
|
||||
"meilisearch-lib",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
[patch.crates-io]
|
||||
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
|
||||
resolver = "2"
|
||||
|
@ -6,4 +6,11 @@ edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
actix-http = "=3.0.0-beta.10"
|
||||
actix-web = "4.0.0-beta.9"
|
||||
proptest = { version = "1.0.0", optional = true }
|
||||
proptest-derive = { version = "0.3.0", optional = true }
|
||||
serde = { version = "1.0.130", features = ["derive"] }
|
||||
serde_json = "1.0.69"
|
||||
|
||||
[features]
|
||||
test-traits = ["proptest", "proptest-derive"]
|
||||
|
@ -1,8 +1,76 @@
|
||||
use std::fmt;
|
||||
|
||||
use actix_http::http::StatusCode;
|
||||
use actix_http::{body::Body, http::StatusCode};
|
||||
use actix_web::{self as aweb, HttpResponseBuilder};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))]
|
||||
pub struct ResponseError {
|
||||
#[serde(skip)]
|
||||
#[cfg_attr(
|
||||
feature = "test-traits",
|
||||
proptest(strategy = "strategy::status_code_strategy()")
|
||||
)]
|
||||
code: StatusCode,
|
||||
message: String,
|
||||
#[serde(rename = "code")]
|
||||
error_code: String,
|
||||
#[serde(rename = "type")]
|
||||
error_type: String,
|
||||
#[serde(rename = "link")]
|
||||
error_link: String,
|
||||
}
|
||||
|
||||
impl ResponseError {
|
||||
pub fn from_msg(message: String, code: Code) -> Self {
|
||||
Self {
|
||||
code: code.http(),
|
||||
message,
|
||||
error_code: code.err_code().error_name.to_string(),
|
||||
error_type: code.type_(),
|
||||
error_link: code.url(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ResponseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.message.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for ResponseError {}
|
||||
|
||||
impl<T> From<T> for ResponseError
|
||||
where
|
||||
T: ErrorCode,
|
||||
{
|
||||
fn from(other: T) -> Self {
|
||||
Self {
|
||||
code: other.http_status(),
|
||||
message: other.to_string(),
|
||||
error_code: other.error_name(),
|
||||
error_type: other.error_type(),
|
||||
error_link: other.error_url(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl aweb::error::ResponseError for ResponseError {
|
||||
fn error_response(&self) -> aweb::HttpResponse<Body> {
|
||||
let json = serde_json::to_vec(self).unwrap();
|
||||
HttpResponseBuilder::new(self.status_code())
|
||||
.content_type("application/json")
|
||||
.body(json)
|
||||
}
|
||||
|
||||
fn status_code(&self) -> StatusCode {
|
||||
self.code
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ErrorCode: std::error::Error {
|
||||
fn error_code(&self) -> Code;
|
||||
|
||||
@ -237,3 +305,14 @@ impl ErrCode {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "test-traits")]
|
||||
mod strategy {
|
||||
use proptest::strategy::Strategy;
|
||||
|
||||
use super::*;
|
||||
|
||||
pub(super) fn status_code_strategy() -> impl Strategy<Value = StatusCode> {
|
||||
(100..999u16).prop_map(|i| StatusCode::from_u16(i).unwrap())
|
||||
}
|
||||
}
|
||||
|
@ -25,10 +25,13 @@ zip = { version = "0.5.13", optional = true }
|
||||
actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" }
|
||||
actix-web = { version = "4.0.0-beta.9", features = ["rustls"] }
|
||||
actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true }
|
||||
# TODO: specifying this dependency so semver doesn't bump to next beta
|
||||
actix-tls = "=3.0.0-beta.5"
|
||||
anyhow = { version = "1.0.43", features = ["backtrace"] }
|
||||
arc-swap = "1.3.2"
|
||||
async-stream = "0.3.2"
|
||||
async-trait = "0.1.51"
|
||||
arc-swap = "1.3.2"
|
||||
bstr = "0.2.17"
|
||||
byte-unit = { version = "4.0.12", default-features = false, features = ["std"] }
|
||||
bytes = "1.1.0"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
@ -44,13 +47,15 @@ http = "0.2.4"
|
||||
indexmap = { version = "1.7.0", features = ["serde-1"] }
|
||||
itertools = "0.10.1"
|
||||
log = "0.4.14"
|
||||
meilisearch-lib = { path = "../meilisearch-lib" }
|
||||
meilisearch-error = { path = "../meilisearch-error" }
|
||||
meilisearch-lib = { path = "../meilisearch-lib" }
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
|
||||
mime = "0.3.16"
|
||||
num_cpus = "1.13.0"
|
||||
obkv = "0.2.0"
|
||||
once_cell = "1.8.0"
|
||||
parking_lot = "0.11.2"
|
||||
pin-project = "1.0.8"
|
||||
platform-dirs = "0.3.0"
|
||||
rand = "0.8.4"
|
||||
rayon = "1.5.1"
|
||||
@ -63,16 +68,14 @@ sha2 = "0.9.6"
|
||||
siphasher = "0.3.7"
|
||||
slice-group-by = "0.2.6"
|
||||
structopt = "0.3.23"
|
||||
sysinfo = "0.20.2"
|
||||
tar = "0.4.37"
|
||||
tempfile = "3.2.0"
|
||||
thiserror = "1.0.28"
|
||||
tokio = { version = "1.11.0", features = ["full"] }
|
||||
tokio-stream = "0.1.7"
|
||||
uuid = { version = "0.8.2", features = ["serde"] }
|
||||
walkdir = "2.3.2"
|
||||
obkv = "0.2.0"
|
||||
pin-project = "1.0.8"
|
||||
sysinfo = "0.20.2"
|
||||
tokio-stream = "0.1.7"
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.2.0"
|
||||
|
@ -75,7 +75,30 @@ impl SegmentAnalytics {
|
||||
|
||||
let client = HttpClient::default();
|
||||
let user = User::UserId { user_id };
|
||||
let batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());
|
||||
let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());
|
||||
|
||||
// If Meilisearch is Launched for the first time:
|
||||
// 1. Send an event Launched associated to the user `total_launch`.
|
||||
// 2. Batch an event Launched with the real instance-id and send it in one hour.
|
||||
if first_time_run {
|
||||
let _ = batcher
|
||||
.push(Track {
|
||||
user: User::UserId {
|
||||
user_id: "total_launch".to_string(),
|
||||
},
|
||||
event: "Launched".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
let _ = batcher.flush().await;
|
||||
let _ = batcher
|
||||
.push(Track {
|
||||
user: user.clone(),
|
||||
event: "Launched".to_string(),
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
let (sender, inbox) = mpsc::channel(100); // How many analytics can we bufferize
|
||||
|
||||
@ -95,10 +118,6 @@ impl SegmentAnalytics {
|
||||
sender,
|
||||
user: user.clone(),
|
||||
};
|
||||
// batch the launched for the first time track event
|
||||
if first_time_run {
|
||||
this.publish("Launched".to_string(), json!({}), None);
|
||||
}
|
||||
|
||||
(Arc::new(this), user.to_string())
|
||||
}
|
||||
@ -216,7 +235,9 @@ impl Segment {
|
||||
|
||||
async fn run(mut self, meilisearch: MeiliSearch) {
|
||||
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
|
||||
let mut interval = tokio::time::interval(INTERVAL);
|
||||
// The first batch must be sent after one hour.
|
||||
let mut interval =
|
||||
tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);
|
||||
|
||||
loop {
|
||||
select! {
|
||||
@ -304,10 +325,8 @@ pub struct SearchAggregator {
|
||||
used_syntax: HashMap<String, usize>,
|
||||
|
||||
// q
|
||||
// everytime a request has a q field, this field must be incremented by the number of terms
|
||||
sum_of_terms_count: usize,
|
||||
// everytime a request has a q field, this field must be incremented by one
|
||||
total_number_of_q: usize,
|
||||
// The maximum number of terms in a q request
|
||||
max_terms_number: usize,
|
||||
|
||||
// pagination
|
||||
max_limit: usize,
|
||||
@ -354,8 +373,7 @@ impl SearchAggregator {
|
||||
}
|
||||
|
||||
if let Some(ref q) = query.q {
|
||||
ret.total_number_of_q = 1;
|
||||
ret.sum_of_terms_count = q.split_whitespace().count();
|
||||
ret.max_terms_number = q.split_whitespace().count();
|
||||
}
|
||||
|
||||
ret.max_limit = query.limit;
|
||||
@ -365,7 +383,7 @@ impl SearchAggregator {
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self, result: &SearchResult) {
|
||||
self.total_succeeded += 1;
|
||||
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||
self.time_spent.push(result.processing_time_ms as usize);
|
||||
}
|
||||
|
||||
@ -376,23 +394,31 @@ impl SearchAggregator {
|
||||
self.user_agents.insert(user_agent);
|
||||
}
|
||||
// request
|
||||
self.total_received += other.total_received;
|
||||
self.total_succeeded += other.total_succeeded;
|
||||
self.total_received = self.total_received.saturating_add(other.total_received);
|
||||
self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
|
||||
self.time_spent.append(&mut other.time_spent);
|
||||
// sort
|
||||
self.sort_with_geo_point |= other.sort_with_geo_point;
|
||||
self.sort_sum_of_criteria_terms += other.sort_sum_of_criteria_terms;
|
||||
self.sort_total_number_of_criteria += other.sort_total_number_of_criteria;
|
||||
self.sort_sum_of_criteria_terms = self
|
||||
.sort_sum_of_criteria_terms
|
||||
.saturating_add(other.sort_sum_of_criteria_terms);
|
||||
self.sort_total_number_of_criteria = self
|
||||
.sort_total_number_of_criteria
|
||||
.saturating_add(other.sort_total_number_of_criteria);
|
||||
// filter
|
||||
self.filter_with_geo_radius |= other.filter_with_geo_radius;
|
||||
self.filter_sum_of_criteria_terms += other.filter_sum_of_criteria_terms;
|
||||
self.filter_total_number_of_criteria += other.filter_total_number_of_criteria;
|
||||
self.filter_sum_of_criteria_terms = self
|
||||
.filter_sum_of_criteria_terms
|
||||
.saturating_add(other.filter_sum_of_criteria_terms);
|
||||
self.filter_total_number_of_criteria = self
|
||||
.filter_total_number_of_criteria
|
||||
.saturating_add(other.filter_total_number_of_criteria);
|
||||
for (key, value) in other.used_syntax.into_iter() {
|
||||
*self.used_syntax.entry(key).or_insert(0) += value;
|
||||
let used_syntax = self.used_syntax.entry(key).or_insert(0);
|
||||
*used_syntax = used_syntax.saturating_add(value);
|
||||
}
|
||||
// q
|
||||
self.sum_of_terms_count += other.sum_of_terms_count;
|
||||
self.total_number_of_q += other.total_number_of_q;
|
||||
self.max_terms_number = self.max_terms_number.max(other.max_terms_number);
|
||||
// pagination
|
||||
self.max_limit = self.max_limit.max(other.max_limit);
|
||||
self.max_offset = self.max_offset.max(other.max_offset);
|
||||
@ -407,12 +433,12 @@ impl SearchAggregator {
|
||||
// we get all the values in a sorted manner
|
||||
let time_spent = self.time_spent.into_sorted_vec();
|
||||
// We are only interested in the slowest value of the 99th fastest results
|
||||
let time_spent = time_spent[percentile_99th as usize];
|
||||
let time_spent = time_spent.get(percentile_99th as usize);
|
||||
|
||||
let properties = json!({
|
||||
"user-agent": self.user_agents,
|
||||
"requests": {
|
||||
"99th_response_time": format!("{:.2}", time_spent),
|
||||
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
|
||||
"total_succeeded": self.total_succeeded,
|
||||
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
|
||||
"total_received": self.total_received,
|
||||
@ -427,7 +453,7 @@ impl SearchAggregator {
|
||||
"most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
||||
},
|
||||
"q": {
|
||||
"avg_terms_number": format!("{:.2}", self.sum_of_terms_count as f64 / self.total_number_of_q as f64),
|
||||
"max_terms_number": self.max_terms_number,
|
||||
},
|
||||
"pagination": {
|
||||
"max_limit": self.max_limit,
|
||||
|
@ -2,12 +2,8 @@ use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use actix_web as aweb;
|
||||
use actix_web::body::Body;
|
||||
use actix_web::http::StatusCode;
|
||||
use actix_web::HttpResponseBuilder;
|
||||
use aweb::error::{JsonPayloadError, QueryPayloadError};
|
||||
use meilisearch_error::{Code, ErrorCode};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use meilisearch_error::{Code, ErrorCode, ResponseError};
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum MeilisearchHttpError {
|
||||
@ -36,54 +32,6 @@ impl From<MeilisearchHttpError> for aweb::Error {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ResponseError {
|
||||
#[serde(skip)]
|
||||
code: StatusCode,
|
||||
message: String,
|
||||
#[serde(rename = "code")]
|
||||
error_code: String,
|
||||
#[serde(rename = "type")]
|
||||
error_type: String,
|
||||
#[serde(rename = "link")]
|
||||
error_link: String,
|
||||
}
|
||||
|
||||
impl fmt::Display for ResponseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.message.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<T> for ResponseError
|
||||
where
|
||||
T: ErrorCode,
|
||||
{
|
||||
fn from(other: T) -> Self {
|
||||
Self {
|
||||
code: other.http_status(),
|
||||
message: other.to_string(),
|
||||
error_code: other.error_name(),
|
||||
error_type: other.error_type(),
|
||||
error_link: other.error_url(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl aweb::error::ResponseError for ResponseError {
|
||||
fn error_response(&self) -> aweb::HttpResponse<Body> {
|
||||
let json = serde_json::to_vec(self).unwrap();
|
||||
HttpResponseBuilder::new(self.status_code())
|
||||
.content_type("application/json")
|
||||
.body(json)
|
||||
}
|
||||
|
||||
fn status_code(&self) -> StatusCode {
|
||||
self.code
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for PayloadError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
|
@ -8,8 +8,8 @@ use std::ops::Deref;
|
||||
use actix_web::FromRequest;
|
||||
use futures::future::err;
|
||||
use futures::future::{ok, Ready};
|
||||
use meilisearch_error::ResponseError;
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use error::AuthenticationError;
|
||||
|
||||
macro_rules! create_policies {
|
||||
|
@ -1,12 +1,14 @@
|
||||
#![allow(rustdoc::private_intra_doc_links)]
|
||||
#[macro_use]
|
||||
pub mod error;
|
||||
pub mod analytics;
|
||||
mod task;
|
||||
#[macro_use]
|
||||
pub mod extractors;
|
||||
pub mod analytics;
|
||||
pub mod helpers;
|
||||
pub mod option;
|
||||
pub mod routes;
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
@ -53,7 +55,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
|
||||
let mut meilisearch = MeiliSearch::builder();
|
||||
meilisearch
|
||||
.set_max_index_size(opt.max_index_size.get_bytes() as usize)
|
||||
.set_max_update_store_size(opt.max_udb_size.get_bytes() as usize)
|
||||
.set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize)
|
||||
.set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
|
||||
.set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
|
||||
.set_dump_dst(opt.dumps_dir.clone())
|
||||
@ -180,7 +182,8 @@ macro_rules! create_app {
|
||||
use actix_web::middleware::TrailingSlash;
|
||||
use actix_web::App;
|
||||
use actix_web::{middleware, web};
|
||||
use meilisearch_http::error::{MeilisearchHttpError, ResponseError};
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_http::error::MeilisearchHttpError;
|
||||
use meilisearch_http::routes;
|
||||
use meilisearch_http::{configure_auth, configure_data, dashboard};
|
||||
|
||||
|
@ -45,8 +45,8 @@ pub struct Opt {
|
||||
pub max_index_size: Byte,
|
||||
|
||||
/// The maximum size, in bytes, of the update lmdb database directory
|
||||
#[structopt(long, env = "MEILI_MAX_UDB_SIZE", default_value = "100 GiB")]
|
||||
pub max_udb_size: Byte,
|
||||
#[structopt(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")]
|
||||
pub max_task_db_size: Byte,
|
||||
|
||||
/// The maximum size, in bytes, of accepted JSON payloads
|
||||
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")]
|
||||
|
@ -1,11 +1,11 @@
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use log::debug;
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
|
@ -1,25 +1,38 @@
|
||||
use actix_web::error::PayloadError;
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Bytes;
|
||||
use actix_web::HttpMessage;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use bstr::ByteSlice;
|
||||
use futures::{Stream, StreamExt};
|
||||
use log::debug;
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update};
|
||||
use meilisearch_lib::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use mime::Mime;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::{MeilisearchHttpError, ResponseError};
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::routes::IndexParam;
|
||||
use crate::task::SummarizedTaskView;
|
||||
|
||||
const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
|
||||
const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
|
||||
|
||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
vec![
|
||||
"application/json".to_string(),
|
||||
"application/x-ndjson".to_string(),
|
||||
"text/csv".to_string(),
|
||||
]
|
||||
});
|
||||
|
||||
/// This is required because Payload is not Sync nor Send
|
||||
fn payload_to_stream(mut payload: Payload) -> impl Stream<Item = Result<Bytes, PayloadError>> {
|
||||
let (snd, recv) = mpsc::channel(1);
|
||||
@ -31,6 +44,24 @@ fn payload_to_stream(mut payload: Payload) -> impl Stream<Item = Result<Bytes, P
|
||||
tokio_stream::wrappers::ReceiverStream::new(recv)
|
||||
}
|
||||
|
||||
/// Extracts the mime type from the content type and returns
|
||||
/// a meilisearch error if anything bad happens.
|
||||
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
|
||||
match req.mime_type() {
|
||||
Ok(Some(mime)) => Ok(Some(mime)),
|
||||
Ok(None) => Ok(None),
|
||||
Err(_) => match req.headers().get(CONTENT_TYPE) {
|
||||
Some(content_type) => Err(MeilisearchHttpError::InvalidContentType(
|
||||
content_type.as_bytes().as_bstr().to_string(),
|
||||
ACCEPTED_CONTENT_TYPE.clone(),
|
||||
)),
|
||||
None => Err(MeilisearchHttpError::MissingContentType(
|
||||
ACCEPTED_CONTENT_TYPE.clone(),
|
||||
)),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct DocumentParam {
|
||||
index_uid: String,
|
||||
@ -76,11 +107,9 @@ pub async fn delete_document(
|
||||
index_uid,
|
||||
} = path.into_inner();
|
||||
let update = Update::DeleteDocuments(vec![document_id]);
|
||||
let update_status = meilisearch
|
||||
.register_update(index_uid, update, false)
|
||||
.await?;
|
||||
debug!("returns: {:?}", update_status);
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
let task: SummarizedTaskView = meilisearch.register_update(index_uid, update).await?.into();
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
@ -93,7 +122,7 @@ pub struct BrowseQuery {
|
||||
|
||||
pub async fn get_all_documents(
|
||||
meilisearch: GuardedData<Public, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
params: web::Query<BrowseQuery>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
@ -110,7 +139,7 @@ pub async fn get_all_documents(
|
||||
|
||||
let documents = meilisearch
|
||||
.documents(
|
||||
path.index_uid.clone(),
|
||||
path.into_inner(),
|
||||
params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
|
||||
params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
|
||||
attributes_to_retrieve,
|
||||
@ -128,91 +157,83 @@ pub struct UpdateDocumentsQuery {
|
||||
|
||||
pub async fn add_documents(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: Payload,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
let content_type = req
|
||||
.headers()
|
||||
.get("Content-type")
|
||||
.map(|s| s.to_str().unwrap_or("unkown"));
|
||||
let params = params.into_inner();
|
||||
let index_uid = path.into_inner();
|
||||
|
||||
analytics.add_documents(
|
||||
¶ms,
|
||||
meilisearch.get_index(path.index_uid.clone()).await.is_err(),
|
||||
meilisearch.get_index(index_uid.clone()).await.is_err(),
|
||||
&req,
|
||||
);
|
||||
|
||||
document_addition(
|
||||
content_type,
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
meilisearch,
|
||||
path.index_uid.clone(),
|
||||
index_uid,
|
||||
params.primary_key,
|
||||
body,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
)
|
||||
.await
|
||||
.await?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn update_documents(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
params: web::Query<UpdateDocumentsQuery>,
|
||||
body: Payload,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
let content_type = req
|
||||
.headers()
|
||||
.get("Content-type")
|
||||
.map(|s| s.to_str().unwrap_or("unkown"));
|
||||
let index_uid = path.into_inner();
|
||||
|
||||
analytics.update_documents(
|
||||
¶ms,
|
||||
meilisearch.get_index(path.index_uid.clone()).await.is_err(),
|
||||
meilisearch.get_index(index_uid.clone()).await.is_err(),
|
||||
&req,
|
||||
);
|
||||
|
||||
document_addition(
|
||||
content_type,
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
meilisearch,
|
||||
path.into_inner().index_uid,
|
||||
index_uid,
|
||||
params.into_inner().primary_key,
|
||||
body,
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
)
|
||||
.await
|
||||
.await?;
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
/// Route used when the payload type is "application/json"
|
||||
/// Used to add or replace documents
|
||||
async fn document_addition(
|
||||
content_type: Option<&str>,
|
||||
mime_type: Option<Mime>,
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
body: Payload,
|
||||
method: IndexDocumentsMethod,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
vec![
|
||||
"application/json".to_string(),
|
||||
"application/x-ndjson".to_string(),
|
||||
"text/csv".to_string(),
|
||||
]
|
||||
});
|
||||
let format = match content_type {
|
||||
Some("application/json") => DocumentAdditionFormat::Json,
|
||||
Some("application/x-ndjson") => DocumentAdditionFormat::Ndjson,
|
||||
Some("text/csv") => DocumentAdditionFormat::Csv,
|
||||
Some(other) => {
|
||||
) -> Result<SummarizedTaskView, ResponseError> {
|
||||
let format = match mime_type
|
||||
.as_ref()
|
||||
.map(|m| (m.type_().as_str(), m.subtype().as_str()))
|
||||
{
|
||||
Some(("application", "json")) => DocumentAdditionFormat::Json,
|
||||
Some(("application", "x-ndjson")) => DocumentAdditionFormat::Ndjson,
|
||||
Some(("text", "csv")) => DocumentAdditionFormat::Csv,
|
||||
Some((type_, subtype)) => {
|
||||
return Err(MeilisearchHttpError::InvalidContentType(
|
||||
other.to_string(),
|
||||
format!("{}/{}", type_, subtype),
|
||||
ACCEPTED_CONTENT_TYPE.clone(),
|
||||
)
|
||||
.into())
|
||||
@ -231,15 +252,15 @@ async fn document_addition(
|
||||
format,
|
||||
};
|
||||
|
||||
let update_status = meilisearch.register_update(index_uid, update, true).await?;
|
||||
let task = meilisearch.register_update(index_uid, update).await?.into();
|
||||
|
||||
debug!("returns: {:?}", update_status);
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
pub async fn delete_documents(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
body: web::Json<Vec<Value>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
@ -253,21 +274,25 @@ pub async fn delete_documents(
|
||||
.collect();
|
||||
|
||||
let update = Update::DeleteDocuments(ids);
|
||||
let update_status = meilisearch
|
||||
.register_update(path.into_inner().index_uid, update, false)
|
||||
.await?;
|
||||
debug!("returns: {:?}", update_status);
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
let task: SummarizedTaskView = meilisearch
|
||||
.register_update(path.into_inner(), update)
|
||||
.await?
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn clear_all_documents(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update = Update::ClearDocuments;
|
||||
let update_status = meilisearch
|
||||
.register_update(path.into_inner().index_uid, update, false)
|
||||
.await?;
|
||||
debug!("returns: {:?}", update_status);
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
let task: SummarizedTaskView = meilisearch
|
||||
.register_update(path.into_inner(), update)
|
||||
.await?
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
@ -1,20 +1,20 @@
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::debug;
|
||||
use meilisearch_lib::index_controller::IndexSettings;
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::index_controller::Update;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::routes::IndexParam;
|
||||
use crate::task::SummarizedTaskView;
|
||||
|
||||
pub mod documents;
|
||||
pub mod search;
|
||||
pub mod settings;
|
||||
pub mod updates;
|
||||
pub mod tasks;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
@ -33,7 +33,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::resource("/stats").route(web::get().to(get_index_stats)))
|
||||
.service(web::scope("/documents").configure(documents::configure))
|
||||
.service(web::scope("/search").configure(search::configure))
|
||||
.service(web::scope("/updates").configure(updates::configure))
|
||||
.service(web::scope("/tasks").configure(tasks::configure))
|
||||
.service(web::scope("/settings").configure(settings::configure)),
|
||||
);
|
||||
}
|
||||
@ -59,19 +59,25 @@ pub async fn create_index(
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let body = body.into_inner();
|
||||
let IndexCreateRequest {
|
||||
primary_key, uid, ..
|
||||
} = body.into_inner();
|
||||
|
||||
analytics.publish(
|
||||
"Index Created".to_string(),
|
||||
json!({ "primary_key": body.primary_key}),
|
||||
json!({ "primary_key": primary_key }),
|
||||
Some(&req),
|
||||
);
|
||||
let meta = meilisearch.create_index(body.uid, body.primary_key).await?;
|
||||
Ok(HttpResponse::Created().json(meta))
|
||||
|
||||
let update = Update::CreateIndex { primary_key };
|
||||
let task: SummarizedTaskView = meilisearch.register_update(uid, update).await?.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
#[allow(dead_code)]
|
||||
pub struct UpdateIndexRequest {
|
||||
uid: Option<String>,
|
||||
primary_key: Option<String>,
|
||||
@ -89,16 +95,16 @@ pub struct UpdateIndexResponse {
|
||||
|
||||
pub async fn get_index(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let meta = meilisearch.get_index(path.index_uid.clone()).await?;
|
||||
let meta = meilisearch.get_index(path.into_inner()).await?;
|
||||
debug!("returns: {:?}", meta);
|
||||
Ok(HttpResponse::Ok().json(meta))
|
||||
}
|
||||
|
||||
pub async fn update_index(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
body: web::Json<UpdateIndexRequest>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
@ -110,30 +116,36 @@ pub async fn update_index(
|
||||
json!({ "primary_key": body.primary_key}),
|
||||
Some(&req),
|
||||
);
|
||||
let settings = IndexSettings {
|
||||
uid: body.uid,
|
||||
|
||||
let update = Update::UpdateIndex {
|
||||
primary_key: body.primary_key,
|
||||
};
|
||||
let meta = meilisearch
|
||||
.update_index(path.into_inner().index_uid, settings)
|
||||
.await?;
|
||||
debug!("returns: {:?}", meta);
|
||||
Ok(HttpResponse::Ok().json(meta))
|
||||
|
||||
let task: SummarizedTaskView = meilisearch
|
||||
.register_update(path.into_inner(), update)
|
||||
.await?
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn delete_index(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
meilisearch.delete_index(path.index_uid.clone()).await?;
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
let uid = path.into_inner();
|
||||
let update = Update::DeleteIndex;
|
||||
let task: SummarizedTaskView = meilisearch.register_update(uid, update).await?.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn get_index_stats(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let response = meilisearch.get_index_stats(path.index_uid.clone()).await?;
|
||||
let response = meilisearch.get_index_stats(path.into_inner()).await?;
|
||||
|
||||
debug!("returns: {:?}", response);
|
||||
Ok(HttpResponse::Ok().json(response))
|
||||
|
@ -1,14 +1,13 @@
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use log::debug;
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::analytics::{Analytics, SearchAggregator};
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::routes::IndexParam;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
@ -108,7 +107,7 @@ fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
|
||||
|
||||
pub async fn search_with_url_query(
|
||||
meilisearch: GuardedData<Public, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
params: web::Query<SearchQueryGet>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
@ -118,7 +117,7 @@ pub async fn search_with_url_query(
|
||||
|
||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let search_result = meilisearch.search(path.into_inner().index_uid, query).await;
|
||||
let search_result = meilisearch.search(path.into_inner(), query).await;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
@ -136,7 +135,7 @@ pub async fn search_with_url_query(
|
||||
|
||||
pub async fn search_with_post(
|
||||
meilisearch: GuardedData<Public, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
path: web::Path<String>,
|
||||
params: web::Json<SearchQuery>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
@ -146,7 +145,7 @@ pub async fn search_with_post(
|
||||
|
||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let search_result = meilisearch.search(path.into_inner().index_uid, query).await;
|
||||
let search_result = meilisearch.search(path.into_inner(), query).await;
|
||||
if let Ok(ref search_result) = search_result {
|
||||
aggregate.succeed(search_result);
|
||||
}
|
||||
|
@ -1,28 +1,30 @@
|
||||
use log::debug;
|
||||
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::index::{Settings, Unchecked};
|
||||
use meilisearch_lib::index_controller::Update;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::task::SummarizedTaskView;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! make_setting_route {
|
||||
($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
|
||||
pub mod $attr {
|
||||
use actix_web::{web, HttpRequest, HttpResponse, Resource};
|
||||
use log::debug;
|
||||
use actix_web::{web, HttpResponse, HttpRequest, Resource};
|
||||
|
||||
use meilisearch_lib::milli::update::Setting;
|
||||
use meilisearch_lib::{MeiliSearch, index::Settings, index_controller::Update};
|
||||
use meilisearch_lib::{index::Settings, index_controller::Update, MeiliSearch};
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{GuardedData, policies::*};
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::task::SummarizedTaskView;
|
||||
use meilisearch_error::ResponseError;
|
||||
|
||||
pub async fn delete(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
@ -32,10 +34,17 @@ macro_rules! make_setting_route {
|
||||
$attr: Setting::Reset,
|
||||
..Default::default()
|
||||
};
|
||||
let update = Update::Settings(settings);
|
||||
let update_status = meilisearch.register_update(index_uid.into_inner(), update, false).await?;
|
||||
debug!("returns: {:?}", update_status);
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
let update = Update::Settings {
|
||||
settings,
|
||||
is_deletion: true,
|
||||
};
|
||||
let task: SummarizedTaskView = meilisearch
|
||||
.register_update(index_uid.into_inner(), update)
|
||||
.await?
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn update(
|
||||
@ -43,7 +52,7 @@ macro_rules! make_setting_route {
|
||||
index_uid: actix_web::web::Path<String>,
|
||||
body: actix_web::web::Json<Option<$type>>,
|
||||
req: HttpRequest,
|
||||
$analytics_var: web::Data< dyn Analytics>,
|
||||
$analytics_var: web::Data<dyn Analytics>,
|
||||
) -> std::result::Result<HttpResponse, ResponseError> {
|
||||
let body = body.into_inner();
|
||||
|
||||
@ -52,15 +61,22 @@ macro_rules! make_setting_route {
|
||||
let settings = Settings {
|
||||
$attr: match body {
|
||||
Some(inner_body) => Setting::Set(inner_body),
|
||||
None => Setting::Reset
|
||||
None => Setting::Reset,
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let update = Update::Settings(settings);
|
||||
let update_status = meilisearch.register_update(index_uid.into_inner(), update, true).await?;
|
||||
debug!("returns: {:?}", update_status);
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "updateId": update_status.id() })))
|
||||
let update = Update::Settings {
|
||||
settings,
|
||||
is_deletion: false,
|
||||
};
|
||||
let task: SummarizedTaskView = meilisearch
|
||||
.register_update(index_uid.into_inner(), update)
|
||||
.await?
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn get(
|
||||
@ -71,6 +87,7 @@ macro_rules! make_setting_route {
|
||||
debug!("returns: {:?}", settings);
|
||||
let mut json = serde_json::json!(&settings);
|
||||
let val = json[$camelcase_attr].take();
|
||||
|
||||
Ok(HttpResponse::Ok().json(val))
|
||||
}
|
||||
|
||||
@ -151,7 +168,7 @@ make_setting_route!(
|
||||
"SearchableAttributes Updated".to_string(),
|
||||
json!({
|
||||
"searchable_attributes": {
|
||||
"total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0),
|
||||
"total": setting.as_ref().map(|searchable| searchable.len()).unwrap_or(0),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
@ -240,6 +257,9 @@ pub async fn update_all(
|
||||
"ranking_rules": {
|
||||
"sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")),
|
||||
},
|
||||
"searchable_attributes": {
|
||||
"total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()).unwrap_or(0),
|
||||
},
|
||||
"sortable_attributes": {
|
||||
"total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()).unwrap_or(0),
|
||||
"has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")).unwrap_or(false),
|
||||
@ -252,13 +272,17 @@ pub async fn update_all(
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let update = Update::Settings(settings);
|
||||
let update_result = meilisearch
|
||||
.register_update(index_uid.into_inner(), update, true)
|
||||
.await?;
|
||||
let json = serde_json::json!({ "updateId": update_result.id() });
|
||||
debug!("returns: {:?}", json);
|
||||
Ok(HttpResponse::Accepted().json(json))
|
||||
let update = Update::Settings {
|
||||
settings,
|
||||
is_deletion: false,
|
||||
};
|
||||
let task: SummarizedTaskView = meilisearch
|
||||
.register_update(index_uid.into_inner(), update)
|
||||
.await?
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn get_all(
|
||||
@ -274,13 +298,17 @@ pub async fn delete_all(
|
||||
data: GuardedData<Private, MeiliSearch>,
|
||||
index_uid: web::Path<String>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let settings = Settings::cleared();
|
||||
let settings = Settings::cleared().into_unchecked();
|
||||
|
||||
let update = Update::Settings(settings.into_unchecked());
|
||||
let update_result = data
|
||||
.register_update(index_uid.into_inner(), update, false)
|
||||
.await?;
|
||||
let json = serde_json::json!({ "updateId": update_result.id() });
|
||||
debug!("returns: {:?}", json);
|
||||
Ok(HttpResponse::Accepted().json(json))
|
||||
let update = Update::Settings {
|
||||
settings,
|
||||
is_deletion: true,
|
||||
};
|
||||
let task: SummarizedTaskView = data
|
||||
.register_update(index_uid.into_inner(), update)
|
||||
.await?
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
76
meilisearch-http/src/routes/indexes/tasks.rs
Normal file
76
meilisearch-http/src/routes/indexes/tasks.rs
Normal file
@ -0,0 +1,76 @@
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::debug;
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::task::{TaskListView, TaskView};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::get().to(get_all_tasks_status)))
|
||||
.service(web::resource("{task_id}").route(web::get().to(get_task_status)));
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct UpdateIndexResponse {
|
||||
name: String,
|
||||
uid: String,
|
||||
created_at: DateTime<Utc>,
|
||||
updated_at: DateTime<Utc>,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct UpdateParam {
|
||||
index_uid: String,
|
||||
task_id: u64,
|
||||
}
|
||||
|
||||
pub async fn get_task_status(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
index_uid: web::Path<UpdateParam>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.publish(
|
||||
"Index Tasks Seen".to_string(),
|
||||
json!({ "per_task_uid": true }),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let UpdateParam { index_uid, task_id } = index_uid.into_inner();
|
||||
|
||||
let task: TaskView = meilisearch.get_index_task(index_uid, task_id).await?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
}
|
||||
|
||||
pub async fn get_all_tasks_status(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.publish(
|
||||
"Index Tasks Seen".to_string(),
|
||||
json!({ "per_task_uid": false }),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let tasks: TaskListView = meilisearch
|
||||
.list_index_task(index_uid.into_inner(), None, None)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(TaskView::from)
|
||||
.collect::<Vec<_>>()
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", tasks);
|
||||
Ok(HttpResponse::Ok().json(tasks))
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
use actix_web::{web, HttpResponse};
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::debug;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::routes::{IndexParam, UpdateStatusResponse};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::get().to(get_all_updates_status)))
|
||||
.service(web::resource("{update_id}").route(web::get().to(get_update_status)));
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct UpdateIndexResponse {
|
||||
name: String,
|
||||
uid: String,
|
||||
created_at: DateTime<Utc>,
|
||||
updated_at: DateTime<Utc>,
|
||||
primary_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct UpdateParam {
|
||||
index_uid: String,
|
||||
update_id: u64,
|
||||
}
|
||||
|
||||
pub async fn get_update_status(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<UpdateParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let params = path.into_inner();
|
||||
let meta = meilisearch
|
||||
.update_status(params.index_uid, params.update_id)
|
||||
.await?;
|
||||
let meta = UpdateStatusResponse::from(meta);
|
||||
debug!("returns: {:?}", meta);
|
||||
Ok(HttpResponse::Ok().json(meta))
|
||||
}
|
||||
|
||||
pub async fn get_all_updates_status(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
path: web::Path<IndexParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let metas = meilisearch
|
||||
.all_update_status(path.into_inner().index_uid)
|
||||
.await?;
|
||||
let metas = metas
|
||||
.into_iter()
|
||||
.map(UpdateStatusResponse::from)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
debug!("returns: {:?}", metas);
|
||||
Ok(HttpResponse::Ok().json(metas))
|
||||
}
|
@ -1,23 +1,22 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use actix_web::{web, HttpResponse};
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::debug;
|
||||
use meilisearch_lib::index_controller::updates::status::{UpdateResult, UpdateStatus};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::index::{Settings, Unchecked};
|
||||
use meilisearch_lib::{MeiliSearch, Update};
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
|
||||
use crate::error::ResponseError;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::ApiKeys;
|
||||
|
||||
mod dump;
|
||||
pub mod indexes;
|
||||
mod tasks;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("/health").route(web::get().to(get_health)))
|
||||
cfg.service(web::scope("/tasks").configure(tasks::configure))
|
||||
.service(web::resource("/health").route(web::get().to(get_health)))
|
||||
.service(web::scope("/dumps").configure(dump::configure))
|
||||
.service(web::resource("/keys").route(web::get().to(list_keys)))
|
||||
.service(web::resource("/stats").route(web::get().to(get_stats)))
|
||||
@ -48,38 +47,6 @@ pub enum UpdateType {
|
||||
},
|
||||
}
|
||||
|
||||
impl From<&UpdateStatus> for UpdateType {
|
||||
fn from(other: &UpdateStatus) -> Self {
|
||||
use meilisearch_lib::milli::update::IndexDocumentsMethod::*;
|
||||
match other.meta() {
|
||||
Update::DocumentAddition { method, .. } => {
|
||||
let number = match other {
|
||||
UpdateStatus::Processed(processed) => match processed.success {
|
||||
UpdateResult::DocumentsAddition(ref addition) => {
|
||||
Some(addition.nb_documents)
|
||||
}
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
};
|
||||
|
||||
match method {
|
||||
ReplaceDocuments => UpdateType::DocumentsAddition { number },
|
||||
UpdateDocuments => UpdateType::DocumentsPartial { number },
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Update::Settings(settings) => UpdateType::Settings {
|
||||
settings: settings.clone(),
|
||||
},
|
||||
Update::ClearDocuments => UpdateType::ClearAll,
|
||||
Update::DeleteDocuments(ids) => UpdateType::DocumentsDeletion {
|
||||
number: Some(ids.len()),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ProcessedUpdateResult {
|
||||
@ -135,81 +102,6 @@ pub enum UpdateStatusResponse {
|
||||
},
|
||||
}
|
||||
|
||||
impl From<UpdateStatus> for UpdateStatusResponse {
|
||||
fn from(other: UpdateStatus) -> Self {
|
||||
let update_type = UpdateType::from(&other);
|
||||
|
||||
match other {
|
||||
UpdateStatus::Processing(processing) => {
|
||||
let content = EnqueuedUpdateResult {
|
||||
update_id: processing.id(),
|
||||
update_type,
|
||||
enqueued_at: processing.from.enqueued_at,
|
||||
started_processing_at: Some(processing.started_processing_at),
|
||||
};
|
||||
UpdateStatusResponse::Processing { content }
|
||||
}
|
||||
UpdateStatus::Enqueued(enqueued) => {
|
||||
let content = EnqueuedUpdateResult {
|
||||
update_id: enqueued.id(),
|
||||
update_type,
|
||||
enqueued_at: enqueued.enqueued_at,
|
||||
started_processing_at: None,
|
||||
};
|
||||
UpdateStatusResponse::Enqueued { content }
|
||||
}
|
||||
UpdateStatus::Processed(processed) => {
|
||||
let duration = processed
|
||||
.processed_at
|
||||
.signed_duration_since(processed.from.started_processing_at)
|
||||
.num_milliseconds();
|
||||
|
||||
// necessary since chrono::duration don't expose a f64 secs method.
|
||||
let duration = Duration::from_millis(duration as u64).as_secs_f64();
|
||||
|
||||
let content = ProcessedUpdateResult {
|
||||
update_id: processed.id(),
|
||||
update_type,
|
||||
duration,
|
||||
enqueued_at: processed.from.from.enqueued_at,
|
||||
processed_at: processed.processed_at,
|
||||
};
|
||||
UpdateStatusResponse::Processed { content }
|
||||
}
|
||||
UpdateStatus::Aborted(_) => unreachable!(),
|
||||
UpdateStatus::Failed(failed) => {
|
||||
let duration = failed
|
||||
.failed_at
|
||||
.signed_duration_since(failed.from.started_processing_at)
|
||||
.num_milliseconds();
|
||||
|
||||
// necessary since chrono::duration don't expose a f64 secs method.
|
||||
let duration = Duration::from_millis(duration as u64).as_secs_f64();
|
||||
|
||||
let update_id = failed.id();
|
||||
let processed_at = failed.failed_at;
|
||||
let enqueued_at = failed.from.from.enqueued_at;
|
||||
let error = failed.into();
|
||||
|
||||
let content = FailedUpdateResult {
|
||||
update_id,
|
||||
update_type,
|
||||
error,
|
||||
duration,
|
||||
enqueued_at,
|
||||
processed_at,
|
||||
};
|
||||
UpdateStatusResponse::Failed { content }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct IndexParam {
|
||||
index_uid: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexUpdateResponse {
|
||||
@ -365,8 +257,8 @@ mod test {
|
||||
indexes::documents::add_documents,
|
||||
indexes::documents::delete_document,
|
||||
|
||||
indexes::updates::get_all_updates_status,
|
||||
indexes::updates::get_update_status,
|
||||
indexes::tasks::get_all_tasks_status,
|
||||
indexes::tasks::get_task_status,
|
||||
}
|
||||
Admin => { list_keys, }
|
||||
}
|
||||
|
56
meilisearch-http/src/routes/tasks.rs
Normal file
56
meilisearch-http/src/routes/tasks.rs
Normal file
@ -0,0 +1,56 @@
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::tasks::task::TaskId;
|
||||
use meilisearch_lib::MeiliSearch;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::task::{TaskListView, TaskView};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::get().to(get_tasks)))
|
||||
.service(web::resource("/{task_id}").route(web::get().to(get_task)));
|
||||
}
|
||||
|
||||
async fn get_tasks(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.publish(
|
||||
"Tasks Seen".to_string(),
|
||||
json!({ "per_task_uid": false }),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let tasks: TaskListView = meilisearch
|
||||
.list_tasks(None, None, None)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(TaskView::from)
|
||||
.collect::<Vec<_>>()
|
||||
.into();
|
||||
|
||||
Ok(HttpResponse::Ok().json(tasks))
|
||||
}
|
||||
|
||||
async fn get_task(
|
||||
meilisearch: GuardedData<Private, MeiliSearch>,
|
||||
task_id: web::Path<TaskId>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.publish(
|
||||
"Tasks Seen".to_string(),
|
||||
json!({ "per_task_uid": true }),
|
||||
Some(&req),
|
||||
);
|
||||
|
||||
let task: TaskView = meilisearch
|
||||
.get_task(task_id.into_inner(), None)
|
||||
.await?
|
||||
.into();
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
}
|
292
meilisearch-http/src/task.rs
Normal file
292
meilisearch-http/src/task.rs
Normal file
@ -0,0 +1,292 @@
|
||||
use chrono::{DateTime, Duration, Utc};
|
||||
use meilisearch_error::ResponseError;
|
||||
use meilisearch_lib::index::{Settings, Unchecked};
|
||||
use meilisearch_lib::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_lib::tasks::task::{
|
||||
DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult,
|
||||
};
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
enum TaskType {
|
||||
IndexCreation,
|
||||
IndexUpdate,
|
||||
IndexDeletion,
|
||||
DocumentsAddition,
|
||||
DocumentsPartial,
|
||||
DocumentsDeletion,
|
||||
SettingsUpdate,
|
||||
ClearAll,
|
||||
}
|
||||
|
||||
impl From<TaskContent> for TaskType {
|
||||
fn from(other: TaskContent) -> Self {
|
||||
match other {
|
||||
TaskContent::DocumentAddition {
|
||||
merge_strategy: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..
|
||||
} => TaskType::DocumentsAddition,
|
||||
TaskContent::DocumentAddition {
|
||||
merge_strategy: IndexDocumentsMethod::UpdateDocuments,
|
||||
..
|
||||
} => TaskType::DocumentsPartial,
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Clear) => TaskType::ClearAll,
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Ids(_)) => TaskType::DocumentsDeletion,
|
||||
TaskContent::SettingsUpdate { .. } => TaskType::SettingsUpdate,
|
||||
TaskContent::IndexDeletion => TaskType::IndexDeletion,
|
||||
TaskContent::IndexCreation { .. } => TaskType::IndexCreation,
|
||||
TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate,
|
||||
_ => unreachable!("unexpected task type"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
enum TaskStatus {
|
||||
Enqueued,
|
||||
Processing,
|
||||
Succeeded,
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(untagged)]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
enum TaskDetails {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
DocumentsAddition {
|
||||
received_documents: usize,
|
||||
indexed_documents: Option<u64>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
Settings {
|
||||
#[serde(flatten)]
|
||||
settings: Settings<Unchecked>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
IndexInfo { primary_key: Option<String> },
|
||||
#[serde(rename_all = "camelCase")]
|
||||
DocumentDeletion {
|
||||
received_document_ids: usize,
|
||||
deleted_documents: Option<u64>,
|
||||
},
|
||||
#[serde(rename_all = "camelCase")]
|
||||
ClearAll { deleted_documents: Option<u64> },
|
||||
}
|
||||
|
||||
fn serialize_duration<S: Serializer>(
|
||||
duration: &Option<Duration>,
|
||||
serializer: S,
|
||||
) -> Result<S::Ok, S::Error> {
|
||||
match duration {
|
||||
Some(duration) => {
|
||||
let duration_str = duration.to_string();
|
||||
serializer.serialize_str(&duration_str)
|
||||
}
|
||||
None => serializer.serialize_none(),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TaskView {
|
||||
uid: TaskId,
|
||||
index_uid: String,
|
||||
status: TaskStatus,
|
||||
#[serde(rename = "type")]
|
||||
task_type: TaskType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
details: Option<TaskDetails>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
error: Option<ResponseError>,
|
||||
#[serde(serialize_with = "serialize_duration")]
|
||||
duration: Option<Duration>,
|
||||
enqueued_at: DateTime<Utc>,
|
||||
started_at: Option<DateTime<Utc>>,
|
||||
finished_at: Option<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
impl From<Task> for TaskView {
|
||||
fn from(task: Task) -> Self {
|
||||
let Task {
|
||||
id,
|
||||
index_uid,
|
||||
content,
|
||||
events,
|
||||
} = task;
|
||||
|
||||
let (task_type, mut details) = match content {
|
||||
TaskContent::DocumentAddition {
|
||||
merge_strategy,
|
||||
documents_count,
|
||||
..
|
||||
} => {
|
||||
let details = TaskDetails::DocumentsAddition {
|
||||
received_documents: documents_count,
|
||||
indexed_documents: None,
|
||||
};
|
||||
|
||||
let task_type = match merge_strategy {
|
||||
IndexDocumentsMethod::UpdateDocuments => TaskType::DocumentsPartial,
|
||||
IndexDocumentsMethod::ReplaceDocuments => TaskType::DocumentsAddition,
|
||||
_ => unreachable!("Unexpected document merge strategy."),
|
||||
};
|
||||
|
||||
(task_type, Some(details))
|
||||
}
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids)) => (
|
||||
TaskType::DocumentsDeletion,
|
||||
Some(TaskDetails::DocumentDeletion {
|
||||
received_document_ids: ids.len(),
|
||||
deleted_documents: None,
|
||||
}),
|
||||
),
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Clear) => (
|
||||
TaskType::ClearAll,
|
||||
Some(TaskDetails::ClearAll {
|
||||
deleted_documents: None,
|
||||
}),
|
||||
),
|
||||
TaskContent::IndexDeletion => (
|
||||
TaskType::IndexDeletion,
|
||||
Some(TaskDetails::ClearAll {
|
||||
deleted_documents: None,
|
||||
}),
|
||||
),
|
||||
TaskContent::SettingsUpdate { settings, .. } => (
|
||||
TaskType::SettingsUpdate,
|
||||
Some(TaskDetails::Settings { settings }),
|
||||
),
|
||||
TaskContent::IndexCreation { primary_key } => (
|
||||
TaskType::IndexCreation,
|
||||
Some(TaskDetails::IndexInfo { primary_key }),
|
||||
),
|
||||
TaskContent::IndexUpdate { primary_key } => (
|
||||
TaskType::IndexUpdate,
|
||||
Some(TaskDetails::IndexInfo { primary_key }),
|
||||
),
|
||||
};
|
||||
|
||||
// An event always has at least one event: "Created"
|
||||
let (status, error, finished_at) = match events.last().unwrap() {
|
||||
TaskEvent::Created(_) => (TaskStatus::Enqueued, None, None),
|
||||
TaskEvent::Batched { .. } => (TaskStatus::Enqueued, None, None),
|
||||
TaskEvent::Processing(_) => (TaskStatus::Processing, None, None),
|
||||
TaskEvent::Succeded { timestamp, result } => {
|
||||
match (result, &mut details) {
|
||||
(
|
||||
TaskResult::DocumentAddition {
|
||||
indexed_documents: num,
|
||||
..
|
||||
},
|
||||
Some(TaskDetails::DocumentsAddition {
|
||||
ref mut indexed_documents,
|
||||
..
|
||||
}),
|
||||
) => {
|
||||
indexed_documents.replace(*num);
|
||||
}
|
||||
(
|
||||
TaskResult::DocumentDeletion {
|
||||
deleted_documents: docs,
|
||||
..
|
||||
},
|
||||
Some(TaskDetails::DocumentDeletion {
|
||||
ref mut deleted_documents,
|
||||
..
|
||||
}),
|
||||
) => {
|
||||
deleted_documents.replace(*docs);
|
||||
}
|
||||
(
|
||||
TaskResult::ClearAll {
|
||||
deleted_documents: docs,
|
||||
},
|
||||
Some(TaskDetails::ClearAll {
|
||||
ref mut deleted_documents,
|
||||
}),
|
||||
) => {
|
||||
deleted_documents.replace(*docs);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
(TaskStatus::Succeeded, None, Some(*timestamp))
|
||||
}
|
||||
TaskEvent::Failed { timestamp, error } => {
|
||||
(TaskStatus::Failed, Some(error.clone()), Some(*timestamp))
|
||||
}
|
||||
};
|
||||
|
||||
let enqueued_at = match events.first() {
|
||||
Some(TaskEvent::Created(ts)) => *ts,
|
||||
_ => unreachable!("A task must always have a creation event."),
|
||||
};
|
||||
|
||||
let duration = finished_at.map(|ts| (ts - enqueued_at));
|
||||
|
||||
let started_at = events.iter().find_map(|e| match e {
|
||||
TaskEvent::Processing(ts) => Some(*ts),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
Self {
|
||||
uid: id,
|
||||
index_uid: index_uid.into_inner(),
|
||||
status,
|
||||
task_type,
|
||||
details,
|
||||
error,
|
||||
duration,
|
||||
enqueued_at,
|
||||
started_at,
|
||||
finished_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct TaskListView {
|
||||
results: Vec<TaskView>,
|
||||
}
|
||||
|
||||
impl From<Vec<TaskView>> for TaskListView {
|
||||
fn from(results: Vec<TaskView>) -> Self {
|
||||
Self { results }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SummarizedTaskView {
|
||||
uid: TaskId,
|
||||
index_uid: String,
|
||||
status: TaskStatus,
|
||||
#[serde(rename = "type")]
|
||||
task_type: TaskType,
|
||||
enqueued_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl From<Task> for SummarizedTaskView {
|
||||
fn from(mut other: Task) -> Self {
|
||||
let created_event = other
|
||||
.events
|
||||
.drain(..1)
|
||||
.next()
|
||||
.expect("Task must have an enqueued event.");
|
||||
|
||||
let enqueued_at = match created_event {
|
||||
TaskEvent::Created(ts) => ts,
|
||||
_ => unreachable!("The first event of a task must always be 'Created'"),
|
||||
};
|
||||
|
||||
Self {
|
||||
uid: other.id,
|
||||
index_uid: other.index_uid.to_string(),
|
||||
status: TaskStatus::Enqueued,
|
||||
task_type: other.content.into(),
|
||||
enqueued_at,
|
||||
}
|
||||
}
|
||||
}
|
@ -49,8 +49,8 @@ impl Index<'_> {
|
||||
.post_str(url, include_str!("../assets/test_set.json"))
|
||||
.await;
|
||||
assert_eq!(code, 202);
|
||||
let update_id = response["updateId"].as_i64().unwrap();
|
||||
self.wait_update_id(update_id as u64).await;
|
||||
let update_id = response["uid"].as_i64().unwrap();
|
||||
self.wait_task(update_id as u64).await;
|
||||
update_id as u64
|
||||
}
|
||||
|
||||
@ -114,18 +114,14 @@ impl Index<'_> {
|
||||
self.service.put(url, documents).await
|
||||
}
|
||||
|
||||
pub async fn wait_update_id(&self, update_id: u64) -> Value {
|
||||
pub async fn wait_task(&self, update_id: u64) -> Value {
|
||||
// try 10 times to get status, or panic to not wait forever
|
||||
let url = format!(
|
||||
"/indexes/{}/updates/{}",
|
||||
encode(self.uid.as_ref()).to_string(),
|
||||
update_id
|
||||
);
|
||||
let url = format!("/tasks/{}", update_id);
|
||||
for _ in 0..10 {
|
||||
let (response, status_code) = self.service.get(&url).await;
|
||||
assert_eq!(status_code, 200, "response: {}", response);
|
||||
|
||||
if response["status"] == "processed" || response["status"] == "failed" {
|
||||
if response["status"] == "succeeded" || response["status"] == "failed" {
|
||||
return response;
|
||||
}
|
||||
|
||||
@ -134,17 +130,13 @@ impl Index<'_> {
|
||||
panic!("Timeout waiting for update id");
|
||||
}
|
||||
|
||||
pub async fn get_update(&self, update_id: u64) -> (Value, StatusCode) {
|
||||
let url = format!(
|
||||
"/indexes/{}/updates/{}",
|
||||
encode(self.uid.as_ref()).to_string(),
|
||||
update_id
|
||||
);
|
||||
pub async fn get_task(&self, update_id: u64) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/tasks/{}", self.uid, update_id);
|
||||
self.service.get(url).await
|
||||
}
|
||||
|
||||
pub async fn list_updates(&self) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/updates", encode(self.uid.as_ref()).to_string());
|
||||
pub async fn list_tasks(&self) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/tasks", self.uid);
|
||||
self.service.get(url).await
|
||||
}
|
||||
|
||||
|
@ -77,6 +77,10 @@ impl Server {
|
||||
/// Issues a GET request on the `/stats` route through the test service and
/// hands back the `(Value, StatusCode)` pair it produced.
pub async fn stats(&self) -> (Value, StatusCode) {
    let route = "/stats";
    self.service.get(route).await
}
|
||||
|
||||
/// Issues a GET request on the `/tasks` route through the test service and
/// hands back the `(Value, StatusCode)` pair it produced.
pub async fn tasks(&self) -> (Value, StatusCode) {
    let route = "/tasks";
    self.service.get(route).await
}
|
||||
}
|
||||
|
||||
pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
@ -89,7 +93,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
no_analytics: true,
|
||||
max_index_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(),
|
||||
max_udb_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(),
|
||||
max_task_db_size: Byte::from_unit(4.0, ByteUnit::GiB).unwrap(),
|
||||
http_payload_size_limit: Byte::from_unit(10.0, ByteUnit::MiB).unwrap(),
|
||||
ssl_cert_path: None,
|
||||
ssl_key_path: None,
|
||||
|
@ -110,3 +110,39 @@ async fn error_json_bad_content_type() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn extract_actual_content_type() {
|
||||
let route = "/indexes/doggo/documents";
|
||||
let documents = "[{}]";
|
||||
let server = Server::new().await;
|
||||
let app = test::init_service(create_app!(
|
||||
&server.service.meilisearch,
|
||||
true,
|
||||
&server.service.options,
|
||||
analytics::MockAnalytics::new(&server.service.options).0
|
||||
))
|
||||
.await;
|
||||
|
||||
// Good content-type, we probably have an error since we didn't send anything in the json
|
||||
// so we only ensure we didn't get a bad media type error.
|
||||
let req = test::TestRequest::post()
|
||||
.uri(route)
|
||||
.set_payload(documents)
|
||||
.insert_header(("content-type", "application/json; charset=utf-8"))
|
||||
.to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let status_code = res.status();
|
||||
assert_ne!(status_code, 415,
|
||||
"calling the route `{}` with a content-type of json isn't supposed to throw a bad media type error", route);
|
||||
|
||||
let req = test::TestRequest::put()
|
||||
.uri(route)
|
||||
.set_payload(documents)
|
||||
.insert_header(("content-type", "application/json; charset=latin-1"))
|
||||
.to_request();
|
||||
let res = test::call_service(&app, req).await;
|
||||
let status_code = res.status();
|
||||
assert_ne!(status_code, 415,
|
||||
"calling the route `{}` with a content-type of json isn't supposed to throw a bad media type error", route);
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ async fn add_documents_test_json_content_types() {
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
assert_eq!(status_code, 202);
|
||||
assert_eq!(response, json!({ "updateId": 0 }));
|
||||
assert_eq!(response["uid"], 0);
|
||||
|
||||
// put
|
||||
let req = test::TestRequest::put()
|
||||
@ -47,7 +47,7 @@ async fn add_documents_test_json_content_types() {
|
||||
let body = test::read_body(res).await;
|
||||
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
|
||||
assert_eq!(status_code, 202);
|
||||
assert_eq!(response, json!({ "updateId": 1 }));
|
||||
assert_eq!(response["uid"], 1);
|
||||
}
|
||||
|
||||
/// any other content-type must be refused
|
||||
@ -538,7 +538,7 @@ async fn add_documents_no_index_creation() {
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
assert_eq!(response["updateId"], 0);
|
||||
assert_eq!(response["uid"], 0);
|
||||
/*
|
||||
* currently we don’t check these fields, to stay ISO with meilisearch
|
||||
* assert_eq!(response["status"], "pending");
|
||||
@ -548,17 +548,18 @@ async fn add_documents_no_index_creation() {
|
||||
* assert!(response.get("enqueuedAt").is_some());
|
||||
*/
|
||||
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index.get_update(0).await;
|
||||
let (response, code) = index.get_task(0).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "processed");
|
||||
assert_eq!(response["updateId"], 0);
|
||||
assert_eq!(response["type"]["name"], "DocumentsAddition");
|
||||
assert_eq!(response["type"]["number"], 1);
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
assert_eq!(response["uid"], 0);
|
||||
assert_eq!(response["type"], "documentsAddition");
|
||||
assert_eq!(response["details"]["receivedDocuments"], 1);
|
||||
assert_eq!(response["details"]["indexedDocuments"], 1);
|
||||
|
||||
let processed_at =
|
||||
DateTime::parse_from_rfc3339(response["processedAt"].as_str().unwrap()).unwrap();
|
||||
DateTime::parse_from_rfc3339(response["finishedAt"].as_str().unwrap()).unwrap();
|
||||
let enqueued_at =
|
||||
DateTime::parse_from_rfc3339(response["enqueuedAt"].as_str().unwrap()).unwrap();
|
||||
assert!(processed_at > enqueued_at);
|
||||
@ -573,7 +574,7 @@ async fn add_documents_no_index_creation() {
|
||||
async fn error_document_add_create_index_bad_uid() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("883 fj!");
|
||||
let (response, code) = index.add_documents(json!([]), None).await;
|
||||
let (response, code) = index.add_documents(json!([{"id": 1}]), None).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
@ -582,15 +583,15 @@ async fn error_document_add_create_index_bad_uid() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
assert_eq!(response, expected_response);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_document_update_create_index_bad_uid() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("883 fj!");
|
||||
let (response, code) = index.update_documents(json!([]), None).await;
|
||||
let (response, code) = index.update_documents(json!([{"id": 1}]), None).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
@ -599,8 +600,8 @@ async fn error_document_update_create_index_bad_uid() {
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
assert_eq!(response, expected_response);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -617,14 +618,15 @@ async fn document_addition_with_primary_key() {
|
||||
let (response, code) = index.add_documents(documents, Some("primary")).await;
|
||||
assert_eq!(code, 202, "response: {}", response);
|
||||
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index.get_update(0).await;
|
||||
let (response, code) = index.get_task(0).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "processed");
|
||||
assert_eq!(response["updateId"], 0);
|
||||
assert_eq!(response["type"]["name"], "DocumentsAddition");
|
||||
assert_eq!(response["type"]["number"], 1);
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
assert_eq!(response["uid"], 0);
|
||||
assert_eq!(response["type"], "documentsAddition");
|
||||
assert_eq!(response["details"]["receivedDocuments"], 1);
|
||||
assert_eq!(response["details"]["indexedDocuments"], 1);
|
||||
|
||||
let (response, code) = index.get().await;
|
||||
assert_eq!(code, 200);
|
||||
@ -645,14 +647,15 @@ async fn document_update_with_primary_key() {
|
||||
let (_response, code) = index.update_documents(documents, Some("primary")).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index.get_update(0).await;
|
||||
let (response, code) = index.get_task(0).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "processed");
|
||||
assert_eq!(response["updateId"], 0);
|
||||
assert_eq!(response["type"]["name"], "DocumentsPartial");
|
||||
assert_eq!(response["type"]["number"], 1);
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
assert_eq!(response["uid"], 0);
|
||||
assert_eq!(response["type"], "documentsPartial");
|
||||
assert_eq!(response["details"]["indexedDocuments"], 1);
|
||||
assert_eq!(response["details"]["receivedDocuments"], 1);
|
||||
|
||||
let (response, code) = index.get().await;
|
||||
assert_eq!(code, 200);
|
||||
@ -674,7 +677,7 @@ async fn replace_document() {
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202, "response: {}", response);
|
||||
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let documents = json!([
|
||||
{
|
||||
@ -686,11 +689,11 @@ async fn replace_document() {
|
||||
let (_response, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index.get_update(1).await;
|
||||
let (response, code) = index.get_task(1).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "processed");
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
|
||||
let (response, code) = index.get_document(1, None).await;
|
||||
assert_eq!(code, 200);
|
||||
@ -729,7 +732,7 @@ async fn update_document() {
|
||||
let (_response, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let documents = json!([
|
||||
{
|
||||
@ -741,11 +744,11 @@ async fn update_document() {
|
||||
let (response, code) = index.update_documents(documents, None).await;
|
||||
assert_eq!(code, 202, "response: {}", response);
|
||||
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index.get_update(1).await;
|
||||
let (response, code) = index.get_task(1).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "processed");
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
|
||||
let (response, code) = index.get_document(1, None).await;
|
||||
assert_eq!(code, 200);
|
||||
@ -760,11 +763,12 @@ async fn add_larger_dataset() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let update_id = index.load_test_set().await;
|
||||
let (response, code) = index.get_update(update_id).await;
|
||||
let (response, code) = index.get_task(update_id).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "processed");
|
||||
assert_eq!(response["type"]["name"], "DocumentsAddition");
|
||||
assert_eq!(response["type"]["number"], 77);
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
assert_eq!(response["type"], "documentsAddition");
|
||||
assert_eq!(response["details"]["indexedDocuments"], 77);
|
||||
assert_eq!(response["details"]["receivedDocuments"], 77);
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
limit: Some(1000),
|
||||
@ -781,11 +785,11 @@ async fn update_larger_dataset() {
|
||||
let index = server.index("test");
|
||||
let documents = serde_json::from_str(include_str!("../assets/test_set.json")).unwrap();
|
||||
index.update_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.get_task(0).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["type"]["name"], "DocumentsPartial");
|
||||
assert_eq!(response["type"]["number"], 77);
|
||||
assert_eq!(response["type"], "documentsPartial");
|
||||
assert_eq!(response["details"]["indexedDocuments"], 77);
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions {
|
||||
limit: Some(1000),
|
||||
@ -808,19 +812,17 @@ async fn error_add_documents_bad_document_id() {
|
||||
}
|
||||
]);
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
index.wait_task(1).await;
|
||||
let (response, code) = index.get_task(1).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], json!("failed"));
|
||||
|
||||
let expected_error = json!({
|
||||
"message": "Document identifier `foo & bar` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||
"code": "invalid_document_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
|
||||
});
|
||||
|
||||
assert_eq!(response["error"], expected_error);
|
||||
assert_eq!(response["error"]["message"], json!("Document identifier `foo & bar` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."));
|
||||
assert_eq!(response["error"]["code"], json!("invalid_document_id"));
|
||||
assert_eq!(response["error"]["type"], json!("invalid_request"));
|
||||
assert_eq!(
|
||||
response["error"]["link"],
|
||||
json!("https://docs.meilisearch.com/errors#invalid_document_id")
|
||||
);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -835,19 +837,15 @@ async fn error_update_documents_bad_document_id() {
|
||||
}
|
||||
]);
|
||||
index.update_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
assert_eq!(code, 200);
|
||||
let response = index.wait_task(1).await;
|
||||
assert_eq!(response["status"], json!("failed"));
|
||||
|
||||
let expected_error = json!({
|
||||
"message": "Document identifier `foo & bar` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||
"code": "invalid_document_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
|
||||
});
|
||||
|
||||
assert_eq!(response["error"], expected_error);
|
||||
assert_eq!(response["error"]["message"], json!("Document identifier `foo & bar` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."));
|
||||
assert_eq!(response["error"]["code"], json!("invalid_document_id"));
|
||||
assert_eq!(response["error"]["type"], json!("invalid_request"));
|
||||
assert_eq!(
|
||||
response["error"]["link"],
|
||||
json!("https://docs.meilisearch.com/errors#invalid_document_id")
|
||||
);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -862,19 +860,20 @@ async fn error_add_documents_missing_document_id() {
|
||||
}
|
||||
]);
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
index.wait_task(1).await;
|
||||
let (response, code) = index.get_task(1).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "failed");
|
||||
|
||||
let expected_error = json!({
|
||||
"message": r#"Document doesn't have a `docid` attribute: `{"id":"11","content":"foobar"}`."#,
|
||||
"code": "missing_document_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_document_id"
|
||||
});
|
||||
|
||||
assert_eq!(response["error"], expected_error);
|
||||
assert_eq!(
|
||||
response["error"]["message"],
|
||||
json!(r#"Document doesn't have a `docid` attribute: `{"id":"11","content":"foobar"}`."#)
|
||||
);
|
||||
assert_eq!(response["error"]["code"], json!("missing_document_id"));
|
||||
assert_eq!(response["error"]["type"], json!("invalid_request"));
|
||||
assert_eq!(
|
||||
response["error"]["link"],
|
||||
json!("https://docs.meilisearch.com/errors#missing_document_id")
|
||||
);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -889,19 +888,18 @@ async fn error_update_documents_missing_document_id() {
|
||||
}
|
||||
]);
|
||||
index.update_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
assert_eq!(code, 200);
|
||||
let response = index.wait_task(1).await;
|
||||
assert_eq!(response["status"], "failed");
|
||||
|
||||
let expected_error = json!({
|
||||
"message": r#"Document doesn't have a `docid` attribute: `{"id":"11","content":"foobar"}`."#,
|
||||
"code": "missing_document_id",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#missing_document_id"
|
||||
});
|
||||
|
||||
assert_eq!(response["error"], expected_error);
|
||||
assert_eq!(
|
||||
response["error"]["message"],
|
||||
r#"Document doesn't have a `docid` attribute: `{"id":"11","content":"foobar"}`."#
|
||||
);
|
||||
assert_eq!(response["error"]["code"], "missing_document_id");
|
||||
assert_eq!(response["error"]["type"], "invalid_request");
|
||||
assert_eq!(
|
||||
response["error"]["link"],
|
||||
"https://docs.meilisearch.com/errors#missing_document_id"
|
||||
);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -924,8 +922,8 @@ async fn error_document_field_limit_reached() {
|
||||
let (_response, code) = index.update_documents(documents, Some("id")).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_update_id(0).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.get_task(0).await;
|
||||
assert_eq!(code, 200);
|
||||
// Documents without a primary key are not accepted.
|
||||
assert_eq!(response["status"], "failed");
|
||||
@ -957,8 +955,8 @@ async fn error_add_documents_invalid_geo_field() {
|
||||
]);
|
||||
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
let (response, code) = index.get_update(1).await;
|
||||
index.wait_task(2).await;
|
||||
let (response, code) = index.get_task(2).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "failed");
|
||||
|
||||
@ -1011,8 +1009,8 @@ async fn error_primary_key_inference() {
|
||||
]);
|
||||
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.get_task(0).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "failed");
|
||||
|
||||
|
@ -5,8 +5,13 @@ use crate::common::{GetAllDocumentsOptions, Server};
|
||||
#[actix_rt::test]
|
||||
async fn delete_one_document_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let (_response, code) = server.index("test").delete_document(0).await;
|
||||
assert_eq!(code, 404);
|
||||
let index = server.index("test");
|
||||
let (_response, code) = index.delete_document(0).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let response = index.wait_task(0).await;
|
||||
|
||||
assert_eq!(response["status"], "failed");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -16,8 +21,8 @@ async fn delete_one_unexisting_document() {
|
||||
index.create(None).await;
|
||||
let (response, code) = index.delete_document(0).await;
|
||||
assert_eq!(code, 202, "{}", response);
|
||||
let update = index.wait_update_id(0).await;
|
||||
assert_eq!(update["status"], "processed");
|
||||
let update = index.wait_task(0).await;
|
||||
assert_eq!(update["status"], "succeeded");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -27,10 +32,10 @@ async fn delete_one_document() {
|
||||
index
|
||||
.add_documents(json!([{ "id": 0, "content": "foobar" }]), None)
|
||||
.await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (_response, code) = server.index("test").delete_document(0).await;
|
||||
assert_eq!(code, 202);
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (_response, code) = index.get_document(0, None).await;
|
||||
assert_eq!(code, 404);
|
||||
@ -39,8 +44,13 @@ async fn delete_one_document() {
|
||||
#[actix_rt::test]
|
||||
async fn clear_all_documents_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let (_response, code) = server.index("test").clear_all_documents().await;
|
||||
assert_eq!(code, 404);
|
||||
let index = server.index("test");
|
||||
let (_response, code) = index.clear_all_documents().await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let response = index.wait_task(0).await;
|
||||
|
||||
assert_eq!(response["status"], "failed");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -53,11 +63,11 @@ async fn clear_all_documents() {
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (_response, code) = index.clear_all_documents().await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let _update = index.wait_update_id(1).await;
|
||||
let _update = index.wait_task(1).await;
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions::default())
|
||||
.await;
|
||||
@ -74,7 +84,7 @@ async fn clear_all_documents_empty_index() {
|
||||
let (_response, code) = index.clear_all_documents().await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let _update = index.wait_update_id(0).await;
|
||||
let _update = index.wait_task(0).await;
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions::default())
|
||||
.await;
|
||||
@ -85,15 +95,20 @@ async fn clear_all_documents_empty_index() {
|
||||
#[actix_rt::test]
|
||||
async fn error_delete_batch_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.index("test").delete_batch(vec![]).await;
|
||||
let index = server.index("test");
|
||||
let (_, code) = index.delete_batch(vec![]).await;
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
assert_eq!(code, 404);
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let response = index.wait_task(0).await;
|
||||
|
||||
assert_eq!(response["status"], "failed");
|
||||
assert_eq!(response["error"], expected_response);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -101,11 +116,11 @@ async fn delete_batch() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }, { "id": 3, "content": "foobar" }]), Some("id")).await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (_response, code) = index.delete_batch(vec![1, 0]).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let _update = index.wait_update_id(1).await;
|
||||
let _update = index.wait_task(1).await;
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions::default())
|
||||
.await;
|
||||
@ -119,11 +134,11 @@ async fn delete_no_document_batch() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.add_documents(json!([{ "id": 1, "content": "foobar" }, { "id": 0, "content": "foobar" }, { "id": 3, "content": "foobar" }]), Some("id")).await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (_response, code) = index.delete_batch(vec![]).await;
|
||||
assert_eq!(code, 202, "{}", _response);
|
||||
|
||||
let _update = index.wait_update_id(1).await;
|
||||
let _update = index.wait_task(1).await;
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions::default())
|
||||
.await;
|
||||
|
@ -17,6 +17,7 @@ async fn error_get_unexisting_document() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.get_document(1, None).await;
|
||||
|
||||
let expected_response = json!({
|
||||
@ -43,7 +44,7 @@ async fn get_document() {
|
||||
]);
|
||||
let (_, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.get_document(0, None).await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(
|
||||
@ -75,11 +76,13 @@ async fn error_get_unexisting_index_all_documents() {
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_no_documents() {
|
||||
async fn get_no_document() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (_, code) = index.create(None).await;
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions::default())
|
||||
|
@ -7,14 +7,15 @@ async fn create_index_no_primary_key() {
|
||||
let index = server.index("test");
|
||||
let (response, code) = index.create(None).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(response["uid"], "test");
|
||||
assert_eq!(response["name"], "test");
|
||||
assert!(response.get("createdAt").is_some());
|
||||
assert!(response.get("updatedAt").is_some());
|
||||
assert_eq!(response["createdAt"], response["updatedAt"]);
|
||||
assert_eq!(response["primaryKey"], Value::Null);
|
||||
assert_eq!(response.as_object().unwrap().len(), 5);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
assert_eq!(response["status"], "enqueued");
|
||||
|
||||
let response = index.wait_task(0).await;
|
||||
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
assert_eq!(response["type"], "indexCreation");
|
||||
assert_eq!(response["details"]["primaryKey"], Value::Null);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -23,14 +24,15 @@ async fn create_index_with_primary_key() {
|
||||
let index = server.index("test");
|
||||
let (response, code) = index.create(Some("primary")).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(response["uid"], "test");
|
||||
assert_eq!(response["name"], "test");
|
||||
assert!(response.get("createdAt").is_some());
|
||||
assert!(response.get("updatedAt").is_some());
|
||||
//assert_eq!(response["createdAt"], response["updatedAt"]);
|
||||
assert_eq!(response["primaryKey"], "primary");
|
||||
assert_eq!(response.as_object().unwrap().len(), 5);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
assert_eq!(response["status"], "enqueued");
|
||||
|
||||
let response = index.wait_task(0).await;
|
||||
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
assert_eq!(response["type"], "indexCreation");
|
||||
assert_eq!(response["details"]["primaryKey"], "primary");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -42,7 +44,7 @@ async fn create_index_with_invalid_primary_key() {
|
||||
let (_response, code) = index.add_documents(document, Some("title")).await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index.get().await;
|
||||
assert_eq!(code, 200);
|
||||
@ -61,6 +63,10 @@ async fn test_create_multiple_indexes() {
|
||||
index2.create(None).await;
|
||||
index3.create(None).await;
|
||||
|
||||
index1.wait_task(0).await;
|
||||
index1.wait_task(1).await;
|
||||
index1.wait_task(2).await;
|
||||
|
||||
assert_eq!(index1.get().await.1, 200);
|
||||
assert_eq!(index2.get().await.1, 200);
|
||||
assert_eq!(index3.get().await.1, 200);
|
||||
@ -73,9 +79,11 @@ async fn error_create_existing_index() {
|
||||
let index = server.index("test");
|
||||
let (_, code) = index.create(Some("primary")).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let (response, code) = index.create(Some("primary")).await;
|
||||
index.create(Some("primary")).await;
|
||||
|
||||
let response = index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` already exists.",
|
||||
@ -84,8 +92,7 @@ async fn error_create_existing_index() {
|
||||
"link":"https://docs.meilisearch.com/errors#index_already_exists"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 409);
|
||||
assert_eq!(response["error"], expected_response);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
@ -8,11 +8,17 @@ async fn create_and_delete_index() {
|
||||
let index = server.index("test");
|
||||
let (_response, code) = index.create(None).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_task(0).await;
|
||||
|
||||
assert_eq!(index.get().await.1, 200);
|
||||
|
||||
let (_response, code) = index.delete().await;
|
||||
|
||||
assert_eq!(code, 204);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_task(1).await;
|
||||
|
||||
assert_eq!(index.get().await.1, 404);
|
||||
}
|
||||
@ -21,7 +27,9 @@ async fn create_and_delete_index() {
|
||||
async fn error_delete_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (response, code) = index.delete().await;
|
||||
let (_, code) = index.delete().await;
|
||||
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
@ -30,19 +38,29 @@ async fn error_delete_unexisting_index() {
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
let response = index.wait_task(0).await;
|
||||
assert_eq!(response["status"], "failed");
|
||||
assert_eq!(response["error"], expected_response);
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[actix_rt::test]
|
||||
async fn loop_delete_add_documents() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let documents = json!([{"id": 1, "field1": "hello"}]);
|
||||
let mut tasks = Vec::new();
|
||||
for _ in 0..50 {
|
||||
let (response, code) = index.add_documents(documents.clone(), None).await;
|
||||
tasks.push(response["uid"].as_u64().unwrap());
|
||||
assert_eq!(code, 202, "{}", response);
|
||||
let (response, code) = index.delete().await;
|
||||
assert_eq!(code, 204, "{}", response);
|
||||
tasks.push(response["uid"].as_u64().unwrap());
|
||||
assert_eq!(code, 202, "{}", response);
|
||||
}
|
||||
|
||||
for task in tasks {
|
||||
let response = index.wait_task(task).await;
|
||||
assert_eq!(response["status"], "succeeded", "{}", response);
|
||||
}
|
||||
}
|
||||
|
@ -8,7 +8,9 @@ async fn create_and_get_index() {
|
||||
let index = server.index("test");
|
||||
let (_, code) = index.create(None).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index.get().await;
|
||||
|
||||
@ -55,6 +57,8 @@ async fn list_multiple_indexes() {
|
||||
server.index("test").create(None).await;
|
||||
server.index("test1").create(Some("key")).await;
|
||||
|
||||
server.index("test").wait_task(1).await;
|
||||
|
||||
let (response, code) = server.list_indexes().await;
|
||||
assert_eq!(code, 200);
|
||||
assert!(response.is_array());
|
||||
@ -67,3 +71,22 @@ async fn list_multiple_indexes() {
|
||||
.iter()
|
||||
.any(|entry| entry["uid"] == "test1" && entry["primaryKey"] == "key"));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_invalid_index_uid() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("this is not a valid index name");
|
||||
let (response, code) = index.get().await;
|
||||
|
||||
assert_eq!(code, 404);
|
||||
assert_eq!(
|
||||
response,
|
||||
json!(
|
||||
{
|
||||
"message": "Index `this is not a valid index name` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
})
|
||||
);
|
||||
}
|
||||
|
@ -8,7 +8,9 @@ async fn stats() {
|
||||
let index = server.index("test");
|
||||
let (_, code) = index.create(Some("id")).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = index.stats().await;
|
||||
|
||||
@ -33,9 +35,9 @@ async fn stats() {
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
assert_eq!(response["updateId"], 0);
|
||||
assert_eq!(response["uid"], 1);
|
||||
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index.stats().await;
|
||||
|
||||
|
@ -8,11 +8,18 @@ async fn update_primary_key() {
|
||||
let index = server.index("test");
|
||||
let (_, code) = index.create(None).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let (response, code) = index.update(Some("primary")).await;
|
||||
index.update(Some("primary")).await;
|
||||
|
||||
let response = index.wait_task(1).await;
|
||||
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
|
||||
let (response, code) = index.get().await;
|
||||
|
||||
assert_eq!(code, 200);
|
||||
|
||||
assert_eq!(response["uid"], "test");
|
||||
assert_eq!(response["name"], "test");
|
||||
assert!(response.get("createdAt").is_some());
|
||||
@ -30,14 +37,19 @@ async fn update_primary_key() {
|
||||
async fn update_nothing() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (response, code) = index.create(None).await;
|
||||
let (_, code) = index.create(None).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let (update, code) = index.update(None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response, update);
|
||||
let (_, code) = index.update(None).await;
|
||||
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let response = index.wait_task(1).await;
|
||||
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -46,7 +58,7 @@ async fn error_update_existing_primary_key() {
|
||||
let index = server.index("test");
|
||||
let (_response, code) = index.create(Some("id")).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let documents = json!([
|
||||
{
|
||||
@ -55,9 +67,12 @@ async fn error_update_existing_primary_key() {
|
||||
}
|
||||
]);
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
|
||||
let (response, code) = index.update(Some("primary")).await;
|
||||
let (_, code) = index.update(Some("primary")).await;
|
||||
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let response = index.wait_task(2).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index already has a primary key: `id`.",
|
||||
@ -66,14 +81,17 @@ async fn error_update_existing_primary_key() {
|
||||
"link": "https://docs.meilisearch.com/errors#index_primary_key_already_exists"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
assert_eq!(response["error"], expected_response);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_update_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.index("test").update(None).await;
|
||||
let (_, code) = server.index("test").update(None).await;
|
||||
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let response = server.index("test").wait_task(0).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
@ -82,6 +100,5 @@ async fn error_update_unexisting_index() {
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
assert_eq!(response["error"], expected_response);
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ mod search;
|
||||
mod settings;
|
||||
mod snapshot;
|
||||
mod stats;
|
||||
mod updates;
|
||||
mod tasks;
|
||||
|
||||
// Tests are isolated by features in different modules to allow better readability, test
|
||||
// targetability, and improved incremental compilation times.
|
||||
|
@ -47,10 +47,10 @@ async fn filter_invalid_syntax_object() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Invalid syntax for the filter parameter: ` --> 1:7\n |\n1 | title & Glass\n | ^---\n |\n = expected word`.",
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `title & Glass`.\n1:14 title & Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -74,10 +74,10 @@ async fn filter_invalid_syntax_array() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Invalid syntax for the filter parameter: ` --> 1:7\n |\n1 | title & Glass\n | ^---\n |\n = expected word`.",
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `title & Glass`.\n1:14 title & Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -101,10 +101,10 @@ async fn filter_invalid_syntax_string() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Invalid syntax for the filter parameter: ` --> 1:15\n |\n1 | title = Glass XOR title = Glass\n | ^---\n |\n = expected EOI, and, or or`.",
|
||||
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -131,10 +131,10 @@ async fn filter_invalid_attribute_array() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.",
|
||||
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -158,10 +158,10 @@ async fn filter_invalid_attribute_string() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.",
|
||||
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -185,10 +185,10 @@ async fn filter_reserved_geo_attribute_array() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.",
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.\n1:5 _geo = Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -212,10 +212,10 @@ async fn filter_reserved_geo_attribute_string() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.",
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.\n1:5 _geo = Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -239,10 +239,10 @@ async fn filter_reserved_attribute_array() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.",
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -269,10 +269,10 @@ async fn filter_reserved_attribute_string() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.",
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass",
|
||||
"code": "invalid_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_filter"
|
||||
@ -299,7 +299,7 @@ async fn sort_geo_reserved_attribute() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a sort expression. Use the _geoPoint(latitude, longitude) built-in rule to sort on _geo field coordinates.",
|
||||
@ -331,7 +331,7 @@ async fn sort_reserved_attribute() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a sort expression.",
|
||||
@ -363,7 +363,7 @@ async fn sort_unsortable_attribute() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Attribute `title` is not sortable. Available sortable attributes are: `id`.",
|
||||
@ -395,7 +395,7 @@ async fn sort_invalid_syntax() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Invalid syntax for the sort parameter: expected expression ending by `:asc` or `:desc`, found `title`.",
|
||||
@ -429,7 +429,7 @@ async fn sort_unset_ranking_rule() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.",
|
||||
|
@ -37,7 +37,7 @@ async fn simple_placeholder_search() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
index
|
||||
.search(json!({}), |response, code| {
|
||||
@ -54,7 +54,7 @@ async fn simple_search() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "glass"}), |response, code| {
|
||||
@ -71,7 +71,7 @@ async fn search_multiple_params() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
@ -101,7 +101,7 @@ async fn search_with_filter_string_notation() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
@ -127,7 +127,7 @@ async fn search_with_filter_array_notation() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
@ -157,7 +157,7 @@ async fn search_with_sort_on_numbers() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
@ -183,7 +183,7 @@ async fn search_with_sort_on_strings() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
@ -209,7 +209,7 @@ async fn search_with_multiple_sort() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({
|
||||
@ -231,7 +231,7 @@ async fn search_facet_distribution() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
@ -259,7 +259,7 @@ async fn displayed_attributes() {
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({ "attributesToRetrieve": ["title", "id"] }))
|
||||
|
@ -9,7 +9,7 @@ async fn set_and_reset_distinct_attribute() {
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({ "distinctAttribute": "test"}))
|
||||
.await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, _) = index.settings().await;
|
||||
|
||||
@ -19,7 +19,7 @@ async fn set_and_reset_distinct_attribute() {
|
||||
.update_settings(json!({ "distinctAttribute": null }))
|
||||
.await;
|
||||
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, _) = index.settings().await;
|
||||
|
||||
@ -32,7 +32,7 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() {
|
||||
let index = server.index("test");
|
||||
|
||||
let (_response, _code) = index.update_distinct_attribute(json!("test")).await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, _) = index.get_distinct_attribute().await;
|
||||
|
||||
@ -40,7 +40,7 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() {
|
||||
|
||||
index.update_distinct_attribute(json!(null)).await;
|
||||
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, _) = index.get_distinct_attribute().await;
|
||||
|
||||
|
@ -39,6 +39,7 @@ async fn get_settings() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
let settings = response.as_object().unwrap();
|
||||
@ -77,7 +78,7 @@ async fn test_partial_update() {
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({"displayedAttributes": ["foo"]}))
|
||||
.await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["displayedAttributes"], json!(["foo"]));
|
||||
@ -86,7 +87,7 @@ async fn test_partial_update() {
|
||||
let (_response, _) = index
|
||||
.update_settings(json!({"searchableAttributes": ["bar"]}))
|
||||
.await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
@ -98,17 +99,12 @@ async fn test_partial_update() {
|
||||
async fn error_delete_settings_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (response, code) = index.delete_settings().await;
|
||||
let (_response, code) = index.delete_settings().await;
|
||||
assert_eq!(code, 202);
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
let response = index.wait_task(0).await;
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
assert_eq!(response["status"], "failed");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -126,13 +122,13 @@ async fn reset_all_settings() {
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202);
|
||||
assert_eq!(response["updateId"], 0);
|
||||
index.wait_update_id(0).await;
|
||||
assert_eq!(response["uid"], 0);
|
||||
index.wait_task(0).await;
|
||||
|
||||
index
|
||||
.update_settings(json!({"displayedAttributes": ["name", "age"], "searchableAttributes": ["name"], "stopWords": ["the"], "filterableAttributes": ["age"], "synonyms": {"puppy": ["dog", "doggo", "potat"] }}))
|
||||
.await;
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["displayedAttributes"], json!(["name", "age"]));
|
||||
@ -145,7 +141,7 @@ async fn reset_all_settings() {
|
||||
assert_eq!(response["filterableAttributes"], json!(["age"]));
|
||||
|
||||
index.delete_settings().await;
|
||||
index.wait_update_id(2).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
@ -166,10 +162,13 @@ async fn update_setting_unexisting_index() {
|
||||
let index = server.index("test");
|
||||
let (_response, code) = index.update_settings(json!({})).await;
|
||||
assert_eq!(code, 202);
|
||||
let response = index.wait_task(0).await;
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
let (_response, code) = index.get().await;
|
||||
assert_eq!(code, 200);
|
||||
let (_response, code) = index.delete_settings().await;
|
||||
assert_eq!(code, 202);
|
||||
index.delete_settings().await;
|
||||
let response = index.wait_task(1).await;
|
||||
assert_eq!(response["status"], "succeeded");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -177,16 +176,15 @@ async fn error_update_setting_unexisting_index_invalid_uid() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test##! ");
|
||||
let (response, code) = index.update_settings(json!({})).await;
|
||||
assert_eq!(code, 400);
|
||||
|
||||
let expected_response = json!({
|
||||
let expected = json!({
|
||||
"message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
|
||||
"code": "invalid_index_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
|
||||
});
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
assert_eq!(response, expected);
|
||||
}
|
||||
|
||||
macro_rules! test_setting_routes {
|
||||
@ -218,6 +216,7 @@ macro_rules! test_setting_routes {
|
||||
.collect::<String>());
|
||||
let (response, code) = server.service.post(url, serde_json::Value::Null).await;
|
||||
assert_eq!(code, 202, "{}", response);
|
||||
server.index("").wait_task(0).await;
|
||||
let (response, code) = server.index("test").get().await;
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
}
|
||||
@ -230,8 +229,10 @@ macro_rules! test_setting_routes {
|
||||
.chars()
|
||||
.map(|c| if c == '_' { '-' } else { c })
|
||||
.collect::<String>());
|
||||
let (response, code) = server.service.delete(url).await;
|
||||
assert_eq!(code, 404, "{}", response);
|
||||
let (_, code) = server.service.delete(url).await;
|
||||
assert_eq!(code, 202);
|
||||
let response = server.index("").wait_task(0).await;
|
||||
assert_eq!(response["status"], "failed");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -239,7 +240,8 @@ macro_rules! test_setting_routes {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
let (response, code) = index.create(None).await;
|
||||
assert_eq!(code, 201, "{}", response);
|
||||
assert_eq!(code, 202, "{}", response);
|
||||
index.wait_task(0).await;
|
||||
let url = format!("/indexes/test/settings/{}",
|
||||
stringify!($setting)
|
||||
.chars()
|
||||
@ -274,8 +276,8 @@ async fn error_set_invalid_ranking_rules() {
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({ "rankingRules": [ "manyTheFish"]}))
|
||||
.await;
|
||||
index.wait_update_id(0).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
index.wait_task(1).await;
|
||||
let (response, code) = index.get_task(1).await;
|
||||
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["status"], "failed");
|
||||
@ -296,7 +298,7 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() {
|
||||
let index = server.index("test");
|
||||
|
||||
let (_response, _code) = index.update_distinct_attribute(json!("test")).await;
|
||||
index.wait_update_id(0).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, _) = index.get_distinct_attribute().await;
|
||||
|
||||
@ -304,7 +306,7 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() {
|
||||
|
||||
index.update_distinct_attribute(json!(null)).await;
|
||||
|
||||
index.wait_update_id(1).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, _) = index.get_distinct_attribute().await;
|
||||
|
||||
|
@ -7,6 +7,28 @@ use tokio::time::sleep;
|
||||
|
||||
use meilisearch_http::Opt;
|
||||
|
||||
macro_rules! verify_snapshot {
|
||||
(
|
||||
$orig:expr,
|
||||
$snapshot: expr,
|
||||
|$server:ident| =>
|
||||
$($e:expr,)+) => {
|
||||
use std::sync::Arc;
|
||||
let snapshot = Arc::new($snapshot);
|
||||
let orig = Arc::new($orig);
|
||||
$(
|
||||
{
|
||||
let test= |$server: Arc<Server>| async move {
|
||||
$e.await
|
||||
};
|
||||
let (snapshot, _) = test(snapshot.clone()).await;
|
||||
let (orig, _) = test(orig.clone()).await;
|
||||
assert_eq!(snapshot, orig);
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn perform_snapshot() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
@ -20,12 +42,19 @@ async fn perform_snapshot() {
|
||||
};
|
||||
|
||||
let server = Server::new_with_options(options).await;
|
||||
|
||||
let index = server.index("test");
|
||||
index
|
||||
.update_settings(serde_json::json! ({
|
||||
"searchableAttributes": [],
|
||||
}))
|
||||
.await;
|
||||
|
||||
index.load_test_set().await;
|
||||
|
||||
let (response, _) = index
|
||||
.get_all_documents(GetAllDocumentsOptions::default())
|
||||
.await;
|
||||
server.index("test1").create(Some("prim")).await;
|
||||
|
||||
index.wait_task(2).await;
|
||||
|
||||
sleep(Duration::from_secs(2)).await;
|
||||
|
||||
@ -41,12 +70,17 @@ async fn perform_snapshot() {
|
||||
..default_settings(temp.path())
|
||||
};
|
||||
|
||||
let server = Server::new_with_options(options).await;
|
||||
let index = server.index("test");
|
||||
let snapshot_server = Server::new_with_options(options).await;
|
||||
|
||||
let (response_from_snapshot, _) = index
|
||||
.get_all_documents(GetAllDocumentsOptions::default())
|
||||
.await;
|
||||
|
||||
assert_eq!(response, response_from_snapshot);
|
||||
verify_snapshot!(server, snapshot_server, |server| =>
|
||||
server.list_indexes(),
|
||||
// for some reason the db sizes differ. this may be due to the compaction options we have
|
||||
// set when performing the snapshot
|
||||
//server.stats(),
|
||||
server.tasks(),
|
||||
server.index("test").get_all_documents(GetAllDocumentsOptions::default()),
|
||||
server.index("test").settings(),
|
||||
server.index("test1").get_all_documents(GetAllDocumentsOptions::default()),
|
||||
server.index("test1").settings(),
|
||||
);
|
||||
}
|
||||
|
@ -28,7 +28,8 @@ async fn stats() {
|
||||
let index = server.index("test");
|
||||
let (_, code) = index.create(Some("id")).await;
|
||||
|
||||
assert_eq!(code, 201);
|
||||
assert_eq!(code, 202);
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, code) = server.stats().await;
|
||||
|
||||
@ -52,10 +53,9 @@ async fn stats() {
|
||||
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
assert_eq!(code, 202, "{}", response);
|
||||
assert_eq!(response["updateId"], 0);
|
||||
assert_eq!(response["uid"], 1);
|
||||
|
||||
let response = index.wait_update_id(0).await;
|
||||
println!("response: {}", response);
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = server.stats().await;
|
||||
|
||||
|
133
meilisearch-http/tests/tasks/mod.rs
Normal file
133
meilisearch-http/tests/tasks/mod.rs
Normal file
@ -0,0 +1,133 @@
|
||||
use crate::common::Server;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde_json::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_get_task_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.service.get("/indexes/test/tasks").await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_get_unexisting_task_status() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
index.wait_task(0).await;
|
||||
let (response, code) = index.get_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Task `1` not found.",
|
||||
"code": "task_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#task_not_found"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_task_status() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
index
|
||||
.add_documents(
|
||||
serde_json::json!([{
|
||||
"id": 1,
|
||||
"content": "foobar",
|
||||
}]),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
index.wait_task(0).await;
|
||||
let (_response, code) = index.get_task(1).await;
|
||||
assert_eq!(code, 200);
|
||||
// TODO check response format, as per #48
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_list_tasks_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.index("test").list_tasks().await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn list_tasks() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
index.wait_task(0).await;
|
||||
index
|
||||
.add_documents(
|
||||
serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let (response, code) = index.list_tasks().await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["results"].as_array().unwrap().len(), 2);
|
||||
}
|
||||
|
||||
/// Asserts that `$response` is a well-formed summarized task object.
///
/// Checks that the JSON object has exactly five fields, that `uid` is an
/// unsigned integer, that `indexUid` equals `$index`, that `status` is
/// `"enqueued"`, that `type` equals `$task_type`, and that `enqueuedAt` is a
/// string that parses as a `DateTime<Utc>`. Panics on the first failed check.
macro_rules! assert_valid_summarized_task {
|
||||
($response:expr, $task_type:literal, $index:literal) => {{
|
||||
assert_eq!($response.as_object().unwrap().len(), 5);
|
||||
assert!($response["uid"].as_u64().is_some());
|
||||
assert_eq!($response["indexUid"], $index);
|
||||
assert_eq!($response["status"], "enqueued");
|
||||
assert_eq!($response["type"], $task_type);
|
||||
let date = $response["enqueuedAt"].as_str().expect("missing date");
|
||||
date.parse::<DateTime<Utc>>().unwrap();
|
||||
}};
|
||||
}
|
||||
|
||||
#[actix_web::test]
|
||||
async fn test_summarized_task_view() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (response, _) = index.create(None).await;
|
||||
assert_valid_summarized_task!(response, "indexCreation", "test");
|
||||
|
||||
let (response, _) = index.update(None).await;
|
||||
assert_valid_summarized_task!(response, "indexUpdate", "test");
|
||||
|
||||
let (response, _) = index.update_settings(json!({})).await;
|
||||
assert_valid_summarized_task!(response, "settingsUpdate", "test");
|
||||
|
||||
let (response, _) = index.update_documents(json!([{"id": 1}]), None).await;
|
||||
assert_valid_summarized_task!(response, "documentsPartial", "test");
|
||||
|
||||
let (response, _) = index.add_documents(json!([{"id": 1}]), None).await;
|
||||
assert_valid_summarized_task!(response, "documentsAddition", "test");
|
||||
|
||||
let (response, _) = index.delete_document(1).await;
|
||||
assert_valid_summarized_task!(response, "documentsDeletion", "test");
|
||||
|
||||
let (response, _) = index.clear_all_documents().await;
|
||||
assert_valid_summarized_task!(response, "clearAll", "test");
|
||||
|
||||
let (response, _) = index.delete().await;
|
||||
assert_valid_summarized_task!(response, "indexDeletion", "test");
|
||||
}
|
@ -1,97 +0,0 @@
|
||||
use crate::common::Server;
|
||||
use serde_json::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_get_update_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.index("test").get_update(0).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_get_unexisting_update_status() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
let (response, code) = index.get_update(0).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Task `0` not found.",
|
||||
"code": "task_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#task_not_found"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_update_status() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
index
|
||||
.add_documents(
|
||||
serde_json::json!([{
|
||||
"id": 1,
|
||||
"content": "foobar",
|
||||
}]),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let (_response, code) = index.get_update(0).await;
|
||||
assert_eq!(code, 200);
|
||||
// TODO check resonse format, as per #48
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn error_list_updates_unexisting_index() {
|
||||
let server = Server::new().await;
|
||||
let (response, code) = server.index("test").list_updates().await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
});
|
||||
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 404);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn list_no_updates() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
let (response, code) = index.list_updates().await;
|
||||
assert_eq!(code, 200);
|
||||
assert!(response.as_array().unwrap().is_empty());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn list_updates() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
index.create(None).await;
|
||||
index
|
||||
.add_documents(
|
||||
serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let (response, code) = index.list_updates().await;
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response.as_array().unwrap().len(), 1);
|
||||
}
|
@ -2,6 +2,7 @@
|
||||
name = "meilisearch-lib"
|
||||
version = "0.24.0"
|
||||
edition = "2018"
|
||||
resolver = "2"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
@ -11,7 +12,6 @@ actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-stati
|
||||
anyhow = { version = "1.0.43", features = ["backtrace"] }
|
||||
async-stream = "0.3.2"
|
||||
async-trait = "0.1.51"
|
||||
arc-swap = "1.3.2"
|
||||
byte-unit = { version = "4.0.12", default-features = false, features = ["std"] }
|
||||
bytes = "1.1.0"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
@ -30,7 +30,7 @@ lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
meilisearch-error = { path = "../meilisearch-error" }
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.20.2" }
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.21.0" }
|
||||
mime = "0.3.16"
|
||||
num_cpus = "1.13.0"
|
||||
once_cell = "1.8.0"
|
||||
@ -56,8 +56,13 @@ whoami = { version = "1.1.3", optional = true }
|
||||
reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true }
|
||||
sysinfo = "0.20.2"
|
||||
derivative = "2.2.0"
|
||||
fs_extra = "1.2.0"
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.2.0"
|
||||
mockall = "0.10.2"
|
||||
paste = "1.0.5"
|
||||
nelson = { git = "https://github.com/MarinPostma/nelson.git", rev = "e5f4ff046c21e7e986c7cb31550d1c9e7f0b693b"}
|
||||
meilisearch-error = { path = "../meilisearch-error", features = ["test-traits"] }
|
||||
proptest = "1.0.0"
|
||||
proptest-derive = "0.3.0"
|
||||
|
19
meilisearch-lib/proptest-regressions/index_resolver/mod.txt
Normal file
19
meilisearch-lib/proptest-regressions/index_resolver/mod.txt
Normal file
@ -0,0 +1,19 @@
|
||||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc 6f3ae3cba934ba3e328e2306218c32f27a46ce2d54a1258b05fef65663208662 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentAddition { content_uuid: 37bc137d-2038-47f0-819f-b133233daadc, merge_strategy: ReplaceDocuments, primary_key: None, documents_count: 0 }, events: [] }
|
||||
cc b726f7d9f44a9216aad302ddba0f04e7108817e741d656a4759aea8562de4d63 # shrinks to task = Task { id: 0, index_uid: IndexUid("_"), content: IndexDeletion, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc 427ec2dde3260b1ab334207bdc22adef28a5b8532b9902c84b55fd2c017ea7e1 # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = false, any_int = 0
|
||||
cc c24f3d42f0f36fbdbf4e9d4327e75529b163ac580d63a5934ca05e9b5bd23a65 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = true, any_int = 0
|
||||
cc 8084e2410801b997533b0bcbad75cd212873cfc2677f26847f68c568ead1604c # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: false }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc 330085e0200a9a2ddfdd764a03d768aa95c431bcaafbd530c8c949425beed18b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0
|
||||
cc c70e901576ef2fb9622e814bdecd11e4747cd70d71a9a6ce771b5b7256a187c0 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: true }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc 3fe2c38cbc2cca34ecde321472141d386056f0cd332cbf700773657715a382b5 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc c31cf86692968483f1ab08a6a9d4667ccb9635c306998551bf1eb1f135ef0d4b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: Some("") }, events: [] }, index_exists = true, index_op_fails = false, any_int = 0
|
||||
cc 3a01c78db082434b8a4f8914abf0d1059d39f4426d16df20d72e1bd7ebb94a6a # shrinks to task = Task { id: 0, index_uid: IndexUid("0"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = true, index_op_fails = true, any_int = 0
|
||||
cc c450806df3921d1e6fe9b6af93d999e8196d0175b69b64f1810802582421e94a # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0
|
||||
cc fb6b98947cbdbdee05ed3c0bf2923aad2c311edc276253642eb43a0c0ec4888a # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0
|
||||
cc 1aa59d8e22484e9915efbb5818e1e1ab684aa61b166dc82130d6221663ba00bf # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentDeletion(Clear), events: [] }, index_exists = true, index_op_fails = false, any_int = 0
|
@ -0,0 +1,7 @@
|
||||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc 8cbd6c45ce8c5611ec3f2f94fd485f6a8eeccc470fa426e59bdfd4d9e7fce0e1 # shrinks to bytes = []
|
@ -58,7 +58,7 @@ impl ErrorCode for DocumentFormatError {
|
||||
internal_error!(DocumentFormatError: io::Error);
|
||||
|
||||
/// Reads csv from input and writes an obkv batch to writer.
|
||||
pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
||||
pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let writer = BufWriter::new(writer);
|
||||
let builder =
|
||||
DocumentBatchBuilder::from_csv(input, writer).map_err(|e| (PayloadType::Csv, e))?;
|
||||
@ -67,13 +67,13 @@ pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
||||
return Err(DocumentFormatError::EmptyPayload(PayloadType::Csv));
|
||||
}
|
||||
|
||||
builder.finish().map_err(|e| (PayloadType::Csv, e))?;
|
||||
let count = builder.finish().map_err(|e| (PayloadType::Csv, e))?;
|
||||
|
||||
Ok(())
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
/// Reads jsonl from input and writes an obkv batch to writer.
|
||||
pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
||||
pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let mut reader = BufReader::new(input);
|
||||
let writer = BufWriter::new(writer);
|
||||
|
||||
@ -91,13 +91,13 @@ pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
||||
return Err(DocumentFormatError::EmptyPayload(PayloadType::Ndjson));
|
||||
}
|
||||
|
||||
builder.finish().map_err(|e| (PayloadType::Ndjson, e))?;
|
||||
let count = builder.finish().map_err(|e| (PayloadType::Ndjson, e))?;
|
||||
|
||||
Ok(())
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
/// Reads json from input and writes an obkv batch to writer.
|
||||
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
||||
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let writer = BufWriter::new(writer);
|
||||
let mut builder = DocumentBatchBuilder::new(writer).map_err(|e| (PayloadType::Json, e))?;
|
||||
builder
|
||||
@ -108,7 +108,7 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<()> {
|
||||
return Err(DocumentFormatError::EmptyPayload(PayloadType::Json));
|
||||
}
|
||||
|
||||
builder.finish().map_err(|e| (PayloadType::Json, e))?;
|
||||
let count = builder.finish().map_err(|e| (PayloadType::Json, e))?;
|
||||
|
||||
Ok(())
|
||||
Ok(count)
|
||||
}
|
||||
|
@ -112,7 +112,7 @@ impl Index {
|
||||
let mut txn = index.write_txn()?;
|
||||
|
||||
// Apply settings first
|
||||
let builder = update_handler.update_builder(0);
|
||||
let builder = update_handler.update_builder();
|
||||
let mut builder = builder.settings(&mut txn, &index);
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
@ -121,7 +121,7 @@ impl Index {
|
||||
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
|
||||
builder.execute(|_, _| ())?;
|
||||
builder.execute(|_| ())?;
|
||||
|
||||
let document_file_path = src.as_ref().join(DATA_FILE_NAME);
|
||||
let reader = BufReader::new(File::open(&document_file_path)?);
|
||||
@ -138,9 +138,9 @@ impl Index {
|
||||
//a primary key error to be thrown.
|
||||
if !documents_reader.is_empty() {
|
||||
let builder = update_handler
|
||||
.update_builder(0)
|
||||
.update_builder()
|
||||
.index_documents(&mut txn, &index);
|
||||
builder.execute(documents_reader, |_, _| ())?;
|
||||
builder.execute(documents_reader, |_| ())?;
|
||||
}
|
||||
|
||||
txn.commit()?;
|
||||
|
@ -13,7 +13,6 @@ use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Map, Value};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
use crate::EnvSizer;
|
||||
|
||||
use super::error::IndexError;
|
||||
@ -26,7 +25,7 @@ pub type Document = Map<String, Value>;
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexMeta {
|
||||
created_at: DateTime<Utc>,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub updated_at: DateTime<Utc>,
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
@ -69,8 +68,6 @@ pub struct Index {
|
||||
#[derivative(Debug = "ignore")]
|
||||
pub inner: Arc<milli::Index>,
|
||||
#[derivative(Debug = "ignore")]
|
||||
pub update_file_store: Arc<UpdateFileStore>,
|
||||
#[derivative(Debug = "ignore")]
|
||||
pub update_handler: Arc<UpdateHandler>,
|
||||
}
|
||||
|
||||
@ -86,24 +83,24 @@ impl Index {
|
||||
pub fn open(
|
||||
path: impl AsRef<Path>,
|
||||
size: usize,
|
||||
update_file_store: Arc<UpdateFileStore>,
|
||||
uuid: Uuid,
|
||||
update_handler: Arc<UpdateHandler>,
|
||||
) -> Result<Self> {
|
||||
log::debug!("opening index in {}", path.as_ref().display());
|
||||
create_dir_all(&path)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(size);
|
||||
let inner = Arc::new(milli::Index::new(options, &path)?);
|
||||
Ok(Index {
|
||||
inner,
|
||||
update_file_store,
|
||||
uuid,
|
||||
update_handler,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &milli::Index {
|
||||
&self.inner
|
||||
/// Asynchronously close the underlying index
|
||||
pub fn close(self) {
|
||||
self.inner.as_ref().clone().prepare_for_closing();
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> Result<IndexStats> {
|
||||
@ -284,3 +281,17 @@ impl Index {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// When running tests, when a server instance is dropped, the environment is not actually closed,
|
||||
/// leaving a lot of open file descriptors.
|
||||
impl Drop for Index {
|
||||
fn drop(&mut self) {
|
||||
// When dropping the last instance of an index, we want to close the index
|
||||
// Note that the close is actually performed only if all the instances are effectively
|
||||
// dropped
|
||||
|
||||
if Arc::strong_count(&self.inner) == 1 {
|
||||
self.inner.as_ref().clone().prepare_for_closing();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ mod dump;
|
||||
pub mod error;
|
||||
mod search;
|
||||
pub mod update_handler;
|
||||
mod updates;
|
||||
pub mod updates;
|
||||
|
||||
#[allow(clippy::module_inception)]
|
||||
mod index;
|
||||
@ -22,191 +22,40 @@ pub use test::MockIndex as Index;
|
||||
/// code for unit testing, in places where an index would normally be used.
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::panic::{RefUnwindSafe, UnwindSafe};
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::sync::Arc;
|
||||
|
||||
use milli::update::{DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod};
|
||||
use nelson::Mocker;
|
||||
use serde_json::{Map, Value};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
use crate::index_controller::updates::status::{Failed, Processed, Processing};
|
||||
|
||||
use super::error::Result;
|
||||
use super::index::Index;
|
||||
use super::update_handler::UpdateHandler;
|
||||
use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub struct Stub<A, R> {
|
||||
name: String,
|
||||
times: Mutex<Option<usize>>,
|
||||
stub: Box<dyn Fn(A) -> R + Sync + Send>,
|
||||
invalidated: AtomicBool,
|
||||
}
|
||||
|
||||
impl<A, R> Drop for Stub<A, R> {
|
||||
fn drop(&mut self) {
|
||||
if !self.invalidated.load(Ordering::Relaxed) {
|
||||
let lock = self.times.lock().unwrap();
|
||||
if let Some(n) = *lock {
|
||||
assert_eq!(n, 0, "{} not called enough times", self.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<A, R> Stub<A, R> {
|
||||
fn invalidate(&self) {
|
||||
self.invalidated.store(true, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
impl<A: UnwindSafe, R> Stub<A, R> {
|
||||
fn call(&self, args: A) -> R {
|
||||
let mut lock = self.times.lock().unwrap();
|
||||
match *lock {
|
||||
Some(0) => panic!("{} called to many times", self.name),
|
||||
Some(ref mut times) => {
|
||||
*times -= 1;
|
||||
}
|
||||
None => (),
|
||||
}
|
||||
|
||||
// Since we add assertions in the drop implementation for Stub, a panic can occur in a
|
||||
// panic, causing a hard abort of the program. To handle that, we catch the panic, and
|
||||
// set the stub as invalidated so the assertions aren't run during the drop.
|
||||
impl<'a, A, R> RefUnwindSafe for StubHolder<'a, A, R> {}
|
||||
struct StubHolder<'a, A, R>(&'a (dyn Fn(A) -> R + Sync + Send));
|
||||
|
||||
let stub = StubHolder(self.stub.as_ref());
|
||||
|
||||
match std::panic::catch_unwind(|| (stub.0)(args)) {
|
||||
Ok(r) => r,
|
||||
Err(panic) => {
|
||||
self.invalidate();
|
||||
std::panic::resume_unwind(panic);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct StubStore {
|
||||
inner: Arc<Mutex<HashMap<String, Box<dyn Any + Sync + Send>>>>,
|
||||
}
|
||||
|
||||
impl StubStore {
|
||||
pub fn insert<A: 'static, R: 'static>(&self, name: String, stub: Stub<A, R>) {
|
||||
let mut lock = self.inner.lock().unwrap();
|
||||
lock.insert(name, Box::new(stub));
|
||||
}
|
||||
|
||||
pub fn get<A, B>(&self, name: &str) -> Option<&Stub<A, B>> {
|
||||
let mut lock = self.inner.lock().unwrap();
|
||||
match lock.get_mut(name) {
|
||||
Some(s) => {
|
||||
let s = s.as_mut() as *mut dyn Any as *mut Stub<A, B>;
|
||||
Some(unsafe { &mut *s })
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StubBuilder<'a, A, R> {
|
||||
name: String,
|
||||
store: &'a StubStore,
|
||||
times: Option<usize>,
|
||||
_f: std::marker::PhantomData<fn(A) -> R>,
|
||||
}
|
||||
|
||||
impl<'a, A: 'static, R: 'static> StubBuilder<'a, A, R> {
|
||||
/// Asserts the stub has been called exactly `times` times.
|
||||
#[must_use]
|
||||
pub fn times(mut self, times: usize) -> Self {
|
||||
self.times = Some(times);
|
||||
self
|
||||
}
|
||||
|
||||
/// Asserts the stub has been called exactly once.
|
||||
#[must_use]
|
||||
pub fn once(mut self) -> Self {
|
||||
self.times = Some(1);
|
||||
self
|
||||
}
|
||||
|
||||
/// The function that will be called when the stub is called. This needs to be called to
|
||||
/// actually build the stub and register it to the stub store.
|
||||
pub fn then(self, f: impl Fn(A) -> R + Sync + Send + 'static) {
|
||||
let times = Mutex::new(self.times);
|
||||
let stub = Stub {
|
||||
stub: Box::new(f),
|
||||
times,
|
||||
name: self.name.clone(),
|
||||
invalidated: AtomicBool::new(false),
|
||||
};
|
||||
|
||||
self.store.insert(self.name, stub);
|
||||
}
|
||||
}
|
||||
|
||||
/// Mocker allows to stub metod call on any struct. you can register stubs by calling
|
||||
/// `Mocker::when` and retrieve it in the proxy implementation when with `Mocker::get`.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Mocker {
|
||||
store: StubStore,
|
||||
}
|
||||
|
||||
impl Mocker {
|
||||
pub fn when<A, R>(&self, name: &str) -> StubBuilder<A, R> {
|
||||
StubBuilder {
|
||||
name: name.to_string(),
|
||||
store: &self.store,
|
||||
times: None,
|
||||
_f: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get<A, R>(&self, name: &str) -> &Stub<A, R> {
|
||||
match self.store.get(name) {
|
||||
Some(stub) => stub,
|
||||
None => {
|
||||
// panic here causes the stubs to get dropped, and panic in turn. To prevent
|
||||
// that, we forget them, and let them be cleaned by the os later. This is not
|
||||
// optimal, but is still better than nested panicks.
|
||||
let mut stubs = self.store.inner.lock().unwrap();
|
||||
let stubs = std::mem::take(&mut *stubs);
|
||||
std::mem::forget(stubs);
|
||||
panic!("unexpected call to {}", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Clone)]
|
||||
pub enum MockIndex {
|
||||
Vrai(Index),
|
||||
Faux(Arc<Mocker>),
|
||||
Real(Index),
|
||||
Mock(Arc<Mocker>),
|
||||
}
|
||||
|
||||
impl MockIndex {
|
||||
pub fn faux(faux: Mocker) -> Self {
|
||||
Self::Faux(Arc::new(faux))
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(Arc::new(mocker))
|
||||
}
|
||||
|
||||
pub fn open(
|
||||
path: impl AsRef<Path>,
|
||||
size: usize,
|
||||
update_file_store: Arc<UpdateFileStore>,
|
||||
uuid: Uuid,
|
||||
update_handler: Arc<UpdateHandler>,
|
||||
) -> Result<Self> {
|
||||
let index = Index::open(path, size, update_file_store, uuid, update_handler)?;
|
||||
Ok(Self::Vrai(index))
|
||||
let index = Index::open(path, size, uuid, update_handler)?;
|
||||
Ok(Self::Real(index))
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
@ -215,41 +64,33 @@ pub mod test {
|
||||
size: usize,
|
||||
update_handler: &UpdateHandler,
|
||||
) -> anyhow::Result<()> {
|
||||
Index::load_dump(src, dst, size, update_handler)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn handle_update(&self, update: Processing) -> std::result::Result<Processed, Failed> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.handle_update(update),
|
||||
MockIndex::Faux(faux) => faux.get("handle_update").call(update),
|
||||
}
|
||||
Index::load_dump(src, dst, size, update_handler)
|
||||
}
|
||||
|
||||
pub fn uuid(&self) -> Uuid {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.uuid(),
|
||||
MockIndex::Faux(faux) => faux.get("uuid").call(()),
|
||||
MockIndex::Real(index) => index.uuid(),
|
||||
MockIndex::Mock(m) => unsafe { m.get("uuid").call(()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> Result<IndexStats> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.stats(),
|
||||
MockIndex::Faux(_) => todo!(),
|
||||
MockIndex::Real(index) => index.stats(),
|
||||
MockIndex::Mock(m) => unsafe { m.get("stats").call(()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> Result<IndexMeta> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.meta(),
|
||||
MockIndex::Faux(_) => todo!(),
|
||||
MockIndex::Real(index) => index.meta(),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
pub fn settings(&self) -> Result<Settings<Checked>> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.settings(),
|
||||
MockIndex::Faux(_) => todo!(),
|
||||
MockIndex::Real(index) => index.settings(),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -260,10 +101,10 @@ pub mod test {
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<Vec<Map<String, Value>>> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => {
|
||||
MockIndex::Real(index) => {
|
||||
index.retrieve_documents(offset, limit, attributes_to_retrieve)
|
||||
}
|
||||
MockIndex::Faux(_) => todo!(),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -273,49 +114,93 @@ pub mod test {
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<Map<String, Value>> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.retrieve_document(doc_id, attributes_to_retrieve),
|
||||
MockIndex::Faux(_) => todo!(),
|
||||
MockIndex::Real(index) => index.retrieve_document(doc_id, attributes_to_retrieve),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size(&self) -> u64 {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.size(),
|
||||
MockIndex::Faux(_) => todo!(),
|
||||
MockIndex::Real(index) => index.size(),
|
||||
MockIndex::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.snapshot(path),
|
||||
MockIndex::Faux(faux) => faux.get("snapshot").call(path.as_ref()),
|
||||
MockIndex::Real(index) => index.snapshot(path),
|
||||
MockIndex::Mock(m) => unsafe { m.get("snapshot").call(path.as_ref()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &milli::Index {
|
||||
pub fn close(self) {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.inner(),
|
||||
MockIndex::Faux(_) => todo!(),
|
||||
MockIndex::Real(index) => index.close(),
|
||||
MockIndex::Mock(m) => unsafe { m.get("close").call(()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_primary_key(&self, primary_key: Option<String>) -> Result<IndexMeta> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.update_primary_key(primary_key),
|
||||
MockIndex::Faux(_) => todo!(),
|
||||
}
|
||||
}
|
||||
pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.perform_search(query),
|
||||
MockIndex::Faux(faux) => faux.get("perform_search").call(query),
|
||||
MockIndex::Real(index) => index.perform_search(query),
|
||||
MockIndex::Mock(m) => unsafe { m.get("perform_search").call(query) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockIndex::Vrai(index) => index.dump(path),
|
||||
MockIndex::Faux(faux) => faux.get("dump").call(path.as_ref()),
|
||||
MockIndex::Real(index) => index.dump(path),
|
||||
MockIndex::Mock(m) => unsafe { m.get("dump").call(path.as_ref()) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_documents(
|
||||
&self,
|
||||
method: IndexDocumentsMethod,
|
||||
content_uuid: Uuid,
|
||||
primary_key: Option<String>,
|
||||
file_store: UpdateFileStore,
|
||||
) -> Result<DocumentAdditionResult> {
|
||||
match self {
|
||||
MockIndex::Real(index) => {
|
||||
index.update_documents(method, content_uuid, primary_key, file_store)
|
||||
}
|
||||
MockIndex::Mock(mocker) => unsafe {
|
||||
mocker.get("update_documents").call((
|
||||
method,
|
||||
content_uuid,
|
||||
primary_key,
|
||||
file_store,
|
||||
))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.update_settings(settings),
|
||||
MockIndex::Mock(m) => unsafe { m.get("update_settings").call(settings) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_primary_key(&self, primary_key: String) -> Result<IndexMeta> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.update_primary_key(primary_key),
|
||||
MockIndex::Mock(m) => unsafe { m.get("update_primary_key").call(primary_key) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.delete_documents(ids),
|
||||
MockIndex::Mock(m) => unsafe { m.get("delete_documents").call(ids) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear_documents(&self) -> Result<()> {
|
||||
match self {
|
||||
MockIndex::Real(index) => index.clear_documents(),
|
||||
MockIndex::Mock(m) => unsafe { m.get("clear_documents").call(()) },
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -327,7 +212,7 @@ pub mod test {
|
||||
.times(2)
|
||||
.then(|_: &Path| -> Result<()> { Ok(()) });
|
||||
|
||||
let index = MockIndex::faux(faux);
|
||||
let index = MockIndex::mock(faux);
|
||||
|
||||
let path = PathBuf::from("hello");
|
||||
index.snapshot(&path).unwrap();
|
||||
@ -339,7 +224,7 @@ pub mod test {
|
||||
fn test_faux_unexisting_method_stub() {
|
||||
let faux = Mocker::default();
|
||||
|
||||
let index = MockIndex::faux(faux);
|
||||
let index = MockIndex::mock(faux);
|
||||
|
||||
let path = PathBuf::from("hello");
|
||||
index.snapshot(&path).unwrap();
|
||||
@ -356,7 +241,7 @@ pub mod test {
|
||||
panic!();
|
||||
});
|
||||
|
||||
let index = MockIndex::faux(faux);
|
||||
let index = MockIndex::mock(faux);
|
||||
|
||||
let path = PathBuf::from("hello");
|
||||
index.snapshot(&path).unwrap();
|
||||
|
@ -3,10 +3,9 @@ use std::str::FromStr;
|
||||
use std::time::Instant;
|
||||
|
||||
use either::Either;
|
||||
use heed::RoTxn;
|
||||
use indexmap::IndexMap;
|
||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token};
|
||||
use milli::{AscDesc, FieldId, FieldsIdsMap, FilterCondition, MatchingWords, SortError};
|
||||
use milli::{AscDesc, FieldId, FieldsIdsMap, Filter, MatchingWords, SortError};
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
@ -102,7 +101,7 @@ impl Index {
|
||||
search.offset(query.offset.unwrap_or_default());
|
||||
|
||||
if let Some(ref filter) = query.filter {
|
||||
if let Some(facets) = parse_filter(filter, self, &rtxn)? {
|
||||
if let Some(facets) = parse_filter(filter)? {
|
||||
search.filter(facets);
|
||||
}
|
||||
}
|
||||
@ -650,31 +649,27 @@ impl<'a, A: AsRef<[u8]>> Formatter<'a, A> {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_filter(facets: &Value, index: &Index, txn: &RoTxn) -> Result<Option<FilterCondition>> {
|
||||
fn parse_filter(facets: &Value) -> Result<Option<Filter>> {
|
||||
match facets {
|
||||
Value::String(expr) => {
|
||||
let condition = FilterCondition::from_str(txn, index, expr)?;
|
||||
let condition = Filter::from_str(expr)?;
|
||||
Ok(Some(condition))
|
||||
}
|
||||
Value::Array(arr) => parse_filter_array(txn, index, arr),
|
||||
Value::Array(arr) => parse_filter_array(arr),
|
||||
v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_filter_array(
|
||||
txn: &RoTxn,
|
||||
index: &Index,
|
||||
arr: &[Value],
|
||||
) -> Result<Option<FilterCondition>> {
|
||||
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>> {
|
||||
let mut ands = Vec::new();
|
||||
for value in arr {
|
||||
match value {
|
||||
Value::String(s) => ands.push(Either::Right(s.clone())),
|
||||
Value::String(s) => ands.push(Either::Right(s.as_str())),
|
||||
Value::Array(arr) => {
|
||||
let mut ors = Vec::new();
|
||||
for value in arr {
|
||||
match value {
|
||||
Value::String(s) => ors.push(s.clone()),
|
||||
Value::String(s) => ors.push(s.as_str()),
|
||||
v => {
|
||||
return Err(FacetError::InvalidExpression(&["String"], v.clone()).into())
|
||||
}
|
||||
@ -690,7 +685,7 @@ fn parse_filter_array(
|
||||
}
|
||||
}
|
||||
|
||||
Ok(FilterCondition::from_array(txn, index, ands)?)
|
||||
Ok(Filter::from_array(ands)?)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -29,9 +29,9 @@ impl UpdateHandler {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn update_builder(&self, update_id: u64) -> UpdateBuilder {
|
||||
pub fn update_builder(&self) -> UpdateBuilder {
|
||||
// We prepare the update by using the update builder.
|
||||
let mut update_builder = UpdateBuilder::new(update_id);
|
||||
let mut update_builder = UpdateBuilder::new();
|
||||
if let Some(max_nb_chunks) = self.max_nb_chunks {
|
||||
update_builder.max_nb_chunks(max_nb_chunks);
|
||||
}
|
||||
|
@ -4,15 +4,15 @@ use std::num::NonZeroUsize;
|
||||
|
||||
use log::{debug, info, trace};
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder};
|
||||
use milli::update::{
|
||||
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, Setting,
|
||||
};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index_controller::updates::status::{Failed, Processed, Processing, UpdateResult};
|
||||
use crate::Update;
|
||||
|
||||
use super::error::Result;
|
||||
use super::index::{Index, IndexMeta};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
fn serialize_with_wildcard<S>(
|
||||
field: &Setting<Vec<String>>,
|
||||
@ -30,25 +30,27 @@ where
|
||||
.serialize(s)
|
||||
}
|
||||
|
||||
#[derive(Clone, Default, Debug, Serialize)]
|
||||
#[derive(Clone, Default, Debug, Serialize, PartialEq)]
|
||||
pub struct Checked;
|
||||
|
||||
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
|
||||
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq)]
|
||||
pub struct Unchecked;
|
||||
|
||||
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
|
||||
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter case, a
|
||||
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub struct Settings<T> {
|
||||
#[serde(
|
||||
default,
|
||||
serialize_with = "serialize_with_wildcard",
|
||||
skip_serializing_if = "Setting::is_not_set"
|
||||
)]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub displayed_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(
|
||||
@ -56,19 +58,26 @@ pub struct Settings<T> {
|
||||
serialize_with = "serialize_with_wildcard",
|
||||
skip_serializing_if = "Setting::is_not_set"
|
||||
)]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub searchable_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub filterable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub sortable_attributes: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub ranking_rules: Setting<Vec<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub stop_words: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub distinct_attribute: Setting<String>,
|
||||
|
||||
#[serde(skip)]
|
||||
@ -164,126 +173,107 @@ pub struct Facets {
|
||||
}
|
||||
|
||||
impl Index {
|
||||
pub fn handle_update(&self, update: Processing) -> std::result::Result<Processed, Failed> {
|
||||
let update_id = update.id();
|
||||
let update_builder = self.update_handler.update_builder(update_id);
|
||||
let result = (|| {
|
||||
let mut txn = self.write_txn()?;
|
||||
let result = match update.meta() {
|
||||
Update::DocumentAddition {
|
||||
primary_key,
|
||||
content_uuid,
|
||||
method,
|
||||
} => self.update_documents(
|
||||
&mut txn,
|
||||
*method,
|
||||
*content_uuid,
|
||||
update_builder,
|
||||
primary_key.as_deref(),
|
||||
),
|
||||
Update::Settings(settings) => {
|
||||
let settings = settings.clone().check();
|
||||
self.update_settings(&mut txn, &settings, update_builder)
|
||||
}
|
||||
Update::ClearDocuments => {
|
||||
let builder = update_builder.clear_documents(&mut txn, self);
|
||||
let _count = builder.execute()?;
|
||||
Ok(UpdateResult::Other)
|
||||
}
|
||||
Update::DeleteDocuments(ids) => {
|
||||
let mut builder = update_builder.delete_documents(&mut txn, self)?;
|
||||
|
||||
// We ignore unexisting document ids
|
||||
ids.iter().for_each(|id| {
|
||||
builder.delete_external_id(id);
|
||||
});
|
||||
|
||||
let deleted = builder.execute()?;
|
||||
Ok(UpdateResult::DocumentDeletion { deleted })
|
||||
}
|
||||
};
|
||||
if result.is_ok() {
|
||||
txn.commit()?;
|
||||
}
|
||||
result
|
||||
})();
|
||||
|
||||
if let Update::DocumentAddition { content_uuid, .. } = update.from.meta() {
|
||||
let _ = self.update_file_store.delete(*content_uuid);
|
||||
}
|
||||
|
||||
match result {
|
||||
Ok(result) => Ok(update.process(result)),
|
||||
Err(e) => Err(update.fail(e)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_primary_key(&self, primary_key: Option<String>) -> Result<IndexMeta> {
|
||||
match primary_key {
|
||||
Some(primary_key) => {
|
||||
let mut txn = self.write_txn()?;
|
||||
let mut builder = UpdateBuilder::new(0).settings(&mut txn, self);
|
||||
builder.set_primary_key(primary_key);
|
||||
builder.execute(|_, _| ())?;
|
||||
let meta = IndexMeta::new_txn(self, &txn)?;
|
||||
txn.commit()?;
|
||||
Ok(meta)
|
||||
}
|
||||
None => {
|
||||
let meta = IndexMeta::new(self)?;
|
||||
Ok(meta)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn update_documents<'a, 'b>(
|
||||
fn update_primary_key_txn<'a, 'b>(
|
||||
&'a self,
|
||||
txn: &mut heed::RwTxn<'a, 'b>,
|
||||
primary_key: String,
|
||||
) -> Result<IndexMeta> {
|
||||
let mut builder = self.update_handler.update_builder().settings(txn, self);
|
||||
builder.set_primary_key(primary_key);
|
||||
builder.execute(|_| ())?;
|
||||
let meta = IndexMeta::new_txn(self, txn)?;
|
||||
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
pub fn update_primary_key(&self, primary_key: String) -> Result<IndexMeta> {
|
||||
let mut txn = self.write_txn()?;
|
||||
let res = self.update_primary_key_txn(&mut txn, primary_key)?;
|
||||
txn.commit()?;
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// Deletes `ids` from the index, and returns how many documents were deleted.
|
||||
pub fn delete_documents(&self, ids: &[String]) -> Result<DocumentDeletionResult> {
|
||||
let mut txn = self.write_txn()?;
|
||||
let mut builder = self
|
||||
.update_handler
|
||||
.update_builder()
|
||||
.delete_documents(&mut txn, self)?;
|
||||
|
||||
// We ignore unexisting document ids
|
||||
ids.iter().for_each(|id| {
|
||||
builder.delete_external_id(id);
|
||||
});
|
||||
|
||||
let deleted = builder.execute()?;
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
Ok(deleted)
|
||||
}
|
||||
|
||||
pub fn clear_documents(&self) -> Result<()> {
|
||||
let mut txn = self.write_txn()?;
|
||||
self.update_handler
|
||||
.update_builder()
|
||||
.clear_documents(&mut txn, self)
|
||||
.execute()?;
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn update_documents(
|
||||
&self,
|
||||
method: IndexDocumentsMethod,
|
||||
content_uuid: Uuid,
|
||||
update_builder: UpdateBuilder,
|
||||
primary_key: Option<&str>,
|
||||
) -> Result<UpdateResult> {
|
||||
primary_key: Option<String>,
|
||||
file_store: UpdateFileStore,
|
||||
) -> Result<DocumentAdditionResult> {
|
||||
trace!("performing document addition");
|
||||
let mut txn = self.write_txn()?;
|
||||
|
||||
// Set the primary key if not set already, ignore if already set.
|
||||
if let (None, Some(primary_key)) = (self.primary_key(txn)?, primary_key) {
|
||||
let mut builder = UpdateBuilder::new(0).settings(txn, self);
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
builder.execute(|_, _| ())?;
|
||||
if let Some(primary_key) = primary_key {
|
||||
self.update_primary_key_txn(&mut txn, primary_key)?;
|
||||
}
|
||||
|
||||
let indexing_callback =
|
||||
|indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step);
|
||||
let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step);
|
||||
|
||||
let content_file = self.update_file_store.get_update(content_uuid).unwrap();
|
||||
let content_file = file_store.get_update(content_uuid).unwrap();
|
||||
let reader = DocumentBatchReader::from_reader(content_file).unwrap();
|
||||
|
||||
let mut builder = update_builder.index_documents(txn, self);
|
||||
let mut builder = self
|
||||
.update_handler
|
||||
.update_builder()
|
||||
.index_documents(&mut txn, self);
|
||||
builder.index_documents_method(method);
|
||||
let addition = builder.execute(reader, indexing_callback)?;
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
info!("document addition done: {:?}", addition);
|
||||
|
||||
Ok(UpdateResult::DocumentsAddition(addition))
|
||||
Ok(addition)
|
||||
}
|
||||
|
||||
fn update_settings<'a, 'b>(
|
||||
&'a self,
|
||||
txn: &mut heed::RwTxn<'a, 'b>,
|
||||
settings: &Settings<Checked>,
|
||||
update_builder: UpdateBuilder,
|
||||
) -> Result<UpdateResult> {
|
||||
pub fn update_settings(&self, settings: &Settings<Checked>) -> Result<()> {
|
||||
// We must use the write transaction of the update here.
|
||||
let mut builder = update_builder.settings(txn, self);
|
||||
let mut txn = self.write_txn()?;
|
||||
let mut builder = self
|
||||
.update_handler
|
||||
.update_builder()
|
||||
.settings(&mut txn, self);
|
||||
|
||||
apply_settings_to_builder(settings, &mut builder);
|
||||
|
||||
builder.execute(|indexing_step, update_id| {
|
||||
debug!("update {}: {:?}", update_id, indexing_step)
|
||||
})?;
|
||||
builder.execute(|indexing_step| debug!("update: {:?}", indexing_step))?;
|
||||
|
||||
Ok(UpdateResult::Other)
|
||||
txn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@ -343,9 +333,19 @@ pub fn apply_settings_to_builder(
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
pub(crate) mod test {
|
||||
use proptest::prelude::*;
|
||||
|
||||
use super::*;
|
||||
|
||||
pub(super) fn setting_strategy<T: Arbitrary + Clone>() -> impl Strategy<Value = Setting<T>> {
|
||||
prop_oneof![
|
||||
Just(Setting::NotSet),
|
||||
Just(Setting::Reset),
|
||||
any::<T>().prop_map(Setting::Set)
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_setting_check() {
|
||||
// test no changes
|
||||
|
@ -9,18 +9,16 @@ use log::{error, trace};
|
||||
use tokio::sync::{mpsc, oneshot, RwLock};
|
||||
|
||||
use super::error::{DumpActorError, Result};
|
||||
use super::{DumpInfo, DumpMsg, DumpStatus, DumpTask};
|
||||
use crate::index_controller::index_resolver::index_store::IndexStore;
|
||||
use crate::index_controller::index_resolver::uuid_store::UuidStore;
|
||||
use crate::index_controller::index_resolver::IndexResolver;
|
||||
use crate::index_controller::updates::UpdateSender;
|
||||
use super::{DumpInfo, DumpJob, DumpMsg, DumpStatus};
|
||||
use crate::tasks::TaskStore;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub const CONCURRENT_DUMP_MSG: usize = 10;
|
||||
|
||||
pub struct DumpActor<U, I> {
|
||||
pub struct DumpActor {
|
||||
inbox: Option<mpsc::Receiver<DumpMsg>>,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
update: UpdateSender,
|
||||
update_file_store: UpdateFileStore,
|
||||
task_store: TaskStore,
|
||||
dump_path: PathBuf,
|
||||
analytics_path: PathBuf,
|
||||
lock: Arc<Mutex<()>>,
|
||||
@ -34,15 +32,11 @@ fn generate_uid() -> String {
|
||||
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
|
||||
}
|
||||
|
||||
impl<U, I> DumpActor<U, I>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
impl DumpActor {
|
||||
pub fn new(
|
||||
inbox: mpsc::Receiver<DumpMsg>,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
update: UpdateSender,
|
||||
update_file_store: UpdateFileStore,
|
||||
task_store: TaskStore,
|
||||
dump_path: impl AsRef<Path>,
|
||||
analytics_path: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
@ -52,8 +46,8 @@ where
|
||||
let lock = Arc::new(Mutex::new(()));
|
||||
Self {
|
||||
inbox: Some(inbox),
|
||||
index_resolver,
|
||||
update,
|
||||
task_store,
|
||||
update_file_store,
|
||||
dump_path: dump_path.as_ref().into(),
|
||||
analytics_path: analytics_path.as_ref().into(),
|
||||
dump_infos,
|
||||
@ -120,11 +114,11 @@ where
|
||||
|
||||
ret.send(Ok(info)).expect("Dump actor is dead");
|
||||
|
||||
let task = DumpTask {
|
||||
let task = DumpJob {
|
||||
dump_path: self.dump_path.clone(),
|
||||
db_path: self.analytics_path.clone(),
|
||||
index_resolver: self.index_resolver.clone(),
|
||||
update_sender: self.update.clone(),
|
||||
update_file_store: self.update_file_store.clone(),
|
||||
task_store: self.task_store.clone(),
|
||||
uid: uid.clone(),
|
||||
update_db_size: self.update_db_size,
|
||||
index_db_size: self.index_db_size,
|
||||
|
@ -0,0 +1,16 @@
|
||||
pub mod v2;
|
||||
pub mod v3;
|
||||
|
||||
/// Parses the v1 version of the Asc ranking rule, e.g. `asc(price)`, and returns the field name.
///
/// The field is the text between the first `asc(` and the last `)` found in
/// `text`, so nested parentheses are kept (`asc(a(b))` yields `a(b)`).
/// Returns `None` when `text` has no `asc(` or no `)` after it.
pub fn asc_ranking_rule(text: &str) -> Option<&str> {
    let (_, tail) = text.split_once("asc(")?;
    // A `char` pattern is the idiomatic form for a single-character delimiter.
    let (field, _) = tail.rsplit_once(')')?;
    Some(field)
}
|
||||
|
||||
/// Parses the v1 version of the Desc ranking rule, e.g. `desc(price)`, and returns the field name.
///
/// The field is the text between the first `desc(` and the last `)` found in
/// `text`, so nested parentheses are kept (`desc(a(b))` yields `a(b)`).
/// Returns `None` when `text` has no `desc(` or no `)` after it.
pub fn desc_ranking_rule(text: &str) -> Option<&str> {
    let (_, tail) = text.split_once("desc(")?;
    // A `char` pattern is the idiomatic form for a single-character delimiter.
    let (field, _) = tail.rsplit_once(')')?;
    Some(field)
}
|
147
meilisearch-lib/src/index_controller/dump_actor/compat/v2.rs
Normal file
147
meilisearch-lib/src/index_controller/dump_actor/compat/v2.rs
Normal file
@ -0,0 +1,147 @@
|
||||
use anyhow::bail;
|
||||
use chrono::{DateTime, Utc};
|
||||
use meilisearch_error::Code;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::{Settings, Unchecked};
|
||||
|
||||
/// One (de)serializable update record of the v2 compatibility layer: a
/// `Uuid` paired with the `UpdateStatus` it carries.
// NOTE(review): `uuid` presumably identifies the owning index — confirm against the dump loader.
#[derive(Serialize, Deserialize)]
|
||||
pub struct UpdateEntry {
|
||||
pub uuid: Uuid,
|
||||
pub update: UpdateStatus,
|
||||
}
|
||||
|
||||
/// Payload format recorded on a v2 `UpdateMeta::DocumentsAddition`; `Json` is the only variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateFormat {
|
||||
Json,
|
||||
}
|
||||
|
||||
/// Outcome payload of a v2 documents addition, carried by `UpdateResult::DocumentsAddition`.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct DocumentAdditionResult {
|
||||
// Document count reported by the addition.
pub nb_documents: usize,
|
||||
}
|
||||
|
||||
/// Result attached to a successfully processed v2 update (see `Processed::success`).
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateResult {
|
||||
DocumentsAddition(DocumentAdditionResult),
|
||||
DocumentDeletion { deleted: u64 },
|
||||
// Any update that carries no specific result payload.
Other,
|
||||
}
|
||||
|
||||
/// Description of what a v2 update does. Serialized as an internally tagged
/// enum: the variant name is stored under the `"type"` key.
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
method: IndexDocumentsMethod,
|
||||
format: UpdateFormat,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
ClearDocuments,
|
||||
DeleteDocuments {
|
||||
ids: Vec<String>,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
}
|
||||
|
||||
/// A v2 update in its initial, enqueued state. Field names are (de)serialized in camelCase.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Enqueued {
|
||||
pub update_id: u64,
|
||||
pub meta: UpdateMeta,
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
// NOTE(review): presumably the uuid of the stored update payload, if any — confirm.
pub content: Option<Uuid>,
|
||||
}
|
||||
|
||||
/// A v2 update that finished successfully: the `Processing` state it came
/// from (flattened into the same serialized object) plus its result and timestamp.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processed {
|
||||
pub success: UpdateResult,
|
||||
pub processed_at: DateTime<Utc>,
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
}
|
||||
|
||||
/// A v2 update being processed: the `Enqueued` state it came from (flattened
/// into the same serialized object) plus the time processing started.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processing {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
pub started_processing_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// A v2 update that was aborted: the `Enqueued` state it came from (flattened
/// into the same serialized object) plus the abort timestamp.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Aborted {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
pub aborted_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// A v2 update that failed: the `Processing` state it came from (flattened
/// into the same serialized object) plus the error and failure timestamp.
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Failed {
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
// This is the v2 `ResponseError` declared in this file.
pub error: ResponseError,
|
||||
pub failed_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Every state a v2 update can be in. Serialized as an internally tagged
/// enum: the camelCase variant name is stored under the `"status"` key.
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "status", rename_all = "camelCase")]
|
||||
pub enum UpdateStatus {
|
||||
Processing(Processing),
|
||||
Enqueued(Enqueued),
|
||||
Processed(Processed),
|
||||
Aborted(Aborted),
|
||||
Failed(Failed),
|
||||
}
|
||||
|
||||
// Unit placeholder for the `code` field of `ResponseError` below, which is
// skipped during (de)serialization.
type StatusCode = ();
|
||||
|
||||
/// Error payload stored on a failed v2 update. Field names are (de)serialized
/// in camelCase; `code` is never read from or written to the serialized form.
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ResponseError {
|
||||
#[serde(skip)]
|
||||
pub code: StatusCode,
|
||||
pub message: String,
|
||||
// NOTE(review): presumably one of the strings accepted by `error_code_from_str` — confirm.
pub error_code: String,
|
||||
pub error_type: String,
|
||||
pub error_link: String,
|
||||
}
|
||||
|
||||
pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> {
|
||||
let code = match s {
|
||||
"index_creation_failed" => Code::CreateIndex,
|
||||
"index_already_exists" => Code::IndexAlreadyExists,
|
||||
"index_not_found" => Code::IndexNotFound,
|
||||
"invalid_index_uid" => Code::InvalidIndexUid,
|
||||
"invalid_state" => Code::InvalidState,
|
||||
"missing_primary_key" => Code::MissingPrimaryKey,
|
||||
"primary_key_already_present" => Code::PrimaryKeyAlreadyPresent,
|
||||
"invalid_request" => Code::InvalidRankingRule,
|
||||
"max_fields_limit_exceeded" => Code::MaxFieldsLimitExceeded,
|
||||
"missing_document_id" => Code::MissingDocumentId,
|
||||
"invalid_facet" => Code::Filter,
|
||||
"invalid_filter" => Code::Filter,
|
||||
"invalid_sort" => Code::Sort,
|
||||
"bad_parameter" => Code::BadParameter,
|
||||
"bad_request" => Code::BadRequest,
|
||||
"document_not_found" => Code::DocumentNotFound,
|
||||
"internal" => Code::Internal,
|
||||
"invalid_geo_field" => Code::InvalidGeoField,
|
||||
"invalid_token" => Code::InvalidToken,
|
||||
"missing_authorization_header" => Code::MissingAuthorizationHeader,
|
||||
"payload_too_large" => Code::PayloadTooLarge,
|
||||
"unretrievable_document" => Code::RetrieveDocument,
|
||||
"search_error" => Code::SearchDocuments,
|
||||
"unsupported_media_type" => Code::UnsupportedMediaType,
|
||||
"dump_already_in_progress" => Code::DumpAlreadyInProgress,
|
||||
"dump_process_failed" => Code::DumpProcessFailed,
|
||||
_ => bail!("unknow error code."),
|
||||
};
|
||||
|
||||
Ok(code)
|
||||
}
|
198
meilisearch-lib/src/index_controller/dump_actor/compat/v3.rs
Normal file
198
meilisearch-lib/src/index_controller/dump_actor/compat/v3.rs
Normal file
@ -0,0 +1,198 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use meilisearch_error::{Code, ResponseError};
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::{Settings, Unchecked};
|
||||
use crate::index_resolver::IndexUid;
|
||||
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult};
|
||||
|
||||
use super::v2;
|
||||
|
||||
/// A (de)serializable pair of a `Uuid` and a string `uid`.
// NOTE(review): presumably maps an index uid to its uuid inside a v3 dump — confirm against the dump loader.
#[derive(Serialize, Deserialize)]
|
||||
pub struct DumpEntry {
|
||||
pub uuid: Uuid,
|
||||
pub uid: String,
|
||||
}
|
||||
|
||||
/// One (de)serializable update record of the v3 compatibility layer: a
/// `Uuid` paired with the `UpdateStatus` it carries.
// NOTE(review): `uuid` presumably identifies the owning index — confirm against the dump loader.
#[derive(Serialize, Deserialize)]
|
||||
pub struct UpdateEntry {
|
||||
pub uuid: Uuid,
|
||||
pub update: UpdateStatus,
|
||||
}
|
||||
|
||||
/// Every state a v3 update can be in. Serialized as an internally tagged
/// enum: the camelCase variant name is stored under the `"status"` key.
/// Each variant can be folded into a `Task` through
/// `From<(UpdateStatus, String, TaskId)> for Task`.
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "status", rename_all = "camelCase")]
|
||||
pub enum UpdateStatus {
|
||||
Processing(Processing),
|
||||
Enqueued(Enqueued),
|
||||
Processed(Processed),
|
||||
Failed(Failed),
|
||||
}
|
||||
|
||||
impl From<v2::UpdateResult> for TaskResult {
|
||||
fn from(other: v2::UpdateResult) -> Self {
|
||||
match other {
|
||||
v2::UpdateResult::DocumentsAddition(result) => TaskResult::DocumentAddition {
|
||||
indexed_documents: result.nb_documents as u64,
|
||||
},
|
||||
v2::UpdateResult::DocumentDeletion { deleted } => TaskResult::DocumentDeletion {
|
||||
deleted_documents: deleted,
|
||||
},
|
||||
v2::UpdateResult::Other => TaskResult::Other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Payload of a v3 update (see `Enqueued::meta`). Convertible into a
/// `TaskContent` through `From<Update> for TaskContent`.
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum Update {
|
||||
DeleteDocuments(Vec<String>),
|
||||
DocumentAddition {
|
||||
primary_key: Option<String>,
|
||||
method: IndexDocumentsMethod,
|
||||
content_uuid: Uuid,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
ClearDocuments,
|
||||
}
|
||||
|
||||
impl From<Update> for TaskContent {
|
||||
fn from(other: Update) -> Self {
|
||||
match other {
|
||||
Update::DeleteDocuments(ids) => {
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids))
|
||||
}
|
||||
Update::DocumentAddition {
|
||||
primary_key,
|
||||
method,
|
||||
..
|
||||
} => TaskContent::DocumentAddition {
|
||||
content_uuid: Uuid::default(),
|
||||
merge_strategy: method,
|
||||
primary_key,
|
||||
// document count is unknown for legacy updates
|
||||
documents_count: 0,
|
||||
},
|
||||
Update::Settings(settings) => TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
// There is no way to know now, so we assume it isn't
|
||||
is_deletion: false,
|
||||
},
|
||||
Update::ClearDocuments => TaskContent::DocumentDeletion(DocumentDeletion::Clear),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Description of what a v3 update does. Serialized as an internally tagged
/// enum: the variant name is stored under the `"type"` key.
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
method: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
ClearDocuments,
|
||||
DeleteDocuments {
|
||||
ids: Vec<String>,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Enqueued {
|
||||
pub update_id: u64,
|
||||
pub meta: Update,
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Enqueued {
|
||||
fn update_task(self, task: &mut Task) {
|
||||
// we do not erase the `TaskId` that was given to us.
|
||||
task.content = self.meta.into();
|
||||
task.events.push(TaskEvent::Created(self.enqueued_at));
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processed {
|
||||
pub success: v2::UpdateResult,
|
||||
pub processed_at: DateTime<Utc>,
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
}
|
||||
|
||||
impl Processed {
|
||||
fn update_task(self, task: &mut Task) {
|
||||
self.from.update_task(task);
|
||||
|
||||
let event = TaskEvent::Succeded {
|
||||
result: TaskResult::from(self.success),
|
||||
timestamp: self.processed_at,
|
||||
};
|
||||
task.events.push(event);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processing {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
pub started_processing_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Processing {
|
||||
fn update_task(self, task: &mut Task) {
|
||||
self.from.update_task(task);
|
||||
|
||||
let event = TaskEvent::Processing(self.started_processing_at);
|
||||
task.events.push(event);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Failed {
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
pub msg: String,
|
||||
pub code: Code,
|
||||
pub failed_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Failed {
|
||||
fn update_task(self, task: &mut Task) {
|
||||
self.from.update_task(task);
|
||||
|
||||
let event = TaskEvent::Failed {
|
||||
error: ResponseError::from_msg(self.msg, self.code),
|
||||
timestamp: self.failed_at,
|
||||
};
|
||||
task.events.push(event);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<(UpdateStatus, String, TaskId)> for Task {
|
||||
fn from((update, uid, task_id): (UpdateStatus, String, TaskId)) -> Self {
|
||||
// Dummy task
|
||||
let mut task = Task {
|
||||
id: task_id,
|
||||
index_uid: IndexUid::new(uid).unwrap(),
|
||||
content: TaskContent::IndexDeletion,
|
||||
events: Vec::new(),
|
||||
};
|
||||
|
||||
match update {
|
||||
UpdateStatus::Processing(u) => u.update_task(&mut task),
|
||||
UpdateStatus::Enqueued(u) => u.update_task(&mut task),
|
||||
UpdateStatus::Processed(u) => u.update_task(&mut task),
|
||||
UpdateStatus::Failed(u) => u.update_task(&mut task),
|
||||
}
|
||||
|
||||
task
|
||||
}
|
||||
}
|
@ -1,7 +1,6 @@
|
||||
use meilisearch_error::{Code, ErrorCode};
|
||||
|
||||
use crate::index_controller::index_resolver::error::IndexResolverError;
|
||||
use crate::index_controller::updates::error::UpdateLoopError;
|
||||
use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, DumpActorError>;
|
||||
|
||||
@ -15,8 +14,6 @@ pub enum DumpActorError {
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
IndexResolver(#[from] IndexResolverError),
|
||||
#[error("{0}")]
|
||||
UpdateLoop(#[from] UpdateLoopError),
|
||||
}
|
||||
|
||||
macro_rules! internal_error {
|
||||
@ -35,8 +32,11 @@ internal_error!(
|
||||
heed::Error,
|
||||
std::io::Error,
|
||||
tokio::task::JoinError,
|
||||
tokio::sync::oneshot::error::RecvError,
|
||||
serde_json::error::Error,
|
||||
tempfile::PersistError
|
||||
tempfile::PersistError,
|
||||
fs_extra::error::Error,
|
||||
TaskError
|
||||
);
|
||||
|
||||
impl ErrorCode for DumpActorError {
|
||||
@ -46,7 +46,6 @@ impl ErrorCode for DumpActorError {
|
||||
DumpActorError::DumpDoesNotExist(_) => Code::DumpNotFound,
|
||||
DumpActorError::Internal(_) => Code::Internal,
|
||||
DumpActorError::IndexResolver(e) => e.error_code(),
|
||||
DumpActorError::UpdateLoop(e) => e.error_code(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,16 +1,11 @@
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
|
||||
use crate::index_controller::index_resolver::HardStateIndexResolver;
|
||||
|
||||
use super::error::Result;
|
||||
use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg};
|
||||
use super::{DumpActorHandle, DumpInfo, DumpMsg};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DumpActorHandleImpl {
|
||||
sender: mpsc::Sender<DumpMsg>,
|
||||
pub sender: mpsc::Sender<DumpMsg>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@ -29,29 +24,3 @@ impl DumpActorHandle for DumpActorHandleImpl {
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
}
|
||||
|
||||
impl DumpActorHandleImpl {
|
||||
pub fn new(
|
||||
path: impl AsRef<Path>,
|
||||
analytics_path: impl AsRef<Path>,
|
||||
index_resolver: Arc<HardStateIndexResolver>,
|
||||
update: crate::index_controller::updates::UpdateSender,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
) -> anyhow::Result<Self> {
|
||||
let (sender, receiver) = mpsc::channel(10);
|
||||
let actor = DumpActor::new(
|
||||
receiver,
|
||||
index_resolver,
|
||||
update,
|
||||
path,
|
||||
analytics_path,
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
);
|
||||
|
||||
tokio::task::spawn(actor.run());
|
||||
|
||||
Ok(Self { sender })
|
||||
}
|
||||
}
|
||||
|
@ -1,19 +1,4 @@
|
||||
pub mod v1;
|
||||
pub mod v2;
|
||||
pub mod v3;
|
||||
|
||||
mod compat {
    /// Returns the text found between the first occurrence of `prefix` and the
    /// last `)` that follows it, or `None` when either delimiter is missing.
    fn ranking_rule_field<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
        let (_, tail) = text.split_once(prefix)?;
        let (field, _) = tail.rsplit_once(')')?;
        Some(field)
    }

    /// Parses the v1 version of the Asc ranking rules `asc(price)` and returns the field name.
    ///
    /// Returns `None` if `text` does not contain `asc(` followed by a closing `)`.
    pub fn asc_ranking_rule(text: &str) -> Option<&str> {
        ranking_rule_field(text, "asc(")
    }

    /// Parses the v1 version of the Desc ranking rules `desc(price)` and returns the field name.
    ///
    /// Returns `None` if `text` does not contain `desc(` followed by a closing `)`.
    pub fn desc_ranking_rule(text: &str) -> Option<&str> {
        ranking_rule_field(text, "desc(")
    }
}
|
||||
pub mod v4;
|
||||
|
@ -14,8 +14,7 @@ use uuid::Uuid;
|
||||
use crate::document_formats::read_ndjson;
|
||||
use crate::index::apply_settings_to_builder;
|
||||
use crate::index::update_handler::UpdateHandler;
|
||||
use crate::index_controller::dump_actor::loaders::compat::{asc_ranking_rule, desc_ranking_rule};
|
||||
use crate::index_controller::index_resolver::uuid_store::HeedUuidStore;
|
||||
use crate::index_controller::dump_actor::compat;
|
||||
use crate::index_controller::{self, IndexMetadata};
|
||||
use crate::{index::Unchecked, options::IndexerOpts};
|
||||
|
||||
@ -27,6 +26,7 @@ pub struct MetadataV1 {
|
||||
}
|
||||
|
||||
impl MetadataV1 {
|
||||
#[allow(dead_code, unreachable_code, unused_variables)]
|
||||
pub fn load_dump(
|
||||
self,
|
||||
src: impl AsRef<Path>,
|
||||
@ -34,22 +34,29 @@ impl MetadataV1 {
|
||||
size: usize,
|
||||
indexer_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
let uuid_store = HeedUuidStore::new(&dst)?;
|
||||
for index in self.indexes {
|
||||
let uuid = Uuid::new_v4();
|
||||
uuid_store.insert(index.uid.clone(), uuid)?;
|
||||
let src = src.as_ref().join(index.uid);
|
||||
load_index(
|
||||
&src,
|
||||
&dst,
|
||||
uuid,
|
||||
index.meta.primary_key.as_deref(),
|
||||
size,
|
||||
indexer_options,
|
||||
)?;
|
||||
}
|
||||
unreachable!("dump v1 not implemented");
|
||||
// log::info!("Patching dump V2 to dump V3...");
|
||||
// let uuid_store = todo!(); // HeedMetaStore::new(&dst)?;
|
||||
// for index in self.indexes {
|
||||
// let uuid = Uuid::new_v4();
|
||||
// // Since we don't know when the index was created, we assume it's from 0
|
||||
// let meta = IndexMeta {
|
||||
// uuid,
|
||||
// creation_task_id: 0,
|
||||
// };
|
||||
// // uuid_store.insert(index.uid.clone(), meta)?;
|
||||
// let src = src.as_ref().join(index.uid);
|
||||
// load_index(
|
||||
// &src,
|
||||
// &dst,
|
||||
// uuid,
|
||||
// index.meta.primary_key.as_deref(),
|
||||
// size,
|
||||
// indexer_options,
|
||||
// )?;
|
||||
// }
|
||||
|
||||
Ok(())
|
||||
// Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@ -81,6 +88,7 @@ struct Settings {
|
||||
pub attributes_for_faceting: Option<Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn load_index(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
@ -105,7 +113,7 @@ fn load_index(
|
||||
|
||||
let handler = UpdateHandler::new(indexer_options)?;
|
||||
|
||||
let mut builder = handler.update_builder(0).settings(&mut txn, &index);
|
||||
let mut builder = handler.update_builder().settings(&mut txn, &index);
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
@ -113,7 +121,7 @@ fn load_index(
|
||||
|
||||
apply_settings_to_builder(&settings.check(), &mut builder);
|
||||
|
||||
builder.execute(|_, _| ())?;
|
||||
builder.execute(|_| ())?;
|
||||
|
||||
let reader = BufReader::new(File::open(&src.as_ref().join("documents.jsonl"))?);
|
||||
|
||||
@ -129,9 +137,9 @@ fn load_index(
|
||||
//a primary key error to be thrown.
|
||||
if !documents_reader.is_empty() {
|
||||
let builder = update_handler
|
||||
.update_builder(0)
|
||||
.update_builder()
|
||||
.index_documents(&mut txn, &index);
|
||||
builder.execute(documents_reader, |_, _| ())?;
|
||||
builder.execute(documents_reader, |_| ())?;
|
||||
}
|
||||
|
||||
txn.commit()?;
|
||||
@ -174,8 +182,8 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
|
||||
Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| {
|
||||
match criterion.as_str() {
|
||||
"words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion),
|
||||
s if s.starts_with("asc") => asc_ranking_rule(s).map(|f| format!("{}:asc", f)),
|
||||
s if s.starts_with("desc") => desc_ranking_rule(s).map(|f| format!("{}:desc", f)),
|
||||
s if s.starts_with("asc") => compat::asc_ranking_rule(s).map(|f| format!("{}:asc", f)),
|
||||
s if s.starts_with("desc") => compat::desc_ranking_rule(s).map(|f| format!("{}:desc", f)),
|
||||
"wordsPosition" => {
|
||||
warn!("The criteria `attribute` and `wordsPosition` have been merged \
|
||||
into a single criterion `attribute` so `wordsPositon` will be \
|
||||
|
@ -5,17 +5,10 @@ use std::path::{Path, PathBuf};
|
||||
use serde_json::{Deserializer, Value};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
use crate::index_controller::dump_actor::loaders::compat::{asc_ranking_rule, desc_ranking_rule};
|
||||
use crate::index_controller::dump_actor::compat::{self, v2, v3};
|
||||
use crate::index_controller::dump_actor::Metadata;
|
||||
use crate::index_controller::updates::status::{
|
||||
Aborted, Enqueued, Failed, Processed, Processing, UpdateResult, UpdateStatus,
|
||||
};
|
||||
use crate::index_controller::updates::store::dump::UpdateEntry;
|
||||
use crate::index_controller::updates::store::Update;
|
||||
use crate::options::IndexerOpts;
|
||||
|
||||
use super::v3;
|
||||
|
||||
/// The dump v2 reads the dump folder and patches all the needed files to make it compatible with a
|
||||
/// dump v3, then calls the dump v3 to actually handle the dump.
|
||||
pub fn load_dump(
|
||||
@ -26,6 +19,7 @@ pub fn load_dump(
|
||||
update_db_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
log::info!("Patching dump V2 to dump V3...");
|
||||
let indexes_path = src.as_ref().join("indexes");
|
||||
|
||||
let dir_entries = std::fs::read_dir(indexes_path)?;
|
||||
@ -47,7 +41,7 @@ pub fn load_dump(
|
||||
let update_path = update_dir.join("data.jsonl");
|
||||
patch_updates(update_dir, update_path)?;
|
||||
|
||||
v3::load_dump(
|
||||
super::v3::load_dump(
|
||||
meta,
|
||||
src,
|
||||
dst,
|
||||
@ -84,12 +78,12 @@ fn patch_updates(dir: impl AsRef<Path>, path: impl AsRef<Path>) -> anyhow::Resul
|
||||
let mut output_update_file = NamedTempFile::new_in(&dir)?;
|
||||
let update_file = File::open(&path)?;
|
||||
|
||||
let stream = Deserializer::from_reader(update_file).into_iter::<compat::UpdateEntry>();
|
||||
let stream = Deserializer::from_reader(update_file).into_iter::<v2::UpdateEntry>();
|
||||
|
||||
for update in stream {
|
||||
let update_entry = update?;
|
||||
|
||||
let update_entry = UpdateEntry::from(update_entry);
|
||||
let update_entry = v3::UpdateEntry::from(update_entry);
|
||||
|
||||
serde_json::to_writer(&mut output_update_file, &update_entry)?;
|
||||
output_update_file.write_all(b"\n")?;
|
||||
@ -110,10 +104,10 @@ fn patch_custom_ranking_rules(ranking_rules: &mut Value) {
|
||||
Value::Array(values) => values
|
||||
.into_iter()
|
||||
.filter_map(|value| match value {
|
||||
Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
|
||||
Value::String(s) if s.starts_with("asc") => compat::asc_ranking_rule(&s)
|
||||
.map(|f| format!("{}:asc", f))
|
||||
.map(Value::String),
|
||||
Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
|
||||
Value::String(s) if s.starts_with("desc") => compat::desc_ranking_rule(&s)
|
||||
.map(|f| format!("{}:desc", f))
|
||||
.map(Value::String),
|
||||
otherwise => Some(otherwise),
|
||||
@ -123,23 +117,23 @@ fn patch_custom_ranking_rules(ranking_rules: &mut Value) {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<compat::UpdateEntry> for UpdateEntry {
|
||||
fn from(compat::UpdateEntry { uuid, update }: compat::UpdateEntry) -> Self {
|
||||
impl From<v2::UpdateEntry> for v3::UpdateEntry {
|
||||
fn from(v2::UpdateEntry { uuid, update }: v2::UpdateEntry) -> Self {
|
||||
let update = match update {
|
||||
compat::UpdateStatus::Processing(meta) => UpdateStatus::Processing(meta.into()),
|
||||
compat::UpdateStatus::Enqueued(meta) => UpdateStatus::Enqueued(meta.into()),
|
||||
compat::UpdateStatus::Processed(meta) => UpdateStatus::Processed(meta.into()),
|
||||
compat::UpdateStatus::Aborted(meta) => UpdateStatus::Aborted(meta.into()),
|
||||
compat::UpdateStatus::Failed(meta) => UpdateStatus::Failed(meta.into()),
|
||||
v2::UpdateStatus::Processing(meta) => v3::UpdateStatus::Processing(meta.into()),
|
||||
v2::UpdateStatus::Enqueued(meta) => v3::UpdateStatus::Enqueued(meta.into()),
|
||||
v2::UpdateStatus::Processed(meta) => v3::UpdateStatus::Processed(meta.into()),
|
||||
v2::UpdateStatus::Aborted(_) => unreachable!("Updates could never be aborted."),
|
||||
v2::UpdateStatus::Failed(meta) => v3::UpdateStatus::Failed(meta.into()),
|
||||
};
|
||||
|
||||
Self { uuid, update }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<compat::Failed> for Failed {
|
||||
fn from(other: compat::Failed) -> Self {
|
||||
let compat::Failed {
|
||||
impl From<v2::Failed> for v3::Failed {
|
||||
fn from(other: v2::Failed) -> Self {
|
||||
let v2::Failed {
|
||||
from,
|
||||
error,
|
||||
failed_at,
|
||||
@ -148,27 +142,16 @@ impl From<compat::Failed> for Failed {
|
||||
Self {
|
||||
from: from.into(),
|
||||
msg: error.message,
|
||||
code: compat::error_code_from_str(&error.error_code)
|
||||
code: v2::error_code_from_str(&error.error_code)
|
||||
.expect("Invalid update: Invalid error code"),
|
||||
failed_at,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<compat::Aborted> for Aborted {
    /// Converts a pre-v3 aborted update: the wrapped enqueued update is
    /// converted with its own `From` impl and the abortion date is kept as is.
    fn from(compat::Aborted { from, aborted_at }: compat::Aborted) -> Self {
        let from = from.into();

        Self { from, aborted_at }
    }
}
|
||||
|
||||
impl From<compat::Processing> for Processing {
|
||||
fn from(other: compat::Processing) -> Self {
|
||||
let compat::Processing {
|
||||
impl From<v2::Processing> for v3::Processing {
|
||||
fn from(other: v2::Processing) -> Self {
|
||||
let v2::Processing {
|
||||
from,
|
||||
started_processing_at,
|
||||
} = other;
|
||||
@ -180,9 +163,9 @@ impl From<compat::Processing> for Processing {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<compat::Enqueued> for Enqueued {
|
||||
fn from(other: compat::Enqueued) -> Self {
|
||||
let compat::Enqueued {
|
||||
impl From<v2::Enqueued> for v3::Enqueued {
|
||||
fn from(other: v2::Enqueued) -> Self {
|
||||
let v2::Enqueued {
|
||||
update_id,
|
||||
meta,
|
||||
enqueued_at,
|
||||
@ -190,12 +173,12 @@ impl From<compat::Enqueued> for Enqueued {
|
||||
} = other;
|
||||
|
||||
let meta = match meta {
|
||||
compat::UpdateMeta::DocumentsAddition {
|
||||
v2::UpdateMeta::DocumentsAddition {
|
||||
method,
|
||||
primary_key,
|
||||
..
|
||||
} => {
|
||||
Update::DocumentAddition {
|
||||
v3::Update::DocumentAddition {
|
||||
primary_key,
|
||||
method,
|
||||
// Just ignore if the uuid is not present. If it is needed later, an error will
|
||||
@ -203,9 +186,9 @@ impl From<compat::Enqueued> for Enqueued {
|
||||
content_uuid: content.unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
compat::UpdateMeta::ClearDocuments => Update::ClearDocuments,
|
||||
compat::UpdateMeta::DeleteDocuments { ids } => Update::DeleteDocuments(ids),
|
||||
compat::UpdateMeta::Settings(settings) => Update::Settings(settings),
|
||||
v2::UpdateMeta::ClearDocuments => v3::Update::ClearDocuments,
|
||||
v2::UpdateMeta::DeleteDocuments { ids } => v3::Update::DeleteDocuments(ids),
|
||||
v2::UpdateMeta::Settings(settings) => v3::Update::Settings(settings),
|
||||
};
|
||||
|
||||
Self {
|
||||
@ -216,176 +199,18 @@ impl From<compat::Enqueued> for Enqueued {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<compat::Processed> for Processed {
|
||||
fn from(other: compat::Processed) -> Self {
|
||||
let compat::Processed {
|
||||
impl From<v2::Processed> for v3::Processed {
|
||||
fn from(other: v2::Processed) -> Self {
|
||||
let v2::Processed {
|
||||
from,
|
||||
success,
|
||||
processed_at,
|
||||
} = other;
|
||||
|
||||
Self {
|
||||
success: success.into(),
|
||||
success,
|
||||
processed_at,
|
||||
from: from.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<compat::UpdateResult> for UpdateResult {
|
||||
fn from(other: compat::UpdateResult) -> Self {
|
||||
match other {
|
||||
compat::UpdateResult::DocumentsAddition(r) => Self::DocumentsAddition(r),
|
||||
compat::UpdateResult::DocumentDeletion { deleted } => {
|
||||
Self::DocumentDeletion { deleted }
|
||||
}
|
||||
compat::UpdateResult::Other => Self::Other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// compat structure from pre-dumpv3 meilisearch
|
||||
mod compat {
|
||||
use anyhow::bail;
|
||||
use chrono::{DateTime, Utc};
|
||||
use meilisearch_error::Code;
|
||||
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::{Settings, Unchecked};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct UpdateEntry {
|
||||
pub uuid: Uuid,
|
||||
pub update: UpdateStatus,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateFormat {
|
||||
Json,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateResult {
|
||||
DocumentsAddition(DocumentAdditionResult),
|
||||
DocumentDeletion { deleted: u64 },
|
||||
Other,
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
method: IndexDocumentsMethod,
|
||||
format: UpdateFormat,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
ClearDocuments,
|
||||
DeleteDocuments {
|
||||
ids: Vec<String>,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Enqueued {
|
||||
pub update_id: u64,
|
||||
pub meta: UpdateMeta,
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
pub content: Option<Uuid>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processed {
|
||||
pub success: UpdateResult,
|
||||
pub processed_at: DateTime<Utc>,
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processing {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
pub started_processing_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Aborted {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
pub aborted_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Failed {
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
pub error: ResponseError,
|
||||
pub failed_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "status", rename_all = "camelCase")]
|
||||
pub enum UpdateStatus {
|
||||
Processing(Processing),
|
||||
Enqueued(Enqueued),
|
||||
Processed(Processed),
|
||||
Aborted(Aborted),
|
||||
Failed(Failed),
|
||||
}
|
||||
|
||||
type StatusCode = ();
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct ResponseError {
|
||||
#[serde(skip)]
|
||||
pub code: StatusCode,
|
||||
pub message: String,
|
||||
pub error_code: String,
|
||||
pub error_type: String,
|
||||
pub error_link: String,
|
||||
}
|
||||
|
||||
pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> {
|
||||
let code = match s {
|
||||
"index_creation_failed" => Code::CreateIndex,
|
||||
"index_already_exists" => Code::IndexAlreadyExists,
|
||||
"index_not_found" => Code::IndexNotFound,
|
||||
"invalid_index_uid" => Code::InvalidIndexUid,
|
||||
"invalid_state" => Code::InvalidState,
|
||||
"missing_primary_key" => Code::MissingPrimaryKey,
|
||||
"primary_key_already_present" => Code::PrimaryKeyAlreadyPresent,
|
||||
"invalid_request" => Code::InvalidRankingRule,
|
||||
"max_fields_limit_exceeded" => Code::MaxFieldsLimitExceeded,
|
||||
"missing_document_id" => Code::MissingDocumentId,
|
||||
"invalid_facet" => Code::Filter,
|
||||
"invalid_filter" => Code::Filter,
|
||||
"invalid_sort" => Code::Sort,
|
||||
"bad_parameter" => Code::BadParameter,
|
||||
"bad_request" => Code::BadRequest,
|
||||
"document_not_found" => Code::DocumentNotFound,
|
||||
"internal" => Code::Internal,
|
||||
"invalid_geo_field" => Code::InvalidGeoField,
|
||||
"invalid_token" => Code::InvalidToken,
|
||||
"missing_authorization_header" => Code::MissingAuthorizationHeader,
|
||||
"payload_too_large" => Code::PayloadTooLarge,
|
||||
"unretrievable_document" => Code::RetrieveDocument,
|
||||
"search_error" => Code::SearchDocuments,
|
||||
"unsupported_media_type" => Code::UnsupportedMediaType,
|
||||
"dump_already_in_progress" => Code::DumpAlreadyInProgress,
|
||||
"dump_process_failed" => Code::DumpProcessFailed,
|
||||
_ => bail!("unknow error code."),
|
||||
};
|
||||
|
||||
Ok(code)
|
||||
}
|
||||
}
|
||||
|
@ -1,33 +1,136 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufReader, BufWriter, Write};
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::Context;
|
||||
use fs_extra::dir::{self, CopyOptions};
|
||||
use log::info;
|
||||
use tempfile::tempdir;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::analytics;
|
||||
use crate::index_controller::dump_actor::compat::v3;
|
||||
use crate::index_controller::dump_actor::Metadata;
|
||||
use crate::index_controller::index_resolver::IndexResolver;
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
use crate::index_controller::updates::store::UpdateStore;
|
||||
use crate::index_resolver::meta_store::{DumpEntry, IndexMeta};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::task::{Task, TaskId};
|
||||
|
||||
/// dump structure for V3:
|
||||
/// .
|
||||
/// ├── indexes
|
||||
/// │ └── 25f10bb8-6ea8-42f0-bd48-ad5857f77648
|
||||
/// │ ├── documents.jsonl
|
||||
/// │ └── meta.json
|
||||
/// ├── index_uuids
|
||||
/// │ └── data.jsonl
|
||||
/// ├── metadata.json
|
||||
/// └── updates
|
||||
/// └── data.jsonl
|
||||
|
||||
pub fn load_dump(
|
||||
meta: Metadata,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
update_db_size: usize,
|
||||
meta_env_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
info!(
|
||||
"Loading dump from {}, dump database version: {}, dump version: V3",
|
||||
meta.dump_date, meta.db_version
|
||||
);
|
||||
info!("Patching dump V3 to dump V4...");
|
||||
|
||||
IndexResolver::load_dump(src.as_ref(), &dst, index_db_size, indexing_options)?;
|
||||
UpdateFileStore::load_dump(src.as_ref(), &dst)?;
|
||||
UpdateStore::load_dump(&src, &dst, update_db_size)?;
|
||||
analytics::copy_user_id(src.as_ref(), dst.as_ref());
|
||||
let patched_dir = tempdir()?;
|
||||
|
||||
info!("Loading indexes.");
|
||||
let options = CopyOptions::default();
|
||||
dir::copy(src.as_ref().join("indexes"), patched_dir.path(), &options)?;
|
||||
dir::copy(
|
||||
src.as_ref().join("index_uuids"),
|
||||
patched_dir.path(),
|
||||
&options,
|
||||
)?;
|
||||
|
||||
let uuid_map = patch_index_meta(
|
||||
src.as_ref().join("index_uuids/data.jsonl"),
|
||||
patched_dir.path(),
|
||||
)?;
|
||||
|
||||
fs::copy(
|
||||
src.as_ref().join("metadata.json"),
|
||||
patched_dir.path().join("metadata.json"),
|
||||
)?;
|
||||
|
||||
patch_updates(&src, patched_dir.path(), uuid_map)?;
|
||||
|
||||
super::v4::load_dump(
|
||||
meta,
|
||||
patched_dir.path(),
|
||||
dst,
|
||||
index_db_size,
|
||||
meta_env_size,
|
||||
indexing_options,
|
||||
)
|
||||
}
|
||||
|
||||
fn patch_index_meta(
|
||||
path: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
) -> anyhow::Result<HashMap<Uuid, String>> {
|
||||
let file = BufReader::new(File::open(path)?);
|
||||
let dst = dst.as_ref().join("index_uuids");
|
||||
fs::create_dir_all(&dst)?;
|
||||
let mut dst_file = File::create(dst.join("data.jsonl"))?;
|
||||
|
||||
let map = serde_json::Deserializer::from_reader(file)
|
||||
.into_iter::<v3::DumpEntry>()
|
||||
.try_fold(HashMap::new(), |mut map, entry| -> anyhow::Result<_> {
|
||||
let entry = entry?;
|
||||
map.insert(entry.uuid, entry.uid.clone());
|
||||
let meta = IndexMeta {
|
||||
uuid: entry.uuid,
|
||||
// This is lost information, we patch it to 0;
|
||||
creation_task_id: 0,
|
||||
};
|
||||
let entry = DumpEntry {
|
||||
uid: entry.uid,
|
||||
index_meta: meta,
|
||||
};
|
||||
serde_json::to_writer(&mut dst_file, &entry)?;
|
||||
dst_file.write_all(b"\n")?;
|
||||
Ok(map)
|
||||
})?;
|
||||
|
||||
dst_file.flush()?;
|
||||
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
fn patch_updates(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
uuid_map: HashMap<Uuid, String>,
|
||||
) -> anyhow::Result<()> {
|
||||
let dst = dst.as_ref().join("updates");
|
||||
fs::create_dir_all(&dst)?;
|
||||
|
||||
let mut dst_file = BufWriter::new(File::create(dst.join("data.jsonl"))?);
|
||||
let src_file = BufReader::new(File::open(src.as_ref().join("updates/data.jsonl"))?);
|
||||
|
||||
serde_json::Deserializer::from_reader(src_file)
|
||||
.into_iter::<v3::UpdateEntry>()
|
||||
.enumerate()
|
||||
.try_for_each(|(task_id, entry)| -> anyhow::Result<()> {
|
||||
let entry = entry?;
|
||||
let name = uuid_map
|
||||
.get(&entry.uuid)
|
||||
.with_context(|| format!("Unknown index uuid: {}", entry.uuid))?
|
||||
.clone();
|
||||
serde_json::to_writer(
|
||||
&mut dst_file,
|
||||
&Task::from((entry.update, name, task_id as TaskId)),
|
||||
)?;
|
||||
dst_file.write_all(b"\n")?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
dst_file.flush()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -0,0 +1,45 @@
|
||||
use std::path::Path;
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use log::info;
|
||||
|
||||
use crate::analytics;
|
||||
use crate::index_controller::dump_actor::Metadata;
|
||||
use crate::index_resolver::IndexResolver;
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::TaskStore;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
pub fn load_dump(
|
||||
meta: Metadata,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
meta_env_size: usize,
|
||||
indexing_options: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
info!(
|
||||
"Loading dump from {}, dump database version: {}, dump version: V4",
|
||||
meta.dump_date, meta.db_version
|
||||
);
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(meta_env_size);
|
||||
options.max_dbs(100);
|
||||
let env = options.open(&dst)?;
|
||||
|
||||
IndexResolver::load_dump(
|
||||
src.as_ref(),
|
||||
&dst,
|
||||
index_db_size,
|
||||
env.clone(),
|
||||
indexing_options,
|
||||
)?;
|
||||
UpdateFileStore::load_dump(src.as_ref(), &dst)?;
|
||||
TaskStore::load_dump(&src, env)?;
|
||||
analytics::copy_user_id(src.as_ref(), dst.as_ref());
|
||||
|
||||
info!("Loading indexes.");
|
||||
|
||||
Ok(())
|
||||
}
|
@ -1,31 +1,30 @@
|
||||
use std::fs::File;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::{info, trace, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::fs::create_dir_all;
|
||||
|
||||
use loaders::v1::MetadataV1;
|
||||
|
||||
pub use actor::DumpActor;
|
||||
pub use handle_impl::*;
|
||||
pub use message::DumpMsg;
|
||||
use tokio::fs::create_dir_all;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use super::index_resolver::index_store::IndexStore;
|
||||
use super::index_resolver::uuid_store::UuidStore;
|
||||
use super::index_resolver::IndexResolver;
|
||||
use super::updates::UpdateSender;
|
||||
use crate::analytics;
|
||||
use crate::compression::{from_tar_gz, to_tar_gz};
|
||||
use crate::index_controller::dump_actor::error::DumpActorError;
|
||||
use crate::index_controller::dump_actor::loaders::{v2, v3};
|
||||
use crate::index_controller::updates::UpdateMsg;
|
||||
use crate::index_controller::dump_actor::loaders::{v2, v3, v4};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::task::Job;
|
||||
use crate::tasks::TaskStore;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
use error::Result;
|
||||
|
||||
mod actor;
|
||||
mod compat;
|
||||
pub mod error;
|
||||
mod handle_impl;
|
||||
mod loaders;
|
||||
@ -71,18 +70,19 @@ pub enum MetadataVersion {
|
||||
V1(MetadataV1),
|
||||
V2(Metadata),
|
||||
V3(Metadata),
|
||||
V4(Metadata),
|
||||
}
|
||||
|
||||
impl MetadataVersion {
|
||||
pub fn new_v3(index_db_size: usize, update_db_size: usize) -> Self {
|
||||
pub fn new_v4(index_db_size: usize, update_db_size: usize) -> Self {
|
||||
let meta = Metadata::new(index_db_size, update_db_size);
|
||||
Self::V3(meta)
|
||||
Self::V4(meta)
|
||||
}
|
||||
|
||||
pub fn db_version(&self) -> &str {
|
||||
match self {
|
||||
Self::V1(meta) => &meta.db_version,
|
||||
Self::V2(meta) | Self::V3(meta) => &meta.db_version,
|
||||
Self::V2(meta) | Self::V3(meta) | Self::V4(meta) => &meta.db_version,
|
||||
}
|
||||
}
|
||||
|
||||
@ -91,13 +91,16 @@ impl MetadataVersion {
|
||||
MetadataVersion::V1(_) => "V1",
|
||||
MetadataVersion::V2(_) => "V2",
|
||||
MetadataVersion::V3(_) => "V3",
|
||||
MetadataVersion::V4(_) => "V4",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dump_date(&self) -> Option<&DateTime<Utc>> {
|
||||
match self {
|
||||
MetadataVersion::V1(_) => None,
|
||||
MetadataVersion::V2(meta) | MetadataVersion::V3(meta) => Some(&meta.dump_date),
|
||||
MetadataVersion::V2(meta) | MetadataVersion::V3(meta) | MetadataVersion::V4(meta) => {
|
||||
Some(&meta.dump_date)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -190,8 +193,9 @@ pub fn load_dump(
|
||||
);
|
||||
|
||||
match meta {
|
||||
MetadataVersion::V1(meta) => {
|
||||
meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)?
|
||||
MetadataVersion::V1(_meta) => {
|
||||
anyhow::bail!("This version (v1) of the dump is too old to be imported.")
|
||||
// meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer _opts)?
|
||||
}
|
||||
MetadataVersion::V2(meta) => v2::load_dump(
|
||||
meta,
|
||||
@ -209,6 +213,14 @@ pub fn load_dump(
|
||||
update_db_size,
|
||||
indexer_opts,
|
||||
)?,
|
||||
MetadataVersion::V4(meta) => v4::load_dump(
|
||||
meta,
|
||||
&tmp_src_path,
|
||||
tmp_dst.path(),
|
||||
index_db_size,
|
||||
update_db_size,
|
||||
indexer_opts,
|
||||
)?,
|
||||
}
|
||||
// Persist and atomically rename the db
|
||||
let persisted_dump = tmp_dst.into_path();
|
||||
@ -222,21 +234,17 @@ pub fn load_dump(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct DumpTask<U, I> {
|
||||
struct DumpJob {
|
||||
dump_path: PathBuf,
|
||||
db_path: PathBuf,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
update_sender: UpdateSender,
|
||||
update_file_store: UpdateFileStore,
|
||||
task_store: TaskStore,
|
||||
uid: String,
|
||||
update_db_size: usize,
|
||||
index_db_size: usize,
|
||||
}
|
||||
|
||||
impl<U, I> DumpTask<U, I>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
impl DumpJob {
|
||||
async fn run(self) -> Result<()> {
|
||||
trace!("Performing dump.");
|
||||
|
||||
@ -245,18 +253,32 @@ where
|
||||
let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??;
|
||||
let temp_dump_path = temp_dump_dir.path().to_owned();
|
||||
|
||||
let meta = MetadataVersion::new_v3(self.index_db_size, self.update_db_size);
|
||||
let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size);
|
||||
let meta_path = temp_dump_path.join(META_FILE_NAME);
|
||||
let mut meta_file = File::create(&meta_path)?;
|
||||
serde_json::to_writer(&mut meta_file, &meta)?;
|
||||
analytics::copy_user_id(&self.db_path, &temp_dump_path);
|
||||
|
||||
create_dir_all(&temp_dump_path.join("indexes")).await?;
|
||||
let uuids = self.index_resolver.dump(temp_dump_path.clone()).await?;
|
||||
|
||||
UpdateMsg::dump(&self.update_sender, uuids, temp_dump_path.clone()).await?;
|
||||
let (sender, receiver) = oneshot::channel();
|
||||
|
||||
self.task_store
|
||||
.register_job(Job::Dump {
|
||||
ret: sender,
|
||||
path: temp_dump_path.clone(),
|
||||
})
|
||||
.await;
|
||||
receiver.await??;
|
||||
self.task_store
|
||||
.dump(&temp_dump_path, self.update_file_store.clone())
|
||||
.await?;
|
||||
|
||||
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
|
||||
// for now we simply copy the updates/updates_files
|
||||
// FIXME: We may copy more files than necessary, if new files are added while we are
|
||||
// performing the dump. We need a way to filter them out.
|
||||
|
||||
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?;
|
||||
to_tar_gz(temp_dump_path, temp_dump_file.path())
|
||||
.map_err(|e| DumpActorError::Internal(e.into()))?;
|
||||
@ -279,17 +301,17 @@ mod test {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use futures::future::{err, ok};
|
||||
use nelson::Mocker;
|
||||
use once_cell::sync::Lazy;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::*;
|
||||
use crate::index::error::Result as IndexResult;
|
||||
use crate::index::test::Mocker;
|
||||
use crate::index::Index;
|
||||
use crate::index_controller::index_resolver::error::IndexResolverError;
|
||||
use crate::index_controller::index_resolver::index_store::MockIndexStore;
|
||||
use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
|
||||
use crate::index_controller::updates::create_update_handler;
|
||||
use crate::index_resolver::error::IndexResolverError;
|
||||
use crate::index_resolver::index_store::MockIndexStore;
|
||||
use crate::index_resolver::meta_store::MockIndexMetaStore;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
fn setup() {
|
||||
static SETUP: Lazy<()> = Lazy::new(|| {
|
||||
@ -305,6 +327,7 @@ mod test {
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn test_dump_normal() {
|
||||
setup();
|
||||
|
||||
@ -313,12 +336,11 @@ mod test {
|
||||
let uuids = std::iter::repeat_with(Uuid::new_v4)
|
||||
.take(4)
|
||||
.collect::<HashSet<_>>();
|
||||
let mut uuid_store = MockUuidStore::new();
|
||||
let uuids_cloned = uuids.clone();
|
||||
let mut uuid_store = MockIndexMetaStore::new();
|
||||
uuid_store
|
||||
.expect_dump()
|
||||
.once()
|
||||
.returning(move |_| Box::pin(ok(uuids_cloned.clone())));
|
||||
.returning(move |_| Box::pin(ok(())));
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store.expect_get().times(4).returning(move |uuid| {
|
||||
@ -332,20 +354,25 @@ mod test {
|
||||
.when::<&Path, IndexResult<()>>("dump")
|
||||
.once()
|
||||
.then(move |_| Ok(()));
|
||||
Box::pin(ok(Some(Index::faux(mocker))))
|
||||
Box::pin(ok(Some(Index::mock(mocker))))
|
||||
});
|
||||
|
||||
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
|
||||
let mocker = Mocker::default();
|
||||
let update_file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let update_sender =
|
||||
create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap();
|
||||
//let update_sender =
|
||||
// create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap();
|
||||
|
||||
let task = DumpTask {
|
||||
//TODO: fix dump tests
|
||||
let mocker = Mocker::default();
|
||||
let task_store = TaskStore::mock(mocker);
|
||||
|
||||
let task = DumpJob {
|
||||
dump_path: tmp.path().into(),
|
||||
// this should do nothing
|
||||
update_file_store,
|
||||
db_path: tmp.path().into(),
|
||||
index_resolver,
|
||||
update_sender,
|
||||
task_store,
|
||||
uid: String::from("test"),
|
||||
update_db_size: 4096 * 10,
|
||||
index_db_size: 4096 * 10,
|
||||
@ -355,27 +382,28 @@ mod test {
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn error_performing_dump() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
|
||||
let mut uuid_store = MockUuidStore::new();
|
||||
let mut uuid_store = MockIndexMetaStore::new();
|
||||
uuid_store
|
||||
.expect_dump()
|
||||
.once()
|
||||
.returning(move |_| Box::pin(err(IndexResolverError::ExistingPrimaryKey)));
|
||||
|
||||
let index_store = MockIndexStore::new();
|
||||
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
|
||||
let mocker = Mocker::default();
|
||||
let file_store = UpdateFileStore::mock(mocker);
|
||||
|
||||
let update_sender =
|
||||
create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap();
|
||||
let mocker = Mocker::default();
|
||||
let task_store = TaskStore::mock(mocker);
|
||||
|
||||
let task = DumpTask {
|
||||
let task = DumpJob {
|
||||
dump_path: tmp.path().into(),
|
||||
// this should do nothing
|
||||
db_path: tmp.path().into(),
|
||||
index_resolver,
|
||||
update_sender,
|
||||
update_file_store: file_store,
|
||||
task_store,
|
||||
uid: String::from("test"),
|
||||
update_db_size: 4096 * 10,
|
||||
index_db_size: 4096 * 10,
|
||||
|
@ -4,11 +4,14 @@ use meilisearch_error::Code;
|
||||
use meilisearch_error::ErrorCode;
|
||||
use tokio::task::JoinError;
|
||||
|
||||
use super::DocumentAdditionFormat;
|
||||
use crate::document_formats::DocumentFormatError;
|
||||
use crate::index::error::IndexError;
|
||||
use crate::tasks::error::TaskError;
|
||||
use crate::update_file_store::UpdateFileStoreError;
|
||||
|
||||
use super::dump_actor::error::DumpActorError;
|
||||
use super::index_resolver::error::IndexResolverError;
|
||||
use super::updates::error::UpdateLoopError;
|
||||
use crate::index_resolver::error::IndexResolverError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, IndexControllerError>;
|
||||
|
||||
@ -19,26 +22,47 @@ pub enum IndexControllerError {
|
||||
#[error("{0}")]
|
||||
IndexResolver(#[from] IndexResolverError),
|
||||
#[error("{0}")]
|
||||
UpdateLoop(#[from] UpdateLoopError),
|
||||
#[error("{0}")]
|
||||
DumpActor(#[from] DumpActorError),
|
||||
#[error("{0}")]
|
||||
IndexError(#[from] IndexError),
|
||||
#[error("An internal error has occurred. `{0}`.")]
|
||||
Internal(Box<dyn Error + Send + Sync + 'static>),
|
||||
#[error("{0}")]
|
||||
TaskError(#[from] TaskError),
|
||||
#[error("{0}")]
|
||||
DumpError(#[from] DumpActorError),
|
||||
#[error("{0}")]
|
||||
DocumentFormatError(#[from] DocumentFormatError),
|
||||
#[error("A {0} payload is missing.")]
|
||||
MissingPayload(DocumentAdditionFormat),
|
||||
#[error("The provided payload reached the size limit.")]
|
||||
PayloadTooLarge,
|
||||
}
|
||||
|
||||
internal_error!(IndexControllerError: JoinError);
|
||||
internal_error!(IndexControllerError: JoinError, UpdateFileStoreError);
|
||||
|
||||
impl From<actix_web::error::PayloadError> for IndexControllerError {
|
||||
fn from(other: actix_web::error::PayloadError) -> Self {
|
||||
match other {
|
||||
actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge,
|
||||
_ => Self::Internal(Box::new(other)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for IndexControllerError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
IndexControllerError::MissingUid => Code::BadRequest,
|
||||
IndexControllerError::IndexResolver(e) => e.error_code(),
|
||||
IndexControllerError::UpdateLoop(e) => e.error_code(),
|
||||
IndexControllerError::DumpActor(e) => e.error_code(),
|
||||
IndexControllerError::IndexError(e) => e.error_code(),
|
||||
IndexControllerError::Internal(_) => Code::Internal,
|
||||
IndexControllerError::TaskError(e) => e.error_code(),
|
||||
IndexControllerError::DocumentFormatError(e) => e.error_code(),
|
||||
IndexControllerError::MissingPayload(_) => Code::MissingPayload,
|
||||
IndexControllerError::PayloadTooLarge => Code::PayloadTooLarge,
|
||||
IndexControllerError::DumpError(DumpActorError::DumpAlreadyRunning) => {
|
||||
Code::DumpAlreadyInProgress
|
||||
}
|
||||
IndexControllerError::DumpError(_) => Code::DumpProcessFailed,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,185 +0,0 @@
|
||||
pub mod error;
|
||||
pub mod index_store;
|
||||
pub mod uuid_store;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use error::{IndexResolverError, Result};
|
||||
use index_store::{IndexStore, MapIndexStore};
|
||||
use log::error;
|
||||
use uuid::Uuid;
|
||||
use uuid_store::{HeedUuidStore, UuidStore};
|
||||
|
||||
use crate::{
|
||||
index::{update_handler::UpdateHandler, Index},
|
||||
options::IndexerOpts,
|
||||
};
|
||||
|
||||
pub type HardStateIndexResolver = IndexResolver<HeedUuidStore, MapIndexStore>;
|
||||
|
||||
pub fn create_index_resolver(
|
||||
path: impl AsRef<Path>,
|
||||
index_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<HardStateIndexResolver> {
|
||||
let uuid_store = HeedUuidStore::new(&path)?;
|
||||
let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?;
|
||||
Ok(IndexResolver::new(uuid_store, index_store))
|
||||
}
|
||||
|
||||
/// Resolves index uids to indexes by combining two stores.
pub struct IndexResolver<U, I> {
    /// Store queried to translate an index uid into its uuid.
    index_uuid_store: U,
    /// Store queried to fetch, create or delete an index by uuid.
    index_store: I,
}
|
||||
|
||||
impl IndexResolver<HeedUuidStore, MapIndexStore> {
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
HeedUuidStore::load_dump(&src, &dst)?;
|
||||
|
||||
let indexes_path = src.as_ref().join("indexes");
|
||||
let indexes = indexes_path.read_dir()?;
|
||||
|
||||
let update_handler = UpdateHandler::new(indexer_opts)?;
|
||||
for index in indexes {
|
||||
let index = index?;
|
||||
Index::load_dump(&index.path(), &dst, index_db_size, &update_handler)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> IndexResolver<U, I>
|
||||
where
|
||||
U: UuidStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
pub fn new(index_uuid_store: U, index_store: I) -> Self {
|
||||
Self {
|
||||
index_uuid_store,
|
||||
index_store,
|
||||
}
|
||||
}
|
||||
|
||||
/// Dumps the uuid store to `path` and returns the indexes behind the uuids it reported.
///
/// Fails on the first uuid that cannot be resolved to an index.
pub async fn dump(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
    let destination = path.as_ref().to_owned();
    let dumped_uuids = self.index_uuid_store.dump(destination).await?;

    let mut resolved = Vec::new();
    for uuid in dumped_uuids {
        let index = self.get_index_by_uuid(uuid).await?;
        resolved.push(index);
    }

    Ok(resolved)
}
|
||||
|
||||
/// Returns the size reported by the uuid store.
pub async fn get_uuids_size(&self) -> Result<u64> {
    let size = self.index_uuid_store.get_size().await?;
    Ok(size)
}
|
||||
|
||||
/// Snapshots the uuid store to `path` and returns the indexes behind the uuids it reported.
///
/// Fails on the first uuid that cannot be resolved to an index.
pub async fn snapshot(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
    let destination = path.as_ref().to_owned();
    let snapshotted_uuids = self.index_uuid_store.snapshot(destination).await?;

    let mut resolved = Vec::new();
    for uuid in snapshotted_uuids {
        let index = self.get_index_by_uuid(uuid).await?;
        resolved.push(index);
    }

    Ok(resolved)
}
|
||||
|
||||
pub async fn create_index(&self, uid: String, primary_key: Option<String>) -> Result<Index> {
|
||||
if !is_index_uid_valid(&uid) {
|
||||
return Err(IndexResolverError::BadlyFormatted(uid));
|
||||
}
|
||||
let uuid = Uuid::new_v4();
|
||||
let index = self.index_store.create(uuid, primary_key).await?;
|
||||
match self.index_uuid_store.insert(uid, uuid).await {
|
||||
Err(e) => {
|
||||
match self.index_store.delete(uuid).await {
|
||||
Ok(Some(index)) => {
|
||||
index.inner().clone().prepare_for_closing();
|
||||
}
|
||||
Ok(None) => (),
|
||||
Err(e) => error!("Error while deleting index: {:?}", e),
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
Ok(()) => Ok(index),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
|
||||
let uuids = self.index_uuid_store.list().await?;
|
||||
let mut indexes = Vec::new();
|
||||
for (name, uuid) in uuids {
|
||||
match self.index_store.get(uuid).await? {
|
||||
Some(index) => indexes.push((name, index)),
|
||||
None => {
|
||||
// we found an unexisting index, we remove it from the uuid store
|
||||
let _ = self.index_uuid_store.delete(name).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(indexes)
|
||||
}
|
||||
|
||||
pub async fn delete_index(&self, uid: String) -> Result<Uuid> {
|
||||
match self.index_uuid_store.delete(uid.clone()).await? {
|
||||
Some(uuid) => {
|
||||
match self.index_store.delete(uuid).await {
|
||||
Ok(Some(index)) => {
|
||||
index.inner().clone().prepare_for_closing();
|
||||
}
|
||||
Ok(None) => (),
|
||||
Err(e) => error!("Error while deleting index: {:?}", e),
|
||||
}
|
||||
Ok(uuid)
|
||||
}
|
||||
None => Err(IndexResolverError::UnexistingIndex(uid)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index_by_uuid(&self, uuid: Uuid) -> Result<Index> {
|
||||
// TODO: Handle this error better.
|
||||
self.index_store
|
||||
.get(uuid)
|
||||
.await?
|
||||
.ok_or_else(|| IndexResolverError::UnexistingIndex(String::new()))
|
||||
}
|
||||
|
||||
pub async fn get_index(&self, uid: String) -> Result<Index> {
|
||||
match self.index_uuid_store.get_uuid(uid).await? {
|
||||
(name, Some(uuid)) => {
|
||||
match self.index_store.get(uuid).await? {
|
||||
Some(index) => Ok(index),
|
||||
None => {
|
||||
// For some reason we got a uuid to an unexisting index, we return an error,
|
||||
// and remove the uuid from the uuid store.
|
||||
let _ = self.index_uuid_store.delete(name.clone()).await;
|
||||
Err(IndexResolverError::UnexistingIndex(name))
|
||||
}
|
||||
}
|
||||
}
|
||||
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_uuid(&self, uid: String) -> Result<Uuid> {
|
||||
match self.index_uuid_store.get_uuid(uid).await? {
|
||||
(_, Some(uuid)) => Ok(uuid),
|
||||
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `uid` is an acceptable index uid.
///
/// A valid uid is non-empty and made only of ASCII letters, ASCII digits, `-` and `_`.
/// The emptiness check is explicit because `all` is vacuously true on an empty iterator,
/// which would otherwise let the empty string through.
fn is_index_uid_valid(uid: &str) -> bool {
    !uid.is_empty()
        && uid
            .chars()
            .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
}
|
@ -1,5 +1,6 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt;
|
||||
use std::io::Cursor;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
@ -8,44 +9,39 @@ use actix_web::error::PayloadError;
|
||||
use bytes::Bytes;
|
||||
use chrono::{DateTime, Utc};
|
||||
use futures::Stream;
|
||||
use log::info;
|
||||
use futures::StreamExt;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::spawn_blocking;
|
||||
use tokio::time::sleep;
|
||||
use uuid::Uuid;
|
||||
|
||||
use dump_actor::DumpActorHandle;
|
||||
pub use dump_actor::{DumpInfo, DumpStatus};
|
||||
use snapshot::load_snapshot;
|
||||
|
||||
use crate::index::error::Result as IndexResult;
|
||||
use crate::document_formats::{read_csv, read_json, read_ndjson};
|
||||
use crate::index::{
|
||||
Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked,
|
||||
};
|
||||
use crate::index_controller::index_resolver::create_index_resolver;
|
||||
use crate::index_controller::snapshot::SnapshotService;
|
||||
use crate::index_controller::dump_actor::{load_dump, DumpActor, DumpActorHandleImpl};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::snapshot::{load_snapshot, SnapshotService};
|
||||
use crate::tasks::create_task_store;
|
||||
use crate::tasks::error::TaskError;
|
||||
use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId};
|
||||
use crate::tasks::{TaskFilter, TaskStore};
|
||||
use error::Result;
|
||||
|
||||
use self::dump_actor::load_dump;
|
||||
use self::index_resolver::error::IndexResolverError;
|
||||
use self::index_resolver::index_store::{IndexStore, MapIndexStore};
|
||||
use self::index_resolver::uuid_store::{HeedUuidStore, UuidStore};
|
||||
use self::index_resolver::IndexResolver;
|
||||
use self::updates::status::UpdateStatus;
|
||||
use self::updates::UpdateMsg;
|
||||
use self::dump_actor::{DumpActorHandle, DumpInfo};
|
||||
use self::error::IndexControllerError;
|
||||
use crate::index_resolver::index_store::{IndexStore, MapIndexStore};
|
||||
use crate::index_resolver::meta_store::{HeedMetaStore, IndexMetaStore};
|
||||
use crate::index_resolver::{create_index_resolver, IndexResolver, IndexUid};
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
mod dump_actor;
|
||||
pub mod error;
|
||||
mod index_resolver;
|
||||
mod snapshot;
|
||||
pub mod update_file_store;
|
||||
pub mod updates;
|
||||
|
||||
/// Concrete implementation of the IndexController, exposed by meilisearch-lib
|
||||
pub type MeiliSearch =
|
||||
IndexController<HeedUuidStore, MapIndexStore, dump_actor::DumpActorHandleImpl>;
|
||||
pub type MeiliSearch = IndexController<HeedMetaStore, MapIndexStore>;
|
||||
|
||||
pub type Payload = Box<
|
||||
dyn Stream<Item = std::result::Result<Bytes, PayloadError>> + Send + Sync + 'static + Unpin,
|
||||
@ -68,6 +64,25 @@ pub struct IndexSettings {
|
||||
pub primary_key: Option<String>,
|
||||
}
|
||||
|
||||
pub struct IndexController<U, I> {
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
task_store: TaskStore,
|
||||
dump_handle: dump_actor::DumpActorHandleImpl,
|
||||
update_file_store: UpdateFileStore,
|
||||
}
|
||||
|
||||
/// Need a custom implementation for clone because deriving require that U and I are clone.
|
||||
impl<U, I> Clone for IndexController<U, I> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
index_resolver: self.index_resolver.clone(),
|
||||
task_store: self.task_store.clone(),
|
||||
dump_handle: self.dump_handle.clone(),
|
||||
update_file_store: self.update_file_store.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum DocumentAdditionFormat {
|
||||
Json,
|
||||
@ -99,7 +114,11 @@ pub struct Stats {
|
||||
pub enum Update {
|
||||
DeleteDocuments(Vec<String>),
|
||||
ClearDocuments,
|
||||
Settings(Settings<Unchecked>),
|
||||
Settings {
|
||||
settings: Settings<Unchecked>,
|
||||
/// Indicates whether the update was a deletion
|
||||
is_deletion: bool,
|
||||
},
|
||||
DocumentAddition {
|
||||
#[derivative(Debug = "ignore")]
|
||||
payload: Payload,
|
||||
@ -107,12 +126,19 @@ pub enum Update {
|
||||
method: IndexDocumentsMethod,
|
||||
format: DocumentAdditionFormat,
|
||||
},
|
||||
DeleteIndex,
|
||||
CreateIndex {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
UpdateIndex {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
pub struct IndexControllerBuilder {
|
||||
max_index_size: Option<usize>,
|
||||
max_update_store_size: Option<usize>,
|
||||
max_task_store_size: Option<usize>,
|
||||
snapshot_dir: Option<PathBuf>,
|
||||
import_snapshot: Option<PathBuf>,
|
||||
snapshot_interval: Option<Duration>,
|
||||
@ -132,12 +158,12 @@ impl IndexControllerBuilder {
|
||||
let index_size = self
|
||||
.max_index_size
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
|
||||
let update_store_size = self
|
||||
.max_index_size
|
||||
let task_store_size = self
|
||||
.max_task_store_size
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
|
||||
|
||||
if let Some(ref path) = self.import_snapshot {
|
||||
info!("Loading from snapshot {:?}", path);
|
||||
log::info!("Loading from snapshot {:?}", path);
|
||||
load_snapshot(
|
||||
db_path.as_ref(),
|
||||
path,
|
||||
@ -149,67 +175,84 @@ impl IndexControllerBuilder {
|
||||
db_path.as_ref(),
|
||||
src_path,
|
||||
index_size,
|
||||
update_store_size,
|
||||
task_store_size,
|
||||
&indexer_options,
|
||||
)?;
|
||||
}
|
||||
|
||||
std::fs::create_dir_all(db_path.as_ref())?;
|
||||
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.map_size(task_store_size);
|
||||
options.max_dbs(20);
|
||||
|
||||
let meta_env = options.open(&db_path)?;
|
||||
|
||||
let update_file_store = UpdateFileStore::new(&db_path)?;
|
||||
|
||||
let index_resolver = Arc::new(create_index_resolver(
|
||||
&db_path,
|
||||
index_size,
|
||||
&indexer_options,
|
||||
meta_env.clone(),
|
||||
update_file_store.clone(),
|
||||
)?);
|
||||
|
||||
#[allow(unreachable_code)]
|
||||
let update_sender =
|
||||
updates::create_update_handler(index_resolver.clone(), &db_path, update_store_size)?;
|
||||
let task_store =
|
||||
create_task_store(meta_env, index_resolver.clone()).map_err(|e| anyhow::anyhow!(e))?;
|
||||
|
||||
let dump_path = self
|
||||
.dump_dst
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?;
|
||||
let analytics_path = db_path.as_ref().join("instance-uid");
|
||||
let dump_handle = dump_actor::DumpActorHandleImpl::new(
|
||||
dump_path,
|
||||
analytics_path,
|
||||
index_resolver.clone(),
|
||||
update_sender.clone(),
|
||||
index_size,
|
||||
update_store_size,
|
||||
)?;
|
||||
let dump_handle = {
|
||||
let analytics_path = &db_path;
|
||||
let (sender, receiver) = mpsc::channel(10);
|
||||
let actor = DumpActor::new(
|
||||
receiver,
|
||||
update_file_store.clone(),
|
||||
task_store.clone(),
|
||||
dump_path,
|
||||
analytics_path,
|
||||
index_size,
|
||||
task_store_size,
|
||||
);
|
||||
|
||||
let dump_handle = Arc::new(dump_handle);
|
||||
tokio::task::spawn(actor.run());
|
||||
|
||||
DumpActorHandleImpl { sender }
|
||||
};
|
||||
|
||||
if self.schedule_snapshot {
|
||||
let snapshot_service = SnapshotService::new(
|
||||
index_resolver.clone(),
|
||||
update_sender.clone(),
|
||||
self.snapshot_interval
|
||||
.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?,
|
||||
self.snapshot_dir
|
||||
.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?,
|
||||
db_path.as_ref().into(),
|
||||
db_path
|
||||
.as_ref()
|
||||
.file_name()
|
||||
.map(|n| n.to_owned().into_string().expect("invalid path"))
|
||||
.unwrap_or_else(|| String::from("data.ms")),
|
||||
);
|
||||
let snapshot_period = self
|
||||
.snapshot_interval
|
||||
.ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?;
|
||||
let snapshot_path = self
|
||||
.snapshot_dir
|
||||
.ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?;
|
||||
|
||||
let snapshot_service = SnapshotService {
|
||||
db_path: db_path.as_ref().to_path_buf(),
|
||||
snapshot_period,
|
||||
snapshot_path,
|
||||
index_size,
|
||||
meta_env_size: task_store_size,
|
||||
task_store: task_store.clone(),
|
||||
};
|
||||
|
||||
tokio::task::spawn(snapshot_service.run());
|
||||
}
|
||||
|
||||
Ok(IndexController {
|
||||
index_resolver,
|
||||
update_sender,
|
||||
task_store,
|
||||
dump_handle,
|
||||
update_file_store,
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the index controller builder's max update store size.
|
||||
pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
|
||||
self.max_update_store_size.replace(max_update_store_size);
|
||||
pub fn set_max_task_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
|
||||
self.max_task_store_size.replace(max_update_store_size);
|
||||
self
|
||||
}
|
||||
|
||||
@ -270,61 +313,133 @@ impl IndexControllerBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
// We are using derivative here to derive Clone, because U, I and D do not necessarily implement
|
||||
// Clone themselves.
|
||||
#[derive(derivative::Derivative)]
|
||||
#[derivative(Clone(bound = ""))]
|
||||
pub struct IndexController<U, I, D> {
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
update_sender: updates::UpdateSender,
|
||||
dump_handle: Arc<D>,
|
||||
}
|
||||
|
||||
impl<U, I, D> IndexController<U, I, D>
|
||||
impl<U, I> IndexController<U, I>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
D: DumpActorHandle + Send + Sync,
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
pub fn builder() -> IndexControllerBuilder {
|
||||
IndexControllerBuilder::default()
|
||||
}
|
||||
|
||||
pub async fn register_update(
|
||||
&self,
|
||||
uid: String,
|
||||
update: Update,
|
||||
create_index: bool,
|
||||
) -> Result<UpdateStatus> {
|
||||
match self.index_resolver.get_uuid(uid).await {
|
||||
Ok(uuid) => {
|
||||
let update_result = UpdateMsg::update(&self.update_sender, uuid, update).await?;
|
||||
Ok(update_result)
|
||||
pub async fn register_update(&self, uid: String, update: Update) -> Result<Task> {
|
||||
let uid = IndexUid::new(uid)?;
|
||||
let content = match update {
|
||||
Update::DeleteDocuments(ids) => {
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids))
|
||||
}
|
||||
Err(IndexResolverError::UnexistingIndex(name)) => {
|
||||
if create_index {
|
||||
let index = self.index_resolver.create_index(name, None).await?;
|
||||
let update_result =
|
||||
UpdateMsg::update(&self.update_sender, index.uuid(), update).await?;
|
||||
Ok(update_result)
|
||||
} else {
|
||||
Err(IndexResolverError::UnexistingIndex(name).into())
|
||||
Update::ClearDocuments => TaskContent::DocumentDeletion(DocumentDeletion::Clear),
|
||||
Update::Settings {
|
||||
settings,
|
||||
is_deletion,
|
||||
} => TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
is_deletion,
|
||||
},
|
||||
Update::DocumentAddition {
|
||||
mut payload,
|
||||
primary_key,
|
||||
format,
|
||||
method,
|
||||
} => {
|
||||
let mut buffer = Vec::new();
|
||||
while let Some(bytes) = payload.next().await {
|
||||
let bytes = bytes?;
|
||||
buffer.extend_from_slice(&bytes);
|
||||
}
|
||||
let (content_uuid, mut update_file) = self.update_file_store.new_update()?;
|
||||
let documents_count = tokio::task::spawn_blocking(move || -> Result<_> {
|
||||
// check if the payload is empty, and return an error
|
||||
if buffer.is_empty() {
|
||||
return Err(IndexControllerError::MissingPayload(format));
|
||||
}
|
||||
|
||||
let reader = Cursor::new(buffer);
|
||||
let count = match format {
|
||||
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?,
|
||||
};
|
||||
|
||||
update_file.persist()?;
|
||||
|
||||
Ok(count)
|
||||
})
|
||||
.await??;
|
||||
|
||||
TaskContent::DocumentAddition {
|
||||
content_uuid,
|
||||
merge_strategy: method,
|
||||
primary_key,
|
||||
documents_count,
|
||||
}
|
||||
}
|
||||
Err(e) => Err(e.into()),
|
||||
Update::DeleteIndex => TaskContent::IndexDeletion,
|
||||
Update::CreateIndex { primary_key } => TaskContent::IndexCreation { primary_key },
|
||||
Update::UpdateIndex { primary_key } => TaskContent::IndexUpdate { primary_key },
|
||||
};
|
||||
|
||||
let task = self.task_store.register(uid, content).await?;
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Fetches the task `id` from the task store, applying `filter` when one is given.
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
    Ok(self.task_store.get_task(id, filter).await?)
}
|
||||
|
||||
pub async fn get_index_task(&self, index_uid: String, task_id: TaskId) -> Result<Task> {
|
||||
let creation_task_id = self
|
||||
.index_resolver
|
||||
.get_index_creation_task_id(index_uid.clone())
|
||||
.await?;
|
||||
if task_id < creation_task_id {
|
||||
return Err(TaskError::UnexistingTask(task_id).into());
|
||||
}
|
||||
|
||||
let mut filter = TaskFilter::default();
|
||||
filter.filter_index(index_uid);
|
||||
let task = self.task_store.get_task(task_id, Some(filter)).await?;
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
pub async fn update_status(&self, uid: String, id: u64) -> Result<UpdateStatus> {
|
||||
let uuid = self.index_resolver.get_uuid(uid).await?;
|
||||
let result = UpdateMsg::get_update(&self.update_sender, uuid, id).await?;
|
||||
Ok(result)
|
||||
pub async fn list_tasks(
|
||||
&self,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
offset: Option<TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
let tasks = self.task_store.list_tasks(offset, filter, limit).await?;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
pub async fn all_update_status(&self, uid: String) -> Result<Vec<UpdateStatus>> {
|
||||
let uuid = self.index_resolver.get_uuid(uid).await?;
|
||||
let result = UpdateMsg::list_updates(&self.update_sender, uuid).await?;
|
||||
Ok(result)
|
||||
pub async fn list_index_task(
|
||||
&self,
|
||||
index_uid: String,
|
||||
limit: Option<usize>,
|
||||
offset: Option<TaskId>,
|
||||
) -> Result<Vec<Task>> {
|
||||
let task_id = self
|
||||
.index_resolver
|
||||
.get_index_creation_task_id(index_uid.clone())
|
||||
.await?;
|
||||
|
||||
let mut filter = TaskFilter::default();
|
||||
filter.filter_index(index_uid);
|
||||
|
||||
let tasks = self
|
||||
.task_store
|
||||
.list_tasks(
|
||||
Some(offset.unwrap_or_default() + task_id),
|
||||
Some(filter),
|
||||
limit,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
|
||||
@ -377,28 +492,8 @@ where
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
pub async fn update_index(
|
||||
&self,
|
||||
uid: String,
|
||||
mut index_settings: IndexSettings,
|
||||
) -> Result<IndexMetadata> {
|
||||
index_settings.uid.take();
|
||||
|
||||
let index = self.index_resolver.get_index(uid.clone()).await?;
|
||||
let uuid = index.uuid();
|
||||
let meta =
|
||||
spawn_blocking(move || index.update_primary_key(index_settings.primary_key)).await??;
|
||||
let meta = IndexMetadata {
|
||||
uuid,
|
||||
name: uid.clone(),
|
||||
uid,
|
||||
meta,
|
||||
};
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
pub async fn search(&self, uid: String, query: SearchQuery) -> Result<SearchResult> {
|
||||
let index = self.index_resolver.get_index(uid.clone()).await?;
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
let result = spawn_blocking(move || index.perform_search(query)).await??;
|
||||
Ok(result)
|
||||
}
|
||||
@ -417,45 +512,50 @@ where
|
||||
}
|
||||
|
||||
pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
|
||||
let update_infos = UpdateMsg::get_info(&self.update_sender).await?;
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
let uuid = index.uuid();
|
||||
let mut stats = spawn_blocking(move || index.stats()).await??;
|
||||
let last_task = self.task_store.get_processing_task().await?;
|
||||
// Check if the currently indexing update is from our index.
|
||||
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
|
||||
let is_indexing = last_task
|
||||
.map(|task| task.index_uid.into_inner() == uid)
|
||||
.unwrap_or_default();
|
||||
|
||||
let index = self.index_resolver.get_index(uid).await?;
|
||||
let mut stats = spawn_blocking(move || index.stats()).await??;
|
||||
stats.is_indexing = Some(is_indexing);
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
pub async fn get_all_stats(&self) -> Result<Stats> {
|
||||
let update_infos = UpdateMsg::get_info(&self.update_sender).await?;
|
||||
let mut database_size = self.index_resolver.get_uuids_size().await? + update_infos.size;
|
||||
let mut last_update: Option<DateTime<_>> = None;
|
||||
let mut last_task: Option<DateTime<_>> = None;
|
||||
let mut indexes = BTreeMap::new();
|
||||
let mut database_size = 0;
|
||||
let processing_task = self.task_store.get_processing_task().await?;
|
||||
|
||||
for (index_uid, index) in self.index_resolver.list().await? {
|
||||
let uuid = index.uuid();
|
||||
let (mut stats, meta) = spawn_blocking::<_, IndexResult<_>>(move || {
|
||||
let stats = index.stats()?;
|
||||
let meta = index.meta()?;
|
||||
Ok((stats, meta))
|
||||
})
|
||||
.await??;
|
||||
let (mut stats, meta) =
|
||||
spawn_blocking::<_, Result<(IndexStats, IndexMeta)>>(move || {
|
||||
Ok((index.stats()?, index.meta()?))
|
||||
})
|
||||
.await??;
|
||||
|
||||
database_size += stats.size;
|
||||
|
||||
last_update = last_update.map_or(Some(meta.updated_at), |last| {
|
||||
last_task = last_task.map_or(Some(meta.updated_at), |last| {
|
||||
Some(last.max(meta.updated_at))
|
||||
});
|
||||
|
||||
// Check if the currently indexing update is from our index.
|
||||
stats.is_indexing = Some(Some(uuid) == update_infos.processing);
|
||||
stats.is_indexing = processing_task
|
||||
.as_ref()
|
||||
.map(|p| p.index_uid.as_str() == index_uid)
|
||||
.or(Some(false));
|
||||
|
||||
indexes.insert(index_uid, stats);
|
||||
}
|
||||
|
||||
Ok(Stats {
|
||||
database_size,
|
||||
last_update,
|
||||
last_update: last_task,
|
||||
indexes,
|
||||
})
|
||||
}
|
||||
@ -467,41 +567,6 @@ where
|
||||
pub async fn dump_info(&self, uid: String) -> Result<DumpInfo> {
|
||||
Ok(self.dump_handle.dump_info(uid).await?)
|
||||
}
|
||||
|
||||
pub async fn create_index(
|
||||
&self,
|
||||
uid: String,
|
||||
primary_key: Option<String>,
|
||||
) -> Result<IndexMetadata> {
|
||||
let index = self
|
||||
.index_resolver
|
||||
.create_index(uid.clone(), primary_key)
|
||||
.await?;
|
||||
let meta = spawn_blocking(move || -> IndexResult<_> {
|
||||
let meta = index.meta()?;
|
||||
let meta = IndexMetadata {
|
||||
uuid: index.uuid(),
|
||||
uid: uid.clone(),
|
||||
name: uid,
|
||||
meta,
|
||||
};
|
||||
Ok(meta)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
pub async fn delete_index(&self, uid: String) -> Result<()> {
|
||||
let uuid = self.index_resolver.delete_index(uid).await?;
|
||||
|
||||
let update_sender = self.update_sender.clone();
|
||||
tokio::spawn(async move {
|
||||
let _ = UpdateMsg::delete(&update_sender, uuid).await;
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
|
||||
@ -521,28 +586,28 @@ pub async fn get_arc_ownership_blocking<T>(mut item: Arc<T>) -> T {
|
||||
mod test {
|
||||
use futures::future::ok;
|
||||
use mockall::predicate::eq;
|
||||
use tokio::sync::mpsc;
|
||||
use nelson::Mocker;
|
||||
|
||||
use crate::index::error::Result as IndexResult;
|
||||
use crate::index::test::Mocker;
|
||||
use crate::index::Index;
|
||||
use crate::index_controller::dump_actor::MockDumpActorHandle;
|
||||
use crate::index_controller::index_resolver::index_store::MockIndexStore;
|
||||
use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
|
||||
use crate::index_resolver::index_store::MockIndexStore;
|
||||
use crate::index_resolver::meta_store::MockIndexMetaStore;
|
||||
use crate::index_resolver::IndexResolver;
|
||||
|
||||
use super::updates::UpdateSender;
|
||||
use super::*;
|
||||
|
||||
impl<D: DumpActorHandle> IndexController<MockUuidStore, MockIndexStore, D> {
|
||||
impl IndexController<MockIndexMetaStore, MockIndexStore> {
|
||||
pub fn mock(
|
||||
index_resolver: IndexResolver<MockUuidStore, MockIndexStore>,
|
||||
update_sender: UpdateSender,
|
||||
dump_handle: D,
|
||||
index_resolver: IndexResolver<MockIndexMetaStore, MockIndexStore>,
|
||||
task_store: TaskStore,
|
||||
update_file_store: UpdateFileStore,
|
||||
dump_handle: DumpActorHandleImpl,
|
||||
) -> Self {
|
||||
IndexController {
|
||||
index_resolver: Arc::new(index_resolver),
|
||||
update_sender,
|
||||
dump_handle: Arc::new(dump_handle),
|
||||
task_store,
|
||||
dump_handle,
|
||||
update_file_store,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -577,11 +642,19 @@ mod test {
|
||||
exhaustive_facets_count: Some(true),
|
||||
};
|
||||
|
||||
let mut uuid_store = MockUuidStore::new();
|
||||
let mut uuid_store = MockIndexMetaStore::new();
|
||||
uuid_store
|
||||
.expect_get_uuid()
|
||||
.expect_get()
|
||||
.with(eq(index_uid.to_owned()))
|
||||
.returning(move |s| Box::pin(ok((s, Some(index_uuid)))));
|
||||
.returning(move |s| {
|
||||
Box::pin(ok((
|
||||
s,
|
||||
Some(crate::index_resolver::meta_store::IndexMeta {
|
||||
uuid: index_uuid,
|
||||
creation_task_id: 0,
|
||||
}),
|
||||
)))
|
||||
});
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
let result_clone = result.clone();
|
||||
@ -600,14 +673,20 @@ mod test {
|
||||
assert_eq!(&q, &query);
|
||||
Ok(result.clone())
|
||||
});
|
||||
let index = Index::faux(mocker);
|
||||
let index = Index::mock(mocker);
|
||||
Box::pin(ok(Some(index)))
|
||||
});
|
||||
|
||||
let index_resolver = IndexResolver::new(uuid_store, index_store);
|
||||
let (update_sender, _) = mpsc::channel(1);
|
||||
let dump_actor = MockDumpActorHandle::new();
|
||||
let index_controller = IndexController::mock(index_resolver, update_sender, dump_actor);
|
||||
let task_store_mocker = nelson::Mocker::default();
|
||||
let mocker = Mocker::default();
|
||||
let update_file_store = UpdateFileStore::mock(mocker);
|
||||
let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store.clone());
|
||||
let task_store = TaskStore::mock(task_store_mocker);
|
||||
// let dump_actor = MockDumpActorHandle::new();
|
||||
let (sender, _) = mpsc::channel(1);
|
||||
let dump_handle = DumpActorHandleImpl { sender };
|
||||
let index_controller =
|
||||
IndexController::mock(index_resolver, task_store, update_file_store, dump_handle);
|
||||
|
||||
let r = index_controller
|
||||
.search(index_uid.to_owned(), query.clone())
|
||||
|
@ -1,312 +0,0 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::bail;
|
||||
use log::{error, info, trace};
|
||||
use tokio::fs;
|
||||
use tokio::task::spawn_blocking;
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::analytics;
|
||||
use crate::compression::from_tar_gz;
|
||||
use crate::index_controller::updates::UpdateMsg;
|
||||
|
||||
use super::index_resolver::index_store::IndexStore;
|
||||
use super::index_resolver::uuid_store::UuidStore;
|
||||
use super::index_resolver::IndexResolver;
|
||||
use super::updates::UpdateSender;
|
||||
|
||||
pub struct SnapshotService<U, I> {
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
update_sender: UpdateSender,
|
||||
snapshot_period: Duration,
|
||||
snapshot_path: PathBuf,
|
||||
db_path: PathBuf,
|
||||
db_name: String,
|
||||
}
|
||||
|
||||
impl<U, I> SnapshotService<U, I>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
pub fn new(
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
update_sender: UpdateSender,
|
||||
snapshot_period: Duration,
|
||||
snapshot_path: PathBuf,
|
||||
db_path: PathBuf,
|
||||
db_name: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
index_resolver,
|
||||
update_sender,
|
||||
snapshot_period,
|
||||
snapshot_path,
|
||||
db_path,
|
||||
db_name,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(self) {
|
||||
info!(
|
||||
"Snapshot scheduled every {}s.",
|
||||
self.snapshot_period.as_secs()
|
||||
);
|
||||
loop {
|
||||
if let Err(e) = self.perform_snapshot().await {
|
||||
error!("Error while performing snapshot: {}", e);
|
||||
}
|
||||
sleep(self.snapshot_period).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn perform_snapshot(&self) -> anyhow::Result<()> {
|
||||
trace!("Performing snapshot.");
|
||||
|
||||
let snapshot_dir = self.snapshot_path.clone();
|
||||
fs::create_dir_all(&snapshot_dir).await?;
|
||||
let temp_snapshot_dir = spawn_blocking(tempfile::tempdir).await??;
|
||||
let temp_snapshot_path = temp_snapshot_dir.path().to_owned();
|
||||
|
||||
let indexes = self
|
||||
.index_resolver
|
||||
.snapshot(temp_snapshot_path.clone())
|
||||
.await?;
|
||||
|
||||
analytics::copy_user_id(&self.db_path, &temp_snapshot_path.clone());
|
||||
|
||||
if indexes.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
UpdateMsg::snapshot(&self.update_sender, temp_snapshot_path.clone(), indexes).await?;
|
||||
|
||||
let snapshot_path = self
|
||||
.snapshot_path
|
||||
.join(format!("{}.snapshot", self.db_name));
|
||||
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
|
||||
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
|
||||
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
|
||||
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
|
||||
temp_snapshot_file.persist(&snapshot_path)?;
|
||||
Ok(snapshot_path)
|
||||
})
|
||||
.await??;
|
||||
|
||||
trace!("Created snapshot in {:?}.", snapshot_path);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_snapshot(
|
||||
db_path: impl AsRef<Path>,
|
||||
snapshot_path: impl AsRef<Path>,
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
ignore_missing_snapshot: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
|
||||
match from_tar_gz(snapshot_path, &db_path) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
//clean created db folder
|
||||
std::fs::remove_dir_all(&db_path)?;
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
db_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| db_path.as_ref().to_owned())
|
||||
)
|
||||
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
|
||||
bail!(
|
||||
"snapshot doesn't exist at {:?}",
|
||||
snapshot_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
|
||||
)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::{collections::HashSet, sync::Arc};

    use futures::future::{err, ok};
    use once_cell::sync::Lazy;
    use rand::Rng;
    use uuid::Uuid;

    use crate::index::error::IndexError;
    use crate::index::test::Mocker;
    use crate::index::{error::Result as IndexResult, Index};
    use crate::index_controller::index_resolver::error::IndexResolverError;
    use crate::index_controller::index_resolver::index_store::MockIndexStore;
    use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
    use crate::index_controller::index_resolver::IndexResolver;
    use crate::index_controller::updates::create_update_handler;

    use super::*;

    /// Points the temporary-directory environment variable at the current
    /// directory, exactly once for the whole test binary.
    fn setup() {
        static SETUP: Lazy<()> = Lazy::new(|| {
            let variable = if cfg!(windows) { "TMP" } else { "TMPDIR" };
            std::env::set_var(variable, ".");
        });

        // just deref to make sure the env is setup
        *SETUP
    }

    /// Every index and the uuid store snapshot successfully: the whole
    /// snapshot must succeed.
    #[actix_rt::test]
    async fn test_normal() {
        setup();

        let mut rng = rand::thread_rng();
        let uuids_num: usize = rng.gen_range(5..10);
        let uuids: HashSet<Uuid> = std::iter::repeat_with(Uuid::new_v4)
            .take(uuids_num)
            .collect();

        let mut uuid_store = MockUuidStore::new();
        let snapshot_uuids = uuids.clone();
        uuid_store
            .expect_snapshot()
            .times(1)
            .returning(move |_| Box::pin(ok(snapshot_uuids.clone())));

        // Lazily builds one mocked index per uuid; each must be snapshotted
        // exactly once.
        let mut indexes = uuids.clone().into_iter().map(|uuid| {
            let mocker = Mocker::default();
            mocker
                .when("snapshot")
                .times(1)
                .then(|_: &Path| -> IndexResult<()> { Ok(()) });
            mocker.when("uuid").then(move |_: ()| uuid);
            Index::faux(mocker)
        });

        let known_uuids = uuids.clone();
        let mut index_store = MockIndexStore::new();
        index_store
            .expect_get()
            .withf(move |uuid| known_uuids.contains(uuid))
            .times(uuids_num)
            .returning(move |_| Box::pin(ok(Some(indexes.next().unwrap()))));

        let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));

        let dir = tempfile::tempdir().unwrap();
        let update_sender =
            create_update_handler(index_resolver.clone(), dir.path(), 4096 * 100).unwrap();

        let snapshot_path = tempfile::tempdir().unwrap();
        let snapshot_service = SnapshotService::new(
            index_resolver,
            update_sender,
            Duration::from_millis(100),
            snapshot_path.path().to_owned(),
            // this should do nothing
            snapshot_path.path().to_owned(),
            "data.ms".to_string(),
        );

        snapshot_service.perform_snapshot().await.unwrap();
    }

    /// The uuid store fails to snapshot: no index may be fetched and the
    /// snapshot must fail.
    #[actix_rt::test]
    async fn error_performing_uuid_snapshot() {
        setup();

        let mut uuid_store = MockUuidStore::new();
        uuid_store.expect_snapshot().once().returning(move |_| {
            let error = IndexResolverError::IndexAlreadyExists("test".to_string());
            Box::pin(err(error))
        });

        let mut index_store = MockIndexStore::new();
        index_store.expect_get().never();

        let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));

        let dir = tempfile::tempdir().unwrap();
        let update_sender =
            create_update_handler(index_resolver.clone(), dir.path(), 4096 * 100).unwrap();

        let snapshot_path = tempfile::tempdir().unwrap();
        let snapshot_service = SnapshotService::new(
            index_resolver,
            update_sender,
            Duration::from_millis(100),
            snapshot_path.path().to_owned(),
            // this should do nothing
            snapshot_path.path().to_owned(),
            "data.ms".to_string(),
        );

        let outcome = snapshot_service.perform_snapshot().await;
        assert!(outcome.is_err());
    }

    /// An index fails to snapshot: the whole snapshot must fail.
    #[actix_rt::test]
    async fn error_performing_index_snapshot() {
        setup();

        let uuids: HashSet<Uuid> = std::iter::once(Uuid::new_v4()).collect();

        let mut uuid_store = MockUuidStore::new();
        let snapshot_uuids = uuids.clone();
        uuid_store
            .expect_snapshot()
            .once()
            .returning(move |_| Box::pin(ok(snapshot_uuids.clone())));

        let mut indexes = uuids.clone().into_iter().map(|uuid| {
            let mocker = Mocker::default();
            // index returns random error
            mocker.when("snapshot").then(|_: &Path| -> IndexResult<()> {
                Err(IndexError::DocumentNotFound("1".to_string()))
            });
            mocker.when("uuid").then(move |_: ()| uuid);
            Index::faux(mocker)
        });

        let known_uuids = uuids.clone();
        let mut index_store = MockIndexStore::new();
        index_store
            .expect_get()
            .withf(move |uuid| known_uuids.contains(uuid))
            .once()
            .returning(move |_| Box::pin(ok(Some(indexes.next().unwrap()))));

        let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));

        let dir = tempfile::tempdir().unwrap();
        let update_sender =
            create_update_handler(index_resolver.clone(), dir.path(), 4096 * 100).unwrap();

        let snapshot_path = tempfile::tempdir().unwrap();
        let snapshot_service = SnapshotService::new(
            index_resolver,
            update_sender,
            Duration::from_millis(100),
            snapshot_path.path().to_owned(),
            // this should do nothing
            snapshot_path.path().to_owned(),
            "data.ms".to_string(),
        );

        let outcome = snapshot_service.perform_snapshot().await;
        assert!(outcome.is_err());
    }
}
|
@ -1,177 +0,0 @@
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{self, BufReader, BufWriter, Write};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use serde_json::Map;
|
||||
use tempfile::{NamedTempFile, PersistError};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Location of the update files, relative to the path it is joined onto
/// (database root, snapshot directory or dump directory).
const UPDATE_FILES_PATH: &str = "updates/updates_files";
|
||||
|
||||
use crate::document_formats::read_ndjson;
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
file: NamedTempFile,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error("Error while persisting update to disk: {0}")]
|
||||
pub struct UpdateFileStoreError(Box<dyn std::error::Error + Sync + Send + 'static>);
|
||||
|
||||
type Result<T> = std::result::Result<T, UpdateFileStoreError>;
|
||||
|
||||
macro_rules! into_update_store_error {
|
||||
($($other:path),*) => {
|
||||
$(
|
||||
impl From<$other> for UpdateFileStoreError {
|
||||
fn from(other: $other) -> Self {
|
||||
Self(Box::new(other))
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
into_update_store_error!(
|
||||
PersistError,
|
||||
io::Error,
|
||||
serde_json::Error,
|
||||
milli::documents::Error
|
||||
);
|
||||
|
||||
impl UpdateFile {
|
||||
pub fn persist(self) -> Result<()> {
|
||||
self.file.persist(&self.path)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for UpdateFile {
|
||||
type Target = NamedTempFile;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for UpdateFile {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.file
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UpdateFileStore {
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl UpdateFileStore {
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH);
|
||||
let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
|
||||
// No update files to load
|
||||
if !src_update_files_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
create_dir_all(&dst_update_files_path)?;
|
||||
|
||||
let entries = std::fs::read_dir(src_update_files_path)?;
|
||||
|
||||
for entry in entries {
|
||||
let entry = entry?;
|
||||
let update_file = BufReader::new(File::open(entry.path())?);
|
||||
let file_uuid = entry.file_name();
|
||||
let file_uuid = file_uuid
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow::anyhow!("invalid update file name"))?;
|
||||
let dst_path = dst_update_files_path.join(file_uuid);
|
||||
let dst_file = BufWriter::new(File::create(dst_path)?);
|
||||
read_ndjson(update_file, dst_file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let path = path.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&path)?;
|
||||
Ok(Self { path })
|
||||
}
|
||||
|
||||
/// Creates a new temporary update file.
|
||||
///
|
||||
/// A call to `persist` is needed to persist the file in the database.
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
let file = NamedTempFile::new_in(&self.path)?;
|
||||
let uuid = Uuid::new_v4();
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let update_file = UpdateFile { file, path };
|
||||
|
||||
Ok((uuid, update_file))
|
||||
}
|
||||
|
||||
/// Returns the file corresponding to the requested uuid.
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let file = File::open(path)?;
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
/// Copies the content of the update file pointed to by `uuid` to the `dst` directory.
|
||||
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let src = self.path.join(uuid.to_string());
|
||||
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
dst.push(uuid.to_string());
|
||||
std::fs::copy(src, dst)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Peforms a dump of the given update file uuid into the provided dump path.
|
||||
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
|
||||
let uuid_string = uuid.to_string();
|
||||
let update_file_path = self.path.join(&uuid_string);
|
||||
let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
dst.push(&uuid_string);
|
||||
|
||||
let update_file = File::open(update_file_path)?;
|
||||
let mut dst_file = NamedTempFile::new_in(&dump_path)?;
|
||||
let mut document_reader = DocumentBatchReader::from_reader(update_file)?;
|
||||
|
||||
let mut document_buffer = Map::new();
|
||||
// TODO: we need to find a way to do this more efficiently. (create a custom serializer
|
||||
// for jsonl for example...)
|
||||
while let Some((index, document)) = document_reader.next_document_with_index()? {
|
||||
for (field_id, content) in document.iter() {
|
||||
if let Some(field_name) = index.name(field_id) {
|
||||
let content = serde_json::from_slice(content)?;
|
||||
document_buffer.insert(field_name.to_string(), content);
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_writer(&mut dst_file, &document_buffer)?;
|
||||
dst_file.write_all(b"\n")?;
|
||||
document_buffer.clear();
|
||||
}
|
||||
|
||||
dst_file.persist(dst)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||
Ok(self.get_update(uuid)?.metadata()?.len())
|
||||
}
|
||||
|
||||
pub fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
std::fs::remove_file(path)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -1,113 +0,0 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::Index;
|
||||
|
||||
use super::error::Result;
|
||||
use super::{Update, UpdateStatus, UpdateStoreInfo};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum UpdateMsg {
|
||||
Update {
|
||||
uuid: Uuid,
|
||||
update: Update,
|
||||
ret: oneshot::Sender<Result<UpdateStatus>>,
|
||||
},
|
||||
ListUpdates {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<Vec<UpdateStatus>>>,
|
||||
},
|
||||
GetUpdate {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<UpdateStatus>>,
|
||||
id: u64,
|
||||
},
|
||||
DeleteIndex {
|
||||
uuid: Uuid,
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
Snapshot {
|
||||
indexes: Vec<Index>,
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
Dump {
|
||||
indexes: Vec<Index>,
|
||||
path: PathBuf,
|
||||
ret: oneshot::Sender<Result<()>>,
|
||||
},
|
||||
GetInfo {
|
||||
ret: oneshot::Sender<Result<UpdateStoreInfo>>,
|
||||
},
|
||||
}
|
||||
|
||||
impl UpdateMsg {
|
||||
pub async fn snapshot(
|
||||
sender: &mpsc::Sender<Self>,
|
||||
path: PathBuf,
|
||||
indexes: Vec<Index>,
|
||||
) -> Result<()> {
|
||||
let (ret, rcv) = oneshot::channel();
|
||||
let msg = Self::Snapshot { path, indexes, ret };
|
||||
sender.send(msg).await?;
|
||||
rcv.await?
|
||||
}
|
||||
|
||||
pub async fn dump(
|
||||
sender: &mpsc::Sender<Self>,
|
||||
indexes: Vec<Index>,
|
||||
path: PathBuf,
|
||||
) -> Result<()> {
|
||||
let (ret, rcv) = oneshot::channel();
|
||||
let msg = Self::Dump { path, indexes, ret };
|
||||
sender.send(msg).await?;
|
||||
rcv.await?
|
||||
}
|
||||
pub async fn update(
|
||||
sender: &mpsc::Sender<Self>,
|
||||
uuid: Uuid,
|
||||
update: Update,
|
||||
) -> Result<UpdateStatus> {
|
||||
let (ret, rcv) = oneshot::channel();
|
||||
let msg = Self::Update { uuid, update, ret };
|
||||
sender.send(msg).await?;
|
||||
rcv.await?
|
||||
}
|
||||
|
||||
pub async fn get_update(
|
||||
sender: &mpsc::Sender<Self>,
|
||||
uuid: Uuid,
|
||||
id: u64,
|
||||
) -> Result<UpdateStatus> {
|
||||
let (ret, rcv) = oneshot::channel();
|
||||
let msg = Self::GetUpdate { uuid, id, ret };
|
||||
sender.send(msg).await?;
|
||||
rcv.await?
|
||||
}
|
||||
|
||||
pub async fn list_updates(
|
||||
sender: &mpsc::Sender<Self>,
|
||||
uuid: Uuid,
|
||||
) -> Result<Vec<UpdateStatus>> {
|
||||
let (ret, rcv) = oneshot::channel();
|
||||
let msg = Self::ListUpdates { uuid, ret };
|
||||
sender.send(msg).await?;
|
||||
rcv.await?
|
||||
}
|
||||
|
||||
pub async fn get_info(sender: &mpsc::Sender<Self>) -> Result<UpdateStoreInfo> {
|
||||
let (ret, rcv) = oneshot::channel();
|
||||
let msg = Self::GetInfo { ret };
|
||||
sender.send(msg).await?;
|
||||
rcv.await?
|
||||
}
|
||||
|
||||
pub async fn delete(sender: &mpsc::Sender<Self>, uuid: Uuid) -> Result<()> {
|
||||
let (ret, rcv) = oneshot::channel();
|
||||
let msg = Self::DeleteIndex { ret, uuid };
|
||||
sender.send(msg).await?;
|
||||
rcv.await?
|
||||
}
|
||||
}
|
@ -1,266 +0,0 @@
|
||||
pub mod error;
|
||||
mod message;
|
||||
pub mod status;
|
||||
pub mod store;
|
||||
|
||||
use std::io::Cursor;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_stream::stream;
|
||||
use futures::StreamExt;
|
||||
use log::trace;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use self::error::{Result, UpdateLoopError};
|
||||
pub use self::message::UpdateMsg;
|
||||
use self::store::{UpdateStore, UpdateStoreInfo};
|
||||
use crate::document_formats::{read_csv, read_json, read_ndjson};
|
||||
use crate::index::{Index, Settings, Unchecked};
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
use status::UpdateStatus;
|
||||
|
||||
use super::index_resolver::index_store::IndexStore;
|
||||
use super::index_resolver::uuid_store::UuidStore;
|
||||
use super::index_resolver::IndexResolver;
|
||||
use super::{DocumentAdditionFormat, Update};
|
||||
|
||||
/// Sending half of the channel used to talk to the update actor.
pub type UpdateSender = mpsc::Sender<UpdateMsg>;
|
||||
|
||||
pub fn create_update_handler<U, I>(
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
db_path: impl AsRef<Path>,
|
||||
update_store_size: usize,
|
||||
) -> anyhow::Result<UpdateSender>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
let path = db_path.as_ref().to_owned();
|
||||
let (sender, receiver) = mpsc::channel(100);
|
||||
let actor = UpdateLoop::new(update_store_size, receiver, path, index_resolver)?;
|
||||
|
||||
tokio::task::spawn(actor.run());
|
||||
|
||||
Ok(sender)
|
||||
}
|
||||
|
||||
pub struct UpdateLoop {
|
||||
store: Arc<UpdateStore>,
|
||||
inbox: Option<mpsc::Receiver<UpdateMsg>>,
|
||||
update_file_store: UpdateFileStore,
|
||||
must_exit: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl UpdateLoop {
|
||||
pub fn new<U, I>(
|
||||
update_db_size: usize,
|
||||
inbox: mpsc::Receiver<UpdateMsg>,
|
||||
path: impl AsRef<Path>,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
) -> anyhow::Result<Self>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
let path = path.as_ref().to_owned();
|
||||
std::fs::create_dir_all(&path)?;
|
||||
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.map_size(update_db_size);
|
||||
|
||||
let must_exit = Arc::new(AtomicBool::new(false));
|
||||
|
||||
let update_file_store = UpdateFileStore::new(&path).unwrap();
|
||||
let store = UpdateStore::open(
|
||||
options,
|
||||
&path,
|
||||
index_resolver,
|
||||
must_exit.clone(),
|
||||
update_file_store.clone(),
|
||||
)?;
|
||||
|
||||
let inbox = Some(inbox);
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
inbox,
|
||||
must_exit,
|
||||
update_file_store,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
use UpdateMsg::*;
|
||||
|
||||
trace!("Started update actor.");
|
||||
|
||||
let mut inbox = self
|
||||
.inbox
|
||||
.take()
|
||||
.expect("A receiver should be present by now.");
|
||||
|
||||
let must_exit = self.must_exit.clone();
|
||||
let stream = stream! {
|
||||
loop {
|
||||
let msg = inbox.recv().await;
|
||||
|
||||
if must_exit.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
break;
|
||||
}
|
||||
|
||||
match msg {
|
||||
Some(msg) => yield msg,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
stream
|
||||
.for_each_concurrent(Some(10), |msg| async {
|
||||
match msg {
|
||||
Update { uuid, update, ret } => {
|
||||
let _ = ret.send(self.handle_update(uuid, update).await);
|
||||
}
|
||||
ListUpdates { uuid, ret } => {
|
||||
let _ = ret.send(self.handle_list_updates(uuid).await);
|
||||
}
|
||||
GetUpdate { uuid, ret, id } => {
|
||||
let _ = ret.send(self.handle_get_update(uuid, id).await);
|
||||
}
|
||||
DeleteIndex { uuid, ret } => {
|
||||
let _ = ret.send(self.handle_delete(uuid).await);
|
||||
}
|
||||
Snapshot { indexes, path, ret } => {
|
||||
let _ = ret.send(self.handle_snapshot(indexes, path).await);
|
||||
}
|
||||
GetInfo { ret } => {
|
||||
let _ = ret.send(self.handle_get_info().await);
|
||||
}
|
||||
Dump { indexes, path, ret } => {
|
||||
let _ = ret.send(self.handle_dump(indexes, path).await);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn handle_update(&self, index_uuid: Uuid, update: Update) -> Result<UpdateStatus> {
|
||||
let registration = match update {
|
||||
Update::DocumentAddition {
|
||||
mut payload,
|
||||
primary_key,
|
||||
method,
|
||||
format,
|
||||
} => {
|
||||
let mut buffer = Vec::new();
|
||||
while let Some(bytes) = payload.next().await {
|
||||
match bytes {
|
||||
Ok(bytes) => {
|
||||
buffer.extend_from_slice(&bytes);
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
}
|
||||
let (content_uuid, mut update_file) = self.update_file_store.new_update()?;
|
||||
tokio::task::spawn_blocking(move || -> Result<_> {
|
||||
// check if the payload is empty, and return an error
|
||||
if buffer.is_empty() {
|
||||
return Err(UpdateLoopError::MissingPayload(format));
|
||||
}
|
||||
|
||||
let reader = Cursor::new(buffer);
|
||||
match format {
|
||||
DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?,
|
||||
DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?,
|
||||
}
|
||||
|
||||
update_file.persist()?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.await??;
|
||||
|
||||
store::Update::DocumentAddition {
|
||||
primary_key,
|
||||
method,
|
||||
content_uuid,
|
||||
}
|
||||
}
|
||||
Update::Settings(settings) => store::Update::Settings(settings),
|
||||
Update::ClearDocuments => store::Update::ClearDocuments,
|
||||
Update::DeleteDocuments(ids) => store::Update::DeleteDocuments(ids),
|
||||
};
|
||||
|
||||
let store = self.store.clone();
|
||||
let status =
|
||||
tokio::task::spawn_blocking(move || store.register_update(index_uuid, registration))
|
||||
.await??;
|
||||
|
||||
Ok(status.into())
|
||||
}
|
||||
|
||||
async fn handle_list_updates(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>> {
|
||||
let update_store = self.store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = update_store.list(uuid)?;
|
||||
Ok(result)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
async fn handle_get_update(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
|
||||
let store = self.store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = store
|
||||
.meta(uuid, id)?
|
||||
.ok_or(UpdateLoopError::UnexistingUpdate(id))?;
|
||||
Ok(result)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
async fn handle_delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let store = self.store.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || store.delete_all(uuid)).await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_snapshot(&self, indexes: Vec<Index>, path: PathBuf) -> Result<()> {
|
||||
let update_store = self.store.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || update_store.snapshot(indexes, path)).await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_dump(&self, indexes: Vec<Index>, path: PathBuf) -> Result<()> {
|
||||
let update_store = self.store.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || -> Result<()> {
|
||||
update_store.dump(&indexes, path.to_path_buf())?;
|
||||
Ok(())
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
|
||||
let update_store = self.store.clone();
|
||||
let info = tokio::task::spawn_blocking(move || -> Result<UpdateStoreInfo> {
|
||||
let info = update_store.get_info()?;
|
||||
Ok(info)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(info)
|
||||
}
|
||||
}
|
@ -1,251 +0,0 @@
|
||||
use std::{error::Error, fmt::Display};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use meilisearch_error::{Code, ErrorCode};
|
||||
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{
|
||||
index::{Settings, Unchecked},
|
||||
Update,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum UpdateResult {
|
||||
DocumentsAddition(DocumentAdditionResult),
|
||||
DocumentDeletion { deleted: u64 },
|
||||
Other,
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum UpdateMeta {
|
||||
DocumentsAddition {
|
||||
method: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
ClearDocuments,
|
||||
DeleteDocuments {
|
||||
ids: Vec<String>,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Enqueued {
|
||||
pub update_id: u64,
|
||||
pub meta: Update,
|
||||
pub enqueued_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Enqueued {
|
||||
pub fn new(meta: Update, update_id: u64) -> Self {
|
||||
Self {
|
||||
enqueued_at: Utc::now(),
|
||||
meta,
|
||||
update_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn processing(self) -> Processing {
|
||||
Processing {
|
||||
from: self,
|
||||
started_processing_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn abort(self) -> Aborted {
|
||||
Aborted {
|
||||
from: self,
|
||||
aborted_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &Update {
|
||||
&self.meta
|
||||
}
|
||||
|
||||
pub fn id(&self) -> u64 {
|
||||
self.update_id
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processed {
|
||||
pub success: UpdateResult,
|
||||
pub processed_at: DateTime<Utc>,
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
}
|
||||
|
||||
impl Processed {
|
||||
pub fn id(&self) -> u64 {
|
||||
self.from.id()
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &Update {
|
||||
self.from.meta()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Processing {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
pub started_processing_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Processing {
|
||||
pub fn id(&self) -> u64 {
|
||||
self.from.id()
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &Update {
|
||||
self.from.meta()
|
||||
}
|
||||
|
||||
pub fn process(self, success: UpdateResult) -> Processed {
|
||||
Processed {
|
||||
success,
|
||||
from: self,
|
||||
processed_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fail(self, error: impl ErrorCode) -> Failed {
|
||||
let msg = error.to_string();
|
||||
let code = error.error_code();
|
||||
Failed {
|
||||
from: self,
|
||||
msg,
|
||||
code,
|
||||
failed_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Aborted {
|
||||
#[serde(flatten)]
|
||||
pub from: Enqueued,
|
||||
pub aborted_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Aborted {
|
||||
pub fn id(&self) -> u64 {
|
||||
self.from.id()
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &Update {
|
||||
self.from.meta()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Failed {
|
||||
#[serde(flatten)]
|
||||
pub from: Processing,
|
||||
pub msg: String,
|
||||
pub code: Code,
|
||||
pub failed_at: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Display for Failed {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.msg.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
/// `Failed` is usable as an error value; it relies on the default `Error` methods.
impl Error for Failed {}
|
||||
|
||||
impl ErrorCode for Failed {
|
||||
fn error_code(&self) -> Code {
|
||||
self.code
|
||||
}
|
||||
}
|
||||
|
||||
impl Failed {
|
||||
pub fn id(&self) -> u64 {
|
||||
self.from.id()
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &Update {
|
||||
self.from.meta()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(tag = "status", rename_all = "camelCase")]
|
||||
pub enum UpdateStatus {
|
||||
Processing(Processing),
|
||||
Enqueued(Enqueued),
|
||||
Processed(Processed),
|
||||
Aborted(Aborted),
|
||||
Failed(Failed),
|
||||
}
|
||||
|
||||
impl UpdateStatus {
|
||||
pub fn id(&self) -> u64 {
|
||||
match self {
|
||||
UpdateStatus::Processing(u) => u.id(),
|
||||
UpdateStatus::Enqueued(u) => u.id(),
|
||||
UpdateStatus::Processed(u) => u.id(),
|
||||
UpdateStatus::Aborted(u) => u.id(),
|
||||
UpdateStatus::Failed(u) => u.id(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn meta(&self) -> &Update {
|
||||
match self {
|
||||
UpdateStatus::Processing(u) => u.meta(),
|
||||
UpdateStatus::Enqueued(u) => u.meta(),
|
||||
UpdateStatus::Processed(u) => u.meta(),
|
||||
UpdateStatus::Aborted(u) => u.meta(),
|
||||
UpdateStatus::Failed(u) => u.meta(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn processed(&self) -> Option<&Processed> {
|
||||
match self {
|
||||
UpdateStatus::Processed(p) => Some(p),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Enqueued> for UpdateStatus {
|
||||
fn from(other: Enqueued) -> Self {
|
||||
Self::Enqueued(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Aborted> for UpdateStatus {
|
||||
fn from(other: Aborted) -> Self {
|
||||
Self::Aborted(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Processed> for UpdateStatus {
|
||||
fn from(other: Processed) -> Self {
|
||||
Self::Processed(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Processing> for UpdateStatus {
|
||||
fn from(other: Processing) -> Self {
|
||||
Self::Processing(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Failed> for UpdateStatus {
|
||||
fn from(other: Failed) -> Self {
|
||||
Self::Failed(other)
|
||||
}
|
||||
}
|
@ -1,86 +0,0 @@
|
||||
use std::{borrow::Cow, convert::TryInto, mem::size_of};
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Key codec that encodes a [`NextIdKey`] into database key bytes.
pub struct NextIdCodec;
|
||||
|
||||
pub enum NextIdKey {
|
||||
Global,
|
||||
Index(Uuid),
|
||||
}
|
||||
|
||||
impl<'a> BytesEncode<'a> for NextIdCodec {
|
||||
type EItem = NextIdKey;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
match item {
|
||||
NextIdKey::Global => Some(Cow::Borrowed(b"__global__")),
|
||||
NextIdKey::Index(ref uuid) => Some(Cow::Borrowed(uuid.as_bytes())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Key codec for `(global id, index uuid, update id)` triples.
pub struct PendingKeyCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for PendingKeyCodec {
|
||||
type EItem = (u64, Uuid, u64);
|
||||
|
||||
fn bytes_encode((global_id, uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
|
||||
bytes.extend_from_slice(&global_id.to_be_bytes());
|
||||
bytes.extend_from_slice(uuid.as_bytes());
|
||||
bytes.extend_from_slice(&update_id.to_be_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for PendingKeyCodec {
|
||||
type DItem = (u64, Uuid, u64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let global_id_bytes = bytes.get(0..size_of::<u64>())?.try_into().ok()?;
|
||||
let global_id = u64::from_be_bytes(global_id_bytes);
|
||||
|
||||
let uuid_bytes = bytes
|
||||
.get(size_of::<u64>()..(size_of::<u64>() + size_of::<Uuid>()))?
|
||||
.try_into()
|
||||
.ok()?;
|
||||
let uuid = Uuid::from_bytes(uuid_bytes);
|
||||
|
||||
let update_id_bytes = bytes
|
||||
.get((size_of::<u64>() + size_of::<Uuid>())..)?
|
||||
.try_into()
|
||||
.ok()?;
|
||||
let update_id = u64::from_be_bytes(update_id_bytes);
|
||||
|
||||
Some((global_id, uuid, update_id))
|
||||
}
|
||||
}
|
||||
|
||||
/// Key codec for `(index uuid, update id)` pairs.
pub struct UpdateKeyCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for UpdateKeyCodec {
|
||||
type EItem = (Uuid, u64);
|
||||
|
||||
fn bytes_encode((uuid, update_id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut bytes = Vec::with_capacity(size_of::<Self::EItem>());
|
||||
bytes.extend_from_slice(uuid.as_bytes());
|
||||
bytes.extend_from_slice(&update_id.to_be_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for UpdateKeyCodec {
|
||||
type DItem = (Uuid, u64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let uuid_bytes = bytes.get(0..size_of::<Uuid>())?.try_into().ok()?;
|
||||
let uuid = Uuid::from_bytes(uuid_bytes);
|
||||
|
||||
let update_id_bytes = bytes.get(size_of::<Uuid>()..)?.try_into().ok()?;
|
||||
let update_id = u64::from_be_bytes(update_id_bytes);
|
||||
|
||||
Some((uuid, update_id))
|
||||
}
|
||||
}
|
@ -1,157 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{BufReader, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use heed::{EnvOpenOptions, RoTxn};
|
||||
use rayon::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Deserializer;
|
||||
use tempfile::{NamedTempFile, TempDir};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{Result, State, UpdateStore};
|
||||
use crate::{
|
||||
index::Index,
|
||||
index_controller::{
|
||||
update_file_store::UpdateFileStore,
|
||||
updates::status::{Enqueued, UpdateStatus},
|
||||
},
|
||||
Update,
|
||||
};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct UpdateEntry {
|
||||
pub uuid: Uuid,
|
||||
pub update: UpdateStatus,
|
||||
}
|
||||
|
||||
impl UpdateStore {
|
||||
pub fn dump(&self, indexes: &[Index], path: PathBuf) -> Result<()> {
|
||||
let state_lock = self.state.write();
|
||||
state_lock.swap(State::Dumping);
|
||||
|
||||
// txn must *always* be acquired after state lock, or it will dead lock.
|
||||
let txn = self.env.write_txn()?;
|
||||
|
||||
let uuids = indexes.iter().map(|i| i.uuid()).collect();
|
||||
|
||||
self.dump_updates(&txn, &uuids, &path)?;
|
||||
|
||||
indexes
|
||||
.par_iter()
|
||||
.try_for_each(|index| index.dump(&path))
|
||||
.unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_updates(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
uuids: &HashSet<Uuid>,
|
||||
path: impl AsRef<Path>,
|
||||
) -> Result<()> {
|
||||
let mut dump_data_file = NamedTempFile::new_in(&path)?;
|
||||
|
||||
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
|
||||
self.dump_completed(txn, uuids, &mut dump_data_file)?;
|
||||
|
||||
let mut dst_path = path.as_ref().join("updates");
|
||||
create_dir_all(&dst_path)?;
|
||||
dst_path.push("data.jsonl");
|
||||
dump_data_file.persist(dst_path).unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_pending(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
uuids: &HashSet<Uuid>,
|
||||
mut file: impl Write,
|
||||
dst_path: impl AsRef<Path>,
|
||||
) -> Result<()> {
|
||||
let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
|
||||
|
||||
for pending in pendings {
|
||||
let ((_, uuid, _), data) = pending?;
|
||||
if uuids.contains(&uuid) {
|
||||
let update = data.decode()?;
|
||||
|
||||
if let Enqueued {
|
||||
meta: Update::DocumentAddition { content_uuid, .. },
|
||||
..
|
||||
} = update
|
||||
{
|
||||
self.update_file_store
|
||||
.dump(content_uuid, &dst_path)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
let update_json = UpdateEntry {
|
||||
uuid,
|
||||
update: update.into(),
|
||||
};
|
||||
|
||||
serde_json::to_writer(&mut file, &update_json)?;
|
||||
file.write_all(b"\n")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dump_completed(
|
||||
&self,
|
||||
txn: &RoTxn,
|
||||
uuids: &HashSet<Uuid>,
|
||||
mut file: impl Write,
|
||||
) -> Result<()> {
|
||||
let updates = self.updates.iter(txn)?.lazily_decode_data();
|
||||
|
||||
for update in updates {
|
||||
let ((uuid, _), data) = update?;
|
||||
if uuids.contains(&uuid) {
|
||||
let update = data.decode()?;
|
||||
|
||||
let update_json = UpdateEntry { uuid, update };
|
||||
|
||||
serde_json::to_writer(&mut file, &update_json)?;
|
||||
file.write_all(b"\n")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
db_size: usize,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(db_size as usize);
|
||||
|
||||
// create a dummy update fiel store, since it is not needed right now.
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let update_file_store = UpdateFileStore::new(tmp.path()).unwrap();
|
||||
let (store, _) = UpdateStore::new(options, &dst, update_file_store)?;
|
||||
|
||||
let src_update_path = src.as_ref().join("updates");
|
||||
let update_data = File::open(&src_update_path.join("data.jsonl"))?;
|
||||
let update_data = BufReader::new(update_data);
|
||||
|
||||
let stream = Deserializer::from_reader(update_data).into_iter::<UpdateEntry>();
|
||||
let mut wtxn = store.env.write_txn()?;
|
||||
|
||||
for entry in stream {
|
||||
let UpdateEntry { uuid, update } = entry?;
|
||||
store.register_raw_updates(&mut wtxn, &update, uuid)?;
|
||||
}
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -1,784 +0,0 @@
|
||||
mod codec;
|
||||
pub mod dump;
|
||||
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::{
|
||||
collections::{BTreeMap, HashSet},
|
||||
path::PathBuf,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use heed::types::{ByteSlice, OwnedType, SerdeJson};
|
||||
use heed::zerocopy::U64;
|
||||
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
|
||||
use log::error;
|
||||
use parking_lot::{Mutex, MutexGuard};
|
||||
use rayon::prelude::*;
|
||||
use tokio::runtime::Handle;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tokio::time::timeout;
|
||||
use uuid::Uuid;
|
||||
|
||||
use codec::*;
|
||||
|
||||
use super::error::Result;
|
||||
use super::status::{Enqueued, Processing};
|
||||
use crate::index::Index;
|
||||
use crate::index_controller::index_resolver::index_store::IndexStore;
|
||||
use crate::index_controller::index_resolver::uuid_store::UuidStore;
|
||||
use crate::index_controller::updates::*;
|
||||
use crate::EnvSizer;
|
||||
|
||||
#[allow(clippy::upper_case_acronyms)]
|
||||
type BEU64 = U64<heed::byteorder::BE>;
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum Update {
|
||||
DeleteDocuments(Vec<String>),
|
||||
DocumentAddition {
|
||||
primary_key: Option<String>,
|
||||
method: IndexDocumentsMethod,
|
||||
content_uuid: Uuid,
|
||||
},
|
||||
Settings(Settings<Unchecked>),
|
||||
ClearDocuments,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct UpdateStoreInfo {
|
||||
/// Size of the update store in bytes.
|
||||
pub size: u64,
|
||||
/// Uuid of the currently processing update if it exists
|
||||
pub processing: Option<Uuid>,
|
||||
}
|
||||
|
||||
/// A data structure that allows concurrent reads AND exactly one writer.
|
||||
pub struct StateLock {
|
||||
lock: Mutex<()>,
|
||||
data: ArcSwap<State>,
|
||||
}
|
||||
|
||||
pub struct StateLockGuard<'a> {
|
||||
_lock: MutexGuard<'a, ()>,
|
||||
state: &'a StateLock,
|
||||
}
|
||||
|
||||
impl StateLockGuard<'_> {
|
||||
pub fn swap(&self, state: State) -> Arc<State> {
|
||||
self.state.data.swap(Arc::new(state))
|
||||
}
|
||||
}
|
||||
|
||||
impl StateLock {
|
||||
fn from_state(state: State) -> Self {
|
||||
let lock = Mutex::new(());
|
||||
let data = ArcSwap::from(Arc::new(state));
|
||||
Self { lock, data }
|
||||
}
|
||||
|
||||
pub fn read(&self) -> Arc<State> {
|
||||
self.data.load().clone()
|
||||
}
|
||||
|
||||
pub fn write(&self) -> StateLockGuard {
|
||||
let _lock = self.lock.lock();
|
||||
let state = &self;
|
||||
StateLockGuard { _lock, state }
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum State {
|
||||
Idle,
|
||||
Processing(Uuid, Processing),
|
||||
Snapshoting,
|
||||
Dumping,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct UpdateStore {
|
||||
pub env: Env,
|
||||
/// A queue containing the updates to process, ordered by arrival.
|
||||
/// The key are built as follow:
|
||||
/// | global_update_id | index_uuid | update_id |
|
||||
/// | 8-bytes | 16-bytes | 8-bytes |
|
||||
pending_queue: Database<PendingKeyCodec, SerdeJson<Enqueued>>,
|
||||
/// Map indexes to the next available update id. If NextIdKey::Global is queried, then the next
|
||||
/// global update id is returned
|
||||
next_update_id: Database<NextIdCodec, OwnedType<BEU64>>,
|
||||
/// Contains all the performed updates meta, be they failed, aborted, or processed.
|
||||
/// The keys are built as follow:
|
||||
/// | Uuid | id |
|
||||
/// | 16-bytes | 8-bytes |
|
||||
updates: Database<UpdateKeyCodec, SerdeJson<UpdateStatus>>,
|
||||
/// Indicates the current state of the update store,
|
||||
state: Arc<StateLock>,
|
||||
/// Wake up the loop when a new event occurs.
|
||||
notification_sender: mpsc::Sender<()>,
|
||||
update_file_store: UpdateFileStore,
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl UpdateStore {
|
||||
fn new(
|
||||
mut options: EnvOpenOptions,
|
||||
path: impl AsRef<Path>,
|
||||
update_file_store: UpdateFileStore,
|
||||
) -> anyhow::Result<(Self, mpsc::Receiver<()>)> {
|
||||
options.max_dbs(5);
|
||||
|
||||
let update_path = path.as_ref().join("updates");
|
||||
std::fs::create_dir_all(&update_path)?;
|
||||
let env = options.open(update_path)?;
|
||||
let pending_queue = env.create_database(Some("pending-queue"))?;
|
||||
let next_update_id = env.create_database(Some("next-update-id"))?;
|
||||
let updates = env.create_database(Some("updates"))?;
|
||||
|
||||
let state = Arc::new(StateLock::from_state(State::Idle));
|
||||
|
||||
let (notification_sender, notification_receiver) = mpsc::channel(1);
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
env,
|
||||
pending_queue,
|
||||
next_update_id,
|
||||
updates,
|
||||
state,
|
||||
notification_sender,
|
||||
path: path.as_ref().to_owned(),
|
||||
update_file_store,
|
||||
},
|
||||
notification_receiver,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn open<U, I>(
|
||||
options: EnvOpenOptions,
|
||||
path: impl AsRef<Path>,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
must_exit: Arc<AtomicBool>,
|
||||
update_file_store: UpdateFileStore,
|
||||
) -> anyhow::Result<Arc<Self>>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
let (update_store, mut notification_receiver) =
|
||||
Self::new(options, path, update_file_store)?;
|
||||
let update_store = Arc::new(update_store);
|
||||
|
||||
// Send a first notification to trigger the process.
|
||||
if let Err(TrySendError::Closed(())) = update_store.notification_sender.try_send(()) {
|
||||
panic!("Failed to init update store");
|
||||
}
|
||||
|
||||
// We need a weak reference so we can take ownership on the arc later when we
|
||||
// want to close the index.
|
||||
let duration = Duration::from_secs(10 * 60); // 10 minutes
|
||||
let update_store_weak = Arc::downgrade(&update_store);
|
||||
tokio::task::spawn_local(async move {
|
||||
// Block and wait for something to process with a timeout. The timeout
|
||||
// function returns a Result and we must just unlock the loop on Result.
|
||||
'outer: while timeout(duration, notification_receiver.recv())
|
||||
.await
|
||||
.map_or(true, |o| o.is_some())
|
||||
{
|
||||
loop {
|
||||
match update_store_weak.upgrade() {
|
||||
Some(update_store) => {
|
||||
let handler = index_resolver.clone();
|
||||
let res = tokio::task::spawn_blocking(move || {
|
||||
update_store.process_pending_update(handler)
|
||||
})
|
||||
.await
|
||||
.expect("Fatal error processing update.");
|
||||
match res {
|
||||
Ok(Some(_)) => (),
|
||||
Ok(None) => break,
|
||||
Err(e) => {
|
||||
error!("Fatal error while processing an update that requires the update store to shutdown: {}", e);
|
||||
must_exit.store(true, Ordering::SeqCst);
|
||||
break 'outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
// the ownership on the arc has been taken, we need to exit.
|
||||
None => break 'outer,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
error!("Update store loop exited.");
|
||||
});
|
||||
|
||||
Ok(update_store)
|
||||
}
|
||||
|
||||
/// Returns the next global update id and the next update id for a given `index_uuid`.
|
||||
fn next_update_id(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<(u64, u64)> {
|
||||
let global_id = self
|
||||
.next_update_id
|
||||
.get(txn, &NextIdKey::Global)?
|
||||
.map(U64::get)
|
||||
.unwrap_or_default();
|
||||
|
||||
self.next_update_id
|
||||
.put(txn, &NextIdKey::Global, &BEU64::new(global_id + 1))?;
|
||||
|
||||
let update_id = self.next_update_id_raw(txn, index_uuid)?;
|
||||
|
||||
Ok((global_id, update_id))
|
||||
}
|
||||
|
||||
/// Returns the next next update id for a given `index_uuid` without
|
||||
/// incrementing the global update id. This is useful for the dumps.
|
||||
fn next_update_id_raw(&self, txn: &mut heed::RwTxn, index_uuid: Uuid) -> heed::Result<u64> {
|
||||
let update_id = self
|
||||
.next_update_id
|
||||
.get(txn, &NextIdKey::Index(index_uuid))?
|
||||
.map(U64::get)
|
||||
.unwrap_or_default();
|
||||
|
||||
self.next_update_id.put(
|
||||
txn,
|
||||
&NextIdKey::Index(index_uuid),
|
||||
&BEU64::new(update_id + 1),
|
||||
)?;
|
||||
|
||||
Ok(update_id)
|
||||
}
|
||||
|
||||
/// Registers the update content in the pending store and the meta
|
||||
/// into the pending-meta store. Returns the new unique update id.
|
||||
pub fn register_update(&self, index_uuid: Uuid, update: Update) -> heed::Result<Enqueued> {
|
||||
let mut txn = self.env.write_txn()?;
|
||||
let (global_id, update_id) = self.next_update_id(&mut txn, index_uuid)?;
|
||||
let meta = Enqueued::new(update, update_id);
|
||||
|
||||
self.pending_queue
|
||||
.put(&mut txn, &(global_id, index_uuid, update_id), &meta)?;
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
if let Err(TrySendError::Closed(())) = self.notification_sender.try_send(()) {
|
||||
panic!("Update store loop exited");
|
||||
}
|
||||
|
||||
Ok(meta)
|
||||
}
|
||||
|
||||
/// Push already processed update in the UpdateStore without triggering the notification
|
||||
/// process. This is useful for the dumps.
|
||||
pub fn register_raw_updates(
|
||||
&self,
|
||||
wtxn: &mut heed::RwTxn,
|
||||
update: &UpdateStatus,
|
||||
index_uuid: Uuid,
|
||||
) -> heed::Result<()> {
|
||||
match update {
|
||||
UpdateStatus::Enqueued(enqueued) => {
|
||||
let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?;
|
||||
self.pending_queue.remap_key_type::<PendingKeyCodec>().put(
|
||||
wtxn,
|
||||
&(global_id, index_uuid, enqueued.id()),
|
||||
enqueued,
|
||||
)?;
|
||||
}
|
||||
_ => {
|
||||
let _update_id = self.next_update_id_raw(wtxn, index_uuid)?;
|
||||
self.updates.put(wtxn, &(index_uuid, update.id()), update)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Executes the user provided function on the next pending update (the one with the lowest id).
|
||||
/// This is asynchronous as it let the user process the update with a read-only txn and
|
||||
/// only writing the result meta to the processed-meta store *after* it has been processed.
|
||||
fn process_pending_update<U, I>(
|
||||
&self,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
) -> Result<Option<()>>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
// Create a read transaction to be able to retrieve the pending update in order.
|
||||
let rtxn = self.env.read_txn()?;
|
||||
let first_meta = self.pending_queue.first(&rtxn)?;
|
||||
drop(rtxn);
|
||||
|
||||
// If there is a pending update we process and only keep
|
||||
// a reader while processing it, not a writer.
|
||||
match first_meta {
|
||||
Some(((global_id, index_uuid, _), pending)) => {
|
||||
let processing = pending.processing();
|
||||
// Acquire the state lock and set the current state to processing.
|
||||
// txn must *always* be acquired after state lock, or it will dead lock.
|
||||
let state = self.state.write();
|
||||
state.swap(State::Processing(index_uuid, processing.clone()));
|
||||
|
||||
let result = self.perform_update(processing, index_resolver, index_uuid, global_id);
|
||||
|
||||
state.swap(State::Idle);
|
||||
|
||||
result
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn perform_update<U, I>(
|
||||
&self,
|
||||
processing: Processing,
|
||||
index_resolver: Arc<IndexResolver<U, I>>,
|
||||
index_uuid: Uuid,
|
||||
global_id: u64,
|
||||
) -> Result<Option<()>>
|
||||
where
|
||||
U: UuidStore + Sync + Send + 'static,
|
||||
I: IndexStore + Sync + Send + 'static,
|
||||
{
|
||||
// Process the pending update using the provided user function.
|
||||
let handle = Handle::current();
|
||||
let update_id = processing.id();
|
||||
//IndexMsg::update(index_resolver, index_uuid, processing.clone()
|
||||
let result = match handle.block_on(index_resolver.get_index_by_uuid(index_uuid)) {
|
||||
Ok(index) => index.handle_update(processing),
|
||||
Err(e) => Err(processing.fail(e)),
|
||||
};
|
||||
|
||||
// Once the pending update have been successfully processed
|
||||
// we must remove the content from the pending and processing stores and
|
||||
// write the *new* meta to the processed-meta store and commit.
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.pending_queue
|
||||
.delete(&mut wtxn, &(global_id, index_uuid, update_id))?;
|
||||
|
||||
let result = match result {
|
||||
Ok(res) => res.into(),
|
||||
Err(res) => res.into(),
|
||||
};
|
||||
|
||||
self.updates
|
||||
.put(&mut wtxn, &(index_uuid, update_id), &result)?;
|
||||
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(Some(()))
|
||||
}
|
||||
|
||||
/// List the updates for `index_uuid`.
|
||||
pub fn list(&self, index_uuid: Uuid) -> Result<Vec<UpdateStatus>> {
|
||||
let mut update_list = BTreeMap::<u64, UpdateStatus>::new();
|
||||
|
||||
let txn = self.env.read_txn()?;
|
||||
|
||||
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
|
||||
for entry in pendings {
|
||||
let ((_, uuid, id), pending) = entry?;
|
||||
if uuid == index_uuid {
|
||||
update_list.insert(id, pending.decode()?.into());
|
||||
}
|
||||
}
|
||||
|
||||
let updates = self
|
||||
.updates
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter(&txn, index_uuid.as_bytes())?;
|
||||
|
||||
for entry in updates {
|
||||
let (_, update) = entry?;
|
||||
update_list.insert(update.id(), update);
|
||||
}
|
||||
|
||||
// If the currently processing update is from this index, replace the corresponding pending update with this one.
|
||||
match *self.state.read() {
|
||||
State::Processing(uuid, ref processing) if uuid == index_uuid => {
|
||||
update_list.insert(processing.id(), processing.clone().into());
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
Ok(update_list.into_iter().map(|(_, v)| v).collect())
|
||||
}
|
||||
|
||||
/// Returns the update associated meta or `None` if the update doesn't exist.
|
||||
pub fn meta(&self, index_uuid: Uuid, update_id: u64) -> heed::Result<Option<UpdateStatus>> {
|
||||
// Check if the update is the one currently processing
|
||||
match *self.state.read() {
|
||||
State::Processing(uuid, ref processing)
|
||||
if uuid == index_uuid && processing.id() == update_id =>
|
||||
{
|
||||
return Ok(Some(processing.clone().into()));
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let txn = self.env.read_txn()?;
|
||||
// Else, check if it is in the updates database:
|
||||
let update = self.updates.get(&txn, &(index_uuid, update_id))?;
|
||||
|
||||
if let Some(update) = update {
|
||||
return Ok(Some(update));
|
||||
}
|
||||
|
||||
// If nothing was found yet, we resolve to iterate over the pending queue.
|
||||
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
|
||||
|
||||
for entry in pendings {
|
||||
let ((_, uuid, id), pending) = entry?;
|
||||
if uuid == index_uuid && id == update_id {
|
||||
return Ok(Some(pending.decode()?.into()));
|
||||
}
|
||||
}
|
||||
|
||||
// No update was found.
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Delete all updates for an index from the update store. If the currently processing update
|
||||
/// is for `index_uuid`, the call will block until the update is terminated.
|
||||
pub fn delete_all(&self, index_uuid: Uuid) -> Result<()> {
|
||||
let mut txn = self.env.write_txn()?;
|
||||
// Contains all the content file paths that we need to be removed if the deletion was successful.
|
||||
let mut uuids_to_remove = Vec::new();
|
||||
|
||||
let mut pendings = self.pending_queue.iter_mut(&mut txn)?.lazily_decode_data();
|
||||
|
||||
while let Some(Ok(((_, uuid, _), pending))) = pendings.next() {
|
||||
if uuid == index_uuid {
|
||||
let pending = pending.decode()?;
|
||||
if let Update::DocumentAddition { content_uuid, .. } = pending.meta() {
|
||||
uuids_to_remove.push(*content_uuid);
|
||||
}
|
||||
|
||||
//Invariant check: we can only delete the current entry when we don't hold
|
||||
//references to it anymore. This must be done after we have retrieved its content.
|
||||
unsafe {
|
||||
pendings.del_current()?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drop(pendings);
|
||||
|
||||
let mut updates = self
|
||||
.updates
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.prefix_iter_mut(&mut txn, index_uuid.as_bytes())?
|
||||
.lazily_decode_data();
|
||||
|
||||
while let Some(_) = updates.next() {
|
||||
unsafe {
|
||||
updates.del_current()?;
|
||||
}
|
||||
}
|
||||
|
||||
drop(updates);
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
// If the currently processing update is from our index, we wait until it is
|
||||
// finished before returning. This ensure that no write to the index occurs after we delete it.
|
||||
if let State::Processing(uuid, _) = *self.state.read() {
|
||||
if uuid == index_uuid {
|
||||
// wait for a write lock, do nothing with it.
|
||||
self.state.write();
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, remove any outstanding update files. This must be done after waiting for the
|
||||
// last update to ensure that the update files are not deleted before the update needs
|
||||
// them.
|
||||
uuids_to_remove.iter().for_each(|uuid| {
|
||||
let _ = self.update_file_store.delete(*uuid);
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, indexes: Vec<Index>, path: impl AsRef<Path>) -> Result<()> {
|
||||
let state_lock = self.state.write();
|
||||
state_lock.swap(State::Snapshoting);
|
||||
|
||||
let txn = self.env.write_txn()?;
|
||||
|
||||
let update_path = path.as_ref().join("updates");
|
||||
create_dir_all(&update_path)?;
|
||||
|
||||
// acquire write lock to prevent further writes during snapshot
|
||||
create_dir_all(&update_path)?;
|
||||
let db_path = update_path.join("data.mdb");
|
||||
|
||||
// create db snapshot
|
||||
self.env.copy_to_path(&db_path, CompactionOption::Enabled)?;
|
||||
|
||||
let pendings = self.pending_queue.iter(&txn)?.lazily_decode_data();
|
||||
|
||||
let uuids: HashSet<_> = indexes.iter().map(|i| i.uuid()).collect();
|
||||
for entry in pendings {
|
||||
let ((_, uuid, _), pending) = entry?;
|
||||
if uuids.contains(&uuid) {
|
||||
if let Enqueued {
|
||||
meta: Update::DocumentAddition { content_uuid, .. },
|
||||
..
|
||||
} = pending.decode()?
|
||||
{
|
||||
self.update_file_store.snapshot(content_uuid, &path)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let path = path.as_ref().to_owned();
|
||||
indexes
|
||||
.par_iter()
|
||||
.try_for_each(|index| index.snapshot(&path))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_info(&self) -> Result<UpdateStoreInfo> {
|
||||
let mut size = self.env.size();
|
||||
let txn = self.env.read_txn()?;
|
||||
for entry in self.pending_queue.iter(&txn)? {
|
||||
let (_, pending) = entry?;
|
||||
if let Enqueued {
|
||||
meta: store::Update::DocumentAddition { content_uuid, .. },
|
||||
..
|
||||
} = pending
|
||||
{
|
||||
let len = self.update_file_store.get_size(content_uuid)?;
|
||||
size += len;
|
||||
}
|
||||
}
|
||||
let processing = match *self.state.read() {
|
||||
State::Processing(uuid, _) => Some(uuid),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
Ok(UpdateStoreInfo { size, processing })
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use futures::future::ok;
|
||||
use mockall::predicate::eq;
|
||||
|
||||
use crate::index::error::IndexError;
|
||||
use crate::index::test::Mocker;
|
||||
use crate::index_controller::index_resolver::index_store::MockIndexStore;
|
||||
use crate::index_controller::index_resolver::uuid_store::MockUuidStore;
|
||||
use crate::index_controller::updates::status::{Failed, Processed};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_next_id() {
|
||||
let dir = tempfile::tempdir_in(".").unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
let index_store = MockIndexStore::new();
|
||||
let uuid_store = MockUuidStore::new();
|
||||
let index_resolver = IndexResolver::new(uuid_store, index_store);
|
||||
let update_file_store = UpdateFileStore::new(dir.path()).unwrap();
|
||||
options.map_size(4096 * 100);
|
||||
let update_store = UpdateStore::open(
|
||||
options,
|
||||
dir.path(),
|
||||
Arc::new(index_resolver),
|
||||
Arc::new(AtomicBool::new(false)),
|
||||
update_file_store,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let index1_uuid = Uuid::new_v4();
|
||||
let index2_uuid = Uuid::new_v4();
|
||||
|
||||
let mut txn = update_store.env.write_txn().unwrap();
|
||||
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
|
||||
txn.commit().unwrap();
|
||||
assert_eq!((0, 0), ids);
|
||||
|
||||
let mut txn = update_store.env.write_txn().unwrap();
|
||||
let ids = update_store.next_update_id(&mut txn, index2_uuid).unwrap();
|
||||
txn.commit().unwrap();
|
||||
assert_eq!((1, 0), ids);
|
||||
|
||||
let mut txn = update_store.env.write_txn().unwrap();
|
||||
let ids = update_store.next_update_id(&mut txn, index1_uuid).unwrap();
|
||||
txn.commit().unwrap();
|
||||
assert_eq!((2, 1), ids);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_register_update() {
|
||||
let dir = tempfile::tempdir_in(".").unwrap();
|
||||
let index_store = MockIndexStore::new();
|
||||
let uuid_store = MockUuidStore::new();
|
||||
let index_resolver = IndexResolver::new(uuid_store, index_store);
|
||||
let update_file_store = UpdateFileStore::new(dir.path()).unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100);
|
||||
let update_store = UpdateStore::open(
|
||||
options,
|
||||
dir.path(),
|
||||
Arc::new(index_resolver),
|
||||
Arc::new(AtomicBool::new(false)),
|
||||
update_file_store,
|
||||
)
|
||||
.unwrap();
|
||||
let update = Update::ClearDocuments;
|
||||
let uuid = Uuid::new_v4();
|
||||
let store_clone = update_store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
store_clone.register_update(uuid, update).unwrap();
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let txn = update_store.env.read_txn().unwrap();
|
||||
assert!(update_store
|
||||
.pending_queue
|
||||
.get(&txn, &(0, uuid, 0))
|
||||
.unwrap()
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_process_update_success() {
|
||||
let dir = tempfile::tempdir_in(".").unwrap();
|
||||
let index_uuid = Uuid::new_v4();
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store
|
||||
.expect_get()
|
||||
.with(eq(index_uuid))
|
||||
.returning(|_uuid| {
|
||||
let mocker = Mocker::default();
|
||||
mocker
|
||||
.when::<Processing, std::result::Result<Processed, Failed>>("handle_update")
|
||||
.once()
|
||||
.then(|update| Ok(update.process(status::UpdateResult::Other)));
|
||||
|
||||
Box::pin(ok(Some(Index::faux(mocker))))
|
||||
});
|
||||
|
||||
let uuid_store = MockUuidStore::new();
|
||||
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
|
||||
|
||||
let update_file_store = UpdateFileStore::new(dir.path()).unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100);
|
||||
let store = UpdateStore::open(
|
||||
options,
|
||||
dir.path(),
|
||||
index_resolver.clone(),
|
||||
Arc::new(AtomicBool::new(false)),
|
||||
update_file_store,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// wait a bit for the event loop exit.
|
||||
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
||||
|
||||
let mut txn = store.env.write_txn().unwrap();
|
||||
|
||||
let update = Enqueued::new(Update::ClearDocuments, 0);
|
||||
|
||||
store
|
||||
.pending_queue
|
||||
.put(&mut txn, &(0, index_uuid, 0), &update)
|
||||
.unwrap();
|
||||
|
||||
txn.commit().unwrap();
|
||||
|
||||
// Process the pending, and check that it has been moved to the update databases, and
|
||||
// removed from the pending database.
|
||||
let store_clone = store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
store_clone.process_pending_update(index_resolver).unwrap();
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let txn = store.env.read_txn().unwrap();
|
||||
|
||||
assert!(store.pending_queue.first(&txn).unwrap().is_none());
|
||||
let update = store.updates.get(&txn, &(index_uuid, 0)).unwrap().unwrap();
|
||||
|
||||
assert!(matches!(update, UpdateStatus::Processed(_)));
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_process_update_failure() {
|
||||
let dir = tempfile::tempdir_in(".").unwrap();
|
||||
let index_uuid = Uuid::new_v4();
|
||||
|
||||
let mut index_store = MockIndexStore::new();
|
||||
index_store
|
||||
.expect_get()
|
||||
.with(eq(index_uuid))
|
||||
.returning(|_uuid| {
|
||||
let mocker = Mocker::default();
|
||||
mocker
|
||||
.when::<Processing, std::result::Result<Processed, Failed>>("handle_update")
|
||||
.once()
|
||||
.then(|update| Err(update.fail(IndexError::DocumentNotFound("1".to_string()))));
|
||||
|
||||
Box::pin(ok(Some(Index::faux(mocker))))
|
||||
});
|
||||
|
||||
let uuid_store = MockUuidStore::new();
|
||||
let index_resolver = Arc::new(IndexResolver::new(uuid_store, index_store));
|
||||
|
||||
let update_file_store = UpdateFileStore::new(dir.path()).unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100);
|
||||
let store = UpdateStore::open(
|
||||
options,
|
||||
dir.path(),
|
||||
index_resolver.clone(),
|
||||
Arc::new(AtomicBool::new(false)),
|
||||
update_file_store,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// wait a bit for the event loop exit.
|
||||
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
||||
|
||||
let mut txn = store.env.write_txn().unwrap();
|
||||
|
||||
let update = Enqueued::new(Update::ClearDocuments, 0);
|
||||
|
||||
store
|
||||
.pending_queue
|
||||
.put(&mut txn, &(0, index_uuid, 0), &update)
|
||||
.unwrap();
|
||||
|
||||
txn.commit().unwrap();
|
||||
|
||||
// Process the pending, and check that it has been moved to the update databases, and
|
||||
// removed from the pending database.
|
||||
let store_clone = store.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
store_clone.process_pending_update(index_resolver).unwrap();
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let txn = store.env.read_txn().unwrap();
|
||||
|
||||
assert!(store.pending_queue.first(&txn).unwrap().is_none());
|
||||
let update = store.updates.get(&txn, &(index_uuid, 0)).unwrap().unwrap();
|
||||
|
||||
assert!(matches!(update, UpdateStatus::Failed(_)));
|
||||
}
|
||||
}
|
@ -2,7 +2,6 @@ use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use milli::update::UpdateBuilder;
|
||||
use tokio::fs;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::task::spawn_blocking;
|
||||
@ -11,7 +10,6 @@ use uuid::Uuid;
|
||||
use super::error::{IndexResolverError, Result};
|
||||
use crate::index::update_handler::UpdateHandler;
|
||||
use crate::index::Index;
|
||||
use crate::index_controller::update_file_store::UpdateFileStore;
|
||||
use crate::options::IndexerOpts;
|
||||
|
||||
type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
|
||||
@ -19,7 +17,7 @@ type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>;
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, mockall::automock)]
|
||||
pub trait IndexStore {
|
||||
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index>;
|
||||
async fn create(&self, uuid: Uuid) -> Result<Index>;
|
||||
async fn get(&self, uuid: Uuid) -> Result<Option<Index>>;
|
||||
async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>;
|
||||
}
|
||||
@ -28,7 +26,6 @@ pub struct MapIndexStore {
|
||||
index_store: AsyncMap<Uuid, Index>,
|
||||
path: PathBuf,
|
||||
index_size: usize,
|
||||
update_file_store: Arc<UpdateFileStore>,
|
||||
update_handler: Arc<UpdateHandler>,
|
||||
}
|
||||
|
||||
@ -39,14 +36,12 @@ impl MapIndexStore {
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<Self> {
|
||||
let update_handler = Arc::new(UpdateHandler::new(indexer_opts)?);
|
||||
let update_file_store = Arc::new(UpdateFileStore::new(path.as_ref()).unwrap());
|
||||
let path = path.as_ref().join("indexes/");
|
||||
let index_store = Arc::new(RwLock::new(HashMap::new()));
|
||||
Ok(Self {
|
||||
index_store,
|
||||
path,
|
||||
index_size,
|
||||
update_file_store,
|
||||
update_handler,
|
||||
})
|
||||
}
|
||||
@ -54,7 +49,7 @@ impl MapIndexStore {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl IndexStore for MapIndexStore {
|
||||
async fn create(&self, uuid: Uuid, primary_key: Option<String>) -> Result<Index> {
|
||||
async fn create(&self, uuid: Uuid) -> Result<Index> {
|
||||
// We need to keep the lock until we are sure the db file has been opened correctly, to
|
||||
// ensure that another db is not created at the same time.
|
||||
let mut lock = self.index_store.write().await;
|
||||
@ -68,20 +63,9 @@ impl IndexStore for MapIndexStore {
|
||||
}
|
||||
|
||||
let index_size = self.index_size;
|
||||
let file_store = self.update_file_store.clone();
|
||||
let update_handler = self.update_handler.clone();
|
||||
let index = spawn_blocking(move || -> Result<Index> {
|
||||
let index = Index::open(path, index_size, file_store, uuid, update_handler)?;
|
||||
if let Some(primary_key) = primary_key {
|
||||
let inner = index.inner();
|
||||
let mut txn = inner.write_txn()?;
|
||||
|
||||
let mut builder = UpdateBuilder::new(0).settings(&mut txn, index.inner());
|
||||
builder.set_primary_key(primary_key);
|
||||
builder.execute(|_, _| ())?;
|
||||
|
||||
txn.commit()?;
|
||||
}
|
||||
let index = Index::open(path, index_size, uuid, update_handler)?;
|
||||
Ok(index)
|
||||
})
|
||||
.await??;
|
||||
@ -104,12 +88,10 @@ impl IndexStore for MapIndexStore {
|
||||
}
|
||||
|
||||
let index_size = self.index_size;
|
||||
let file_store = self.update_file_store.clone();
|
||||
let update_handler = self.update_handler.clone();
|
||||
let index = spawn_blocking(move || {
|
||||
Index::open(path, index_size, file_store, uuid, update_handler)
|
||||
})
|
||||
.await??;
|
||||
let index =
|
||||
spawn_blocking(move || Index::open(path, index_size, uuid, update_handler))
|
||||
.await??;
|
||||
self.index_store.write().await.insert(uuid, index.clone());
|
||||
Ok(Some(index))
|
||||
}
|
@ -3,98 +3,92 @@ use std::fs::{create_dir_all, File};
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use heed::types::{ByteSlice, Str};
|
||||
use heed::{CompactionOption, Database, Env, EnvOpenOptions};
|
||||
use heed::types::{SerdeBincode, Str};
|
||||
use heed::{CompactionOption, Database, Env};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::{IndexResolverError, Result};
|
||||
use crate::tasks::task::TaskId;
|
||||
use crate::EnvSizer;
|
||||
|
||||
const UUID_STORE_SIZE: usize = 1_073_741_824; //1GiB
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct DumpEntry {
|
||||
uuid: Uuid,
|
||||
uid: String,
|
||||
pub struct DumpEntry {
|
||||
pub uid: String,
|
||||
pub index_meta: IndexMeta,
|
||||
}
|
||||
|
||||
const UUIDS_DB_PATH: &str = "index_uuids";
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, mockall::automock)]
|
||||
pub trait UuidStore: Sized {
|
||||
pub trait IndexMetaStore: Sized {
|
||||
// Create a new entry for `name`. Return an error if `err` and the entry already exists, return
|
||||
// the uuid otherwise.
|
||||
async fn get_uuid(&self, uid: String) -> Result<(String, Option<Uuid>)>;
|
||||
async fn delete(&self, uid: String) -> Result<Option<Uuid>>;
|
||||
async fn list(&self) -> Result<Vec<(String, Uuid)>>;
|
||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()>;
|
||||
async fn get(&self, uid: String) -> Result<(String, Option<IndexMeta>)>;
|
||||
async fn delete(&self, uid: String) -> Result<Option<IndexMeta>>;
|
||||
async fn list(&self) -> Result<Vec<(String, IndexMeta)>>;
|
||||
async fn insert(&self, name: String, meta: IndexMeta) -> Result<()>;
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||
async fn get_size(&self) -> Result<u64>;
|
||||
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>>;
|
||||
async fn dump(&self, path: PathBuf) -> Result<()>;
|
||||
}
|
||||
|
||||
/// Metadata stored in the meta store for each index uid.
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct IndexMeta {
|
||||
/// Uuid used to look the index up in the index store.
pub uuid: Uuid,
|
||||
/// Id of the task that was being processed when the index entry was created.
pub creation_task_id: TaskId,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct HeedUuidStore {
|
||||
pub struct HeedMetaStore {
|
||||
env: Env,
|
||||
db: Database<Str, ByteSlice>,
|
||||
db: Database<Str, SerdeBincode<IndexMeta>>,
|
||||
}
|
||||
|
||||
impl HeedUuidStore {
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let path = path.as_ref().join(UUIDS_DB_PATH);
|
||||
create_dir_all(&path)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(UUID_STORE_SIZE); // 1GB
|
||||
options.max_dbs(1);
|
||||
let env = options.open(path)?;
|
||||
impl HeedMetaStore {
|
||||
pub fn new(env: heed::Env) -> Result<Self> {
|
||||
let db = env.create_database(Some("uuids"))?;
|
||||
Ok(Self { env, db })
|
||||
}
|
||||
|
||||
pub fn get_uuid(&self, name: &str) -> Result<Option<Uuid>> {
|
||||
fn get(&self, name: &str) -> Result<Option<IndexMeta>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let txn = env.read_txn()?;
|
||||
match db.get(&txn, name)? {
|
||||
Some(uuid) => {
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
Ok(Some(uuid))
|
||||
}
|
||||
Some(meta) => Ok(Some(meta)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
||||
fn delete(&self, uid: String) -> Result<Option<IndexMeta>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let mut txn = env.write_txn()?;
|
||||
match db.get(&txn, &uid)? {
|
||||
Some(uuid) => {
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
Some(meta) => {
|
||||
db.delete(&mut txn, &uid)?;
|
||||
txn.commit()?;
|
||||
Ok(Some(uuid))
|
||||
Ok(Some(meta))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||
fn list(&self) -> Result<Vec<(String, IndexMeta)>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let txn = env.read_txn()?;
|
||||
let mut entries = Vec::new();
|
||||
for entry in db.iter(&txn)? {
|
||||
let (name, uuid) = entry?;
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
entries.push((name.to_owned(), uuid))
|
||||
let (name, meta) = entry?;
|
||||
entries.push((name.to_string(), meta))
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
pub fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
||||
pub(crate) fn insert(&self, name: String, meta: IndexMeta) -> Result<()> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
let mut txn = env.write_txn()?;
|
||||
@ -103,20 +97,17 @@ impl HeedUuidStore {
|
||||
return Err(IndexResolverError::IndexAlreadyExists(name));
|
||||
}
|
||||
|
||||
db.put(&mut txn, &name, uuid.as_bytes())?;
|
||||
db.put(&mut txn, &name, &meta)?;
|
||||
txn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
let env = self.env.clone();
|
||||
let db = self.db;
|
||||
fn snapshot(&self, mut path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
// Write transaction to acquire a lock on the database.
|
||||
let txn = env.write_txn()?;
|
||||
let txn = self.env.write_txn()?;
|
||||
let mut entries = HashSet::new();
|
||||
for entry in db.iter(&txn)? {
|
||||
let (_, uuid) = entry?;
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
for entry in self.db.iter(&txn)? {
|
||||
let (_, IndexMeta { uuid, .. }) = entry?;
|
||||
entries.insert(uuid);
|
||||
}
|
||||
|
||||
@ -125,56 +116,49 @@ impl HeedUuidStore {
|
||||
path.push(UUIDS_DB_PATH);
|
||||
create_dir_all(&path).unwrap();
|
||||
path.push("data.mdb");
|
||||
env.copy_to_path(path, CompactionOption::Enabled)?;
|
||||
self.env.copy_to_path(path, CompactionOption::Enabled)?;
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
pub fn get_size(&self) -> Result<u64> {
|
||||
fn get_size(&self) -> Result<u64> {
|
||||
Ok(self.env.size())
|
||||
}
|
||||
|
||||
pub fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
pub fn dump(&self, path: PathBuf) -> Result<()> {
|
||||
let dump_path = path.join(UUIDS_DB_PATH);
|
||||
create_dir_all(&dump_path)?;
|
||||
let dump_file_path = dump_path.join("data.jsonl");
|
||||
let mut dump_file = File::create(&dump_file_path)?;
|
||||
let mut uuids = HashSet::new();
|
||||
|
||||
let txn = self.env.read_txn()?;
|
||||
for entry in self.db.iter(&txn)? {
|
||||
let (uid, uuid) = entry?;
|
||||
let (uid, index_meta) = entry?;
|
||||
let uid = uid.to_string();
|
||||
let uuid = Uuid::from_slice(uuid)?;
|
||||
|
||||
let entry = DumpEntry { uuid, uid };
|
||||
let entry = DumpEntry { uid, index_meta };
|
||||
serde_json::to_writer(&mut dump_file, &entry)?;
|
||||
dump_file.write_all(b"\n").unwrap();
|
||||
|
||||
uuids.insert(uuid);
|
||||
}
|
||||
|
||||
Ok(uuids)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH);
|
||||
std::fs::create_dir_all(&uuid_resolver_path)?;
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, env: heed::Env) -> Result<()> {
|
||||
let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl");
|
||||
let indexes = File::open(&src_indexes)?;
|
||||
let mut indexes = BufReader::new(indexes);
|
||||
let mut line = String::new();
|
||||
|
||||
let db = Self::new(dst)?;
|
||||
let db = Self::new(env)?;
|
||||
let mut txn = db.env.write_txn()?;
|
||||
|
||||
loop {
|
||||
match indexes.read_line(&mut line) {
|
||||
Ok(0) => break,
|
||||
Ok(_) => {
|
||||
let DumpEntry { uuid, uid } = serde_json::from_str(&line)?;
|
||||
db.db.put(&mut txn, &uid, uuid.as_bytes())?;
|
||||
let DumpEntry { uid, index_meta } = serde_json::from_str(&line)?;
|
||||
db.db.put(&mut txn, &uid, &index_meta)?;
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
}
|
||||
@ -183,32 +167,30 @@ impl HeedUuidStore {
|
||||
}
|
||||
txn.commit()?;
|
||||
|
||||
db.env.prepare_for_closing().wait();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl UuidStore for HeedUuidStore {
|
||||
async fn get_uuid(&self, name: String) -> Result<(String, Option<Uuid>)> {
|
||||
impl IndexMetaStore for HeedMetaStore {
|
||||
async fn get(&self, name: String) -> Result<(String, Option<IndexMeta>)> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.get_uuid(&name).map(|res| (name, res))).await?
|
||||
tokio::task::spawn_blocking(move || this.get(&name).map(|res| (name, res))).await?
|
||||
}
|
||||
|
||||
async fn delete(&self, uid: String) -> Result<Option<Uuid>> {
|
||||
async fn delete(&self, uid: String) -> Result<Option<IndexMeta>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.delete(uid)).await?
|
||||
}
|
||||
|
||||
async fn list(&self) -> Result<Vec<(String, Uuid)>> {
|
||||
async fn list(&self) -> Result<Vec<(String, IndexMeta)>> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.list()).await?
|
||||
}
|
||||
|
||||
async fn insert(&self, name: String, uuid: Uuid) -> Result<()> {
|
||||
async fn insert(&self, name: String, meta: IndexMeta) -> Result<()> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.insert(name, uuid)).await?
|
||||
tokio::task::spawn_blocking(move || this.insert(name, meta)).await?
|
||||
}
|
||||
|
||||
async fn snapshot(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
@ -220,8 +202,8 @@ impl UuidStore for HeedUuidStore {
|
||||
self.get_size()
|
||||
}
|
||||
|
||||
async fn dump(&self, path: PathBuf) -> Result<HashSet<Uuid>> {
|
||||
async fn dump(&self, path: PathBuf) -> Result<()> {
|
||||
let this = self.clone();
|
||||
tokio::task::spawn_blocking(move || this.dump(path)).await?
|
||||
Ok(tokio::task::spawn_blocking(move || this.dump(path)).await??)
|
||||
}
|
||||
}
|
578
meilisearch-lib/src/index_resolver/mod.rs
Normal file
578
meilisearch-lib/src/index_resolver/mod.rs
Normal file
@ -0,0 +1,578 @@
|
||||
pub mod error;
|
||||
pub mod index_store;
|
||||
pub mod meta_store;
|
||||
|
||||
use std::convert::TryInto;
|
||||
use std::path::Path;
|
||||
|
||||
use chrono::Utc;
|
||||
use error::{IndexResolverError, Result};
|
||||
use heed::Env;
|
||||
use index_store::{IndexStore, MapIndexStore};
|
||||
use meilisearch_error::ResponseError;
|
||||
use meta_store::{HeedMetaStore, IndexMetaStore};
|
||||
use milli::update::DocumentDeletionResult;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::task::spawn_blocking;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::index::update_handler::UpdateHandler;
|
||||
use crate::index::{error::Result as IndexResult, Index};
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::tasks::batch::Batch;
|
||||
use crate::tasks::task::{DocumentDeletion, Job, Task, TaskContent, TaskEvent, TaskId, TaskResult};
|
||||
use crate::tasks::Pending;
|
||||
use crate::tasks::TaskPerformer;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
use self::meta_store::IndexMeta;
|
||||
|
||||
pub type HardStateIndexResolver = IndexResolver<HeedMetaStore, MapIndexStore>;
|
||||
|
||||
/// A validated index uid: only ASCII alphanumeric characters, `-` and `_`, and between 1 and 400
|
||||
/// bytes long. `IndexUid::new` enforces these rules when building a value.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub struct IndexUid(#[cfg_attr(test, proptest(regex("[a-zA-Z0-9_-]{1,400}")))] String);
|
||||
|
||||
pub fn create_index_resolver(
|
||||
path: impl AsRef<Path>,
|
||||
index_size: usize,
|
||||
indexer_opts: &IndexerOpts,
|
||||
meta_env: heed::Env,
|
||||
file_store: UpdateFileStore,
|
||||
) -> anyhow::Result<HardStateIndexResolver> {
|
||||
let uuid_store = HeedMetaStore::new(meta_env)?;
|
||||
let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?;
|
||||
Ok(IndexResolver::new(uuid_store, index_store, file_store))
|
||||
}
|
||||
|
||||
impl IndexUid {
|
||||
pub fn new(uid: String) -> Result<Self> {
|
||||
if !uid
|
||||
.chars()
|
||||
.all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_')
|
||||
|| !(1..=400).contains(&uid.len())
|
||||
{
|
||||
Err(IndexResolverError::BadlyFormatted(uid))
|
||||
} else {
|
||||
Ok(Self(uid))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn new_unchecked(s: impl AsRef<str>) -> Self {
|
||||
Self(s.as_ref().to_string())
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> String {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Return a reference over the inner str.
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for IndexUid {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl TryInto<IndexUid> for String {
|
||||
type Error = IndexResolverError;
|
||||
|
||||
fn try_into(self) -> Result<IndexUid> {
|
||||
IndexUid::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<U, I> TaskPerformer for IndexResolver<U, I>
|
||||
where
|
||||
U: IndexMetaStore + Send + Sync + 'static,
|
||||
I: IndexStore + Send + Sync + 'static,
|
||||
{
|
||||
type Error = ResponseError;
|
||||
|
||||
async fn process(&self, mut batch: Batch) -> Batch {
|
||||
// Until batching is implemented, all batch should contain only one update.
|
||||
debug_assert_eq!(batch.len(), 1);
|
||||
|
||||
match batch.tasks.first_mut() {
|
||||
Some(Pending::Task(task)) => {
|
||||
task.events.push(TaskEvent::Processing(Utc::now()));
|
||||
|
||||
match self.process_task(task).await {
|
||||
Ok(success) => {
|
||||
task.events.push(TaskEvent::Succeded {
|
||||
result: success,
|
||||
timestamp: Utc::now(),
|
||||
});
|
||||
}
|
||||
Err(err) => task.events.push(TaskEvent::Failed {
|
||||
error: err.into(),
|
||||
timestamp: Utc::now(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
Some(Pending::Job(job)) => {
|
||||
let job = std::mem::take(job);
|
||||
self.process_job(job).await;
|
||||
}
|
||||
|
||||
None => (),
|
||||
}
|
||||
|
||||
batch
|
||||
}
|
||||
|
||||
async fn finish(&self, batch: &Batch) {
|
||||
for task in &batch.tasks {
|
||||
if let Some(content_uuid) = task.get_content_uuid() {
|
||||
if let Err(e) = self.file_store.delete(content_uuid).await {
|
||||
log::error!("error deleting update file: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves index uids to indexes and runs tasks and jobs against them.
pub struct IndexResolver<U, I> {
|
||||
/// Meta store mapping an index uid to its `IndexMeta`.
index_uuid_store: U,
|
||||
/// Store used to create, fetch and delete indexes by uuid.
index_store: I,
|
||||
/// Store of update files; passed to document additions and cleaned up in `finish`.
file_store: UpdateFileStore,
|
||||
}
|
||||
|
||||
impl IndexResolver<HeedMetaStore, MapIndexStore> {
|
||||
pub fn load_dump(
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
index_db_size: usize,
|
||||
env: Env,
|
||||
indexer_opts: &IndexerOpts,
|
||||
) -> anyhow::Result<()> {
|
||||
HeedMetaStore::load_dump(&src, env)?;
|
||||
let indexes_path = src.as_ref().join("indexes");
|
||||
let indexes = indexes_path.read_dir()?;
|
||||
let update_handler = UpdateHandler::new(indexer_opts)?;
|
||||
for index in indexes {
|
||||
Index::load_dump(&index?.path(), &dst, index_db_size, &update_handler)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<U, I> IndexResolver<U, I>
|
||||
where
|
||||
U: IndexMetaStore,
|
||||
I: IndexStore,
|
||||
{
|
||||
pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self {
|
||||
Self {
|
||||
index_uuid_store,
|
||||
index_store,
|
||||
file_store,
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_task(&self, task: &Task) -> Result<TaskResult> {
|
||||
let index_uid = task.index_uid.clone();
|
||||
match &task.content {
|
||||
TaskContent::DocumentAddition {
|
||||
content_uuid,
|
||||
merge_strategy,
|
||||
primary_key,
|
||||
..
|
||||
} => {
|
||||
let primary_key = primary_key.clone();
|
||||
let content_uuid = *content_uuid;
|
||||
let method = *merge_strategy;
|
||||
|
||||
let index = self.get_or_create_index(index_uid, task.id).await?;
|
||||
let file_store = self.file_store.clone();
|
||||
let result = spawn_blocking(move || {
|
||||
index.update_documents(method, content_uuid, primary_key, file_store)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(result.into())
|
||||
}
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids)) => {
|
||||
let ids = ids.clone();
|
||||
let index = self.get_index(index_uid.into_inner()).await?;
|
||||
|
||||
let DocumentDeletionResult {
|
||||
deleted_documents, ..
|
||||
} = spawn_blocking(move || index.delete_documents(&ids)).await??;
|
||||
|
||||
Ok(TaskResult::DocumentDeletion { deleted_documents })
|
||||
}
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Clear) => {
|
||||
let index = self.get_index(index_uid.into_inner()).await?;
|
||||
let deleted_documents = spawn_blocking(move || -> IndexResult<u64> {
|
||||
let number_documents = index.stats()?.number_of_documents;
|
||||
index.clear_documents()?;
|
||||
Ok(number_documents)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(TaskResult::ClearAll { deleted_documents })
|
||||
}
|
||||
TaskContent::SettingsUpdate {
|
||||
settings,
|
||||
is_deletion,
|
||||
} => {
|
||||
let index = if *is_deletion {
|
||||
self.get_index(index_uid.into_inner()).await?
|
||||
} else {
|
||||
self.get_or_create_index(index_uid, task.id).await?
|
||||
};
|
||||
|
||||
let settings = settings.clone();
|
||||
spawn_blocking(move || index.update_settings(&settings.check())).await??;
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
TaskContent::IndexDeletion => {
|
||||
let index = self.delete_index(index_uid.into_inner()).await?;
|
||||
|
||||
let deleted_documents = spawn_blocking(move || -> IndexResult<u64> {
|
||||
Ok(index.stats()?.number_of_documents)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(TaskResult::ClearAll { deleted_documents })
|
||||
}
|
||||
TaskContent::IndexCreation { primary_key } => {
|
||||
let index = self.create_index(index_uid, task.id).await?;
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
let primary_key = primary_key.clone();
|
||||
spawn_blocking(move || index.update_primary_key(primary_key)).await??;
|
||||
}
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
TaskContent::IndexUpdate { primary_key } => {
|
||||
let index = self.get_index(index_uid.into_inner()).await?;
|
||||
|
||||
if let Some(primary_key) = primary_key {
|
||||
let primary_key = primary_key.clone();
|
||||
spawn_blocking(move || index.update_primary_key(primary_key)).await??;
|
||||
}
|
||||
|
||||
Ok(TaskResult::Other)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_job(&self, job: Job) {
|
||||
match job {
|
||||
Job::Dump { ret, path } => {
|
||||
log::trace!("The Dump task is getting executed");
|
||||
|
||||
if ret.send(self.dump(path).await).is_err() {
|
||||
log::error!("The dump actor died.");
|
||||
}
|
||||
}
|
||||
Job::Empty => log::error!("Tried to process an empty task."),
|
||||
Job::Snapshot(job) => {
|
||||
if let Err(e) = job.run().await {
|
||||
log::error!("Error performing snapshot: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Dumps every listed index into `path`, then dumps the meta store there too.
///
/// # Errors
///
/// Stops at the first failure while listing the indexes, dumping an index
/// or dumping the meta store.
pub async fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
    let indexes = self.list().await?;
    for (_name, index) in indexes {
        index.dump(&path)?;
    }

    let meta_dump_path = path.as_ref().to_owned();
    self.index_uuid_store.dump(meta_dump_path).await?;

    Ok(())
}
|
||||
|
||||
async fn create_index(&self, uid: IndexUid, creation_task_id: TaskId) -> Result<Index> {
|
||||
match self.index_uuid_store.get(uid.into_inner()).await? {
|
||||
(uid, Some(_)) => Err(IndexResolverError::IndexAlreadyExists(uid)),
|
||||
(uid, None) => {
|
||||
let uuid = Uuid::new_v4();
|
||||
let index = self.index_store.create(uuid).await?;
|
||||
match self
|
||||
.index_uuid_store
|
||||
.insert(
|
||||
uid,
|
||||
IndexMeta {
|
||||
uuid,
|
||||
creation_task_id,
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
Err(e) => {
|
||||
match self.index_store.delete(uuid).await {
|
||||
Ok(Some(index)) => {
|
||||
index.close();
|
||||
}
|
||||
Ok(None) => (),
|
||||
Err(e) => log::error!("Error while deleting index: {:?}", e),
|
||||
}
|
||||
Err(e)
|
||||
}
|
||||
Ok(()) => Ok(index),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create an index with name `uid`.
|
||||
pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result<Index> {
|
||||
match self.create_index(uid, task_id).await {
|
||||
Ok(index) => Ok(index),
|
||||
Err(IndexResolverError::IndexAlreadyExists(uid)) => self.get_index(uid).await,
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list(&self) -> Result<Vec<(String, Index)>> {
|
||||
let uuids = self.index_uuid_store.list().await?;
|
||||
let mut indexes = Vec::new();
|
||||
for (name, IndexMeta { uuid, .. }) in uuids {
|
||||
match self.index_store.get(uuid).await? {
|
||||
Some(index) => indexes.push((name, index)),
|
||||
None => {
|
||||
// we found an unexisting index, we remove it from the uuid store
|
||||
let _ = self.index_uuid_store.delete(name).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(indexes)
|
||||
}
|
||||
|
||||
pub async fn delete_index(&self, uid: String) -> Result<Index> {
|
||||
match self.index_uuid_store.delete(uid.clone()).await? {
|
||||
Some(IndexMeta { uuid, .. }) => match self.index_store.delete(uuid).await? {
|
||||
Some(index) => {
|
||||
index.clone().close();
|
||||
Ok(index)
|
||||
}
|
||||
None => Err(IndexResolverError::UnexistingIndex(uid)),
|
||||
},
|
||||
None => Err(IndexResolverError::UnexistingIndex(uid)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index(&self, uid: String) -> Result<Index> {
|
||||
match self.index_uuid_store.get(uid).await? {
|
||||
(name, Some(IndexMeta { uuid, .. })) => {
|
||||
match self.index_store.get(uuid).await? {
|
||||
Some(index) => Ok(index),
|
||||
None => {
|
||||
// For some reason we got a uuid to an unexisting index, we return an error,
|
||||
// and remove the uuid from the uuid store.
|
||||
let _ = self.index_uuid_store.delete(name.clone()).await;
|
||||
Err(IndexResolverError::UnexistingIndex(name))
|
||||
}
|
||||
}
|
||||
}
|
||||
(name, _) => Err(IndexResolverError::UnexistingIndex(name)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result<TaskId> {
|
||||
let (uid, meta) = self.index_uuid_store.get(index_uid).await?;
|
||||
meta.map(
|
||||
|IndexMeta {
|
||||
creation_task_id, ..
|
||||
}| creation_task_id,
|
||||
)
|
||||
.ok_or(IndexResolverError::UnexistingIndex(uid))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use super::*;
|
||||
|
||||
use futures::future::ok;
|
||||
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
|
||||
use nelson::Mocker;
|
||||
use proptest::prelude::*;
|
||||
|
||||
use crate::index::{
|
||||
error::{IndexError, Result as IndexResult},
|
||||
Checked, IndexMeta, IndexStats, Settings,
|
||||
};
|
||||
use index_store::MockIndexStore;
|
||||
use meta_store::MockIndexMetaStore;
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_process_task(
|
||||
task in any::<Task>(),
|
||||
index_exists in any::<bool>(),
|
||||
index_op_fails in any::<bool>(),
|
||||
any_int in any::<u64>(),
|
||||
) {
|
||||
actix_rt::System::new().block_on(async move {
|
||||
let uuid = Uuid::new_v4();
|
||||
let mut index_store = MockIndexStore::new();
|
||||
|
||||
let mocker = Mocker::default();
|
||||
|
||||
// Return arbitrary data from index call.
|
||||
match &task.content {
|
||||
TaskContent::DocumentAddition{primary_key, ..} => {
|
||||
let result = move || if !index_op_fails {
|
||||
Ok(DocumentAdditionResult { indexed_documents: any_int, number_of_documents: any_int })
|
||||
} else {
|
||||
// return this error because it's easy to generate...
|
||||
Err(IndexError::DocumentNotFound("a doc".into()))
|
||||
};
|
||||
if primary_key.is_some() {
|
||||
mocker.when::<String, IndexResult<IndexMeta>>("update_primary_key")
|
||||
.then(move |_| Ok(IndexMeta{ created_at: Utc::now(), updated_at: Utc::now(), primary_key: None }));
|
||||
}
|
||||
mocker.when::<(IndexDocumentsMethod, Uuid, Option<String>, UpdateFileStore), IndexResult<DocumentAdditionResult>>("update_documents")
|
||||
.then(move |(_, _, _, _)| result());
|
||||
}
|
||||
TaskContent::SettingsUpdate{..} => {
|
||||
let result = move || if !index_op_fails {
|
||||
Ok(())
|
||||
} else {
|
||||
// return this error because it's easy to generate...
|
||||
Err(IndexError::DocumentNotFound("a doc".into()))
|
||||
};
|
||||
mocker.when::<&Settings<Checked>, IndexResult<()>>("update_settings")
|
||||
.then(move |_| result());
|
||||
}
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Ids(_ids)) => {
|
||||
let result = move || if !index_op_fails {
|
||||
Ok(any_int as u64)
|
||||
} else {
|
||||
// return this error because it's easy to generate...
|
||||
Err(IndexError::DocumentNotFound("a doc".into()))
|
||||
};
|
||||
|
||||
mocker.when::<&[String], IndexResult<u64>>("delete_documents")
|
||||
.then(move |_| result());
|
||||
},
|
||||
TaskContent::DocumentDeletion(DocumentDeletion::Clear) => {
|
||||
let result = move || if !index_op_fails {
|
||||
Ok(())
|
||||
} else {
|
||||
// return this error because it's easy to generate...
|
||||
Err(IndexError::DocumentNotFound("a doc".into()))
|
||||
};
|
||||
mocker.when::<(), IndexResult<()>>("clear_documents")
|
||||
.then(move |_| result());
|
||||
},
|
||||
TaskContent::IndexDeletion => {
|
||||
mocker.when::<(), ()>("close")
|
||||
.times(index_exists as usize)
|
||||
.then(move |_| ());
|
||||
}
|
||||
TaskContent::IndexUpdate { primary_key }
|
||||
| TaskContent::IndexCreation { primary_key } => {
|
||||
if primary_key.is_some() {
|
||||
let result = move || if !index_op_fails {
|
||||
Ok(IndexMeta{ created_at: Utc::now(), updated_at: Utc::now(), primary_key: None })
|
||||
} else {
|
||||
// return this error because it's easy to generate...
|
||||
Err(IndexError::DocumentNotFound("a doc".into()))
|
||||
};
|
||||
mocker.when::<String, IndexResult<IndexMeta>>("update_primary_key")
|
||||
.then(move |_| result());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mocker.when::<(), IndexResult<IndexStats>>("stats")
|
||||
.then(|()| Ok(IndexStats { size: 0, number_of_documents: 0, is_indexing: Some(false), field_distribution: BTreeMap::new() }));
|
||||
|
||||
let index = Index::mock(mocker);
|
||||
|
||||
match &task.content {
|
||||
// an unexisting index should trigger an index creation in the following cases:
|
||||
TaskContent::DocumentAddition { .. }
|
||||
| TaskContent::SettingsUpdate { is_deletion: false, .. }
|
||||
| TaskContent::IndexCreation { .. } if !index_exists => {
|
||||
index_store
|
||||
.expect_create()
|
||||
.once()
|
||||
.withf(move |&found| !index_exists || found == uuid)
|
||||
.returning(move |_| Box::pin(ok(index.clone())));
|
||||
},
|
||||
TaskContent::IndexDeletion => {
|
||||
index_store
|
||||
.expect_delete()
|
||||
// this is called only if the index.exists
|
||||
.times(index_exists as usize)
|
||||
.withf(move |&found| !index_exists || found == uuid)
|
||||
.returning(move |_| Box::pin(ok(Some(index.clone()))));
|
||||
}
|
||||
// if index already exists, create index will return an error
|
||||
TaskContent::IndexCreation { .. } if index_exists => (),
|
||||
// The index exists and get should be called
|
||||
_ if index_exists => {
|
||||
index_store
|
||||
.expect_get()
|
||||
.once()
|
||||
.withf(move |&found| found == uuid)
|
||||
.returning(move |_| Box::pin(ok(Some(index.clone()))));
|
||||
},
|
||||
// the index doesn't exist and shouldn't be created, the uuidstore will return an error, and get_index will never be called.
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let mut uuid_store = MockIndexMetaStore::new();
|
||||
uuid_store
|
||||
.expect_get()
|
||||
.returning(move |uid| {
|
||||
Box::pin(ok((uid, index_exists.then(|| crate::index_resolver::meta_store::IndexMeta {uuid, creation_task_id: 0 }))))
|
||||
});
|
||||
|
||||
// we should only be creating an index if the index doesn't already exist
|
||||
uuid_store
|
||||
.expect_insert()
|
||||
.withf(move |_, _| !index_exists)
|
||||
.returning(|_, _| Box::pin(ok(())));
|
||||
|
||||
uuid_store
|
||||
.expect_delete()
|
||||
.times(matches!(task.content, TaskContent::IndexDeletion) as usize)
|
||||
.returning(move |_| Box::pin(ok(index_exists.then(|| crate::index_resolver::meta_store::IndexMeta { uuid, creation_task_id: 0}))));
|
||||
|
||||
let mocker = Mocker::default();
|
||||
let update_file_store = UpdateFileStore::mock(mocker);
|
||||
let index_resolver = IndexResolver::new(uuid_store, index_store, update_file_store);
|
||||
|
||||
let result = index_resolver.process_task(&task).await;
|
||||
|
||||
// Test for some expected output scenarios:
|
||||
// Index creation and deletion cannot fail because of a failed index op, since they
|
||||
// don't perform index ops.
|
||||
if index_op_fails && !matches!(task.content, TaskContent::IndexDeletion | TaskContent::IndexCreation { primary_key: None } | TaskContent::IndexUpdate { primary_key: None })
|
||||
|| (index_exists && matches!(task.content, TaskContent::IndexCreation { .. }))
|
||||
|| (!index_exists && matches!(task.content, TaskContent::IndexDeletion
|
||||
| TaskContent::DocumentDeletion(_)
|
||||
| TaskContent::SettingsUpdate { is_deletion: true, ..}
|
||||
| TaskContent::IndexUpdate { .. } ))
|
||||
{
|
||||
assert!(result.is_err(), "{:?}", result);
|
||||
} else {
|
||||
assert!(result.is_ok(), "{:?}", result);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
@ -2,12 +2,14 @@
|
||||
pub mod error;
|
||||
pub mod options;
|
||||
|
||||
mod analytics;
|
||||
pub mod index;
|
||||
pub mod index_controller;
|
||||
mod index_resolver;
|
||||
mod snapshot;
|
||||
pub mod tasks;
|
||||
mod update_file_store;
|
||||
|
||||
mod analytics;
|
||||
|
||||
pub use index_controller::updates::store::Update;
|
||||
pub use index_controller::MeiliSearch;
|
||||
|
||||
pub use milli;
|
||||
|
182
meilisearch-lib/src/snapshot.rs
Normal file
182
meilisearch-lib/src/snapshot.rs
Normal file
@ -0,0 +1,182 @@
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::bail;
|
||||
use fs_extra::dir::{self, CopyOptions};
|
||||
use log::{info, trace};
|
||||
use tokio::time::sleep;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::compression::from_tar_gz;
|
||||
use crate::tasks::task::Job;
|
||||
use crate::tasks::TaskStore;
|
||||
|
||||
pub struct SnapshotService {
|
||||
pub(crate) db_path: PathBuf,
|
||||
pub(crate) snapshot_period: Duration,
|
||||
pub(crate) snapshot_path: PathBuf,
|
||||
pub(crate) index_size: usize,
|
||||
pub(crate) meta_env_size: usize,
|
||||
pub(crate) task_store: TaskStore,
|
||||
}
|
||||
|
||||
impl SnapshotService {
|
||||
pub async fn run(self) {
|
||||
info!(
|
||||
"Snapshot scheduled every {}s.",
|
||||
self.snapshot_period.as_secs()
|
||||
);
|
||||
loop {
|
||||
let snapshot_job = SnapshotJob {
|
||||
dest_path: self.snapshot_path.clone(),
|
||||
src_path: self.db_path.clone(),
|
||||
meta_env_size: self.meta_env_size,
|
||||
index_size: self.index_size,
|
||||
};
|
||||
let job = Job::Snapshot(snapshot_job);
|
||||
self.task_store.register_job(job).await;
|
||||
|
||||
sleep(self.snapshot_period).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_snapshot(
|
||||
db_path: impl AsRef<Path>,
|
||||
snapshot_path: impl AsRef<Path>,
|
||||
ignore_snapshot_if_db_exists: bool,
|
||||
ignore_missing_snapshot: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
|
||||
match from_tar_gz(snapshot_path, &db_path) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
//clean created db folder
|
||||
std::fs::remove_dir_all(&db_path)?;
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
} else if db_path.as_ref().exists() && !ignore_snapshot_if_db_exists {
|
||||
bail!(
|
||||
"database already exists at {:?}, try to delete it or rename it",
|
||||
db_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| db_path.as_ref().to_owned())
|
||||
)
|
||||
} else if !snapshot_path.as_ref().exists() && !ignore_missing_snapshot {
|
||||
bail!(
|
||||
"snapshot doesn't exist at {:?}",
|
||||
snapshot_path
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| snapshot_path.as_ref().to_owned())
|
||||
)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A single snapshot operation: archives the database found at `src_path` into `dest_path`.
#[derive(Debug)]
pub struct SnapshotJob {
    /// Directory in which the snapshot archive is created.
    dest_path: PathBuf,
    /// Path of the database to snapshot.
    src_path: PathBuf,

    /// Map size used when opening the meta environment located at `src_path`.
    meta_env_size: usize,
    /// Map size used when opening each index environment.
    index_size: usize,
}
|
||||
|
||||
impl SnapshotJob {
|
||||
pub async fn run(self) -> anyhow::Result<()> {
|
||||
tokio::task::spawn_blocking(|| self.run_sync()).await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_sync(self) -> anyhow::Result<()> {
|
||||
trace!("Performing snapshot.");
|
||||
|
||||
let snapshot_dir = self.dest_path.clone();
|
||||
std::fs::create_dir_all(&snapshot_dir)?;
|
||||
let temp_snapshot_dir = tempfile::tempdir()?;
|
||||
let temp_snapshot_path = temp_snapshot_dir.path();
|
||||
|
||||
self.snapshot_meta_env(temp_snapshot_path)?;
|
||||
self.snapshot_file_store(temp_snapshot_path)?;
|
||||
self.snapshot_indexes(temp_snapshot_path)?;
|
||||
|
||||
let db_name = self
|
||||
.src_path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("data.ms")
|
||||
.to_string();
|
||||
|
||||
let snapshot_path = self.dest_path.join(format!("{}.snapshot", db_name));
|
||||
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
|
||||
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
|
||||
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
|
||||
let _file = temp_snapshot_file.persist(&snapshot_path)?;
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::fs::Permissions;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
let perm = Permissions::from_mode(0o644);
|
||||
_file.set_permissions(perm)?;
|
||||
}
|
||||
|
||||
trace!("Created snapshot in {:?}.", snapshot_path);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot_meta_env(&self, path: &Path) -> anyhow::Result<()> {
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.map_size(self.meta_env_size);
|
||||
let env = options.open(&self.src_path)?;
|
||||
|
||||
let dst = path.join("data.mdb");
|
||||
env.copy_to_path(dst, heed::CompactionOption::Enabled)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot_file_store(&self, path: &Path) -> anyhow::Result<()> {
|
||||
// for now we simply copy the updates/updates_files
|
||||
// FIXME(marin): We may copy more files than necessary, if new files are added while we are
|
||||
// performing the snapshop. We need a way to filter them out.
|
||||
|
||||
let dst = path.join("updates");
|
||||
fs::create_dir_all(&dst)?;
|
||||
let options = CopyOptions::default();
|
||||
dir::copy(self.src_path.join("updates/updates_files"), dst, &options)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn snapshot_indexes(&self, path: &Path) -> anyhow::Result<()> {
|
||||
let indexes_path = self.src_path.join("indexes/");
|
||||
let dst = path.join("indexes/");
|
||||
|
||||
for entry in WalkDir::new(indexes_path).max_depth(1).into_iter().skip(1) {
|
||||
let entry = entry?;
|
||||
let name = entry.file_name();
|
||||
let dst = dst.join(name);
|
||||
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
|
||||
let dst = dst.join("data.mdb");
|
||||
|
||||
let mut options = heed::EnvOpenOptions::new();
|
||||
options.map_size(self.index_size);
|
||||
let env = options.open(entry.path())?;
|
||||
|
||||
env.copy_to_path(dst, heed::CompactionOption::Enabled)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
22
meilisearch-lib/src/tasks/batch.rs
Normal file
22
meilisearch-lib/src/tasks/batch.rs
Normal file
@ -0,0 +1,22 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use super::{task::Task, task_store::Pending};
|
||||
|
||||
pub type BatchId = u32;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Batch {
|
||||
pub id: BatchId,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub tasks: Vec<Pending<Task>>,
|
||||
}
|
||||
|
||||
impl Batch {
|
||||
pub fn len(&self) -> usize {
|
||||
self.tasks.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.tasks.is_empty()
|
||||
}
|
||||
}
|
33
meilisearch-lib/src/tasks/error.rs
Normal file
33
meilisearch-lib/src/tasks/error.rs
Normal file
@ -0,0 +1,33 @@
|
||||
use meilisearch_error::{Code, ErrorCode};
|
||||
use tokio::task::JoinError;
|
||||
|
||||
use crate::update_file_store::UpdateFileStoreError;
|
||||
|
||||
use super::task::TaskId;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, TaskError>;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum TaskError {
|
||||
#[error("Task `{0}` not found.")]
|
||||
UnexistingTask(TaskId),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
|
||||
}
|
||||
|
||||
// NOTE(review): `internal_error!` is defined outside this file; presumably it generates the
// conversions from the listed error types into `TaskError::Internal` — confirm against the
// macro definition.
internal_error!(
    TaskError: heed::Error,
    JoinError,
    std::io::Error,
    serde_json::Error,
    UpdateFileStoreError
);
|
||||
|
||||
impl ErrorCode for TaskError {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
TaskError::UnexistingTask(_) => Code::TaskNotFound,
|
||||
TaskError::Internal(_) => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
60
meilisearch-lib/src/tasks/mod.rs
Normal file
60
meilisearch-lib/src/tasks/mod.rs
Normal file
@ -0,0 +1,60 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[cfg(test)]
|
||||
pub use task_store::test::MockTaskStore as TaskStore;
|
||||
#[cfg(not(test))]
|
||||
pub use task_store::TaskStore;
|
||||
|
||||
pub use task_store::{Pending, TaskFilter};
|
||||
|
||||
use batch::Batch;
|
||||
use error::Result;
|
||||
use scheduler::Scheduler;
|
||||
|
||||
pub mod batch;
|
||||
pub mod error;
|
||||
pub mod scheduler;
|
||||
pub mod task;
|
||||
mod task_store;
|
||||
|
||||
#[cfg_attr(test, mockall::automock(type Error=test::DebugError;))]
|
||||
#[async_trait]
|
||||
pub trait TaskPerformer: Sync + Send + 'static {
|
||||
type Error: Serialize + for<'de> Deserialize<'de> + std::error::Error + Sync + Send + 'static;
|
||||
/// Processes the `Task` batch returning the batch with the `Task` updated.
|
||||
async fn process(&self, batch: Batch) -> Batch;
|
||||
/// `finish` is called when the result of `process` has been commited to the task store. This
|
||||
/// method can be used to perform cleanup after the update has been completed for example.
|
||||
async fn finish(&self, batch: &Batch);
|
||||
}
|
||||
|
||||
pub fn create_task_store<P>(env: heed::Env, performer: Arc<P>) -> Result<TaskStore>
|
||||
where
|
||||
P: TaskPerformer,
|
||||
{
|
||||
let task_store = TaskStore::new(env)?;
|
||||
let scheduler = Scheduler::new(task_store.clone(), performer, Duration::from_millis(1));
|
||||
tokio::task::spawn_local(scheduler.run());
|
||||
Ok(task_store)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use std::fmt;

    use serde::{Deserialize, Serialize};

    /// Minimal error type used as the `Error` of the mocked `TaskPerformer`.
    #[derive(Debug, Serialize, Deserialize)]
    pub struct DebugError;

    impl fmt::Display for DebugError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "an error")
        }
    }

    impl std::error::Error for DebugError {}
}
|
253
meilisearch-lib/src/tasks/scheduler.rs
Normal file
253
meilisearch-lib/src/tasks/scheduler.rs
Normal file
@ -0,0 +1,253 @@
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::Utc;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::batch::Batch;
|
||||
use super::error::Result;
|
||||
#[cfg(test)]
|
||||
use super::task_store::test::MockTaskStore as TaskStore;
|
||||
use super::task_store::Pending;
|
||||
#[cfg(not(test))]
|
||||
use super::task_store::TaskStore;
|
||||
use super::TaskPerformer;
|
||||
use crate::tasks::task::TaskEvent;
|
||||
|
||||
/// The scheduler roles is to perform batches of tasks one at a time. It will monitor the TaskStore
|
||||
/// for new tasks, put them in a batch, and process the batch as soon as possible.
|
||||
///
|
||||
/// When a batch is currently processing, the scheduler is just waiting.
|
||||
pub struct Scheduler<P: TaskPerformer> {
|
||||
store: TaskStore,
|
||||
performer: Arc<P>,
|
||||
|
||||
/// The interval at which the the `TaskStore` should be checked for new updates
|
||||
task_store_check_interval: Duration,
|
||||
}
|
||||
|
||||
impl<P> Scheduler<P>
|
||||
where
|
||||
P: TaskPerformer + Send + Sync + 'static,
|
||||
P::Error: Serialize + for<'de> Deserialize<'de> + Send + Sync + 'static,
|
||||
{
|
||||
pub fn new(store: TaskStore, performer: Arc<P>, task_store_check_interval: Duration) -> Self {
|
||||
Self {
|
||||
store,
|
||||
performer,
|
||||
task_store_check_interval,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(self) {
|
||||
loop {
|
||||
if let Err(e) = self.process_next_batch().await {
|
||||
log::error!("an error occured while processing an update batch: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_next_batch(&self) -> Result<()> {
|
||||
match self.prepare_batch().await? {
|
||||
Some(mut batch) => {
|
||||
for task in &mut batch.tasks {
|
||||
match task {
|
||||
Pending::Task(task) => task.events.push(TaskEvent::Processing(Utc::now())),
|
||||
Pending::Job(_) => (),
|
||||
}
|
||||
}
|
||||
|
||||
// the jobs are ignored
|
||||
batch.tasks = self.store.update_tasks(batch.tasks).await?;
|
||||
|
||||
let performer = self.performer.clone();
|
||||
let batch_result = performer.process(batch).await;
|
||||
self.handle_batch_result(batch_result).await?;
|
||||
}
|
||||
None => {
|
||||
// No update found to create a batch we wait a bit before we retry.
|
||||
tokio::time::sleep(self.task_store_check_interval).await;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Checks for pending tasks and groups them in a batch. If there are no pending update,
|
||||
/// return Ok(None)
|
||||
///
|
||||
/// Until batching is properly implemented, the batches contain only one task.
|
||||
async fn prepare_batch(&self) -> Result<Option<Batch>> {
|
||||
match self.store.peek_pending_task().await {
|
||||
Some(Pending::Task(next_task_id)) => {
|
||||
let mut task = self.store.get_task(next_task_id, None).await?;
|
||||
|
||||
task.events.push(TaskEvent::Batched {
|
||||
timestamp: Utc::now(),
|
||||
batch_id: 0,
|
||||
});
|
||||
|
||||
let batch = Batch {
|
||||
id: 0,
|
||||
// index_uid: task.index_uid.clone(),
|
||||
created_at: Utc::now(),
|
||||
tasks: vec![Pending::Task(task)],
|
||||
};
|
||||
Ok(Some(batch))
|
||||
}
|
||||
Some(Pending::Job(job)) => Ok(Some(Batch {
|
||||
id: 0,
|
||||
created_at: Utc::now(),
|
||||
tasks: vec![Pending::Job(job)],
|
||||
})),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles the result from a batch processing.
|
||||
///
|
||||
/// When a task is processed, the result of the processing is pushed to its event list. The
|
||||
/// handle batch result make sure that the new state is save into its store.
|
||||
/// The tasks are then removed from the processing queue.
|
||||
async fn handle_batch_result(&self, mut batch: Batch) -> Result<()> {
|
||||
let tasks = self.store.update_tasks(batch.tasks).await?;
|
||||
batch.tasks = tasks;
|
||||
self.store.delete_pending(&batch.tasks[0]).await;
|
||||
self.performer.finish(&batch).await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use nelson::Mocker;

    use crate::index_resolver::IndexUid;
    use crate::tasks::task::Task;
    use crate::tasks::task_store::TaskFilter;

    use super::super::task::{TaskContent, TaskEvent, TaskId, TaskResult};
    use super::super::MockTaskPerformer;
    use super::*;

    /// Builds the freshly created `IndexDeletion` task served by the mocked `get_task`.
    fn index_deletion_task(id: TaskId) -> Task {
        Task {
            id,
            index_uid: IndexUid::new("Test".to_string()).unwrap(),
            content: TaskContent::IndexDeletion,
            events: vec![TaskEvent::Created(Utc::now())],
        }
    }

    /// A pending task id must be turned into a batch holding exactly that task.
    #[tokio::test]
    async fn test_prepare_batch_full() {
        let mocker = Mocker::default();

        // `prepare_batch` uses the value returned by `get_task` directly as a `Task`, so the
        // mock has to be registered with `Result<Task>`, as in `test_loop_run_normal`.
        mocker
            .when::<(TaskId, Option<TaskFilter>), Result<Task>>("get_task")
            .once()
            .then(|(id, _filter)| Ok(index_deletion_task(id)));

        mocker
            .when::<(), Option<Pending<TaskId>>>("peek_pending_task")
            .then(|()| Some(Pending::Task(1)));

        let store = TaskStore::mock(mocker);
        let performer = Arc::new(MockTaskPerformer::new());

        let scheduler = Scheduler {
            store,
            performer,
            task_store_check_interval: Duration::from_millis(1),
        };

        let batch = scheduler.prepare_batch().await.unwrap().unwrap();

        assert_eq!(batch.tasks.len(), 1);
        assert!(
            matches!(batch.tasks[0], Pending::Task(Task { id: 1, .. })),
            "{:?}",
            batch.tasks[0]
        );
    }

    /// Without any pending task, no batch is prepared.
    #[tokio::test]
    async fn test_prepare_batch_empty() {
        let mocker = Mocker::default();
        mocker
            .when::<(), Option<Pending<TaskId>>>("peek_pending_task")
            .then(|()| None);

        let store = TaskStore::mock(mocker);
        let performer = Arc::new(MockTaskPerformer::new());

        let scheduler = Scheduler {
            store,
            performer,
            task_store_check_interval: Duration::from_millis(1),
        };

        assert!(scheduler.prepare_batch().await.unwrap().is_none());
    }

    /// Runs the scheduler loop over a single pending task and checks, through the mock
    /// expectations, that the task goes through the whole processing pipeline once.
    #[tokio::test]
    async fn test_loop_run_normal() {
        let mocker = Mocker::default();

        // The pending task is served only once; afterwards the queue looks empty.
        let mut id = Some(1);
        mocker
            .when::<(), Option<Pending<TaskId>>>("peek_pending_task")
            .then(move |()| id.take().map(Pending::Task));

        mocker
            .when::<(TaskId, Option<TaskFilter>), Result<Task>>("get_task")
            .once()
            .then(|(id, _)| Ok(index_deletion_task(id)));

        // Called once before the batch is processed, and once with its result.
        mocker
            .when::<Vec<Pending<Task>>, Result<Vec<Pending<Task>>>>("update_tasks")
            .times(2)
            .then(|tasks| {
                assert_eq!(tasks.len(), 1);
                Ok(tasks)
            });

        mocker.when::<(), ()>("delete_pending").once().then(|_| ());

        let store = TaskStore::mock(mocker);

        let mut performer = MockTaskPerformer::new();
        performer.expect_process().once().returning(|mut batch| {
            for pending in batch.tasks.iter_mut() {
                match pending {
                    Pending::Task(task) => task.events.push(TaskEvent::Succeded {
                        result: TaskResult::Other,
                        timestamp: Utc::now(),
                    }),
                    _ => panic!("expected a task, found a job"),
                }
            }

            batch
        });

        performer.expect_finish().once().returning(|_| ());

        let performer = Arc::new(performer);

        let scheduler = Scheduler {
            store,
            performer,
            task_store_check_interval: Duration::from_millis(1),
        };

        let handle = tokio::spawn(scheduler.run());

        // The loop never returns by itself: it is only given some time to run, and a panic
        // raised inside of it during that time is surfaced here.
        if let Ok(r) = tokio::time::timeout(Duration::from_millis(100), handle).await {
            r.unwrap();
        }
    }
}
|
169
meilisearch-lib/src/tasks/task.rs
Normal file
169
meilisearch-lib/src/tasks/task.rs
Normal file
@ -0,0 +1,169 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use meilisearch_error::ResponseError;
|
||||
use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::oneshot;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::batch::BatchId;
|
||||
use crate::{
|
||||
index::{Settings, Unchecked},
|
||||
index_resolver::{error::IndexResolverError, IndexUid},
|
||||
snapshot::SnapshotJob,
|
||||
};
|
||||
|
||||
pub type TaskId = u64;
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub enum TaskResult {
|
||||
DocumentAddition { indexed_documents: u64 },
|
||||
DocumentDeletion { deleted_documents: u64 },
|
||||
ClearAll { deleted_documents: u64 },
|
||||
Other,
|
||||
}
|
||||
|
||||
impl From<DocumentAdditionResult> for TaskResult {
|
||||
fn from(other: DocumentAdditionResult) -> Self {
|
||||
Self::DocumentAddition {
|
||||
indexed_documents: other.indexed_documents,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub enum TaskEvent {
|
||||
Created(#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] DateTime<Utc>),
|
||||
Batched {
|
||||
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
|
||||
timestamp: DateTime<Utc>,
|
||||
batch_id: BatchId,
|
||||
},
|
||||
Processing(#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] DateTime<Utc>),
|
||||
Succeded {
|
||||
result: TaskResult,
|
||||
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
|
||||
timestamp: DateTime<Utc>,
|
||||
},
|
||||
Failed {
|
||||
error: ResponseError,
|
||||
#[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))]
|
||||
timestamp: DateTime<Utc>,
|
||||
},
|
||||
}
|
||||
|
||||
/// A task represents an operation that Meilisearch must do.
|
||||
/// It's stored on disk and executed from the lowest to highest Task id.
|
||||
/// Everytime a new task is created it has a higher Task id than the previous one.
|
||||
/// See also `Job`.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub struct Task {
|
||||
pub id: TaskId,
|
||||
pub index_uid: IndexUid,
|
||||
pub content: TaskContent,
|
||||
pub events: Vec<TaskEvent>,
|
||||
}
|
||||
|
||||
impl Task {
|
||||
/// Return true when a task is finished.
|
||||
/// A task is finished when its last state is either `Succeeded` or `Failed`.
|
||||
pub fn is_finished(&self) -> bool {
|
||||
self.events.last().map_or(false, |event| {
|
||||
matches!(event, TaskEvent::Succeded { .. } | TaskEvent::Failed { .. })
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the content_uuid of the `Task` if there is one.
|
||||
pub fn get_content_uuid(&self) -> Option<Uuid> {
|
||||
match self {
|
||||
Task {
|
||||
content: TaskContent::DocumentAddition { content_uuid, .. },
|
||||
..
|
||||
} => Some(*content_uuid),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A job is like a volatile priority `Task`.
|
||||
/// It should be processed as fast as possible and is not stored on disk.
|
||||
/// This means, when Meilisearch is closed all your unprocessed jobs will disappear.
|
||||
#[derive(Debug, derivative::Derivative)]
|
||||
#[derivative(PartialEq)]
|
||||
pub enum Job {
|
||||
Dump {
|
||||
#[derivative(PartialEq = "ignore")]
|
||||
ret: oneshot::Sender<Result<(), IndexResolverError>>,
|
||||
path: PathBuf,
|
||||
},
|
||||
Snapshot(#[derivative(PartialEq = "ignore")] SnapshotJob),
|
||||
Empty,
|
||||
}
|
||||
|
||||
impl Default for Job {
|
||||
fn default() -> Self {
|
||||
Self::Empty
|
||||
}
|
||||
}
|
||||
|
||||
impl Job {
|
||||
pub fn take(&mut self) -> Self {
|
||||
std::mem::take(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
pub enum DocumentDeletion {
|
||||
Clear,
|
||||
Ids(Vec<String>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
pub enum TaskContent {
|
||||
DocumentAddition {
|
||||
#[cfg_attr(test, proptest(value = "Uuid::new_v4()"))]
|
||||
content_uuid: Uuid,
|
||||
#[cfg_attr(test, proptest(strategy = "test::index_document_method_strategy()"))]
|
||||
merge_strategy: IndexDocumentsMethod,
|
||||
primary_key: Option<String>,
|
||||
documents_count: usize,
|
||||
},
|
||||
DocumentDeletion(DocumentDeletion),
|
||||
SettingsUpdate {
|
||||
settings: Settings<Unchecked>,
|
||||
/// Indicates whether the task was a deletion
|
||||
is_deletion: bool,
|
||||
},
|
||||
IndexDeletion,
|
||||
IndexCreation {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
IndexUpdate {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use proptest::prelude::*;

    use super::*;

    /// Strategy yielding either `ReplaceDocuments` or `UpdateDocuments`.
    pub(super) fn index_document_method_strategy() -> impl Strategy<Value = IndexDocumentsMethod> {
        let replace = Just(IndexDocumentsMethod::ReplaceDocuments);
        let update = Just(IndexDocumentsMethod::UpdateDocuments);

        prop_oneof![replace, update]
    }

    /// Strategy always yielding the time at which the strategy itself was built.
    pub(super) fn datetime_strategy() -> impl Strategy<Value = DateTime<Utc>> {
        let now = Utc::now();
        Just(now)
    }
}
|
480
meilisearch-lib/src/tasks/task_store/mod.rs
Normal file
480
meilisearch-lib/src/tasks/task_store/mod.rs
Normal file
@ -0,0 +1,480 @@
|
||||
mod store;
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{BinaryHeap, HashSet};
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::Utc;
|
||||
use heed::{Env, RwTxn};
|
||||
use log::debug;
|
||||
use tokio::sync::RwLock;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::error::TaskError;
|
||||
use super::task::{Job, Task, TaskContent, TaskId};
|
||||
use super::Result;
|
||||
use crate::index_resolver::IndexUid;
|
||||
use crate::tasks::task::TaskEvent;
|
||||
use crate::update_file_store::UpdateFileStore;
|
||||
|
||||
#[cfg(test)]
|
||||
pub use store::test::MockStore as Store;
|
||||
#[cfg(not(test))]
|
||||
pub use store::Store;
|
||||
|
||||
/// Defines the constraints to apply when querying the store for tasks.
#[derive(Default, Debug)]
pub struct TaskFilter {
    /// Index uids a task is allowed to relate to; `None` means that every task passes.
    indexes: Option<HashSet<String>>,
}
|
||||
|
||||
impl TaskFilter {
|
||||
fn pass(&self, task: &Task) -> bool {
|
||||
self.indexes
|
||||
.as_ref()
|
||||
.map(|indexes| indexes.contains(&*task.index_uid))
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
/// Adds an index to the filter, so the filter must match this index.
|
||||
pub fn filter_index(&mut self, index: String) {
|
||||
self.indexes
|
||||
.get_or_insert_with(Default::default)
|
||||
.insert(index);
|
||||
}
|
||||
}
|
||||
|
||||
/// You can't clone a job because of its volatile nature.
|
||||
/// If you need to take the `Job` with you though. You can call the method
|
||||
/// `Pending::take`. It'll return the `Pending` as-is but `Empty` the original.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Pending<T> {
|
||||
/// A task stored on disk that must be processed.
|
||||
Task(T),
|
||||
/// Job always have a higher priority over normal tasks and are not stored on disk.
|
||||
/// It can be refered as `Volatile job`.
|
||||
Job(Job),
|
||||
}
|
||||
|
||||
impl Pending<TaskId> {
    /// Returns an equivalent entry: a task id is copied, whereas the content of a
    /// volatile job is moved out through `Job::take`.
    pub(crate) fn take(&mut self) -> Self {
        match *self {
            Self::Task(task_id) => Self::Task(task_id),
            Self::Job(ref mut job) => Self::Job(job.take()),
        }
    }
}
|
||||
|
||||
impl Eq for Pending<TaskId> {}
|
||||
|
||||
impl PartialOrd for Pending<TaskId> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
match (self, other) {
|
||||
// in case of two tasks we want to return the lowest taskId first.
|
||||
(Pending::Task(lhs), Pending::Task(rhs)) => Some(lhs.cmp(rhs).reverse()),
|
||||
// A job is always better than a task.
|
||||
(Pending::Task(_), Pending::Job(_)) => Some(Ordering::Less),
|
||||
(Pending::Job(_), Pending::Task(_)) => Some(Ordering::Greater),
|
||||
// When there is two jobs we consider them equals.
|
||||
(Pending::Job(_), Pending::Job(_)) => Some(Ordering::Equal),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Pending<Task> {
|
||||
pub fn get_content_uuid(&self) -> Option<Uuid> {
|
||||
match self {
|
||||
Pending::Task(task) => task.get_content_uuid(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Pending<TaskId> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.partial_cmp(other).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TaskStore {
|
||||
store: Arc<Store>,
|
||||
pending_queue: Arc<RwLock<BinaryHeap<Pending<TaskId>>>>,
|
||||
}
|
||||
|
||||
impl Clone for TaskStore {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
store: self.store.clone(),
|
||||
pending_queue: self.pending_queue.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TaskStore {
|
||||
pub fn new(env: heed::Env) -> Result<Self> {
|
||||
let mut store = Store::new(env)?;
|
||||
let unfinished_tasks = store.reset_and_return_unfinished_tasks()?;
|
||||
let store = Arc::new(store);
|
||||
|
||||
Ok(Self {
|
||||
store,
|
||||
pending_queue: Arc::new(RwLock::new(unfinished_tasks)),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn register(&self, index_uid: IndexUid, content: TaskContent) -> Result<Task> {
|
||||
debug!("registering update: {:?}", content);
|
||||
let store = self.store.clone();
|
||||
let task = tokio::task::spawn_blocking(move || -> Result<Task> {
|
||||
let mut txn = store.wtxn()?;
|
||||
let next_task_id = store.next_task_id(&mut txn)?;
|
||||
let created_at = TaskEvent::Created(Utc::now());
|
||||
let task = Task {
|
||||
id: next_task_id,
|
||||
index_uid,
|
||||
content,
|
||||
events: vec![created_at],
|
||||
};
|
||||
|
||||
store.put(&mut txn, &task)?;
|
||||
txn.commit()?;
|
||||
|
||||
Ok(task)
|
||||
})
|
||||
.await??;
|
||||
|
||||
self.pending_queue
|
||||
.write()
|
||||
.await
|
||||
.push(Pending::Task(task.id));
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Writes `task` as-is into the store using the caller's write transaction.
///
/// The task is not added to the pending queue and the transaction is not committed here.
pub fn register_raw_update(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
    self.store.put(wtxn, task)
}
|
||||
|
||||
/// Register an update that applies on multiple indexes.
|
||||
/// Currently the update is considered as a priority.
|
||||
pub async fn register_job(&self, content: Job) {
|
||||
debug!("registering a job: {:?}", content);
|
||||
self.pending_queue.write().await.push(Pending::Job(content));
|
||||
}
|
||||
|
||||
/// Returns the next task to process.
|
||||
pub async fn peek_pending_task(&self) -> Option<Pending<TaskId>> {
|
||||
let mut pending_queue = self.pending_queue.write().await;
|
||||
loop {
|
||||
match pending_queue.peek()? {
|
||||
Pending::Job(Job::Empty) => drop(pending_queue.pop()),
|
||||
_ => return Some(pending_queue.peek_mut()?.take()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the next task to process if there is one.
|
||||
pub async fn get_processing_task(&self) -> Result<Option<Task>> {
|
||||
match self.peek_pending_task().await {
|
||||
Some(Pending::Task(tid)) => {
|
||||
let task = self.get_task(tid, None).await?;
|
||||
Ok(matches!(task.events.last(), Some(TaskEvent::Processing(_))).then(|| task))
|
||||
}
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
|
||||
let store = self.store.clone();
|
||||
let task = tokio::task::spawn_blocking(move || -> Result<_> {
|
||||
let txn = store.rtxn()?;
|
||||
let task = store.get(&txn, id)?;
|
||||
Ok(task)
|
||||
})
|
||||
.await??
|
||||
.ok_or(TaskError::UnexistingTask(id))?;
|
||||
|
||||
match filter {
|
||||
Some(filter) => filter
|
||||
.pass(&task)
|
||||
.then(|| task)
|
||||
.ok_or(TaskError::UnexistingTask(id)),
|
||||
None => Ok(task),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn update_tasks(&self, tasks: Vec<Pending<Task>>) -> Result<Vec<Pending<Task>>> {
|
||||
let store = self.store.clone();
|
||||
|
||||
let tasks = tokio::task::spawn_blocking(move || -> Result<_> {
|
||||
let mut txn = store.wtxn()?;
|
||||
|
||||
for task in &tasks {
|
||||
match task {
|
||||
Pending::Task(task) => store.put(&mut txn, task)?,
|
||||
Pending::Job(_) => (),
|
||||
}
|
||||
}
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
Ok(tasks)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
/// Delete one task from the queue and remove all `Empty` job.
|
||||
pub async fn delete_pending(&self, to_delete: &Pending<Task>) {
|
||||
if let Pending::Task(Task { id: pending_id, .. }) = to_delete {
|
||||
let mut pending_queue = self.pending_queue.write().await;
|
||||
*pending_queue = std::mem::take(&mut *pending_queue)
|
||||
.into_iter()
|
||||
.filter(|pending| match pending {
|
||||
Pending::Job(Job::Empty) => false,
|
||||
Pending::Task(id) => pending_id != id,
|
||||
_ => true,
|
||||
})
|
||||
.collect::<BinaryHeap<Pending<TaskId>>>();
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_tasks(
|
||||
&self,
|
||||
offset: Option<TaskId>,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
) -> Result<Vec<Task>> {
|
||||
let store = self.store.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let txn = store.rtxn()?;
|
||||
let tasks = store.list_tasks(&txn, offset, filter, limit)?;
|
||||
Ok(tasks)
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
pub async fn dump(
|
||||
&self,
|
||||
dir_path: impl AsRef<Path>,
|
||||
update_file_store: UpdateFileStore,
|
||||
) -> Result<()> {
|
||||
let update_dir = dir_path.as_ref().join("updates");
|
||||
let updates_file = update_dir.join("data.jsonl");
|
||||
let tasks = self.list_tasks(None, None, None).await?;
|
||||
|
||||
let dir_path = dir_path.as_ref().to_path_buf();
|
||||
tokio::task::spawn_blocking(move || -> Result<()> {
|
||||
std::fs::create_dir(&update_dir)?;
|
||||
let updates_file = std::fs::File::create(updates_file)?;
|
||||
let mut updates_file = BufWriter::new(updates_file);
|
||||
|
||||
for task in tasks {
|
||||
serde_json::to_writer(&mut updates_file, &task)?;
|
||||
updates_file.write_all(b"\n")?;
|
||||
|
||||
if !task.is_finished() {
|
||||
if let Some(content_uuid) = task.get_content_uuid() {
|
||||
update_file_store.dump(content_uuid, &dir_path)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
updates_file.flush()?;
|
||||
Ok(())
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, env: Env) -> anyhow::Result<()> {
|
||||
// create a dummy update field store, since it is not needed right now.
|
||||
let store = Self::new(env.clone())?;
|
||||
|
||||
let src_update_path = src.as_ref().join("updates");
|
||||
let update_data = std::fs::File::open(&src_update_path.join("data.jsonl"))?;
|
||||
let update_data = std::io::BufReader::new(update_data);
|
||||
|
||||
let stream = serde_json::Deserializer::from_reader(update_data).into_iter::<Task>();
|
||||
|
||||
let mut wtxn = env.write_txn()?;
|
||||
for entry in stream {
|
||||
store.register_raw_update(&mut wtxn, &entry?)?;
|
||||
}
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use crate::tasks::task_store::store::test::tmp_env;
|
||||
|
||||
use super::*;
|
||||
|
||||
use nelson::Mocker;
|
||||
use proptest::{
|
||||
strategy::Strategy,
|
||||
test_runner::{Config, TestRunner},
|
||||
};
|
||||
|
||||
pub enum MockTaskStore {
|
||||
Real(TaskStore),
|
||||
Mock(Arc<Mocker>),
|
||||
}
|
||||
|
||||
impl Clone for MockTaskStore {
|
||||
fn clone(&self) -> Self {
|
||||
match self {
|
||||
Self::Real(x) => Self::Real(x.clone()),
|
||||
Self::Mock(x) => Self::Mock(x.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MockTaskStore {
|
||||
pub fn new(env: heed::Env) -> Result<Self> {
|
||||
Ok(Self::Real(TaskStore::new(env)?))
|
||||
}
|
||||
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(Arc::new(mocker))
|
||||
}
|
||||
|
||||
pub async fn update_tasks(&self, tasks: Vec<Pending<Task>>) -> Result<Vec<Pending<Task>>> {
|
||||
match self {
|
||||
Self::Real(s) => s.update_tasks(tasks).await,
|
||||
Self::Mock(m) => unsafe {
|
||||
m.get::<_, Result<Vec<Pending<Task>>>>("update_tasks")
|
||||
.call(tasks)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_pending(&self, to_delete: &Pending<Task>) {
|
||||
match self {
|
||||
Self::Real(s) => s.delete_pending(to_delete).await,
|
||||
Self::Mock(m) => unsafe { m.get("delete_pending").call(to_delete) },
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> {
|
||||
match self {
|
||||
Self::Real(s) => s.get_task(id, filter).await,
|
||||
Self::Mock(m) => unsafe { m.get::<_, Result<Task>>("get_task").call((id, filter)) },
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_processing_task(&self) -> Result<Option<Task>> {
|
||||
match self {
|
||||
Self::Real(s) => s.get_processing_task().await,
|
||||
Self::Mock(m) => unsafe {
|
||||
m.get::<_, Result<Option<Task>>>("get_pending_task")
|
||||
.call(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn peek_pending_task(&self) -> Option<Pending<TaskId>> {
|
||||
match self {
|
||||
Self::Real(s) => s.peek_pending_task().await,
|
||||
Self::Mock(m) => unsafe {
|
||||
m.get::<_, Option<Pending<TaskId>>>("peek_pending_task")
|
||||
.call(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_tasks(
|
||||
&self,
|
||||
from: Option<TaskId>,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
) -> Result<Vec<Task>> {
|
||||
match self {
|
||||
Self::Real(s) => s.list_tasks(from, filter, limit).await,
|
||||
Self::Mock(_m) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn dump(&self, path: &Path, update_file_store: UpdateFileStore) -> Result<()> {
|
||||
match self {
|
||||
Self::Real(s) => s.dump(path, update_file_store).await,
|
||||
Self::Mock(_m) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn register(&self, index_uid: IndexUid, content: TaskContent) -> Result<Task> {
|
||||
match self {
|
||||
Self::Real(s) => s.register(index_uid, content).await,
|
||||
Self::Mock(_m) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register_raw_update(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
match self {
|
||||
Self::Real(s) => s.register_raw_update(wtxn, task),
|
||||
Self::Mock(_m) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn register_job(&self, content: Job) {
|
||||
match self {
|
||||
Self::Real(s) => s.register_job(content).await,
|
||||
Self::Mock(_m) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_dump(path: impl AsRef<Path>, env: Env) -> anyhow::Result<()> {
|
||||
TaskStore::load_dump(path, env)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that `next_task_id` is always one past the highest task id in the store.
#[test]
fn test_increment_task_id() {
    let tmp = tmp_env();
    let store = Store::new(tmp.env()).unwrap();

    // An empty store hands out 0 as its first id.
    {
        let mut txn = store.wtxn().unwrap();
        assert_eq!(store.next_task_id(&mut txn).unwrap(), 0);
        txn.abort().unwrap();
    }

    let task_strategy = (0..100u64).prop_map(|id: TaskId| Task {
        id,
        index_uid: IndexUid::new_unchecked("test"),
        content: TaskContent::IndexCreation { primary_key: None },
        events: Vec::new(),
    });

    let mut runner = TestRunner::new(Config::default());
    let outcome = runner.run(&task_strategy, |task| {
        let mut txn = store.wtxn().unwrap();
        let previous_id = store.next_task_id(&mut txn).unwrap();

        store.put(&mut txn, &task).unwrap();

        let next_id = store.next_task_id(&mut txn).unwrap();

        // Putting a task whose id is below the next id leaves the next id unchanged;
        // otherwise the next id becomes `task.id + 1`.
        let expected_next_id = if task.id < previous_id {
            previous_id
        } else {
            task.id + 1
        };
        assert_eq!(next_id, expected_next_id);

        txn.commit().unwrap();

        Ok(())
    });
    outcome.unwrap();
}
|
||||
}
|
452
meilisearch-lib/src/tasks/task_store/store.rs
Normal file
452
meilisearch-lib/src/tasks/task_store/store.rs
Normal file
@ -0,0 +1,452 @@
|
||||
#[allow(clippy::upper_case_acronyms)]
|
||||
type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
||||
|
||||
const UID_TASK_IDS: &str = "uid_task_id";
|
||||
const TASKS: &str = "tasks";
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::convert::TryInto;
|
||||
use std::mem::size_of;
|
||||
use std::ops::Range;
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
use heed::types::{ByteSlice, OwnedType, SerdeJson, Unit};
|
||||
use heed::{BytesDecode, BytesEncode, Database, Env, RoTxn, RwTxn};
|
||||
|
||||
use crate::tasks::task::{Task, TaskId};
|
||||
|
||||
use super::super::Result;
|
||||
|
||||
use super::{Pending, TaskFilter};
|
||||
|
||||
enum IndexUidTaskIdCodec {}
|
||||
|
||||
impl<'a> BytesEncode<'a> for IndexUidTaskIdCodec {
|
||||
type EItem = (&'a str, TaskId);
|
||||
|
||||
fn bytes_encode((s, id): &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let size = s.len() + std::mem::size_of::<TaskId>() + 1;
|
||||
if size > 512 {
|
||||
return None;
|
||||
}
|
||||
let mut b = Vec::with_capacity(size);
|
||||
b.extend_from_slice(s.as_bytes());
|
||||
// null terminate the string
|
||||
b.push(0);
|
||||
b.extend_from_slice(&id.to_be_bytes());
|
||||
Some(Cow::Owned(b))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for IndexUidTaskIdCodec {
|
||||
type DItem = (&'a str, TaskId);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let len = bytes.len();
|
||||
let s_end = len.checked_sub(size_of::<TaskId>())?.checked_sub(1)?;
|
||||
let str_bytes = &bytes[..s_end];
|
||||
let str = std::str::from_utf8(str_bytes).ok()?;
|
||||
let id = TaskId::from_be_bytes(bytes[(len - size_of::<TaskId>())..].try_into().ok()?);
|
||||
Some((str, id))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Store {
|
||||
env: Env,
|
||||
uids_task_ids: Database<IndexUidTaskIdCodec, Unit>,
|
||||
tasks: Database<OwnedType<BEU64>, SerdeJson<Task>>,
|
||||
}
|
||||
|
||||
impl Store {
|
||||
/// Create a new store from the specified `Path`.
|
||||
/// Be really cautious when calling this function, the returned `Store` may
|
||||
/// be in an invalid state, with dangling processing tasks.
|
||||
/// You want to patch all un-finished tasks and put them in your pending
|
||||
/// queue with the `reset_and_return_unfinished_update` method.
|
||||
pub fn new(env: heed::Env) -> Result<Self> {
|
||||
let uids_task_ids = env.create_database(Some(UID_TASK_IDS))?;
|
||||
let tasks = env.create_database(Some(TASKS))?;
|
||||
|
||||
Ok(Self {
|
||||
env,
|
||||
uids_task_ids,
|
||||
tasks,
|
||||
})
|
||||
}
|
||||
|
||||
/// This function should be called *right after* creating the store.
|
||||
/// It put back all unfinished update in the `Created` state. This
|
||||
/// allow us to re-enqueue an update that didn't had the time to finish
|
||||
/// when Meilisearch closed.
|
||||
pub fn reset_and_return_unfinished_tasks(&mut self) -> Result<BinaryHeap<Pending<TaskId>>> {
|
||||
let mut unfinished_tasks: BinaryHeap<Pending<TaskId>> = BinaryHeap::new();
|
||||
|
||||
let mut wtxn = self.wtxn()?;
|
||||
let mut iter = self.tasks.rev_iter_mut(&mut wtxn)?;
|
||||
|
||||
while let Some(entry) = iter.next() {
|
||||
let entry = entry?;
|
||||
let (id, mut task): (BEU64, Task) = entry;
|
||||
|
||||
// Since all tasks are ordered, we can stop iterating when we encounter our first non-finished task.
|
||||
if task.is_finished() {
|
||||
break;
|
||||
}
|
||||
|
||||
// we only keep the first state. It’s supposed to be a `Created` state.
|
||||
task.events.drain(1..);
|
||||
unfinished_tasks.push(Pending::Task(id.get()));
|
||||
|
||||
// Since we own the id and the task this is a safe operation.
|
||||
unsafe {
|
||||
iter.put_current(&id, &task)?;
|
||||
}
|
||||
}
|
||||
|
||||
drop(iter);
|
||||
wtxn.commit()?;
|
||||
|
||||
Ok(unfinished_tasks)
|
||||
}
|
||||
|
||||
/// Opens a write transaction on the store's environment.
pub fn wtxn(&self) -> Result<RwTxn> {
    let txn = self.env.write_txn()?;
    Ok(txn)
}
|
||||
|
||||
/// Opens a read transaction on the store's environment.
pub fn rtxn(&self) -> Result<RoTxn> {
    let txn = self.env.read_txn()?;
    Ok(txn)
}
|
||||
|
||||
/// Returns the id for the next task.
|
||||
///
|
||||
/// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit
|
||||
/// the task to the store in the same transaction, no one else will hav this task id.
|
||||
pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> {
|
||||
let id = self
|
||||
.tasks
|
||||
.lazily_decode_data()
|
||||
.last(txn)?
|
||||
.map(|(id, _)| id.get() + 1)
|
||||
.unwrap_or(0);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
self.tasks.put(txn, &BEU64::new(task.id), task)?;
|
||||
self.uids_task_ids
|
||||
.put(txn, &(&task.index_uid, task.id), &())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result<Option<Task>> {
|
||||
let task = self.tasks.get(txn, &BEU64::new(id))?;
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
pub fn list_tasks<'a>(
|
||||
&self,
|
||||
txn: &'a RoTxn,
|
||||
from: Option<TaskId>,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
) -> Result<Vec<Task>> {
|
||||
let from = from.unwrap_or_default();
|
||||
let range = from..limit
|
||||
.map(|limit| (limit as u64).saturating_add(from))
|
||||
.unwrap_or(u64::MAX);
|
||||
let iter: Box<dyn Iterator<Item = StdResult<_, heed::Error>>> = match filter {
|
||||
Some(filter) => {
|
||||
let iter = self
|
||||
.compute_candidates(txn, filter, range)?
|
||||
.into_iter()
|
||||
.filter_map(|id| self.tasks.get(txn, &BEU64::new(id)).transpose());
|
||||
|
||||
Box::new(iter)
|
||||
}
|
||||
None => Box::new(
|
||||
self.tasks
|
||||
.rev_range(txn, &(BEU64::new(range.start)..BEU64::new(range.end)))?
|
||||
.map(|r| r.map(|(_, t)| t)),
|
||||
),
|
||||
};
|
||||
|
||||
// Collect 'limit' task if it exists or all of them.
|
||||
let tasks = iter
|
||||
.take(limit.unwrap_or(usize::MAX))
|
||||
.try_fold::<_, _, StdResult<_, heed::Error>>(Vec::new(), |mut v, task| {
|
||||
v.push(task?);
|
||||
Ok(v)
|
||||
})?;
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
fn compute_candidates(
|
||||
&self,
|
||||
txn: &heed::RoTxn,
|
||||
filter: TaskFilter,
|
||||
range: Range<TaskId>,
|
||||
) -> Result<BinaryHeap<TaskId>> {
|
||||
let mut candidates = BinaryHeap::new();
|
||||
if let Some(indexes) = filter.indexes {
|
||||
for index in indexes {
|
||||
// We need to prefix search the null terminated string to make sure that we only
|
||||
// get exact matches for the index, and not other uids that would share the same
|
||||
// prefix, i.e test and test1.
|
||||
let mut index_uid = index.as_bytes().to_vec();
|
||||
index_uid.push(0);
|
||||
|
||||
self.uids_task_ids
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.rev_prefix_iter(txn, &index_uid)?
|
||||
.map(|entry| -> StdResult<_, heed::Error> {
|
||||
let (key, _) = entry?;
|
||||
let (_, id) =
|
||||
IndexUidTaskIdCodec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
|
||||
Ok(id)
|
||||
})
|
||||
.skip_while(|entry| {
|
||||
entry
|
||||
.as_ref()
|
||||
.ok()
|
||||
// we skip all elements till we enter in the range
|
||||
.map(|key| !range.contains(key))
|
||||
// if we encounter an error we returns true to collect it later
|
||||
.unwrap_or(true)
|
||||
})
|
||||
.take_while(|entry| {
|
||||
entry
|
||||
.as_ref()
|
||||
.ok()
|
||||
// as soon as we are out of the range we exit
|
||||
.map(|key| range.contains(key))
|
||||
// if we encounter an error we returns true to collect it later
|
||||
.unwrap_or(true)
|
||||
})
|
||||
.try_for_each::<_, StdResult<(), heed::Error>>(|id| {
|
||||
candidates.push(id?);
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(candidates)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use heed::EnvOpenOptions;
|
||||
use itertools::Itertools;
|
||||
use nelson::Mocker;
|
||||
use proptest::collection::vec;
|
||||
use proptest::prelude::*;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::index_resolver::IndexUid;
|
||||
use crate::tasks::task::TaskContent;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// TODO: use this mock to test the task store properly.
|
||||
#[allow(dead_code)]
|
||||
pub enum MockStore {
|
||||
Real(Store),
|
||||
Fake(Mocker),
|
||||
}
|
||||
|
||||
pub struct TmpEnv(TempDir, heed::Env);
|
||||
|
||||
impl TmpEnv {
|
||||
pub fn env(&self) -> heed::Env {
|
||||
self.1.clone()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tmp_env() -> TmpEnv {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(4096 * 100000);
|
||||
options.max_dbs(1000);
|
||||
let env = options.open(tmp.path()).unwrap();
|
||||
|
||||
TmpEnv(tmp, env)
|
||||
}
|
||||
|
||||
impl MockStore {
|
||||
pub fn new(env: heed::Env) -> Result<Self> {
|
||||
Ok(Self::Real(Store::new(env)?))
|
||||
}
|
||||
|
||||
pub fn reset_and_return_unfinished_tasks(&mut self) -> Result<BinaryHeap<Pending<TaskId>>> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.reset_and_return_unfinished_tasks(),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn wtxn(&self) -> Result<RwTxn> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.wtxn(),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rtxn(&self) -> Result<RoTxn> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.rtxn(),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.next_task_id(txn),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.put(txn, task),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result<Option<Task>> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.get(txn, id),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_tasks<'a>(
|
||||
&self,
|
||||
txn: &'a RoTxn,
|
||||
from: Option<TaskId>,
|
||||
filter: Option<TaskFilter>,
|
||||
limit: Option<usize>,
|
||||
) -> Result<Vec<Task>> {
|
||||
match self {
|
||||
MockStore::Real(index) => index.list_tasks(txn, from, filter, limit),
|
||||
MockStore::Fake(_) => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tasks listed through an index filter must come out by strictly decreasing id.
#[test]
fn test_ordered_filtered_updates() {
    let tmp = tmp_env();
    let store = Store::new(tmp.env()).unwrap();

    // 100 tasks with random ids, all on the same index.
    let tasks: Vec<Task> = std::iter::repeat_with(|| Task {
        id: rand::random(),
        index_uid: IndexUid::new_unchecked("test".to_string()),
        content: TaskContent::IndexDeletion,
        events: vec![],
    })
    .take(100)
    .collect();

    let mut txn = store.env.write_txn().unwrap();
    for task in &tasks {
        store.put(&mut txn, task).unwrap();
    }

    let mut filter = TaskFilter::default();
    filter.filter_index("test".into());

    let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap();

    let strictly_descending = tasks
        .iter()
        .map(|task| task.id)
        .tuple_windows()
        .all(|(newer, older)| newer > older);
    assert!(strictly_descending);
}
|
||||
|
||||
/// Filtering on index `test` must not return tasks of `test1`, whichever of the two
/// tasks has the lowest id.
#[test]
fn test_filter_same_index_prefix() {
    let tmp = tmp_env();
    let store = Store::new(tmp.env()).unwrap();

    let task_on = |index: &str, id: TaskId| Task {
        id,
        index_uid: IndexUid::new_unchecked(index.to_string()),
        content: TaskContent::IndexDeletion,
        events: vec![],
    };
    let filter_on_test = || {
        let mut filter = TaskFilter::default();
        filter.filter_index("test".into());
        filter
    };

    // First round: the task of `test` has the highest id.
    let mut txn = store.wtxn().unwrap();
    store.put(&mut txn, &task_on("test", 1)).unwrap();
    store.put(&mut txn, &task_on("test1", 0)).unwrap();

    let tasks = store
        .list_tasks(&txn, None, Some(filter_on_test()), None)
        .unwrap();

    txn.abort().unwrap();
    assert_eq!(tasks.len(), 1);
    assert_eq!(&*tasks.first().unwrap().index_uid, "test");

    // same thing but invert the ids
    let mut txn = store.wtxn().unwrap();
    store.put(&mut txn, &task_on("test", 0)).unwrap();
    store.put(&mut txn, &task_on("test1", 1)).unwrap();

    let tasks = store
        .list_tasks(&txn, None, Some(filter_on_test()), None)
        .unwrap();

    assert_eq!(tasks.len(), 1);
    assert_eq!(&*tasks.first().unwrap().index_uid, "test");
}
|
||||
|
||||
// Property tests for `IndexUidTaskIdCodec`.
proptest! {
    // Decoding an encoded key gives back the original pair.
    #[test]
    fn encode_decode_roundtrip(index_uid in any::<IndexUid>(), task_id in 0..TaskId::MAX) {
        let item = (index_uid.as_ref(), task_id);
        let encoded = IndexUidTaskIdCodec::bytes_encode(&item).unwrap();
        let (decoded_uid, decoded_id) = IndexUidTaskIdCodec::bytes_decode(encoded.as_ref()).unwrap();
        assert_eq!(&*index_uid, decoded_uid);
        assert_eq!(task_id, decoded_id);
    }

    // Encoding arbitrary strings must never panic; the result itself is ignored.
    #[test]
    fn encode_doesnt_crash(index_uid in "\\PC*", task_id in 0..TaskId::MAX) {
        let item = (index_uid.as_ref(), task_id);
        IndexUidTaskIdCodec::bytes_encode(&item);
    }

    // Decoding arbitrary bytes must never panic; the result itself is ignored.
    #[test]
    fn decode_doesnt_crash(bytes in vec(any::<u8>(), 0..1000)) {
        IndexUidTaskIdCodec::bytes_decode(&bytes);
    }
}
|
||||
}
|
256
meilisearch-lib/src/update_file_store.rs
Normal file
256
meilisearch-lib/src/update_file_store.rs
Normal file
@ -0,0 +1,256 @@
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{self, BufReader, BufWriter, Write};
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use serde_json::Map;
|
||||
use tempfile::{NamedTempFile, PersistError};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[cfg(not(test))]
|
||||
pub use store::UpdateFileStore;
|
||||
#[cfg(test)]
|
||||
pub use test::MockUpdateFileStore as UpdateFileStore;
|
||||
|
||||
const UPDATE_FILES_PATH: &str = "updates/updates_files";
|
||||
|
||||
use crate::document_formats::read_ndjson;
|
||||
|
||||
pub struct UpdateFile {
|
||||
path: PathBuf,
|
||||
file: NamedTempFile,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error("Error while persisting update to disk: {0}")]
|
||||
pub struct UpdateFileStoreError(Box<dyn std::error::Error + Sync + Send + 'static>);
|
||||
|
||||
type Result<T> = std::result::Result<T, UpdateFileStoreError>;
|
||||
|
||||
macro_rules! into_update_store_error {
|
||||
($($other:path),*) => {
|
||||
$(
|
||||
impl From<$other> for UpdateFileStoreError {
|
||||
fn from(other: $other) -> Self {
|
||||
Self(Box::new(other))
|
||||
}
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
into_update_store_error!(
|
||||
PersistError,
|
||||
io::Error,
|
||||
serde_json::Error,
|
||||
milli::documents::Error
|
||||
);
|
||||
|
||||
impl UpdateFile {
|
||||
pub fn persist(self) -> Result<()> {
|
||||
self.file.persist(&self.path)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for UpdateFile {
|
||||
type Target = NamedTempFile;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for UpdateFile {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.file
|
||||
}
|
||||
}
|
||||
|
||||
mod store {
|
||||
use super::*;
|
||||
|
||||
/// Stores update files on disk, one file per uuid, in a single directory.
#[derive(Clone, Debug)]
pub struct UpdateFileStore {
    /// Directory holding the update files.
    path: PathBuf,
}
|
||||
|
||||
impl UpdateFileStore {
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH);
|
||||
let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
|
||||
// No update files to load
|
||||
if !src_update_files_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
create_dir_all(&dst_update_files_path)?;
|
||||
|
||||
let entries = std::fs::read_dir(src_update_files_path)?;
|
||||
|
||||
for entry in entries {
|
||||
let entry = entry?;
|
||||
let update_file = BufReader::new(File::open(entry.path())?);
|
||||
let file_uuid = entry.file_name();
|
||||
let file_uuid = file_uuid
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow::anyhow!("invalid update file name"))?;
|
||||
let dst_path = dst_update_files_path.join(file_uuid);
|
||||
let dst_file = BufWriter::new(File::create(dst_path)?);
|
||||
read_ndjson(update_file, dst_file)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
let path = path.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&path)?;
|
||||
Ok(Self { path })
|
||||
}
|
||||
|
||||
/// Creates a new temporary update file.
|
||||
/// A call to `persist` is needed to persist the file in the database.
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
let file = NamedTempFile::new_in(&self.path)?;
|
||||
let uuid = Uuid::new_v4();
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let update_file = UpdateFile { file, path };
|
||||
|
||||
Ok((uuid, update_file))
|
||||
}
|
||||
|
||||
/// Returns the file corresponding to the requested uuid.
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
let file = File::open(path)?;
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
/// Copies the content of the update file pointed to by `uuid` to the `dst` directory.
|
||||
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
|
||||
let src = self.path.join(uuid.to_string());
|
||||
let mut dst = dst.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
dst.push(uuid.to_string());
|
||||
std::fs::copy(src, dst)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Peforms a dump of the given update file uuid into the provided dump path.
|
||||
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
|
||||
let uuid_string = uuid.to_string();
|
||||
let update_file_path = self.path.join(&uuid_string);
|
||||
let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH);
|
||||
std::fs::create_dir_all(&dst)?;
|
||||
dst.push(&uuid_string);
|
||||
|
||||
let update_file = File::open(update_file_path)?;
|
||||
let mut dst_file = NamedTempFile::new_in(&dump_path)?;
|
||||
let mut document_reader = DocumentBatchReader::from_reader(update_file)?;
|
||||
|
||||
let mut document_buffer = Map::new();
|
||||
// TODO: we need to find a way to do this more efficiently. (create a custom serializer
|
||||
// for jsonl for example...)
|
||||
while let Some((index, document)) = document_reader.next_document_with_index()? {
|
||||
for (field_id, content) in document.iter() {
|
||||
if let Some(field_name) = index.name(field_id) {
|
||||
let content = serde_json::from_slice(content)?;
|
||||
document_buffer.insert(field_name.to_string(), content);
|
||||
}
|
||||
}
|
||||
|
||||
serde_json::to_writer(&mut dst_file, &document_buffer)?;
|
||||
dst_file.write_all(b"\n")?;
|
||||
document_buffer.clear();
|
||||
}
|
||||
|
||||
dst_file.persist(dst)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the length reported by the metadata of the update file identified by `uuid`.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or its metadata cannot be read.
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
    let file = self.get_update(uuid)?;
    let metadata = file.metadata()?;

    Ok(metadata.len())
}
|
||||
|
||||
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
let path = self.path.join(uuid.to_string());
|
||||
tokio::fs::remove_file(path).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use nelson::Mocker;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum MockUpdateFileStore {
|
||||
Real(store::UpdateFileStore),
|
||||
Mock(Arc<Mocker>),
|
||||
}
|
||||
|
||||
impl MockUpdateFileStore {
|
||||
pub fn mock(mocker: Mocker) -> Self {
|
||||
Self::Mock(Arc::new(mocker))
|
||||
}
|
||||
|
||||
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||
store::UpdateFileStore::load_dump(src, dst)
|
||||
}
|
||||
|
||||
pub fn new(path: impl AsRef<Path>) -> Result<Self> {
|
||||
store::UpdateFileStore::new(path).map(Self::Real)
|
||||
}
|
||||
|
||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.new_update(),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_update(&self, uuid: Uuid) -> Result<File> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.get_update(uuid),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.snapshot(uuid, dst),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.dump(uuid, dump_path),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.get_size(uuid),
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete(&self, uuid: Uuid) -> Result<()> {
|
||||
match self {
|
||||
MockUpdateFileStore::Real(s) => s.delete(uuid).await,
|
||||
MockUpdateFileStore::Mock(_) => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user