split meilisearch-http and meilisearch-lib

mpostma 2021-09-21 13:23:22 +02:00
parent 09d4e37044
commit 60518449fc
63 changed files with 608 additions and 324 deletions

Cargo.lock (generated)

@@ -872,6 +872,15 @@ dependencies = [
  "termcolor",
 ]
 
+[[package]]
+name = "erased-serde"
+version = "0.3.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3de9ad4541d99dc22b59134e7ff8dc3d6c988c89ecd7324bf10a8362b07a2afa"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "fake-simd"
 version = "0.1.2"
@@ -1571,6 +1580,7 @@ name = "meilisearch-error"
 version = "0.22.0"
 dependencies = [
  "actix-http",
+ "serde",
 ]
 
 [[package]]
@@ -1606,6 +1616,7 @@ dependencies = [
  "log",
  "main_error",
  "meilisearch-error",
+ "meilisearch-lib",
  "meilisearch-tokenizer",
  "memmap",
  "milli",
@@ -1646,6 +1657,74 @@ dependencies = [
  "zip",
 ]
 
+[[package]]
+name = "meilisearch-lib"
+version = "0.1.0"
+dependencies = [
+ "actix-cors",
+ "actix-rt",
+ "actix-web",
+ "actix-web-static-files",
+ "anyhow",
+ "arc-swap",
+ "assert-json-diff",
+ "async-stream",
+ "async-trait",
+ "byte-unit",
+ "bytes",
+ "chrono",
+ "crossbeam-channel",
+ "either",
+ "env_logger",
+ "erased-serde",
+ "flate2",
+ "fst",
+ "futures",
+ "futures-util",
+ "heed",
+ "http",
+ "indexmap",
+ "itertools",
+ "log",
+ "main_error",
+ "meilisearch-error",
+ "meilisearch-tokenizer",
+ "memmap",
+ "milli",
+ "mime",
+ "mockall",
+ "num_cpus",
+ "obkv",
+ "once_cell",
+ "parking_lot",
+ "paste",
+ "pin-project",
+ "rand 0.8.4",
+ "rayon",
+ "regex",
+ "reqwest",
+ "rustls",
+ "serde",
+ "serde_json",
+ "serde_url_params",
+ "serdeval",
+ "sha2",
+ "siphasher",
+ "slice-group-by",
+ "structopt",
+ "sysinfo",
+ "tar",
+ "tempdir",
+ "tempfile",
+ "thiserror",
+ "tokio",
+ "tokio-stream",
+ "urlencoding",
+ "uuid",
+ "walkdir",
+ "whoami",
+]
+
 [[package]]
 name = "meilisearch-tokenizer"
 version = "0.2.5"

@@ -2,6 +2,7 @@
 members = [
     "meilisearch-http",
     "meilisearch-error",
+    "meilisearch-lib",
 ]
 
 [profile.release]

@@ -6,3 +6,4 @@ edition = "2018"
 
 [dependencies]
 actix-http = "=3.0.0-beta.10"
+serde = { version = "1.0.130", features = ["derive"] }

@@ -1,6 +1,7 @@
 use std::fmt;
 
 use actix_http::http::StatusCode;
+use serde::{Serialize, Deserialize};
 
 pub trait ErrorCode: std::error::Error {
     fn error_code(&self) -> Code;
@@ -45,6 +46,7 @@ impl fmt::Display for ErrorType {
     }
 }
 
+#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
 pub enum Code {
     // index related error
     CreateIndex,

@@ -45,6 +45,7 @@ indexmap = { version = "1.7.0", features = ["serde-1"] }
 itertools = "0.10.1"
 log = "0.4.14"
 main_error = "0.1.1"
+meilisearch-lib = { path = "../meilisearch-lib" }
 meilisearch-error = { path = "../meilisearch-error" }
 meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
 memmap = "0.7.0"

@@ -4,8 +4,8 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
 use log::debug;
 use serde::Serialize;
 use siphasher::sip::SipHasher;
+use meilisearch_lib::MeiliSearch;
 
-use crate::Data;
 use crate::Opt;
 
 const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47";
@@ -18,8 +18,8 @@ struct EventProperties {
 }
 
 impl EventProperties {
-    async fn from(data: Data) -> anyhow::Result<EventProperties> {
-        let stats = data.index_controller.get_all_stats().await?;
+    async fn from(data: MeiliSearch) -> anyhow::Result<EventProperties> {
+        let stats = data.get_all_stats().await?;
 
         let database_size = stats.database_size;
         let last_update_timestamp = stats.last_update.map(|u| u.timestamp());
@@ -62,7 +62,7 @@ struct AmplitudeRequest<'a> {
     events: Vec<Event<'a>>,
 }
 
-pub async fn analytics_sender(data: Data, opt: Opt) {
+pub async fn analytics_sender(data: MeiliSearch, opt: Opt) {
     let username = whoami::username();
     let hostname = whoami::hostname();
     let platform = whoami::platform();

@@ -1,86 +0,0 @@
-use std::ops::Deref;
-use std::sync::Arc;
-
-use crate::index::{Checked, Settings};
-use crate::index_controller::{
-    error::Result, DumpInfo, IndexController, IndexMetadata, IndexStats, Stats,
-};
-use crate::option::Opt;
-
-pub mod search;
-mod updates;
-
-#[derive(Clone)]
-pub struct Data {
-    inner: Arc<DataInner>,
-}
-
-impl Deref for Data {
-    type Target = DataInner;
-
-    fn deref(&self) -> &Self::Target {
-        &self.inner
-    }
-}
-
-pub struct DataInner {
-    pub index_controller: IndexController,
-    //pub api_keys: ApiKeys,
-}
-
-impl Data {
-    pub fn new(options: Opt) -> anyhow::Result<Data> {
-        let path = options.db_path.clone();
-
-        let index_controller = IndexController::new(&path, &options)?;
-
-        let inner = DataInner {
-            index_controller,
-        };
-        let inner = Arc::new(inner);
-
-        Ok(Data { inner })
-    }
-
-    pub async fn settings(&self, uid: String) -> Result<Settings<Checked>> {
-        self.index_controller.settings(uid).await
-    }
-
-    pub async fn list_indexes(&self) -> Result<Vec<IndexMetadata>> {
-        self.index_controller.list_indexes().await
-    }
-
-    pub async fn index(&self, uid: String) -> Result<IndexMetadata> {
-        self.index_controller.get_index(uid).await
-    }
-
-    //pub async fn create_index(
-    //&self,
-    //uid: String,
-    //primary_key: Option<String>,
-    //) -> Result<IndexMetadata> {
-    //let settings = IndexSettings {
-    //uid: Some(uid),
-    //primary_key,
-    //};
-    //let meta = self.index_controller.create_index(settings).await?;
-    //Ok(meta)
-    //}
-
-    pub async fn get_index_stats(&self, uid: String) -> Result<IndexStats> {
-        Ok(self.index_controller.get_index_stats(uid).await?)
-    }
-
-    pub async fn get_all_stats(&self) -> Result<Stats> {
-        Ok(self.index_controller.get_all_stats().await?)
-    }
-
-    pub async fn create_dump(&self) -> Result<DumpInfo> {
-        Ok(self.index_controller.create_dump().await?)
-    }
-
-    pub async fn dump_status(&self, uid: String) -> Result<DumpInfo> {
-        Ok(self.index_controller.dump_info(uid).await?)
-    }
-}

@@ -1,34 +0,0 @@
-use serde_json::{Map, Value};
-
-use super::Data;
-use crate::index::{SearchQuery, SearchResult};
-use crate::index_controller::error::Result;
-
-impl Data {
-    pub async fn search(&self, index: String, search_query: SearchQuery) -> Result<SearchResult> {
-        self.index_controller.search(index, search_query).await
-    }
-
-    pub async fn retrieve_documents(
-        &self,
-        index: String,
-        offset: usize,
-        limit: usize,
-        attributes_to_retrieve: Option<Vec<String>>,
-    ) -> Result<Vec<Map<String, Value>>> {
-        self.index_controller
-            .documents(index, offset, limit, attributes_to_retrieve)
-            .await
-    }
-
-    pub async fn retrieve_document(
-        &self,
-        index: String,
-        document_id: String,
-        attributes_to_retrieve: Option<Vec<String>>,
-    ) -> Result<Map<String, Value>> {
-        self.index_controller
-            .document(index, document_id, attributes_to_retrieve)
-            .await
-    }
-}

@@ -1,32 +0,0 @@
-use crate::index_controller::Update;
-use crate::index_controller::{error::Result, IndexMetadata, IndexSettings, UpdateStatus};
-use crate::Data;
-
-impl Data {
-    pub async fn register_update(&self, index_uid: &str, update: Update) -> Result<UpdateStatus> {
-        let status = self.index_controller.register_update(index_uid, update).await?;
-        Ok(status)
-    }
-
-    pub async fn get_update_status(&self, index: String, uid: u64) -> Result<UpdateStatus> {
-        self.index_controller.update_status(index, uid).await
-    }
-
-    pub async fn get_updates_status(&self, index: String) -> Result<Vec<UpdateStatus>> {
-        self.index_controller.all_update_status(index).await
-    }
-
-    pub async fn update_index(
-        &self,
-        uid: String,
-        primary_key: Option<String>,
-        new_uid: Option<String>,
-    ) -> Result<IndexMetadata> {
-        let settings = IndexSettings {
-            uid: new_uid,
-            primary_key,
-        };
-
-        self.index_controller.update_index(uid, settings).await
-    }
-}
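
The three files deleted above formed the Data facade, a thin forwarding layer over IndexController. With it gone, callers hold a MeiliSearch handle (the re-exported IndexController) and invoke the same methods directly. A minimal sketch under that assumption; show_settings is a hypothetical helper, settings is the method the deleted wrapper used to forward to:

use meilisearch_lib::MeiliSearch;

// Hypothetical helper: what Data::settings used to wrap is now a direct call.
async fn show_settings(meilisearch: &MeiliSearch, uid: String) -> anyhow::Result<()> {
    let settings = meilisearch.settings(uid).await?;
    println!("{:?}", settings);
    Ok(())
}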

@@ -55,18 +55,6 @@ impl aweb::error::ResponseError for ResponseError {
     }
 }
 
-macro_rules! internal_error {
-    ($target:ty : $($other:path), *) => {
-        $(
-            impl From<$other> for $target {
-                fn from(other: $other) -> Self {
-                    Self::Internal(Box::new(other))
-                }
-            }
-        )*
-    }
-}
-
 #[derive(Debug)]
 pub struct MilliError<'a>(pub &'a milli::Error);

@@ -38,7 +38,6 @@
 //! Most of the routes use [extractors] to handle the authentication.
 
 #![allow(rustdoc::private_intra_doc_links)]
-pub mod data;
 #[macro_use]
 pub mod error;
 #[macro_use]
@@ -46,11 +45,8 @@ pub mod extractors;
 #[cfg(all(not(debug_assertions), feature = "analytics"))]
 pub mod analytics;
 pub mod helpers;
-mod index;
-mod index_controller;
 pub mod option;
 pub mod routes;
 
-pub use self::data::Data;
 use crate::extractors::authentication::AuthConfig;
 pub use option::Opt;
@@ -58,6 +54,7 @@ use actix_web::web;
 use extractors::authentication::policies::*;
 use extractors::payload::PayloadConfig;
+use meilisearch_lib::MeiliSearch;
 use sha2::Digest;
 
 #[derive(Clone)]
@@ -86,14 +83,14 @@ impl ApiKeys {
 
 pub fn configure_data(
     config: &mut web::ServiceConfig,
-    data: Data,
+    data: MeiliSearch,
     opt: &Opt,
 ) {
     let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
     config
-        .app_data(web::Data::new(data.clone()))
-        // TODO!: Why are we passing the data with two different things?
         .app_data(data)
+        // TODO!: Why are we passing the data with two different things?
+        //.app_data(data)
         .app_data(
             web::JsonConfig::default()
                 .limit(http_payload_size_limit)

@@ -1,7 +1,8 @@
 use std::env;
 
 use actix_web::HttpServer;
-use meilisearch_http::{create_app, Data, Opt};
+use meilisearch_http::{create_app, Opt};
+use meilisearch_lib::MeiliSearch;
 use structopt::StructOpt;
 
 #[cfg(all(not(debug_assertions), feature = "analytics"))]
@@ -39,6 +40,26 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
     Ok(())
 }
 
+fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
+    let mut meilisearch = MeiliSearch::builder();
+    meilisearch
+        .set_max_index_size(opt.max_index_size.get_bytes() as usize)
+        .set_max_update_store_size(opt.max_udb_size.get_bytes() as usize)
+        .set_ignore_missing_snapshot(opt.ignore_missing_snapshot)
+        .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists)
+        .set_dump_dst(opt.dumps_dir.clone())
+        .set_snapshot_dir(opt.snapshot_dir.clone());
+
+    if let Some(ref path) = opt.import_snapshot {
+        meilisearch.set_import_snapshot(path.clone());
+    }
+
+    if let Some(ref path) = opt.import_dump {
+        meilisearch.set_dump_src(path.clone());
+    }
+
+    meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
+}
+
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
     let opt = Opt::from_args();
@@ -55,23 +76,23 @@ async fn main() -> anyhow::Result<()> {
         _ => unreachable!(),
     }
 
-    let data = Data::new(opt.clone())?;
+    let meilisearch = setup_meilisearch(&opt)?;
 
     #[cfg(all(not(debug_assertions), feature = "analytics"))]
     if !opt.no_analytics {
-        let analytics_data = data.clone();
+        let analytics_data = meilisearch.clone();
         let analytics_opt = opt.clone();
         tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt));
    }
 
     print_launch_resume(&opt);
 
-    run_http(data, opt).await?;
+    run_http(meilisearch, opt).await?;
 
     Ok(())
 }
 
-async fn run_http(data: Data, opt: Opt) -> anyhow::Result<()> {
+async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> {
     let _enable_dashboard = &opt.env == "development";
     let opt_clone = opt.clone();
     let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone))
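
Data::new is replaced by setup_meilisearch, which drives the new builder from meilisearch-lib. A hedged sketch of the same wiring outside the HTTP binary; the sizes and paths here are made up, while the setter and build signatures are the ones introduced by this commit:

use meilisearch_lib::MeiliSearch;
use meilisearch_lib::options::IndexerOpts;

fn open_engine() -> anyhow::Result<MeiliSearch> {
    let mut builder = MeiliSearch::builder();
    builder
        .set_max_index_size(100 * 1024 * 1024)        // made-up 100 MiB cap
        .set_max_update_store_size(100 * 1024 * 1024) // made-up 100 MiB cap
        .set_dump_dst("dumps/".into())                // required by build()
        .set_snapshot_dir("snapshots/".into());
    // build() consumes the builder, replays an optional snapshot or dump
    // import, then spawns the uuid/index/update/dump actors.
    builder.build("data.ms", IndexerOpts::default())
}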

@@ -8,7 +8,6 @@ use std::sync::Arc;
 use std::fs;
 
 use byte_unit::Byte;
-use milli::CompressionType;
 use rustls::internal::pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
 use rustls::{
     AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, NoClientAuth,
@@ -16,56 +15,7 @@ use rustls::{
 };
 use structopt::StructOpt;
 use sysinfo::{RefreshKind, System, SystemExt};
+use meilisearch_lib::options::IndexerOpts;
 
-#[derive(Debug, Clone, StructOpt)]
-pub struct IndexerOpts {
-    /// The amount of documents to skip before printing
-    /// a log regarding the indexing advancement.
-    #[structopt(long, default_value = "100000")] // 100k
-    pub log_every_n: usize,
-
-    /// Grenad max number of chunks in bytes.
-    #[structopt(long)]
-    pub max_nb_chunks: Option<usize>,
-
-    /// The maximum amount of memory the indexer will use. It defaults to 2/3
-    /// of the available memory. It is recommended to use something like 80%-90%
-    /// of the available memory, no more.
-    ///
-    /// In case the engine is unable to retrieve the available memory the engine will
-    /// try to use the memory it needs but without real limit, this can lead to
-    /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
-    #[structopt(long, default_value)]
-    pub max_memory: MaxMemory,
-
-    /// The name of the compression algorithm to use when compressing intermediate
-    /// Grenad chunks while indexing documents.
-    ///
-    /// Choosing a fast algorithm will make the indexing faster but may consume more memory.
-    #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
-    pub chunk_compression_type: CompressionType,
-
-    /// The level of compression of the chosen algorithm.
-    #[structopt(long, requires = "chunk-compression-type")]
-    pub chunk_compression_level: Option<u32>,
-
-    /// Number of parallel jobs for indexing, defaults to # of CPUs.
-    #[structopt(long)]
-    pub indexing_jobs: Option<usize>,
-}
-
-impl Default for IndexerOpts {
-    fn default() -> Self {
-        Self {
-            log_every_n: 100_000,
-            max_nb_chunks: None,
-            max_memory: MaxMemory::default(),
-            chunk_compression_type: CompressionType::None,
-            chunk_compression_level: None,
-            indexing_jobs: None,
-        }
-    }
-}
-
 const POSSIBLE_ENV: [&str; 2] = ["development", "production"];

@@ -1,17 +1,17 @@
 use actix_web::{web, HttpResponse};
 use log::debug;
+use meilisearch_lib::MeiliSearch;
 use serde::{Deserialize, Serialize};
 
 use crate::error::ResponseError;
 use crate::extractors::authentication::{policies::*, GuardedData};
-use crate::Data;
 
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::post().to(create_dump)))
         .service(web::resource("/{dump_uid}/status").route(web::get().to(get_dump_status)));
 }
 
-pub async fn create_dump(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
+pub async fn create_dump(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
     let res = data.create_dump().await?;
 
     debug!("returns: {:?}", res);
@@ -30,10 +30,10 @@ struct DumpParam {
 }
 
 async fn get_dump_status(
-    data: GuardedData<Private, Data>,
+    data: GuardedData<Private, MeiliSearch>,
     path: web::Path<DumpParam>,
 ) -> Result<HttpResponse, ResponseError> {
-    let res = data.dump_status(path.dump_uid.clone()).await?;
+    let res = data.dump_info(path.dump_uid.clone()).await?;
 
     debug!("returns: {:?}", res);
     Ok(HttpResponse::Ok().json(res))

@@ -3,6 +3,8 @@ use actix_web::{web, HttpResponse};
 use actix_web::web::Bytes;
 use futures::{Stream, StreamExt};
 use log::debug;
+use meilisearch_lib::MeiliSearch;
+use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update};
 use milli::update::IndexDocumentsMethod;
 use serde::Deserialize;
 //use serde_json::Value;
@@ -11,9 +13,7 @@ use tokio::sync::mpsc;
 use crate::error::ResponseError;
 use crate::extractors::authentication::{policies::*, GuardedData};
 use crate::extractors::payload::Payload;
-use crate::index_controller::{DocumentAdditionFormat, Update};
 use crate::routes::IndexParam;
-use crate::Data;
 
 const DEFAULT_RETRIEVE_DOCUMENTS_OFFSET: usize = 0;
 const DEFAULT_RETRIEVE_DOCUMENTS_LIMIT: usize = 20;
@@ -88,20 +88,20 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 }
 
 pub async fn get_document(
-    data: GuardedData<Public, Data>,
+    data: GuardedData<Public, MeiliSearch>,
     path: web::Path<DocumentParam>,
 ) -> Result<HttpResponse, ResponseError> {
     let index = path.index_uid.clone();
     let id = path.document_id.clone();
     let document = data
-        .retrieve_document(index, id, None as Option<Vec<String>>)
+        .document(index, id, None as Option<Vec<String>>)
         .await?;
     debug!("returns: {:?}", document);
     Ok(HttpResponse::Ok().json(document))
 }
 
 //pub async fn delete_document(
-//data: GuardedData<Private, Data>,
+//data: GuardedData<Private, MeiliSearch>,
 //path: web::Path<DocumentParam>,
 //) -> Result<HttpResponse, ResponseError> {
 //let update_status = data
@@ -120,7 +120,7 @@ pub struct BrowseQuery {
 }
 
 pub async fn get_all_documents(
-    data: GuardedData<Public, Data>,
+    data: GuardedData<Public, MeiliSearch>,
     path: web::Path<IndexParam>,
     params: web::Query<BrowseQuery>,
 ) -> Result<HttpResponse, ResponseError> {
@@ -137,7 +137,7 @@ pub async fn get_all_documents(
     });
 
     let documents = data
-        .retrieve_documents(
+        .documents(
             path.index_uid.clone(),
             params.offset.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_OFFSET),
             params.limit.unwrap_or(DEFAULT_RETRIEVE_DOCUMENTS_LIMIT),
@@ -157,7 +157,7 @@ pub struct UpdateDocumentsQuery {
 /// Route used when the payload type is "application/json"
 /// Used to add or replace documents
 pub async fn add_documents(
-    data: GuardedData<Private, Data>,
+    data: GuardedData<Private, MeiliSearch>,
     path: web::Path<IndexParam>,
     params: web::Query<UpdateDocumentsQuery>,
     body: Payload,
@@ -180,7 +180,7 @@ pub async fn add_documents(
 /// Route used when the payload type is "application/json"
 /// Used to add or replace documents
 pub async fn update_documents(
-    data: GuardedData<Private, Data>,
+    data: GuardedData<Private, MeiliSearch>,
     path: web::Path<IndexParam>,
     params: web::Query<UpdateDocumentsQuery>,
     body: Payload,
@@ -201,7 +201,7 @@ pub async fn update_documents(
 }
 
 //pub async fn delete_documents(
-//data: GuardedData<Private, Data>,
+//data: GuardedData<Private, MeiliSearch>,
 //path: web::Path<IndexParam>,
 //body: web::Json<Vec<Value>>,
 //) -> Result<HttpResponse, ResponseError> {
@@ -221,7 +221,7 @@ pub async fn update_documents(
 //}
 
 //pub async fn clear_all_documents(
-//data: GuardedData<Private, Data>,
+//data: GuardedData<Private, MeiliSearch>,
 //path: web::Path<IndexParam>,
 //) -> Result<HttpResponse, ResponseError> {
 //let update_status = data.clear_documents(path.index_uid.clone()).await?;

@@ -1,12 +1,13 @@
 use actix_web::{web, HttpResponse};
 use chrono::{DateTime, Utc};
 use log::debug;
+use meilisearch_lib::MeiliSearch;
+use meilisearch_lib::index_controller::IndexSettings;
 use serde::{Deserialize, Serialize};
 
 use crate::error::ResponseError;
 use crate::extractors::authentication::{policies::*, GuardedData};
 use crate::routes::IndexParam;
-use crate::Data;
 
 pub mod documents;
 pub mod search;
@@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
     );
 }
 
-pub async fn list_indexes(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
+pub async fn list_indexes(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
     let indexes = data.list_indexes().await?;
     debug!("returns: {:?}", indexes);
     Ok(HttpResponse::Ok().json(indexes))
@@ -49,7 +50,7 @@ pub struct IndexCreateRequest {
 }
 
 //pub async fn create_index(
-//data: GuardedData<Private, Data>,
+//data: GuardedData<Private, MeiliSearch>,
 //body: web::Json<IndexCreateRequest>,
 //) -> Result<HttpResponse, ResponseError> {
 //let body = body.into_inner();
@@ -75,30 +76,34 @@ pub struct UpdateIndexResponse {
 }
 
 pub async fn get_index(
-    data: GuardedData<Private, Data>,
+    data: GuardedData<Private, MeiliSearch>,
     path: web::Path<IndexParam>,
 ) -> Result<HttpResponse, ResponseError> {
-    let meta = data.index(path.index_uid.clone()).await?;
+    let meta = data.get_index(path.index_uid.clone()).await?;
     debug!("returns: {:?}", meta);
     Ok(HttpResponse::Ok().json(meta))
 }
 
 pub async fn update_index(
-    data: GuardedData<Private, Data>,
+    data: GuardedData<Private, MeiliSearch>,
     path: web::Path<IndexParam>,
     body: web::Json<UpdateIndexRequest>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", body);
     let body = body.into_inner();
+    let settings = IndexSettings {
+        uid: body.uid,
+        primary_key: body.primary_key,
+    };
     let meta = data
-        .update_index(path.into_inner().index_uid, body.primary_key, body.uid)
+        .update_index(path.into_inner().index_uid, settings)
         .await?;
     debug!("returns: {:?}", meta);
     Ok(HttpResponse::Ok().json(meta))
 }
 
 //pub async fn delete_index(
-//data: GuardedData<Private, Data>,
+//data: GuardedData<Private, MeiliSearch>,
 //path: web::Path<IndexParam>,
 //) -> Result<HttpResponse, ResponseError> {
 //data.delete_index(path.index_uid.clone()).await?;
@@ -106,7 +111,7 @@ pub async fn update_index(
 //}
 
 pub async fn get_index_stats(
-    data: GuardedData<Private, Data>,
+    data: GuardedData<Private, MeiliSearch>,
     path: web::Path<IndexParam>,
 ) -> Result<HttpResponse, ResponseError> {
     let response = data.get_index_stats(path.index_uid.clone()).await?;

@@ -1,13 +1,13 @@
 use actix_web::{web, HttpResponse};
 use log::debug;
+use meilisearch_lib::MeiliSearch;
+use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
 use serde::Deserialize;
 use serde_json::Value;
 
 use crate::error::ResponseError;
 use crate::extractors::authentication::{policies::*, GuardedData};
-use crate::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT};
 use crate::routes::IndexParam;
-use crate::Data;
 
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(
@@ -82,7 +82,7 @@ impl From<SearchQueryGet> for SearchQuery {
 }
 
 pub async fn search_with_url_query(
-    data: GuardedData<Public, Data>,
+    data: GuardedData<Public, MeiliSearch>,
     path: web::Path<IndexParam>,
     params: web::Query<SearchQueryGet>,
 ) -> Result<HttpResponse, ResponseError> {
@@ -99,7 +99,7 @@ pub async fn search_with_url_query(
 }
 
 pub async fn search_with_post(
-    data: GuardedData<Public, Data>,
+    data: GuardedData<Public, MeiliSearch>,
     path: web::Path<IndexParam>,
     params: web::Json<SearchQuery>,
 ) -> Result<HttpResponse, ResponseError> {

@@ -148,7 +148,7 @@
 //);
 
 //pub async fn update_all(
-//data: GuardedData<Private, Data>,
+//data: GuardedData<Private, MeiliSearch>,
 //index_uid: web::Path<String>,
 //body: web::Json<Settings<Unchecked>>,
 //) -> Result<HttpResponse, ResponseError> {
@@ -162,7 +162,7 @@
 //}
 
 //pub async fn get_all(
-//data: GuardedData<Private, Data>,
+//data: GuardedData<Private, MeiliSearch>,
 //index_uid: web::Path<String>,
 //) -> Result<HttpResponse, ResponseError> {
 //let settings = data.settings(index_uid.into_inner()).await?;
@@ -171,7 +171,7 @@
 //}
 
 //pub async fn delete_all(
-//data: GuardedData<Private, Data>,
+//data: GuardedData<Private, MeiliSearch>,
 //index_uid: web::Path<String>,
 //) -> Result<HttpResponse, ResponseError> {
 //let settings = Settings::cleared();

@@ -1,12 +1,12 @@
 use actix_web::{web, HttpResponse};
 use chrono::{DateTime, Utc};
 use log::debug;
+use meilisearch_lib::MeiliSearch;
 use serde::{Deserialize, Serialize};
 
 use crate::error::ResponseError;
 use crate::extractors::authentication::{policies::*, GuardedData};
 use crate::routes::{IndexParam, UpdateStatusResponse};
-use crate::Data;
 
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::get().to(get_all_updates_status)))
@@ -37,12 +37,12 @@ pub struct UpdateParam {
 }
 
 pub async fn get_update_status(
-    data: GuardedData<Private, Data>,
+    data: GuardedData<Private, MeiliSearch>,
     path: web::Path<UpdateParam>,
 ) -> Result<HttpResponse, ResponseError> {
     let params = path.into_inner();
     let meta = data
-        .get_update_status(params.index_uid, params.update_id)
+        .update_status(params.index_uid, params.update_id)
         .await?;
     let meta = UpdateStatusResponse::from(meta);
     debug!("returns: {:?}", meta);
@@ -50,10 +50,10 @@ pub async fn get_update_status(
 }
 
 pub async fn get_all_updates_status(
-    data: GuardedData<Private, Data>,
+    data: GuardedData<Private, MeiliSearch>,
     path: web::Path<IndexParam>,
 ) -> Result<HttpResponse, ResponseError> {
-    let metas = data.get_updates_status(path.into_inner().index_uid).await?;
+    let metas = data.all_update_status(path.into_inner().index_uid).await?;
     let metas = metas
         .into_iter()
         .map(UpdateStatusResponse::from)

@@ -5,12 +5,12 @@ use chrono::{DateTime, Utc};
 use log::debug;
 use serde::{Deserialize, Serialize};
 
+use meilisearch_lib::{MeiliSearch, UpdateResult, UpdateStatus, RegisterUpdate};
+use meilisearch_lib::index::{Settings, Unchecked};
+
 use crate::error::ResponseError;
 use crate::extractors::authentication::{policies::*, GuardedData};
-use crate::index::{Settings, Unchecked};
-use crate::index_controller::update_actor::RegisterUpdate;
-use crate::index_controller::{UpdateResult, UpdateStatus};
-use crate::{ApiKeys, Data};
+use crate::ApiKeys;
 
 mod dump;
 mod indexes;
@@ -187,15 +187,17 @@ impl From<UpdateStatus> for UpdateStatusResponse {
                 let duration = Duration::from_millis(duration as u64).as_secs_f64();
 
                 let update_id = failed.id();
-                let response = failed.error;
+                let processed_at = failed.failed_at;
+                let enqueued_at = failed.from.from.enqueued_at;
+                let response = failed.into();
 
                 let content = FailedUpdateResult {
                     update_id,
                     update_type,
                     response,
                     duration,
-                    enqueued_at: failed.from.from.enqueued_at,
-                    processed_at: failed.failed_at,
+                    enqueued_at,
+                    processed_at,
                 };
                 UpdateStatusResponse::Failed { content }
             }
@@ -230,7 +232,7 @@ pub async fn running() -> HttpResponse {
     HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" }))
 }
 
-async fn get_stats(data: GuardedData<Private, Data>) -> Result<HttpResponse, ResponseError> {
+async fn get_stats(data: GuardedData<Private, MeiliSearch>) -> Result<HttpResponse, ResponseError> {
     let response = data.get_all_stats().await?;
 
     debug!("returns: {:?}", response);
@@ -245,7 +247,7 @@ struct VersionResponse {
     pkg_version: String,
 }
 
-async fn get_version(_data: GuardedData<Private, Data>) -> HttpResponse {
+async fn get_version(_data: GuardedData<Private, MeiliSearch>) -> HttpResponse {
     let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
     let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
@@ -288,7 +290,7 @@ mod test {
     macro_rules! impl_is_policy {
         ($($param:ident)*) => {
             impl<Policy, Func, $($param,)* Res> Is<Policy, (($($param,)*), Res)> for Func
-            where Func: Fn(GuardedData<Policy, Data>, $($param,)*) -> Res {}
+            where Func: Fn(GuardedData<Policy, MeiliSearch>, $($param,)*) -> Res {}
         };
     }

@@ -0,0 +1,72 @@
+[package]
+name = "meilisearch-lib"
+version = "0.1.0"
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+actix-cors = { git = "https://github.com/MarinPostma/actix-extras.git", rev = "963ac94d" }
+actix-web = { version = "4.0.0-beta.9", features = ["rustls"] }
+actix-web-static-files = { git = "https://github.com/MarinPostma/actix-web-static-files.git", rev = "39d8006", optional = true }
+anyhow = { version = "1.0.43", features = ["backtrace"] }
+async-stream = "0.3.2"
+async-trait = "0.1.51"
+arc-swap = "1.3.2"
+byte-unit = { version = "4.0.12", default-features = false, features = ["std"] }
+bytes = "1.1.0"
+chrono = { version = "0.4.19", features = ["serde"] }
+crossbeam-channel = "0.5.1"
+either = "1.6.1"
+env_logger = "0.9.0"
+flate2 = "1.0.21"
+fst = "0.4.7"
+futures = "0.3.17"
+futures-util = "0.3.17"
+heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
+http = "0.2.4"
+indexmap = { version = "1.7.0", features = ["serde-1"] }
+itertools = "0.10.1"
+log = "0.4.14"
+main_error = "0.1.1"
+meilisearch-error = { path = "../meilisearch-error" }
+meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
+memmap = "0.7.0"
+milli = { git = "https://github.com/meilisearch/milli.git", rev = "6de1b41" }
+mime = "0.3.16"
+num_cpus = "1.13.0"
+once_cell = "1.8.0"
+parking_lot = "0.11.2"
+rand = "0.8.4"
+rayon = "1.5.1"
+regex = "1.5.4"
+rustls = "0.19.1"
+serde = { version = "1.0.130", features = ["derive"] }
+serde_json = { version = "1.0.67", features = ["preserve_order"] }
+sha2 = "0.9.6"
+siphasher = "0.3.7"
+slice-group-by = "0.2.6"
+structopt = "0.3.23"
+tar = "0.4.37"
+tempfile = "3.2.0"
+thiserror = "1.0.28"
+tokio = { version = "1.11.0", features = ["full"] }
+uuid = { version = "0.8.2", features = ["serde"] }
+walkdir = "2.3.2"
+obkv = "0.2.0"
+pin-project = "1.0.8"
+whoami = { version = "1.1.3", optional = true }
+reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true }
+serdeval = "0.1.0"
+sysinfo = "0.20.2"
+tokio-stream = "0.1.7"
+erased-serde = "0.3.16"
+
+[dev-dependencies]
+actix-rt = "2.2.0"
+assert-json-diff = { branch = "master", git = "https://github.com/qdequele/assert-json-diff" }
+mockall = "0.10.2"
+paste = "1.0.5"
+serde_url_params = "0.2.1"
+tempdir = "0.3.7"
+urlencoding = "2.1.0"

@@ -0,0 +1,62 @@
+use std::error::Error;
+use std::fmt;
+
+use meilisearch_error::{Code, ErrorCode};
+use milli::UserError;
+
+macro_rules! internal_error {
+    ($target:ty : $($other:path), *) => {
+        $(
+            impl From<$other> for $target {
+                fn from(other: $other) -> Self {
+                    Self::Internal(Box::new(other))
+                }
+            }
+        )*
+    }
+}
+
+#[derive(Debug)]
+pub struct MilliError<'a>(pub &'a milli::Error);
+
+impl Error for MilliError<'_> {}
+
+impl fmt::Display for MilliError<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+impl ErrorCode for MilliError<'_> {
+    fn error_code(&self) -> Code {
+        match self.0 {
+            milli::Error::InternalError(_) => Code::Internal,
+            milli::Error::IoError(_) => Code::Internal,
+            milli::Error::UserError(ref error) => {
+                match error {
+                    // TODO: wait for spec for new error codes.
+                    | UserError::SerdeJson(_)
+                    | UserError::MaxDatabaseSizeReached
+                    | UserError::InvalidCriterionName { .. }
+                    | UserError::InvalidDocumentId { .. }
+                    | UserError::InvalidStoreFile
+                    | UserError::NoSpaceLeftOnDevice
+                    | UserError::InvalidAscDescSyntax { .. }
+                    | UserError::DocumentLimitReached => Code::Internal,
+                    UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
+                    UserError::InvalidFilter(_) => Code::Filter,
+                    UserError::InvalidFilterAttribute(_) => Code::Filter,
+                    UserError::InvalidSortName { .. } => Code::Sort,
+                    UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
+                    UserError::MissingPrimaryKey => Code::MissingPrimaryKey,
+                    UserError::PrimaryKeyCannotBeChanged => Code::PrimaryKeyAlreadyPresent,
+                    UserError::PrimaryKeyCannotBeReset => Code::PrimaryKeyAlreadyPresent,
+                    UserError::SortRankingRuleMissing => Code::Sort,
+                    UserError::UnknownInternalDocumentId { .. } => Code::DocumentNotFound,
+                    UserError::InvalidFacetsDistribution { .. } => Code::BadRequest,
+                    UserError::InvalidSortableAttribute { .. } => Code::Sort,
+                }
+            }
+        }
+    }
+}
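
The internal_error! macro, removed from meilisearch-http/src/error.rs above, now lives here. For illustration only: assuming an error enum with a Self::Internal(Box<...>) variant, such as IndexError, an invocation like internal_error!(IndexError: heed::Error, std::io::Error); would expand to one From impl per listed type:

impl From<heed::Error> for IndexError {
    fn from(other: heed::Error) -> Self {
        Self::Internal(Box::new(other))
    }
}
// ...and an identical impl for std::io::Error.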

@@ -6,7 +6,7 @@ use heed::RoTxn;
 use indexmap::IndexMap;
 use serde::{Deserialize, Serialize};
 
-use crate::option::IndexerOpts;
+use crate::options::IndexerOpts;
 
 use super::error::Result;
 use super::{Index, Settings, Unchecked};

@@ -14,7 +14,7 @@ use error::Result;
 pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
 pub use updates::{Checked, Facets, Settings, Unchecked};
 
-use crate::helpers::EnvSizer;
+use crate::EnvSizer;
 use crate::index_controller::update_file_store::UpdateFileStore;
 
 use self::error::IndexError;

@@ -5,7 +5,7 @@ use rayon::ThreadPool;
 
 use crate::index_controller::update_actor::RegisterUpdate;
 use crate::index_controller::{Failed, Processed, Processing};
-use crate::option::IndexerOpts;
+use crate::options::IndexerOpts;
 
 pub struct UpdateHandler {
     max_nb_chunks: Option<usize>,
@@ -66,7 +66,7 @@ impl UpdateHandler {
 
         match result {
             Ok(result) => Ok(meta.process(result)),
-            Err(e) => Err(meta.fail(e.into())),
+            Err(e) => Err(meta.fail(e)),
         }
     }
 }

@@ -11,7 +11,7 @@ use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}
 use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
 use crate::{
     index::Unchecked,
-    option::IndexerOpts,
+    options::IndexerOpts,
 };
 
 #[derive(Serialize, Deserialize, Debug)]

@@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
 
 use crate::index::Index;
 use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore};
-use crate::option::IndexerOpts;
+use crate::options::IndexerOpts;
 
 #[derive(Serialize, Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]

@@ -18,7 +18,7 @@ pub use message::DumpMsg;
 
 use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle};
 use crate::index_controller::dump_actor::error::DumpActorError;
-use crate::{helpers::compression, option::IndexerOpts};
+use crate::options::IndexerOpts;
 use error::Result;
 
 mod actor;
@@ -112,7 +112,7 @@ pub fn load_dump(
     let tmp_src = tempfile::tempdir_in(".")?;
     let tmp_src_path = tmp_src.path();
 
-    compression::from_tar_gz(&src_path, tmp_src_path)?;
+    crate::from_tar_gz(&src_path, tmp_src_path)?;
 
     let meta_path = tmp_src_path.join(META_FILE_NAME);
     let mut meta_file = File::open(&meta_path)?;
@@ -162,6 +162,7 @@ impl<U, P> DumpTask<U, P>
 where
     U: UuidResolverHandle + Send + Sync + Clone + 'static,
     P: UpdateActorHandle + Send + Sync + Clone + 'static,
+
 {
     async fn run(self) -> Result<()> {
         trace!("Performing dump.");
@@ -186,7 +187,7 @@ where
         let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
             let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
-            compression::to_tar_gz(temp_dump_path, temp_dump_file.path())
+            crate::to_tar_gz(temp_dump_path, temp_dump_file.path())
                 .map_err(|e| DumpActorError::Internal(e.into()))?;
 
             let dump_path = self.path.join(self.uid).with_extension("dump");
View File

@ -16,7 +16,7 @@ use crate::index::{
use crate::index_controller::{ use crate::index_controller::{
get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing, get_arc_ownership_blocking, Failed, IndexStats, Processed, Processing,
}; };
use crate::option::IndexerOpts; use crate::options::IndexerOpts;
use super::error::{IndexActorError, Result}; use super::error::{IndexActorError, Result};
use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore}; use super::{IndexMeta, IndexMsg, IndexSettings, IndexStore};
@ -29,7 +29,9 @@ pub struct IndexActor<S> {
store: S, store: S,
} }
impl<S: IndexStore + Sync + Send> IndexActor<S> { impl<S> IndexActor<S>
where S: IndexStore + Sync + Send,
{
pub fn new( pub fn new(
receiver: mpsc::Receiver<IndexMsg>, receiver: mpsc::Receiver<IndexMsg>,
store: S, store: S,

@@ -1,4 +1,4 @@
-use crate::option::IndexerOpts;
+use crate::options::IndexerOpts;
 use std::path::{Path, PathBuf};
 use tokio::sync::{mpsc, oneshot};

@@ -22,8 +22,8 @@ use update_actor::UpdateActorHandle;
 pub use updates::*;
 use uuid_resolver::{error::UuidResolverError, UuidResolverHandle};
 
+use crate::options::IndexerOpts;
 use crate::index::{Checked, Document, SearchQuery, SearchResult, Settings};
-use crate::option::Opt;
 use error::Result;
 
 use self::dump_actor::load_dump;
@@ -99,45 +99,58 @@ pub enum Update {
     }
 }
 
-impl IndexController {
-    pub fn new(path: impl AsRef<Path>, options: &Opt) -> anyhow::Result<Self> {
-        let index_size = options.max_index_size.get_bytes() as usize;
-        let update_store_size = options.max_index_size.get_bytes() as usize;
+#[derive(Default, Debug)]
+pub struct IndexControllerBuilder {
+    max_index_size: Option<usize>,
+    max_update_store_size: Option<usize>,
+    snapshot_dir: Option<PathBuf>,
+    import_snapshot: Option<PathBuf>,
+    ignore_snapshot_if_db_exists: bool,
+    ignore_missing_snapshot: bool,
+    dump_src: Option<PathBuf>,
+    dump_dst: Option<PathBuf>,
+}
 
-        if let Some(ref path) = options.import_snapshot {
+impl IndexControllerBuilder {
+    pub fn build(self, db_path: impl AsRef<Path>, indexer_options: IndexerOpts) -> anyhow::Result<IndexController> {
+        let index_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing index size"))?;
+        let update_store_size = self.max_index_size.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
+
+        if let Some(ref path) = self.import_snapshot {
             info!("Loading from snapshot {:?}", path);
             load_snapshot(
-                &options.db_path,
+                db_path.as_ref(),
                 path,
-                options.ignore_snapshot_if_db_exists,
-                options.ignore_missing_snapshot,
+                self.ignore_snapshot_if_db_exists,
+                self.ignore_missing_snapshot,
             )?;
-        } else if let Some(ref src_path) = options.import_dump {
+        } else if let Some(ref src_path) = self.dump_src {
             load_dump(
-                &options.db_path,
+                db_path.as_ref(),
                 src_path,
-                options.max_index_size.get_bytes() as usize,
-                options.max_udb_size.get_bytes() as usize,
-                &options.indexer_options,
+                index_size,
+                update_store_size,
+                &indexer_options,
             )?;
         }
 
-        std::fs::create_dir_all(&path)?;
+        std::fs::create_dir_all(db_path.as_ref())?;
 
-        let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&path)?;
+        let uuid_resolver = uuid_resolver::UuidResolverHandleImpl::new(&db_path)?;
         let index_handle =
-            index_actor::IndexActorHandleImpl::new(&path, index_size, &options.indexer_options)?;
+            index_actor::IndexActorHandleImpl::new(&db_path, index_size, &indexer_options)?;
 
         let update_handle = update_actor::UpdateActorHandleImpl::new(
             index_handle.clone(),
-            &path,
+            &db_path,
             update_store_size,
         )?;
 
         let dump_handle = dump_actor::DumpActorHandleImpl::new(
-            &options.dumps_dir,
+            &self.dump_dst.ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?,
             uuid_resolver.clone(),
             update_handle.clone(),
-            options.max_index_size.get_bytes() as usize,
-            options.max_udb_size.get_bytes() as usize,
+            index_size,
+            update_store_size,
        )?;
 
         //if options.schedule_snapshot {
@@ -156,7 +169,7 @@ impl IndexController {
         //tokio::task::spawn(snapshot_service.run());
         //}
 
-        Ok(Self {
+        Ok(IndexController {
             uuid_resolver,
             index_handle,
             update_handle,
@@ -164,6 +177,59 @@ impl IndexController {
         })
     }
 
+    /// Set the index controller builder's max update store size.
+    pub fn set_max_update_store_size(&mut self, max_update_store_size: usize) -> &mut Self {
+        self.max_update_store_size.replace(max_update_store_size);
+        self
+    }
+
+    pub fn set_max_index_size(&mut self, size: usize) -> &mut Self {
+        self.max_index_size.replace(size);
+        self
+    }
+
+    /// Set the index controller builder's snapshot path.
+    pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self {
+        self.snapshot_dir.replace(snapshot_dir);
+        self
+    }
+
+    /// Set the index controller builder's ignore snapshot if db exists.
+    pub fn set_ignore_snapshot_if_db_exists(&mut self, ignore_snapshot_if_db_exists: bool) -> &mut Self {
+        self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists;
+        self
+    }
+
+    /// Set the index controller builder's ignore missing snapshot.
+    pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self {
+        self.ignore_missing_snapshot = ignore_missing_snapshot;
+        self
+    }
+
+    /// Set the index controller builder's dump src.
+    pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self {
+        self.dump_src.replace(dump_src);
+        self
+    }
+
+    /// Set the index controller builder's dump dst.
+    pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self {
+        self.dump_dst.replace(dump_dst);
+        self
+    }
+
+    /// Set the index controller builder's import snapshot.
+    pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self {
+        self.import_snapshot.replace(import_snapshot);
+        self
+    }
+}
+
+impl IndexController {
+    pub fn builder() -> IndexControllerBuilder {
+        IndexControllerBuilder::default()
+    }
+
     pub async fn register_update(&self, uid: &str, update: Update) -> Result<UpdateStatus> {
         match self.uuid_resolver.get(uid.to_string()).await {
             Ok(uuid) => {

@@ -2,8 +2,6 @@ use std::path::Path;
 
 use anyhow::bail;
 
-use crate::helpers::compression;
-
 //pub struct SnapshotService<U, R> {
 //uuid_resolver_handle: R,
 //update_handle: U,
@@ -93,7 +91,7 @@ pub fn load_snapshot(
     ignore_missing_snapshot: bool,
 ) -> anyhow::Result<()> {
     if !db_path.as_ref().exists() && snapshot_path.as_ref().exists() {
-        match compression::from_tar_gz(snapshot_path, &db_path) {
+        match crate::from_tar_gz(snapshot_path, &db_path) {
             Ok(()) => Ok(()),
             Err(e) => {
                 // clean created db folder

@@ -41,6 +41,7 @@ impl UpdateActorHandle for UpdateActorHandleImpl {
         self.sender.send(msg).await?;
         receiver.await?
     }
+
     async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus> {
         let (ret, receiver) = oneshot::channel();
         let msg = UpdateMsg::GetUpdate { uuid, id, ret };

@@ -28,7 +28,7 @@ use codec::*;
 
 use super::RegisterUpdate;
 use super::error::Result;
-use crate::helpers::EnvSizer;
+use crate::EnvSizer;
 use crate::index_controller::update_files_path;
 use crate::index_controller::{index_actor::CONCURRENT_INDEX_MSG, updates::*, IndexActorHandle};
@@ -323,7 +323,7 @@ impl UpdateStore {
             let result =
                 match handle.block_on(index_handle.update(index_uuid, processing.clone())) {
                     Ok(result) => result,
-                    Err(e) => Err(processing.fail(e.into())),
+                    Err(e) => Err(processing.fail(e)),
                 };
 
             // Once the pending update have been successfully processed

@@ -1,11 +1,12 @@
+use std::{error::Error, fmt::Display};
+
 use chrono::{DateTime, Utc};
+use meilisearch_error::{Code, ErrorCode};
 use milli::update::{DocumentAdditionResult, IndexDocumentsMethod};
 use serde::{Deserialize, Serialize};
 
-use crate::{
-    error::ResponseError,
-    index::{Settings, Unchecked},
-};
+use crate::index::{Settings, Unchecked};
 
 use super::update_actor::RegisterUpdate;
@@ -115,10 +116,13 @@ impl Processing {
         }
     }
 
-    pub fn fail(self, error: ResponseError) -> Failed {
+    pub fn fail(self, error: impl ErrorCode) -> Failed {
+        let msg = error.to_string();
+        let code = error.error_code();
         Failed {
             from: self,
-            error,
+            msg,
+            code,
             failed_at: Utc::now(),
         }
     }
@@ -147,10 +151,25 @@ impl Aborted {
 pub struct Failed {
     #[serde(flatten)]
     pub from: Processing,
-    pub error: ResponseError,
+    pub msg: String,
+    pub code: Code,
     pub failed_at: DateTime<Utc>,
 }
 
+impl Display for Failed {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.msg.fmt(f)
+    }
+}
+
+impl Error for Failed { }
+
+impl ErrorCode for Failed {
+    fn error_code(&self) -> Code {
+        self.code
+    }
+}
+
 impl Failed {
     pub fn id(&self) -> u64 {
         self.from.id()
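
Failed now carries a plain msg and a serializable Code instead of the HTTP crate's ResponseError, and implements ErrorCode itself, which is what the failed.into() call in routes/mod.rs above relies on. A sketch of the flow under these definitions; fail_and_inspect is a hypothetical helper:

use meilisearch_error::{Code, ErrorCode};

// Any error implementing ErrorCode can now fail an update; message and
// code are captured eagerly, so meilisearch-lib keeps no HTTP error types.
fn fail_and_inspect(processing: Processing, error: impl ErrorCode) -> Failed {
    let failed = processing.fail(error);
    // Failed implements ErrorCode too, so the HTTP layer can rebuild its
    // own ResponseError from it (the `failed.into()` seen in the routes).
    let _code: Code = failed.error_code();
    failed
}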

@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 
 use super::{error::UuidResolverError, Result, UUID_STORE_SIZE};
-use crate::helpers::EnvSizer;
+use crate::EnvSizer;
 
 #[derive(Serialize, Deserialize)]
 struct DumpEntry {

@@ -0,0 +1,53 @@
+#[macro_use]
+pub mod error;
+pub mod options;
+
+pub mod index;
+pub mod index_controller;
+
+pub use index_controller::{UpdateResult, UpdateStatus, IndexController as MeiliSearch, update_actor::RegisterUpdate};
+
+use walkdir::WalkDir;
+
+pub trait EnvSizer {
+    fn size(&self) -> u64;
+}
+
+impl EnvSizer for heed::Env {
+    fn size(&self) -> u64 {
+        WalkDir::new(self.path())
+            .into_iter()
+            .filter_map(|entry| entry.ok())
+            .filter_map(|entry| entry.metadata().ok())
+            .filter(|metadata| metadata.is_file())
+            .fold(0, |acc, m| acc + m.len())
+    }
+}
+
+use std::fs::{create_dir_all, File};
+use std::io::Write;
+use std::path::Path;
+
+use flate2::{read::GzDecoder, write::GzEncoder, Compression};
+use tar::{Archive, Builder};
+
+pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
+    let mut f = File::create(dest)?;
+    let gz_encoder = GzEncoder::new(&mut f, Compression::default());
+    let mut tar_encoder = Builder::new(gz_encoder);
+    tar_encoder.append_dir_all(".", src)?;
+    let gz_encoder = tar_encoder.into_inner()?;
+    gz_encoder.finish()?;
+    f.flush()?;
+    Ok(())
+}
+
+pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
+    let f = File::open(&src)?;
+    let gz = GzDecoder::new(f);
+    let mut ar = Archive::new(gz);
+    create_dir_all(&dest)?;
+    ar.unpack(&dest)?;
+    Ok(())
+}
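
The tar.gz helpers move from meilisearch-http's helpers::compression module to free functions at the crate root, matching the crate::to_tar_gz and crate::from_tar_gz call sites above. A small round-trip sketch, assuming only the tempfile crate already present in the dependency list:

fn tar_gz_roundtrip() -> anyhow::Result<()> {
    let src = tempfile::tempdir()?;
    let out = tempfile::tempdir()?;
    std::fs::write(src.path().join("data.mdb"), b"hello")?;

    // Pack the source directory into an archive file...
    let archive = out.path().join("backup.tar.gz");
    meilisearch_lib::to_tar_gz(src.path(), &archive)?;

    // ...then unpack it elsewhere; from_tar_gz creates the destination dir.
    let dst = tempfile::tempdir()?;
    meilisearch_lib::from_tar_gz(&archive, dst.path())?;
    assert!(dst.path().join("data.mdb").exists());
    Ok(())
}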

@@ -0,0 +1,115 @@
+use core::fmt;
+use std::{ops::Deref, str::FromStr};
+
+use byte_unit::{Byte, ByteError};
+use milli::CompressionType;
+use structopt::StructOpt;
+use sysinfo::{RefreshKind, System, SystemExt};
+
+#[derive(Debug, Clone, StructOpt)]
+pub struct IndexerOpts {
+    /// The amount of documents to skip before printing
+    /// a log regarding the indexing advancement.
+    #[structopt(long, default_value = "100000")] // 100k
+    pub log_every_n: usize,
+
+    /// Grenad max number of chunks in bytes.
+    #[structopt(long)]
+    pub max_nb_chunks: Option<usize>,
+
+    /// The maximum amount of memory the indexer will use. It defaults to 2/3
+    /// of the available memory. It is recommended to use something like 80%-90%
+    /// of the available memory, no more.
+    ///
+    /// In case the engine is unable to retrieve the available memory the engine will
+    /// try to use the memory it needs but without real limit, this can lead to
+    /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
+    #[structopt(long, default_value)]
+    pub max_memory: MaxMemory,
+
+    /// The name of the compression algorithm to use when compressing intermediate
+    /// Grenad chunks while indexing documents.
+    ///
+    /// Choosing a fast algorithm will make the indexing faster but may consume more memory.
+    #[structopt(long, default_value = "snappy", possible_values = &["snappy", "zlib", "lz4", "lz4hc", "zstd"])]
+    pub chunk_compression_type: CompressionType,
+
+    /// The level of compression of the chosen algorithm.
+    #[structopt(long, requires = "chunk-compression-type")]
+    pub chunk_compression_level: Option<u32>,
+
+    /// Number of parallel jobs for indexing, defaults to # of CPUs.
+    #[structopt(long)]
+    pub indexing_jobs: Option<usize>,
+}
+
+impl Default for IndexerOpts {
+    fn default() -> Self {
+        Self {
+            log_every_n: 100_000,
+            max_nb_chunks: None,
+            max_memory: MaxMemory::default(),
+            chunk_compression_type: CompressionType::None,
+            chunk_compression_level: None,
+            indexing_jobs: None,
+        }
+    }
+}
+
+/// A type used to detect the max memory available and use 2/3 of it.
+#[derive(Debug, Clone, Copy)]
+pub struct MaxMemory(Option<Byte>);
+
+impl FromStr for MaxMemory {
+    type Err = ByteError;
+
+    fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
+        Byte::from_str(s).map(Some).map(MaxMemory)
+    }
+}
+
+impl Default for MaxMemory {
+    fn default() -> MaxMemory {
+        MaxMemory(
+            total_memory_bytes()
+                .map(|bytes| bytes * 2 / 3)
+                .map(Byte::from_bytes),
+        )
+    }
+}
+
+impl fmt::Display for MaxMemory {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.0 {
+            Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
+            None => f.write_str("unknown"),
+        }
+    }
+}
+
+impl Deref for MaxMemory {
+    type Target = Option<Byte>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl MaxMemory {
+    pub fn unlimited() -> Self {
+        Self(None)
+    }
+}
+
+/// Returns the total amount of bytes available or `None` if this system isn't supported.
+fn total_memory_bytes() -> Option<u64> {
+    if System::IS_SUPPORTED {
+        let memory_kind = RefreshKind::new().with_memory();
+        let mut system = System::new_with_specifics(memory_kind);
+        system.refresh_memory();
+        Some(system.total_memory() * 1024) // KiB into bytes
+    } else {
+        None
+    }
+}
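
IndexerOpts and MaxMemory arrive here essentially verbatim from meilisearch-http/src/option.rs. Illustrative only, based on the impls above: how the two MaxMemory entry points behave.

use std::str::FromStr;

fn max_memory_examples() {
    // Parses human-readable sizes through byte_unit.
    let parsed = MaxMemory::from_str("4 GiB").unwrap();
    assert!(parsed.is_some()); // Deref<Target = Option<Byte>>

    // Default probes sysinfo and keeps 2/3 of total RAM,
    // or None (no limit) when the platform is unsupported.
    let _default = MaxMemory::default();
    let _no_limit = MaxMemory::unlimited();
}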