mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
retrieve update status
This commit is contained in:
parent
0cd9e62fc6
commit
54861335a0
52
Cargo.lock
generated
52
Cargo.lock
generated
@ -1188,6 +1188,22 @@ dependencies = [
|
|||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heed"
|
||||||
|
version = "0.10.6"
|
||||||
|
dependencies = [
|
||||||
|
"byteorder",
|
||||||
|
"heed-traits 0.7.0",
|
||||||
|
"heed-types 0.7.2",
|
||||||
|
"libc",
|
||||||
|
"lmdb-rkv-sys",
|
||||||
|
"once_cell",
|
||||||
|
"page_size",
|
||||||
|
"synchronoise",
|
||||||
|
"url",
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heed"
|
name = "heed"
|
||||||
version = "0.10.6"
|
version = "0.10.6"
|
||||||
@ -1195,8 +1211,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "afcc6c911acaadad3ebe9f1ef1707d80bd71c92037566f47b6238a03b60adf1a"
|
checksum = "afcc6c911acaadad3ebe9f1ef1707d80bd71c92037566f47b6238a03b60adf1a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"heed-traits",
|
"heed-traits 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"heed-types",
|
"heed-types 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"libc",
|
"libc",
|
||||||
"lmdb-rkv-sys",
|
"lmdb-rkv-sys",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
@ -1207,12 +1223,27 @@ dependencies = [
|
|||||||
"zerocopy",
|
"zerocopy",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heed-traits"
|
||||||
|
version = "0.7.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heed-traits"
|
name = "heed-traits"
|
||||||
version = "0.7.0"
|
version = "0.7.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c"
|
checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heed-types"
|
||||||
|
version = "0.7.2"
|
||||||
|
dependencies = [
|
||||||
|
"bincode",
|
||||||
|
"heed-traits 0.7.0",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"zerocopy",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heed-types"
|
name = "heed-types"
|
||||||
version = "0.7.2"
|
version = "0.7.2"
|
||||||
@ -1220,7 +1251,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405"
|
checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bincode",
|
"bincode",
|
||||||
"heed-traits",
|
"heed-traits 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"zerocopy",
|
"zerocopy",
|
||||||
@ -1615,7 +1646,7 @@ dependencies = [
|
|||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"grenad",
|
"grenad",
|
||||||
"heed",
|
"heed 0.10.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"http",
|
"http",
|
||||||
"indexmap",
|
"indexmap",
|
||||||
"jemallocator",
|
"jemallocator",
|
||||||
@ -1699,6 +1730,7 @@ dependencies = [
|
|||||||
"bstr",
|
"bstr",
|
||||||
"byte-unit",
|
"byte-unit",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
|
"chrono",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"csv",
|
"csv",
|
||||||
"either",
|
"either",
|
||||||
@ -1706,7 +1738,7 @@ dependencies = [
|
|||||||
"fst",
|
"fst",
|
||||||
"fxhash",
|
"fxhash",
|
||||||
"grenad",
|
"grenad",
|
||||||
"heed",
|
"heed 0.10.6",
|
||||||
"human_format",
|
"human_format",
|
||||||
"itertools",
|
"itertools",
|
||||||
"jemallocator",
|
"jemallocator",
|
||||||
@ -1728,6 +1760,7 @@ dependencies = [
|
|||||||
"roaring",
|
"roaring",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"serde_millis",
|
||||||
"slice-group-by",
|
"slice-group-by",
|
||||||
"smallstr",
|
"smallstr",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
@ -2594,6 +2627,15 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_millis"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6e2dc780ca5ee2c369d1d01d100270203c4ff923d2a4264812d723766434d00"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_qs"
|
name = "serde_qs"
|
||||||
version = "0.8.2"
|
version = "0.8.2"
|
||||||
|
94
src/data/mod.rs
Normal file
94
src/data/mod.rs
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
mod search;
|
||||||
|
mod updates;
|
||||||
|
|
||||||
|
pub use search::{SearchQuery, SearchResult};
|
||||||
|
|
||||||
|
use std::fs::create_dir_all;
|
||||||
|
use std::ops::Deref;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use milli::Index;
|
||||||
|
use sha2::Digest;
|
||||||
|
|
||||||
|
use crate::option::Opt;
|
||||||
|
use crate::updates::UpdateQueue;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Data {
|
||||||
|
inner: Arc<DataInner>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for Data {
|
||||||
|
type Target = DataInner;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.inner
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct DataInner {
|
||||||
|
pub indexes: Arc<Index>,
|
||||||
|
pub update_queue: Arc<UpdateQueue>,
|
||||||
|
api_keys: ApiKeys,
|
||||||
|
options: Opt,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct ApiKeys {
|
||||||
|
pub public: Option<String>,
|
||||||
|
pub private: Option<String>,
|
||||||
|
pub master: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ApiKeys {
|
||||||
|
pub fn generate_missing_api_keys(&mut self) {
|
||||||
|
if let Some(master_key) = &self.master {
|
||||||
|
if self.private.is_none() {
|
||||||
|
let key = format!("{}-private", master_key);
|
||||||
|
let sha = sha2::Sha256::digest(key.as_bytes());
|
||||||
|
self.private = Some(format!("{:x}", sha));
|
||||||
|
}
|
||||||
|
if self.public.is_none() {
|
||||||
|
let key = format!("{}-public", master_key);
|
||||||
|
let sha = sha2::Sha256::digest(key.as_bytes());
|
||||||
|
self.public = Some(format!("{:x}", sha));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Data {
|
||||||
|
pub fn new(options: Opt) -> anyhow::Result<Data> {
|
||||||
|
let db_size = options.max_mdb_size.get_bytes() as usize;
|
||||||
|
let path = options.db_path.join("main");
|
||||||
|
create_dir_all(&path)?;
|
||||||
|
let indexes = Index::new(&path, Some(db_size))?;
|
||||||
|
let indexes = Arc::new(indexes);
|
||||||
|
|
||||||
|
let update_queue = Arc::new(UpdateQueue::new(&options, indexes.clone())?);
|
||||||
|
|
||||||
|
let mut api_keys = ApiKeys {
|
||||||
|
master: options.clone().master_key,
|
||||||
|
private: None,
|
||||||
|
public: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
api_keys.generate_missing_api_keys();
|
||||||
|
|
||||||
|
let inner = DataInner { indexes, options, update_queue, api_keys };
|
||||||
|
let inner = Arc::new(inner);
|
||||||
|
|
||||||
|
Ok(Data { inner })
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn http_payload_size_limit(&self) -> usize {
|
||||||
|
self.options.http_payload_size_limit.get_bytes() as usize
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn api_keys(&self) -> &ApiKeys {
|
||||||
|
&self.api_keys
|
||||||
|
}
|
||||||
|
}
|
@ -1,28 +1,20 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fs::create_dir_all;
|
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::ops::Deref;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use async_compression::tokio_02::write::GzipEncoder;
|
|
||||||
use futures_util::stream::StreamExt;
|
|
||||||
use tokio::io::AsyncWriteExt;
|
|
||||||
use milli::{Index, SearchResult as Results, obkv_to_json};
|
|
||||||
use milli::update::{IndexDocumentsMethod, UpdateFormat};
|
|
||||||
use sha2::Digest;
|
|
||||||
use serde_json::{Value, Map};
|
use serde_json::{Value, Map};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use milli::{SearchResult as Results, obkv_to_json};
|
||||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||||
|
|
||||||
use crate::option::Opt;
|
use super::Data;
|
||||||
use crate::updates::{UpdateQueue, UpdateMeta, UpdateStatus, UpdateMetaProgress};
|
|
||||||
|
|
||||||
const DEFAULT_SEARCH_LIMIT: usize = 20;
|
const DEFAULT_SEARCH_LIMIT: usize = 20;
|
||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||||
|
#[allow(dead_code)]
|
||||||
pub struct SearchQuery {
|
pub struct SearchQuery {
|
||||||
q: Option<String>,
|
q: Option<String>,
|
||||||
offset: Option<usize>,
|
offset: Option<usize>,
|
||||||
@ -48,176 +40,6 @@ pub struct SearchResult {
|
|||||||
processing_time_ms: u128,
|
processing_time_ms: u128,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Data {
|
|
||||||
inner: Arc<DataInner>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Deref for Data {
|
|
||||||
type Target = DataInner;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
&self.inner
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct DataInner {
|
|
||||||
pub indexes: Arc<Index>,
|
|
||||||
pub update_queue: Arc<UpdateQueue>,
|
|
||||||
api_keys: ApiKeys,
|
|
||||||
options: Opt,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct ApiKeys {
|
|
||||||
pub public: Option<String>,
|
|
||||||
pub private: Option<String>,
|
|
||||||
pub master: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ApiKeys {
|
|
||||||
pub fn generate_missing_api_keys(&mut self) {
|
|
||||||
if let Some(master_key) = &self.master {
|
|
||||||
if self.private.is_none() {
|
|
||||||
let key = format!("{}-private", master_key);
|
|
||||||
let sha = sha2::Sha256::digest(key.as_bytes());
|
|
||||||
self.private = Some(format!("{:x}", sha));
|
|
||||||
}
|
|
||||||
if self.public.is_none() {
|
|
||||||
let key = format!("{}-public", master_key);
|
|
||||||
let sha = sha2::Sha256::digest(key.as_bytes());
|
|
||||||
self.public = Some(format!("{:x}", sha));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Data {
|
|
||||||
pub fn new(options: Opt) -> anyhow::Result<Data> {
|
|
||||||
let db_size = options.max_mdb_size.get_bytes() as usize;
|
|
||||||
let path = options.db_path.join("main");
|
|
||||||
create_dir_all(&path)?;
|
|
||||||
let indexes = Index::new(&path, Some(db_size))?;
|
|
||||||
let indexes = Arc::new(indexes);
|
|
||||||
|
|
||||||
let update_queue = Arc::new(UpdateQueue::new(&options, indexes.clone())?);
|
|
||||||
|
|
||||||
let mut api_keys = ApiKeys {
|
|
||||||
master: options.clone().master_key,
|
|
||||||
private: None,
|
|
||||||
public: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
api_keys.generate_missing_api_keys();
|
|
||||||
|
|
||||||
let inner = DataInner { indexes, options, update_queue, api_keys };
|
|
||||||
let inner = Arc::new(inner);
|
|
||||||
|
|
||||||
Ok(Data { inner })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn add_documents<B, E, S>(
|
|
||||||
&self,
|
|
||||||
_index: S,
|
|
||||||
method: IndexDocumentsMethod,
|
|
||||||
format: UpdateFormat,
|
|
||||||
mut stream: impl futures::Stream<Item=Result<B, E>> + Unpin,
|
|
||||||
) -> anyhow::Result<UpdateStatus<UpdateMeta, UpdateMetaProgress, String>>
|
|
||||||
where
|
|
||||||
B: Deref<Target = [u8]>,
|
|
||||||
E: std::error::Error + Send + Sync + 'static,
|
|
||||||
S: AsRef<str>,
|
|
||||||
{
|
|
||||||
let file = tokio::task::spawn_blocking(tempfile::tempfile).await?;
|
|
||||||
let file = tokio::fs::File::from_std(file?);
|
|
||||||
let mut encoder = GzipEncoder::new(file);
|
|
||||||
|
|
||||||
while let Some(result) = stream.next().await {
|
|
||||||
let bytes = &*result?;
|
|
||||||
encoder.write_all(&bytes[..]).await?;
|
|
||||||
}
|
|
||||||
|
|
||||||
encoder.shutdown().await?;
|
|
||||||
let mut file = encoder.into_inner();
|
|
||||||
file.sync_all().await?;
|
|
||||||
let file = file.into_std().await;
|
|
||||||
let mmap = unsafe { memmap::Mmap::map(&file)? };
|
|
||||||
|
|
||||||
let meta = UpdateMeta::DocumentsAddition { method, format };
|
|
||||||
|
|
||||||
let queue = self.update_queue.clone();
|
|
||||||
let meta_cloned = meta.clone();
|
|
||||||
let update_id = tokio::task::spawn_blocking(move || queue.register_update(&meta_cloned, &mmap[..])).await??;
|
|
||||||
|
|
||||||
Ok(UpdateStatus::Pending { update_id, meta })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn search<S: AsRef<str>>(&self, _index: S, search_query: SearchQuery) -> anyhow::Result<SearchResult> {
|
|
||||||
let start = Instant::now();
|
|
||||||
let index = &self.indexes;
|
|
||||||
let rtxn = index.read_txn()?;
|
|
||||||
|
|
||||||
let mut search = index.search(&rtxn);
|
|
||||||
if let Some(query) = &search_query.q {
|
|
||||||
search.query(query);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(offset) = search_query.offset {
|
|
||||||
search.offset(offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
let limit = search_query.limit.unwrap_or(DEFAULT_SEARCH_LIMIT);
|
|
||||||
search.limit(limit);
|
|
||||||
|
|
||||||
let Results { found_words, documents_ids, nb_hits, .. } = search.execute().unwrap();
|
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
|
||||||
|
|
||||||
let displayed_fields = match index.displayed_fields(&rtxn).unwrap() {
|
|
||||||
Some(fields) => Cow::Borrowed(fields),
|
|
||||||
None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
|
|
||||||
};
|
|
||||||
|
|
||||||
let attributes_to_highlight = match search_query.attributes_to_highlight {
|
|
||||||
Some(fields) => fields.iter().map(ToOwned::to_owned).collect(),
|
|
||||||
None => HashSet::new(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let stop_words = fst::Set::default();
|
|
||||||
let highlighter = Highlighter::new(&stop_words);
|
|
||||||
let mut documents = Vec::new();
|
|
||||||
for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() {
|
|
||||||
let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap();
|
|
||||||
highlighter.highlight_record(&mut object, &found_words, &attributes_to_highlight);
|
|
||||||
documents.push(object);
|
|
||||||
}
|
|
||||||
|
|
||||||
let processing_time_ms = start.elapsed().as_millis();
|
|
||||||
|
|
||||||
let result = SearchResult {
|
|
||||||
hits: documents,
|
|
||||||
nb_hits,
|
|
||||||
query: search_query.q.unwrap_or_default(),
|
|
||||||
offset: search_query.offset.unwrap_or(0),
|
|
||||||
limit,
|
|
||||||
processing_time_ms,
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn http_payload_size_limit(&self) -> usize {
|
|
||||||
self.options.http_payload_size_limit.get_bytes() as usize
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn api_keys(&self) -> &ApiKeys {
|
|
||||||
&self.api_keys
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Highlighter<'a, A> {
|
struct Highlighter<'a, A> {
|
||||||
analyzer: Analyzer<'a, A>,
|
analyzer: Analyzer<'a, A>,
|
||||||
}
|
}
|
||||||
@ -276,3 +98,59 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Data {
|
||||||
|
pub fn search<S: AsRef<str>>(&self, _index: S, search_query: SearchQuery) -> anyhow::Result<SearchResult> {
|
||||||
|
let start = Instant::now();
|
||||||
|
let index = &self.indexes;
|
||||||
|
let rtxn = index.read_txn()?;
|
||||||
|
|
||||||
|
let mut search = index.search(&rtxn);
|
||||||
|
if let Some(query) = &search_query.q {
|
||||||
|
search.query(query);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(offset) = search_query.offset {
|
||||||
|
search.offset(offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
let limit = search_query.limit.unwrap_or(DEFAULT_SEARCH_LIMIT);
|
||||||
|
search.limit(limit);
|
||||||
|
|
||||||
|
let Results { found_words, documents_ids, nb_hits, .. } = search.execute().unwrap();
|
||||||
|
|
||||||
|
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||||
|
|
||||||
|
let displayed_fields = match index.displayed_fields(&rtxn).unwrap() {
|
||||||
|
Some(fields) => Cow::Borrowed(fields),
|
||||||
|
None => Cow::Owned(fields_ids_map.iter().map(|(id, _)| id).collect()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let attributes_to_highlight = match search_query.attributes_to_highlight {
|
||||||
|
Some(fields) => fields.iter().map(ToOwned::to_owned).collect(),
|
||||||
|
None => HashSet::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let stop_words = fst::Set::default();
|
||||||
|
let highlighter = Highlighter::new(&stop_words);
|
||||||
|
let mut documents = Vec::new();
|
||||||
|
for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() {
|
||||||
|
let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap();
|
||||||
|
highlighter.highlight_record(&mut object, &found_words, &attributes_to_highlight);
|
||||||
|
documents.push(object);
|
||||||
|
}
|
||||||
|
|
||||||
|
let processing_time_ms = start.elapsed().as_millis();
|
||||||
|
|
||||||
|
let result = SearchResult {
|
||||||
|
hits: documents,
|
||||||
|
nb_hits,
|
||||||
|
query: search_query.q.unwrap_or_default(),
|
||||||
|
offset: search_query.offset.unwrap_or(0),
|
||||||
|
limit,
|
||||||
|
processing_time_ms,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
}
|
53
src/data/updates.rs
Normal file
53
src/data/updates.rs
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
|
use async_compression::tokio_02::write::GzipEncoder;
|
||||||
|
use futures_util::stream::StreamExt;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
use milli::update::{IndexDocumentsMethod, UpdateFormat};
|
||||||
|
use milli::update_store::UpdateStatus;
|
||||||
|
|
||||||
|
use super::Data;
|
||||||
|
use crate::updates::UpdateMeta;
|
||||||
|
|
||||||
|
impl Data {
|
||||||
|
pub async fn add_documents<B, E, S>(
|
||||||
|
&self,
|
||||||
|
_index: S,
|
||||||
|
method: IndexDocumentsMethod,
|
||||||
|
format: UpdateFormat,
|
||||||
|
mut stream: impl futures::Stream<Item=Result<B, E>> + Unpin,
|
||||||
|
) -> anyhow::Result<UpdateStatus<UpdateMeta, String, String>>
|
||||||
|
where
|
||||||
|
B: Deref<Target = [u8]>,
|
||||||
|
E: std::error::Error + Send + Sync + 'static,
|
||||||
|
S: AsRef<str>,
|
||||||
|
{
|
||||||
|
let file = tokio::task::spawn_blocking(tempfile::tempfile).await?;
|
||||||
|
let file = tokio::fs::File::from_std(file?);
|
||||||
|
let mut encoder = GzipEncoder::new(file);
|
||||||
|
|
||||||
|
while let Some(result) = stream.next().await {
|
||||||
|
let bytes = &*result?;
|
||||||
|
encoder.write_all(&bytes[..]).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
encoder.shutdown().await?;
|
||||||
|
let mut file = encoder.into_inner();
|
||||||
|
file.sync_all().await?;
|
||||||
|
let file = file.into_std().await;
|
||||||
|
let mmap = unsafe { memmap::Mmap::map(&file)? };
|
||||||
|
|
||||||
|
let meta = UpdateMeta::DocumentsAddition { method, format };
|
||||||
|
|
||||||
|
let queue = self.update_queue.clone();
|
||||||
|
let update = tokio::task::spawn_blocking(move || queue.register_update(meta, &mmap[..])).await??;
|
||||||
|
|
||||||
|
Ok(update.into())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn get_update_status(&self, _index: &str, uid: u64) -> anyhow::Result<Option<UpdateStatus<UpdateMeta, String, String>>> {
|
||||||
|
self.update_queue.get_update_status(uid)
|
||||||
|
}
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
use actix_web::{delete, get, post, put};
|
use actix_web::{delete, get, post, put};
|
||||||
use actix_web::{web, HttpResponse};
|
use actix_web::{web, HttpResponse};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
|
use log::error;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::Data;
|
use crate::Data;
|
||||||
@ -93,8 +94,8 @@ async fn delete_index(
|
|||||||
|
|
||||||
#[derive(Deserialize)]
|
#[derive(Deserialize)]
|
||||||
struct UpdateParam {
|
struct UpdateParam {
|
||||||
_index_uid: String,
|
index_uid: String,
|
||||||
_update_id: u64,
|
update_id: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get(
|
#[get(
|
||||||
@ -102,10 +103,23 @@ struct UpdateParam {
|
|||||||
wrap = "Authentication::Private"
|
wrap = "Authentication::Private"
|
||||||
)]
|
)]
|
||||||
async fn get_update_status(
|
async fn get_update_status(
|
||||||
_data: web::Data<Data>,
|
data: web::Data<Data>,
|
||||||
_path: web::Path<UpdateParam>,
|
path: web::Path<UpdateParam>,
|
||||||
) -> Result<HttpResponse, ResponseError> {
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
|
let result = data.get_update_status(&path.index_uid, path.update_id);
|
||||||
|
match result {
|
||||||
|
Ok(Some(meta)) => {
|
||||||
|
let json = serde_json::to_string(&meta).unwrap();
|
||||||
|
Ok(HttpResponse::Ok().body(json))
|
||||||
|
}
|
||||||
|
Ok(None) => {
|
||||||
todo!()
|
todo!()
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("{}", e);
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get("/indexes/{index_uid}/updates", wrap = "Authentication::Private")]
|
#[get("/indexes/{index_uid}/updates", wrap = "Authentication::Private")]
|
||||||
|
@ -8,14 +8,15 @@ use std::ops::Deref;
|
|||||||
use std::fs::create_dir_all;
|
use std::fs::create_dir_all;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use byte_unit::Byte;
|
||||||
use flate2::read::GzDecoder;
|
use flate2::read::GzDecoder;
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use byte_unit::Byte;
|
use log::info;
|
||||||
use milli::update::{UpdateBuilder, UpdateFormat, IndexDocumentsMethod, UpdateIndexingStep::*};
|
use milli::Index;
|
||||||
use milli::{UpdateStore, UpdateHandler as Handler, Index};
|
use milli::update::{UpdateBuilder, UpdateFormat, IndexDocumentsMethod };
|
||||||
|
use milli::update_store::{UpdateStore, UpdateHandler as Handler, UpdateStatus, Processing, Processed, Failed};
|
||||||
use rayon::ThreadPool;
|
use rayon::ThreadPool;
|
||||||
use serde::{Serialize, Deserialize};
|
use serde::{Serialize, Deserialize};
|
||||||
use tokio::sync::broadcast;
|
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
use crate::option::Opt;
|
use crate::option::Opt;
|
||||||
@ -40,23 +41,13 @@ pub enum UpdateMetaProgress {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
|
||||||
#[serde(tag = "type")]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub enum UpdateStatus<M, P, N> {
|
|
||||||
Pending { update_id: u64, meta: M },
|
|
||||||
Progressing { update_id: u64, meta: P },
|
|
||||||
Processed { update_id: u64, meta: N },
|
|
||||||
Aborted { update_id: u64, meta: M },
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct UpdateQueue {
|
pub struct UpdateQueue {
|
||||||
inner: Arc<UpdateStore<UpdateMeta, String>>,
|
inner: Arc<UpdateStore<UpdateMeta, String, String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Deref for UpdateQueue {
|
impl Deref for UpdateQueue {
|
||||||
type Target = Arc<UpdateStore<UpdateMeta, String>>;
|
type Target = Arc<UpdateStore<UpdateMeta, String, String>>;
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
fn deref(&self) -> &Self::Target {
|
||||||
&self.inner
|
&self.inner
|
||||||
@ -115,8 +106,6 @@ pub struct IndexerOpts {
|
|||||||
pub indexing_jobs: Option<usize>,
|
pub indexing_jobs: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
type UpdateSender = broadcast::Sender<UpdateStatus<UpdateMeta, UpdateMetaProgress, String>>;
|
|
||||||
|
|
||||||
struct UpdateHandler {
|
struct UpdateHandler {
|
||||||
indexes: Arc<Index>,
|
indexes: Arc<Index>,
|
||||||
max_nb_chunks: Option<usize>,
|
max_nb_chunks: Option<usize>,
|
||||||
@ -127,14 +116,12 @@ struct UpdateHandler {
|
|||||||
linked_hash_map_size: usize,
|
linked_hash_map_size: usize,
|
||||||
chunk_compression_type: CompressionType,
|
chunk_compression_type: CompressionType,
|
||||||
chunk_fusing_shrink_size: u64,
|
chunk_fusing_shrink_size: u64,
|
||||||
update_status_sender: UpdateSender,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UpdateHandler {
|
impl UpdateHandler {
|
||||||
fn new(
|
fn new(
|
||||||
opt: &IndexerOpts,
|
opt: &IndexerOpts,
|
||||||
indexes: Arc<Index>,
|
indexes: Arc<Index>,
|
||||||
update_status_sender: UpdateSender,
|
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let thread_pool = rayon::ThreadPoolBuilder::new()
|
let thread_pool = rayon::ThreadPoolBuilder::new()
|
||||||
.num_threads(opt.indexing_jobs.unwrap_or(0))
|
.num_threads(opt.indexing_jobs.unwrap_or(0))
|
||||||
@ -149,7 +136,6 @@ impl UpdateHandler {
|
|||||||
linked_hash_map_size: opt.linked_hash_map_size,
|
linked_hash_map_size: opt.linked_hash_map_size,
|
||||||
chunk_compression_type: opt.chunk_compression_type,
|
chunk_compression_type: opt.chunk_compression_type,
|
||||||
chunk_fusing_shrink_size: opt.chunk_fusing_shrink_size.get_bytes(),
|
chunk_fusing_shrink_size: opt.chunk_fusing_shrink_size.get_bytes(),
|
||||||
update_status_sender,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -191,23 +177,7 @@ impl UpdateHandler {
|
|||||||
Box::new(content) as Box<dyn io::Read>
|
Box::new(content) as Box<dyn io::Read>
|
||||||
};
|
};
|
||||||
|
|
||||||
let result = builder.execute(reader, |indexing_step, update_id| {
|
let result = builder.execute(reader, |indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
|
||||||
let (current, total) = match indexing_step {
|
|
||||||
TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),
|
|
||||||
ComputeIdsAndMergeDocuments { documents_seen, total_documents } => (documents_seen, Some(total_documents)),
|
|
||||||
IndexDocuments { documents_seen, total_documents } => (documents_seen, Some(total_documents)),
|
|
||||||
MergeDataIntoFinalDatabase { databases_seen, total_databases } => (databases_seen, Some(total_databases)),
|
|
||||||
};
|
|
||||||
let _ = self.update_status_sender.send(UpdateStatus::Progressing {
|
|
||||||
update_id,
|
|
||||||
meta: UpdateMetaProgress::DocumentsAddition {
|
|
||||||
step: indexing_step.step(),
|
|
||||||
total_steps: indexing_step.number_of_steps(),
|
|
||||||
current,
|
|
||||||
total,
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(()) => wtxn.commit().map_err(Into::into),
|
Ok(()) => wtxn.commit().map_err(Into::into),
|
||||||
@ -226,57 +196,41 @@ impl UpdateHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_settings(&self, settings: Settings, update_builder: UpdateBuilder) -> Result<()> {
|
fn update_settings(&self, settings: &Settings, update_builder: UpdateBuilder) -> Result<()> {
|
||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = self.indexes.write_txn()?;
|
let mut wtxn = self.indexes.write_txn()?;
|
||||||
let mut builder = update_builder.settings(&mut wtxn, &self.indexes);
|
let mut builder = update_builder.settings(&mut wtxn, &self.indexes);
|
||||||
|
|
||||||
// We transpose the settings JSON struct into a real setting update.
|
// We transpose the settings JSON struct into a real setting update.
|
||||||
if let Some(names) = settings.searchable_attributes {
|
if let Some(ref names) = settings.searchable_attributes {
|
||||||
match names {
|
match names {
|
||||||
Some(names) => builder.set_searchable_fields(names),
|
Some(names) => builder.set_searchable_fields(&names),
|
||||||
None => builder.reset_searchable_fields(),
|
None => builder.reset_searchable_fields(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We transpose the settings JSON struct into a real setting update.
|
// We transpose the settings JSON struct into a real setting update.
|
||||||
if let Some(names) = settings.displayed_attributes {
|
if let Some(ref names) = settings.displayed_attributes {
|
||||||
match names {
|
match names {
|
||||||
Some(names) => builder.set_displayed_fields(names),
|
Some(names) => builder.set_displayed_fields(&names),
|
||||||
None => builder.reset_displayed_fields(),
|
None => builder.reset_displayed_fields(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We transpose the settings JSON struct into a real setting update.
|
// We transpose the settings JSON struct into a real setting update.
|
||||||
if let Some(facet_types) = settings.faceted_attributes {
|
if let Some(ref facet_types) = settings.faceted_attributes {
|
||||||
builder.set_faceted_fields(facet_types);
|
builder.set_faceted_fields(&facet_types);
|
||||||
}
|
}
|
||||||
|
|
||||||
// We transpose the settings JSON struct into a real setting update.
|
// We transpose the settings JSON struct into a real setting update.
|
||||||
if let Some(criteria) = settings.criteria {
|
if let Some(ref criteria) = settings.criteria {
|
||||||
match criteria {
|
match criteria {
|
||||||
Some(criteria) => builder.set_criteria(criteria),
|
Some(criteria) => builder.set_criteria(&criteria),
|
||||||
None => builder.reset_criteria(),
|
None => builder.reset_criteria(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let result = builder.execute(|indexing_step, update_id| {
|
let result = builder.execute(|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step));
|
||||||
let (current, total) = match indexing_step {
|
|
||||||
TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),
|
|
||||||
ComputeIdsAndMergeDocuments { documents_seen, total_documents } => (documents_seen, Some(total_documents)),
|
|
||||||
IndexDocuments { documents_seen, total_documents } => (documents_seen, Some(total_documents)),
|
|
||||||
MergeDataIntoFinalDatabase { databases_seen, total_databases } => (databases_seen, Some(total_databases)),
|
|
||||||
};
|
|
||||||
let _ = self.update_status_sender.send(UpdateStatus::Progressing {
|
|
||||||
update_id,
|
|
||||||
meta: UpdateMetaProgress::DocumentsAddition {
|
|
||||||
step: indexing_step.step(),
|
|
||||||
total_steps: indexing_step.number_of_steps(),
|
|
||||||
current,
|
|
||||||
total,
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(_count) => wtxn.commit().map_err(Into::into),
|
Ok(_count) => wtxn.commit().map_err(Into::into),
|
||||||
@ -284,7 +238,7 @@ impl UpdateHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn update_facets(&self, levels: Facets, update_builder: UpdateBuilder) -> Result<()> {
|
fn update_facets(&self, levels: &Facets, update_builder: UpdateBuilder) -> Result<()> {
|
||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = self.indexes.write_txn()?;
|
let mut wtxn = self.indexes.write_txn()?;
|
||||||
let mut builder = update_builder.facets(&mut wtxn, &self.indexes);
|
let mut builder = update_builder.facets(&mut wtxn, &self.indexes);
|
||||||
@ -301,28 +255,30 @@ impl UpdateHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Handler<UpdateMeta, String> for UpdateHandler {
|
impl Handler<UpdateMeta, String, String> for UpdateHandler {
|
||||||
fn handle_update(&mut self, update_id: u64, meta: UpdateMeta, content: &[u8]) -> heed::Result<String> {
|
fn handle_update(
|
||||||
|
&mut self,
|
||||||
|
update_id: u64,
|
||||||
|
meta: Processing<UpdateMeta>,
|
||||||
|
content: &[u8]
|
||||||
|
) -> Result<Processed<UpdateMeta, String>, Failed<UpdateMeta, String>> {
|
||||||
use UpdateMeta::*;
|
use UpdateMeta::*;
|
||||||
|
|
||||||
let update_builder = self.update_buidler(update_id);
|
let update_builder = self.update_buidler(update_id);
|
||||||
|
|
||||||
let result: anyhow::Result<()> = match meta {
|
let result: anyhow::Result<()> = match meta.meta() {
|
||||||
DocumentsAddition { method, format } => {
|
DocumentsAddition { method, format } => self.update_documents(*format, *method, content, update_builder),
|
||||||
self.update_documents(format, method, content, update_builder)
|
|
||||||
},
|
|
||||||
ClearDocuments => self.clear_documents(update_builder),
|
ClearDocuments => self.clear_documents(update_builder),
|
||||||
Settings(settings) => self.update_settings(settings, update_builder),
|
Settings(settings) => self.update_settings(settings, update_builder),
|
||||||
Facets(levels) => self.update_facets(levels, update_builder),
|
Facets(levels) => self.update_facets(levels, update_builder),
|
||||||
};
|
};
|
||||||
|
|
||||||
let meta = match result {
|
let new_meta = match result {
|
||||||
Ok(()) => format!("valid update content"),
|
Ok(()) => format!("valid update content"),
|
||||||
Err(e) => format!("error while processing update content: {:?}", e),
|
Err(e) => format!("error while processing update content: {:?}", e),
|
||||||
};
|
};
|
||||||
|
|
||||||
let processed = UpdateStatus::Processed { update_id, meta: meta.clone() };
|
let meta = meta.process(new_meta);
|
||||||
let _ = self.update_status_sender.send(processed);
|
|
||||||
|
|
||||||
Ok(meta)
|
Ok(meta)
|
||||||
}
|
}
|
||||||
@ -333,8 +289,7 @@ impl UpdateQueue {
|
|||||||
opt: &Opt,
|
opt: &Opt,
|
||||||
indexes: Arc<Index>,
|
indexes: Arc<Index>,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let (sender, _) = broadcast::channel(100);
|
let handler = UpdateHandler::new(&opt.indexer_options, indexes)?;
|
||||||
let handler = UpdateHandler::new(&opt.indexer_options, indexes, sender)?;
|
|
||||||
let size = opt.max_udb_size.get_bytes() as usize;
|
let size = opt.max_udb_size.get_bytes() as usize;
|
||||||
let path = opt.db_path.join("updates.mdb");
|
let path = opt.db_path.join("updates.mdb");
|
||||||
create_dir_all(&path)?;
|
create_dir_all(&path)?;
|
||||||
@ -345,4 +300,9 @@ impl UpdateQueue {
|
|||||||
)?;
|
)?;
|
||||||
Ok(Self { inner })
|
Ok(Self { inner })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn get_update_status(&self, update_id: u64) -> Result<Option<UpdateStatus<UpdateMeta, String, String>>> {
|
||||||
|
Ok(self.inner.meta(update_id)?)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user