2019-10-23 16:32:11 +02:00
|
|
|
mod clear_all;
|
2019-10-11 15:33:35 +02:00
|
|
|
mod customs_update;
|
2019-10-03 15:04:11 +02:00
|
|
|
mod documents_addition;
|
|
|
|
mod documents_deletion;
|
2019-10-07 17:48:26 +02:00
|
|
|
mod schema_update;
|
2019-10-29 15:24:09 +01:00
|
|
|
mod stop_words_addition;
|
2019-10-29 15:43:26 +01:00
|
|
|
mod stop_words_deletion;
|
2019-12-11 16:49:09 +01:00
|
|
|
mod synonyms_update;
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2019-10-23 16:32:11 +02:00
|
|
|
pub use self::clear_all::{apply_clear_all, push_clear_all};
|
2019-10-11 15:33:35 +02:00
|
|
|
pub use self::customs_update::{apply_customs_update, push_customs_update};
|
2019-11-05 15:23:41 +01:00
|
|
|
pub use self::documents_addition::{
|
|
|
|
apply_documents_addition, apply_documents_partial_addition, DocumentsAddition,
|
|
|
|
};
|
2019-10-18 13:05:28 +02:00
|
|
|
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
2019-10-08 17:24:11 +02:00
|
|
|
pub use self::schema_update::{apply_schema_update, push_schema_update};
|
2019-10-29 15:24:09 +01:00
|
|
|
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
|
2019-10-29 15:43:26 +01:00
|
|
|
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
|
2019-12-11 16:49:09 +01:00
|
|
|
pub use self::synonyms_update::{apply_synonyms_update, SynonymsUpdate};
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2019-10-08 17:24:11 +02:00
|
|
|
use std::cmp;
|
2019-11-05 15:23:41 +01:00
|
|
|
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
2019-11-12 16:18:53 +01:00
|
|
|
use std::time::Instant;
|
2019-10-07 17:48:26 +02:00
|
|
|
|
2019-11-12 18:00:47 +01:00
|
|
|
use chrono::{DateTime, Utc};
|
2019-10-21 12:05:53 +02:00
|
|
|
use heed::Result as ZResult;
|
2019-10-07 16:16:04 +02:00
|
|
|
use log::debug;
|
2019-10-18 13:05:28 +02:00
|
|
|
use serde::{Deserialize, Serialize};
|
2020-01-16 16:19:04 +01:00
|
|
|
use fst::{IntoStreamer, Streamer};
|
|
|
|
use sdset::Set;
|
2019-10-07 17:48:26 +02:00
|
|
|
|
2019-10-21 17:33:52 +02:00
|
|
|
use crate::{store, DocumentId, MResult};
|
2019-11-26 16:12:06 +01:00
|
|
|
use crate::database::{MainT, UpdateT};
|
2019-11-26 11:06:55 +01:00
|
|
|
use meilisearch_schema::Schema;
|
2019-10-03 15:04:11 +02:00
|
|
|
|
2019-10-16 17:05:24 +02:00
|
|
|
/// An update enqueued in the update store, waiting to be processed.
///
/// Couples the operation payload (`UpdateData`) with the moment it was
/// pushed into the queue, so processing latency can be reported later.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Update {
    // The payload describing what this update does.
    data: UpdateData,
    // When the update was pushed into the queue (UTC).
    enqueued_at: DateTime<Utc>,
}
|
|
|
|
|
|
|
|
impl Update {
|
|
|
|
fn clear_all() -> Update {
|
|
|
|
Update {
|
|
|
|
data: UpdateData::ClearAll,
|
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn schema(data: Schema) -> Update {
|
|
|
|
Update {
|
|
|
|
data: UpdateData::Schema(data),
|
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn customs(data: Vec<u8>) -> Update {
|
|
|
|
Update {
|
|
|
|
data: UpdateData::Customs(data),
|
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn documents_addition(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
|
|
|
|
Update {
|
|
|
|
data: UpdateData::DocumentsAddition(data),
|
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn documents_partial(data: Vec<HashMap<String, serde_json::Value>>) -> Update {
|
|
|
|
Update {
|
|
|
|
data: UpdateData::DocumentsPartial(data),
|
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn documents_deletion(data: Vec<DocumentId>) -> Update {
|
|
|
|
Update {
|
|
|
|
data: UpdateData::DocumentsDeletion(data),
|
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-11 16:49:09 +01:00
|
|
|
fn synonyms_update(data: BTreeMap<String, Vec<String>>) -> Update {
|
2019-11-12 18:00:47 +01:00
|
|
|
Update {
|
2019-12-11 16:49:09 +01:00
|
|
|
data: UpdateData::SynonymsUpdate(data),
|
2019-11-12 18:00:47 +01:00
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn stop_words_addition(data: BTreeSet<String>) -> Update {
|
|
|
|
Update {
|
|
|
|
data: UpdateData::StopWordsAddition(data),
|
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn stop_words_deletion(data: BTreeSet<String>) -> Update {
|
|
|
|
Update {
|
|
|
|
data: UpdateData::StopWordsDeletion(data),
|
|
|
|
enqueued_at: Utc::now(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// The payload of an update: one variant per kind of operation that can
/// be enqueued against an index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateData {
    /// Wipe every store of the index.
    ClearAll,
    /// Replace the index schema.
    Schema(Schema),
    /// Replace the user-defined customs payload (opaque bytes).
    Customs(Vec<u8>),
    /// Add (or fully replace) the given documents.
    DocumentsAddition(Vec<HashMap<String, serde_json::Value>>),
    /// Partially update existing documents with the given fields.
    DocumentsPartial(Vec<HashMap<String, serde_json::Value>>),
    /// Delete the documents with the given ids.
    DocumentsDeletion(Vec<DocumentId>),
    /// Replace the synonyms mapping (word -> alternatives).
    SynonymsUpdate(BTreeMap<String, Vec<String>>),
    /// Add the given stop words.
    StopWordsAddition(BTreeSet<String>),
    /// Remove the given stop words.
    StopWordsDeletion(BTreeSet<String>),
}
|
|
|
|
|
2019-11-12 18:00:47 +01:00
|
|
|
impl UpdateData {
|
2019-10-31 11:13:37 +01:00
|
|
|
pub fn update_type(&self) -> UpdateType {
|
|
|
|
match self {
|
2019-11-12 18:00:47 +01:00
|
|
|
UpdateData::ClearAll => UpdateType::ClearAll,
|
|
|
|
UpdateData::Schema(_) => UpdateType::Schema,
|
|
|
|
UpdateData::Customs(_) => UpdateType::Customs,
|
|
|
|
UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition {
|
2019-10-31 11:13:37 +01:00
|
|
|
number: addition.len(),
|
|
|
|
},
|
2019-11-12 18:00:47 +01:00
|
|
|
UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial {
|
2019-11-05 15:23:41 +01:00
|
|
|
number: addition.len(),
|
|
|
|
},
|
2019-11-12 18:00:47 +01:00
|
|
|
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
|
2019-10-31 11:13:37 +01:00
|
|
|
number: deletion.len(),
|
|
|
|
},
|
2019-12-11 16:49:09 +01:00
|
|
|
UpdateData::SynonymsUpdate(addition) => UpdateType::SynonymsUpdate {
|
2019-10-31 11:13:37 +01:00
|
|
|
number: addition.len(),
|
|
|
|
},
|
2019-11-12 18:00:47 +01:00
|
|
|
UpdateData::StopWordsAddition(addition) => UpdateType::StopWordsAddition {
|
2019-10-31 11:13:37 +01:00
|
|
|
number: addition.len(),
|
|
|
|
},
|
2019-11-12 18:00:47 +01:00
|
|
|
UpdateData::StopWordsDeletion(deletion) => UpdateType::StopWordsDeletion {
|
2019-10-31 11:13:37 +01:00
|
|
|
number: deletion.len(),
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-07 16:16:04 +02:00
|
|
|
/// A lightweight, serializable description of an update's kind.
///
/// Serialized with a `"name"` tag; counted variants carry how many
/// entries the update affects.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "name")]
pub enum UpdateType {
    ClearAll,
    Schema,
    Customs,
    DocumentsAddition { number: usize },
    DocumentsPartial { number: usize },
    DocumentsDeletion { number: usize },
    SynonymsUpdate { number: usize },
    StopWordsAddition { number: usize },
    StopWordsDeletion { number: usize },
}
|
|
|
|
|
2019-10-29 11:30:44 +01:00
|
|
|
/// The outcome of an update that has been processed (successfully or not).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ProcessedUpdateResult {
    pub update_id: u64,
    // Serialized under the key "type" instead of "update_type".
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    // The error message when processing failed; omitted from the JSON
    // output on success.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    pub duration: f64, // in seconds
    pub enqueued_at: DateTime<Utc>,
    pub processed_at: DateTime<Utc>,
}
|
|
|
|
|
2019-10-31 11:13:37 +01:00
|
|
|
/// The description of an update still waiting in the queue.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct EnqueuedUpdateResult {
    pub update_id: u64,
    // Serialized under the key "type" instead of "update_type".
    #[serde(rename = "type")]
    pub update_type: UpdateType,
    pub enqueued_at: DateTime<Utc>,
}
|
|
|
|
|
2019-10-29 11:30:44 +01:00
|
|
|
/// The lifecycle status of an update, serialized with a `"status"` tag
/// and the inner result flattened alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", tag = "status")]
pub enum UpdateStatus {
    /// Still waiting in the queue.
    Enqueued {
        #[serde(flatten)]
        content: EnqueuedUpdateResult,
    },
    /// Processed, but ended with an error.
    Failed {
        #[serde(flatten)]
        content: ProcessedUpdateResult,
    },
    /// Successfully processed.
    Processed {
        #[serde(flatten)]
        content: ProcessedUpdateResult,
    },
}
|
|
|
|
|
2019-10-16 17:05:24 +02:00
|
|
|
/// Retrieves the status of the update identified by `update_id`.
///
/// Looks first into the results store: a recorded result means the update
/// was processed, reported as `Failed` when it carries an error message
/// and `Processed` otherwise. When no result exists, falls back to the
/// pending-updates store to report it as `Enqueued`. Returns `Ok(None)`
/// when the id is unknown to both stores.
pub fn update_status(
    update_reader: &heed::RoTxn<UpdateT>,
    updates_store: store::Updates,
    updates_results_store: store::UpdatesResults,
    update_id: u64,
) -> MResult<Option<UpdateStatus>> {
    match updates_results_store.update_result(update_reader, update_id)? {
        Some(result) => {
            // An error recorded in the result marks the update as failed.
            if result.error.is_some() {
                Ok(Some(UpdateStatus::Failed { content: result }))
            } else {
                Ok(Some(UpdateStatus::Processed { content: result }))
            }
        },
        None => match updates_store.get(update_reader, update_id)? {
            Some(update) => Ok(Some(UpdateStatus::Enqueued {
                content: EnqueuedUpdateResult {
                    update_id,
                    update_type: update.data.update_type(),
                    enqueued_at: update.enqueued_at,
                },
            })),
            None => Ok(None),
        },
    }
}
|
|
|
|
|
2019-10-08 17:24:11 +02:00
|
|
|
pub fn next_update_id(
|
2019-11-26 16:12:06 +01:00
|
|
|
update_writer: &mut heed::RwTxn<UpdateT>,
|
2019-10-07 16:16:04 +02:00
|
|
|
updates_store: store::Updates,
|
|
|
|
updates_results_store: store::UpdatesResults,
|
2019-10-18 13:05:28 +02:00
|
|
|
) -> ZResult<u64> {
|
2019-11-26 16:12:06 +01:00
|
|
|
let last_update = updates_store.last_update(update_writer)?;
|
|
|
|
let last_update = last_update.map(|(n, _)| n);
|
2019-10-07 16:16:04 +02:00
|
|
|
|
2019-11-26 16:12:06 +01:00
|
|
|
let last_update_results_id = updates_results_store.last_update(update_writer)?;
|
2019-10-07 16:16:04 +02:00
|
|
|
let last_update_results_id = last_update_results_id.map(|(n, _)| n);
|
|
|
|
|
2019-11-26 16:12:06 +01:00
|
|
|
let max_update_id = cmp::max(last_update, last_update_results_id);
|
2019-10-08 17:24:11 +02:00
|
|
|
let new_update_id = max_update_id.map_or(0, |n| n + 1);
|
2019-10-08 17:16:48 +02:00
|
|
|
|
2019-10-08 17:24:11 +02:00
|
|
|
Ok(new_update_id)
|
2019-10-08 17:16:48 +02:00
|
|
|
}
|
|
|
|
|
2019-11-04 10:49:27 +01:00
|
|
|
/// Applies a single update to the index and reports its outcome.
///
/// Dispatches on the update payload to the matching `apply_*` function,
/// timing the work, and returns a `ProcessedUpdateResult` carrying the
/// update type, the error message (if any), the processing duration in
/// seconds, and the enqueue/processing timestamps. Errors from the
/// `apply_*` functions are captured into the result rather than
/// propagated; the returned `MResult` is `Ok` in both cases.
pub fn update_task<'a, 'b>(
    writer: &'a mut heed::RwTxn<'b, MainT>,
    index: &store::Index,
    update_id: u64,
    update: Update,
) -> MResult<ProcessedUpdateResult> {
    debug!("Processing update number {}", update_id);

    let Update { enqueued_at, data } = update;

    let (update_type, result, duration) = match data {
        UpdateData::ClearAll => {
            let start = Instant::now();

            let update_type = UpdateType::ClearAll;
            let result = apply_clear_all(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                index.prefix_documents_cache,
                index.prefix_postings_lists_cache,
            );

            (update_type, result, start.elapsed())
        }
        UpdateData::Schema(schema) => {
            let start = Instant::now();

            let update_type = UpdateType::Schema;
            let result = apply_schema_update(
                writer,
                &schema,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                index.prefix_documents_cache,
                index.prefix_postings_lists_cache,
            );

            (update_type, result, start.elapsed())
        }
        UpdateData::Customs(customs) => {
            let start = Instant::now();

            let update_type = UpdateType::Customs;
            // apply_customs_update returns its own error type; convert it
            // into the common MResult error here.
            let result = apply_customs_update(writer, index.main, &customs).map_err(Into::into);

            (update_type, result, start.elapsed())
        }
        UpdateData::DocumentsAddition(documents) => {
            let start = Instant::now();

            let update_type = UpdateType::DocumentsAddition {
                number: documents.len(),
            };

            let result = apply_documents_addition(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                index.prefix_documents_cache,
                index.prefix_postings_lists_cache,
                documents,
            );

            (update_type, result, start.elapsed())
        }
        UpdateData::DocumentsPartial(documents) => {
            let start = Instant::now();

            let update_type = UpdateType::DocumentsPartial {
                number: documents.len(),
            };

            let result = apply_documents_partial_addition(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                index.prefix_documents_cache,
                index.prefix_postings_lists_cache,
                documents,
            );

            (update_type, result, start.elapsed())
        }
        UpdateData::DocumentsDeletion(documents) => {
            let start = Instant::now();

            let update_type = UpdateType::DocumentsDeletion {
                number: documents.len(),
            };

            // NOTE(review): unlike the addition paths, this call does not
            // receive index.prefix_documents_cache — presumably handled
            // inside apply_documents_deletion; confirm against its signature.
            let result = apply_documents_deletion(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                index.prefix_postings_lists_cache,
                documents,
            );

            (update_type, result, start.elapsed())
        }
        UpdateData::SynonymsUpdate(synonyms) => {
            let start = Instant::now();

            let update_type = UpdateType::SynonymsUpdate {
                number: synonyms.len(),
            };

            let result = apply_synonyms_update(writer, index.main, index.synonyms, synonyms);

            (update_type, result, start.elapsed())
        }
        UpdateData::StopWordsAddition(stop_words) => {
            let start = Instant::now();

            let update_type = UpdateType::StopWordsAddition {
                number: stop_words.len(),
            };

            let result =
                apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);

            (update_type, result, start.elapsed())
        }
        UpdateData::StopWordsDeletion(stop_words) => {
            let start = Instant::now();

            let update_type = UpdateType::StopWordsDeletion {
                number: stop_words.len(),
            };

            let result = apply_stop_words_deletion(
                writer,
                index.main,
                index.documents_fields,
                index.documents_fields_counts,
                index.postings_lists,
                index.docs_words,
                index.prefix_documents_cache,
                index.prefix_postings_lists_cache,
                stop_words,
            );

            (update_type, result, start.elapsed())
        }
    };

    debug!(
        "Processed update number {} {:?} {:?}",
        update_id, update_type, result
    );

    // Fold the apply result into the status: keep only the error message,
    // so a failed apply still yields an Ok(ProcessedUpdateResult).
    let status = ProcessedUpdateResult {
        update_id,
        update_type,
        error: result.map_err(|e| e.to_string()).err(),
        duration: duration.as_secs_f64(),
        enqueued_at,
        processed_at: Utc::now(),
    };

    Ok(status)
}
|
2020-01-16 16:19:04 +01:00
|
|
|
|
|
|
|
/// Recomputes the cached postings lists for all word prefixes of length
/// 1 and 2 and stores them in the `PrefixPostingsListsCache` store.
///
/// Streams every word of the words FST in order, accumulating the
/// postings lists of consecutive words sharing the same prefix; when the
/// prefix changes (or the stream ends) the accumulated list is sorted,
/// deduplicated and written out. The whole cache is cleared first, and
/// the function is a no-op when no words FST exists yet.
///
/// Prefixes are stored as fixed `[u8; 4]` keys, zero-padded past
/// `prefix_len` — hence the hard cap of prefixes up to 4 bytes.
fn compute_short_prefixes(
    writer: &mut heed::RwTxn<MainT>,
    main_store: store::Main,
    postings_lists_store: store::PostingsLists,
    prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
) -> MResult<()>
{
    // retrieve the words fst to compute all those prefixes
    let words_fst = match main_store.words_fst(writer)? {
        Some(fst) => fst,
        None => return Ok(()),
    };

    // clear the prefixes
    let pplc_store = prefix_postings_lists_cache_store;
    pplc_store.clear(writer)?;

    for prefix_len in 1..=2 {
        // compute prefixes and store those in the PrefixPostingsListsCache store.
        let mut previous_prefix: Option<([u8; 4], Vec<_>)> = None;
        let mut stream = words_fst.into_stream();
        while let Some(input) = stream.next() {

            // We skip the prefixes that are shorter than the current length
            // we want to cache (<). We must ignore the input when it is exactly the
            // same word as the prefix because if we match exactly on it we need
            // to consider it as an exact match and not as a prefix (=).
            if input.len() <= prefix_len { continue }

            if let Some(postings_list) = postings_lists_store.postings_list(writer, input)?.map(|p| p.matches.into_owned()) {
                let prefix = &input[..prefix_len];

                // Zero-padded fixed-size key for the cache store.
                let mut arr_prefix = [0; 4];
                arr_prefix[..prefix_len].copy_from_slice(prefix);

                match previous_prefix {
                    // The prefix changed: flush the accumulated postings
                    // list for the previous prefix, then start a new one.
                    Some((ref mut prev_prefix, ref mut prev_pl)) if *prev_prefix != arr_prefix => {
                        prev_pl.sort_unstable();
                        prev_pl.dedup();

                        // Debug-log only when the prefix bytes are valid UTF-8.
                        if let Ok(prefix) = std::str::from_utf8(&prev_prefix[..prefix_len]) {
                            debug!("writing the prefix of {:?} of length {}", prefix, prev_pl.len());
                        }

                        // Sorted and deduplicated just above, so the
                        // unchecked Set construction upholds its invariant.
                        let pls = Set::new_unchecked(&prev_pl);
                        pplc_store.put_prefix_postings_list(writer, *prev_prefix, &pls)?;

                        *prev_prefix = arr_prefix;
                        prev_pl.clear();
                        prev_pl.extend_from_slice(&postings_list);
                    },
                    // Same prefix as before: keep accumulating matches.
                    Some((_, ref mut prev_pl)) => prev_pl.extend_from_slice(&postings_list),
                    // Very first word of this pass: start the accumulator.
                    None => previous_prefix = Some((arr_prefix, postings_list.to_vec())),
                }
            }
        }

        // write the last prefix postings lists
        if let Some((prev_prefix, mut prev_pl)) = previous_prefix.take() {
            prev_pl.sort_unstable();
            prev_pl.dedup();

            let pls = Set::new_unchecked(&prev_pl);
            pplc_store.put_prefix_postings_list(writer, prev_prefix, &pls)?;
        }
    }

    Ok(())
}
|