mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 17:11:15 +08:00
Introduce the stop words deletion update type
This commit is contained in:
parent
776673ebae
commit
a226fd23c3
@ -195,6 +195,14 @@ impl Index {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a [`update::StopWordsDeletion`] builder for this index.
///
/// The builder receives this index's `updates` and `updates_results`
/// stores along with a clone of its `updates_notifier`.
pub fn stop_words_deletion(&self) -> update::StopWordsDeletion {
    let notifier = self.updates_notifier.clone();
    update::StopWordsDeletion::new(self.updates, self.updates_results, notifier)
}
|
||||||
|
|
||||||
pub fn current_update_id(&self, reader: &heed::RoTxn) -> MResult<Option<u64>> {
|
pub fn current_update_id(&self, reader: &heed::RoTxn) -> MResult<Option<u64>> {
|
||||||
match self.updates.last_update_id(reader)? {
|
match self.updates.last_update_id(reader)? {
|
||||||
Some((id, _)) => Ok(Some(id)),
|
Some((id, _)) => Ok(Some(id)),
|
||||||
|
@ -4,6 +4,7 @@ mod documents_addition;
|
|||||||
mod documents_deletion;
|
mod documents_deletion;
|
||||||
mod schema_update;
|
mod schema_update;
|
||||||
mod stop_words_addition;
|
mod stop_words_addition;
|
||||||
|
mod stop_words_deletion;
|
||||||
mod synonyms_addition;
|
mod synonyms_addition;
|
||||||
mod synonyms_deletion;
|
mod synonyms_deletion;
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ pub use self::documents_addition::{apply_documents_addition, DocumentsAddition};
|
|||||||
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
|
||||||
pub use self::schema_update::{apply_schema_update, push_schema_update};
|
pub use self::schema_update::{apply_schema_update, push_schema_update};
|
||||||
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
|
pub use self::stop_words_addition::{apply_stop_words_addition, StopWordsAddition};
|
||||||
|
pub use self::stop_words_deletion::{apply_stop_words_deletion, StopWordsDeletion};
|
||||||
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
|
pub use self::synonyms_addition::{apply_synonyms_addition, SynonymsAddition};
|
||||||
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
|
pub use self::synonyms_deletion::{apply_synonyms_deletion, SynonymsDeletion};
|
||||||
|
|
||||||
@ -37,6 +39,7 @@ pub enum Update {
|
|||||||
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
||||||
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
||||||
StopWordsAddition(BTreeSet<String>),
|
StopWordsAddition(BTreeSet<String>),
|
||||||
|
StopWordsDeletion(BTreeSet<String>),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@ -49,6 +52,7 @@ pub enum UpdateType {
|
|||||||
SynonymsAddition { number: usize },
|
SynonymsAddition { number: usize },
|
||||||
SynonymsDeletion { number: usize },
|
SynonymsDeletion { number: usize },
|
||||||
StopWordsAddition { number: usize },
|
StopWordsAddition { number: usize },
|
||||||
|
StopWordsDeletion { number: usize },
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@ -226,6 +230,25 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
|||||||
let result =
|
let result =
|
||||||
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
|
apply_stop_words_addition(writer, index.main, index.postings_lists, stop_words);
|
||||||
|
|
||||||
|
(update_type, result, start.elapsed())
|
||||||
|
}
|
||||||
|
Update::StopWordsDeletion(stop_words) => {
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
let update_type = UpdateType::StopWordsDeletion {
|
||||||
|
number: stop_words.len(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let result = apply_stop_words_deletion(
|
||||||
|
writer,
|
||||||
|
index.main,
|
||||||
|
index.documents_fields,
|
||||||
|
index.documents_fields_counts,
|
||||||
|
index.postings_lists,
|
||||||
|
index.docs_words,
|
||||||
|
stop_words,
|
||||||
|
);
|
||||||
|
|
||||||
(update_type, result, start.elapsed())
|
(update_type, result, start.elapsed())
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -95,7 +95,7 @@ pub fn apply_stop_words_addition(
|
|||||||
main_store.put_words_fst(writer, &word_fst)?;
|
main_store.put_words_fst(writer, &word_fst)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// now we add all of these stop words to the main store
|
// now we add all of these stop words to the main store
|
||||||
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
|
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
|
||||||
|
|
||||||
let op = OpBuilder::new()
|
let op = OpBuilder::new()
|
||||||
|
112
meilidb-core/src/update/stop_words_deletion.rs
Normal file
112
meilidb-core/src/update/stop_words_deletion.rs
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
|
use fst::{set::OpBuilder, SetBuilder};
|
||||||
|
|
||||||
|
use crate::automaton::normalize_str;
|
||||||
|
use crate::update::documents_addition::reindex_all_documents;
|
||||||
|
use crate::update::{next_update_id, Update};
|
||||||
|
use crate::{store, MResult};
|
||||||
|
|
||||||
|
pub struct StopWordsDeletion {
|
||||||
|
updates_store: store::Updates,
|
||||||
|
updates_results_store: store::UpdatesResults,
|
||||||
|
updates_notifier: crossbeam_channel::Sender<()>,
|
||||||
|
stop_words: BTreeSet<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StopWordsDeletion {
|
||||||
|
pub fn new(
|
||||||
|
updates_store: store::Updates,
|
||||||
|
updates_results_store: store::UpdatesResults,
|
||||||
|
updates_notifier: crossbeam_channel::Sender<()>,
|
||||||
|
) -> StopWordsDeletion {
|
||||||
|
StopWordsDeletion {
|
||||||
|
updates_store,
|
||||||
|
updates_results_store,
|
||||||
|
updates_notifier,
|
||||||
|
stop_words: BTreeSet::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delete_stop_word<S: AsRef<str>>(&mut self, stop_word: S) {
|
||||||
|
let stop_word = normalize_str(stop_word.as_ref());
|
||||||
|
self.stop_words.insert(stop_word);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult<u64> {
|
||||||
|
let _ = self.updates_notifier.send(());
|
||||||
|
let update_id = push_stop_words_deletion(
|
||||||
|
writer,
|
||||||
|
self.updates_store,
|
||||||
|
self.updates_results_store,
|
||||||
|
self.stop_words,
|
||||||
|
)?;
|
||||||
|
Ok(update_id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push_stop_words_deletion(
|
||||||
|
writer: &mut heed::RwTxn,
|
||||||
|
updates_store: store::Updates,
|
||||||
|
updates_results_store: store::UpdatesResults,
|
||||||
|
deletion: BTreeSet<String>,
|
||||||
|
) -> MResult<u64> {
|
||||||
|
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||||
|
|
||||||
|
let update = Update::StopWordsDeletion(deletion);
|
||||||
|
updates_store.put_update(writer, last_update_id, &update)?;
|
||||||
|
|
||||||
|
Ok(last_update_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply_stop_words_deletion(
|
||||||
|
writer: &mut heed::RwTxn,
|
||||||
|
main_store: store::Main,
|
||||||
|
documents_fields_store: store::DocumentsFields,
|
||||||
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
|
postings_lists_store: store::PostingsLists,
|
||||||
|
docs_words_store: store::DocsWords,
|
||||||
|
deletion: BTreeSet<String>,
|
||||||
|
) -> MResult<()> {
|
||||||
|
let mut stop_words_builder = SetBuilder::memory();
|
||||||
|
|
||||||
|
for word in deletion {
|
||||||
|
stop_words_builder.insert(&word).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// create the new delta stop words fst
|
||||||
|
let delta_stop_words = stop_words_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// now we delete all of these stop words from the main store
|
||||||
|
let stop_words_fst = main_store.stop_words_fst(writer)?.unwrap_or_default();
|
||||||
|
|
||||||
|
let op = OpBuilder::new()
|
||||||
|
.add(&stop_words_fst)
|
||||||
|
.add(&delta_stop_words)
|
||||||
|
.difference();
|
||||||
|
|
||||||
|
let mut stop_words_builder = SetBuilder::memory();
|
||||||
|
stop_words_builder.extend_stream(op).unwrap();
|
||||||
|
let stop_words_fst = stop_words_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
main_store.put_stop_words_fst(writer, &stop_words_fst)?;
|
||||||
|
|
||||||
|
// now that we have setup the stop words
|
||||||
|
// lets reindex everything...
|
||||||
|
reindex_all_documents(
|
||||||
|
writer,
|
||||||
|
main_store,
|
||||||
|
documents_fields_store,
|
||||||
|
documents_fields_counts_store,
|
||||||
|
postings_lists_store,
|
||||||
|
docs_words_store,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user