mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 17:11:15 +08:00
Introduce synonyms deletions updates
This commit is contained in:
parent
0e224efa46
commit
ba32ce21d0
@ -1,4 +1,5 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use rkv::StoreError;
|
||||||
use crate::error::MResult;
|
use crate::error::MResult;
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
@ -18,6 +19,19 @@ impl Synonyms {
|
|||||||
self.synonyms.put(writer, word, &blob)
|
self.synonyms.put(writer, word, &blob)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn del_synonyms(
|
||||||
|
&self,
|
||||||
|
writer: &mut rkv::Writer,
|
||||||
|
word: &[u8],
|
||||||
|
) -> Result<bool, rkv::StoreError>
|
||||||
|
{
|
||||||
|
match self.synonyms.delete(writer, word) {
|
||||||
|
Ok(()) => Ok(true),
|
||||||
|
Err(StoreError::LmdbError(lmdb::Error::NotFound)) => Ok(false),
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn synonyms(
|
pub fn synonyms(
|
||||||
&self,
|
&self,
|
||||||
reader: &impl rkv::Readable,
|
reader: &impl rkv::Readable,
|
||||||
|
@ -2,11 +2,13 @@ mod documents_addition;
|
|||||||
mod documents_deletion;
|
mod documents_deletion;
|
||||||
mod schema_update;
|
mod schema_update;
|
||||||
mod synonyms_addition;
|
mod synonyms_addition;
|
||||||
|
mod synonyms_deletion;
|
||||||
|
|
||||||
pub use self::documents_addition::{DocumentsAddition, apply_documents_addition};
|
pub use self::documents_addition::{DocumentsAddition, apply_documents_addition};
|
||||||
pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion};
|
pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion};
|
||||||
pub use self::schema_update::apply_schema_update;
|
pub use self::schema_update::apply_schema_update;
|
||||||
pub use self::synonyms_addition::{SynonymsAddition, apply_synonyms_addition};
|
pub use self::synonyms_addition::{SynonymsAddition, apply_synonyms_addition};
|
||||||
|
pub use self::synonyms_deletion::{SynonymsDeletion, apply_synonyms_deletion};
|
||||||
|
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
@ -24,6 +26,7 @@ pub enum Update {
|
|||||||
DocumentsAddition(Vec<rmpv::Value>),
|
DocumentsAddition(Vec<rmpv::Value>),
|
||||||
DocumentsDeletion(Vec<DocumentId>),
|
DocumentsDeletion(Vec<DocumentId>),
|
||||||
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
||||||
|
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@ -32,6 +35,7 @@ pub enum UpdateType {
|
|||||||
DocumentsAddition { number: usize },
|
DocumentsAddition { number: usize },
|
||||||
DocumentsDeletion { number: usize },
|
DocumentsDeletion { number: usize },
|
||||||
SynonymsAddition { number: usize },
|
SynonymsAddition { number: usize },
|
||||||
|
SynonymsDeletion { number: usize },
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Serialize, Deserialize)]
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
@ -172,6 +176,21 @@ pub fn push_synonyms_addition(
|
|||||||
Ok(last_update_id)
|
Ok(last_update_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn push_synonyms_deletion(
|
||||||
|
writer: &mut rkv::Writer,
|
||||||
|
updates_store: store::Updates,
|
||||||
|
updates_results_store: store::UpdatesResults,
|
||||||
|
deletion: BTreeMap<String, Option<Vec<String>>>,
|
||||||
|
) -> MResult<u64>
|
||||||
|
{
|
||||||
|
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||||
|
|
||||||
|
let update = Update::SynonymsDeletion(deletion);
|
||||||
|
let update_id = updates_store.put_update(writer, last_update_id, &update)?;
|
||||||
|
|
||||||
|
Ok(last_update_id)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn update_task(
|
pub fn update_task(
|
||||||
writer: &mut rkv::Writer,
|
writer: &mut rkv::Writer,
|
||||||
index: store::Index,
|
index: store::Index,
|
||||||
@ -249,6 +268,20 @@ pub fn update_task(
|
|||||||
synonyms,
|
synonyms,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
(update_type, result, start.elapsed())
|
||||||
|
},
|
||||||
|
Update::SynonymsDeletion(synonyms) => {
|
||||||
|
let start = Instant::now();
|
||||||
|
|
||||||
|
let update_type = UpdateType::SynonymsDeletion { number: synonyms.len() };
|
||||||
|
|
||||||
|
let result = apply_synonyms_deletion(
|
||||||
|
writer,
|
||||||
|
index.main,
|
||||||
|
index.synonyms,
|
||||||
|
synonyms,
|
||||||
|
);
|
||||||
|
|
||||||
(update_type, result, start.elapsed())
|
(update_type, result, start.elapsed())
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -62,7 +62,7 @@ pub fn apply_synonyms_addition(
|
|||||||
main_store: store::Main,
|
main_store: store::Main,
|
||||||
synonyms_store: store::Synonyms,
|
synonyms_store: store::Synonyms,
|
||||||
addition: BTreeMap<String, Vec<String>>,
|
addition: BTreeMap<String, Vec<String>>,
|
||||||
) -> Result<(), Error>
|
) -> MResult<()>
|
||||||
{
|
{
|
||||||
let mut synonyms_builder = SetBuilder::memory();
|
let mut synonyms_builder = SetBuilder::memory();
|
||||||
|
|
||||||
|
151
meilidb-core/src/update/synonyms_deletion.rs
Normal file
151
meilidb-core/src/update/synonyms_deletion.rs
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::iter::FromIterator;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use fst::{SetBuilder, set::OpBuilder};
|
||||||
|
use sdset::SetBuf;
|
||||||
|
|
||||||
|
use crate::automaton::normalize_str;
|
||||||
|
use crate::raw_indexer::RawIndexer;
|
||||||
|
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
|
||||||
|
use crate::store;
|
||||||
|
use crate::update::push_synonyms_deletion;
|
||||||
|
use crate::{MResult, Error, RankedMap};
|
||||||
|
|
||||||
|
pub struct SynonymsDeletion {
|
||||||
|
updates_store: store::Updates,
|
||||||
|
updates_results_store: store::UpdatesResults,
|
||||||
|
updates_notifier: crossbeam_channel::Sender<()>,
|
||||||
|
synonyms: BTreeMap<String, Option<Vec<String>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SynonymsDeletion {
|
||||||
|
pub fn new(
|
||||||
|
updates_store: store::Updates,
|
||||||
|
updates_results_store: store::UpdatesResults,
|
||||||
|
updates_notifier: crossbeam_channel::Sender<()>,
|
||||||
|
) -> SynonymsDeletion
|
||||||
|
{
|
||||||
|
SynonymsDeletion {
|
||||||
|
updates_store,
|
||||||
|
updates_results_store,
|
||||||
|
updates_notifier,
|
||||||
|
synonyms: BTreeMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
|
||||||
|
let synonym = normalize_str(synonym.as_ref());
|
||||||
|
self.synonyms.insert(synonym, None);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
|
||||||
|
where S: AsRef<str>,
|
||||||
|
T: AsRef<str>,
|
||||||
|
I: Iterator<Item=T>,
|
||||||
|
{
|
||||||
|
let synonym = normalize_str(synonym.as_ref());
|
||||||
|
let value = self.synonyms.entry(synonym).or_insert(None);
|
||||||
|
let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
|
||||||
|
match value {
|
||||||
|
Some(v) => v.extend(alternatives),
|
||||||
|
None => *value = Some(Vec::from_iter(alternatives)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finalize(self, mut writer: rkv::Writer) -> MResult<u64> {
|
||||||
|
let update_id = push_synonyms_deletion(
|
||||||
|
&mut writer,
|
||||||
|
self.updates_store,
|
||||||
|
self.updates_results_store,
|
||||||
|
self.synonyms,
|
||||||
|
)?;
|
||||||
|
writer.commit()?;
|
||||||
|
let _ = self.updates_notifier.send(());
|
||||||
|
|
||||||
|
Ok(update_id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply_synonyms_deletion(
|
||||||
|
writer: &mut rkv::Writer,
|
||||||
|
main_store: store::Main,
|
||||||
|
synonyms_store: store::Synonyms,
|
||||||
|
deletion: BTreeMap<String, Option<Vec<String>>>,
|
||||||
|
) -> MResult<()>
|
||||||
|
{
|
||||||
|
let mut delete_whole_synonym_builder = SetBuilder::memory();
|
||||||
|
|
||||||
|
for (synonym, alternatives) in deletion {
|
||||||
|
match alternatives {
|
||||||
|
Some(alternatives) => {
|
||||||
|
let prev_alternatives = synonyms_store.synonyms(writer, synonym.as_bytes())?;
|
||||||
|
let prev_alternatives = match prev_alternatives {
|
||||||
|
Some(alternatives) => alternatives,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
let delta_alternatives = {
|
||||||
|
let alternatives = SetBuf::from_dirty(alternatives);
|
||||||
|
let mut builder = SetBuilder::memory();
|
||||||
|
builder.extend_iter(alternatives).unwrap();
|
||||||
|
builder.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap()
|
||||||
|
};
|
||||||
|
|
||||||
|
let op = OpBuilder::new()
|
||||||
|
.add(prev_alternatives.stream())
|
||||||
|
.add(delta_alternatives.stream())
|
||||||
|
.difference();
|
||||||
|
|
||||||
|
let (alternatives, empty_alternatives) = {
|
||||||
|
let mut builder = SetBuilder::memory();
|
||||||
|
let len = builder.get_ref().len();
|
||||||
|
builder.extend_stream(op).unwrap();
|
||||||
|
let is_empty = len == builder.get_ref().len();
|
||||||
|
let bytes = builder.into_inner().unwrap();
|
||||||
|
let alternatives = fst::Set::from_bytes(bytes).unwrap();
|
||||||
|
|
||||||
|
(alternatives, is_empty)
|
||||||
|
};
|
||||||
|
|
||||||
|
if empty_alternatives {
|
||||||
|
delete_whole_synonym_builder.insert(synonym.as_bytes())?;
|
||||||
|
} else {
|
||||||
|
synonyms_store.put_synonyms(writer, synonym.as_bytes(), &alternatives)?;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
delete_whole_synonym_builder.insert(&synonym).unwrap();
|
||||||
|
synonyms_store.del_synonyms(writer, synonym.as_bytes())?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let delta_synonyms = delete_whole_synonym_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let synonyms = match main_store.synonyms_fst(writer)? {
|
||||||
|
Some(synonyms) => {
|
||||||
|
let op = OpBuilder::new()
|
||||||
|
.add(synonyms.stream())
|
||||||
|
.add(delta_synonyms.stream())
|
||||||
|
.difference();
|
||||||
|
|
||||||
|
let mut synonyms_builder = SetBuilder::memory();
|
||||||
|
synonyms_builder.extend_stream(op).unwrap();
|
||||||
|
synonyms_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
None => fst::Set::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
main_store.put_synonyms_fst(writer, &synonyms)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user