mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
feat: Make multiple document updates shadow themselves
This commit is contained in:
parent
5627f15d41
commit
ac347d788c
@ -49,8 +49,8 @@ where B: TokenizerBuilder
|
||||
}
|
||||
|
||||
fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
|
||||
for Token { word, word_index, char_index } in self.tokenizer_builder.build(v) {
|
||||
|
||||
for token in self.tokenizer_builder.build(v) {
|
||||
let Token { word, word_index, char_index } = token;
|
||||
let document_id = self.document_id;
|
||||
|
||||
// FIXME must u32::try_from instead
|
||||
@ -61,15 +61,13 @@ where B: TokenizerBuilder
|
||||
|
||||
// insert the exact representation
|
||||
let word_lower = word.to_lowercase();
|
||||
let length = word.chars().count() as u16;
|
||||
|
||||
if self.stop_words.contains(&word_lower) { continue }
|
||||
|
||||
// and the unidecoded lowercased version
|
||||
let word_unidecoded = unidecode::unidecode(word).to_lowercase();
|
||||
if word_lower != word_unidecoded {
|
||||
|
||||
// FIXME must u16/u32::try_from instead
|
||||
let length = word_unidecoded.chars().count() as u16;
|
||||
let word_area = match WordArea::new(char_index as u32, length) {
|
||||
Ok(word_area) => word_area,
|
||||
Err(_) => return Ok(()),
|
||||
@ -79,8 +77,6 @@ where B: TokenizerBuilder
|
||||
self.update.insert_doc_index(word_unidecoded.into_bytes(), doc_index);
|
||||
}
|
||||
|
||||
// FIXME must u16/u32::try_from instead
|
||||
let length = word.chars().count() as u16;
|
||||
let word_area = match WordArea::new(char_index as u32, length) {
|
||||
Ok(word_area) => word_area,
|
||||
Err(_) => return Ok(()),
|
||||
|
@ -1,4 +1,4 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::btree_map::{BTreeMap, Entry};
|
||||
use std::path::PathBuf;
|
||||
use std::error::Error;
|
||||
|
||||
@ -39,6 +39,10 @@ impl DocumentUpdate {
|
||||
|
||||
/// Flags this document update as cleared and empties its contents.
///
/// Sets `self.cleared` to `true`, then calls `self.clear()`.
pub fn remove(&mut self) {
|
||||
self.cleared = true;
|
||||
self.clear();
|
||||
}
|
||||
|
||||
/// Empties the data held by this document update.
///
/// Calls `.clear()` on `self.words_indexes` and on `self.attributes`;
/// the `cleared` flag is not modified here.
pub fn clear(&mut self) {
|
||||
self.words_indexes.clear();
|
||||
self.attributes.clear();
|
||||
}
|
||||
@ -61,7 +65,13 @@ impl RawUpdateBuilder {
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the `DocumentUpdate` stored under
/// `document_id` in `self.document_updates`, creating one when absent.
///
/// NOTE(review): this span comes from a rendered diff without +/- markers,
/// so it appears to contain both the pre-change body (the single
/// `or_insert_with` expression) and the post-change body (the `match`).
/// Confirm against the real file which one is live.
pub fn document_update(&mut self, document_id: DocumentId) -> &mut DocumentUpdate {
|
||||
// Presumably the removed line: get-or-create, leaving an existing entry untouched.
self.document_updates.entry(document_id).or_insert_with(DocumentUpdate::new)
|
||||
// Presumably the added lines: same lookup, but an existing entry is reset first.
match self.document_updates.entry(document_id) {
|
||||
Entry::Occupied(mut occupied) => {
|
||||
// An update already exists for this id: clear it before handing it
// back, so what the caller writes next replaces the earlier content.
occupied.get_mut().clear();
|
||||
// `into_mut` consumes the entry and yields the reference that is returned.
occupied.into_mut()
|
||||
},
|
||||
// No update yet for this id: insert a fresh one and return it.
Entry::Vacant(vacant) => vacant.insert(DocumentUpdate::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build(mut self) -> Result<Update, Box<Error>> {
|
||||
|
Loading…
Reference in New Issue
Block a user