mirror of https://github.com/meilisearch/meilisearch.git
feat: Make multiple document updates shadow themselves
parent 5627f15d41
commit ac347d788c
@@ -49,8 +49,8 @@ where B: TokenizerBuilder
     }
 
     fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
-        for Token { word, word_index, char_index } in self.tokenizer_builder.build(v) {
+        for token in self.tokenizer_builder.build(v) {
+            let Token { word, word_index, char_index } = token;
             let document_id = self.document_id;
 
             // FIXME must u32::try_from instead
@@ -61,15 +61,13 @@ where B: TokenizerBuilder
 
             // insert the exact representation
             let word_lower = word.to_lowercase();
+            let length = word.chars().count() as u16;
 
             if self.stop_words.contains(&word_lower) { continue }
 
             // and the unidecoded lowercased version
             let word_unidecoded = unidecode::unidecode(word).to_lowercase();
             if word_lower != word_unidecoded {
-
-                // FIXME must u16/u32::try_from instead
-                let length = word_unidecoded.chars().count() as u16;
                 let word_area = match WordArea::new(char_index as u32, length) {
                     Ok(word_area) => word_area,
                     Err(_) => return Ok(()),
@@ -79,8 +77,6 @@ where B: TokenizerBuilder
                 self.update.insert_doc_index(word_unidecoded.into_bytes(), doc_index);
             }
 
-            // FIXME must u16/u32::try_from instead
-            let length = word.chars().count() as u16;
             let word_area = match WordArea::new(char_index as u32, length) {
                 Ok(word_area) => word_area,
                 Err(_) => return Ok(()),
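Taken together, the three hunks above restructure the body of serialize_str: the Token is destructured from the loop variable, and the length of the original word is now computed once, before the stop-word check, so the same value feeds the WordArea of both the exact and the unidecoded form. Below is a rough standalone sketch of that control flow; Token, WordArea, the whitespace tokenizer and the plain-lowercase "unidecode" are simplified stand-ins, not the real Meilisearch types.

// Simplified stand-ins; the real Token, WordArea and tokenizer live in Meilisearch.
#[allow(dead_code)]
struct Token<'a> { word: &'a str, word_index: usize, char_index: usize }

#[derive(Debug)]
struct WordArea { char_index: u32, length: u16 }

impl WordArea {
    // Fallible constructor, mirroring the `match WordArea::new(..)` pattern in the diff.
    fn new(char_index: u32, length: u16) -> Result<WordArea, ()> {
        if length == 0 { Err(()) } else { Ok(WordArea { char_index, length }) }
    }
}

fn index_text(text: &str, stop_words: &[&str]) -> Vec<(String, WordArea)> {
    let mut out = Vec::new();

    // Stand-in tokenizer: whitespace split with running char offsets.
    let mut char_index = 0;
    for (word_index, word) in text.split_whitespace().enumerate() {
        let token = Token { word, word_index, char_index };
        char_index += word.chars().count() + 1;

        let Token { word, word_index: _, char_index } = token;

        // insert the exact representation
        let word_lower = word.to_lowercase();
        // length is computed once, from the original word, before the stop-word check
        let length = word.chars().count() as u16;

        if stop_words.contains(&word_lower.as_str()) { continue }

        // and the unidecoded lowercased version (stubbed here as plain lowercasing)
        let word_unidecoded = word.to_lowercase();
        if word_lower != word_unidecoded {
            if let Ok(area) = WordArea::new(char_index as u32, length) {
                out.push((word_unidecoded, area));
            }
        }

        if let Ok(area) = WordArea::new(char_index as u32, length) {
            out.push((word_lower, area));
        }
    }

    out
}

fn main() {
    for (word, area) in index_text("The quick brown Fox", &["the"]) {
        println!("{word:?} -> {area:?}");
    }
}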
@@ -1,4 +1,4 @@
-use std::collections::BTreeMap;
+use std::collections::btree_map::{BTreeMap, Entry};
 use std::path::PathBuf;
 use std::error::Error;
 
@@ -39,6 +39,10 @@ impl DocumentUpdate {
 
     pub fn remove(&mut self) {
         self.cleared = true;
+        self.clear();
+    }
+
+    pub fn clear(&mut self) {
         self.words_indexes.clear();
         self.attributes.clear();
     }
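The hunk above splits the wiping logic out of remove into a new clear method: remove marks the document as removed and drops any pending data, while clear drops the pending data without touching the cleared flag, so other code (notably document_update in the next hunk) can reset a pending update. A small self-contained sketch of that shape, with the field types simplified (the real words_indexes and attributes maps hold index data, not strings):

use std::collections::BTreeMap;

#[derive(Default)]
struct DocumentUpdate {
    cleared: bool,
    words_indexes: BTreeMap<String, Vec<u32>>,
    attributes: BTreeMap<u16, String>,
}

impl DocumentUpdate {
    fn new() -> DocumentUpdate { DocumentUpdate::default() }

    // remove: mark the document as removed and drop any pending data
    fn remove(&mut self) {
        self.cleared = true;
        self.clear();
    }

    // clear: drop pending data only, the `cleared` flag is left untouched
    fn clear(&mut self) {
        self.words_indexes.clear();
        self.attributes.clear();
    }
}

fn main() {
    let mut update = DocumentUpdate::new();
    update.words_indexes.insert("hello".into(), vec![0]);
    update.attributes.insert(0, "hello world".into());

    update.clear();
    assert!(!update.cleared && update.words_indexes.is_empty());

    update.remove();
    assert!(update.cleared);
    println!("ok");
}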
@@ -61,7 +65,13 @@ impl RawUpdateBuilder {
     }
 
     pub fn document_update(&mut self, document_id: DocumentId) -> &mut DocumentUpdate {
-        self.document_updates.entry(document_id).or_insert_with(DocumentUpdate::new)
+        match self.document_updates.entry(document_id) {
+            Entry::Occupied(mut occupied) => {
+                occupied.get_mut().clear();
+                occupied.into_mut()
+            },
+            Entry::Vacant(vacant) => vacant.insert(DocumentUpdate::new()),
+        }
     }
 
     pub fn build(mut self) -> Result<Update, Box<Error>> {
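This last hunk is the one that gives the commit its name: when document_update is called again for a document that already has a pending update, the occupied entry is cleared before being handed back, so the newer update shadows the older one instead of being merged into it. The sketch below shows the same entry pattern end to end; it uses a trimmed-down DocumentUpdate and a plain u64 document id in place of the real Meilisearch types.

use std::collections::btree_map::{BTreeMap, Entry};

#[derive(Default, Debug)]
struct DocumentUpdate {
    attributes: BTreeMap<u16, String>,
}

impl DocumentUpdate {
    fn new() -> DocumentUpdate { DocumentUpdate::default() }
    fn clear(&mut self) { self.attributes.clear(); }
}

#[derive(Default)]
struct RawUpdateBuilder {
    document_updates: BTreeMap<u64, DocumentUpdate>,
}

impl RawUpdateBuilder {
    // Re-requesting the update of a known document clears the previous one first,
    // so the latest update shadows whatever was staged before.
    fn document_update(&mut self, document_id: u64) -> &mut DocumentUpdate {
        match self.document_updates.entry(document_id) {
            Entry::Occupied(mut occupied) => {
                occupied.get_mut().clear();
                occupied.into_mut()
            },
            Entry::Vacant(vacant) => vacant.insert(DocumentUpdate::new()),
        }
    }
}

fn main() {
    let mut builder = RawUpdateBuilder::default();

    // first update for document 7
    builder.document_update(7).attributes.insert(0, "first title".into());

    // second update for the same document: the first one is wiped, not merged
    builder.document_update(7).attributes.insert(1, "second title".into());

    let update = &builder.document_updates[&7];
    assert_eq!(update.attributes.len(), 1);
    assert_eq!(update.attributes[&1], "second title");
    println!("{update:?}");
}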