Avoid iterating on big databases when useless

parent a8d28e364d
commit 1ae13c1374
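The idea of the change: each of the three prefix-docids updates below used to scan its whole database even when the list of prefixes to match was empty, so the commit wraps each scan in an emptiness check. A minimal, self-contained sketch of that guard, with illustrative names (`compute_prefix_docids` and the in-memory `entries` slice stand in for the real LMDB cursors and fst words; none of them appear in the diff):

```rust
use std::collections::HashMap;

// Sketch of the guard this commit introduces: skip the whole pass when
// there is nothing to match, so a potentially huge database is never
// iterated for no result.
fn compute_prefix_docids(
    entries: &[(&str, u32)],
    prefixes: &[String],
) -> HashMap<String, Vec<u32>> {
    let mut cache: HashMap<String, Vec<u32>> = HashMap::new();
    if !prefixes.is_empty() {
        for (word, docid) in entries {
            for prefix in prefixes {
                if word.starts_with(prefix.as_str()) {
                    // Group the docids under the prefix they extend.
                    cache.entry(prefix.clone()).or_default().push(*docid);
                }
            }
        }
    }
    cache
}

fn main() {
    let entries = [("winner", 0), ("winter", 1), ("summer", 2)];
    // Empty prefix list: the guard returns without touching `entries`.
    assert!(compute_prefix_docids(&entries, &[]).is_empty());
    let docids = compute_prefix_docids(&entries, &["win".to_string()]);
    assert_eq!(docids["win"], vec![0, 1]);
}
```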
```diff
@@ -50,35 +50,38 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
             self.max_memory,
         );
 
-        let mut new_word_docids_iter = new_word_docids.into_cursor()?;
-        let mut current_prefixes: Option<&&[String]> = None;
-        let mut prefixes_cache = HashMap::new();
-        while let Some((word, data)) = new_word_docids_iter.move_on_next()? {
-            current_prefixes = match current_prefixes.take() {
-                Some(prefixes) if word.starts_with(&prefixes[0].as_bytes()) => Some(prefixes),
-                _otherwise => {
-                    write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_docids_sorter)?;
-                    common_prefix_fst_words
-                        .iter()
-                        .find(|prefixes| word.starts_with(&prefixes[0].as_bytes()))
-                }
-            };
-
-            if let Some(prefixes) = current_prefixes {
-                for prefix in prefixes.iter() {
-                    if word.starts_with(prefix.as_bytes()) {
-                        match prefixes_cache.get_mut(prefix.as_bytes()) {
-                            Some(value) => value.push(data.to_owned()),
-                            None => {
-                                prefixes_cache.insert(prefix.clone().into(), vec![data.to_owned()]);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_docids_sorter)?;
+        if !common_prefix_fst_words.is_empty() {
+            let mut new_word_docids_iter = new_word_docids.into_cursor()?;
+            let mut current_prefixes: Option<&&[String]> = None;
+            let mut prefixes_cache = HashMap::new();
+            while let Some((word, data)) = new_word_docids_iter.move_on_next()? {
+                current_prefixes = match current_prefixes.take() {
+                    Some(prefixes) if word.starts_with(&prefixes[0].as_bytes()) => Some(prefixes),
+                    _otherwise => {
+                        write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_docids_sorter)?;
+                        common_prefix_fst_words
+                            .iter()
+                            .find(|prefixes| word.starts_with(&prefixes[0].as_bytes()))
+                    }
+                };
+
+                if let Some(prefixes) = current_prefixes {
+                    for prefix in prefixes.iter() {
+                        if word.starts_with(prefix.as_bytes()) {
+                            match prefixes_cache.get_mut(prefix.as_bytes()) {
+                                Some(value) => value.push(data.to_owned()),
+                                None => {
+                                    prefixes_cache
+                                        .insert(prefix.clone().into(), vec![data.to_owned()]);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_docids_sorter)?;
+        }
 
         // We fetch the docids associated to the newly added word prefix fst only.
         let db = self.index.word_docids.remap_data_type::<ByteSlice>();
```
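A note on the loop shape above: because both the word database and `common_prefix_fst_words` are sorted, the loop keeps the group of prefixes matching the current word in `current_prefixes` and batches docids per prefix in `prefixes_cache`, flushing only at group boundaries. A sketch of what that flush could look like, assuming it simply drains the cache into the sorter (`Sorter` is a stand-in for the grenad sorter; the real `write_prefixes_in_sorter` lives elsewhere in the file and is not shown in this diff):

```rust
use std::collections::HashMap;

// Stand-in for grenad::Sorter: just records (key, data) insertions.
struct Sorter(Vec<(Vec<u8>, Vec<u8>)>);

fn write_prefixes_in_sorter(
    prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
    sorter: &mut Sorter,
) {
    // drain() empties the cache so the next prefix group starts fresh.
    for (key, data_slices) in prefixes.drain() {
        for data in data_slices {
            sorter.0.push((key.clone(), data));
        }
    }
}

fn main() {
    let mut cache = HashMap::new();
    cache.insert(b"win".to_vec(), vec![b"docids-a".to_vec(), b"docids-b".to_vec()]);
    let mut sorter = Sorter(Vec::new());
    write_prefixes_in_sorter(&mut cache, &mut sorter);
    assert!(cache.is_empty());
    assert_eq!(sorter.0.len(), 2);
}
```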
```diff
@@ -83,70 +83,76 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
             self.max_memory,
         );
 
-        // We compute the prefix docids associated with the common prefixes between
-        // the old and new word prefix fst.
-        let mut buffer = Vec::new();
-        let mut current_prefixes: Option<&&[String]> = None;
-        let mut prefixes_cache = HashMap::new();
-        while let Some((key, data)) = new_wppd_iter.move_on_next()? {
-            let (w1, w2, prox) = StrStrU8Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
-            if prox > self.max_proximity {
-                continue;
-            }
-
-            insert_current_prefix_data_in_sorter(
-                &mut buffer,
-                &mut current_prefixes,
-                &mut prefixes_cache,
-                &mut word_prefix_pair_proximity_docids_sorter,
-                common_prefix_fst_words,
-                self.max_prefix_length,
-                w1,
-                w2,
-                prox,
-                data,
-            )?;
-        }
-
-        write_prefixes_in_sorter(
-            &mut prefixes_cache,
-            &mut word_prefix_pair_proximity_docids_sorter,
-        )?;
-
-        // We compute the prefix docids associated with the newly added prefixes
-        // in the new word prefix fst.
-        let mut db_iter =
-            self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>().iter(self.wtxn)?;
-
-        let mut buffer = Vec::new();
-        let mut current_prefixes: Option<&&[String]> = None;
-        let mut prefixes_cache = HashMap::new();
-        while let Some(((w1, w2, prox), data)) = db_iter.next().transpose()? {
-            if prox > self.max_proximity {
-                continue;
-            }
-
-            insert_current_prefix_data_in_sorter(
-                &mut buffer,
-                &mut current_prefixes,
-                &mut prefixes_cache,
-                &mut word_prefix_pair_proximity_docids_sorter,
-                &new_prefix_fst_words,
-                self.max_prefix_length,
-                w1,
-                w2,
-                prox,
-                data,
-            )?;
-        }
-
-        write_prefixes_in_sorter(
-            &mut prefixes_cache,
-            &mut word_prefix_pair_proximity_docids_sorter,
-        )?;
-
-        drop(db_iter);
+        if !common_prefix_fst_words.is_empty() {
+            // We compute the prefix docids associated with the common prefixes between
+            // the old and new word prefix fst.
+            let mut buffer = Vec::new();
+            let mut current_prefixes: Option<&&[String]> = None;
+            let mut prefixes_cache = HashMap::new();
+            while let Some((key, data)) = new_wppd_iter.move_on_next()? {
+                let (w1, w2, prox) =
+                    StrStrU8Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
+                if prox > self.max_proximity {
+                    continue;
+                }
+
+                insert_current_prefix_data_in_sorter(
+                    &mut buffer,
+                    &mut current_prefixes,
+                    &mut prefixes_cache,
+                    &mut word_prefix_pair_proximity_docids_sorter,
+                    common_prefix_fst_words,
+                    self.max_prefix_length,
+                    w1,
+                    w2,
+                    prox,
+                    data,
+                )?;
+            }
+
+            write_prefixes_in_sorter(
+                &mut prefixes_cache,
+                &mut word_prefix_pair_proximity_docids_sorter,
+            )?;
+        }
+
+        if !new_prefix_fst_words.is_empty() {
+            // We compute the prefix docids associated with the newly added prefixes
+            // in the new word prefix fst.
+            let mut db_iter = self
+                .index
+                .word_pair_proximity_docids
+                .remap_data_type::<ByteSlice>()
+                .iter(self.wtxn)?;
+
+            let mut buffer = Vec::new();
+            let mut current_prefixes: Option<&&[String]> = None;
+            let mut prefixes_cache = HashMap::new();
+            while let Some(((w1, w2, prox), data)) = db_iter.next().transpose()? {
+                if prox > self.max_proximity {
+                    continue;
+                }
+
+                insert_current_prefix_data_in_sorter(
+                    &mut buffer,
+                    &mut current_prefixes,
+                    &mut prefixes_cache,
+                    &mut word_prefix_pair_proximity_docids_sorter,
+                    &new_prefix_fst_words,
+                    self.max_prefix_length,
+                    w1,
+                    w2,
+                    prox,
+                    data,
+                )?;
+            }
+
+            write_prefixes_in_sorter(
+                &mut prefixes_cache,
+                &mut word_prefix_pair_proximity_docids_sorter,
+            )?;
+
+            drop(db_iter);
+        }
 
         // All of the word prefix pairs in the database that have a w2
         // that is contained in the `suppr_pw` set must be removed as well.
         let mut iter = self
```
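The two guards above assume the caller has already split the prefix words into `common_prefix_fst_words` (present in both the old and new prefix fst) and `new_prefix_fst_words` (only in the new one); the second guard is the more valuable one, since it skips a full scan of `word_pair_proximity_docids` when no new prefixes were added. A hypothetical sketch of such a split; neither this helper nor its name appears in the commit, and it relies on fst words being lexicographically sorted:

```rust
// Split the new prefix words into those already known (common) and those
// that are brand new. `old_words` must be sorted, as fst words are.
fn split_prefix_words(
    old_words: &[String],
    new_words: &[String],
) -> (Vec<String>, Vec<String>) {
    let (common, new_only): (Vec<String>, Vec<String>) = new_words
        .iter()
        .cloned()
        .partition(|word| old_words.binary_search(word).is_ok());
    (common, new_only)
}

fn main() {
    let old = vec!["sum".to_string(), "win".to_string()];
    let new = vec!["spr".to_string(), "sum".to_string(), "win".to_string()];
    let (common, new_only) = split_prefix_words(&old, &new);
    assert_eq!(common, vec!["sum", "win"]);
    assert_eq!(new_only, vec!["spr"]);
}
```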
```diff
@@ -74,42 +74,46 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
 
         let mut new_word_position_docids_iter = new_word_position_docids.into_cursor()?;
 
-        // We fetch all the new common prefixes between the previous and new prefix fst.
-        let mut buffer = Vec::new();
-        let mut current_prefixes: Option<&&[String]> = None;
-        let mut prefixes_cache = HashMap::new();
-        while let Some((key, data)) = new_word_position_docids_iter.move_on_next()? {
-            let (word, pos) = StrBEU32Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
-
-            current_prefixes = match current_prefixes.take() {
-                Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes),
-                _otherwise => {
-                    write_prefixes_in_sorter(
-                        &mut prefixes_cache,
-                        &mut prefix_position_docids_sorter,
-                    )?;
-                    common_prefix_fst_words.iter().find(|prefixes| word.starts_with(&prefixes[0]))
-                }
-            };
-
-            if let Some(prefixes) = current_prefixes {
-                for prefix in prefixes.iter() {
-                    if word.starts_with(prefix) {
-                        buffer.clear();
-                        buffer.extend_from_slice(prefix.as_bytes());
-                        buffer.extend_from_slice(&pos.to_be_bytes());
-                        match prefixes_cache.get_mut(&buffer) {
-                            Some(value) => value.push(data.to_owned()),
-                            None => {
-                                prefixes_cache.insert(buffer.clone(), vec![data.to_owned()]);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_position_docids_sorter)?;
+        if !common_prefix_fst_words.is_empty() {
+            // We fetch all the new common prefixes between the previous and new prefix fst.
+            let mut buffer = Vec::new();
+            let mut current_prefixes: Option<&&[String]> = None;
+            let mut prefixes_cache = HashMap::new();
+            while let Some((key, data)) = new_word_position_docids_iter.move_on_next()? {
+                let (word, pos) = StrBEU32Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
+
+                current_prefixes = match current_prefixes.take() {
+                    Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes),
+                    _otherwise => {
+                        write_prefixes_in_sorter(
+                            &mut prefixes_cache,
+                            &mut prefix_position_docids_sorter,
+                        )?;
+                        common_prefix_fst_words
+                            .iter()
+                            .find(|prefixes| word.starts_with(&prefixes[0]))
+                    }
+                };
+
+                if let Some(prefixes) = current_prefixes {
+                    for prefix in prefixes.iter() {
+                        if word.starts_with(prefix) {
+                            buffer.clear();
+                            buffer.extend_from_slice(prefix.as_bytes());
+                            buffer.extend_from_slice(&pos.to_be_bytes());
+                            match prefixes_cache.get_mut(&buffer) {
+                                Some(value) => value.push(data.to_owned()),
+                                None => {
+                                    prefixes_cache.insert(buffer.clone(), vec![data.to_owned()]);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_position_docids_sorter)?;
+        }
 
         // We fetch the docids associated to the newly added word prefix fst only.
        let db = self.index.word_position_docids.remap_data_type::<ByteSlice>();
```
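The `buffer` built in the loop above is the cache key for a (prefix, position) pair: the prefix bytes followed by the position encoded in big-endian, so keys compare bytewise in (prefix, position) order, which is what the sorter relies on. A small illustrative helper showing the same layout (the function name is not from the diff):

```rust
// Key layout: prefix bytes ++ big-endian u32 position, mirroring the
// `buffer.clear()` / `extend_from_slice` sequence in the loop above.
fn prefix_position_key(prefix: &str, pos: u32) -> Vec<u8> {
    let mut key = Vec::with_capacity(prefix.len() + 4);
    key.extend_from_slice(prefix.as_bytes());
    key.extend_from_slice(&pos.to_be_bytes());
    key
}

fn main() {
    // "win" followed by position 2 as four big-endian bytes.
    assert_eq!(prefix_position_key("win", 2), b"win\x00\x00\x00\x02");
}
```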