Reduce incremental indexing time of word_prefix_position_docids DB

This database can easily contain millions of entries. Thus, iterating
over it can be very expensive.

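For regular `documentAdditionOrUpdate` tasks, `del_prefix_fst_words`
is always empty, so the `del_prefix_fst_words.contains(...)` check can
never match and the full scan deletes nothing. Guarding the loop with
`if !del_prefix_fst_words.is_empty()` skips that scan entirely and
saves a significant amount of time.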

The code's behaviour remains completely unchanged.
This commit is contained in:
Loïc Lecrenier 2023-01-31 11:42:24 +01:00
parent 33f61d2cd4
commit a2690ea8d4

View File

@ -140,6 +140,10 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
// We remove all the entries that are no more required in this word prefix position // We remove all the entries that are no more required in this word prefix position
// docids database. // docids database.
// We also avoid iterating over the whole `word_prefix_position_docids` database if we know in
// advance that the `if del_prefix_fst_words.contains(prefix.as_bytes()) {` condition below
// will always be false (i.e. if `del_prefix_fst_words` is empty).
if !del_prefix_fst_words.is_empty() {
let mut iter = let mut iter =
self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data(); self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data();
while let Some(((prefix, _), _)) = iter.next().transpose()? { while let Some(((prefix, _), _)) = iter.next().transpose()? {
@ -147,8 +151,8 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
unsafe { iter.del_current()? }; unsafe { iter.del_current()? };
} }
} }
drop(iter); drop(iter);
}
// We finally write all the word prefix position docids into the LMDB database. // We finally write all the word prefix position docids into the LMDB database.
sorter_into_lmdb_database( sorter_into_lmdb_database(