From a2690ea8d47d6104bf9e6a011b65e33ada822a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 31 Jan 2023 11:42:24 +0100 Subject: [PATCH] Reduce incremental indexing time of `words_prefix_position_docids` DB This database can easily contain millions of entries. Thus, iterating over it can be very expensive. For regular `documentAdditionOrUpdate` tasks, `del_prefix_fst_words` will always be empty. Thus, we can save a significant amount of time by adding this `if !del_prefix_fst_words.is_empty()` condition. The code's behaviour remains completely unchanged. --- .../src/update/words_prefix_position_docids.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/milli/src/update/words_prefix_position_docids.rs b/milli/src/update/words_prefix_position_docids.rs index 5dbc9f89b..6f12dde38 100644 --- a/milli/src/update/words_prefix_position_docids.rs +++ b/milli/src/update/words_prefix_position_docids.rs @@ -140,16 +140,20 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> { // We remove all the entries that are no more required in this word prefix position // docids database. - let mut iter = - self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data(); - while let Some(((prefix, _), _)) = iter.next().transpose()? { - if del_prefix_fst_words.contains(prefix.as_bytes()) { - unsafe { iter.del_current()? }; + // We also avoid iterating over the whole `word_prefix_position_docids` database if we know in + // advance that the `if del_prefix_fst_words.contains(prefix.as_bytes()) {` condition below + // will always be false (i.e. if `del_prefix_fst_words` is empty). + if !del_prefix_fst_words.is_empty() { + let mut iter = + self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data(); + while let Some(((prefix, _), _)) = iter.next().transpose()? { + if del_prefix_fst_words.contains(prefix.as_bytes()) { + unsafe { iter.del_current()? }; + } } + drop(iter); } - drop(iter); - // We finally write all the word prefix position docids into the LMDB database. sorter_into_lmdb_database( self.wtxn,