Avoid iterating on big databases when useless

2024-11-23 10:37:41 +08:00 · 2022-03-01 18:02:12 +01:00 · 2022-03-01 18:02:12 +01:00 · 1ae13c1374
commit 1ae13c1374
parent a8d28e364d
3 changed files with 111 additions and 98 deletions
--- a/milli/src/update/word_prefix_docids.rs
+++ b/milli/src/update/word_prefix_docids.rs
@@ -50,6 +50,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
            self.max_memory,
        );

+        if !common_prefix_fst_words.is_empty() {
            let mut new_word_docids_iter = new_word_docids.into_cursor()?;
            let mut current_prefixes: Option<&&[String]> = None;
            let mut prefixes_cache = HashMap::new();
@@ -70,7 +71,8 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
                            match prefixes_cache.get_mut(prefix.as_bytes()) {
                                Some(value) => value.push(data.to_owned()),
                                None => {
-                                prefixes_cache.insert(prefix.clone().into(), vec![data.to_owned()]);
+                                    prefixes_cache
+                                        .insert(prefix.clone().into(), vec![data.to_owned()]);
                                }
                            }
                        }
@@ -79,6 +81,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
            }

            write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_docids_sorter)?;
+        }

        // We fetch the docids associated to the newly added word prefix fst only.
        let db = self.index.word_docids.remap_data_type::<ByteSlice>();
--- a/milli/src/update/word_prefix_pair_proximity_docids.rs
+++ b/milli/src/update/word_prefix_pair_proximity_docids.rs
@@ -83,13 +83,15 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
            self.max_memory,
        );

+        if !common_prefix_fst_words.is_empty() {
            // We compute the prefix docids associated with the common prefixes between
            // the old and new word prefix fst.
            let mut buffer = Vec::new();
            let mut current_prefixes: Option<&&[String]> = None;
            let mut prefixes_cache = HashMap::new();
            while let Some((key, data)) = new_wppd_iter.move_on_next()? {
-            let (w1, w2, prox) = StrStrU8Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
+                let (w1, w2, prox) =
+                    StrStrU8Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
                if prox > self.max_proximity {
                    continue;
                }
@@ -112,11 +114,16 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
                &mut prefixes_cache,
                &mut word_prefix_pair_proximity_docids_sorter,
            )?;
+        }

+        if !new_prefix_fst_words.is_empty() {
            // We compute the prefix docids associated with the newly added prefixes
            // in the new word prefix fst.
-        let mut db_iter =
-            self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>().iter(self.wtxn)?;
+            let mut db_iter = self
+                .index
+                .word_pair_proximity_docids
+                .remap_data_type::<ByteSlice>()
+                .iter(self.wtxn)?;

            let mut buffer = Vec::new();
            let mut current_prefixes: Option<&&[String]> = None;
@@ -144,8 +151,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
                &mut prefixes_cache,
                &mut word_prefix_pair_proximity_docids_sorter,
            )?;
-
-        drop(db_iter);
+        }

        // All of the word prefix pairs in the database that have a w2
        // that is contained in the `suppr_pw` set must be removed as well.
--- a/milli/src/update/words_prefix_position_docids.rs
+++ b/milli/src/update/words_prefix_position_docids.rs
@@ -74,6 +74,7 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {

        let mut new_word_position_docids_iter = new_word_position_docids.into_cursor()?;

+        if !common_prefix_fst_words.is_empty() {
            // We fetch all the new common prefixes between the previous and new prefix fst.
            let mut buffer = Vec::new();
            let mut current_prefixes: Option<&&[String]> = None;
@@ -88,7 +89,9 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
                            &mut prefixes_cache,
                            &mut prefix_position_docids_sorter,
                        )?;
-                    common_prefix_fst_words.iter().find(|prefixes| word.starts_with(&prefixes[0]))
+                        common_prefix_fst_words
+                            .iter()
+                            .find(|prefixes| word.starts_with(&prefixes[0]))
                    }
                };

@@ -110,6 +113,7 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
            }

            write_prefixes_in_sorter(&mut prefixes_cache, &mut prefix_position_docids_sorter)?;
+        }

        // We fetch the docids associated to the newly added word prefix fst only.
        let db = self.index.word_position_docids.remap_data_type::<ByteSlice>();