From a983129613d1f9340484e648536fcce4c4f4303d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= <loic.lecrenier@me.com>
Date: Thu, 20 Oct 2022 09:49:37 +0200
Subject: [PATCH] Apply suggestions from code review

---
 milli/src/search/criteria/mod.rs              |  5 ++--
 milli/src/update/index_documents/mod.rs       |  8 ++++++-
 milli/src/update/prefix_word_pairs/mod.rs     | 24 +++++++++++++++++--
 .../update/prefix_word_pairs/prefix_word.rs   |  9 ++++---
 .../update/prefix_word_pairs/word_prefix.rs   |  7 ++++--
 5 files changed, 42 insertions(+), 11 deletions(-)
diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs
index 234252ff2..4069306b3 100644
--- a/milli/src/search/criteria/mod.rs
+++ b/milli/src/search/criteria/mod.rs
@@ -448,9 +448,8 @@ pub fn resolve_phrase(ctx: &dyn Context, phrase: &[String]) -> Result<RoaringBit
                 } else {
                     let mut bitmap = RoaringBitmap::new();
                     for dist in 0..=dist {
-                        match ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
-                            Some(m) => bitmap |= m,
-                            None => {}
+                        if let Some(m) = ctx.word_pair_proximity_docids(s1, s2, dist as u8 + 1)? {
+                            bitmap |= m
                         }
                     }
                     if bitmap.is_empty() {
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 897f2f8f8..5550c8725 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -528,7 +528,13 @@ where
 
         if let Some(word_pair_proximity_docids) = word_pair_proximity_docids {
             // Run the word prefix pair proximity docids update operation.
-            PrefixWordPairsProximityDocids::new(self.wtxn, self.index).execute(
+            PrefixWordPairsProximityDocids::new(
+                self.wtxn,
+                self.index,
+                self.indexer_config.chunk_compression_type,
+                self.indexer_config.chunk_compression_level,
+            )
+            .execute(
                 word_pair_proximity_docids,
                 &new_prefix_fst_words,
                 &common_prefix_fst_words,
diff --git a/milli/src/update/prefix_word_pairs/mod.rs b/milli/src/update/prefix_word_pairs/mod.rs
index 1549acf40..03abdbb6e 100644
--- a/milli/src/update/prefix_word_pairs/mod.rs
+++ b/milli/src/update/prefix_word_pairs/mod.rs
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use std::collections::HashSet;
 use std::io::BufReader;
 
+use grenad::CompressionType;
 use heed::types::ByteSlice;
 
 use super::index_documents::{merge_cbo_roaring_bitmaps, CursorClonableMmap};
@@ -18,10 +19,24 @@ pub struct PrefixWordPairsProximityDocids<'t, 'u, 'i> {
     index: &'i Index,
     max_proximity: u8,
     max_prefix_length: usize,
+    chunk_compression_type: CompressionType,
+    chunk_compression_level: Option<u32>,
 }
 impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
-    pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> Self {
-        Self { wtxn, index, max_proximity: 4, max_prefix_length: 2 }
+    pub fn new(
+        wtxn: &'t mut heed::RwTxn<'i, 'u>,
+        index: &'i Index,
+        chunk_compression_type: CompressionType,
+        chunk_compression_level: Option<u32>,
+    ) -> Self {
+        Self {
+            wtxn,
+            index,
+            max_proximity: 4,
+            max_prefix_length: 2,
+            chunk_compression_type,
+            chunk_compression_level,
+        }
     }
     /// Set the maximum proximity required to make a prefix be part of the words prefixes
     /// database. If two words are too far from the threshold the associated documents will
@@ -42,6 +57,7 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
         self.max_prefix_length = value;
         self
     }
+
     #[logging_timer::time("WordPrefixPairProximityDocids::{}")]
     pub fn execute<'a>(
         self,
@@ -60,6 +76,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
             new_prefix_fst_words,
             common_prefix_fst_words,
             del_prefix_fst_words,
+            self.chunk_compression_type,
+            self.chunk_compression_level,
         )?;
 
         index_prefix_word_database(
@@ -72,6 +90,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
             new_prefix_fst_words,
             common_prefix_fst_words,
             del_prefix_fst_words,
+            self.chunk_compression_type,
+            self.chunk_compression_level,
         )?;
 
         Ok(())
diff --git a/milli/src/update/prefix_word_pairs/prefix_word.rs b/milli/src/update/prefix_word_pairs/prefix_word.rs
index 8883cc451..9bc184825 100644
--- a/milli/src/update/prefix_word_pairs/prefix_word.rs
+++ b/milli/src/update/prefix_word_pairs/prefix_word.rs
@@ -23,6 +23,8 @@ pub fn index_prefix_word_database(
     new_prefix_fst_words: &[String],
     common_prefix_fst_words: &[&[String]],
     del_prefix_fst_words: &HashSet<Vec<u8>>,
+    chunk_compression_type: CompressionType,
+    chunk_compression_level: Option<u32>,
 ) -> Result<()> {
     let max_proximity = max_proximity - 1;
     debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
@@ -35,7 +37,7 @@ pub fn index_prefix_word_database(
         .filter(|s| s.len() <= max_prefix_length)
         .collect();
 
-    for proximity in 1..=max_proximity - 1 {
+    for proximity in 1..max_proximity {
         for prefix in common_prefixes.iter() {
             let mut prefix_key = vec![];
             prefix_key.push(proximity);
@@ -78,7 +80,8 @@ pub fn index_prefix_word_database(
 
     // Since we read the DB, we can't write to it directly, so we add each new (word1, prefix, proximity)
     // element in an intermediary grenad
-    let mut writer = create_writer(CompressionType::None, None, tempfile::tempfile()?);
+    let mut writer =
+        create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?);
 
     for proximity in 1..=max_proximity - 1 {
         for prefix in new_prefixes.iter() {
@@ -144,7 +147,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
     mut next_word2_and_docids: impl for<'a> FnMut(&'a mut I) -> Result<Option<(&'a [u8], &'a [u8])>>,
     mut insert: impl for<'a> FnMut(&'a [u8], &'a [u8]) -> Result<()>,
 ) -> Result<()> {
-    let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = <_>::default();
+    let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = BTreeMap::default();
 
     // Memory usage check:
     // The content of the loop will be called for each `word2` that follows a word beginning
diff --git a/milli/src/update/prefix_word_pairs/word_prefix.rs b/milli/src/update/prefix_word_pairs/word_prefix.rs
index eb0b05d89..5895cdc46 100644
--- a/milli/src/update/prefix_word_pairs/word_prefix.rs
+++ b/milli/src/update/prefix_word_pairs/word_prefix.rs
@@ -187,6 +187,8 @@ pub fn index_word_prefix_database(
     new_prefix_fst_words: &[String],
     common_prefix_fst_words: &[&[String]],
     del_prefix_fst_words: &HashSet<Vec<u8>>,
+    chunk_compression_type: CompressionType,
+    chunk_compression_level: Option<u32>,
 ) -> Result<()> {
     debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
 
@@ -249,7 +251,8 @@ pub fn index_word_prefix_database(
 
         // Since we read the DB, we can't write to it directly, so we add each new (proximity, word1, prefix)
         // element in an intermediary grenad
-        let mut writer = create_writer(CompressionType::None, None, tempfile::tempfile()?);
+        let mut writer =
+            create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?);
 
         execute_on_word_pairs_and_prefixes(
             &mut db_iter,
@@ -325,7 +328,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
         };
         let word2_start_different_than_prev = word2[0] != prev_word2_start;
         // if there were no potential prefixes for the previous word2 based on its first letter,
-        // and if the current word2 starts with the s`ame letter, then there is also no potential
+        // and if the current word2 starts with the same letter, then there is also no potential
         // prefixes for the current word2, and we can skip to the next iteration
         if empty_prefixes && !word2_start_different_than_prev {
             continue;