From 2b51a63418625d53da66af3deea40222ba98b1ec Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Mon, 30 Sep 2024 11:42:36 +0200
Subject: [PATCH] Remove dead code and bucket word positions on deletion

---
 .../extract/searchable/extract_word_docids.rs | 175 +-----------------
 .../extract_word_pair_proximity_docids.rs     |   1 -
 milli/src/update/new/indexer/mod.rs           |  36 ----
 3 files changed, 1 insertion(+), 211 deletions(-)
diff --git a/milli/src/update/new/extract/searchable/extract_word_docids.rs b/milli/src/update/new/extract/searchable/extract_word_docids.rs
index fb79de7b9..20a69d4bc 100644
--- a/milli/src/update/new/extract/searchable/extract_word_docids.rs
+++ b/milli/src/update/new/extract/searchable/extract_word_docids.rs
@@ -1,4 +1,3 @@
-use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fs::File;
 use std::num::NonZero;
@@ -8,7 +7,6 @@ use heed::RoTxn;
 use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
-use super::SearchableExtractor;
 use crate::update::new::extract::cache::CboCachedSorter;
 use crate::update::new::extract::perm_json_p::contained_in;
 use crate::update::new::{DocumentChange, ItemsPool};
@@ -20,178 +18,6 @@ use crate::{
 
 const MAX_COUNTED_WORDS: usize = 30;
 
-trait ProtoWordDocidsExtractor {
-    fn build_key(field_id: FieldId, position: u16, word: &str) -> Cow<'_, [u8]>;
-    fn attributes_to_extract<'a>(
-        _rtxn: &'a RoTxn,
-        _index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>>;
-
-    fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
-}
-
-impl<T> SearchableExtractor for T
-where
-    T: ProtoWordDocidsExtractor,
-{
-    fn extract_document_change(
-        rtxn: &RoTxn,
-        index: &Index,
-        document_tokenizer: &DocumentTokenizer,
-        fields_ids_map: &mut GlobalFieldsIdsMap,
-        cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
-        document_change: DocumentChange,
-    ) -> Result<()> {
-        match document_change {
-            DocumentChange::Deletion(inner) => {
-                let mut token_fn = |_fname: &str, fid, pos, word: &str| {
-                    let key = Self::build_key(fid, pos, word);
-                    cached_sorter.insert_del_u32(&key, inner.docid()).map_err(crate::Error::from)
-                };
-                document_tokenizer.tokenize_document(
-                    inner.current(rtxn, index)?.unwrap(),
-                    fields_ids_map,
-                    &mut token_fn,
-                )?;
-            }
-            DocumentChange::Update(inner) => {
-                let mut token_fn = |_fname: &str, fid, pos, word: &str| {
-                    let key = Self::build_key(fid, pos, word);
-                    cached_sorter.insert_del_u32(&key, inner.docid()).map_err(crate::Error::from)
-                };
-                document_tokenizer.tokenize_document(
-                    inner.current(rtxn, index)?.unwrap(),
-                    fields_ids_map,
-                    &mut token_fn,
-                )?;
-
-                let mut token_fn = |_fname: &str, fid, pos, word: &str| {
-                    let key = Self::build_key(fid, pos, word);
-                    cached_sorter.insert_add_u32(&key, inner.docid()).map_err(crate::Error::from)
-                };
-                document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
-            }
-            DocumentChange::Insertion(inner) => {
-                let mut token_fn = |_fname: &str, fid, pos, word: &str| {
-                    let key = Self::build_key(fid, pos, word);
-                    cached_sorter.insert_add_u32(&key, inner.docid()).map_err(crate::Error::from)
-                };
-                document_tokenizer.tokenize_document(inner.new(), fields_ids_map, &mut token_fn)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        Self::attributes_to_extract(rtxn, index)
-    }
-
-    fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
-        Self::attributes_to_skip(rtxn, index)
-    }
-}
-
-pub struct WordDocidsExtractor;
-impl ProtoWordDocidsExtractor for WordDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        index.user_defined_searchable_fields(rtxn).map_err(Into::into)
-    }
-
-    fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>> {
-        // exact attributes must be skipped and stored in a separate DB, see `ExactWordDocidsExtractor`.
-        index.exact_attributes(rtxn).map_err(Into::into)
-    }
-
-    /// TODO write in an external Vec buffer
-    fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
-        Cow::Borrowed(word.as_bytes())
-    }
-}
-
-pub struct ExactWordDocidsExtractor;
-impl ProtoWordDocidsExtractor for ExactWordDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        let exact_attributes = index.exact_attributes(rtxn)?;
-        // If there are no user-defined searchable fields, we return all exact attributes.
-        // Otherwise, we return the intersection of exact attributes and user-defined searchable fields.
-        if let Some(searchable_attributes) = index.user_defined_searchable_fields(rtxn)? {
-            let attributes = exact_attributes
-                .into_iter()
-                .filter(|attr| searchable_attributes.contains(attr))
-                .collect();
-            Ok(Some(attributes))
-        } else {
-            Ok(Some(exact_attributes))
-        }
-    }
-
-    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
-        Ok(vec![])
-    }
-
-    fn build_key(_field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
-        Cow::Borrowed(word.as_bytes())
-    }
-}
-
-pub struct WordFidDocidsExtractor;
-impl ProtoWordDocidsExtractor for WordFidDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        index.user_defined_searchable_fields(rtxn).map_err(Into::into)
-    }
-
-    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
-        Ok(vec![])
-    }
-
-    fn build_key(field_id: FieldId, _position: u16, word: &str) -> Cow<[u8]> {
-        let mut key = Vec::new();
-        key.extend_from_slice(word.as_bytes());
-        key.push(0);
-        key.extend_from_slice(&field_id.to_be_bytes());
-        Cow::Owned(key)
-    }
-}
-
-pub struct WordPositionDocidsExtractor;
-impl ProtoWordDocidsExtractor for WordPositionDocidsExtractor {
-    fn attributes_to_extract<'a>(
-        rtxn: &'a RoTxn,
-        index: &'a Index,
-    ) -> Result<Option<Vec<&'a str>>> {
-        index.user_defined_searchable_fields(rtxn).map_err(Into::into)
-    }
-
-    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
-        Ok(vec![])
-    }
-
-    fn build_key(_field_id: FieldId, position: u16, word: &str) -> Cow<[u8]> {
-        // position must be bucketed to reduce the number of keys in the DB.
-        let position = bucketed_position(position);
-        let mut key = Vec::new();
-        key.extend_from_slice(word.as_bytes());
-        key.push(0);
-        key.extend_from_slice(&position.to_be_bytes());
-        Cow::Owned(key)
-    }
-}
-
-// V2
-
 struct WordDocidsCachedSorters {
     word_fid_docids: CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
     word_docids: CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
@@ -340,6 +166,7 @@ impl WordDocidsCachedSorters {
         buffer.extend_from_slice(&field_id.to_be_bytes());
         self.word_fid_docids.insert_del_u32(buffer, docid)?;
 
+        let position = bucketed_position(position);
         buffer.clear();
         buffer.extend_from_slice(word.as_bytes());
         buffer.push(0);
diff --git a/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
index 5736fc1d4..7d3655be8 100644
--- a/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
+++ b/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs
@@ -2,7 +2,6 @@ use std::collections::VecDeque;
 use std::rc::Rc;
 
 use heed::RoTxn;
-use itertools::merge_join_by;
 use obkv::KvReader;
 
 use super::tokenize_document::DocumentTokenizer;
diff --git a/milli/src/update/new/indexer/mod.rs b/milli/src/update/new/indexer/mod.rs
index 19132b5db..5187e4f4c 100644
--- a/milli/src/update/new/indexer/mod.rs
+++ b/milli/src/update/new/indexer/mod.rs
@@ -133,42 +133,6 @@ where
                         extractor_sender.send_searchable::<FidWordCountDocids>(fid_word_count_docids).unwrap();
                     }
 
-                    // {
-                    //     let span = tracing::trace_span!(target: "indexing::documents::extract", "exact_word_docids");
-                    //     let _entered = span.enter();
-                    //     extract_and_send_docids::<ExactWordDocidsExtractor, ExactWordDocids>(
-                    //         index,
-                    //         &global_fields_ids_map,
-                    //         grenad_parameters,
-                    //         document_changes.clone(),
-                    //         &extractor_sender,
-                    //     )?;
-                    // }
-
-                    // {
-                    //     let span = tracing::trace_span!(target: "indexing::documents::extract", "word_position_docids");
-                    //     let _entered = span.enter();
-                    //     extract_and_send_docids::<WordPositionDocidsExtractor, WordPositionDocids>(
-                    //         index,
-                    //         &global_fields_ids_map,
-                    //         grenad_parameters,
-                    //         document_changes.clone(),
-                    //         &extractor_sender,
-                    //     )?;
-                    // }
-
-                    // {
-                    //     let span = tracing::trace_span!(target: "indexing::documents::extract", "fid_word_count_docids");
-                    //     let _entered = span.enter();
-                    //     extract_and_send_docids::<FidWordCountDocidsExtractor, FidWordCountDocids>(
-                    //         index,
-                    //         &global_fields_ids_map,
-                    //         GrenadParameters::default(),
-                    //         document_changes.clone(),
-                    //         &extractor_sender,
-                    //     )?;
-                    // }
-
                     {
                         let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
                         let _entered = span.enter();