Remove old query_tree code and make clippy happy

Loïc Lecrenier 2023-03-23 09:39:16 +01:00
parent f5f5f03ec0
commit 7169d85115
8 changed files with 82 additions and 1590 deletions

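Most of the one-line fixes in this commit address the same two clippy lints: useless_conversion, for calling .into_iter() on a range (ranges already implement Iterator), and explicit_into_iter_loop, for calling .into_iter() on the subject of a for loop, which the loop desugaring does anyway. A minimal standalone sketch of the before/after pattern (not taken from the diff):

    fn main() {
        // Before: `(0..4)` is already an Iterator, so `.into_iter()` is an
        // identity call that clippy reports as `useless_conversion`.
        let _noisy: Vec<u32> = (0..4).into_iter().collect();
        // After: collect the range directly.
        let clean: Vec<u32> = (0..4).collect();

        // A `for` loop already calls IntoIterator::into_iter on its subject,
        // so `for x in clean.into_iter()` trips `explicit_into_iter_loop`.
        for x in clean {
            println!("{x}");
        }
    }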
View File

@@ -73,7 +73,7 @@ impl<'a> FacetDistribution<'a> {
let distribution_prelength = distribution.len();
let db = self.index.field_id_docid_facet_f64s;
-for docid in candidates.into_iter() {
+for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
@@ -97,7 +97,7 @@ impl<'a> FacetDistribution<'a> {
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let db = self.index.field_id_docid_facet_strings;
-'outer: for docid in candidates.into_iter() {
+'outer: for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
@@ -505,7 +505,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..10_000).into_iter().collect())
+.candidates((0..10_000).collect())
.execute()
.unwrap();
@@ -513,7 +513,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..5_000).into_iter().collect())
+.candidates((0..5_000).collect())
.execute()
.unwrap();
@@ -521,7 +521,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..5_000).into_iter().collect())
+.candidates((0..5_000).collect())
.execute()
.unwrap();
@@ -529,7 +529,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..5_000).into_iter().collect())
+.candidates((0..5_000).collect())
.max_values_per_facet(1)
.execute()
.unwrap();
@@ -546,7 +546,7 @@ mod tests {
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
-let facet_values = (0..1000).into_iter().map(|x| format!("{x:x}")).collect::<Vec<_>>();
+let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..10_000 {
@@ -582,7 +582,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..10_000).into_iter().collect())
+.candidates((0..10_000).collect())
.execute()
.unwrap();
@@ -590,7 +590,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..5_000).into_iter().collect())
+.candidates((0..5_000).collect())
.execute()
.unwrap();
@@ -606,7 +606,7 @@ mod tests {
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
-let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+let facet_values = (0..1000).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
@@ -634,7 +634,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..1000).into_iter().collect())
+.candidates((0..1000).collect())
.compute_stats()
.unwrap();
@@ -642,7 +642,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((217..777).into_iter().collect())
+.candidates((217..777).collect())
.compute_stats()
.unwrap();
@@ -658,7 +658,7 @@ mod tests {
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
-let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+let facet_values = (0..1000).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
@@ -686,7 +686,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..1000).into_iter().collect())
+.candidates((0..1000).collect())
.compute_stats()
.unwrap();
@@ -694,7 +694,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((217..777).into_iter().collect())
+.candidates((217..777).collect())
.compute_stats()
.unwrap();
@@ -710,7 +710,7 @@ mod tests {
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
-let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+let facet_values = (0..1000).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
@@ -738,7 +738,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..1000).into_iter().collect())
+.candidates((0..1000).collect())
.compute_stats()
.unwrap();
@@ -746,7 +746,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((217..777).into_iter().collect())
+.candidates((217..777).collect())
.compute_stats()
.unwrap();
@@ -762,7 +762,7 @@ mod tests {
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
-let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+let facet_values = (0..1000).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
@@ -794,7 +794,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((0..1000).into_iter().collect())
+.candidates((0..1000).collect())
.compute_stats()
.unwrap();
@@ -802,7 +802,7 @@ mod tests {
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
-.candidates((217..777).into_iter().collect())
+.candidates((217..777).collect())
.compute_stats()
.unwrap();

View File

@@ -142,7 +142,7 @@ mod tests {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
-let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
+let candidates = (0..=255).collect::<RoaringBitmap>();
let mut results = String::new();
iterate_over_facet_distribution(
&txn,
@@ -166,7 +166,7 @@ mod tests {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
-let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
+let candidates = (0..=255).collect::<RoaringBitmap>();
let mut results = String::new();
let mut nbr_facets = 0;
iterate_over_facet_distribution(

View File

@@ -410,7 +410,7 @@ mod tests {
let mut results = String::new();
-for i in (0..=255).into_iter().rev() {
+for i in (0..=255).rev() {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Included(255.);
@@ -431,7 +431,7 @@ mod tests {
let mut results = String::new();
-for i in (0..=255).into_iter().rev() {
+for i in (0..=255).rev() {
let i = i as f64;
let start = Bound::Excluded(i);
let end = Bound::Excluded(255.);
@@ -466,7 +466,7 @@ mod tests {
let mut results = String::new();
-for i in (0..=128).into_iter().rev() {
+for i in (0..=128).rev() {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Included(255. - i);
@@ -491,7 +491,7 @@ mod tests {
let mut results = String::new();
-for i in (0..=128).into_iter().rev() {
+for i in (0..=128).rev() {
let i = i as f64;
let start = Bound::Excluded(i);
let end = Bound::Excluded(255. - i);

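The tests above sweep numeric facet ranges with std::ops::Bound pairs. For reference, a pair of bounds is itself a RangeBounds in the standard library; a tiny standalone illustration:

    use std::ops::{Bound, RangeBounds};

    fn main() {
        let start = Bound::Included(10.0_f64);
        let end = Bound::Excluded(255.0_f64);
        // (start, end) implements RangeBounds<f64>: the interval [10, 255).
        assert!((start, end).contains(&10.0));
        assert!(!(start, end).contains(&255.0));
    }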
View File

@@ -132,7 +132,7 @@ mod tests {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
-let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
+let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
for el in iter {
@@ -154,7 +154,7 @@ mod tests {
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
-let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
+let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {

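The (200..=300).collect::<RoaringBitmap>() form works because RoaringBitmap implements FromIterator<u32>, so a range of document ids collects directly and the explicit .into_iter() was never needed. A small standalone sketch, assuming the roaring crate:

    use roaring::RoaringBitmap;

    fn main() {
        // FromIterator<u32> lets collect() build the bitmap straight from a range.
        let candidates: RoaringBitmap = (200..=300).collect();
        assert_eq!(candidates.len(), 101);
        assert!(candidates.contains(250));
    }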
View File

@@ -142,7 +142,7 @@ mod tests {
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
-let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
+let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
@@ -165,7 +165,7 @@ mod tests {
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
-let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
+let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();

View File

@@ -1,21 +1,14 @@
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
-use self::fst_utils::{Complement, Intersection, StartsWith, Union};
pub use self::matches::{
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
};
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
};
-use fst::automaton::Str;
-use fst::{Automaton, IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
-use std::borrow::Cow;
-use std::collections::hash_map::{Entry, HashMap};
use std::fmt;
-use std::result::Result as StdResult;
-use std::str::Utf8Error;
// Building these factories is not free.
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
@@ -26,7 +19,6 @@ pub mod facet;
mod fst_utils;
mod matches;
pub mod new;
-mod query_tree;
pub struct Search<'a> {
query: Option<String>,
@@ -200,70 +192,6 @@ impl Default for TermsMatchingStrategy {
}
}
-pub type WordDerivationsCache = HashMap<(String, bool, u8), Vec<(String, u8)>>;
-pub fn word_derivations<'c>(
-word: &str,
-is_prefix: bool,
-max_typo: u8,
-fst: &fst::Set<Cow<[u8]>>,
-cache: &'c mut WordDerivationsCache,
-) -> StdResult<&'c [(String, u8)], Utf8Error> {
-match cache.entry((word.to_string(), is_prefix, max_typo)) {
-Entry::Occupied(entry) => Ok(entry.into_mut()),
-Entry::Vacant(entry) => {
-// println!("word derivations {word} {is_prefix} {max_typo}");
-let mut derived_words = Vec::new();
-if max_typo == 0 {
-if is_prefix {
-let prefix = Str::new(word).starts_with();
-let mut stream = fst.search(prefix).into_stream();
-while let Some(word) = stream.next() {
-let word = std::str::from_utf8(word)?;
-derived_words.push((word.to_string(), 0));
-}
-} else if fst.contains(word) {
-derived_words.push((word.to_string(), 0));
-}
-} else if max_typo == 1 {
-let dfa = build_dfa(word, 1, is_prefix);
-let starts = StartsWith(Str::new(get_first(word)));
-let mut stream = fst.search_with_state(Intersection(starts, &dfa)).into_stream();
-while let Some((word, state)) = stream.next() {
-let word = std::str::from_utf8(word)?;
-let d = dfa.distance(state.1);
-derived_words.push((word.to_string(), d.to_u8()));
-}
-} else {
-let starts = StartsWith(Str::new(get_first(word)));
-let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
-let second_dfa = build_dfa(word, 2, is_prefix);
-let second = Intersection(&second_dfa, &starts);
-let automaton = Union(first, &second);
-let mut stream = fst.search_with_state(automaton).into_stream();
-while let Some((found_word, state)) = stream.next() {
-let found_word = std::str::from_utf8(found_word)?;
-// in the case the typo is on the first letter, we know the number of typo
-// is two
-if get_first(found_word) != get_first(word) {
-derived_words.push((found_word.to_string(), 2));
-} else {
-// Else, we know that it is the second dfa that matched and compute the
-// correct distance
-let d = second_dfa.distance((state.1).0);
-derived_words.push((found_word.to_string(), d.to_u8()));
-}
-}
-}
-Ok(entry.insert(derived_words))
-}
-}
-}
fn get_first(s: &str) -> &str {
match s.chars().next() {
Some(c) => &s[..c.len_utf8()],
@@ -337,66 +265,66 @@ mod test {
assert!(!search.is_typo_authorized().unwrap());
}
-#[test]
-fn test_one_typos_tolerance() {
-let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-let mut cache = HashMap::new();
-let found = word_derivations("zealend", false, 1, &fst, &mut cache).unwrap();
+// #[test]
+// fn test_one_typos_tolerance() {
+// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+// let mut cache = HashMap::new();
+// let found = word_derivations("zealend", false, 1, &fst, &mut cache).unwrap();
-assert_eq!(found, &[("zealand".to_string(), 1)]);
-}
+// assert_eq!(found, &[("zealand".to_string(), 1)]);
+// }
-#[test]
-fn test_one_typos_first_letter() {
-let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-let mut cache = HashMap::new();
-let found = word_derivations("sealand", false, 1, &fst, &mut cache).unwrap();
+// #[test]
+// fn test_one_typos_first_letter() {
+// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+// let mut cache = HashMap::new();
+// let found = word_derivations("sealand", false, 1, &fst, &mut cache).unwrap();
-assert_eq!(found, &[]);
-}
+// assert_eq!(found, &[]);
+// }
-#[test]
-fn test_two_typos_tolerance() {
-let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-let mut cache = HashMap::new();
-let found = word_derivations("zealemd", false, 2, &fst, &mut cache).unwrap();
+// #[test]
+// fn test_two_typos_tolerance() {
+// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+// let mut cache = HashMap::new();
+// let found = word_derivations("zealemd", false, 2, &fst, &mut cache).unwrap();
-assert_eq!(found, &[("zealand".to_string(), 2)]);
-}
+// assert_eq!(found, &[("zealand".to_string(), 2)]);
+// }
-#[test]
-fn test_two_typos_first_letter() {
-let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-let mut cache = HashMap::new();
-let found = word_derivations("sealand", false, 2, &fst, &mut cache).unwrap();
+// #[test]
+// fn test_two_typos_first_letter() {
+// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+// let mut cache = HashMap::new();
+// let found = word_derivations("sealand", false, 2, &fst, &mut cache).unwrap();
-assert_eq!(found, &[("zealand".to_string(), 2)]);
-}
+// assert_eq!(found, &[("zealand".to_string(), 2)]);
+// }
-#[test]
-fn test_prefix() {
-let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-let mut cache = HashMap::new();
-let found = word_derivations("ze", true, 0, &fst, &mut cache).unwrap();
+// #[test]
+// fn test_prefix() {
+// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+// let mut cache = HashMap::new();
+// let found = word_derivations("ze", true, 0, &fst, &mut cache).unwrap();
-assert_eq!(found, &[("zealand".to_string(), 0)]);
-}
+// assert_eq!(found, &[("zealand".to_string(), 0)]);
+// }
-#[test]
-fn test_bad_prefix() {
-let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-let mut cache = HashMap::new();
-let found = word_derivations("se", true, 0, &fst, &mut cache).unwrap();
+// #[test]
+// fn test_bad_prefix() {
+// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+// let mut cache = HashMap::new();
+// let found = word_derivations("se", true, 0, &fst, &mut cache).unwrap();
-assert_eq!(found, &[]);
-}
+// assert_eq!(found, &[]);
+// }
-#[test]
-fn test_prefix_with_typo() {
-let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-let mut cache = HashMap::new();
-let found = word_derivations("zae", true, 1, &fst, &mut cache).unwrap();
+// #[test]
+// fn test_prefix_with_typo() {
+// let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+// let mut cache = HashMap::new();
+// let found = word_derivations("zae", true, 1, &fst, &mut cache).unwrap();
-assert_eq!(found, &[("zealand".to_string(), 1)]);
-}
+// assert_eq!(found, &[("zealand".to_string(), 1)]);
+// }
}

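The word_derivations function deleted above enumerated dictionary words within a typo budget by intersecting an fst::Set with a Levenshtein automaton. A minimal sketch of that core technique, assuming the fst crate plus levenshtein_automata compiled with its fst_automaton feature (so the DFA implements fst::Automaton); this illustrates the removed approach, not the new engine's replacement:

    use fst::{IntoStreamer, Streamer};
    use levenshtein_automata::LevenshteinAutomatonBuilder;

    fn main() {
        // fst sets must be built from keys in lexicographic order.
        let dictionary = fst::Set::from_iter(["sealand", "zealand"]).unwrap();

        // Distance-1 automaton counting a transposition as one typo,
        // mirroring LevBuilder::new(1, true) in the code above.
        let lev = LevenshteinAutomatonBuilder::new(1, true);
        let dfa = lev.build_dfa("zealend");

        // Stream every dictionary entry the DFA accepts.
        let mut stream = dictionary.search(&dfa).into_stream();
        while let Some(word) = stream.next() {
            // Prints "zealand", one substitution away from "zealend".
            println!("{}", String::from_utf8_lossy(word));
        }
    }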
View File

@@ -15,10 +15,9 @@ mod sort;
// TODO: documentation + comments
mod words;
// #[cfg(test)]
use std::collections::{BTreeSet, HashSet};
-use charabia::{Tokenize, TokenizerBuilder};
+use charabia::TokenizerBuilder;
use db_cache::DatabaseCache;
use graph_based_ranking_rule::{Proximity, Typo};
use heed::RoTxn;
@@ -254,7 +253,7 @@ pub fn execute_search(
}
let tokenizer = tokbuilder.build();
-let tokens = tokenizer.tokenize(&query);
+let tokens = tokenizer.tokenize(query);
let query_terms = located_query_terms_from_string(ctx, tokens, words_limit)?;
let graph = QueryGraph::from_query(ctx, query_terms)?;

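The final hunk drops a needless borrow: query is already a borrowed string, so tokenize(&query) hands over a double reference that the compiler immediately peels off again (clippy: needless_borrow). A rough usage sketch, assuming charabia's builder API as it appears in this hunk:

    use charabia::TokenizerBuilder;

    fn main() {
        let query = "hello world";
        let mut builder = TokenizerBuilder::default();
        let tokenizer = builder.build();
        // tokenize takes a &str; writing `&query` would add an extra layer
        // of reference only for auto-deref to strip off again.
        for token in tokenizer.tokenize(query) {
            println!("{:?}", token.lemma());
        }
    }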
File diff suppressed because it is too large