Remove unused files

2024-11-26 20:15:07 +08:00 · 2021-03-02 11:02:09 +01:00 · 2021-03-02 11:02:09 +01:00 · 6bf6b40495
commit 6bf6b40495
parent f118d7e067
3 changed files with 0 additions and 382 deletions
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@ -3,8 +3,6 @@
 mod criterion;
 mod external_documents_ids;
 mod fields_ids_map;
-mod mdfs;
-mod query_tokens;
 mod search;
 mod update_store;
 pub mod facet;
--- a/milli/src/mdfs.rs
+++ b/milli/src/mdfs.rs
@ -1,163 +0,0 @@
-use std::collections::hash_map::Entry::{Occupied, Vacant};
-use std::collections::HashMap;
-use std::mem;
-
-use roaring::RoaringBitmap;
-use crate::Index;
-
-/// A mana-based depth-first search implementation.
-pub struct Mdfs<'a> {
-    index: &'a Index,
-    rtxn: &'a heed::RoTxn<'a>,
-    words: &'a [(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
-    union_cache: HashMap<(usize, u8), RoaringBitmap>,
-    candidates: RoaringBitmap,
-    mana: u32,
-    max_mana: u32,
-}
-
-impl<'a> Mdfs<'a> {
-    pub fn new(
-        index: &'a Index,
-        rtxn: &'a heed::RoTxn,
-        words: &'a [(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
-        candidates: RoaringBitmap,
-    ) -> Mdfs<'a>
-    {
-        // Compute the number of pairs (windows) we have for this list of words.
-        let mana = words.len().saturating_sub(1) as u32;
-        let max_mana = mana * 8;
-        Mdfs { index, rtxn, words, union_cache: HashMap::new(), candidates, mana, max_mana }
-    }
-}
-
-impl<'a> Iterator for Mdfs<'a> {
-    type Item = anyhow::Result<(u32, RoaringBitmap)>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        // If there is at most one word, the only documents
-        // that we can possibly return are the candidates.
-        if self.words.len() <= 1 {
-            if self.candidates.is_empty() { return None }
-            return Some(Ok((0, mem::take(&mut self.candidates))));
-        }
-
-        while self.mana <= self.max_mana {
-            let mut answer = RoaringBitmap::new();
-            let result = mdfs_step(
-                &self.index,
-                &self.rtxn,
-                self.mana,
-                self.words,
-                &self.candidates,
-                &self.candidates,
-                &mut self.union_cache,
-                &mut answer,
-            );
-
-            match result {
-                Ok(()) => {
-                    // We always increase the mana for the next loop.
-                    let proximity = self.mana;
-                    self.mana += 1;
-
-                    // If no documents were found we must not return, but instead
-                    // continue the search with more mana.
-                    if !answer.is_empty() {
-
-                        // We remove the answered documents from the list of
-                        // candidates to be sure we don't search for them again.
-                        self.candidates.difference_with(&answer);
-
-                        // We return the answer.
-                        return Some(Ok((proximity, answer)));
-                    }
-                },
-                Err(e) => return Some(Err(e)),
-            }
-        }
-
-        None
-    }
-}
-
-fn mdfs_step(
-    index: &Index,
-    rtxn: &heed::RoTxn,
-    mana: u32,
-    words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
-    candidates: &RoaringBitmap,
-    parent_docids: &RoaringBitmap,
-    union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
-    answer: &mut RoaringBitmap,
-) -> anyhow::Result<()>
-{
-    use std::cmp::{min, max};
-
-    let (words1, words2) = (&words[0].0, &words[1].0);
-    let pairs = words_pair_combinations(words1, words2);
-    let tail = &words[1..];
-    let nb_children = tail.len() as u32 - 1;
-
-    // You must consume at least 1 mana, and at least whatever your children cannot
-    // absorb (each child consumes at most 8). Because the last child must consume all
-    // the remaining mana, it is mandatory that there is not too much left at the end.
-    let min_proximity = max(1, mana.saturating_sub(nb_children * 8)) as u8;
-
-    // The maximum amount of mana that you can use is 8, or the remaining amount of
-    // mana minus the number of your children: you can't just consume all the mana,
-    // as each of your children must be left with at least 1 mana.
-    let max_proximity = min(8, mana - nb_children) as u8;
-
-    for proximity in min_proximity..=max_proximity {
-        let mut docids = match union_cache.entry((words.len(), proximity)) {
-            Occupied(entry) => entry.get().clone(),
-            Vacant(entry) => {
-                let mut docids = RoaringBitmap::new();
-                if proximity == 8 {
-                    docids = candidates.clone();
-                } else {
-                    for (w1, w2) in pairs.iter().cloned() {
-                        let key = (w1, w2, proximity);
-                        if let Some(di) = index.word_pair_proximity_docids.get(rtxn, &key)? {
-                            docids.union_with(&di);
-                        }
-                    }
-                }
-                entry.insert(docids).clone()
-            }
-        };
-
-        // Only keep docids also present in the parent's docids (a subset of the candidates).
-        docids.intersect_with(parent_docids);
-
-        if !docids.is_empty() {
-            let mana = mana.checked_sub(proximity as u32).unwrap();
-            if tail.len() < 2 {
-                // We are the last pair, we return without recursing as we don't have any children.
-                answer.union_with(&docids);
-                return Ok(());
-            } else {
-                return mdfs_step(index, rtxn, mana, tail, candidates, &docids, union_cache, answer);
-            }
-        }
-    }
-
-    Ok(())
-}
-
-fn words_pair_combinations<'h>(
-    w1: &'h HashMap<String, (u8, RoaringBitmap)>,
-    w2: &'h HashMap<String, (u8, RoaringBitmap)>,
-) -> Vec<(&'h str, &'h str)>
-{
-    let mut pairs = Vec::new();
-    for (w1, (_typos, docids1)) in w1 {
-        for (w2, (_typos, docids2)) in w2 {
-            if !docids1.is_disjoint(&docids2) {
-                pairs.push((w1.as_str(), w2.as_str()));
-            }
-        }
-    }
-    pairs
-}
--- a/milli/src/query_tokens.rs
+++ b/milli/src/query_tokens.rs
@ -1,217 +0,0 @@
-use meilisearch_tokenizer::{Token, TokenKind};
-
-#[derive(Debug)]
-enum State {
-    Free,
-    Quoted,
-}
-
-impl State {
-    fn swap(&mut self) {
-        match self {
-            State::Quoted => *self = State::Free,
-            State::Free => *self = State::Quoted,
-        }
-    }
-}
-
-#[derive(Debug, PartialEq, Eq)]
-pub enum QueryToken<'a> {
-    Free(Token<'a>),
-    Quoted(Token<'a>),
-}
-
-pub fn query_tokens<'a>(mut tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<Item = QueryToken<'a>> {
-    let mut state = State::Free;
-    let f = move || {
-        loop {
-            let token = tokens.next()?;
-            match token.kind() {
-                _ if token.text().trim() == "\"" => state.swap(),
-                TokenKind::Word => {
-                    let token = match state {
-                        State::Quoted => QueryToken::Quoted(token),
-                        State::Free => QueryToken::Free(token),
-                    };
-                    return Some(token);
-                },
-                _ => (),
-            }
-        }
-    };
-    std::iter::from_fn(f)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use QueryToken::{Quoted, Free};
-    use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
-    use fst::Set;
-
-    macro_rules! assert_eq_query_token {
-        ($test:expr, Quoted($val:literal)) => {
-            match $test {
-                Quoted(val) => assert_eq!(val.text(), $val),
-                Free(val) => panic!("expected Quoted(\"{}\"), found Free(\"{}\")", $val, val.text()),
-            }
-        };
-
-        ($test:expr, Free($val:literal)) => {
-            match $test {
-                Quoted(val) => panic!("expected Free(\"{}\"), found Quoted(\"{}\")", $val, val.text()),
-                Free(val) => assert_eq!(val.text(), $val),
-            }
-        };
-    }
-
-    #[test]
-    fn empty() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert!(iter.next().is_none());
-
-        let query = " ";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn one_quoted_string() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "\"hello\"";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn one_pending_quoted_string() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "\"hello";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn one_non_quoted_string() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "hello";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn quoted_directly_followed_by_free_strings() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "\"hello\"world";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
-        assert_eq_query_token!(iter.next().unwrap(), Free("world"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn free_directly_followed_by_quoted_strings() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "hello\"world\"";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn free_followed_by_quoted_strings() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "hello \"world\"";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn multiple_spaces_separated_strings() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "hello    world   ";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
-        assert_eq_query_token!(iter.next().unwrap(), Free("world"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn multi_interleaved_quoted_free_strings() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "hello \"world\" coucou \"monde\"";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
-        assert_eq_query_token!(iter.next().unwrap(), Free("coucou"));
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("monde"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn multi_quoted_strings() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "\"hello world\" coucou \"monde est beau\"";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
-        assert_eq_query_token!(iter.next().unwrap(), Free("coucou"));
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("monde"));
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("est"));
-        assert_eq_query_token!(iter.next().unwrap(), Quoted("beau"));
-        assert!(iter.next().is_none());
-    }
-
-    #[test]
-    fn chinese() {
-        let stop_words = Set::default();
-        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
-        let query = "汽车男生";
-        let analyzed = analyzer.analyze(query);
-        let tokens = analyzed.tokens();
-        let mut iter = query_tokens(tokens);
-        assert_eq_query_token!(iter.next().unwrap(), Free("汽车"));
-        assert_eq_query_token!(iter.next().unwrap(), Free("男生"));
-        assert!(iter.next().is_none());
-    }
-}