Merge pull request #97 from meilisearch/criteria

Introduce all the criteria
2024-11-23 18:45:06 +08:00 · 2021-03-03 18:24:22 +01:00 · 2021-03-03 18:24:22 +01:00 · 2924ed31f3
commit 2924ed31f3
parent 4e84999f20 9b6b35d9b7
16 changed files with 1967 additions and 784 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -866,6 +866,7 @@ dependencies = [
 "anyhow",
 "byte-unit",
 "heed",
 "jemallocator",
 "milli",
 "stderrlog",
 "structopt",
--- a/http-ui/src/main.rs
+++ b/http-ui/src/main.rs
@ -32,7 +32,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
 use milli::facet::FacetValue;
 use milli::update::UpdateIndexingStep::*;
 use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
-use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
+use milli::{obkv_to_json, Index, UpdateStore, SearchResult, MatchingWords, FacetCondition};
 static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
@ -132,7 +132,7 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
        Self { analyzer }
    }
-    fn highlight_value(&self, value: Value, words_to_highlight: &HashSet<String>) -> Value {
+    fn highlight_value(&self, value: Value, matching_words: &MatchingWords) -> Value {
        match value {
            Value::Null => Value::Null,
            Value::Bool(boolean) => Value::Bool(boolean),
@ -142,7 +142,7 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
                let analyzed = self.analyzer.analyze(&old_string);
                for (word, token) in analyzed.reconstruct() {
                    if token.is_word() {
-                        let to_highlight = words_to_highlight.contains(token.text());
+                        let to_highlight = matching_words.matches(token.text());
                        if to_highlight { string.push_str("<mark>") }
                        string.push_str(word);
                        if to_highlight { string.push_str("</mark>") }
@ -154,12 +154,12 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
            },
            Value::Array(values) => {
                Value::Array(values.into_iter()
-                    .map(|v| self.highlight_value(v, words_to_highlight))
+                    .map(|v| self.highlight_value(v, matching_words))
                    .collect())
            },
            Value::Object(object) => {
                Value::Object(object.into_iter()
-                    .map(|(k, v)| (k, self.highlight_value(v, words_to_highlight)))
+                    .map(|(k, v)| (k, self.highlight_value(v, matching_words)))
                    .collect())
            },
        }
@ -168,14 +168,14 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> {
    fn highlight_record(
        &self,
        object: &mut Map<String, Value>,
-        words_to_highlight: &HashSet<String>,
+        matching_words: &MatchingWords,
        attributes_to_highlight: &HashSet<String>,
    ) {
        // TODO do we need to create a string for elements that are not strings but need to be highlighted?
        for (key, value) in object.iter_mut() {
            if attributes_to_highlight.contains(key) {
                let old_value = mem::take(value);
-                *value = self.highlight_value(old_value, words_to_highlight);
+                *value = self.highlight_value(old_value, matching_words);
            }
        }
    }
@ -722,7 +722,7 @@ async fn main() -> anyhow::Result<()> {
                search.facet_condition(condition);
            }
-            let SearchResult { found_words, candidates, documents_ids } = search.execute().unwrap();
+            let SearchResult { matching_words, candidates, documents_ids } = search.execute().unwrap();
            let number_of_candidates = candidates.len();
            let facets = if query.facet_distribution == Some(true) {
@ -748,7 +748,7 @@ async fn main() -> anyhow::Result<()> {
            for (_id, obkv) in index.documents(&rtxn, documents_ids).unwrap() {
                let mut object = obkv_to_json(&displayed_fields, &fields_ids_map, obkv).unwrap();
                if !disable_highlighting {
-                    highlighter.highlight_record(&mut object, &found_words, &attributes_to_highlight);
+                    highlighter.highlight_record(&mut object, &matching_words, &attributes_to_highlight);
                }
                documents.push(object);
--- a/infos/src/main.rs
+++ b/infos/src/main.rs
@ -598,7 +598,7 @@ fn export_documents(index: &Index, rtxn: &heed::RoTxn, internal_ids: Vec<u32>) -
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let displayed_fields: Vec<_> = fields_ids_map.iter().map(|(id, _name)| id).collect();
-    let iter: Box<Iterator<Item = _>> = if internal_ids.is_empty() {
+    let iter: Box<dyn Iterator<Item = _>> = if internal_ids.is_empty() {
        Box::new(index.documents.iter(rtxn)?.map(|result| {
            result.map(|(_id, obkv)| obkv)
        }))
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@ -3,8 +3,6 @@
 mod criterion;
 mod external_documents_ids;
 mod fields_ids_map;
 mod mdfs;
 mod query_tokens;
 mod search;
 mod update_store;
 pub mod facet;
@ -28,7 +26,7 @@ pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec};
 pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
 pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
 pub use self::index::Index;
-pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult};
+pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords};
 pub use self::update_store::UpdateStore;
 pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
--- a/milli/src/mdfs.rs
+++ b/milli/src/mdfs.rs
@ -1,163 +0,0 @@
 use std::collections::hash_map::Entry::{Occupied, Vacant};
 use std::collections::HashMap;
 use std::mem;
 use roaring::RoaringBitmap;
 use crate::Index;
 /// A mana depth first search implementation.
 ///
 /// Used as an iterator, it yields `(proximity, docids)` pairs where `proximity`
 /// is the amount of mana that was available when `docids` were found.
 pub struct Mdfs<'a> {
    /// Index whose `word_pair_proximity_docids` database is queried.
    index: &'a Index,
    /// Read transaction used for every database lookup.
    rtxn: &'a heed::RoTxn<'a>,
    /// One entry per query word position: a map from word to `(u8, docids)`
    /// (the `u8` is bound as `_typos` by `words_pair_combinations`) and a bitmap.
    words: &'a [(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
    /// Cache of pair unions, keyed by `(remaining words length, proximity)`.
    union_cache: HashMap<(usize, u8), RoaringBitmap>,
    /// Documents that have not been returned yet.
    candidates: RoaringBitmap,
    /// Mana budget used by the next search step; starts at the number of word pairs.
    mana: u32,
    /// Upper bound of the mana budget (eight times the number of word pairs).
    max_mana: u32,
 }
 impl<'a> Mdfs<'a> {
    /// Creates a search over `words`, restricted to the given `candidates`.
    ///
    /// The initial mana equals the number of adjacent word pairs (windows)
    /// and the maximum mana is eight times that number.
    pub fn new(
        index: &'a Index,
        rtxn: &'a heed::RoTxn,
        words: &'a [(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
        candidates: RoaringBitmap,
    ) -> Mdfs<'a>
    {
        // Number of adjacent pairs in the list of words.
        let pairs_count = words.len().saturating_sub(1) as u32;
        Mdfs {
            index,
            rtxn,
            words,
            union_cache: HashMap::new(),
            candidates,
            mana: pairs_count,
            max_mana: pairs_count * 8,
        }
    }
 }
 impl<'a> Iterator for Mdfs<'a> {
    type Item = anyhow::Result<(u32, RoaringBitmap)>;

    /// Returns the next non-empty bucket of documents along with the mana
    /// (proximity) that was available to find it, or `None` once the mana
    /// budget is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // With zero or one word there is no pair to evaluate: the only thing
        // we can return is the whole set of candidates, once.
        if self.words.len() <= 1 {
            return if self.candidates.is_empty() {
                None
            } else {
                Some(Ok((0, mem::take(&mut self.candidates))))
            };
        }

        while self.mana <= self.max_mana {
            let mut answer = RoaringBitmap::new();
            let outcome = mdfs_step(
                self.index,
                self.rtxn,
                self.mana,
                self.words,
                &self.candidates,
                &self.candidates,
                &mut self.union_cache,
                &mut answer,
            );

            // Errors are forwarded as-is, without consuming any mana.
            if let Err(e) = outcome {
                return Some(Err(e));
            }

            // The mana always grows for the next round.
            let proximity = self.mana;
            self.mana += 1;

            // Nothing found with this budget: retry with more mana.
            if answer.is_empty() {
                continue;
            }

            // Never search again for documents that were already answered.
            self.candidates.difference_with(&answer);
            return Some(Ok((proximity, answer)));
        }

        None
    }
 }
 /// Runs one depth-first step over the first pair of `words` and recurses on the tail.
 ///
 /// `mana` is the proximity budget that must be spent by this pair and all of
 /// its children. Documents found by the last pair are unioned into `answer`.
 /// `parent_docids` restricts the documents this step may keep, while
 /// `candidates` is what a proximity of 8 resolves to.
 ///
 /// Callers must pass at least two entries in `words` (`words[1]` is indexed directly).
 fn mdfs_step(
    index: &Index,
    rtxn: &heed::RoTxn,
    mana: u32,
    words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
    candidates: &RoaringBitmap,
    parent_docids: &RoaringBitmap,
    union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
    answer: &mut RoaringBitmap,
 ) -> anyhow::Result<()>
 {
    use std::cmp::{min, max};
    let (words1, words2) = (&words[0].0, &words[1].0);
    let pairs = words_pair_combinations(words1, words2);
    let tail = &words[1..];
    // Number of pairs that remain to be evaluated after this one.
    let nb_children = tail.len() as u32 - 1;
    // The minimum amount of mana that you must consume is at least 1 and the
    // amount of mana that your children can consume. Because the last child must
    // consume the remaining mana, it is mandatory that there is not too much left at the end.
    let min_proximity = max(1, mana.saturating_sub(nb_children * 8)) as u8;
    // The maximum amount of mana that you can use is 8 or the remaining amount of
    // mana minus your children, as you can't just consume all the mana,
    // your children must have at least 1 mana.
    let max_proximity = min(8, mana - nb_children) as u8;
    for proximity in min_proximity..=max_proximity {
        // The union of the docids of every pair at this proximity is cached per
        // `(remaining words length, proximity)` so it is only computed once.
        let mut docids = match union_cache.entry((words.len(), proximity)) {
            Occupied(entry) => entry.get().clone(),
            Vacant(entry) => {
                let mut docids = RoaringBitmap::new();
                if proximity == 8 {
                    // A proximity of 8 is not looked up in the database:
                    // every candidate is accepted.
                    docids = candidates.clone();
                } else {
                    for (w1, w2) in pairs.iter().cloned() {
                        let key = (w1, w2, proximity);
                        if let Some(di) = index.word_pair_proximity_docids.get(rtxn, &key)? {
                            docids.union_with(&di);
                        }
                    }
                }
                entry.insert(docids).clone()
            }
        };
        // We must be sure that we only return docids that are present in the candidates.
        docids.intersect_with(parent_docids);
        if !docids.is_empty() {
            // Mana left for the children once this pair has consumed `proximity`.
            let mana = mana.checked_sub(proximity as u32).unwrap();
            if tail.len() < 2 {
                // We are the last pair, we return without recursing as we don't have any child.
                answer.union_with(&docids);
                return Ok(());
            } else {
                // The first proximity that matches decides the outcome of this step:
                // the result of the recursion is returned without trying further proximities.
                return mdfs_step(index, rtxn, mana, tail, candidates, &docids, union_cache, answer);
            }
        }
    }
    Ok(())
 }
 /// Lists every `(left, right)` word pair, taken from `w1` and `w2` respectively,
 /// whose document ids are not disjoint, i.e. pairs that share at least one document.
 fn words_pair_combinations<'h>(
    w1: &'h HashMap<String, (u8, RoaringBitmap)>,
    w2: &'h HashMap<String, (u8, RoaringBitmap)>,
 ) -> Vec<(&'h str, &'h str)>
 {
    w1.iter()
        .flat_map(move |(left, (_, left_docids))| {
            w2.iter()
                .filter(move |(_, (_, right_docids))| !left_docids.is_disjoint(right_docids))
                .map(move |(right, _)| (left.as_str(), right.as_str()))
        })
        .collect()
 }
--- a/milli/src/query_tokens.rs
+++ b/milli/src/query_tokens.rs
@ -1,217 +0,0 @@
 use meilisearch_tokenizer::{Token, TokenKind};
 /// Tracks whether the tokens currently read are inside a quoted section.
 #[derive(Debug)]
 enum State {
    /// Outside of any double-quoted section.
    Free,
    /// Inside a double-quoted section.
    Quoted,
 }
 impl State {
    /// Toggles the state: `Free` becomes `Quoted` and `Quoted` becomes `Free`.
    fn swap(&mut self) {
        *self = match self {
            State::Free => State::Quoted,
            State::Quoted => State::Free,
        };
    }
 }
 /// A word token of the query, tagged with the quoting state it was read in.
 #[derive(Debug, PartialEq, Eq)]
 pub enum QueryToken<'a> {
    /// A word found outside of double quotes.
    Free(Token<'a>),
    /// A word found after an opening double quote (closed or not).
    Quoted(Token<'a>),
 }
 /// Turns a stream of tokens into a stream of word tokens tagged as quoted or free.
 ///
 /// Any token whose trimmed text is a single double quote toggles the quoting
 /// state and is dropped; tokens that are not words are dropped as well.
 pub fn query_tokens<'a>(mut tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<Item = QueryToken<'a>> {
    let mut state = State::Free;
    std::iter::from_fn(move || {
        for token in &mut tokens {
            if token.text().trim() == "\"" {
                // A lone double quote opens or closes a quoted section.
                state.swap();
            } else if let TokenKind::Word = token.kind() {
                let tagged = match state {
                    State::Free => QueryToken::Free(token),
                    State::Quoted => QueryToken::Quoted(token),
                };
                return Some(tagged);
            }
        }
        None
    })
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use QueryToken::{Quoted, Free};
    use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
    use fst::Set;
    // Asserts that a `QueryToken` is of the expected variant and carries the expected text.
    macro_rules! assert_eq_query_token {
        ($test:expr, Quoted($val:literal)) => {
            match $test {
                Quoted(val) => assert_eq!(val.text(), $val),
                Free(val) => panic!("expected Quoted(\"{}\"), found Free(\"{}\")", $val, val.text()),
            }
        };
        ($test:expr, Free($val:literal)) => {
            match $test {
                Quoted(val) => panic!("expected Free(\"{}\"), found Quoted(\"{}\")", $val, val.text()),
                Free(val) => assert_eq!(val.text(), $val),
            }
        };
    }
    // Empty and whitespace-only queries produce no query token.
    #[test]
    fn empty() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert!(iter.next().is_none());
        let query = " ";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert!(iter.next().is_none());
    }
    // A single word between double quotes is tagged as quoted.
    #[test]
    fn one_quoted_string() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "\"hello\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
        assert!(iter.next().is_none());
    }
    // A quote that is never closed still marks the following word as quoted.
    #[test]
    fn one_pending_quoted_string() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "\"hello";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
        assert!(iter.next().is_none());
    }
    // A single word without quotes is tagged as free.
    #[test]
    fn one_non_quoted_string() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert!(iter.next().is_none());
    }
    // A free word glued to a closing quote is still tagged as free.
    #[test]
    fn quoted_directly_followed_by_free_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "\"hello\"world";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Free("world"));
        assert!(iter.next().is_none());
    }
    // A quoted word glued to a preceding free word is still tagged as quoted.
    #[test]
    fn free_directly_followed_by_quoted_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello\"world\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
        assert!(iter.next().is_none());
    }
    // Same as above but with a space between the free word and the quoted one.
    #[test]
    fn free_followed_by_quoted_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello \"world\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
        assert!(iter.next().is_none());
    }
    // Runs of spaces between and after words do not produce tokens.
    #[test]
    fn multiple_spaces_separated_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello    world   ";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Free("world"));
        assert!(iter.next().is_none());
    }
    // Free and quoted words can alternate several times in the same query.
    #[test]
    fn multi_interleaved_quoted_free_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "hello \"world\" coucou \"monde\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
        assert_eq_query_token!(iter.next().unwrap(), Free("coucou"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("monde"));
        assert!(iter.next().is_none());
    }
    // Every word of a multi-word quoted section is tagged as quoted.
    #[test]
    fn multi_quoted_strings() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "\"hello world\" coucou \"monde est beau\"";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Quoted("hello"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("world"));
        assert_eq_query_token!(iter.next().unwrap(), Free("coucou"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("monde"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("est"));
        assert_eq_query_token!(iter.next().unwrap(), Quoted("beau"));
        assert!(iter.next().is_none());
    }
    // Chinese text without spaces is segmented into separate free words.
    #[test]
    fn chinese() {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let query = "汽车男生";
        let analyzed = analyzer.analyze(query);
        let tokens = analyzed.tokens();
        let mut iter = query_tokens(tokens);
        assert_eq_query_token!(iter.next().unwrap(), Free("汽车"));
        assert_eq_query_token!(iter.next().unwrap(), Free("男生"));
        assert!(iter.next().is_none());
    }
 }
--- a/milli/src/search/criteria/asc_desc.rs
+++ b/milli/src/search/criteria/asc_desc.rs
@ -0,0 +1,282 @@
 use std::collections::HashMap;
 use std::mem::take;
 use anyhow::bail;
 use itertools::Itertools;
 use log::debug;
 use ordered_float::OrderedFloat;
 use roaring::RoaringBitmap;
 use crate::facet::FacetType;
 use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetLevelValueI64Codec};
 use crate::heed_codec::facet::{FieldDocIdFacetI64Codec, FieldDocIdFacetF64Codec};
 use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
 use crate::search::facet::FacetIter;
 use crate::search::query_tree::Operation;
 use crate::{FieldId, Index};
 use super::{Criterion, CriterionResult};
 /// A criterion that returns buckets of documents ordered by the value
 /// of a numeric facet, either ascending or descending.
 pub struct AscDesc<'t> {
    /// Index that stores the facet values.
    index: &'t Index,
    /// Read transaction used for every database lookup.
    rtxn: &'t heed::RoTxn<'t>,
    /// Identifier of the faceted field the documents are ordered by.
    field_id: FieldId,
    /// Type of the facet; only `Float` and `Integer` are accepted by `facet_ordered`.
    facet_type: FacetType,
    /// `true` to order by ascending values, `false` for descending.
    ascending: bool,
    /// Query tree forwarded in every returned `CriterionResult`.
    query_tree: Option<Operation>,
    /// Documents of the current bucket that have not been returned yet.
    candidates: RoaringBitmap,
    /// Bucket candidates received from the parent criterion, forwarded once.
    bucket_candidates: RoaringBitmap,
    /// Documents returned by `Index::faceted_documents_ids` for `field_id`.
    faceted_candidates: RoaringBitmap,
    /// Criterion that feeds this one, `None` when this is the initial criterion.
    parent: Option<Box<dyn Criterion + 't>>,
 }
 impl<'t> AscDesc<'t> {
    /// Creates an ascending criterion that is the first of the chain.
    ///
    /// See [`AscDesc::initial`] for how `query_tree` and `candidates` are combined.
    pub fn initial_asc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        query_tree: Option<Operation>,
        candidates: Option<RoaringBitmap>,
        field_id: FieldId,
        facet_type: FacetType,
    ) -> anyhow::Result<Self>
    {
        Self::initial(index, rtxn, query_tree, candidates, field_id, facet_type, true)
    }
    /// Creates a descending criterion that is the first of the chain.
    ///
    /// See [`AscDesc::initial`] for how `query_tree` and `candidates` are combined.
    pub fn initial_desc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        query_tree: Option<Operation>,
        candidates: Option<RoaringBitmap>,
        field_id: FieldId,
        facet_type: FacetType,
    ) -> anyhow::Result<Self>
    {
        Self::initial(index, rtxn, query_tree, candidates, field_id, facet_type, false)
    }
    /// Creates an ascending criterion that refines the buckets of `parent`.
    pub fn asc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_id: FieldId,
        facet_type: FacetType,
    ) -> anyhow::Result<Self>
    {
        Self::new(index, rtxn, parent, field_id, facet_type, true)
    }
    /// Creates a descending criterion that refines the buckets of `parent`.
    pub fn desc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_id: FieldId,
        facet_type: FacetType,
    ) -> anyhow::Result<Self>
    {
        Self::new(index, rtxn, parent, field_id, facet_type, false)
    }
    /// Builds a criterion without parent.
    ///
    /// The starting candidates are:
    /// - the documents matching `query_tree`, intersected with `candidates` when given,
    /// - otherwise `candidates` when given,
    /// - otherwise every document that is faceted on `field_id`.
    ///
    /// # Errors
    ///
    /// Fails when the faceted documents ids cannot be read or when the
    /// query tree cannot be resolved.
    fn initial(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        query_tree: Option<Operation>,
        candidates: Option<RoaringBitmap>,
        field_id: FieldId,
        facet_type: FacetType,
        ascending: bool,
    ) -> anyhow::Result<Self>
    {
        let faceted_candidates = index.faceted_documents_ids(rtxn, field_id)?;
        let candidates = match &query_tree {
            Some(qt) => {
                let context = CriteriaBuilder::new(rtxn, index)?;
                let mut qt_candidates = resolve_query_tree(&context, qt, &mut HashMap::new())?;
                if let Some(candidates) = candidates {
                    qt_candidates.intersect_with(&candidates);
                }
                qt_candidates
            },
            // Only clone the faceted documents when no candidates were given:
            // `unwrap_or` would clone the whole bitmap even when it is discarded.
            None => candidates.unwrap_or_else(|| faceted_candidates.clone()),
        };
        Ok(AscDesc {
            index,
            rtxn,
            field_id,
            facet_type,
            ascending,
            query_tree,
            candidates,
            faceted_candidates,
            bucket_candidates: RoaringBitmap::new(),
            parent: None,
        })
    }
    /// Builds a criterion that pulls its query tree and candidates from `parent`.
    ///
    /// # Errors
    ///
    /// Fails when the faceted documents ids cannot be read.
    fn new(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_id: FieldId,
        facet_type: FacetType,
        ascending: bool,
    ) -> anyhow::Result<Self>
    {
        Ok(AscDesc {
            index,
            rtxn,
            field_id,
            facet_type,
            ascending,
            query_tree: None,
            candidates: RoaringBitmap::new(),
            faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
            bucket_candidates: RoaringBitmap::new(),
            parent: Some(parent),
        })
    }
 }
 impl<'t> Criterion for AscDesc<'t> {
    /// Returns the next bucket of documents, ordered by facet value.
    ///
    /// When the current candidates are exhausted, the state that was left
    /// (query tree, empty candidates, bucket candidates) is returned and, if
    /// there is a parent, its next result is stored for the following calls.
    /// Otherwise the candidates sharing the best facet value are returned and
    /// removed from the remaining candidates.
    ///
    /// Candidates for which no facet value can be found are returned together
    /// as a last bucket, so that the remaining candidates always shrink.
    fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
        debug!("Facet {} iteration ({:?})",
            if self.ascending { "Asc" } else { "Desc" }, self.candidates,
        );

        if self.candidates.is_empty() {
            let query_tree = self.query_tree.take();
            let candidates = take(&mut self.candidates);
            let bucket_candidates = take(&mut self.bucket_candidates);
            match self.parent.as_mut() {
                Some(parent) => {
                    match parent.next()? {
                        Some(CriterionResult { query_tree, mut candidates, bucket_candidates }) => {
                            self.query_tree = query_tree;
                            // Only faceted documents can be ordered by this criterion.
                            candidates.intersect_with(&self.faceted_candidates);
                            self.candidates = candidates;
                            self.bucket_candidates = bucket_candidates;
                        },
                        None => return Ok(None),
                    }
                },
                None => if query_tree.is_none() && bucket_candidates.is_empty() {
                    return Ok(None)
                },
            }
            return Ok(Some(CriterionResult { query_tree, candidates, bucket_candidates }));
        }

        let bucket_candidates = match self.parent {
            Some(_) => take(&mut self.bucket_candidates),
            None => self.candidates.clone(),
        };

        let mut found_candidates = facet_ordered(
            self.index,
            self.rtxn,
            self.field_id,
            self.facet_type,
            self.ascending,
            self.candidates.clone(),
        )?;

        if found_candidates.is_empty() {
            // No facet value was found for any remaining candidate. Removing an
            // empty set would leave the candidates untouched and this method
            // would return the same empty bucket forever, so the remaining
            // candidates are handed over as a final bucket instead.
            found_candidates = take(&mut self.candidates);
        } else {
            self.candidates.difference_with(&found_candidates);
        }

        Ok(Some(CriterionResult {
            query_tree: self.query_tree.clone(),
            candidates: found_candidates,
            bucket_candidates,
        }))
    }
 }
 /// Returns the subset of `candidates` that shares the best value of the facet
 /// `field_id`: the smallest value when `ascending`, the biggest otherwise.
 ///
 /// Up to 1000 candidates, the value of each document is read individually and
 /// the documents are sorted in memory; above that, a `FacetIter` is used and
 /// its first group is returned.
 ///
 /// An empty bitmap is returned when no value is found for any candidate.
 ///
 /// # Errors
 ///
 /// Fails on database errors, or when `facet_type` is `FacetType::String`.
 fn facet_ordered(
    index: &Index,
    rtxn: &heed::RoTxn,
    field_id: FieldId,
    facet_type: FacetType,
    ascending: bool,
    candidates: RoaringBitmap,
 ) -> anyhow::Result<RoaringBitmap>
 {
    match facet_type {
        FacetType::Float => {
            if candidates.len() <= 1000 {
                let db = index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetF64Codec>();
                let mut docids_values = Vec::with_capacity(candidates.len() as usize);
                for docid in candidates.iter() {
                    // Range over every value stored for this document and this field.
                    let left = (field_id, docid, f64::MIN);
                    let right = (field_id, docid, f64::MAX);
                    let mut iter = db.range(rtxn, &(left..=right))?;
                    // Keep the first entry of the range when ascending, the last one otherwise.
                    let entry = if ascending { iter.next() } else { iter.last() };
                    if let Some(((_, _, value), ())) = entry.transpose()? {
                        docids_values.push((docid, OrderedFloat(value)));
                    }
                }
                docids_values.sort_unstable_by_key(|(_, value)| *value);
                let iter = docids_values.into_iter();
                let iter = if ascending {
                    Box::new(iter) as Box<dyn Iterator<Item = _>>
                } else {
                    Box::new(iter.rev())
                };
                // Only the first group, i.e. the documents sharing the best value, is returned.
                match iter.group_by(|(_, v)| *v).into_iter().next() {
                    Some((_, ids)) => Ok(ids.map(|(id, _)| id).into_iter().collect()),
                    None => Ok(RoaringBitmap::new())
                }
            } else {
                let facet_fn = if ascending {
                    FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing
                } else {
                    FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing
                };
                let mut iter = facet_fn(rtxn, index, field_id, candidates)?;
                Ok(iter.next().transpose()?.map(|(_, docids)| docids).unwrap_or_default())
            }
        },
        FacetType::Integer => {
            // Same algorithm as the float branch, with the integer codecs.
            if candidates.len() <= 1000 {
                let db = index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetI64Codec>();
                let mut docids_values = Vec::with_capacity(candidates.len() as usize);
                for docid in candidates.iter() {
                    let left = (field_id, docid, i64::MIN);
                    let right = (field_id, docid, i64::MAX);
                    let mut iter = db.range(rtxn, &(left..=right))?;
                    let entry = if ascending { iter.next() } else { iter.last() };
                    if let Some(((_, _, value), ())) = entry.transpose()? {
                        docids_values.push((docid, value));
                    }
                }
                docids_values.sort_unstable_by_key(|(_, value)| *value);
                let iter = docids_values.into_iter();
                let iter = if ascending {
                    Box::new(iter) as Box<dyn Iterator<Item = _>>
                } else {
                    Box::new(iter.rev())
                };
                match iter.group_by(|(_, v)| *v).into_iter().next() {
                    Some((_, ids)) => Ok(ids.map(|(id, _)| id).into_iter().collect()),
                    None => Ok(RoaringBitmap::new())
                }
            } else {
                let facet_fn = if ascending {
                    FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing
                } else {
                    FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing
                };
                let mut iter = facet_fn(rtxn, index, field_id, candidates)?;
                Ok(iter.next().transpose()?.map(|(_, docids)| docids).unwrap_or_default())
            }
        },
        // Ordering is only defined on numeric facets.
        FacetType::String => bail!("criteria facet type must be a number"),
    }
 }
--- a/milli/src/search/criteria/fetcher.rs
+++ b/milli/src/search/criteria/fetcher.rs
@ -0,0 +1,113 @@
 use std::collections::HashMap;
 use std::mem::take;
 use log::debug;
 use roaring::RoaringBitmap;
 use crate::search::query_tree::Operation;
 use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
 /// A criterion that either forwards the results of its parent or fetches
 /// the documents ids by itself when there is nothing to forward.
 pub struct Fetcher<'t> {
    /// Access to the databases needed to resolve a query tree.
    ctx: &'t dyn Context,
    /// Query tree to resolve, `None` when built on top of a parent.
    query_tree: Option<Operation>,
    /// `Allowed` when an explicit set of candidates was given at construction,
    /// an empty `Forbidden` set otherwise.
    candidates: Candidates,
    /// Criterion that feeds this one, `None` when this is the initial criterion.
    parent: Option<Box<dyn Criterion + 't>>,
    /// `true` at construction and reset to `false` by the first call to `next`;
    /// the documents ids are only fetched while it is `true`.
    should_get_documents_ids: bool,
 }
 impl<'t> Fetcher<'t> {
    /// Creates a fetcher that is the first criterion of the chain.
    ///
    /// When `candidates` is given, it becomes the set of allowed documents;
    /// otherwise the fetcher starts with an empty set of forbidden documents.
    pub fn initial(
        ctx: &'t dyn Context,
        query_tree: Option<Operation>,
        candidates: Option<RoaringBitmap>,
    ) -> Self
    {
        let candidates = match candidates {
            Some(allowed) => Candidates::Allowed(allowed),
            None => Candidates::default(),
        };

        Fetcher { ctx, query_tree, candidates, parent: None, should_get_documents_ids: true }
    }
    /// Creates a fetcher that forwards the results of `parent`.
    pub fn new(
        ctx: &'t dyn Context,
        parent: Box<dyn Criterion + 't>,
    ) -> Self
    {
        let parent = Some(parent);
        let candidates = Candidates::default();

        Fetcher { ctx, query_tree: None, candidates, parent, should_get_documents_ids: true }
    }
 }
 impl<'t> Criterion for Fetcher<'t> {
    /// Returns the next bucket of documents.
    ///
    /// - With allowed candidates: returns them once, restricted to the query
    ///   tree when there is one and the documents ids were not fetched yet.
    /// - Otherwise: forwards the next result of the parent when there is one;
    ///   when nothing is forwarded and the documents ids were not fetched yet,
    ///   resolves the query tree, or falls back to every document of the index.
    fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
        use Candidates::{Allowed, Forbidden};

        debug!("Fetcher iteration (should_get_documents_ids: {}) ({:?})",
            self.should_get_documents_ids, self.candidates,
        );

        // Reading the flag also resets it to `false` for the following calls.
        let fetch_docids = take(&mut self.should_get_documents_ids);

        match self.candidates {
            Allowed(_) => {
                // Taking the candidates leaves the default (forbidden) set behind.
                let allowed = take(&mut self.candidates).into_inner();
                let docids = match self.query_tree.as_ref() {
                    Some(tree) if fetch_docids => {
                        let mut resolved = resolve_query_tree(self.ctx, tree, &mut HashMap::new())?;
                        resolved.intersect_with(&allowed);
                        resolved
                    },
                    _ => allowed,
                };

                Ok(Some(CriterionResult {
                    query_tree: self.query_tree.take(),
                    candidates: docids.clone(),
                    bucket_candidates: docids,
                }))
            },
            Forbidden(_) => {
                // The parent, when it has something to return, always has the priority.
                if let Some(parent) = self.parent.as_mut() {
                    if let Some(result) = parent.next()? {
                        return Ok(Some(result));
                    }
                }

                if !fetch_docids {
                    return Ok(None);
                }

                let docids = match self.query_tree.as_ref() {
                    Some(tree) => resolve_query_tree(self.ctx, tree, &mut HashMap::new())?,
                    None => self.ctx.documents_ids()?,
                };

                Ok(Some(CriterionResult {
                    query_tree: self.query_tree.clone(),
                    candidates: docids.clone(),
                    bucket_candidates: docids,
                }))
            },
        }
    }
 }
--- a/milli/src/search/criteria/mod.rs
+++ b/milli/src/search/criteria/mod.rs
@ -0,0 +1,483 @@
 use std::collections::HashMap;
 use std::borrow::Cow;
 use anyhow::{bail, Context as _};
 use roaring::RoaringBitmap;
 use crate::facet::FacetType;
 use crate::search::word_derivations;
 use crate::{Index, FieldId};
 use super::query_tree::{Operation, Query, QueryKind};
 use self::typo::Typo;
 use self::words::Words;
 use self::asc_desc::AscDesc;
 use self::proximity::Proximity;
 use self::fetcher::Fetcher;
 pub mod typo;
 pub mod words;
 pub mod asc_desc;
 pub mod proximity;
 pub mod fetcher;
 /// A ranking criterion that can be chained behind a parent criterion.
 ///
 /// Each call to `next` yields the next `CriterionResult`; the implementations
 /// visible in this module return `Ok(None)` once they have nothing left to yield.
 pub trait Criterion {
    /// Produces the next result of this criterion, or `Ok(None)` when exhausted.
    fn next(&mut self) -> anyhow::Result<Option<CriterionResult>>;
 }
 /// The result of a single call to a criterion's `next` method,
 /// typically consumed by the child criterion that wraps it.
 #[derive(Debug, Clone, PartialEq)]
 pub struct CriterionResult {
    /// The query tree that must be used by the child criterion to fetch candidates.
    /// `None` when there is no query tree to refine the candidates with.
    pub query_tree: Option<Operation>,
    /// The candidates that the child criterion is allowed to return subsets of.
    pub candidates: RoaringBitmap,
    /// Candidates that come from the current bucket of the initial criterion.
    pub bucket_candidates: RoaringBitmap,
 }
 /// Either a set of candidates that defines the candidates
 /// that are allowed to be returned,
 /// or the candidates that must never be returned.
 #[derive(Debug)]
 enum Candidates {
    /// Only documents contained in this bitmap may be returned.
    Allowed(RoaringBitmap),
    /// Documents contained in this bitmap must be excluded from what is returned.
    Forbidden(RoaringBitmap)
 }
 impl Candidates {
    /// Consumes the wrapper and hands back the underlying bitmap,
    /// whichever variant it was stored in.
    fn into_inner(self) -> RoaringBitmap {
        match self {
            Self::Allowed(docids) | Self::Forbidden(docids) => docids,
        }
    }
 }
 impl Default for Candidates {
    fn default() -> Self {
        Self::Forbidden(RoaringBitmap::new())
    }
 }
 /// Read-only access to the data the criteria need to resolve a query tree.
 ///
 /// It is implemented by `CriteriaBuilder` on top of an index and a read
 /// transaction, and by an in-memory fixture in the test module.
 pub trait Context {
    /// Returns the ids of all the documents.
    fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
    /// Returns the documents ids stored for exactly `word`, if there is an entry for it.
    fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
    /// Returns the documents ids stored for the prefix `word`, if there is an entry for it.
    fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
    /// Returns the documents ids stored under the `(left, right, proximity)` key, if any.
    fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
    /// Same lookup as `word_pair_proximity_docids` but in the store where
    /// `right` is a prefix rather than a complete word.
    fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>>;
    /// Returns the FST of words, which is handed to `word_derivations` by the callers in this module.
    fn words_fst<'t>(&self) -> &'t fst::Set<Cow<[u8]>>;
    /// Tells whether `word` is a known prefix, in which case the
    /// prefix-specific lookups above can be used directly.
    fn in_prefix_cache(&self, word: &str) -> bool;
 }
 /// Builds the chain of criteria for a search and serves as the
 /// index-backed `Context` those criteria read from.
 pub struct CriteriaBuilder<'t> {
    /// The read transaction used for every index lookup.
    rtxn: &'t heed::RoTxn<'t>,
    /// The index that is being searched.
    index: &'t Index,
    /// The words FST, loaded once from the index in `new`.
    words_fst: fst::Set<Cow<'t, [u8]>>,
    /// The words prefixes FST, loaded once in `new` and used by `in_prefix_cache`.
    words_prefixes_fst: fst::Set<Cow<'t, [u8]>>,
 }
 impl<'b> Context for CriteriaBuilder<'b> {
    /// Reads all the documents ids from the index.
    fn documents_ids(&self) -> heed::Result<RoaringBitmap> {
        let index = self.index;
        index.documents_ids(self.rtxn)
    }
    /// Looks `word` up in the `word_docids` database.
    fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
        let database = &self.index.word_docids;
        database.get(self.rtxn, &word)
    }
    /// Looks `word` up in the `word_prefix_docids` database.
    fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
        let database = &self.index.word_prefix_docids;
        database.get(self.rtxn, &word)
    }
    /// Looks the `(left, right, proximity)` key up in the `word_pair_proximity_docids` database.
    fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
        self.index.word_pair_proximity_docids.get(self.rtxn, &(left, right, proximity))
    }
    /// Looks the `(left, right, proximity)` key up in the `word_prefix_pair_proximity_docids` database.
    fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
        self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &(left, right, proximity))
    }
    /// Gives access to the words FST loaded when the builder was created.
    fn words_fst<'t>(&self) -> &'t fst::Set<Cow<[u8]>> {
        &self.words_fst
    }
    /// A word is in the prefix cache when the words prefixes FST contains it.
    fn in_prefix_cache(&self, word: &str) -> bool {
        self.words_prefixes_fst.contains(word)
    }
 }
 impl<'t> CriteriaBuilder<'t> {
    /// Creates a builder bound to `index` through the `rtxn` read transaction,
    /// loading the words FST and the words prefixes FST up front.
    pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> anyhow::Result<Self> {
        Ok(Self {
            rtxn,
            index,
            words_fst: index.words_fst(rtxn)?,
            words_prefixes_fst: index.words_prefixes_fst(rtxn)?,
        })
    }
    /// Chains the criteria configured in the index and wraps them in a `Fetcher`.
    ///
    /// The first supported criterion is created in its "initial" form and receives
    /// `query_tree` and `facet_candidates`; every following one wraps the previous
    /// criterion as its parent. Criteria that are not handled here are skipped.
    /// When no criterion could be created, the `Fetcher` itself is built from
    /// `query_tree` and `facet_candidates`.
    ///
    /// Fails when an `Asc`/`Desc` criterion names a field that is either
    /// not registered in the fields ids map or not faceted.
    pub fn build(
        &'t self,
        mut query_tree: Option<Operation>,
        mut facet_candidates: Option<RoaringBitmap>,
    ) -> anyhow::Result<Fetcher<'t>>
    {
        use crate::criterion::Criterion as Name;

        let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
        let faceted_fields = self.index.faceted_fields(self.rtxn)?;

        // Resolves a field name into its id and facet type.
        let facet_field = |field: &str| -> anyhow::Result<(FieldId, FacetType)> {
            let id = fields_ids_map
                .id(field)
                .with_context(|| format!("field {:?} isn't registered", field))?;
            let facet_type = faceted_fields
                .get(field)
                .with_context(|| format!("field {:?} isn't faceted", field))?;
            Ok((id, *facet_type))
        };

        let mut chain: Option<Box<dyn Criterion + 't>> = None;
        for name in self.index.criteria(self.rtxn)? {
            let next: Box<dyn Criterion + 't> = match (chain.take(), name) {
                // There already is a criterion: the new one wraps it as its parent.
                (Some(parent), Name::Typo) => Box::new(Typo::new(self, parent)),
                (Some(parent), Name::Words) => Box::new(Words::new(self, parent)),
                (Some(parent), Name::Proximity) => Box::new(Proximity::new(self, parent)),
                (Some(parent), Name::Asc(field)) => {
                    let (id, facet_type) = facet_field(&field)?;
                    Box::new(AscDesc::asc(self.index, self.rtxn, parent, id, facet_type)?)
                },
                (Some(parent), Name::Desc(field)) => {
                    let (id, facet_type) = facet_field(&field)?;
                    Box::new(AscDesc::desc(self.index, self.rtxn, parent, id, facet_type)?)
                },
                // Unsupported criterion: keep the chain as it is.
                (Some(parent), _) => parent,
                // First criterion of the chain: it consumes the query tree and the facet candidates.
                (None, Name::Typo) => {
                    Box::new(Typo::initial(self, query_tree.take(), facet_candidates.take()))
                },
                (None, Name::Words) => {
                    Box::new(Words::initial(self, query_tree.take(), facet_candidates.take()))
                },
                (None, Name::Proximity) => {
                    Box::new(Proximity::initial(self, query_tree.take(), facet_candidates.take()))
                },
                (None, Name::Asc(field)) => {
                    let (id, facet_type) = facet_field(&field)?;
                    Box::new(AscDesc::initial_asc(self.index, self.rtxn, query_tree.take(), facet_candidates.take(), id, facet_type)?)
                },
                (None, Name::Desc(field)) => {
                    let (id, facet_type) = facet_field(&field)?;
                    Box::new(AscDesc::initial_desc(self.index, self.rtxn, query_tree.take(), facet_candidates.take(), id, facet_type)?)
                },
                // Unsupported criterion and nothing built yet: the chain stays empty.
                (None, _) => continue,
            };
            chain = Some(next);
        }

        Ok(match chain {
            Some(criterion) => Fetcher::new(self, criterion),
            None => Fetcher::initial(self, query_tree, facet_candidates),
        })
    }
 }
 /// Computes the documents ids that match `query_tree`.
 ///
 /// - `And`: intersection of the sub-operations, smallest bitmaps first.
 /// - `Consecutive`: intersection of the proximity-1 pairs of each two adjacent
 ///   queries; fails if a member is not a `Query`.
 /// - `Or`: union of the sub-operations.
 /// - `Query`: the documents ids of the query itself.
 ///
 /// `cache` is only threaded through the recursion; nothing here reads or writes it.
 pub fn resolve_query_tree<'t>(
    ctx: &'t dyn Context,
    query_tree: &Operation,
    cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
 ) -> anyhow::Result<RoaringBitmap>
 {
    use Operation::{And, Consecutive, Or, Query};

    match query_tree {
        And(ops) => {
            // Resolve every branch first, then intersect starting with the smallest one.
            let mut resolved = Vec::with_capacity(ops.len());
            for op in ops {
                resolved.push(resolve_query_tree(ctx, op, cache)?);
            }
            resolved.sort_unstable_by_key(|docids| docids.len());

            let mut sorted = resolved.into_iter();
            match sorted.next() {
                Some(mut intersection) => {
                    for docids in sorted {
                        intersection.intersect_with(&docids);
                    }
                    Ok(intersection)
                },
                None => Ok(RoaringBitmap::new()),
            }
        },
        Consecutive(ops) => {
            let mut intersection: Option<RoaringBitmap> = None;
            for pair in ops.windows(2) {
                let (left, right) = match pair {
                    [Operation::Query(left), Operation::Query(right)] => (left, right),
                    _ => bail!("invalid consecutive query type"),
                };

                let pair_docids = query_pair_proximity_docids(ctx, left, right, 1)?;
                // One pair without any document makes the whole sequence unmatchable.
                if pair_docids.is_empty() {
                    return Ok(RoaringBitmap::new());
                }

                intersection = Some(match intersection.take() {
                    Some(mut docids) => {
                        docids.intersect_with(&pair_docids);
                        docids
                    },
                    None => pair_docids,
                });
            }
            Ok(intersection.unwrap_or_default())
        },
        Or(_, ops) => {
            ops.iter().try_fold(RoaringBitmap::new(), |mut union, op| -> anyhow::Result<RoaringBitmap> {
                let docids = resolve_query_tree(ctx, op, cache)?;
                union.union_with(&docids);
                Ok(union)
            })
        },
        Query(q) => query_docids(ctx, q),
    }
 }
 /// Returns the union of the documents ids of every `(left, right)` combination
 /// of `left_words` and `right_words` at the given `proximity`.
 ///
 /// The typo count attached to each word is ignored.
 fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
    ctx: &dyn Context,
    left_words: &[(T, u8)],
    right_words: &[(U, u8)],
    proximity: u8
 ) -> anyhow::Result<RoaringBitmap> {
    let mut union = RoaringBitmap::new();
    for (left, _) in left_words {
        let left: &str = left.as_ref();
        for (right, _) in right_words {
            // A missing entry contributes nothing to the union.
            if let Some(pair_docids) = ctx.word_pair_proximity_docids(left, right.as_ref(), proximity)? {
                union.union_with(&pair_docids);
            }
        }
    }
    Ok(union)
 }
 /// Returns the documents ids that match a single `query`.
 ///
 /// - Exact, non-prefix word: direct lookup of the word.
 /// - Exact prefix found in the prefix cache: direct lookup of the prefix.
 /// - Every other case: union of the documents ids of all the words
 ///   derived from the query word by `word_derivations`.
 fn query_docids(ctx: &dyn Context, query: &Query) -> anyhow::Result<RoaringBitmap> {
    let derivations = match &query.kind {
        QueryKind::Exact { word, .. } if !query.prefix => {
            return Ok(ctx.word_docids(word)?.unwrap_or_default());
        },
        QueryKind::Exact { word, .. } if ctx.in_prefix_cache(word) => {
            return Ok(ctx.word_prefix_docids(word)?.unwrap_or_default());
        },
        // Prefix that is not cached: expand it into the matching words, without typos.
        QueryKind::Exact { word, .. } => word_derivations(word, true, 0, ctx.words_fst())?,
        QueryKind::Tolerant { typo, word } => {
            word_derivations(word, query.prefix, *typo, ctx.words_fst())?
        },
    };

    let mut union = RoaringBitmap::new();
    for (derived, _) in derivations {
        // A derived word without entry contributes nothing to the union.
        if let Some(docids) = ctx.word_docids(&derived)? {
            union.union_with(&docids);
        }
    }
    Ok(union)
 }
 /// Returns the documents ids in which `left` and `right` appear at the given `proximity`.
 ///
 /// For a proximity of 8 or more the pair stores are not consulted and the
 /// plain intersection of both queries is returned instead.
 /// Only the `prefix` flag of the `right` query is taken into account;
 /// the left word is never expanded as a prefix.
 fn query_pair_proximity_docids(ctx: &dyn Context, left: &Query, right: &Query, proximity: u8) -> anyhow::Result<RoaringBitmap> {
    if proximity >= 8 {
        let mut both = query_docids(ctx, left)?;
        both.intersect_with(&query_docids(ctx, right)?);
        return Ok(both);
    }

    let is_prefix = right.prefix;
    match (&left.kind, &right.kind) {
        (QueryKind::Exact { word: l_word, .. }, QueryKind::Exact { word: r_word, .. }) => {
            let pair_docids = if is_prefix && ctx.in_prefix_cache(r_word) {
                ctx.word_prefix_pair_proximity_docids(l_word, r_word, proximity)?.unwrap_or_default()
            } else if is_prefix {
                let r_derived = word_derivations(r_word, true, 0, ctx.words_fst())?;
                all_word_pair_proximity_docids(ctx, &[(l_word, 0)], &r_derived, proximity)?
            } else {
                ctx.word_pair_proximity_docids(l_word, r_word, proximity)?.unwrap_or_default()
            };
            Ok(pair_docids)
        },
        (QueryKind::Tolerant { typo, word: l_word }, QueryKind::Exact { word: r_word, .. }) => {
            let l_derived = word_derivations(l_word, false, *typo, ctx.words_fst())?;
            if is_prefix && ctx.in_prefix_cache(r_word) {
                // The prefix is cached: query the prefix pair store once per left derivation.
                let mut union = RoaringBitmap::new();
                for (l_candidate, _) in l_derived {
                    if let Some(found) = ctx.word_prefix_pair_proximity_docids(&l_candidate, r_word, proximity)? {
                        union.union_with(&found);
                    }
                }
                Ok(union)
            } else if is_prefix {
                let r_derived = word_derivations(r_word, true, 0, ctx.words_fst())?;
                all_word_pair_proximity_docids(ctx, &l_derived, &r_derived, proximity)
            } else {
                all_word_pair_proximity_docids(ctx, &l_derived, &[(r_word, 0)], proximity)
            }
        },
        (QueryKind::Exact { word: l_word, .. }, QueryKind::Tolerant { typo, word: r_word }) => {
            let r_derived = word_derivations(r_word, is_prefix, *typo, ctx.words_fst())?;
            all_word_pair_proximity_docids(ctx, &[(l_word, 0)], &r_derived, proximity)
        },
        (QueryKind::Tolerant { typo: l_typo, word: l_word }, QueryKind::Tolerant { typo: r_typo, word: r_word }) => {
            let l_derived = word_derivations(l_word, false, *l_typo, ctx.words_fst())?;
            let r_derived = word_derivations(r_word, is_prefix, *r_typo, ctx.words_fst())?;
            all_word_pair_proximity_docids(ctx, &l_derived, &r_derived, proximity)
        },
    }
 }
 /// In-memory `Context` fixture shared by the criteria tests.
 #[cfg(test)]
 pub mod test {
    use maplit::hashmap;
    use rand::{Rng, SeedableRng, rngs::StdRng};
    use super::*;
    use std::collections::HashMap;
    /// Shorthand to build an owned `String` from a literal.
    fn s(s: &str) -> String { s.to_string() }
    /// A `Context` backed by hash maps instead of an index.
    pub struct TestContext<'t> {
        words_fst: fst::Set<Cow<'t, [u8]>>,
        word_docids: HashMap<String, RoaringBitmap>,
        word_prefix_docids: HashMap<String, RoaringBitmap>,
        // The proximity is stored as an `i32` in the keys; lookups convert the `u8` they receive.
        word_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
        word_prefix_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
    }
    impl<'a> Context for TestContext<'a> {
        /// The set of all documents is the union of the postings of every word.
        fn documents_ids(&self) -> heed::Result<RoaringBitmap> {
            Ok(self.word_docids.iter().fold(RoaringBitmap::new(), |acc, (_, docids)| acc | docids))
        }
        fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
            Ok(self.word_docids.get(&word.to_string()).cloned())
        }
        fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
            Ok(self.word_prefix_docids.get(&word.to_string()).cloned())
        }
        fn word_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
            let key = (left.to_string(), right.to_string(), proximity.into());
            Ok(self.word_pair_proximity_docids.get(&key).cloned())
        }
        fn word_prefix_pair_proximity_docids(&self, left: &str, right: &str, proximity: u8) -> heed::Result<Option<RoaringBitmap>> {
            let key = (left.to_string(), right.to_string(), proximity.into());
            Ok(self.word_prefix_pair_proximity_docids.get(&key).cloned())
        }
        fn words_fst<'t>(&self) -> &'t fst::Set<Cow<[u8]>> {
            &self.words_fst
        }
        /// A prefix is considered cached when it has an entry in `word_prefix_docids`.
        fn in_prefix_cache(&self, word: &str) -> bool {
            self.word_prefix_docids.contains_key(&word.to_string())
        }
    }
    impl<'a> Default for TestContext<'a> {
        /// Builds the fixture from pseudo-random postings generated with a fixed seed.
        fn default() -> TestContext<'a> {
            // The generated postings depend on the order of the `random_postings` calls below.
            let mut rng = StdRng::seed_from_u64(102);
            let rng = &mut rng;
            /// Draws `len` random document ids and packs them into a bitmap.
            fn random_postings<R: Rng>(rng: &mut R, len: usize) -> RoaringBitmap {
                let mut values = Vec::<u32>::with_capacity(len);
                while values.len() != len {
                    values.push(rng.gen());
                }
                values.sort_unstable();
                RoaringBitmap::from_sorted_iter(values.into_iter())
            }
            let word_docids = hashmap!{
                s("hello")      => random_postings(rng,   1500),
                s("hi")         => random_postings(rng,   4000),
                s("word")       => random_postings(rng,   2500),
                s("split")      => random_postings(rng,    400),
                s("ngrams")     => random_postings(rng,   1400),
                s("world")      => random_postings(rng, 15_000),
                s("earth")      => random_postings(rng,   8000),
                s("2021")       => random_postings(rng,    100),
                s("2020")       => random_postings(rng,    500),
                s("is")         => random_postings(rng, 50_000),
                s("this")       => random_postings(rng, 50_000),
                s("good")       => random_postings(rng,   1250),
                s("morning")    => random_postings(rng,    125),
            };
            // Each prefix entry is the union of the postings of the words it covers.
            let word_prefix_docids = hashmap!{
                s("h")   => &word_docids[&s("hello")] | &word_docids[&s("hi")],
                s("wor") => &word_docids[&s("word")]  | &word_docids[&s("world")],
                s("20")  => &word_docids[&s("2020")]  | &word_docids[&s("2021")],
            };
            // Documents containing both "hello" and "world": the first half is
            // registered at proximity 1, the second half at proximity 4.
            let hello_world = &word_docids[&s("hello")] & &word_docids[&s("world")];
            let hello_world_split = (hello_world.len() / 2) as usize;
            let hello_world_1 = hello_world.iter().take(hello_world_split).collect();
            let hello_world_2 = hello_world.iter().skip(hello_world_split).collect();
            // Documents containing both "hello" and "word": the first half is registered
            // at proximity 4, the next quarter at proximity 6 and the rest at proximity 7.
            let hello_word = &word_docids[&s("hello")] & &word_docids[&s("word")];
            let hello_word_split = (hello_word.len() / 2) as usize;
            let hello_word_4 = hello_word.iter().take(hello_word_split).collect();
            let hello_word_6 = hello_word.iter().skip(hello_word_split).take(hello_word_split/2).collect();
            let hello_word_7 = hello_word.iter().skip(hello_word_split + hello_word_split/2).collect();
            let word_pair_proximity_docids = hashmap!{
                (s("good"), s("morning"), 1)   => &word_docids[&s("good")] & &word_docids[&s("morning")],
                (s("hello"), s("world"), 1)   => hello_world_1,
                (s("hello"), s("world"), 4)   => hello_world_2,
                (s("this"), s("is"), 1)   => &word_docids[&s("this")] & &word_docids[&s("is")],
                (s("is"), s("2021"), 1)   => &word_docids[&s("this")] & &word_docids[&s("is")] & &word_docids[&s("2021")],
                (s("is"), s("2020"), 1)   => &word_docids[&s("this")] & &word_docids[&s("is")] & (&word_docids[&s("2020")] - &word_docids[&s("2021")]),
                (s("this"), s("2021"), 2)   => &word_docids[&s("this")] & &word_docids[&s("is")] & &word_docids[&s("2021")],
                (s("this"), s("2020"), 2)   => &word_docids[&s("this")] & &word_docids[&s("is")] & (&word_docids[&s("2020")] - &word_docids[&s("2021")]),
                (s("word"), s("split"), 1)   => &word_docids[&s("word")] & &word_docids[&s("split")],
                (s("world"), s("split"), 1)   => (&word_docids[&s("world")] & &word_docids[&s("split")]) - &word_docids[&s("word")],
                (s("hello"), s("word"), 4) => hello_word_4,
                (s("hello"), s("word"), 6) => hello_word_6,
                (s("hello"), s("word"), 7) => hello_word_7,
                (s("split"), s("ngrams"), 3)   => (&word_docids[&s("split")] & &word_docids[&s("ngrams")]) - &word_docids[&s("word")],
                (s("split"), s("ngrams"), 5)   => &word_docids[&s("split")] & &word_docids[&s("ngrams")] & &word_docids[&s("word")],
                (s("this"), s("ngrams"), 1)   => (&word_docids[&s("split")] & &word_docids[&s("this")] & &word_docids[&s("ngrams")] ) - &word_docids[&s("word")],
                (s("this"), s("ngrams"), 2)   => &word_docids[&s("split")] & &word_docids[&s("this")] & &word_docids[&s("ngrams")] & &word_docids[&s("word")],
            };
            // The prefix pair entries are derived from the word pair entries above.
            let word_prefix_pair_proximity_docids = hashmap!{
                (s("hello"), s("wor"), 1) => word_pair_proximity_docids.get(&(s("hello"), s("world"), 1)).unwrap().clone(),
                (s("hello"), s("wor"), 4) => word_pair_proximity_docids.get(&(s("hello"), s("world"), 4)).unwrap() | word_pair_proximity_docids.get(&(s("hello"), s("word"), 4)).unwrap(),
                (s("hello"), s("wor"), 6) => word_pair_proximity_docids.get(&(s("hello"), s("word"), 6)).unwrap().clone(),
                (s("hello"), s("wor"), 7) => word_pair_proximity_docids.get(&(s("hello"), s("word"), 7)).unwrap().clone(),
                (s("is"), s("20"), 1) => word_pair_proximity_docids.get(&(s("is"), s("2020"), 1)).unwrap() | word_pair_proximity_docids.get(&(s("is"), s("2021"), 1)).unwrap(),
                (s("this"), s("20"), 2) => word_pair_proximity_docids.get(&(s("this"), s("2020"), 2)).unwrap() | word_pair_proximity_docids.get(&(s("this"), s("2021"), 2)).unwrap(),
            };
            // The keys are sorted before being handed to the FST builder.
            let mut keys = word_docids.keys().collect::<Vec<_>>();
            keys.sort_unstable();
            let words_fst = fst::Set::from_iter(keys).unwrap().map_data(|v| Cow::Owned(v)).unwrap();
            TestContext {
                words_fst,
                word_docids,
                word_prefix_docids,
                word_pair_proximity_docids,
                word_prefix_pair_proximity_docids,
            }
        }
    }
 }
--- a/milli/src/search/criteria/proximity.rs
+++ b/milli/src/search/criteria/proximity.rs
@ -0,0 +1,291 @@
 use std::collections::HashMap;
 use std::mem::take;
 use roaring::RoaringBitmap;
 use log::debug;
 use crate::search::query_tree::{maximum_proximity, Operation, Query};
 use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids};
 /// Criterion that returns the candidates in buckets of increasing proximity.
 pub struct Proximity<'t> {
    /// Data access used to resolve the candidates.
    ctx: &'t dyn Context,
    /// The query tree currently processed, paired with the value returned
    /// by `maximum_proximity` for it.
    query_tree: Option<(usize, Operation)>,
    /// The proximity that the next bucket will be resolved with.
    proximity: u8,
    /// The candidates still allowed to be returned, or the ones already returned.
    candidates: Candidates,
    /// Bucket candidates accumulated from the results of the parent criterion.
    bucket_candidates: RoaringBitmap,
    /// The criterion to pull new query trees and candidates from, if any.
    parent: Option<Box<dyn Criterion + 't>>,
    /// Cache handed to `resolve_candidates`, keyed by operation and proximity.
    candidates_cache: HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>,
 }
 impl<'t> Proximity<'t> {
    /// Creates a proximity criterion that is the first of its chain.
    ///
    /// It works on `query_tree` directly and has no parent. When `candidates`
    /// is provided, only these documents are allowed to be returned.
    pub fn initial(
        ctx: &'t dyn Context,
        query_tree: Option<Operation>,
        candidates: Option<RoaringBitmap>,
    ) -> Self
    {
        let query_tree = query_tree.map(|tree| {
            let max_proximity = maximum_proximity(&tree);
            (max_proximity, tree)
        });
        let candidates = match candidates {
            Some(allowed) => Candidates::Allowed(allowed),
            None => Candidates::default(),
        };

        Self {
            ctx,
            query_tree,
            proximity: 0,
            candidates,
            bucket_candidates: RoaringBitmap::new(),
            parent: None,
            candidates_cache: HashMap::new(),
        }
    }
    /// Creates a proximity criterion that refines the results of `parent`.
    ///
    /// It starts without query tree nor candidates: both are
    /// pulled from the parent on the first call to `next`.
    pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
        Self {
            ctx,
            parent: Some(parent),
            query_tree: None,
            candidates: Candidates::default(),
            proximity: 0,
            bucket_candidates: RoaringBitmap::new(),
            candidates_cache: HashMap::new(),
        }
    }
 }
 impl<'t> Criterion for Proximity<'t> {
    /// Returns the next bucket of candidates, one bucket per proximity value.
    ///
    /// The method loops over a small state machine driven by the current
    /// query tree and candidates, and only leaves it by returning a bucket
    /// or `Ok(None)` when there is nothing left (no parent, or parent exhausted).
    fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
        use Candidates::{Allowed, Forbidden};
        loop {
            debug!("Proximity at iteration {} (max {:?}) ({:?})",
                self.proximity,
                self.query_tree.as_ref().map(|(mp, _)| mp),
                self.candidates,
            );
            match (&mut self.query_tree, &mut self.candidates) {
                // No allowed candidate remains: hand over the (empty) set along with
                // the accumulated bucket candidates and reset the internal state.
                (_, Allowed(candidates)) if candidates.is_empty() => {
                    return Ok(Some(CriterionResult {
                        query_tree: self.query_tree.take().map(|(_, qt)| qt),
                        candidates: take(&mut self.candidates).into_inner(),
                        bucket_candidates: take(&mut self.bucket_candidates),
                    }));
                },
                (Some((max_prox, query_tree)), Allowed(candidates)) => {
                    if self.proximity as usize > *max_prox {
                        // Every proximity was tried: drop the state so that
                        // the next iteration falls into the `(None, Forbidden)` arm.
                        self.query_tree = None;
                        self.candidates = Candidates::default();
                    } else {
                        let mut new_candidates = resolve_candidates(
                            self.ctx,
                            &query_tree,
                            self.proximity,
                            &mut self.candidates_cache,
                        )?;
                        // Keep only the allowed documents and remove them from
                        // the allowed set so that they are returned only once.
                        new_candidates.intersect_with(&candidates);
                        candidates.difference_with(&new_candidates);
                        self.proximity += 1;
                        // With a parent, the bucket candidates are the ones accumulated
                        // from it; otherwise this bucket is its own bucket candidates.
                        let bucket_candidates = match self.parent {
                            Some(_) => take(&mut self.bucket_candidates),
                            None => new_candidates.clone(),
                        };
                        return Ok(Some(CriterionResult {
                            query_tree: Some(query_tree.clone()),
                            candidates: new_candidates,
                            bucket_candidates,
                        }));
                    }
                },
                (Some((max_prox, query_tree)), Forbidden(candidates)) => {
                    if self.proximity as usize > *max_prox {
                        // Every proximity was tried: drop the state so that
                        // the next iteration falls into the `(None, Forbidden)` arm.
                        self.query_tree = None;
                        self.candidates = Candidates::default();
                    } else {
                        let mut new_candidates = resolve_candidates(
                            self.ctx,
                            &query_tree,
                            self.proximity,
                            &mut self.candidates_cache,
                        )?;
                        // Skip the documents that were already returned and
                        // remember the new ones as forbidden for the next buckets.
                        new_candidates.difference_with(&candidates);
                        candidates.union_with(&new_candidates);
                        self.proximity += 1;
                        let bucket_candidates = match self.parent {
                            Some(_) => take(&mut self.bucket_candidates),
                            None => new_candidates.clone(),
                        };
                        return Ok(Some(CriterionResult {
                            query_tree: Some(query_tree.clone()),
                            candidates: new_candidates,
                            bucket_candidates,
                        }));
                    }
                },
                // Without query tree nothing can be ranked:
                // return all the allowed candidates as a single bucket.
                (None, Allowed(_)) => {
                    let candidates = take(&mut self.candidates).into_inner();
                    return Ok(Some(CriterionResult {
                        query_tree: None,
                        candidates: candidates.clone(),
                        bucket_candidates: candidates,
                    }));
                },
                // Nothing left to process: pull the next query tree
                // and candidates from the parent, if there is one.
                (None, Forbidden(_)) => {
                    match self.parent.as_mut() {
                        Some(parent) => {
                            match parent.next()? {
                                Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
                                    self.query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
                                    self.proximity = 0;
                                    self.candidates = Candidates::Allowed(candidates);
                                    self.bucket_candidates.union_with(&bucket_candidates);
                                },
                                None => return Ok(None),
                            }
                        },
                        None => return Ok(None),
                    }
                },
            }
        }
    }
 }
 /// Returns the documents ids that match `query_tree` with exactly the given
 /// total `proximity`, as computed by the helpers below.
 ///
 /// The helpers work on `(leftmost query, rightmost query, docids)` triples:
 /// a single query yields itself on both sides, and combining two operations
 /// keeps the leftmost query of the left one and the rightmost query of the
 /// right one. `cache` stores the triples already computed for an
 /// `(operation, proximity)` key.
 fn resolve_candidates<'t>(
    ctx: &'t dyn Context,
    query_tree: &Operation,
    proximity: u8,
    cache: &mut HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>,
 ) -> anyhow::Result<RoaringBitmap>
 {
    /// Resolves one operation of the tree at the given proximity.
    fn resolve_operation<'t>(
        ctx: &'t dyn Context,
        query_tree: &Operation,
        proximity: u8,
        cache: &mut HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>,
    ) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>>
    {
        use Operation::{And, Consecutive, Or, Query};
        let result = match query_tree {
            And(ops) => mdfs(ctx, ops, proximity, cache)?,
            // Consecutive operations only produce results at proximity 0.
            Consecutive(ops) => if proximity == 0 {
                mdfs(ctx, ops, 0, cache)?
            } else {
                Default::default()
            },
            // The alternatives are resolved independently and their triples concatenated.
            Or(_, ops) => {
                let mut output = Vec::new();
                for op in ops {
                    let result = resolve_operation(ctx, op, proximity, cache)?;
                    output.extend(result);
                }
                output
            },
            // A single query only produces results at proximity 0.
            Query(q) => if proximity == 0 {
                let candidates = query_docids(ctx, q)?;
                vec![(q.clone(), q.clone(), candidates)]
            } else {
                Default::default()
            },
        };
        Ok(result)
    }
    /// Resolves the `left` and `right` operations as a pair, trying every way to
    /// split `proximity` between the pair itself and the two operations.
    fn mdfs_pair<'t>(
        ctx: &'t dyn Context,
        left: &Operation,
        right: &Operation,
        proximity: u8,
        cache: &mut HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>,
    ) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>>
    {
        /// Yields every `(m, mana - m)` split with `m` in `0..=min(mana, left_max)`.
        fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> {
            (0..=mana.min(left_max)).map(move |m| (m, mana - m))
        }
        let pair_max_proximity = 7;
        let mut output = Vec::new();
        // `pair_p` is spent between the two operations, `left_right_p` inside of them.
        for (pair_p, left_right_p) in pair_combinations(proximity, pair_max_proximity) {
            for (left_p, right_p) in pair_combinations(left_right_p, left_right_p) {
                // Both sides are resolved at most once per proximity thanks to the cache.
                let left_key = (left.clone(), left_p);
                if !cache.contains_key(&left_key) {
                    let candidates = resolve_operation(ctx, left, left_p, cache)?;
                    cache.insert(left_key.clone(), candidates);
                }
                let right_key = (right.clone(), right_p);
                if !cache.contains_key(&right_key) {
                    let candidates = resolve_operation(ctx, right, right_p, cache)?;
                    cache.insert(right_key.clone(), candidates);
                }
                let lefts = cache.get(&left_key).unwrap();
                let rights = cache.get(&right_key).unwrap();
                for (ll, lr, lcandidates) in lefts {
                    for (rl, rr, rcandidates) in rights {
                        // The pair lookup joins the rightmost query of the left side with
                        // the leftmost query of the right side, at proximity `pair_p + 1`.
                        let mut candidates = query_pair_proximity_docids(ctx, lr, rl, pair_p + 1)?;
                        // Intersect with the smallest bitmap first.
                        if lcandidates.len() < rcandidates.len() {
                            candidates.intersect_with(lcandidates);
                            candidates.intersect_with(rcandidates);
                        } else {
                            candidates.intersect_with(rcandidates);
                            candidates.intersect_with(lcandidates);
                        }
                        if !candidates.is_empty() {
                            output.push((ll.clone(), rr.clone(), candidates));
                        }
                    }
                }
            }
        }
        Ok(output)
    }
    /// Resolves a sequence of operations by pairing the first two of them and
    /// recursing on the sequence that starts at the second one.
    fn mdfs<'t>(
        ctx: &'t dyn Context,
        branches: &[Operation],
        proximity: u8,
        cache: &mut HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>,
    ) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>>
    {
        // Extract the first two elements and give back the tail
        // that starts just after the first element (it still contains the second one).
        let next = branches.split_first().map(|(h1, t)| {
            (h1, t.split_first().map(|(h2, _)| (h2, t)))
        });
        match next {
            // Exactly two branches: the tail only contains the second one.
            Some((head1, Some((head2, [_])))) => mdfs_pair(ctx, head1, head2, proximity, cache),
            // More than two branches: split the proximity between
            // the first pair and the rest of the sequence.
            Some((head1, Some((head2, tail)))) => {
                let mut output = Vec::new();
                for p in 0..=proximity {
                    for (lhead, _, head_candidates) in mdfs_pair(ctx, head1, head2, p, cache)? {
                        if !head_candidates.is_empty() {
                            for (_, rtail, mut candidates) in mdfs(ctx, tail, proximity - p, cache)? {
                                candidates.intersect_with(&head_candidates);
                                if !candidates.is_empty() {
                                    output.push((lhead.clone(), rtail, candidates));
                                }
                            }
                        }
                    }
                }
                Ok(output)
            },
            // A single branch is resolved on its own.
            Some((head1, None)) => resolve_operation(ctx, head1, proximity, cache),
            None => return Ok(Default::default()),
        }
    }
    // The final candidates are the union of the docids of every resolved triple.
    let mut candidates = RoaringBitmap::new();
    for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache)? {
        candidates.union_with(&cds);
    }
    Ok(candidates)
 }
--- a/milli/src/search/criteria/typo.rs
+++ b/milli/src/search/criteria/typo.rs
@ -0,0 +1,482 @@
 use std::{borrow::Cow, collections::HashMap, mem::take};
 use anyhow::bail;
 use log::debug;
 use roaring::RoaringBitmap;
 use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
 use crate::search::word_derivations;
 use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids};
 /// Criterion that hands out candidates bucket by bucket, one bucket per
 /// number of typos (see its `Criterion` implementation).
 pub struct Typo<'t> {
    /// Context used to fetch the words FST and to resolve queries into documents ids.
    ctx: &'t dyn Context,
    /// The query tree being processed, paired with the value `maximum_typo`
    /// returned for it; `None` when there is no query tree left to process.
    query_tree: Option<(usize, Operation)>,
    /// Number of typos used to compute the next bucket.
    number_typos: u8,
    /// Either the documents that can still be returned (`Allowed`) or the
    /// documents that were already returned (`Forbidden`).
    candidates: Candidates,
    /// Bucket candidates accumulated from the parent criterion and not yet handed out.
    bucket_candidates: RoaringBitmap,
    /// The criterion feeding this one, if any.
    parent: Option<Box<dyn Criterion + 't>>,
    /// Cache handed to `resolve_candidates`, keyed by operation and number of typos.
    candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
    /// Cache of word derivations, keyed by `(word, is_prefix, number of typos)`.
    typo_cache: HashMap<(String, bool, u8), Vec<(String, u8)>>,
 }
 impl<'t> Typo<'t> {
    /// Builds a `Typo` criterion that has no parent criterion.
    ///
    /// When given, `query_tree` is stored along with the value `maximum_typo`
    /// computes for it. When given, `candidates` become the allowed documents;
    /// otherwise the default `Candidates` value is used.
    pub fn initial(
        ctx: &'t dyn Context,
        query_tree: Option<Operation>,
        candidates: Option<RoaringBitmap>,
    ) -> Self
    {
        let query_tree = query_tree.map(|tree| {
            let max_typos = maximum_typo(&tree);
            (max_typos, tree)
        });
        let candidates = match candidates {
            Some(docids) => Candidates::Allowed(docids),
            None => Candidates::default(),
        };
        Self {
            ctx,
            query_tree,
            number_typos: 0,
            candidates,
            bucket_candidates: RoaringBitmap::new(),
            parent: None,
            candidates_cache: HashMap::new(),
            typo_cache: HashMap::new(),
        }
    }
    /// Builds a `Typo` criterion fed by `parent`: it starts without any query
    /// tree and with the default `Candidates` value, both being replaced by
    /// what the parent returns.
    pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
        Self {
            parent: Some(parent),
            ..Self::initial(ctx, None, None)
        }
    }
 }
 impl<'t> Criterion for Typo<'t> {
    /// Returns the next bucket of candidates.
    ///
    /// While a query tree is available, each call resolves it for the current
    /// number of typos, returns the matching documents that were not returned
    /// yet, and increments the number of typos. Once the number of typos exceeds
    /// the maximum computed for the query tree, the state is reset and the parent
    /// criterion (if any) is asked for a new query tree and new candidates.
    ///
    /// Returns `Ok(None)` when there is nothing left to return.
    fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
        use Candidates::{Allowed, Forbidden};
        loop {
            debug!("Typo at iteration {} ({:?})", self.number_typos, self.candidates);
            match (&mut self.query_tree, &mut self.candidates) {
                // No allowed document is left: hand back the current state and reset it.
                (_, Allowed(candidates)) if candidates.is_empty() => {
                    return Ok(Some(CriterionResult {
                        query_tree: self.query_tree.take().map(|(_, qt)| qt),
                        candidates: take(&mut self.candidates).into_inner(),
                        bucket_candidates: take(&mut self.bucket_candidates),
                    }));
                },
                (Some((max_typos, query_tree)), candidates) => {
                    if self.number_typos as usize > *max_typos {
                        // Every number of typos was tried for this query tree.
                        self.query_tree = None;
                        self.candidates = Candidates::default();
                    } else {
                        let fst = self.ctx.words_fst();
                        let new_query_tree = if self.number_typos < 2 {
                            alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache)?
                        } else if self.number_typos == 2 {
                            // From two typos on, the altered tree is stored and
                            // reused as is by the following iterations.
                            *query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, &mut self.typo_cache)?;
                            query_tree.clone()
                        } else {
                            query_tree.clone()
                        };
                        let mut new_candidates = resolve_candidates(self.ctx, &new_query_tree, self.number_typos, &mut self.candidates_cache)?;
                        match candidates {
                            // Keep only the allowed documents, then consume them.
                            Allowed(remaining) => {
                                new_candidates.intersect_with(&*remaining);
                                remaining.difference_with(&new_candidates);
                            },
                            // Skip the documents already returned, then record the new ones.
                            Forbidden(excluded) => {
                                new_candidates.difference_with(&*excluded);
                                excluded.union_with(&new_candidates);
                            },
                        }
                        self.number_typos += 1;
                        let bucket_candidates = match self.parent {
                            Some(_) => take(&mut self.bucket_candidates),
                            None => new_candidates.clone(),
                        };
                        return Ok(Some(CriterionResult {
                            query_tree: Some(new_query_tree),
                            candidates: new_candidates,
                            bucket_candidates,
                        }));
                    }
                },
                // No query tree: the allowed documents are returned in a single bucket.
                (None, Allowed(_)) => {
                    let candidates = take(&mut self.candidates).into_inner();
                    return Ok(Some(CriterionResult {
                        query_tree: None,
                        candidates: candidates.clone(),
                        bucket_candidates: candidates,
                    }));
                },
                // Nothing left locally: ask the parent for more work, if there is one.
                (None, Forbidden(_)) => {
                    match self.parent.as_mut() {
                        Some(parent) => {
                            match parent.next()? {
                                Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
                                    self.query_tree = query_tree.map(|op| (maximum_typo(&op), op));
                                    self.number_typos = 0;
                                    self.candidates = Candidates::Allowed(candidates);
                                    self.bucket_candidates.union_with(&bucket_candidates);
                                },
                                None => return Ok(None),
                            }
                        },
                        None => return Ok(None),
                    }
                },
            }
        }
    }
 }
 /// Modify the query tree by replacing every tolerant query by an Or operation
 /// containing all of the corresponding exact words in the words FST. Each tolerant
 /// query will only be replaced by exact query with up to `number_typos` maximum typos.
 ///
 /// When `number_typos` is zero the words FST is not consulted at all: every
 /// tolerant query is directly replaced by the exact query of the same word.
 ///
 /// The derivations of a word are stored in `typo_cache`, keyed by the word, its
 /// prefix flag and the number of typos used, and are reused on later calls.
 fn alterate_query_tree(
    words_fst: &fst::Set<Cow<[u8]>>,
    mut query_tree: Operation,
    number_typos: u8,
    typo_cache: &mut HashMap<(String, bool, u8), Vec<(String, u8)>>,
 ) -> anyhow::Result<Operation>
 {
    /// Walks `operation` in place and rewrites every tolerant query it contains.
    fn recurse(
        words_fst: &fst::Set<Cow<[u8]>>,
        operation: &mut Operation,
        number_typos: u8,
        typo_cache: &mut HashMap<(String, bool, u8), Vec<(String, u8)>>,
    ) -> anyhow::Result<()>
    {
        use std::collections::hash_map::Entry;
        use Operation::{And, Consecutive, Or};
        match operation {
            And(ops) | Consecutive(ops) | Or(_, ops) => {
                ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, typo_cache))
            },
            Operation::Query(q) => {
                if let QueryKind::Tolerant { typo, word } = &q.kind {
                    // if no typo is allowed we don't call word_derivations function,
                    // and directly create an Exact query
                    if number_typos == 0 {
                        *operation = Operation::Query(Query {
                            prefix: q.prefix,
                            kind: QueryKind::Exact { original_typo: 0, word: word.clone() },
                        });
                    } else {
                        // Never use more typos than the query itself tolerates.
                        let typo = (*typo).min(number_typos);
                        // A single lookup both checks the cache and reserves the slot.
                        let words = match typo_cache.entry((word.clone(), q.prefix, typo)) {
                            Entry::Occupied(entry) => entry.get().clone(),
                            Entry::Vacant(entry) => {
                                let derivations = word_derivations(word, q.prefix, typo, words_fst)?;
                                entry.insert(derivations).clone()
                            },
                        };
                        let queries = words.into_iter().map(|(word, typo)| {
                            Operation::Query(Query {
                                prefix: false,
                                kind: QueryKind::Exact { original_typo: typo, word },
                            })
                        }).collect();
                        *operation = Operation::or(false, queries);
                    }
                }
                Ok(())
            },
        }
    }
    recurse(words_fst, &mut query_tree, number_typos, typo_cache)?;
    Ok(query_tree)
 }
 fn resolve_candidates<'t>(
    ctx: &'t dyn Context,
    query_tree: &Operation,
    number_typos: u8,
    cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
 ) -> anyhow::Result<RoaringBitmap>
 {
    fn resolve_operation<'t>(
        ctx: &'t dyn Context,
        query_tree: &Operation,
        number_typos: u8,
        cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
    ) -> anyhow::Result<RoaringBitmap>
    {
        use Operation::{And, Consecutive, Or, Query};
        match query_tree {
            And(ops) => {
                mdfs(ctx, ops, number_typos, cache)
            },
            Consecutive(ops) => {
                let mut candidates = RoaringBitmap::new();
                let mut first_loop = true;
                for slice in ops.windows(2) {
                    match (&slice[0], &slice[1]) {
                        (Operation::Query(left), Operation::Query(right)) => {
                            match query_pair_proximity_docids(ctx, left, right, 1)? {
                                pair_docids if pair_docids.is_empty() => {
                                    return Ok(RoaringBitmap::new())
                                },
                                pair_docids if first_loop => {
                                    candidates = pair_docids;
                                    first_loop = false;
                                },
                                pair_docids => {
                                    candidates.intersect_with(&pair_docids);
                                },
                            }
                        },
                        _ => bail!("invalid consecutive query type"),
                    }
                }
                Ok(candidates)
            },
            Or(_, ops) => {
                let mut candidates = RoaringBitmap::new();
                for op in ops {
                    let docids = resolve_operation(ctx, op, number_typos, cache)?;
                    candidates.union_with(&docids);
                }
                Ok(candidates)
            },
            Query(q) => if q.kind.typo() == number_typos {
                Ok(query_docids(ctx, q)?)
            } else {
                Ok(RoaringBitmap::new())
            },
        }
    }
    fn mdfs<'t>(
        ctx: &'t dyn Context,
        branches: &[Operation],
        mana: u8,
        cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
    ) -> anyhow::Result<RoaringBitmap>
    {
        match branches.split_first() {
            Some((head, [])) => {
                let cache_key = (head.clone(), mana);
                if let Some(candidates) = cache.get(&cache_key) {
                    Ok(candidates.clone())
                } else {
                    let candidates = resolve_operation(ctx, head, mana, cache)?;
                    cache.insert(cache_key, candidates.clone());
                    Ok(candidates)
                }
            },
            Some((head, tail)) => {
                let mut candidates = RoaringBitmap::new();
                for m in 0..=mana {
                    let mut head_candidates = {
                        let cache_key = (head.clone(), m);
                        if let Some(candidates) = cache.get(&cache_key) {
                            candidates.clone()
                        } else {
                            let candidates = resolve_operation(ctx, head, m, cache)?;
                            cache.insert(cache_key, candidates.clone());
                            candidates
                        }
                    };
                    if !head_candidates.is_empty() {
                        let tail_candidates = mdfs(ctx, tail, mana - m, cache)?;
                        head_candidates.intersect_with(&tail_candidates);
                        candidates.union_with(&head_candidates);
                    }
                }
                Ok(candidates)
            },
            None => Ok(RoaringBitmap::new()),
        }
    }
    resolve_operation(ctx, query_tree, number_typos, cache)
 }
 #[cfg(test)]
 mod test {
    use super::*;
    use super::super::test::TestContext;
    // Without query tree nor facet candidates there is nothing to return.
    #[test]
    fn initial_placeholder_no_facets() {
        let context = TestContext::default();
        let query_tree = None;
        let facet_candidates = None;
        let mut criteria = Typo::initial(&context, query_tree, facet_candidates);
        assert!(criteria.next().unwrap().is_none());
    }
    // With a query tree only, the first two buckets are the ones computed
    // for zero typo and then for one typo.
    #[test]
    fn initial_query_tree_no_facets() {
        let context = TestContext::default();
        let query_tree = Operation::Or(false, vec![
            Operation::And(vec![
                Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
                Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
                Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "world".to_string()) }),
            ])
        ]);
        let facet_candidates = None;
        let mut criteria = Typo::initial(&context, Some(query_tree), facet_candidates);
        // zero typo: the tolerant "world" query becomes an exact one
        let candidates_1 = context.word_docids("split").unwrap().unwrap()
            & context.word_docids("this").unwrap().unwrap()
            & context.word_docids("world").unwrap().unwrap();
        let expected_1 = CriterionResult {
            query_tree: Some(Operation::Or(false, vec![
                Operation::And(vec![
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
                ]),
            ])),
            candidates: candidates_1.clone(),
            bucket_candidates: candidates_1,
        };
        assert_eq!(criteria.next().unwrap(), Some(expected_1));
        // one typo: "world" is expanded to "word" (one typo) or "world", and the
        // documents containing "world" are excluded from the expected bucket
        let candidates_2 = (
                context.word_docids("split").unwrap().unwrap()
                & context.word_docids("this").unwrap().unwrap()
                & context.word_docids("word").unwrap().unwrap()
            ) - context.word_docids("world").unwrap().unwrap();
        let expected_2 = CriterionResult {
            query_tree: Some(Operation::Or(false, vec![
                Operation::And(vec![
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
                    Operation::Or(false, vec![
                        Operation::Query(Query { prefix: false, kind: QueryKind::exact_with_typo(1, "word".to_string()) }),
                        Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
                    ]),
                ]),
            ])),
            candidates: candidates_2.clone(),
            bucket_candidates: candidates_2,
        };
        assert_eq!(criteria.next().unwrap(), Some(expected_2));
    }
    // With facet candidates only, they are returned as a single bucket.
    #[test]
    fn initial_placeholder_with_facets() {
        let context = TestContext::default();
        let query_tree = None;
        let facet_candidates = context.word_docids("earth").unwrap().unwrap();
        let mut criteria = Typo::initial(&context, query_tree, Some(facet_candidates.clone()));
        let expected = CriterionResult {
            query_tree: None,
            candidates: facet_candidates.clone(),
            bucket_candidates: facet_candidates,
        };
        // first iteration, returns the facet candidates
        assert_eq!(criteria.next().unwrap(), Some(expected));
        // second iteration, returns None because there is no more things to do
        assert!(criteria.next().unwrap().is_none());
    }
    // Same as `initial_query_tree_no_facets`, each bucket being restricted
    // to the facet candidates.
    #[test]
    fn initial_query_tree_with_facets() {
        let context = TestContext::default();
        let query_tree = Operation::Or(false, vec![
            Operation::And(vec![
                Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
                Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
                Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "world".to_string()) }),
            ])
        ]);
        let facet_candidates = context.word_docids("earth").unwrap().unwrap();
        let mut criteria = Typo::initial(&context, Some(query_tree), Some(facet_candidates.clone()));
        // zero typo bucket
        let candidates_1 = context.word_docids("split").unwrap().unwrap()
            & context.word_docids("this").unwrap().unwrap()
            & context.word_docids("world").unwrap().unwrap();
        let expected_1 = CriterionResult {
            query_tree: Some(Operation::Or(false, vec![
                Operation::And(vec![
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
                ]),
            ])),
            candidates: &candidates_1 & &facet_candidates,
            bucket_candidates: candidates_1 & &facet_candidates,
        };
        assert_eq!(criteria.next().unwrap(), Some(expected_1));
        // one typo bucket
        let candidates_2 = (
                context.word_docids("split").unwrap().unwrap()
                & context.word_docids("this").unwrap().unwrap()
                & context.word_docids("word").unwrap().unwrap()
            ) - context.word_docids("world").unwrap().unwrap();
        let expected_2 = CriterionResult {
            query_tree: Some(Operation::Or(false, vec![
                Operation::And(vec![
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }),
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact("this".to_string()) }),
                    Operation::Or(false, vec![
                        Operation::Query(Query { prefix: false, kind: QueryKind::exact_with_typo(1, "word".to_string()) }),
                        Operation::Query(Query { prefix: false, kind: QueryKind::exact("world".to_string()) }),
                    ]),
                ]),
            ])),
            candidates: &candidates_2 & &facet_candidates,
            bucket_candidates: candidates_2 & &facet_candidates,
        };
        assert_eq!(criteria.next().unwrap(), Some(expected_2));
    }
 }
--- a/milli/src/search/criteria/words.rs
+++ b/milli/src/search/criteria/words.rs
@ -0,0 +1,128 @@
 use std::collections::HashMap;
 use std::mem::take;
 use log::debug;
 use roaring::RoaringBitmap;
 use crate::search::query_tree::Operation;
 use super::{resolve_query_tree, Candidates, Criterion, CriterionResult, Context};
 /// Criterion that hands out candidates bucket by bucket, one bucket per
 /// query tree of `query_trees` (see its `Criterion` implementation).
 pub struct Words<'t> {
    /// Context used to resolve the query trees into documents ids.
    ctx: &'t dyn Context,
    /// Query trees that remain to be resolved; they are popped from the end.
    query_trees: Vec<Operation>,
    /// Either the documents that can still be returned (`Allowed`) or the
    /// documents that were already returned (`Forbidden`).
    candidates: Candidates,
    /// Bucket candidates accumulated from the parent criterion and not yet handed out.
    bucket_candidates: RoaringBitmap,
    /// The criterion feeding this one, if any.
    parent: Option<Box<dyn Criterion + 't>>,
    /// Cache handed to `resolve_query_tree`.
    candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
 }
 impl<'t> Words<'t> {
    pub fn initial(
        ctx: &'t dyn Context,
        query_tree: Option<Operation>,
        candidates: Option<RoaringBitmap>,
    ) -> Self
    {
        Words {
            ctx,
            query_trees: query_tree.map(explode_query_tree).unwrap_or_default(),
            candidates: candidates.map_or_else(Candidates::default, Candidates::Allowed),
            bucket_candidates: RoaringBitmap::new(),
            parent: None,
            candidates_cache: HashMap::default(),
        }
    }
    pub fn new(ctx: &'t dyn Context, parent: Box<dyn Criterion + 't>) -> Self {
        Words {
            ctx,
            query_trees: Vec::default(),
            candidates: Candidates::default(),
            bucket_candidates: RoaringBitmap::new(),
            parent: Some(parent),
            candidates_cache: HashMap::default(),
        }
    }
 }
 impl<'t> Criterion for Words<'t> {
    /// Returns the next bucket of candidates.
    ///
    /// Each call pops one query tree, resolves it and returns the matching
    /// documents that were not returned yet. Once no query tree is left, the
    /// parent criterion (if any) is asked for a new query tree and new candidates.
    ///
    /// Returns `Ok(None)` when there is nothing left to return.
    fn next(&mut self) -> anyhow::Result<Option<CriterionResult>> {
        use Candidates::{Allowed, Forbidden};
        loop {
            debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates);
            match (self.query_trees.pop(), &mut self.candidates) {
                // No allowed document is left: drop the remaining query trees
                // and hand back the current state.
                (query_tree, Allowed(candidates)) if candidates.is_empty() => {
                    self.query_trees = Vec::new();
                    return Ok(Some(CriterionResult {
                        query_tree,
                        candidates: take(&mut self.candidates).into_inner(),
                        bucket_candidates: take(&mut self.bucket_candidates),
                    }));
                },
                (Some(qt), candidates) => {
                    let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache)?;
                    match candidates {
                        // Keep only the allowed documents, then consume them.
                        Allowed(remaining) => {
                            found_candidates.intersect_with(&*remaining);
                            remaining.difference_with(&found_candidates);
                        },
                        // Skip the documents already returned, then record the new ones.
                        Forbidden(excluded) => {
                            found_candidates.difference_with(&*excluded);
                            excluded.union_with(&found_candidates);
                        },
                    }
                    let bucket_candidates = match self.parent {
                        Some(_) => take(&mut self.bucket_candidates),
                        None => found_candidates.clone(),
                    };
                    return Ok(Some(CriterionResult {
                        query_tree: Some(qt),
                        candidates: found_candidates,
                        bucket_candidates,
                    }));
                },
                // No query tree: the allowed documents are returned in a single bucket.
                (None, Allowed(_)) => {
                    let candidates = take(&mut self.candidates).into_inner();
                    return Ok(Some(CriterionResult {
                        query_tree: None,
                        candidates: candidates.clone(),
                        bucket_candidates: candidates,
                    }));
                },
                // Nothing left locally: ask the parent for more work, if there is one.
                (None, Forbidden(_)) => {
                    match self.parent.as_mut() {
                        Some(parent) => {
                            match parent.next()? {
                                Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
                                    self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default();
                                    self.candidates = Candidates::Allowed(candidates);
                                    self.bucket_candidates.union_with(&bucket_candidates);
                                },
                                None => return Ok(None),
                            }
                        },
                        None => return Ok(None),
                    }
                },
            }
        }
    }
 }
 /// Splits a query tree into the list of query trees to resolve one by one.
 ///
 /// An `Or` operation whose flag is `true` is replaced by its branches; any
 /// other operation is returned alone in a one-element list.
 fn explode_query_tree(query_tree: Operation) -> Vec<Operation> {
    match query_tree {
        Operation::Or(true, branches) => branches,
        single => vec![single],
    }
 }
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@ -1,27 +1,21 @@
 use std::borrow::Cow;
 use std::collections::{HashMap, HashSet};
 use std::fmt;
 use std::time::Instant;
 use anyhow::{bail, Context};
 use fst::{IntoStreamer, Streamer, Set};
-use levenshtein_automata::DFA;
+use levenshtein_automata::{DFA, LevenshteinAutomatonBuilder as LevBuilder};
 use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
 use log::debug;
 use meilisearch_tokenizer::{AnalyzerConfig, Analyzer};
 use once_cell::sync::Lazy;
 use ordered_float::OrderedFloat;
 use roaring::bitmap::RoaringBitmap;
-use crate::facet::FacetType;
+use crate::search::criteria::{Criterion, CriterionResult};
-use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetLevelValueI64Codec};
+use crate::{Index, DocumentId};
 use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec};
 use crate::mdfs::Mdfs;
 use crate::query_tokens::{query_tokens, QueryToken};
 use crate::{Index, FieldId, DocumentId, Criterion};
 pub use self::facet::FacetIter;
 pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator};
-pub use self::facet::{FacetIter};
+pub use self::query_tree::MatchingWords;
 use self::query_tree::QueryTreeBuilder;
 // Building these factories is not free.
 static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
@ -30,6 +24,7 @@ static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
 mod facet;
 mod query_tree;
 mod criteria;
 pub struct Search<'a> {
    query: Option<String>,
@ -65,208 +60,23 @@ impl<'a> Search<'a> {
        self
    }
    /// Tokenizes the query string and builds one Levenshtein DFA per query word.
    ///
    /// Each returned tuple holds the word text, whether it is treated as a prefix
    /// and the DFA built for it. Only the last word can be a prefix, and only when
    /// the query does not end with whitespace and the word is not quoted.
    /// TODO introduce settings for the number of typos regarding the words lengths.
    fn generate_query_dfas(query: &str) -> Vec<(String, bool, DFA)> {
        let stop_words = Set::default();
        let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
        let analyzed = analyzer.analyze(query);
        let query_words: Vec<_> = query_tokens(analyzed.tokens()).collect();

        let trailing_whitespace = query.chars().last().map_or(false, char::is_whitespace);
        let word_count = query_words.len();

        let mut dfas = Vec::with_capacity(word_count);
        for (index, query_word) in query_words.into_iter().enumerate() {
            // Free words of at most 3 bytes are handled like quoted words.
            let (text, quoted) = match query_word {
                QueryToken::Quoted(token) => (token.text().to_string(), true),
                QueryToken::Free(token) => (token.text().to_string(), token.text().len() <= 3),
            };

            let is_prefix = !quoted && !trailing_whitespace && index + 1 == word_count;

            // Quoted words never accept typos; otherwise the typo budget grows
            // with the byte length of the word.
            let builder = if quoted {
                &LEVDIST0
            } else {
                match text.len() {
                    0..=4 => &LEVDIST0,
                    5..=8 => &LEVDIST1,
                    _ => &LEVDIST2,
                }
            };

            let dfa = match is_prefix {
                true => builder.build_prefix_dfa(&text),
                false => builder.build_dfa(&text),
            };

            dfas.push((text, is_prefix, dfa));
        }

        dfas
    }
    /// Fetch the words from the given FST related to the given DFAs along with
    /// the associated documents ids.
    ///
    /// For every DFA, the result holds a map from each matching word to its
    /// distance from the original query word and its docids, paired with the
    /// union of the docids of all those matching words.
    ///
    /// # Errors
    ///
    /// Returns an error if a word of the FST is not valid UTF-8, if the database
    /// lookup fails, or if a word found in the FST has no entry in `word_docids`.
    fn fetch_words_docids(
        &self,
        fst: &fst::Set<Cow<[u8]>>,
        dfas: Vec<(String, bool, DFA)>,
    ) -> anyhow::Result<Vec<(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)>>
    {
        // A Vec storing all the derived words from the original query words, associated
        // with the distance from the original word and the docids where the words appears.
        let mut derived_words = Vec::<(HashMap::<String, (u8, RoaringBitmap)>, RoaringBitmap)>::with_capacity(dfas.len());
        for (_word, _is_prefix, dfa) in dfas {
            let mut acc_derived_words = HashMap::new();
            let mut unions_docids = RoaringBitmap::new();
            let mut stream = fst.search_with_state(&dfa).into_stream();
            while let Some((word, state)) = stream.next() {
                let word = std::str::from_utf8(word)?;
                // A word listed in the FST without docids means the FST and the
                // database disagree: report it as an error instead of panicking.
                let docids = self.index.word_docids.get(self.rtxn, word)?
                    .with_context(|| format!("word {:?} is in the words FST but has no docids", word))?;
                let distance = dfa.distance(state);
                unions_docids.union_with(&docids);
                acc_derived_words.insert(word.to_string(), (distance.to_u8(), docids));
            }
            derived_words.push((acc_derived_words, unions_docids));
        }
        Ok(derived_words)
    }
    /// Intersects the per-word docids unions, yielding the documents that
    /// contain every query word. Returns an empty bitmap when there is no word.
    fn compute_candidates(
        derived_words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
    ) -> RoaringBitmap
    {
        // Starting from the smallest unions keeps the intersections cheap.
        let mut unions: Vec<&RoaringBitmap> = derived_words.iter().map(|(_, docids)| docids).collect();
        unions.sort_unstable_by_key(|docids| docids.len());

        // Each union covers one original query word; intersecting all of them
        // keeps only the documents matching every word.
        let mut unions = unions.into_iter();
        match unions.next() {
            Some(smallest) => unions.fold(smallest.clone(), |mut candidates, docids| {
                candidates.intersect_with(docids);
                candidates
            }),
            None => RoaringBitmap::new(),
        }
    }
    /// Returns at most `limit` document ids taken from `documents_ids`, ordered
    /// by the value of the facet `field_id` (ascending or descending).
    ///
    /// Two strategies are used depending on the number of candidates:
    /// - up to 1000 candidates, the facet value of each document is read
    ///   individually and the documents are sorted in memory;
    /// - above that, the documents are collected group by group from a
    ///   `FacetIter` restricted to the candidates.
    ///
    /// When fewer than `limit` documents were produced this way, the list is
    /// completed with the remaining candidates, in bitmap iteration order.
    ///
    /// # Errors
    ///
    /// Fails when `facet_type` is `FacetType::String` or when a database
    /// access fails.
    fn facet_ordered(
        &self,
        field_id: FieldId,
        facet_type: FacetType,
        ascending: bool,
        mut documents_ids: RoaringBitmap,
        limit: usize,
    ) -> anyhow::Result<Vec<DocumentId>>
    {
        let mut output: Vec<_> = match facet_type {
            FacetType::Float => {
                // Few candidates: fetch one facet value per document and sort them.
                if documents_ids.len() <= 1000 {
                    let db = self.index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetF64Codec>();
                    let mut docids_values = Vec::with_capacity(documents_ids.len() as usize);
                    for docid in documents_ids.iter() {
                        // Range covering every value stored for this (field, document) pair.
                        let left = (field_id, docid, f64::MIN);
                        let right = (field_id, docid, f64::MAX);
                        let mut iter = db.range(self.rtxn, &(left..=right))?;
                        // Keep the first entry of the range when ascending, the last one otherwise.
                        let entry = if ascending { iter.next() } else { iter.last() };
                        if let Some(((_, _, value), ())) = entry.transpose()? {
                            // `OrderedFloat` makes the value usable as a sort key.
                            docids_values.push((docid, OrderedFloat(value)));
                        }
                    }
                    docids_values.sort_unstable_by_key(|(_, value)| *value);
                    let iter = docids_values.into_iter().map(|(id, _)| id);
                    if ascending {
                        iter.take(limit).collect()
                    } else {
                        iter.rev().take(limit).collect()
                    }
                } else {
                    // Many candidates: rely on the facet iterator restricted to the candidates.
                    // NOTE(review): presumably `new_reducing` yields groups by ascending value
                    // and `new_reverse_reducing` by descending value; confirm in `FacetIter`.
                    let facet_fn = if ascending {
                        FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing
                    } else {
                        FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing
                    };
                    let mut limit_tmp = limit;
                    let mut output = Vec::new();
                    for result in facet_fn(self.rtxn, self.index, field_id, documents_ids.clone())? {
                        let (_val, docids) = result?;
                        // Stop as soon as the collected groups hold at least `limit` documents.
                        limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
                        output.push(docids);
                        if limit_tmp == 0 { break }
                    }
                    output.into_iter().flatten().take(limit).collect()
                }
            },
            FacetType::Integer => {
                // Same two strategies as the float case, using the i64 codecs.
                if documents_ids.len() <= 1000 {
                    let db = self.index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetI64Codec>();
                    let mut docids_values = Vec::with_capacity(documents_ids.len() as usize);
                    for docid in documents_ids.iter() {
                        let left = (field_id, docid, i64::MIN);
                        let right = (field_id, docid, i64::MAX);
                        let mut iter = db.range(self.rtxn, &(left..=right))?;
                        let entry = if ascending { iter.next() } else { iter.last() };
                        if let Some(((_, _, value), ())) = entry.transpose()? {
                            docids_values.push((docid, value));
                        }
                    }
                    docids_values.sort_unstable_by_key(|(_, value)| *value);
                    let iter = docids_values.into_iter().map(|(id, _)| id);
                    if ascending {
                        iter.take(limit).collect()
                    } else {
                        iter.rev().take(limit).collect()
                    }
                } else {
                    let facet_fn = if ascending {
                        FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing
                    } else {
                        FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing
                    };
                    let mut limit_tmp = limit;
                    let mut output = Vec::new();
                    for result in facet_fn(self.rtxn, self.index, field_id, documents_ids.clone())? {
                        let (_val, docids) = result?;
                        limit_tmp = limit_tmp.saturating_sub(docids.len() as usize);
                        output.push(docids);
                        if limit_tmp == 0 { break }
                    }
                    output.into_iter().flatten().take(limit).collect()
                }
            },
            // Ordering is only supported on numeric facets.
            FacetType::String => bail!("criteria facet type must be a number"),
        };
        // If there are not enough documents to return, we try to complete the list
        // with documents that may not be faceted under this field and were therefore
        // not returned by the previous facet iteration.
        if output.len() < limit {
            // Remove the documents already selected so they are not returned twice.
            output.iter().for_each(|n| { documents_ids.remove(*n); });
            let remaining = documents_ids.iter().take(limit - output.len());
            output.extend(remaining);
        }
        Ok(output)
    }
    pub fn execute(&self) -> anyhow::Result<SearchResult> {
-        let limit = self.limit;
+        // We create the query tree by splitting the query into tokens.
-        let fst = self.index.words_fst(self.rtxn)?;
+        let before = Instant::now();
-
+        let query_tree = match self.query.as_ref() {
-        // Construct the DFAs related to the query words.
+            Some(query) => {
-        let derived_words = match self.query.as_deref().map(Self::generate_query_dfas) {
+                let builder = QueryTreeBuilder::new(self.rtxn, self.index);
-            Some(dfas) if !dfas.is_empty() => Some(self.fetch_words_docids(&fst, dfas)?),
+                let stop_words = &Set::default();
-            _otherwise => None,
+                let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words));
                let result = analyzer.analyze(query);
                let tokens = result.tokens();
                builder.build(tokens)?
            },
            None => None,
        };
        debug!("query tree: {:?} took {:.02?}", query_tree, before.elapsed());
        // We create the original candidates with the facet conditions results.
        let before = Instant::now();
        let facet_candidates = match &self.facet_condition {
@ -276,100 +86,42 @@ impl<'a> Search<'a> {
        debug!("facet candidates: {:?} took {:.02?}", facet_candidates, before.elapsed());
-        let order_by_facet = {
+        let matching_words = match query_tree.as_ref() {
-            let criteria = self.index.criteria(self.rtxn)?;
+            Some(query_tree) => MatchingWords::from_query_tree(&query_tree),
-            let result = criteria.into_iter().flat_map(|criterion| {
+            None => MatchingWords::default(),
                match criterion {
                    Criterion::Asc(fid) => Some((fid, true)),
                    Criterion::Desc(fid) => Some((fid, false)),
                    _ => None
                }
            }).next();
            match result {
                Some((attr_name, is_ascending)) => {
                    let field_id_map = self.index.fields_ids_map(self.rtxn)?;
                    let fid = field_id_map.id(&attr_name).with_context(|| format!("unknown field: {:?}", attr_name))?;
                    let faceted_fields = self.index.faceted_fields_ids(self.rtxn)?;
                    let ftype = *faceted_fields.get(&fid)
                        .with_context(|| format!("{:?} not found in the faceted fields.", attr_name))
                        .expect("corrupted data: ");
                    Some((fid, ftype, is_ascending))
                },
                None => None,
            }
        };
-        let before = Instant::now();
+        let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
-        let (candidates, derived_words) = match (facet_candidates, derived_words) {
+        let mut criteria = criteria_builder.build(query_tree, facet_candidates)?;
            (Some(mut facet_candidates), Some(derived_words)) => {
                let words_candidates = Self::compute_candidates(&derived_words);
                facet_candidates.intersect_with(&words_candidates);
                (facet_candidates, derived_words)
            },
            (None, Some(derived_words)) => {
                (Self::compute_candidates(&derived_words), derived_words)
            },
            (Some(facet_candidates), None) => {
                // If the query is not set or results in no DFAs but
                // there is some facet conditions we return a placeholder.
                let documents_ids = match order_by_facet {
                    Some((fid, ftype, is_ascending)) => {
                        self.facet_ordered(fid, ftype, is_ascending, facet_candidates.clone(), limit)?
                    },
                    None => facet_candidates.iter().take(limit).collect(),
                };
                return Ok(SearchResult {
                    documents_ids,
                    candidates: facet_candidates,
                    ..Default::default()
                })
            },
            (None, None) => {
                // If the query is not set or results in no DFAs we return a placeholder.
                let all_docids = self.index.documents_ids(self.rtxn)?;
                let documents_ids = match order_by_facet {
                    Some((fid, ftype, is_ascending)) => {
                        self.facet_ordered(fid, ftype, is_ascending, all_docids.clone(), limit)?
                    },
                    None => all_docids.iter().take(limit).collect(),
                };
                return Ok(SearchResult { documents_ids, candidates: all_docids,..Default::default() })
            },
        };
-        debug!("candidates: {:?} took {:.02?}", candidates, before.elapsed());
+        let mut offset = self.offset;
        let mut limit = self.limit;
        let mut documents_ids = Vec::new();
        let mut initial_candidates = RoaringBitmap::new();
        while let Some(CriterionResult { candidates, bucket_candidates, .. }) = criteria.next()? {
-        // The mana depth first search is a revised DFS that explore
+            debug!("Number of candidates found {}", candidates.len());
        // solutions in the order of their proximities.
        let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates.clone());
        let mut documents = Vec::new();
-        // We execute the Mdfs iterator until we find enough documents.
+            let mut len = candidates.len() as usize;
-        while documents.iter().map(RoaringBitmap::len).sum::<u64>() < limit as u64 {
+            let mut candidates = candidates.into_iter();
-            match mdfs.next().transpose()? {
+
-                Some((proximity, answer)) => {
+            initial_candidates.union_with(&bucket_candidates);
-                    debug!("answer with a proximity of {}: {:?}", proximity, answer);
+
-                    documents.push(answer);
+            if offset != 0 {
-                },
+                candidates.by_ref().skip(offset).for_each(drop);
-                None => break,
+                offset = offset.saturating_sub(len.min(offset));
-            }
+                len = len.saturating_sub(len.min(offset));
            }
-        let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
+            if len != 0 {
-        let documents_ids = match order_by_facet {
+                documents_ids.extend(candidates.take(limit));
-            Some((fid, ftype, order)) => {
+                limit = limit.saturating_sub(len.min(limit));
                let mut ordered_documents = Vec::new();
                for documents_ids in documents {
                    let docids = self.facet_ordered(fid, ftype, order, documents_ids, limit)?;
                    ordered_documents.push(docids);
                    if ordered_documents.iter().map(Vec::len).sum::<usize>() >= limit { break }
            }
                ordered_documents.into_iter().flatten().take(limit).collect()
            },
            None => documents.into_iter().flatten().take(limit).collect(),
        };
-        Ok(SearchResult { found_words, candidates, documents_ids })
+            if limit == 0 { break }
        }
        Ok(SearchResult { matching_words, candidates: initial_candidates, documents_ids })
    }
 }
@ -387,28 +139,21 @@ impl fmt::Debug for Search<'_> {
 #[derive(Default)]
 pub struct SearchResult {
-    pub found_words: HashSet<String>,
+    pub matching_words: MatchingWords,
    pub candidates: RoaringBitmap,
    // TODO those documents ids should be associated with their criteria scores.
    pub documents_ids: Vec<DocumentId>,
 }
-pub fn word_typos(word: &str, is_prefix: bool, max_typo: u8, fst: &fst::Set<Cow<[u8]>>) -> anyhow::Result<Vec<(String, u8)>> {
+pub fn word_derivations(
-    let dfa = {
+    word: &str,
-        let lev = match max_typo {
+    is_prefix: bool,
-            0 => &LEVDIST0,
+    max_typo: u8,
-            1 => &LEVDIST1,
+    fst: &fst::Set<Cow<[u8]>>,
-            _ => &LEVDIST2,
+) -> anyhow::Result<Vec<(String, u8)>>
-        };
+{
        if is_prefix {
            lev.build_prefix_dfa(&word)
        } else {
            lev.build_dfa(&word)
        }
    };
    let mut derived_words = Vec::new();
    let dfa = build_dfa(word, max_typo, is_prefix);
    let mut stream = fst.search_with_state(&dfa).into_stream();
    while let Some((word, state)) = stream.next() {
@ -419,3 +164,17 @@ pub fn word_typos(word: &str, is_prefix: bool, max_typo: u8, fst: &fst::Set<Cow<
    Ok(derived_words)
 }
 /// Builds the Levenshtein DFA matching `word` with the given typo budget.
 ///
 /// A `typos` of 0 or 1 selects the matching builder; any larger value uses
 /// the 2-typo builder. When `is_prefix` is set, a prefix DFA is built instead
 /// of a whole-word DFA.
 pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
    let builder = if typos == 0 {
        &LEVDIST0
    } else if typos == 1 {
        &LEVDIST1
    } else {
        &LEVDIST2
    };

    match is_prefix {
        true => builder.build_prefix_dfa(word),
        false => builder.build_dfa(word),
    }
 }
--- a/milli/src/search/query_tree.rs
+++ b/milli/src/search/query_tree.rs
@ -1,14 +1,13 @@
-#![allow(unused)]
+use std::collections::HashSet;
 use std::borrow::Cow;
 use std::collections::BTreeMap;
 use std::{fmt, cmp, mem};
 use levenshtein_automata::{DFA, Distance};
 use meilisearch_tokenizer::{TokenKind, tokenizer::TokenStream};
 use roaring::RoaringBitmap;
 use slice_group_by::GroupBy;
 use crate::Index;
 use super::build_dfa;
 type IsOptionalWord = bool;
 type IsPrefix = bool;
@ -81,6 +80,13 @@ impl Operation {
            Self::Consecutive(ops)
        }
    }
    pub fn query(&self) -> Option<&Query> {
        match self {
            Operation::Query(query) => Some(query),
            _ => None,
        }
    }
 }
 #[derive(Clone, Eq, PartialEq, Hash)]
@ -96,14 +102,26 @@ pub enum QueryKind {
 }
 impl QueryKind {
-    fn exact(word: String) -> Self {
+    pub fn exact(word: String) -> Self {
        QueryKind::Exact { original_typo: 0, word }
    }
-    fn tolerant(typo: u8, word: String) -> Self {
+    pub fn exact_with_typo(original_typo: u8, word: String) -> Self {
        QueryKind::Exact { original_typo, word }
    }
    pub fn tolerant(typo: u8, word: String) -> Self {
        QueryKind::Tolerant { typo, word }
    }
    pub fn is_tolerant(&self) -> bool {
        matches!(self, QueryKind::Tolerant { .. })
    }
    pub fn is_exact(&self) -> bool {
        matches!(self, QueryKind::Exact { .. })
    }
    pub fn typo(&self) -> u8 {
        match self {
            QueryKind::Tolerant { typo, .. } => *typo,
@ -266,69 +284,45 @@ fn synonyms(ctx: &impl Context, word: &[&str]) -> heed::Result<Option<Vec<Operat
 }
 /// The query tree builder is the interface to build a query tree.
 #[derive(Default)]
 pub struct MatchingWords {
-    inner: BTreeMap<String, IsPrefix>
+    dfas: Vec<(DFA, u8)>,
 }
 impl MatchingWords {
    /// List all words which can be considered as a match for the query tree.
-    pub fn from_query_tree(tree: &Operation, fst: &fst::Set<Cow<[u8]>>) -> Self {
+    pub fn from_query_tree(tree: &Operation) -> Self {
-        Self { inner: fetch_words(tree, fst).into_iter().collect() }
+        Self {
            dfas: fetch_queries(tree).into_iter().map(|(w, t, p)| (build_dfa(w, t, p), t)).collect()
        }
    }
    /// Return true if the word match.
-    pub fn is_match(&self, word: &str) -> bool {
+    pub fn matches(&self, word: &str) -> bool {
-        fn first_char(s: &str) -> Option<&str> {
+        self.dfas.iter().any(|(dfa, typo)| match dfa.eval(word) {
-            s.chars().next().map(|c| &s[..c.len_utf8()])
+            Distance::Exact(t) => t <= *typo,
-        }
+            Distance::AtLeast(_) => false,
-
+        })
        match first_char(word) {
            Some(first) => {
                let left = first.to_owned();
                let right = word.to_owned();
                self.inner.range(left..=right).any(|(w, is_prefix)| *is_prefix || *w == word)
            },
            None => false
        }
    }
 }
 type FetchedWords = Vec<(String, IsPrefix)>;
 /// Lists all words which can be considered as a match for the query tree.
-fn fetch_words(tree: &Operation, fst: &fst::Set<Cow<[u8]>>) -> FetchedWords {
+fn fetch_queries(tree: &Operation) -> HashSet<(&str, u8, IsPrefix)> {
-    fn resolve_branch(tree: &[Operation], fst: &fst::Set<Cow<[u8]>>) -> FetchedWords {
+    fn resolve_ops<'a>(tree: &'a Operation, out: &mut HashSet<(&'a str, u8, IsPrefix)>) {
        tree.iter().map(|op| resolve_ops(op, fst)).flatten().collect()
    }
    fn resolve_query(query: &Query, fst: &fst::Set<Cow<[u8]>>) -> FetchedWords {
        match query.kind.clone() {
            QueryKind::Exact { word, .. } => vec![(word, query.prefix)],
            QueryKind::Tolerant { typo, word } => {
                if let Ok(words) = super::word_typos(&word, query.prefix, typo, fst) {
                    words.into_iter().map(|(w, _)| (w, query.prefix)).collect()
                } else {
                    vec![(word, query.prefix)]
                }
            }
        }
    }
    fn resolve_ops(tree: &Operation, fst: &fst::Set<Cow<[u8]>>) -> FetchedWords {
        match tree {
            Operation::Or(_, ops) | Operation::And(ops) | Operation::Consecutive(ops) => {
-                resolve_branch(ops.as_slice(), fst)
+                ops.as_slice().iter().for_each(|op| resolve_ops(op, out));
            },
-            Operation::Query(ops) => {
+            Operation::Query(Query { prefix, kind }) => {
-                resolve_query(ops, fst)
+                let typo = if kind.is_exact() { 0 } else { kind.typo() };
                out.insert((kind.word(), typo, *prefix));
            },
        }
    }
-    let mut words = resolve_ops(tree, fst);
+    let mut queries = HashSet::new();
-    words.sort_unstable();
+    resolve_ops(tree, &mut queries);
-    words.dedup();
+    queries
    words
 }
 /// Main function that creates the final query tree from the primitive query.
@ -537,7 +531,10 @@ pub fn maximum_proximity(operation: &Operation) -> usize {
    use Operation::{Or, And, Query, Consecutive};
    match operation {
        Or(_, ops) => ops.iter().map(maximum_proximity).max().unwrap_or(0),
-        And(ops) => ops.len().saturating_sub(1) * 8,
+        And(ops) => {
            ops.iter().map(maximum_proximity).sum::<usize>()
            + ops.len().saturating_sub(1) * 7
        },
        Query(_) | Consecutive(_) => 0,
    }
 }
@ -547,7 +544,7 @@ mod test {
    use std::collections::HashMap;
    use fst::Set;
-    use maplit::hashmap;
+    use maplit::{hashmap, hashset};
    use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
    use rand::{Rng, SeedableRng, rngs::StdRng};
@ -958,26 +955,26 @@ mod test {
        let context = TestContext::default();
        let query_tree = context.build(false, true, tokens).unwrap().unwrap();
-        let expected = vec![
+        let expected = hashset!{
-            ("city".to_string(), false),
+            ("word",                0, false),
-            ("earth".to_string(), false),
+            ("nyc",                 0, false),
-            ("nature".to_string(), false),
+            ("wordsplit",           2, false),
-            ("new".to_string(), false),
+            ("wordsplitnycworld",   2, true),
-            ("nyc".to_string(), false),
+            ("nature",              0, false),
-            ("split".to_string(), false),
+            ("new",                 0, false),
-            ("word".to_string(), false),
+            ("city",                0, false),
-            ("word".to_string(), true),
+            ("world",               1, true),
-            ("world".to_string(), true),
+            ("york",                0, false),
-            ("york".to_string(), false),
+            ("split",               0, false),
-
+            ("nycworld",            1, true),
-        ];
+            ("earth",               0, false),
            ("wordsplitnyc",        2, false),
        };
        let mut keys = context.postings.keys().collect::<Vec<_>>();
        keys.sort_unstable();
        let set = fst::Set::from_iter(keys).unwrap().map_data(|v| Cow::Owned(v)).unwrap();
        let words = fetch_words(&query_tree, &set);
        let words = fetch_queries(&query_tree);
        assert_eq!(expected, words);
    }
 }
--- a/milli/src/update/index_documents/store.rs
+++ b/milli/src/update/index_documents/store.rs
@ -13,7 +13,7 @@ use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
 use heed::BytesEncode;
 use linked_hash_map::LinkedHashMap;
 use log::{debug, info};
-use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
+use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token, TokenKind, token::SeparatorKind};
 use ordered_float::OrderedFloat;
 use roaring::RoaringBitmap;
 use serde_json::Value;
@ -274,13 +274,15 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
        self.insert_words_pairs_proximities_docids(words_pair_proximities, document_id)?;
        // We store document_id associated with all the words the record contains.
-        for (word, _) in words_positions.drain() {
+        for (word, _) in words_positions.iter() {
-            self.insert_word_docid(&word, document_id)?;
+            self.insert_word_docid(word, document_id)?;
        }
        self.documents_writer.insert(document_id.to_be_bytes(), record)?;
        Self::write_docid_word_positions(&mut self.docid_word_positions_writer, document_id, words_positions)?;
        words_positions.clear();
        // We store document_id associated with all the field id and values.
        for (field, values) in facet_values.drain() {
            for value in values {
@ -471,14 +473,11 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
                            };
                            let analyzed = self.analyzer.analyze(&content);
-                            let tokens = analyzed
+                            let tokens = process_tokens(analyzed.tokens());
                                .tokens()
                                .filter(|t| t.is_word())
                                .map(|t| t.text().to_string());
-                            for (pos, word) in tokens.enumerate().take(MAX_POSITION) {
+                            for (pos, token) in tokens.take_while(|(pos, _)| *pos < MAX_POSITION) {
                                let position = (attr as usize * MAX_POSITION + pos) as u32;
-                                words_positions.entry(word).or_insert_with(SmallVec32::new).push(position);
+                                words_positions.entry(token.text().to_string()).or_insert_with(SmallVec32::new).push(position);
                            }
                        }
                    }
@ -609,6 +608,36 @@ enum FacetValue {
    Integer(i64),
 }
 /// Takes an iterator of tokens and computes the relative position of each word,
 /// depending on the kind of separator that precedes it.
 ///
 /// After a `Hard` separator an additional relative proximity of 8 is added
 /// between words; otherwise the standard proximity of 1 between words is kept.
 /// Leading separators are skipped and the first word gets position 0.
 ///
 /// Only the tokens for which `is_word()` is true are yielded, each paired with
 /// its computed position.
 fn process_tokens<'a>(tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<Item = (usize, Token<'a>)> {
    tokens
        // Ignore the separators that come before the first non-separator token.
        .skip_while(|token| token.is_separator().is_some())
        // State: (current position, kind of the last token that was recorded).
        .scan((0, None), |(offset, prev_kind), token| {
                match token.kind {
                    TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
                        // The very first token does not move the position (`None`).
                        *offset += match *prev_kind {
                            Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
                            Some(_) => 1,
                            None => 0,
                        };
                        *prev_kind = Some(token.kind)
                    }
                    TokenKind::Separator(SeparatorKind::Hard) => {
                        *prev_kind = Some(token.kind);
                    }
                    // A soft separator never overrides a previously seen hard separator.
                    TokenKind::Separator(SeparatorKind::Soft)
                        if *prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) => {
                        *prev_kind = Some(token.kind);
                    }
                    _ => (),
                }
            Some((*offset, token))
        })
    // Separators and other non-word tokens only influence positions; they are not returned.
    .filter(|(_, t)| t.is_word())
 }
 fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result<SmallVec8<FacetValue>> {
    use FacetValue::*;
--- a/milli/src/update/words_prefixes.rs
+++ b/milli/src/update/words_prefixes.rs
@ -41,7 +41,7 @@ impl<'t, 'u, 'i> WordsPrefixes<'t, 'u, 'i> {
            chunk_fusing_shrink_size: None,
            max_nb_chunks: None,
            max_memory: None,
-            threshold: 0.01, // 1%
+            threshold: 0.1 / 100.0, // 0.1%
            max_prefix_length: 4,
            _update_id: update_id,
        }