From 5e691c2140a630acfd89541f6052820112ee714b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Sun, 7 Jul 2019 12:41:20 +0200
Subject: [PATCH 01/19] feat: Introduce the QueryEnhancer type

---
 meilidb-core/src/lib.rs            |   1 +
 meilidb-core/src/query_builder.rs  |   2 +-
 meilidb-core/src/query_enhancer.rs | 395 +++++++++++++++++++++++++++++
 3 files changed, 397 insertions(+), 1 deletion(-)
 create mode 100644 meilidb-core/src/query_enhancer.rs

diff --git a/meilidb-core/src/lib.rs b/meilidb-core/src/lib.rs
index 0976fbde8..f5975e3b5 100644
--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@@ -4,6 +4,7 @@
 mod automaton;
 mod distinct_map;
 mod query_builder;
+mod query_enhancer;
 mod reordered_attrs;
 mod store;
 pub mod criterion;
diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index 43da389a8..175430554 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -6,12 +6,12 @@ use std::{cmp, mem};
 
 use fst::{Streamer, IntoStreamer};
 use hashbrown::HashMap;
+use levenshtein_automata::DFA;
 use log::info;
 use meilidb_tokenizer::{is_cjk, split_query_string};
 use rayon::slice::ParallelSliceMut;
 use sdset::SetBuf;
 use slice_group_by::GroupByMut;
-use levenshtein_automata::DFA;
 
 use crate::automaton::{build_dfa, build_prefix_dfa};
 use crate::distinct_map::{DistinctMap, BufferedDistinctMap};
diff --git a/meilidb-core/src/query_enhancer.rs b/meilidb-core/src/query_enhancer.rs
new file mode 100644
index 000000000..6280ae11e
--- /dev/null
+++ b/meilidb-core/src/query_enhancer.rs
@@ -0,0 +1,395 @@
+use std::ops::Range;
+use std::cmp::Ordering::{Less, Greater, Equal};
+
+/// Returns `true` if the specified range can accept the given replacement words.
+/// Returns `false` if the replacement words are already present in the original query
+/// or if there are fewer replacement words than there are in the range to replace.
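+///
+/// For example (an illustrative call following the rules above, not part of
+/// the original comment): `rewrite_range_with(&["NYC", "subway"], 0..1, &["new", "york", "city"])`
+/// returns `true` because the three replacement words cover more than the
+/// one-word range and are not already present in the query.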
+//
+//
+// ## Ignored because already present in original
+//
+// new york city subway
+// --------  ^^^^
+//     \      /
+//  [new york city]
+//
+//
+// ## Ignored because smaller than the original
+//
+// new york city subway
+// -------------
+//    \       /
+//    [new york]
+//
+//
+// ## Accepted because bigger than the original
+//
+// NYC subway
+// ---
+//  / \
+// /   \
+// [new york city]
+//
+fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
+where S: AsRef<str>,
+      T: AsRef<str>,
+{
+    if words.len() <= range.len() {
+        // there are fewer or as many replacement words
+        // as there are already in the replaced range
+        return false
+    }
+
+    // retrieve the part to rewrite but with the length
+    // of the replacement part
+    let original = query.iter().skip(range.start).take(words.len());
+
+    // check if the original query doesn't already contain
+    // the replacement words
+    !original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref))
+}
+
+struct FakeIntervalTree {
+    intervals: Vec<(Range<usize>, (usize, usize))>, // origin, real_length
+}
+
+impl FakeIntervalTree {
+    fn new(mut intervals: Vec<(Range<usize>, (usize, usize))>) -> FakeIntervalTree {
+        intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end));
+        FakeIntervalTree { intervals }
+    }
+
+    fn query(&self, point: usize) -> Option<(Range<usize>, (usize, usize))> {
+        let element = self.intervals.binary_search_by(|(r, _)| {
+            if point >= r.start {
+                if point < r.end { Equal } else { Less }
+            } else { Greater }
+        });
+
+        let n = match element { Ok(n) => n, Err(n) => n };
+
+        match self.intervals.get(n) {
+            Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)),
+            _otherwise => None,
+        }
+    }
+}
+
+pub struct QueryEnhancerBuilder<'a, S> {
+    query: &'a [S],
+    origins: Vec<usize>,
+    real_to_origin: Vec<(Range<usize>, (usize, usize))>,
+}
+
+impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
+    pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
+        // we initialize origins query indices based on their positions
+        let origins: Vec<_> = (0..query.len() + 1).collect();
+        let real_to_origin = origins.iter().map(|&o| (o..o+1, (o, 1))).collect();
+
+        QueryEnhancerBuilder { query, origins, real_to_origin }
+    }
+
+    /// Update the final real to origin query indices mapping.
+    ///
+    /// `range` is the original words range that these `replacement` words replace
+    /// and `real` is the first real query index of these replacement words.
+    pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
+    where T: AsRef<str>,
+    {
+        // check if the range of original words
+        // can be rewritten with the replacement words
+        if rewrite_range_with(self.query, range.clone(), replacement) {
+
+            // this range can be replaced so we need to
+            // modify the origins accordingly
+            let offset = replacement.len() - range.len();
+
+            let previous_padding = self.origins[range.end - 1];
+            let current_offset = (self.origins[range.end] - 1) - previous_padding;
+            let diff = offset.saturating_sub(current_offset);
+            self.origins[range.end] += diff;
+
+            for r in &mut self.origins[range.end + 1..] {
+                *r += diff;
+            }
+        }
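+
+        // For example (illustration only): for the two-word query ["NYC", "subway"],
+        // `origins` starts as [0, 1, 2]; after declare(0..1, 2, &["new", "york", "city"])
+        // it becomes [0, 3, 4], which pushes the original "subway" two positions
+        // to the right of the three replacement words.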
+
+        // we need to store the real number and origins relations
+        // this way it will be possible to know by how many
+        // we need to pad real query indices
+        let real_range = real..real + replacement.len().max(range.len());
+        let real_length = replacement.len();
+        self.real_to_origin.push((real_range, (range.start, real_length)));
+    }
+
+    pub fn build(self) -> QueryEnhancer {
+        QueryEnhancer {
+            origins: self.origins,
+            real_to_origin: FakeIntervalTree::new(self.real_to_origin),
+        }
+    }
+}
+
+pub struct QueryEnhancer {
+    origins: Vec<usize>,
+    real_to_origin: FakeIntervalTree,
+}
+
+impl QueryEnhancer {
+    /// Returns the query indices to use to replace this real query index.
+    pub fn replacement(&self, real: u32) -> Range<u32> {
+        let real = real as usize;
+
+        // query the fake interval tree with the real query index
+        let (range, (origin, real_length)) =
+            self.real_to_origin
+                .query(real)
+                .expect("real has never been declared");
+
+        // if `real` is the end bound of the range
+        if (range.start + real_length - 1) == real {
+            let mut count = range.len();
+            let mut new_origin = origin;
+            for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
+                let len = slice[1] - slice[0];
+                count = count.saturating_sub(len);
+                if count == 0 { new_origin = origin + i; break }
+            }
+
+            let n = real - range.start;
+            let start = self.origins[origin];
+            let end = self.origins[new_origin + 1];
+            let remaining = (end - start) - n;
+
+            Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
+
+        } else {
+            // just return the origin along with
+            // the real position of the word
+            let n = real as usize - range.start;
+            let origin = self.origins[origin];
+
+            Range { start: (origin + n) as u32, end: (origin + n + 1) as u32 }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn original_unmodified() {
+        let query = ["new", "york", "city", "subway"];
+        //            0       1       2        3
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // new york = new york city
+        builder.declare(0..2, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // new
+        assert_eq!(enhancer.replacement(1), 1..2); // york
+        assert_eq!(enhancer.replacement(2), 2..3); // city
+        assert_eq!(enhancer.replacement(3), 3..4); // subway
+        assert_eq!(enhancer.replacement(4), 0..1); // new
+        assert_eq!(enhancer.replacement(5), 1..2); // york
+        assert_eq!(enhancer.replacement(6), 2..3); // city
+    }
+
+    #[test]
+    fn simple_growing() {
+        let query = ["new", "york", "subway"];
+        //            0       1        2
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // new york = new york city
+        builder.declare(0..2, 3, &["new", "york", "city"]);
+        //                    ^      3       4       5
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // new
+        assert_eq!(enhancer.replacement(1), 1..3); // york
+        assert_eq!(enhancer.replacement(2), 3..4); // subway
+        assert_eq!(enhancer.replacement(3), 0..1); // new
+        assert_eq!(enhancer.replacement(4), 1..2); // york
+        assert_eq!(enhancer.replacement(5), 2..3); // city
+    }
+
+    #[test]
+    fn same_place_growings() {
+        let query = ["NY", "subway"];
+        //            0        1
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NY = new york
+        builder.declare(0..1, 2, &["new", "york"]);
+        //                    ^      2       3
+
+        // NY = new york city
+        builder.declare(0..1, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        // NY = NYC
+        builder.declare(0..1, 7, &["NYC"]);
+        //                    ^      7
+
+        // NY = new york city
+        builder.declare(0..1, 8, &["new", "york", "city"]);
+        //                    ^      8       9      10
+
+        // subway = underground train
+        builder.declare(1..2, 11, &["underground", "train"]);
+        //                    ^          11          12
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..3); // NY
+        assert_eq!(enhancer.replacement(1), 3..5); // subway
+        assert_eq!(enhancer.replacement(2), 0..1); // new
+        assert_eq!(enhancer.replacement(3), 1..3); // york
+        assert_eq!(enhancer.replacement(4), 0..1); // new
+        assert_eq!(enhancer.replacement(5), 1..2); // york
+        assert_eq!(enhancer.replacement(6), 2..3); // city
+        assert_eq!(enhancer.replacement(7), 0..3); // NYC
+        assert_eq!(enhancer.replacement(8), 0..1); // new
+        assert_eq!(enhancer.replacement(9), 1..2); // york
+        assert_eq!(enhancer.replacement(10), 2..3); // city
+        assert_eq!(enhancer.replacement(11), 3..4); // underground
+        assert_eq!(enhancer.replacement(12), 4..5); // train
+    }
+
+    #[test]
+    fn bigger_growing() {
+        let query = ["NYC", "subway"];
+        //             0        1
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NYC = new york city
+        builder.declare(0..1, 2, &["new", "york", "city"]);
+        //                    ^      2       3       4
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..3); // NYC
+        assert_eq!(enhancer.replacement(1), 3..4); // subway
+        assert_eq!(enhancer.replacement(2), 0..1); // new
+        assert_eq!(enhancer.replacement(3), 1..2); // york
+        assert_eq!(enhancer.replacement(4), 2..3); // city
+    }
+
+    #[test]
+    fn middle_query_growing() {
+        let query = ["great", "awesome", "NYC", "subway"];
+        //              0         1        2        3
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NYC = new york city
+        builder.declare(2..3, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // great
+        assert_eq!(enhancer.replacement(1), 1..2); // awesome
+        assert_eq!(enhancer.replacement(2), 2..5); // NYC
+        assert_eq!(enhancer.replacement(3), 5..6); // subway
+        assert_eq!(enhancer.replacement(4), 2..3); // new
+        assert_eq!(enhancer.replacement(5), 3..4); // york
+        assert_eq!(enhancer.replacement(6), 4..5); // city
+    }
+
+    #[test]
+    fn end_query_growing() {
+        let query = ["NYC", "subway"];
+        //             0        1
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // subway = underground train
+        builder.declare(1..2, 2, &["underground", "train"]);
+        //                    ^          2           3
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // NYC
+        assert_eq!(enhancer.replacement(1), 1..3); // subway
+        assert_eq!(enhancer.replacement(2), 1..2); // underground
+        assert_eq!(enhancer.replacement(3), 2..3); // train
+    }
+
+    #[test]
+    fn multiple_growings() {
+        let query = ["great", "awesome", "NYC", "subway"];
+        //              0         1        2        3
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NYC = new york city
+        builder.declare(2..3, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        // subway = underground train
+        builder.declare(3..4, 7, &["underground", "train"]);
+        //                    ^          7           8
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // great
+        assert_eq!(enhancer.replacement(1), 1..2); // awesome
+        assert_eq!(enhancer.replacement(2), 2..5); // NYC
+        assert_eq!(enhancer.replacement(3), 5..7); // subway
+        assert_eq!(enhancer.replacement(4), 2..3); // new
+        assert_eq!(enhancer.replacement(5), 3..4); // york
+        assert_eq!(enhancer.replacement(6), 4..5); // city
+        assert_eq!(enhancer.replacement(7), 5..6); // underground
+        assert_eq!(enhancer.replacement(8), 6..7); // train
+    }
+
+    #[test]
+    fn multiple_probable_growings() {
+        let query = ["great", "awesome", "NYC", "subway"];
+        //              0         1        2        3
+        let mut builder = QueryEnhancerBuilder::new(&query);
+
+        // NYC = new york city
+        builder.declare(2..3, 4, &["new", "york", "city"]);
+        //                    ^      4       5       6
+
+        // subway = underground train
+        builder.declare(3..4, 7, &["underground", "train"]);
+        //                    ^          7           8
+
+        // great awesome = good
+        builder.declare(0..2, 9, &["good"]);
+        //                    ^       9
+
+        // awesome NYC = NY
+        builder.declare(1..3, 10, &["NY"]);
+        //                    ^^     10
+
+        // NYC subway = metro
+        builder.declare(2..4, 11, &["metro"]);
+        //                    ^^      11
+
+        let enhancer = builder.build();
+
+        assert_eq!(enhancer.replacement(0), 0..1); // great
+        assert_eq!(enhancer.replacement(1), 1..2); // awesome
+        assert_eq!(enhancer.replacement(2), 2..5); // NYC
+        assert_eq!(enhancer.replacement(3), 5..7); // subway
+        assert_eq!(enhancer.replacement(4), 2..3); // new
+        assert_eq!(enhancer.replacement(5), 3..4); // york
+        assert_eq!(enhancer.replacement(6), 4..5); // city
+        assert_eq!(enhancer.replacement(7), 5..6); // underground
+        assert_eq!(enhancer.replacement(8), 6..7); // train
+        assert_eq!(enhancer.replacement(9), 0..2); // good
+        assert_eq!(enhancer.replacement(10), 1..5); // NY
+        assert_eq!(enhancer.replacement(11), 2..5); // metro
+    }
+}

From f478bbf826a50f31cb5e40cae6f0ea95eed0d504 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Sun, 7 Jul 2019 20:27:37 +0200
Subject: [PATCH 02/19] feat: Introduce the QueryEnhancer in the query synonym
 system

---
 meilidb-core/src/query_builder.rs | 88 +++++++++++++++++++------------
 1 file changed, 54 insertions(+), 34 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index 175430554..7e79ac15e 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -14,8 +14,9 @@ use sdset::SetBuf;
 use slice_group_by::GroupByMut;
 
 use crate::automaton::{build_dfa, build_prefix_dfa};
-use crate::distinct_map::{DistinctMap, BufferedDistinctMap};
 use crate::criterion::Criteria;
+use crate::distinct_map::{DistinctMap, BufferedDistinctMap};
+use crate::query_enhancer::{QueryEnhancerBuilder, QueryEnhancer};
 use crate::raw_documents_from_matches;
 use crate::reordered_attrs::ReorderedAttrs;
 use crate::{TmpMatch, Highlight, DocumentId, Store, RawDocument, Document};
@@ -91,18 +92,36 @@ fn split_best_frequency<'a, S: Store>(
     Ok(best.map(|(_, l, r)| (l, r)))
 }
 
-fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<Vec<(Automaton, String)>, S::Error> {
+fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automaton>, QueryEnhancer), S::Error> {
     let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
     let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
-    let mut automatons = Vec::new();
     let synonyms = store.synonyms()?;
 
-    for n in 1..=NGRAMS {
-        let mut query_index = 0;
-        let mut ngrams = query_words.windows(n).peekable();
+    let mut automatons = Vec::new();
+    let mut enhancer_builder = QueryEnhancerBuilder::new(&query_words);
 
-        while let Some(ngram_slice) = ngrams.next() {
+    // We must not declare the original words to the query enhancer
+    // *but* we need to push them in the automatons list first
+    let mut original_words = query_words.iter().enumerate().peekable();
+    while let Some((query_index, word)) = original_words.next() {
+
+        let has_following_word = original_words.peek().is_some();
+        let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);
+
+        let automaton = if not_prefix_dfa {
+            Automaton::exact(query_index, word)
+        } else {
+            Automaton::prefix_exact(query_index, word)
+        };
+        automatons.push(automaton);
+    }
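+
+    // the original words occupy the first real query indices (0..query_words.len()),
+    // so from here on `automatons.len()` is always the next free real query index,
+    // which the synonym words declared below are numbered from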
+
+    for n in 1..=NGRAMS {
+
+        let mut ngrams = query_words.windows(n).enumerate().peekable();
+        while let Some((query_index, ngram_slice)) = ngrams.next() {
+
+            let query_range = query_index..query_index + n;
             let ngram_nb_words = ngram_slice.len();
             let ngram = ngram_slice.join(" ");
 
@@ -127,15 +146,19 @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automaton>, QueryEnhancer), S::Error> {
+                    let synonyms_words: Vec<_> = split_query_string(synonyms).collect();
+                    let nb_synonym_words = synonyms_words.len();
 
-                    for synonym in split_query_string(synonyms) {
+                    let real_query_index = automatons.len();
+                    enhancer_builder.declare(query_range.clone(), real_query_index, &synonyms_words);
+
+                    for (i, synonym) in synonyms_words.into_iter().enumerate() {
                         let automaton = if nb_synonym_words == 1 {
-                            Automaton::exact(query_index, synonym)
+                            Automaton::exact(real_query_index + i, synonym)
                         } else {
-                            Automaton::non_exact(query_index, synonym)
+                            Automaton::non_exact(real_query_index + i, synonym)
                         };
-                        automatons.push((automaton, synonym.to_owned()));
+                        automatons.push(automaton);
                     }
                 }
             }
@@ -145,37 +168,34 @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automaton>, QueryEnhancer), S::Error> {
 
 impl<'c, S, FI> QueryBuilder<'c, S, FI>
 where S: Store,
 {
     fn query_all(&self, query: &str) -> Result<Vec<RawDocument>, S::Error> {
-        let automatons = generate_automatons(query, &self.store)?;
+        let (automatons, query_enhancer) = generate_automatons(query, &self.store)?;
         let words = self.store.words()?.as_fst();
         let searchables = self.searchable_attrs.as_ref();

From e65d7418b7d06024a1b9a0e3ce42358f8ef3fdaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Fri, 12 Jul 2019 16:05:15 +0200
Subject: [PATCH 03/19] feat: Remove the query index from the Automaton type

---
 meilidb-core/src/query_builder.rs | 162 ++++++++++++++++++++++--------
 1 file changed, 120 insertions(+), 42 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index 7e79ac15e..e88ff8deb 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -11,7 +11,7 @@ use log::info;
 use meilidb_tokenizer::{is_cjk, split_query_string};
 use rayon::slice::ParallelSliceMut;
 use sdset::SetBuf;
-use slice_group_by::GroupByMut;
+use slice_group_by::{GroupBy, GroupByMut};
 
 use crate::automaton::{build_dfa, build_prefix_dfa};
 use crate::criterion::Criteria;
@@ -24,34 +24,30 @@ use crate::{TmpMatch, Highlight, DocumentId, Store, RawDocument, Document};
 const NGRAMS: usize = 3;
 
 struct Automaton {
-    query_index: usize,
     query_len: usize,
     is_exact: bool,
     dfa: DFA,
 }
 
 impl Automaton {
-    fn exact(query_index: usize, query: &str) -> Automaton {
+    fn exact(query: &str) -> Automaton {
         Automaton {
-            query_index,
             query_len: query.len(),
             is_exact: true,
             dfa: build_dfa(query),
         }
     }
 
-    fn prefix_exact(query_index: usize, query: &str) -> Automaton {
+    fn prefix_exact(query: &str) -> Automaton {
         Automaton {
-            query_index,
             query_len: query.len(),
             is_exact: true,
             dfa: build_prefix_dfa(query),
         }
     }
 
-    fn non_exact(query_index: usize, query: &str) -> Automaton {
+    fn non_exact(query: &str) -> Automaton {
         Automaton {
-            query_index,
             query_len: query.len(),
             is_exact: false,
             dfa: build_dfa(query),
@@ -102,16 +98,16 @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automaton>, QueryEnhancer), S::Error> {
@@ ... @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automaton>, QueryEnhancer), S::Error> {
@@ ... @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automaton>, QueryEnhancer), S::Error> {
@@ ... @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automaton>, QueryEnhancer), S::Error> {
@@ ... @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automaton>, QueryEnhancer), S::Error> {
 
 pub struct QueryBuilder<'c, S, FI = fn(DocumentId) -> bool> {
     store: S,
     criteria: Criteria<'c>,
@@ -275,7 +258,7 @@ where S: Store,
         while let Some((input, indexed_values)) = stream.next() {
             for iv in indexed_values {
-                let Automaton { query_index, is_exact, query_len, ref dfa } = automatons[iv.index];
+                let Automaton { is_exact, query_len, ref dfa } = automatons[iv.index];
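+                // the position of an automaton in `automatons` is now its real
+                // query index, so `iv.index` is used directly as the query index below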
                 let distance = dfa.eval(input).to_u8();
                 let is_exact = is_exact && distance == 0 && input.len() == query_len;
 
@@ -288,34 +271,129 @@ where S: Store,
                 for di in doc_indexes.as_slice() {
                     let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
                     if let Some(attribute) = attribute {
+
                         let match_ = TmpMatch {
-                            query_index: query_index as u32,
+                            query_index: iv.index as u32,
                             distance,
                             attribute,
                             word_index: di.word_index,
                             is_exact,
                         };
+
+                        // TODO do not store in the same matches vec
                         let highlight = Highlight {
                             attribute: di.attribute,
                             char_index: di.char_index,
                             char_length: di.char_length,
                         };
+
                         matches.push((di.document_id, match_, highlight));
                     }
                 }
             }
         }
 
-        // rewrite the matched positions for next criteria evaluations
-        matches.par_sort_unstable();
-        rewrite_matched_positions(&mut matches);
+        // we sort the matches to make them rewritable
+        matches.par_sort_unstable_by_key(|(id, match_, _)| {
+            (*id, match_.attribute, match_.word_index) // query_id ???
+        });
+
+        let mut padded_matches = Vec::with_capacity(matches.len());
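+
+        // For instance (an illustrative note): with the synonym NY = "new york city",
+        // the query "NY subway" indexes "subway" at word_index 1; the padding loop
+        // below moves it to word_index 3 so that proximity-based criteria still see
+        // it right after the three replacement words.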
+        for same_document in matches.linear_group_by(|a, b| a.0 == b.0) {
+
+            for same_attribute in same_document.linear_group_by(|a, b| a.1.attribute == b.1.attribute) {
+
+                let mut padding = 0;
+                let mut iter = same_attribute.linear_group_by(|a, b| a.1.word_index == b.1.word_index);
+                while let Some(same_word_index) = iter.next() {
+
+                    let mut biggest = 0;
+                    for (id, match_, highlight) in same_word_index {
+
+                        let mut replacement = query_enhancer.replacement(match_.query_index);
+                        let replacement_len = replacement.len() - 1;
+                        let nexts = iter.remainder().linear_group_by(|a, b| a.1.word_index == b.1.word_index);
+
+                        if let Some(query_index) = replacement.next() {
+                            let match_ = TmpMatch {
+                                query_index,
+                                word_index: match_.word_index + padding as u16,
+                                ..match_.clone()
+                            };
+                            padded_matches.push((*id, match_, *highlight));
+                        }
+
+                        let mut found = false;
+
+                        // look ahead and if there already is a match
+                        // corresponding to this padding word, abort the padding
+                        'padding: for (x, next_group) in nexts.enumerate() {
+
+                            for (i, query_index) in replacement.clone().enumerate().skip(x) {
+                                let padmatch_ = TmpMatch {
+                                    query_index,
+                                    word_index: match_.word_index + padding as u16 + (i + 1) as u16,
+                                    ..match_.clone()
+                                };
+
+                                for (_, nmatch_, _) in next_group {
+                                    let mut rep = query_enhancer.replacement(nmatch_.query_index);
+                                    let query_index = rep.next().unwrap();
+                                    let nmatch_ = TmpMatch { query_index, ..nmatch_.clone() };
+                                    if nmatch_.query_index == padmatch_.query_index {
+
+                                        if !found {
+                                            // if we find a corresponding padding for the
+                                            // first time we must push preceding paddings
+                                            for (i, query_index) in replacement.clone().enumerate().take(i) {
+                                                let match_ = TmpMatch {
+                                                    query_index,
+                                                    word_index: match_.word_index + padding as u16 + (i + 1) as u16,
+                                                    ..match_.clone()
+                                                };
+                                                padded_matches.push((*id, match_, *highlight));
+                                                biggest = biggest.max(i + 1);
+                                            }
+                                        }
+
+                                        padded_matches.push((*id, padmatch_, *highlight));
+                                        found = true;
+                                        continue 'padding;
+                                    }
+                                }
+                            }
+
+                            // if we do not find a corresponding padding in the
+                            // next groups, stop here and pad what was found
+                            break
+                        }
+
+                        if !found {
+                            // if no padding was found in the following matches
+                            // we must insert the entire padding
+                            for (i, query_index) in replacement.enumerate() {
+                                let match_ = TmpMatch {
+                                    query_index,
+                                    word_index: match_.word_index + padding as u16 + (i + 1) as u16,
+                                    ..match_.clone()
+                                };
+                                padded_matches.push((*id, match_, *highlight));
+                            }
+
+                            biggest = biggest.max(replacement_len);
+                        }
+                    }
+
+                    padding += biggest;
+                }
+            }
+
+        }
+
+        let total_matches = padded_matches.len();
+        padded_matches.par_sort_unstable();
+        let padded_matches = SetBuf::new_unchecked(padded_matches);
 
-        let total_matches = matches.len();
-        let padded_matches = {
-            matches.par_sort_unstable();
-            matches.dedup();
-            SetBuf::new_unchecked(matches)
-        };
         let raw_documents = raw_documents_from_matches(padded_matches);
 
         info!("{} total documents to classify", raw_documents.len());

From 225a3bf184ae00697e18062b76e82be5eac92fb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Sun, 7 Jul 2019 19:57:42 +0200
Subject: [PATCH 04/19] test: Produce tests that work with the new cumulative
 word index system

---
 meilidb-core/src/query_builder.rs | 484 +++++++++++++++++++++++-------
 1 file changed, 370 insertions(+), 114 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index e88ff8deb..5268edd27 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -937,17 +937,22 @@ mod tests {
 
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NY ± new
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NY ± york
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // NY ± city
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // subway
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway
-            assert_matches!(iter.next(), None); // position rewritten ^
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // subway
+            assert_matches!(iter.next(), None); // position rewritten ^
         });
         assert_matches!(iter.next(), None);
@@ -957,24 +962,141 @@ mod tests {
 
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NYC ± new
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NYC ± york
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // NYC ± city
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // subway
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // subway
-            assert_matches!(iter.next(), None); // position rewritten ^
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // subway
+            assert_matches!(iter.next(), None); // position rewritten ^
         });
         assert_matches!(iter.next(), None);
     }
 
+    #[test]
+    fn unique_to_multiword_synonyms_words_proximity() {
+        let mut store = InMemorySetStore::from_iter(vec![
+            ("new",    &[doc_char_index(0, 0, 0)][..]),
+            ("york",   &[doc_char_index(0, 1, 1)][..]),
+            ("city",   &[doc_char_index(0, 2, 2)][..]),
+            ("subway", &[doc_char_index(0, 3, 3)][..]),
+
+            ("york",   &[doc_char_index(1, 0, 0)][..]),
+            ("new",    &[doc_char_index(1, 1, 1)][..]),
+            ("subway", &[doc_char_index(1, 2, 2)][..]),
+
+            ("NY",     &[doc_char_index(2, 0, 0)][..]),
+            ("subway", &[doc_char_index(2, 1, 1)][..]),
+        ]);
+
+        store.add_synonym("NY", SetBuf::from_dirty(vec!["york new"]));
+
+        let builder = QueryBuilder::new(&store);
+        let results = builder.query("NY", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY ± york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // NY ± new
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // new = NY
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 1, .. })); // york = NY
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 0, .. })); // new = NY
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+
+        let builder = QueryBuilder::new(&store);
+        let results = builder.query("new york", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // york
+            assert_matches!(matches.next(), None); // position rewritten ^
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 1, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 0, .. })); // new
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+    }
+
+    #[test]
+    fn unique_to_multiword_synonyms_cumulative_word_index() {
+        let mut store = InMemorySetStore::from_iter(vec![
+            ("NY",     &[doc_char_index(0, 0, 0)][..]),
+            ("subway", &[doc_char_index(0, 1, 1)][..]),
+
+            ("new",    &[doc_char_index(1, 0, 0)][..]),
+            ("york",   &[doc_char_index(1, 1, 1)][..]),
+            ("subway", &[doc_char_index(1, 2, 2)][..]),
+        ]);
+
+        store.add_synonym("new york", SetBuf::from_dirty(vec!["NY"]));
+
+        let builder = QueryBuilder::new(&store);
+        let results = builder.query("NY subway", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NY
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // subway
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 2, is_exact: true, .. })); // subway
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+
+        let builder = QueryBuilder::new(&store);
+        let results = builder.query("new york subway", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new = NY
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york = NY
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // subway
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // subway
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+    }
+
     #[test]
     /// Unique word has multi-word synonyms
-    fn harder_unique_to_multiword_synonyms() {
+    fn harder_unique_to_multiword_synonyms_one() {
         let mut store = InMemorySetStore::from_iter(vec![
             ("new",  &[doc_char_index(0, 0, 0)][..]),
             ("york", &[doc_char_index(0, 1, 1)][..]),
@@ -997,17 +1119,22 @@ mod tests {
 
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
-            assert_matches!(iter.next(), None);
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: true, .. })); // subway
+            assert_matches!(iter.next(), None); // position rewritten ^
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
-            assert_matches!(iter.next(), None); // position rewritten ^
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: true, .. })); // subway
+            assert_matches!(iter.next(), None); // position rewritten ^
        });
         assert_matches!(iter.next(), None);
@@ -1017,16 +1144,22 @@ mod tests {
 
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // NYC
+            //                                  because one-word to one-word ^^^^
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: true, .. })); // subway
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: true, .. })); // subway
             assert_matches!(iter.next(), None); // position rewritten ^
         });
         assert_matches!(iter.next(), None);
@@ -1059,19 +1192,25 @@ mod tests {
 
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // underground = subway
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // train = subway
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 3, .. })); // broken
-            assert_matches!(iter.next(), None); // position rewritten ^
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: false, .. })); // underground = subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 5, is_exact: false, .. })); // train = subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 5, word_index: 6, is_exact: true, .. })); // broken
+            assert_matches!(iter.next(), None); // position rewritten ^
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
-            assert_matches!(iter.next(), None);
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city = NY
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: true, .. })); // underground = subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 5, is_exact: true, .. })); // train = subway
+            assert_matches!(iter.next(), None); // position rewritten ^
         });
         assert_matches!(iter.next(), None);
@@ -1081,18 +1220,25 @@ mod tests {
 
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city = NYC
+            //                                  because one-word to one-word ^^^^
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: true, .. })); // underground = subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 5, is_exact: true, .. })); // train = subway
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // city = NY
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // underground = subway
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 2, .. })); // train = subway
-            assert_matches!(iter.next(), None); // position rewritten ^
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city = NYC
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: false, .. })); // underground = subway
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 5, is_exact: false, .. })); // train = subway
+            assert_matches!(iter.next(), None); // position rewritten ^
         });
         assert_matches!(iter.next(), None);
     }
@@ -1116,49 +1262,43 @@ mod tests {
             ("broken", &[doc_char_index(2, 4, 4)][..]),
         ]);
 
-        store.add_synonym("new york", SetBuf::from_dirty(vec!["NYC", "NY", "new york city"]));
-        store.add_synonym("new york city", SetBuf::from_dirty(vec!["NYC", "NY", "new york"]));
-        store.add_synonym("underground train", SetBuf::from_dirty(vec!["subway"]));
+        store.add_synonym("new york", SetBuf::from_dirty(vec![ "NYC", "NY", "new york city" ]));
+        store.add_synonym("new york city", SetBuf::from_dirty(vec![ "NYC", "NY", "new york" ]));
+        store.add_synonym("underground train", SetBuf::from_dirty(vec![ "subway" ]));
 
         let builder = QueryBuilder::new(&store);
         let results = builder.query("new york underground train broken", 0..20).unwrap();
         let mut iter = results.into_iter();
 
-        assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, highlights }) => {
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => {
             let mut matches = matches.into_iter();
-            let mut highlights = highlights.into_iter();
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 0, .. })); // york
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 1, .. })); // underground
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 2, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 2, .. })); // train
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 3, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 3, .. })); // broken
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 4, .. }));
-
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // underground
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 4, is_exact: true, .. })); // train
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 5, word_index: 5, is_exact: true, .. })); // broken
             assert_matches!(matches.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NYC = new york
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, .. })); // subway = underground train
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 3, .. })); // broken
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NYC = new
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NYC = york
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // NYC = city
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: true, .. })); // subway = underground
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 5, is_exact: true, .. })); // subway = train
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 5, word_index: 6, is_exact: true, .. })); // broken
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY = new york
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 1, .. })); // subway = underground train
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NY = new
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NY = york
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // NY = city
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // subway = underground
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 4, is_exact: true, .. })); // subway = train
            assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), None);
@@ -1167,55 +1307,169 @@ mod tests {
 
         let results = builder.query("new york city underground train broken", 0..20).unwrap();
         let mut iter = results.into_iter();
 
-        assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, highlights }) => {
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => {
             let mut matches = matches.into_iter();
-            let mut highlights = highlights.into_iter();
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // york
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // new
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 0, .. })); // york
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 1, .. })); // underground
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 2, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 2, .. })); // train
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 3, .. }));
-
-            assert_matches!(matches.next(), Some(TmpMatch { query_index: 5, word_index: 3, .. })); // broken
-            assert_matches!(highlights.next(), Some(Highlight { char_index: 4, .. }));
-
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // underground
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 4, is_exact: true, .. })); // train
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 5, word_index: 5, is_exact: true, .. })); // broken
             assert_matches!(matches.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NYC = new york city
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 2, .. })); // subway = underground train
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 5, word_index: 3, .. })); // broken
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NYC = new
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NYC = new
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NYC = york
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NYC = york
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // NYC = city
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 4, is_exact: true, .. })); // subway = underground
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 5, is_exact: true, .. })); // subway = train
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 5, word_index: 6, is_exact: true, .. })); // broken
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // NY = new york city
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 1, .. })); // subway = underground train
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NY = new
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // NY = new
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NY = york
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // NY = york
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // NY = city
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // subway = underground
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 4, word_index: 4, is_exact: true, .. })); // subway = train
            assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), None);
     }
 
+    #[test]
+    fn intercrossed_multiword_synonyms() {
+        let mut store = InMemorySetStore::from_iter(vec![
+            ("new",  &[doc_index(0, 0)][..]),
+            ("york", &[doc_index(0, 1)][..]),
+            ("big",  &[doc_index(0, 2)][..]),
+            ("city", &[doc_index(0, 3)][..]),
+        ]);
+
+        store.add_synonym("new york", SetBuf::from_dirty(vec![ "new york city" ]));
+        store.add_synonym("new york city", SetBuf::from_dirty(vec![ "new york" ]));
+
+        let builder = QueryBuilder::new(&store);
+        let results = builder.query("new york big ", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: false, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new
+
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york
+
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 4, is_exact: false, .. })); // city
+
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // big
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+
+        let mut store = InMemorySetStore::from_iter(vec![
+            ("NY",     &[doc_index(0, 0)][..]),
+            ("city",   &[doc_index(0, 1)][..]),
+            ("subway", &[doc_index(0, 2)][..]),
+
+            ("NY",     &[doc_index(1, 0)][..]),
+            ("subway", &[doc_index(1, 1)][..]),
+
+            ("NY",     &[doc_index(2, 0)][..]),
+            ("york",   &[doc_index(2, 1)][..]),
+            ("city",   &[doc_index(2, 2)][..]),
+            ("subway", &[doc_index(2, 3)][..]),
+        ]);
+
+        store.add_synonym("NY", SetBuf::from_dirty(vec!["new york city story"]));
+
+        let builder = QueryBuilder::new(&store);
+        let results = builder.query("NY subway ", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // story
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 4, is_exact: true, .. })); // subway
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 3, is_exact: true, .. })); // subway
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(2), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: false, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: false, .. })); // city
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 3, is_exact: true, .. })); // subway
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+    }
+
+    #[test]
+    fn cumulative_word_indices() {
+        let mut store = InMemorySetStore::from_iter(vec![
+            ("NYC",    &[doc_index(0, 0)][..]),
+            ("long",   &[doc_index(0, 1)][..]),
+            ("subway", &[doc_index(0, 2)][..]),
+            ("cool",   &[doc_index(0, 3)][..]),
+        ]);
+
+        store.add_synonym("new york city", SetBuf::from_dirty(vec!["NYC"]));
+        store.add_synonym("subway", SetBuf::from_dirty(vec!["underground train"]));
+
+        let builder = QueryBuilder::new(&store);
+        let results = builder.query("new york city long subway cool ", 0..20).unwrap();
+        let mut iter = results.into_iter();
+
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+            let mut matches = matches.into_iter();
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, is_exact: true, .. })); // new  = NYC
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, is_exact: true, .. })); // york = NYC
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 2, word_index: 2, is_exact: true, .. })); // city = NYC
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 3, word_index: 3, is_exact: true, .. })); // long
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 4, word_index: 4, is_exact: true, .. })); // subway = underground
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 5, word_index: 5, is_exact: true, .. })); // subway = train
+            assert_matches!(matches.next(), Some(TmpMatch { query_index: 6, word_index: 6, is_exact: true, .. })); // cool
+            assert_matches!(matches.next(), None);
+        });
+        assert_matches!(iter.next(), None);
+    }
+
     #[test]
     fn deunicoded_synonyms() {
         let mut store = InMemorySetStore::from_iter(vec![
-            ("iPhone",    &[doc_index(0, 0)][..]),
-            ("telephone", &[doc_index(1, 0)][..]), // meilidb-data indexes the unidecoded
-            ("téléphone", &[doc_index(1, 0)][..]), // and the original words with the same DocIndex
+            ("telephone", &[doc_index(0, 0)][..]), // meilidb-data indexes the unidecoded
+            ("téléphone", &[doc_index(0, 0)][..]), // and the original words with the same DocIndex
+
+            ("iphone",    &[doc_index(1, 0)][..]),
         ]);
 
-        store.add_synonym("téléphone", SetBuf::from_dirty(vec!["iPhone"]));
+        store.add_synonym("téléphone", SetBuf::from_dirty(vec!["iphone"]));
 
         let builder = QueryBuilder::new(&store);
         let results = builder.query("telephone", 0..20).unwrap();
         let mut iter = results.into_iter();
 
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
             assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
             assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), None);
 
         let results = builder.query("téléphone", 0..20).unwrap();
         let mut iter = results.into_iter();
 
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
             assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
             assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
             assert_matches!(iter.next(), None);
         });
         assert_matches!(iter.next(), None);
 
         let results = builder.query("télephone", 0..20).unwrap();
         let mut iter = results.into_iter();
 
-        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
             let mut iter = matches.into_iter();
             assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, .. }));
             assert_matches!(iter.next(), None);
         });
-        assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
+        assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
-            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, distance: 1, .. })); // téléphone
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, distance: 1, word_index: 0, is_exact: false, .. })); // iphone
+            assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, distance: 1, word_index: 0, is_exact: false, ..
})); // téléphone assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -1282,8 +1537,9 @@ mod tests { assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => { let mut iter = matches.into_iter(); assert_matches!(iter.next(), Some(TmpMatch { query_index: 0, word_index: 0, distance: 0, .. })); // iphone + assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 1, distance: 0, .. })); // iphone assert_matches!(iter.next(), Some(TmpMatch { query_index: 1, word_index: 0, distance: 1, .. })); // phone - assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 1, distance: 0, .. })); // case + assert_matches!(iter.next(), Some(TmpMatch { query_index: 2, word_index: 2, distance: 0, .. })); // case assert_matches!(iter.next(), None); }); assert_matches!(iter.next(), None); @@ -1309,7 +1565,7 @@ mod tests { assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // porte assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. })); - assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // feuille + assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // feuille assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. })); assert_matches!(matches.next(), None); @@ -1327,7 +1583,7 @@ mod tests { assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // search assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. })); - assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // engine + assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // engine assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. 
})); assert_matches!(matches.next(), None); From 795557c046110873f636327ec926249346d0c093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 15 Jul 2019 14:28:40 +0200 Subject: [PATCH 05/19] feat: Remove query splitting from the automaton generation --- meilidb-core/src/query_builder.rs | 88 +------------------------------ 1 file changed, 1 insertion(+), 87 deletions(-) diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index 5268edd27..1fb778094 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -65,29 +65,6 @@ pub fn normalize_str(string: &str) -> String { string } -fn split_best_frequency<'a, S: Store>( - word: &'a str, - store: &S, -) -> Result, S::Error> -{ - let chars = word.char_indices().skip(1); - let mut best = None; - - for (i, _) in chars { - let (left, right) = word.split_at(i); - - let left_freq = store.word_indexes(left.as_bytes())?.map_or(0, |i| i.len()); - let right_freq = store.word_indexes(right.as_bytes())?.map_or(0, |i| i.len()); - let min_freq = cmp::min(left_freq, right_freq); - - if min_freq != 0 && best.map_or(true, |(old, _, _)| min_freq > old) { - best = Some((min_freq, left, right)); - } - } - - Ok(best.map(|(_, l, r)| (l, r))) -} - fn generate_automatons(query: &str, store: &S) -> Result<(Vec, QueryEnhancer), S::Error> { let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace); let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect(); @@ -160,24 +137,7 @@ fn generate_automatons(query: &str, store: &S) -> Result<(Vec { - let mut matches = matches.into_iter(); - let mut highlights = highlights.into_iter(); - - assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // porte - assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. })); - - assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // feuille - assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. })); - - assert_matches!(matches.next(), None); - }); - assert_matches!(iter.next(), None); - - let builder = QueryBuilder::new(&store); - let results = builder.query("searchengine", 0..20).unwrap(); - let mut iter = results.into_iter(); - - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, highlights }) => { - let mut matches = matches.into_iter(); - let mut highlights = highlights.into_iter(); - - assert_matches!(matches.next(), Some(TmpMatch { query_index: 0, word_index: 0, .. })); // search - assert_matches!(highlights.next(), Some(Highlight { char_index: 0, .. })); - - assert_matches!(matches.next(), Some(TmpMatch { query_index: 1, word_index: 1, .. })); // engine - assert_matches!(highlights.next(), Some(Highlight { char_index: 1, .. 
})); - - assert_matches!(matches.next(), None); - }); - assert_matches!(iter.next(), None); - } } From 9959f2e952bdef34566541628cfec734e5d7ece3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 7 Jul 2019 19:55:15 +0200 Subject: [PATCH 06/19] feat: Move the RawDocument type to its own module --- meilidb-core/src/lib.rs | 135 +------------------------------ meilidb-core/src/raw_document.rs | 132 ++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 131 deletions(-) create mode 100644 meilidb-core/src/raw_document.rs diff --git a/meilidb-core/src/lib.rs b/meilidb-core/src/lib.rs index f5975e3b5..b1a682e40 100644 --- a/meilidb-core/src/lib.rs +++ b/meilidb-core/src/lib.rs @@ -5,19 +5,18 @@ mod automaton; mod distinct_map; mod query_builder; mod query_enhancer; +mod raw_document; mod reordered_attrs; mod store; pub mod criterion; -use std::fmt; -use std::sync::Arc; - -use sdset::SetBuf; use serde::{Serialize, Deserialize}; -use slice_group_by::GroupBy; use zerocopy::{AsBytes, FromBytes}; +use self::raw_document::raw_documents_from_matches; + pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str}; +pub use self::raw_document::RawDocument; pub use self::store::Store; /// Represent an internally generated document unique identifier. @@ -131,132 +130,6 @@ impl Document { } } -#[derive(Clone)] -pub struct RawDocument { - pub id: DocumentId, - pub matches: SharedMatches, - pub highlights: Vec, -} - -impl RawDocument { - fn new(id: DocumentId, matches: SharedMatches, highlights: Vec) -> RawDocument { - RawDocument { id, matches, highlights } - } - - pub fn query_index(&self) -> &[u32] { - let r = self.matches.range; - // it is safe because construction/modifications - // can only be done in this module - unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) } - } - - pub fn distance(&self) -> &[u8] { - let r = self.matches.range; - // it is safe because construction/modifications - // can only be done in this module - unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) } - } - - pub fn attribute(&self) -> &[u16] { - let r = self.matches.range; - // it is safe because construction/modifications - // can only be done in this module - unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) } - } - - pub fn word_index(&self) -> &[u16] { - let r = self.matches.range; - // it is safe because construction/modifications - // can only be done in this module - unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) } - } - - pub fn is_exact(&self) -> &[bool] { - let r = self.matches.range; - // it is safe because construction/modifications - // can only be done in this module - unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) } - } -} - -impl fmt::Debug for RawDocument { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("RawDocument") - .field("id", &self.id) - .field("query_index", &self.query_index()) - .field("distance", &self.distance()) - .field("attribute", &self.attribute()) - .field("word_index", &self.word_index()) - .field("is_exact", &self.is_exact()) - .finish() - } -} - -fn raw_documents_from_matches(matches: SetBuf<(DocumentId, TmpMatch, Highlight)>) -> Vec { - let mut docs_ranges: Vec<(_, Range, _)> = Vec::new(); - let mut matches2 = Matches::with_capacity(matches.len()); - - for group in matches.linear_group_by(|(a, _, _), (b, _, _)| a == b) { - let document_id = group[0].0; - let start = docs_ranges.last().map(|(_, r, 
_)| r.end).unwrap_or(0); - let end = start + group.len(); - - let highlights = group.iter().map(|(_, _, h)| *h).collect(); - docs_ranges.push((document_id, Range { start, end }, highlights)); - - matches2.extend_from_slice(group); - } - - let matches = Arc::new(matches2); - docs_ranges.into_iter().map(|(i, range, highlights)| { - let matches = SharedMatches { range, matches: matches.clone() }; - RawDocument::new(i, matches, highlights) - }).collect() -} - -#[derive(Debug, Copy, Clone)] -struct Range { - start: usize, - end: usize, -} - -#[derive(Clone)] -pub struct SharedMatches { - range: Range, - matches: Arc, -} - -#[derive(Clone)] -struct Matches { - query_index: Vec, - distance: Vec, - attribute: Vec, - word_index: Vec, - is_exact: Vec, -} - -impl Matches { - fn with_capacity(cap: usize) -> Matches { - Matches { - query_index: Vec::with_capacity(cap), - distance: Vec::with_capacity(cap), - attribute: Vec::with_capacity(cap), - word_index: Vec::with_capacity(cap), - is_exact: Vec::with_capacity(cap), - } - } - - fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch, Highlight)]) { - for (_, match_, _) in matches { - self.query_index.push(match_.query_index); - self.distance.push(match_.distance); - self.attribute.push(match_.attribute); - self.word_index.push(match_.word_index); - self.is_exact.push(match_.is_exact); - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/meilidb-core/src/raw_document.rs b/meilidb-core/src/raw_document.rs new file mode 100644 index 000000000..7a293439e --- /dev/null +++ b/meilidb-core/src/raw_document.rs @@ -0,0 +1,132 @@ +use std::sync::Arc; +use std::fmt; +use sdset::SetBuf; +use slice_group_by::GroupBy; +use crate::{TmpMatch, DocumentId, Highlight}; + +#[derive(Clone)] +pub struct RawDocument { + pub id: DocumentId, + pub matches: SharedMatches, + pub highlights: Vec, +} + +impl RawDocument { + fn new(id: DocumentId, matches: SharedMatches, highlights: Vec) -> RawDocument { + RawDocument { id, matches, highlights } + } + + pub fn query_index(&self) -> &[u32] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) } + } + + pub fn distance(&self) -> &[u8] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) } + } + + pub fn attribute(&self) -> &[u16] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) } + } + + pub fn word_index(&self) -> &[u16] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) } + } + + pub fn is_exact(&self) -> &[bool] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) } + } +} + +impl fmt::Debug for RawDocument { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("RawDocument {\r\n")?; + f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?; + f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "query_index", self.query_index()))?; + f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "distance", 
self.distance()))?; + f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "attribute", self.attribute()))?; + f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "word_index", self.word_index()))?; + f.write_fmt(format_args!("{:>15}: {:^5?},\r\n", "is_exact", self.is_exact()))?; + f.write_str("}")?; + Ok(()) + } +} + +pub fn raw_documents_from_matches(matches: SetBuf<(DocumentId, TmpMatch, Highlight)>) -> Vec { + let mut docs_ranges: Vec<(_, Range, _)> = Vec::new(); + let mut matches2 = Matches::with_capacity(matches.len()); + + for group in matches.linear_group_by(|(a, _, _), (b, _, _)| a == b) { + let document_id = group[0].0; + let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0); + let end = start + group.len(); + + let highlights = group.iter().map(|(_, _, h)| *h).collect(); + docs_ranges.push((document_id, Range { start, end }, highlights)); + + matches2.extend_from_slice(group); + } + + let matches = Arc::new(matches2); + docs_ranges.into_iter().map(|(i, range, highlights)| { + let matches = SharedMatches { range, matches: matches.clone() }; + RawDocument::new(i, matches, highlights) + }).collect() +} + +#[derive(Debug, Copy, Clone)] +struct Range { + start: usize, + end: usize, +} + +#[derive(Clone)] +pub struct SharedMatches { + range: Range, + matches: Arc, +} + +#[derive(Clone)] +struct Matches { + query_index: Vec, + distance: Vec, + attribute: Vec, + word_index: Vec, + is_exact: Vec, +} + +impl Matches { + fn with_capacity(cap: usize) -> Matches { + Matches { + query_index: Vec::with_capacity(cap), + distance: Vec::with_capacity(cap), + attribute: Vec::with_capacity(cap), + word_index: Vec::with_capacity(cap), + is_exact: Vec::with_capacity(cap), + } + } + + fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch, Highlight)]) { + for (_, match_, _) in matches { + self.query_index.push(match_.query_index); + self.distance.push(match_.distance); + self.attribute.push(match_.attribute); + self.word_index.push(match_.word_index); + self.is_exact.push(match_.is_exact); + } + } +} From 89df496f0cd18a8322e7a7f9113536f3684fb3f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 15 Jul 2019 19:34:53 +0200 Subject: [PATCH 07/19] feat: Separate highlights from matches to make the code easier to follow --- meilidb-core/src/lib.rs | 2 +- meilidb-core/src/query_builder.rs | 39 ++++++++++++++++++------------- meilidb-core/src/raw_document.rs | 31 +++++++++++++++--------- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/meilidb-core/src/lib.rs b/meilidb-core/src/lib.rs index b1a682e40..6f6e46359 100644 --- a/meilidb-core/src/lib.rs +++ b/meilidb-core/src/lib.rs @@ -13,7 +13,7 @@ pub mod criterion; use serde::{Serialize, Deserialize}; use zerocopy::{AsBytes, FromBytes}; -use self::raw_document::raw_documents_from_matches; +use self::raw_document::raw_documents_from; pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str}; pub use self::raw_document::RawDocument; diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index 1fb778094..c5a0ac847 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -17,7 +17,7 @@ use crate::automaton::{build_dfa, build_prefix_dfa}; use crate::criterion::Criteria; use crate::distinct_map::{DistinctMap, BufferedDistinctMap}; use crate::query_enhancer::{QueryEnhancerBuilder, QueryEnhancer}; -use crate::raw_documents_from_matches; +use crate::raw_documents_from; use crate::reordered_attrs::ReorderedAttrs; use 
crate::{TmpMatch, Highlight, DocumentId, Store, RawDocument, Document}; @@ -215,6 +215,7 @@ where S: Store, }; let mut matches = Vec::new(); + let mut highlights = Vec::new(); while let Some((input, indexed_values)) = stream.next() { for iv in indexed_values { @@ -240,23 +241,21 @@ where S: Store, is_exact, }; - // TODO do not store in the same matches vec let highlight = Highlight { attribute: di.attribute, char_index: di.char_index, char_length: di.char_length, }; - matches.push((di.document_id, match_, highlight)); + matches.push((di.document_id, match_)); + highlights.push((di.document_id, highlight)); } } } } // we sort the matches to make them rewritable - matches.par_sort_unstable_by_key(|(id, match_, _)| { - (*id, match_.attribute, match_.word_index) // query_id ??? - }); + matches.par_sort_unstable_by_key(|(id, match_)| (*id, match_.attribute, match_.word_index)); let mut padded_matches = Vec::with_capacity(matches.len()); for same_document in matches.linear_group_by(|a, b| a.0 == b.0) { @@ -268,7 +267,7 @@ where S: Store, while let Some(same_word_index) = iter.next() { let mut biggest = 0; - for (id, match_, highlight) in same_word_index { + for (id, match_) in same_word_index { let mut replacement = query_enhancer.replacement(match_.query_index); let replacement_len = replacement.len() - 1; @@ -280,7 +279,7 @@ where S: Store, word_index: match_.word_index + padding as u16, ..match_.clone() }; - padded_matches.push((*id, match_, *highlight)); + padded_matches.push((*id, match_)); } let mut found = false; @@ -296,7 +295,7 @@ where S: Store, ..match_.clone() }; - for (_, nmatch_, _) in next_group { + for (_, nmatch_) in next_group { let mut rep = query_enhancer.replacement(nmatch_.query_index); let query_index = rep.next().unwrap(); let nmatch_ = TmpMatch { query_index, ..nmatch_.clone() }; @@ -311,12 +310,12 @@ where S: Store, word_index: match_.word_index + padding as u16 + (i + 1) as u16, ..match_.clone() }; - padded_matches.push((*id, match_, *highlight)); + padded_matches.push((*id, match_)); biggest = biggest.max(i + 1); } } - padded_matches.push((*id, padmatch_, *highlight)); + padded_matches.push((*id, padmatch_)); found = true; continue 'padding; } @@ -337,7 +336,7 @@ where S: Store, word_index: match_.word_index + padding as u16 + (i + 1) as u16, ..match_.clone() }; - padded_matches.push((*id, match_, *highlight)); + padded_matches.push((*id, match_)); } biggest = biggest.max(replacement_len); @@ -350,11 +349,19 @@ where S: Store, } - let total_matches = padded_matches.len(); - padded_matches.par_sort_unstable(); - let padded_matches = SetBuf::new_unchecked(padded_matches); - let raw_documents = raw_documents_from_matches(padded_matches); + let matches = { + padded_matches.par_sort_unstable(); + SetBuf::new_unchecked(padded_matches) + }; + + let highlights = { + highlights.par_sort_unstable_by_key(|(id, _)| *id); + SetBuf::new_unchecked(highlights) + }; + + let total_matches = matches.len(); + let raw_documents = raw_documents_from(matches, highlights); info!("{} total documents to classify", raw_documents.len()); info!("{} total matches to classify", total_matches); diff --git a/meilidb-core/src/raw_document.rs b/meilidb-core/src/raw_document.rs index 7a293439e..5d449a74a 100644 --- a/meilidb-core/src/raw_document.rs +++ b/meilidb-core/src/raw_document.rs @@ -66,25 +66,34 @@ impl fmt::Debug for RawDocument { } } -pub fn raw_documents_from_matches(matches: SetBuf<(DocumentId, TmpMatch, Highlight)>) -> Vec { +pub fn raw_documents_from( + matches: SetBuf<(DocumentId, 
TmpMatch)>, + highlights: SetBuf<(DocumentId, Highlight)>, +) -> Vec +{ let mut docs_ranges: Vec<(_, Range, _)> = Vec::new(); let mut matches2 = Matches::with_capacity(matches.len()); - for group in matches.linear_group_by(|(a, _, _), (b, _, _)| a == b) { - let document_id = group[0].0; - let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0); - let end = start + group.len(); + let matches = matches.linear_group_by(|(a, _), (b, _)| a == b); + let highlights = highlights.linear_group_by(|(a, _), (b, _)| a == b); - let highlights = group.iter().map(|(_, _, h)| *h).collect(); + for (mgroup, hgroup) in matches.zip(highlights) { + debug_assert_eq!(mgroup[0].0, hgroup[0].0); + + let document_id = mgroup[0].0; + let start = docs_ranges.last().map(|(_, r, _)| r.end).unwrap_or(0); + let end = start + mgroup.len(); + + let highlights = hgroup.iter().map(|(_, h)| *h).collect(); docs_ranges.push((document_id, Range { start, end }, highlights)); - matches2.extend_from_slice(group); + matches2.extend_from_slice(mgroup); } let matches = Arc::new(matches2); - docs_ranges.into_iter().map(|(i, range, highlights)| { + docs_ranges.into_iter().map(|(id, range, highlights)| { let matches = SharedMatches { range, matches: matches.clone() }; - RawDocument::new(i, matches, highlights) + RawDocument::new(id, matches, highlights) }).collect() } @@ -120,8 +129,8 @@ impl Matches { } } - fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch, Highlight)]) { - for (_, match_, _) in matches { + fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch)]) { + for (_, match_) in matches { self.query_index.push(match_.query_index); self.distance.push(match_.distance); self.attribute.push(match_.attribute); From bf3c2c372554829e0ef314cd55eb09fb0378c07d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 2 Aug 2019 12:07:23 +0200 Subject: [PATCH 08/19] feat: Move the multi-word rewriting algorithm into its own function --- meilidb-core/Cargo.toml | 2 +- meilidb-core/src/criterion/sum_of_typos.rs | 2 +- meilidb-core/src/query_builder.rs | 222 ++++++++++-------- meilidb-core/src/query_enhancer.rs | 15 +- meilidb-core/src/raw_document.rs | 4 +- .../src/database/synonyms_addition.rs | 4 +- meilidb/examples/create-database.rs | 72 +++++- 7 files changed, 204 insertions(+), 117 deletions(-) diff --git a/meilidb-core/Cargo.toml b/meilidb-core/Cargo.toml index 037a7788c..25fb57119 100644 --- a/meilidb-core/Cargo.toml +++ b/meilidb-core/Cargo.toml @@ -14,7 +14,7 @@ meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } rayon = "1.0.3" sdset = "0.3.2" serde = { version = "1.0.88", features = ["derive"] } -slice-group-by = "0.2.4" +slice-group-by = "0.2.6" zerocopy = "0.2.2" [dependencies.fst] diff --git a/meilidb-core/src/criterion/sum_of_typos.rs b/meilidb-core/src/criterion/sum_of_typos.rs index d5cd75f08..6736e6caa 100644 --- a/meilidb-core/src/criterion/sum_of_typos.rs +++ b/meilidb-core/src/criterion/sum_of_typos.rs @@ -21,7 +21,7 @@ fn custom_log10(n: u8) -> f32 { #[inline] fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize { - let mut number_words = 0; + let mut number_words: usize = 0; let mut sum_typos = 0.0; let mut index = 0; diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index c5a0ac847..7c3183ff4 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -197,6 +197,110 @@ impl<'c, S, FI> QueryBuilder<'c, S, FI> } } +fn multiword_rewrite_matches( + mut matches: 
Vec<(DocumentId, TmpMatch)>, + query_enhancer: &QueryEnhancer, +) -> SetBuf<(DocumentId, TmpMatch)> +{ + let mut padded_matches = Vec::with_capacity(matches.len()); + + // we sort the matches by word index to make them rewritable + let start = Instant::now(); + matches.par_sort_unstable_by_key(|(id, match_)| (*id, match_.attribute, match_.word_index)); + info!("rewrite sort by word_index took {:.2?}", start.elapsed()); + + let start = Instant::now(); + // for each attribute of each document + for same_document_attribute in matches.linear_group_by_key(|(id, m)| (*id, m.attribute)) { + + // padding will only be applied + // to word indices in the same attribute + let mut padding = 0; + let mut iter = same_document_attribute.linear_group_by_key(|(_, m)| m.word_index); + + // for each match at the same position + // in this document attribute + while let Some(same_word_index) = iter.next() { + + // find the biggest padding + let mut biggest = 0; + for (id, match_) in same_word_index { + + let mut replacement = query_enhancer.replacement(match_.query_index); + let replacement_len = replacement.len(); + let nexts = iter.remainder().linear_group_by_key(|(_, m)| m.word_index); + + if let Some(query_index) = replacement.next() { + let word_index = match_.word_index + padding as u16; + let match_ = TmpMatch { query_index, word_index, ..match_.clone() }; + padded_matches.push((*id, match_)); + } + + let mut found = false; + + // look ahead and if there already is a match + // corresponding to this padding word, abort the padding + 'padding: for (x, next_group) in nexts.enumerate() { + + for (i, query_index) in replacement.clone().enumerate().skip(x) { + let word_index = match_.word_index + padding as u16 + (i + 1) as u16; + let padmatch = TmpMatch { query_index, word_index, ..match_.clone() }; + + for (_, nmatch_) in next_group { + let mut rep = query_enhancer.replacement(nmatch_.query_index); + let query_index = rep.next().unwrap(); + if query_index == padmatch.query_index { + + if !found { + // if we find a corresponding padding for the + // first time we must push preceding paddings + for (i, query_index) in replacement.clone().enumerate().take(i) { + let word_index = match_.word_index + padding as u16 + (i + 1) as u16; + let match_ = TmpMatch { query_index, word_index, ..match_.clone() }; + padded_matches.push((*id, match_)); + biggest = biggest.max(i + 1); + } + } + + padded_matches.push((*id, padmatch)); + found = true; + continue 'padding; + } + } + } + + // if we do not find a corresponding padding in the + // next groups so stop here and pad what was found + break + } + + if !found { + // if no padding was found in the following matches + // we must insert the entire padding + for (i, query_index) in replacement.enumerate() { + let word_index = match_.word_index + padding as u16 + (i + 1) as u16; + let match_ = TmpMatch { query_index, word_index, ..match_.clone() }; + padded_matches.push((*id, match_)); + } + + biggest = biggest.max(replacement_len - 1); + } + } + + padding += biggest; + } + } + info!("main multiword rewrite took {:.2?}", start.elapsed()); + + let start = Instant::now(); + for document_matches in padded_matches.linear_group_by_key_mut(|(id, _)| *id) { + document_matches.sort_unstable(); + } + info!("final rewrite sort took {:.2?}", start.elapsed()); + + SetBuf::new_unchecked(padded_matches) +} + impl<'c, S, FI> QueryBuilder<'c, S, FI> where S: Store, { @@ -217,22 +321,26 @@ where S: Store, let mut matches = Vec::new(); let mut highlights = Vec::new(); + let mut query_db = 
std::time::Duration::default(); + + let start = Instant::now(); while let Some((input, indexed_values)) = stream.next() { for iv in indexed_values { let Automaton { is_exact, query_len, ref dfa } = automatons[iv.index]; let distance = dfa.eval(input).to_u8(); let is_exact = is_exact && distance == 0 && input.len() == query_len; + let start = Instant::now(); let doc_indexes = self.store.word_indexes(input)?; let doc_indexes = match doc_indexes { Some(doc_indexes) => doc_indexes, None => continue, }; + query_db += start.elapsed(); for di in doc_indexes.as_slice() { let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute)); if let Some(attribute) = attribute { - let match_ = TmpMatch { query_index: iv.index as u32, distance, @@ -253,118 +361,28 @@ where S: Store, } } } + info!("main query all took {:.2?} (get indexes {:.2?})", start.elapsed(), query_db); - // we sort the matches to make them rewritable - matches.par_sort_unstable_by_key(|(id, match_)| (*id, match_.attribute, match_.word_index)); + info!("{} total matches to rewrite", matches.len()); - let mut padded_matches = Vec::with_capacity(matches.len()); - for same_document in matches.linear_group_by(|a, b| a.0 == b.0) { - - for same_attribute in same_document.linear_group_by(|a, b| a.1.attribute == b.1.attribute) { - - let mut padding = 0; - let mut iter = same_attribute.linear_group_by(|a, b| a.1.word_index == b.1.word_index); - while let Some(same_word_index) = iter.next() { - - let mut biggest = 0; - for (id, match_) in same_word_index { - - let mut replacement = query_enhancer.replacement(match_.query_index); - let replacement_len = replacement.len() - 1; - let nexts = iter.remainder().linear_group_by(|a, b| a.1.word_index == b.1.word_index); - - if let Some(query_index) = replacement.next() { - let match_ = TmpMatch { - query_index, - word_index: match_.word_index + padding as u16, - ..match_.clone() - }; - padded_matches.push((*id, match_)); - } - - let mut found = false; - - // look ahead and if there already is a match - // corresponding to this padding word, abort the padding - 'padding: for (x, next_group) in nexts.enumerate() { - - for (i, query_index) in replacement.clone().enumerate().skip(x) { - let padmatch_ = TmpMatch { - query_index, - word_index: match_.word_index + padding as u16 + (i + 1) as u16, - ..match_.clone() - }; - - for (_, nmatch_) in next_group { - let mut rep = query_enhancer.replacement(nmatch_.query_index); - let query_index = rep.next().unwrap(); - let nmatch_ = TmpMatch { query_index, ..nmatch_.clone() }; - if nmatch_.query_index == padmatch_.query_index { - - if !found { - // if we find a corresponding padding for the - // first time we must push preceding paddings - for (i, query_index) in replacement.clone().enumerate().take(i) { - let match_ = TmpMatch { - query_index, - word_index: match_.word_index + padding as u16 + (i + 1) as u16, - ..match_.clone() - }; - padded_matches.push((*id, match_)); - biggest = biggest.max(i + 1); - } - } - - padded_matches.push((*id, padmatch_)); - found = true; - continue 'padding; - } - } - } - - // if we do not find a corresponding padding in the - // next groups so stop here and pad what was found - break - } - - if !found { - // if no padding was found in the following matches - // we must insert the entire padding - for (i, query_index) in replacement.enumerate() { - let match_ = TmpMatch { - query_index, - word_index: match_.word_index + padding as u16 + (i + 1) as u16, - ..match_.clone() - }; - padded_matches.push((*id, match_)); - } - - 
biggest = biggest.max(replacement_len); - } - } - - padding += biggest; - } - } - - } - - - let matches = { - padded_matches.par_sort_unstable(); - SetBuf::new_unchecked(padded_matches) - }; + let start = Instant::now(); + let matches = multiword_rewrite_matches(matches, &query_enhancer); + info!("multiword rewrite took {:.2?}", start.elapsed()); + let start = Instant::now(); let highlights = { highlights.par_sort_unstable_by_key(|(id, _)| *id); SetBuf::new_unchecked(highlights) }; + info!("sorting highlights took {:.2?}", start.elapsed()); - let total_matches = matches.len(); + info!("{} total matches to classify", matches.len()); + + let start = Instant::now(); let raw_documents = raw_documents_from(matches, highlights); + info!("making raw documents took {:.2?}", start.elapsed()); info!("{} total documents to classify", raw_documents.len()); - info!("{} total matches to classify", total_matches); Ok(raw_documents) } diff --git a/meilidb-core/src/query_enhancer.rs b/meilidb-core/src/query_enhancer.rs index 6280ae11e..165c1b094 100644 --- a/meilidb-core/src/query_enhancer.rs +++ b/meilidb-core/src/query_enhancer.rs @@ -52,17 +52,20 @@ where S: AsRef, !original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref)) } +type Origin = usize; +type RealLength = usize; + struct FakeIntervalTree { - intervals: Vec<(Range, (usize, usize))>, // origin, real_length + intervals: Vec<(Range, (Origin, RealLength))>, } impl FakeIntervalTree { - fn new(mut intervals: Vec<(Range, (usize, usize))>) -> FakeIntervalTree { + fn new(mut intervals: Vec<(Range, (Origin, RealLength))>) -> FakeIntervalTree { intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end)); FakeIntervalTree { intervals } } - fn query(&self, point: usize) -> Option<(Range, (usize, usize))> { + fn query(&self, point: usize) -> Option<(Range, (Origin, RealLength))> { let element = self.intervals.binary_search_by(|(r, _)| { if point >= r.start { if point < r.end { Equal } else { Less } @@ -81,7 +84,7 @@ impl FakeIntervalTree { pub struct QueryEnhancerBuilder<'a, S> { query: &'a [S], origins: Vec, - real_to_origin: Vec<(Range, (usize, usize))>, + real_to_origin: Vec<(Range, (Origin, RealLength))>, } impl> QueryEnhancerBuilder<'_, S> { @@ -147,8 +150,8 @@ impl QueryEnhancer { // query the fake interval tree with the real query index let (range, (origin, real_length)) = self.real_to_origin - .query(real) - .expect("real has never been declared"); + .query(real) + .expect("real has never been declared"); // if `real` is the end bound of the range if (range.start + real_length - 1) == real { diff --git a/meilidb-core/src/raw_document.rs b/meilidb-core/src/raw_document.rs index 5d449a74a..3567c3fd1 100644 --- a/meilidb-core/src/raw_document.rs +++ b/meilidb-core/src/raw_document.rs @@ -74,8 +74,8 @@ pub fn raw_documents_from( let mut docs_ranges: Vec<(_, Range, _)> = Vec::new(); let mut matches2 = Matches::with_capacity(matches.len()); - let matches = matches.linear_group_by(|(a, _), (b, _)| a == b); - let highlights = highlights.linear_group_by(|(a, _), (b, _)| a == b); + let matches = matches.linear_group_by_key(|(id, _)| *id); + let highlights = highlights.linear_group_by_key(|(id, _)| *id); for (mgroup, hgroup) in matches.zip(highlights) { debug_assert_eq!(mgroup[0].0, hgroup[0].0); diff --git a/meilidb-data/src/database/synonyms_addition.rs b/meilidb-data/src/database/synonyms_addition.rs index 6e16ab97b..c37f0475a 100644 --- a/meilidb-data/src/database/synonyms_addition.rs +++ b/meilidb-data/src/database/synonyms_addition.rs @@ -21,10 +21,10 
@@ impl<'a> SynonymsAddition<'a> {
     pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
     where S: AsRef<str>,
           T: AsRef<str>,
-          I: Iterator<Item = T>,
+          I: IntoIterator<Item = T>,
     {
         let synonym = normalize_str(synonym.as_ref());
-        let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
+        let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
         self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
     }

diff --git a/meilidb/examples/create-database.rs b/meilidb/examples/create-database.rs
index ed07e3742..d8e553ed3 100644
--- a/meilidb/examples/create-database.rs
+++ b/meilidb/examples/create-database.rs
@@ -31,9 +31,13 @@ pub struct Opt {
     #[structopt(long = "schema", parse(from_os_str))]
     pub schema_path: PathBuf,

+    /// The file with the synonyms.
+    #[structopt(long = "synonyms", parse(from_os_str))]
+    pub synonyms: Option<PathBuf>,
+
     /// The path to the list of stop words (one by line).
     #[structopt(long = "stop-words", parse(from_os_str))]
-    pub stop_words_path: Option<PathBuf>,
+    pub stop_words: Option<PathBuf>,

     #[structopt(long = "update-group-size")]
     pub update_group_size: Option<usize>,
@@ -45,12 +49,40 @@ struct Document<'a> (
     HashMap<Cow<'a, str>, Cow<'a, str>>
 );

+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum Synonym {
+    OneWay(SynonymOneWay),
+    MultiWay { synonyms: Vec<String> },
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct SynonymOneWay {
+    pub search_terms: String,
+    pub synonyms: Synonyms,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum Synonyms {
+    Multiple(Vec<String>),
+    Single(String),
+}
+
+fn read_synomys(path: &Path) -> Result<Vec<Synonym>, Box<dyn Error>> {
+    let file = File::open(path)?;
+    let synonyms = serde_json::from_reader(file)?;
+    Ok(synonyms)
+}
+
 fn index(
     schema: Schema,
     database_path: &Path,
     csv_data_path: &Path,
     update_group_size: Option<usize>,
     stop_words: &HashSet<String>,
+    synonyms: Vec<Synonym>,
 ) -> Result<Database, Box<dyn Error>>
 {
     let database = Database::start_default(database_path)?;
@@ -62,6 +94,28 @@ fn index(
     let index = database.create_index("test", schema.clone())?;

+    let mut synonyms_adder = index.synonyms_addition();
+    for synonym in synonyms {
+        match synonym {
+            Synonym::OneWay(SynonymOneWay { search_terms, synonyms }) => {
+                let alternatives = match synonyms {
+                    Synonyms::Multiple(alternatives) => alternatives,
+                    Synonyms::Single(alternative) => vec![alternative],
+                };
+                synonyms_adder.add_synonym(search_terms, alternatives);
+            },
+            Synonym::MultiWay { mut synonyms } => {
+                for _ in 0..synonyms.len() {
+                    if let Some((synonym, alternatives)) = synonyms.split_first() {
+                        synonyms_adder.add_synonym(synonym, alternatives);
+                    }
+                    synonyms.rotate_left(1);
+                }
+            },
+        }
+    }
+    synonyms_adder.finalize()?;
+
     let mut rdr = csv::Reader::from_path(csv_data_path)?;
     let mut raw_record = csv::StringRecord::new();
     let headers = rdr.headers()?.clone();
@@ -133,13 +187,25 @@ fn main() -> Result<(), Box<dyn Error>> {
         Schema::from_toml(file)?
     };

-    let stop_words = match opt.stop_words_path {
+    let stop_words = match opt.stop_words {
         Some(ref path) => retrieve_stop_words(path)?,
         None => HashSet::new(),
     };

+    let synonyms = match opt.synonyms {
+        Some(ref path) => read_synomys(path)?,
+        None => Vec::new(),
+    };
+
     let start = Instant::now();
-    let result = index(schema, &opt.database_path, &opt.csv_data_path, opt.update_group_size, &stop_words);
+    let result = index(
+        schema,
+        &opt.database_path,
+        &opt.csv_data_path,
+        opt.update_group_size,
+        &stop_words,
+        synonyms,
+    );

     if let Err(e) = result {
         return Err(e.into())
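[Editor's note: the patch above deserializes the synonyms file through untagged serde enums. A minimal sketch of the file shape those types accept, mirroring the definitions from the patch with Deserialize only; the JSON data itself is invented for illustration.]

use serde::Deserialize; // assumes serde with the "derive" feature and serde_json

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum Synonym {
    OneWay(SynonymOneWay),
    MultiWay { synonyms: Vec<String> },
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct SynonymOneWay {
    search_terms: String,
    synonyms: Synonyms,
}

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum Synonyms {
    Multiple(Vec<String>),
    Single(String),
}

fn main() -> Result<(), serde_json::Error> {
    // one-way entries carry "searchTerms" (camelCase rename),
    // multi-way entries carry only a "synonyms" array
    let json = r#"[
        { "searchTerms": "NY", "synonyms": ["new york", "new york city"] },
        { "searchTerms": "subway", "synonyms": "underground train" },
        { "synonyms": ["street", "avenue", "boulevard"] }
    ]"#;

    let synonyms: Vec<Synonym> = serde_json::from_str(json)?;
    println!("parsed {} synonym rules", synonyms.len());
    Ok(())
}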
From a488c00a2e4daf0f9d4947b4e06fb66b92b7d733 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Fri, 26 Jul 2019 13:27:38 +0200
Subject: [PATCH 09/19] feat: Use RustyLine in the query-database example

---
 meilidb/Cargo.toml                 |   1 +
 meilidb/examples/query-database.rs | 114 +++++++++++++++--------------
 2 files changed, 60 insertions(+), 55 deletions(-)

diff --git a/meilidb/Cargo.toml b/meilidb/Cargo.toml
index 8ba89f212..7208067f0 100644
--- a/meilidb/Cargo.toml
+++ b/meilidb/Cargo.toml
@@ -18,6 +18,7 @@ meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
 quickcheck = "0.8.2"
 rand = "0.6.5"
 rand_xorshift = "0.1.1"
+rustyline = { version = "5.0.0", default-features = false }
 serde = { version = "1.0.91" , features = ["derive"] }
 serde_json = "1.0.39"
 structopt = "0.2.15"

diff --git a/meilidb/examples/query-database.rs b/meilidb/examples/query-database.rs
index 72244d1b8..f9e2f8389 100644
--- a/meilidb/examples/query-database.rs
+++ b/meilidb/examples/query-database.rs
@@ -3,16 +3,17 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;

 use std::collections::btree_map::{BTreeMap, Entry};
 use std::collections::{HashMap, HashSet};
-use std::iter::FromIterator;
-use std::io::{self, Write};
-use std::time::{Instant, Duration};
-use std::path::PathBuf;
 use std::error::Error;
+use std::io::{self, Write};
+use std::iter::FromIterator;
+use std::path::PathBuf;
+use std::time::{Instant, Duration};

 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
 use structopt::StructOpt;
-use meilidb_core::Highlight;
+use rustyline::{Editor, Config};

+use meilidb_core::Highlight;
 use meilidb_data::Database;
 use meilidb_schema::SchemaAttr;

@@ -140,9 +141,6 @@ fn main() -> Result<(), Box<dyn Error>> {
     let start = Instant::now();
     let database = Database::start_default(&opt.database_path)?;

-    let mut buffer = String::new();
-    let input = io::stdin();
-
     let index = database.open_index("test")?.unwrap();
     let schema = index.schema();

@@ -151,65 +149,71 @@ fn main() -> Result<(), Box<dyn Error>> {
     let fields = opt.displayed_fields.iter().map(String::as_str);
     let fields = HashSet::from_iter(fields);

-    loop {
-        print!("Searching for: ");
-        io::stdout().flush()?;
+    let config = Config::builder().auto_add_history(true).build();
+    let mut readline = Editor::<()>::with_config(config);
+    let _ = readline.load_history("query-history.txt");

-        if input.read_line(&mut buffer)?
== 0 { break } - let query = buffer.trim_end_matches('\n'); + for result in readline.iter("Searching for: ") { + match result { + Ok(query) => { + let start_total = Instant::now(); - let start_total = Instant::now(); + let builder = index.query_builder(); + let documents = builder.query(&query, 0..opt.number_results)?; - let builder = index.query_builder(); - let documents = builder.query(query, 0..opt.number_results)?; + let mut retrieve_duration = Duration::default(); - let mut retrieve_duration = Duration::default(); + let number_of_documents = documents.len(); + for mut doc in documents { - let number_of_documents = documents.len(); - for mut doc in documents { + doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length)); - doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length)); + let start_retrieve = Instant::now(); + let result = index.document::(Some(&fields), doc.id); + retrieve_duration += start_retrieve.elapsed(); - let start_retrieve = Instant::now(); - let result = index.document::(Some(&fields), doc.id); - retrieve_duration += start_retrieve.elapsed(); + match result { + Ok(Some(document)) => { + for (name, text) in document { + print!("{}: ", name); - match result { - Ok(Some(document)) => { - for (name, text) in document { - print!("{}: ", name); - - let attr = schema.attribute(&name).unwrap(); - let highlights = doc.highlights.iter() - .filter(|m| SchemaAttr::new(m.attribute) == attr) - .cloned(); - let (text, highlights) = crop_text(&text, highlights, opt.char_context); - let areas = create_highlight_areas(&text, &highlights); - display_highlights(&text, &areas)?; - println!(); + let attr = schema.attribute(&name).unwrap(); + let highlights = doc.highlights.iter() + .filter(|m| SchemaAttr::new(m.attribute) == attr) + .cloned(); + let (text, highlights) = crop_text(&text, highlights, opt.char_context); + let areas = create_highlight_areas(&text, &highlights); + display_highlights(&text, &areas)?; + println!(); + } + }, + Ok(None) => eprintln!("missing document"), + Err(e) => eprintln!("{}", e), } - }, - Ok(None) => eprintln!("missing document"), - Err(e) => eprintln!("{}", e), + + let mut matching_attributes = HashSet::new(); + for highlight in doc.highlights { + let attr = SchemaAttr::new(highlight.attribute); + let name = schema.attribute_name(attr); + matching_attributes.insert(name); + } + + let matching_attributes = Vec::from_iter(matching_attributes); + println!("matching in: {:?}", matching_attributes); + + println!(); + } + + eprintln!("document field retrieve took {:.2?}", retrieve_duration); + eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed()); + }, + Err(err) => { + println!("Error: {:?}", err); + break } - - let mut matching_attributes = HashSet::new(); - for highlight in doc.highlights { - let attr = SchemaAttr::new(highlight.attribute); - let name = schema.attribute_name(attr); - matching_attributes.insert(name); - } - - let matching_attributes = Vec::from_iter(matching_attributes); - println!("matching in: {:?}", matching_attributes); - - println!(); } - - eprintln!("document field retrieve took {:.2?}", retrieve_duration); - eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed()); - buffer.clear(); } + readline.save_history("query-history.txt").unwrap(); Ok(()) } From ebc95cb8f279ac286c40fc4821b8ce1900bbef5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 30 Jul 2019 15:15:47 +0200 Subject: [PATCH 10/19] feat: Display 
the documents fields in the order they were declared --- meilidb/Cargo.toml | 1 + meilidb/examples/query-database.rs | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/meilidb/Cargo.toml b/meilidb/Cargo.toml index 7208067f0..0eecba0a1 100644 --- a/meilidb/Cargo.toml +++ b/meilidb/Cargo.toml @@ -14,6 +14,7 @@ csv = "1.0.7" diskus = "0.5.0" env_logger = "0.6.1" jemallocator = "0.1.9" +linked-hash-map = "0.5.2" meilidb-core = { path = "../meilidb-core", version = "0.1.0" } quickcheck = "0.8.2" rand = "0.6.5" diff --git a/meilidb/examples/query-database.rs b/meilidb/examples/query-database.rs index f9e2f8389..e6368727a 100644 --- a/meilidb/examples/query-database.rs +++ b/meilidb/examples/query-database.rs @@ -2,16 +2,17 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; use std::collections::btree_map::{BTreeMap, Entry}; -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; use std::error::Error; use std::io::{self, Write}; use std::iter::FromIterator; use std::path::PathBuf; use std::time::{Instant, Duration}; -use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; -use structopt::StructOpt; +use linked_hash_map::LinkedHashMap; use rustyline::{Editor, Config}; +use structopt::StructOpt; +use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use meilidb_core::Highlight; use meilidb_data::Database; @@ -35,7 +36,7 @@ pub struct Opt { pub char_context: usize, } -type Document = HashMap; +type Document = LinkedHashMap; fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> { let mut stdout = StandardStream::stdout(ColorChoice::Always); From 81d44a0854f850cce15623e5afe7498ac86012c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 16 Aug 2019 12:17:23 +0200 Subject: [PATCH 11/19] feat: Order automatons by importance --- meilidb-core/src/query_builder.rs | 35 +++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index 7c3183ff4..d88e293b7 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -2,7 +2,7 @@ use std::hash::Hash; use std::ops::Range; use std::rc::Rc; use std::time::Instant; -use std::{cmp, mem}; +use std::{mem, cmp, cmp::Reverse}; use fst::{Streamer, IntoStreamer}; use hashbrown::HashMap; @@ -24,30 +24,38 @@ use crate::{TmpMatch, Highlight, DocumentId, Store, RawDocument, Document}; const NGRAMS: usize = 3; struct Automaton { + index: usize, + ngram: usize, query_len: usize, is_exact: bool, dfa: DFA, } impl Automaton { - fn exact(query: &str) -> Automaton { + fn exact(index: usize, ngram: usize, query: &str) -> Automaton { Automaton { + index, + ngram, query_len: query.len(), is_exact: true, dfa: build_dfa(query), } } - fn prefix_exact(query: &str) -> Automaton { + fn prefix_exact(index: usize, ngram: usize, query: &str) -> Automaton { Automaton { + index, + ngram, query_len: query.len(), is_exact: true, dfa: build_prefix_dfa(query), } } - fn non_exact(query: &str) -> Automaton { + fn non_exact(index: usize, ngram: usize, query: &str) -> Automaton { Automaton { + index, + ngram, query_len: query.len(), is_exact: false, dfa: build_dfa(query), @@ -82,9 +90,9 @@ fn generate_automatons(query: &str, store: &S) -> Result<(Vec(query: &str, store: &S) -> Result<(Vec(query: &str, store: &S) -> Result<(Vec Date: Fri, 16 Aug 2019 12:25:35 +0200 Subject: [PATCH 12/19] feat: Process automatons in the order they 
were sorted
---
 meilidb-core/src/query_builder.rs | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index d88e293b7..636248c36 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -319,27 +319,20 @@ where S: Store,
 {
     fn query_all(&self, query: &str) -> Result<Vec<RawDocument>, S::Error> {
         let (automatons, query_enhancer) = generate_automatons(query, &self.store)?;
-        let words = self.store.words()?.as_fst();
+        let words = self.store.words()?;
         let searchables = self.searchable_attrs.as_ref();

-        let mut stream = {
-            let mut op_builder = fst::raw::OpBuilder::new();
-            for Automaton { dfa, .. } in &automatons {
-                let stream = words.search(dfa);
-                op_builder.push(stream);
-            }
-            op_builder.r#union()
-        };
-
         let mut matches = Vec::new();
         let mut highlights = Vec::new();

         let mut query_db = std::time::Duration::default();
-        let start = Instant::now();
-        while let Some((input, indexed_values)) = stream.next() {
-            for iv in indexed_values {
-                let Automaton { index, is_exact, query_len, ref dfa, .. } = automatons[iv.index];
+
+        for automaton in automatons {
+            let Automaton { index, is_exact, query_len, dfa, .. } = automaton;
+            let mut stream = words.search(&dfa).into_stream();
+
+            while let Some(input) = stream.next() {
                 let distance = dfa.eval(input).to_u8();
                 let is_exact = is_exact && distance == 0 && input.len() == query_len;
@@ -374,8 +367,8 @@ where S: Store,
             }
         }
     }
-    info!("main query all took {:.2?} (get indexes {:.2?})", start.elapsed(), query_db);
+    info!("main query all took {:.2?} (get indexes {:.2?})", start.elapsed(), query_db);

     info!("{} total matches to rewrite", matches.len());

     let start = Instant::now();
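[Editor's note: the patch above drops the single fst union stream in favor of one DFA search per automaton. A standalone sketch of that per-automaton streaming pattern, assuming fst 0.3 and levenshtein_automata 0.1 with its "fst_automaton" feature enabled; the dictionary and queries here are invented.]

use fst::{IntoStreamer, Set, Streamer};
use levenshtein_automata::LevenshteinAutomatonBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // fst::Set requires its input to be lexicographically sorted
    let words = Set::from_iter(vec!["new", "subway", "york"])?;

    // max distance 1, with transposition support
    let lev_builder = LevenshteinAutomatonBuilder::new(1, true);

    // one DFA per query word, streamed one after the other,
    // instead of a single union over every automaton at once
    for query in &["subwy", "yrok"] {
        let dfa = lev_builder.build_dfa(query);
        let mut stream = words.search(&dfa).into_stream();
        while let Some(input) = stream.next() {
            // re-evaluate the matched key to recover its edit distance
            let distance = dfa.eval(input).to_u8();
            println!("{:?} matches {:?} at distance {}",
                     query, std::str::from_utf8(input)?, distance);
        }
    }
    Ok(())
}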
From d9c9fafd78ed4a6754a6386975e8ebae84177777 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Fri, 16 Aug 2019 15:01:25 +0200
Subject: [PATCH 13/19] feat: Fetch doc indexes while there is time

---
 meilidb-core/src/query_builder.rs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index 636248c36..76e47e1ab 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -1,7 +1,7 @@
 use std::hash::Hash;
 use std::ops::Range;
 use std::rc::Rc;
-use std::time::Instant;
+use std::time::{Instant, Duration};
 use std::{mem, cmp, cmp::Reverse};

 use fst::{Streamer, IntoStreamer};
@@ -325,10 +325,11 @@ where S: Store,
         let mut matches = Vec::new();
         let mut highlights = Vec::new();

-        let mut query_db = std::time::Duration::default();
+        let fetching_end_time = Instant::now() + Duration::from_millis(30);
+        let mut query_db = Duration::default();

         let start = Instant::now();
-        for automaton in automatons {
+        'automatons: for automaton in automatons {
             let Automaton { index, is_exact, query_len, dfa, .. } = automaton;
             let mut stream = words.search(&dfa).into_stream();
@@ -345,6 +346,11 @@ where S: Store,
             query_db += start.elapsed();

             for di in doc_indexes.as_slice() {
+
+                if Instant::now() > fetching_end_time {
+                    break 'automatons
+                }
+
                 let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
                 if let Some(attribute) = attribute {
                     let match_ = TmpMatch {

From b7b60b5fe5775c689985cb88382d0905fb1f1a44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Fri, 16 Aug 2019 16:35:19 +0200
Subject: [PATCH 14/19] feat: Introduce a new thread to avoid waiting on doc indexes fetches

---
 meilidb-core/Cargo.toml             |   1 +
 meilidb-core/src/lib.rs             |   2 +
 meilidb-core/src/query_builder.rs   | 140 ++++++++++++++++++----------
 meilidb-core/src/reordered_attrs.rs |   2 +-
 4 files changed, 93 insertions(+), 52 deletions(-)

diff --git a/meilidb-core/Cargo.toml b/meilidb-core/Cargo.toml
index 25fb57119..29d2e61ef 100644
--- a/meilidb-core/Cargo.toml
+++ b/meilidb-core/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2018"

 [dependencies]
 byteorder = "1.3.1"
+crossbeam-channel = "0.3.9"
 deunicode = "1.0.0"
 hashbrown = "0.2.2"
 lazy_static = "1.2.0"

diff --git a/meilidb-core/src/lib.rs b/meilidb-core/src/lib.rs
index 6f6e46359..0a7844292 100644
--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@@ -1,3 +1,5 @@
+#![feature(checked_duration_since)]
+
 #[cfg(test)]
 #[macro_use]
 extern crate assert_matches;

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index 76e47e1ab..97a750d18 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -2,14 +2,15 @@ use std::hash::Hash;
 use std::ops::Range;
 use std::rc::Rc;
 use std::time::{Instant, Duration};
-use std::{mem, cmp, cmp::Reverse};
+use std::{iter, mem, cmp, cmp::Reverse};

 use fst::{Streamer, IntoStreamer};
 use hashbrown::HashMap;
 use levenshtein_automata::DFA;
-use log::info;
+use log::{info, error};
 use meilidb_tokenizer::{is_cjk, split_query_string};
 use rayon::slice::ParallelSliceMut;
+use rayon::iter::{ParallelIterator, ParallelBridge};
 use sdset::SetBuf;
 use slice_group_by::{GroupBy, GroupByMut};
@@ -315,66 +316,101 @@ fn multiword_rewrite_matches(
 }

 impl<'c, S, FI> QueryBuilder<'c, S, FI>
-where S: Store,
+where S: 'static + Store + Send + Clone,
+      S::Error: Send,
 {
     fn query_all(&self, query: &str) -> Result<Vec<RawDocument>, S::Error> {
         let (automatons, query_enhancer) = generate_automatons(query, &self.store)?;
-        let words = self.store.words()?;
-        let searchables = self.searchable_attrs.as_ref();
+        let searchables = self.searchable_attrs.clone();
+        let store = self.store.clone();

         let mut matches = Vec::new();
         let mut highlights = Vec::new();

-        let fetching_end_time = Instant::now() + Duration::from_millis(30);
-        let mut query_db = Duration::default();
+        let recv_end_time = Instant::now() + Duration::from_millis(30);

         let start = Instant::now();
-        'automatons: for automaton in automatons {
-            let Automaton { index, is_exact, query_len, dfa, ..
-            let mut stream = words.search(&dfa).into_stream();
+        let (sender, receiver) = crossbeam_channel::bounded(10);
 
-            while let Some(input) = stream.next() {
-                let distance = dfa.eval(input).to_u8();
-                let is_exact = is_exact && distance == 0 && input.len() == query_len;
-
-                let start = Instant::now();
-                let doc_indexes = self.store.word_indexes(input)?;
-                let doc_indexes = match doc_indexes {
-                    Some(doc_indexes) => doc_indexes,
-                    None => continue,
-                };
-                query_db += start.elapsed();
-
-                for di in doc_indexes.as_slice() {
-
-                    if Instant::now() > fetching_end_time {
-                        break 'automatons
-                    }
-
-                    let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
-                    if let Some(attribute) = attribute {
-                        let match_ = TmpMatch {
-                            query_index: index as u32,
-                            distance,
-                            attribute,
-                            word_index: di.word_index,
-                            is_exact,
-                        };
-
-                        let highlight = Highlight {
-                            attribute: di.attribute,
-                            char_index: di.char_index,
-                            char_length: di.char_length,
-                        };
-
-                        matches.push((di.document_id, match_));
-                        highlights.push((di.document_id, highlight));
-                    }
-                }
-            }
-        }
+        rayon::spawn(move || {
+            enum Error<E> {
+                SendError,
+                StoreError(E),
+            }
+
+            let result = automatons
+                .into_iter()
+                .par_bridge()
+                .try_for_each_with((sender, store, searchables.as_ref()), |data, automaton| {
+                    let (sender, store, searchables) = data;
+                    let Automaton { index, is_exact, query_len, dfa, .. } = automaton;
+
+                    let words = store.words().map_err(Error::StoreError)?;
+                    let mut stream = words.search(&dfa).into_stream();
+
+                    let mut matches = Vec::new();
+                    let mut highlights = Vec::new();
+
+                    while let Some(input) = stream.next() {
+                        let distance = dfa.eval(input).to_u8();
+                        let is_exact = is_exact && distance == 0 && input.len() == query_len;
+
+                        let doc_indexes = store.word_indexes(input).map_err(Error::StoreError)?;
+                        let doc_indexes = match doc_indexes {
+                            Some(doc_indexes) => doc_indexes,
+                            None => continue,
+                        };
+
+                        matches.reserve(doc_indexes.len());
+                        highlights.reserve(doc_indexes.len());
+
+                        for di in doc_indexes.as_slice() {
+
+                            let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
+                            if let Some(attribute) = attribute {
+                                let match_ = TmpMatch {
+                                    query_index: index as u32,
+                                    distance,
+                                    attribute,
+                                    word_index: di.word_index,
+                                    is_exact,
+                                };
+
+                                let highlight = Highlight {
+                                    attribute: di.attribute,
+                                    char_index: di.char_index,
+                                    char_length: di.char_length,
+                                };
+
+                                matches.push((di.document_id, match_));
+                                highlights.push((di.document_id, highlight));
+                            }
+                        }
+                    }
+
+                    sender.send((matches, highlights)).map_err(|_| Error::SendError)
+                });
+
+            if let Err(Error::StoreError(e)) = result {
+                error!("{}", e);
+            }
+        });
+
+        let iter = receiver.recv().into_iter().chain(iter::from_fn(|| {
+            match recv_end_time.checked_duration_since(Instant::now()) {
+                Some(timeout) => receiver.recv_timeout(timeout).ok(),
+                None => None,
+            }
+        }));
+
+        for (mut rcv_matches, mut rcv_highlights) in iter {
+            matches.append(&mut rcv_matches);
+            highlights.append(&mut rcv_highlights);
+        }
 
-        info!("main query all took {:.2?} (get indexes {:.2?})", start.elapsed(), query_db);
+        drop(receiver);
+
+        info!("main query all took {:.2?}", start.elapsed());
         info!("{} total matches to rewrite", matches.len());
 
         let start = Instant::now();
@@ -401,7 +437,8 @@ where S: Store,
 }
 
 impl<'c, S, FI> QueryBuilder<'c, S, FI>
-where S: Store,
+where S: 'static + Store + Send + Clone,
+      S::Error: Send,
       FI: Fn(DocumentId) -> bool,
 {
     pub fn query(self, query: &str, range: Range<usize>) -> Result<Vec<Document>, S::Error> {
@@ -478,7 +515,8 @@ impl<'c, I, FI, FD> DistinctQueryBuilder<'c, I, FI, FD>
 }
 
 impl<'c, S, FI, FD, K> DistinctQueryBuilder<'c, S, FI, FD>
-where S: Store,
+where S: 'static + Store + Send + Clone,
+      S::Error: Send,
       FI: Fn(DocumentId) -> bool,
       FD: Fn(DocumentId) -> Option<K>,
       K: Hash + Eq,

diff --git a/meilidb-core/src/reordered_attrs.rs b/meilidb-core/src/reordered_attrs.rs
index ad7b2c324..ed11045ab 100644
--- a/meilidb-core/src/reordered_attrs.rs
+++ b/meilidb-core/src/reordered_attrs.rs
@@ -1,4 +1,4 @@
-#[derive(Default)]
+#[derive(Default, Clone)]
 pub struct ReorderedAttrs {
     count: usize,
     reorders: Vec<Option<u16>>,

From 0ee56314fb1fc82d99341d85bbfcb2c9a1e80502 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Mon, 19 Aug 2019 11:10:54 +0200
Subject: [PATCH 15/19] feat: Try to simplify Store trait bound with a rayon
 scope

---
 meilidb-core/src/query_builder.rs | 168 +++++++++++++++---------------
 1 file changed, 85 insertions(+), 83 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index 97a750d18..b436f8604 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -316,128 +316,130 @@ fn multiword_rewrite_matches(
 }
 
 impl<'c, S, FI> QueryBuilder<'c, S, FI>
-where S: 'static + Store + Send + Clone,
+where S: Store + Sync,
       S::Error: Send,
 {
     fn query_all(&self, query: &str) -> Result<Vec<RawDocument>, S::Error> {
         let (automatons, query_enhancer) = generate_automatons(query, &self.store)?;
-        let searchables = self.searchable_attrs.clone();
-        let store = self.store.clone();
+        let searchables = self.searchable_attrs.as_ref();
+        let store = &self.store;
 
-        let mut matches = Vec::new();
-        let mut highlights = Vec::new();
-
-        let recv_end_time = Instant::now() + Duration::from_millis(30);
-        let start = Instant::now();
-
-        let (sender, receiver) = crossbeam_channel::bounded(10);
-
-        rayon::spawn(move || {
+        rayon::scope(move |s| {
             enum Error<E> {
                 SendError,
                 StoreError(E),
             }
 
-            let result = automatons
-                .into_iter()
-                .par_bridge()
-                .try_for_each_with((sender, store, searchables.as_ref()), |data, automaton| {
-                    let (sender, store, searchables) = data;
-                    let Automaton { index, is_exact, query_len, dfa, .. } = automaton;
+            let mut matches = Vec::new();
+            let mut highlights = Vec::new();
 
-                    let words = store.words().map_err(Error::StoreError)?;
-                    let mut stream = words.search(&dfa).into_stream();
+            let recv_end_time = Instant::now() + Duration::from_millis(30);
+            let start = Instant::now();
 
-                    let mut matches = Vec::new();
-                    let mut highlights = Vec::new();
+            let (sender, receiver) = crossbeam_channel::bounded(10);
 
-                    while let Some(input) = stream.next() {
-                        let distance = dfa.eval(input).to_u8();
-                        let is_exact = is_exact && distance == 0 && input.len() == query_len;
+            s.spawn(move |_| {
+                let result = automatons
+                    .into_iter()
+                    .par_bridge()
+                    .try_for_each_with((sender, store, searchables), |data, automaton| {
+                        let (sender, store, searchables) = data;
+                        let Automaton { index, is_exact, query_len, dfa, .. } = automaton;
-                        let doc_indexes = store.word_indexes(input).map_err(Error::StoreError)?;
-                        let doc_indexes = match doc_indexes {
-                            Some(doc_indexes) => doc_indexes,
-                            None => continue,
-                        };
+                        let words = store.words().map_err(Error::StoreError)?;
+                        let mut stream = words.search(&dfa).into_stream();
 
-                        matches.reserve(doc_indexes.len());
-                        highlights.reserve(doc_indexes.len());
+                        let mut matches = Vec::new();
+                        let mut highlights = Vec::new();
 
-                        for di in doc_indexes.as_slice() {
+                        while let Some(input) = stream.next() {
+                            let distance = dfa.eval(input).to_u8();
+                            let is_exact = is_exact && distance == 0 && input.len() == query_len;
 
-                            let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
-                            if let Some(attribute) = attribute {
-                                let match_ = TmpMatch {
-                                    query_index: index as u32,
-                                    distance,
-                                    attribute,
-                                    word_index: di.word_index,
-                                    is_exact,
-                                };
+                            let doc_indexes = store.word_indexes(input).map_err(Error::StoreError)?;
+                            let doc_indexes = match doc_indexes {
+                                Some(doc_indexes) => doc_indexes,
+                                None => continue,
+                            };
 
-                                let highlight = Highlight {
-                                    attribute: di.attribute,
-                                    char_index: di.char_index,
-                                    char_length: di.char_length,
-                                };
+                            matches.reserve(doc_indexes.len());
+                            highlights.reserve(doc_indexes.len());
 
-                                matches.push((di.document_id, match_));
-                                highlights.push((di.document_id, highlight));
+                            for di in doc_indexes.as_slice() {
+
+                                let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
+                                if let Some(attribute) = attribute {
+                                    let match_ = TmpMatch {
+                                        query_index: index as u32,
+                                        distance,
+                                        attribute,
+                                        word_index: di.word_index,
+                                        is_exact,
+                                    };
+
+                                    let highlight = Highlight {
+                                        attribute: di.attribute,
+                                        char_index: di.char_index,
+                                        char_length: di.char_length,
+                                    };
+
+                                    matches.push((di.document_id, match_));
+                                    highlights.push((di.document_id, highlight));
+                                }
                             }
                         }
 
-                    sender.send((matches, highlights)).map_err(|_| Error::SendError)
-                });
+                        sender.send((matches, highlights)).map_err(|_| Error::SendError)
+                    });
 
                 if let Err(Error::StoreError(e)) = result {
                     error!("{}", e);
                 }
-        });
+            });
 
-        let iter = receiver.recv().into_iter().chain(iter::from_fn(|| {
-            match recv_end_time.checked_duration_since(Instant::now()) {
-                Some(timeout) => receiver.recv_timeout(timeout).ok(),
-                None => None,
+            let iter = receiver.recv().into_iter().chain(iter::from_fn(|| {
+                match recv_end_time.checked_duration_since(Instant::now()) {
+                    Some(timeout) => receiver.recv_timeout(timeout).ok(),
+                    None => None,
+                }
+            }));
+
+            for (mut rcv_matches, mut rcv_highlights) in iter {
+                matches.append(&mut rcv_matches);
+                highlights.append(&mut rcv_highlights);
             }
-        }));
 
-        for (mut rcv_matches, mut rcv_highlights) in iter {
-            matches.append(&mut rcv_matches);
-            highlights.append(&mut rcv_highlights);
-        }
+            drop(receiver);
 
-        drop(receiver);
+            info!("main query all took {:.2?}", start.elapsed());
+            info!("{} total matches to rewrite", matches.len());
 
-        info!("main query all took {:.2?}", start.elapsed());
-        info!("{} total matches to rewrite", matches.len());
+            let start = Instant::now();
+            let matches = multiword_rewrite_matches(matches, &query_enhancer);
+            info!("multiword rewrite took {:.2?}", start.elapsed());
 
-        let start = Instant::now();
-        let matches = multiword_rewrite_matches(matches, &query_enhancer);
-        info!("multiword rewrite took {:.2?}", start.elapsed());
+            let start = Instant::now();
+            let highlights = {
+                highlights.par_sort_unstable_by_key(|(id, _)| *id);
+                SetBuf::new_unchecked(highlights)
+            };
+            info!("sorting highlights took {:.2?}", start.elapsed());
-        let start = Instant::now();
-        let highlights = {
-            highlights.par_sort_unstable_by_key(|(id, _)| *id);
-            SetBuf::new_unchecked(highlights)
-        };
-        info!("sorting highlights took {:.2?}", start.elapsed());
+            info!("{} total matches to classify", matches.len());
 
-        info!("{} total matches to classify", matches.len());
+            let start = Instant::now();
+            let raw_documents = raw_documents_from(matches, highlights);
+            info!("making raw documents took {:.2?}", start.elapsed());
 
-        let start = Instant::now();
-        let raw_documents = raw_documents_from(matches, highlights);
-        info!("making raw documents took {:.2?}", start.elapsed());
+            info!("{} total documents to classify", raw_documents.len());
 
-        info!("{} total documents to classify", raw_documents.len());
-
-        Ok(raw_documents)
+            Ok(raw_documents)
+        })
     }
 }
 
 impl<'c, S, FI> QueryBuilder<'c, S, FI>
-where S: 'static + Store + Send + Clone,
+where S: Store + Sync,
       S::Error: Send,
       FI: Fn(DocumentId) -> bool,
 {
@@ -515,7 +517,7 @@ impl<'c, I, FI, FD> DistinctQueryBuilder<'c, I, FI, FD>
 }
 
 impl<'c, S, FI, FD, K> DistinctQueryBuilder<'c, S, FI, FD>
-where S: 'static + Store + Send + Clone,
+where S: Store + Sync,
       S::Error: Send,
       FI: Fn(DocumentId) -> bool,
       FD: Fn(DocumentId) -> Option<K>,
       K: Hash + Eq,

From 7dc9ea78fab427202e6b613366af0b6eea79cbda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Sun, 18 Aug 2019 18:57:41 +0200
Subject: [PATCH 16/19] feat: Make the automaton DFA construction lazy

---
 meilidb-core/src/query_builder.rs | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index b436f8604..2da52189b 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -29,17 +29,27 @@ struct Automaton {
     ngram: usize,
     query_len: usize,
     is_exact: bool,
-    dfa: DFA,
+    is_prefix: bool,
+    query: String,
 }
 
 impl Automaton {
+    fn dfa(&self) -> DFA {
+        if self.is_prefix {
+            build_prefix_dfa(&self.query)
+        } else {
+            build_dfa(&self.query)
+        }
+    }
+
     fn exact(index: usize, ngram: usize, query: &str) -> Automaton {
         Automaton {
             index,
             ngram,
             query_len: query.len(),
             is_exact: true,
-            dfa: build_dfa(query),
+            is_prefix: false,
+            query: query.to_string(),
         }
     }
 
@@ -49,7 +59,8 @@ impl Automaton {
             ngram,
             query_len: query.len(),
             is_exact: true,
-            dfa: build_prefix_dfa(query),
+            is_prefix: true,
+            query: query.to_string(),
         }
     }
 
@@ -59,7 +70,8 @@ impl Automaton {
             ngram,
             query_len: query.len(),
             is_exact: false,
-            dfa: build_dfa(query),
+            is_prefix: false,
+            query: query.to_string(),
         }
     }
 }
@@ -344,7 +356,8 @@ where S: Store + Sync,
                     .par_bridge()
                     .try_for_each_with((sender, store, searchables), |data, automaton| {
                         let (sender, store, searchables) = data;
-                        let Automaton { index, is_exact, query_len, dfa, .. } = automaton;
+                        let Automaton { index, is_exact, query_len, .. } = automaton;
+                        let dfa = automaton.dfa();
 
                         let words = store.words().map_err(Error::StoreError)?;
                         let mut stream = words.search(&dfa).into_stream();

From 67302d09f34a2f1c2efea4b304aa0970a5ab64aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Sun, 18 Aug 2019 18:58:38 +0200
Subject: [PATCH 17/19] feat: Multiword rewrite while there is time

---
 meilidb-core/src/query_builder.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index 2da52189b..eae3e4bba 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -239,6 +239,12 @@ fn multiword_rewrite_matches(
     // for each attribute of each document
     for same_document_attribute in matches.linear_group_by_key(|(id, m)| (*id, m.attribute)) {
 
+        let elapsed = start.elapsed();
+        if elapsed > Duration::from_millis(10) {
+            info!("abort multiword rewrite after {:.2?}", elapsed);
+            break;
+        }
+
         // padding will only be applied
         // to word indices in the same attribute
         let mut padding = 0;

From 9c5ec110e5d1c5b3fbe7775efbb0a7b46eec5ed4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Wed, 28 Aug 2019 13:23:03 +0200
Subject: [PATCH 18/19] feat: Introduce a way to enable or disable query
 timeouts

---
 meilidb-core/src/query_builder.rs             | 34 +++++++++++++++----
 .../src/database/synonyms_addition.rs         |  2 +-
 meilidb/examples/query-database.rs            |  2 +-
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index eae3e4bba..5847f9f1b 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -185,6 +185,7 @@ pub struct QueryBuilder<'c, S, FI = fn(DocumentId) -> bool> {
     criteria: Criteria<'c>,
     searchable_attrs: Option<ReorderedAttrs>,
     filter: Option<FI>,
+    fetch_timeout: Option<Duration>,
 }
 
 impl<'c, S> QueryBuilder<'c, S, fn(DocumentId) -> bool> {
@@ -193,7 +194,7 @@ impl<'c, S> QueryBuilder<'c, S, fn(DocumentId) -> bool> {
     }
 
     pub fn with_criteria(store: S, criteria: Criteria<'c>) -> Self {
-        QueryBuilder { store, criteria, searchable_attrs: None, filter: None }
+        QueryBuilder { store, criteria, searchable_attrs: None, filter: None, fetch_timeout: None }
     }
 }
 
@@ -207,9 +208,14 @@ impl<'c, S, FI> QueryBuilder<'c, S, FI>
             criteria: self.criteria,
             searchable_attrs: self.searchable_attrs,
             filter: Some(function),
+            fetch_timeout: self.fetch_timeout,
         }
     }
 
+    pub fn with_fetch_timeout(self, timeout: Duration) -> QueryBuilder<'c, S, FI> {
+        QueryBuilder { fetch_timeout: Some(timeout), ..self }
+    }
+
     pub fn with_distinct<F, K>(self, function: F, size: usize) -> DistinctQueryBuilder<'c, S, FI, F>
     where F: Fn(DocumentId) -> Option<K>,
           K: Hash + Eq,
@@ -226,6 +232,7 @@ impl<'c, S, FI> QueryBuilder<'c, S, FI>
 fn multiword_rewrite_matches(
     mut matches: Vec<(DocumentId, TmpMatch)>,
     query_enhancer: &QueryEnhancer,
+    timeout: Option<Duration>,
 ) -> SetBuf<(DocumentId, TmpMatch)>
 {
     let mut padded_matches = Vec::with_capacity(matches.len());
@@ -240,7 +247,7 @@ fn multiword_rewrite_matches(
     for same_document_attribute in matches.linear_group_by_key(|(id, m)| (*id, m.attribute)) {
 
         let elapsed = start.elapsed();
-        if elapsed > Duration::from_millis(10) {
+        if timeout.map_or(false, |timeout| elapsed > timeout) {
             info!("abort multiword rewrite after {:.2?}", elapsed);
             break;
         }
@@ -341,6 +348,7 @@ where S: Store + Sync,
         let (automatons, query_enhancer) = generate_automatons(query, &self.store)?;
         let searchables = self.searchable_attrs.as_ref();
         let store = &self.store;
+        let fetch_timeout = &self.fetch_timeout;
 
         rayon::scope(move |s| {
@@ -351,10 +359,10 @@ where S: Store + Sync,
             let mut matches = Vec::new();
             let mut highlights = Vec::new();
 
-            let recv_end_time = Instant::now() + Duration::from_millis(30);
+            let recv_end_time = fetch_timeout.map(|d| Instant::now() + d * 75 / 100);
             let start = Instant::now();
 
-            let (sender, receiver) = crossbeam_channel::bounded(10);
+            let (sender, receiver) = crossbeam_channel::unbounded();
 
             s.spawn(move |_| {
                 let result = automatons
@@ -417,6 +425,11 @@ where S: Store + Sync,
             });
 
             let iter = receiver.recv().into_iter().chain(iter::from_fn(|| {
+                let recv_end_time = match recv_end_time {
+                    Some(time) => time,
+                    None => return receiver.recv().ok(),
+                };
+
                 match recv_end_time.checked_duration_since(Instant::now()) {
                     Some(timeout) => receiver.recv_timeout(timeout).ok(),
                     None => None,
                 }
@@ -434,7 +447,8 @@ where S: Store + Sync,
             info!("{} total matches to rewrite", matches.len());
 
             let start = Instant::now();
-            let matches = multiword_rewrite_matches(matches, &query_enhancer);
+            let timeout = fetch_timeout.map(|d| d * 25 / 100);
+            let matches = multiword_rewrite_matches(matches, &query_enhancer, timeout);
             info!("multiword rewrite took {:.2?}", start.elapsed());
 
             let start = Instant::now();
@@ -526,7 +540,15 @@ impl<'c, I, FI, FD> DistinctQueryBuilder<'c, I, FI, FD>
         DistinctQueryBuilder {
             inner: self.inner.with_filter(function),
             function: self.function,
-            size: self.size
+            size: self.size,
+        }
+    }
+
+    pub fn with_fetch_timeout(self, timeout: Duration) -> DistinctQueryBuilder<'c, I, FI, FD> {
+        DistinctQueryBuilder {
+            inner: self.inner.with_fetch_timeout(timeout),
+            function: self.function,
+            size: self.size,
         }
     }
 
diff --git a/meilidb-data/src/database/synonyms_addition.rs b/meilidb-data/src/database/synonyms_addition.rs
index c37f0475a..563cb228f 100644
--- a/meilidb-data/src/database/synonyms_addition.rs
+++ b/meilidb-data/src/database/synonyms_addition.rs
@@ -73,7 +73,7 @@ impl<'a> SynonymsAddition<'a> {
 
         // update the "consistent" view of the Index
         let words = main.words_set()?.unwrap_or_default();
-        let ranked_map = lease_inner.ranked_map.clone();;
+        let ranked_map = lease_inner.ranked_map.clone();
         let schema = lease_inner.schema.clone();
         let raw = lease_inner.raw.clone();
         lease_inner.raw.compact();

diff --git a/meilidb/examples/query-database.rs b/meilidb/examples/query-database.rs
index e6368727a..58f91c383 100644
--- a/meilidb/examples/query-database.rs
+++ b/meilidb/examples/query-database.rs
@@ -159,7 +159,7 @@ fn main() -> Result<(), Box<dyn Error>> {
             Ok(query) => {
                 let start_total = Instant::now();
 
-                let builder = index.query_builder();
+                let builder = index.query_builder().with_fetch_timeout(Duration::from_millis(40));
                 let documents = builder.query(&query, 0..opt.number_results)?;
 
                 let mut retrieve_duration = Duration::default();

From 8030a822ab59ed253a3fa99353b491eb545f928b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Wed, 28 Aug 2019 13:42:20 +0200
Subject: [PATCH 19/19] test: Add a way to set up the fetch timeout of the
 query-database example

---
 meilidb/examples/query-database.rs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/meilidb/examples/query-database.rs b/meilidb/examples/query-database.rs
index 58f91c383..d939c0b70 100644
--- a/meilidb/examples/query-database.rs
+++ b/meilidb/examples/query-database.rs
@@ -24,6 +24,9 @@ pub struct Opt {
     #[structopt(parse(from_os_str))]
     pub database_path: PathBuf,
 
+    #[structopt(long = "fetch-timeout-ms")]
+    pub fetch_timeout_ms: Option<u64>,
+
     /// Fields that must be displayed.
     pub displayed_fields: Vec<String>,
@@ -159,7 +162,13 @@ fn main() -> Result<(), Box<dyn Error>> {
             Ok(query) => {
                 let start_total = Instant::now();
 
-                let builder = index.query_builder().with_fetch_timeout(Duration::from_millis(40));
+                let builder = match opt.fetch_timeout_ms {
+                    Some(timeout_ms) => {
+                        let timeout = Duration::from_millis(timeout_ms);
+                        index.query_builder().with_fetch_timeout(timeout)
+                    },
+                    None => index.query_builder(),
+                };
                 let documents = builder.query(&query, 0..opt.number_results)?;
 
                 let mut retrieve_duration = Duration::default();
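
A note on the fetch-budget pattern behind patches 13 and 17: both bound a loop with a wall-clock budget, checking the deadline inside the innermost iteration and using a labeled break so that partial results gathered so far survive. A minimal standalone sketch of that pattern (the names here are illustrative, not meilidb APIs):

    use std::time::{Duration, Instant};

    // Deadline-bounded nested loops: compute the deadline once, test it in
    // the innermost loop, and break out of every level with a labeled break.
    fn process_with_budget(batches: &[Vec<u32>], budget: Duration) -> Vec<u32> {
        let deadline = Instant::now() + budget;
        let mut kept = Vec::new();
        'batches: for batch in batches {
            for &item in batch {
                if Instant::now() > deadline {
                    break 'batches; // partial results are still returned
                }
                kept.push(item);
            }
        }
        kept
    }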
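The receive side of patches 14 and 18 is worth extracting: block for the first batch so a query always returns something, then keep draining the channel only while the optional deadline allows. `Instant::checked_duration_since` yields `None` once the deadline has passed, which ends the `iter::from_fn` iterator and therefore the drain; the function was nightly-only at the time, hence the `#![feature(checked_duration_since)]` attribute in patch 14, and it was later stabilized. A sketch of that loop on its own, with `None` meaning no timeout as in patch 18:

    use std::iter;
    use std::time::Instant;

    use crossbeam_channel::Receiver;

    fn drain_until<T>(receiver: &Receiver<T>, recv_end_time: Option<Instant>) -> Vec<T> {
        // Block for the first message, then poll with the remaining budget.
        receiver.recv().into_iter().chain(iter::from_fn(|| {
            let recv_end_time = match recv_end_time {
                Some(time) => time,
                None => return receiver.recv().ok(), // no timeout configured
            };
            match recv_end_time.checked_duration_since(Instant::now()) {
                Some(timeout) => receiver.recv_timeout(timeout).ok(),
                None => None, // budget exhausted, stop draining
            }
        })).collect()
    }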
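Patch 15's bound change from `S: 'static + Store + Send + Clone` to `S: Store + Sync` works because `rayon::spawn` requires a `'static` closure, while `rayon::scope` guarantees its tasks finish before the scope returns, so scoped tasks may borrow from the caller's stack. A reduced sketch of the idea, not the meilidb code itself:

    fn total(values: &[u64]) -> u64 {
        let (sender, receiver) = crossbeam_channel::unbounded();
        rayon::scope(move |s| {
            s.spawn(move |_| {
                // `values` is only borrowed; no `'static` bound and no
                // cloning of the owning store is required.
                let _ = sender.send(values.iter().sum::<u64>());
            });
            // The scope returns the closure's value once all tasks join.
            receiver.recv().unwrap_or(0)
        })
    }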
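Finally, patch 18 splits the optional fetch budget instead of keeping two fixed constants: roughly 75% goes to receiving matches from the workers and 25% to the multiword rewrite pass. `Duration` implements multiplication and division by an integer, so the arithmetic needs no conversion; a sketch assuming the same 75/25 split:

    use std::time::Duration;

    fn split_budget(fetch_timeout: Option<Duration>) -> (Option<Duration>, Option<Duration>) {
        let recv_budget = fetch_timeout.map(|d| d * 75 / 100);    // receiving matches
        let rewrite_budget = fetch_timeout.map(|d| d * 25 / 100); // multiword rewrite
        (recv_budget, rewrite_budget)
    }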