From a3a28c56fad032cf832bc610b8051d0ffd7c76a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 2 Feb 2019 14:17:50 +0100 Subject: [PATCH 1/6] feat: Replace compressed Match fields by uncompressed ones --- examples/query-database.rs | 10 +++---- src/data/doc_indexes.rs | 36 ++++++++++++++++-------- src/database/serde/indexer_serializer.rs | 22 ++++++--------- src/lib.rs | 26 +++++++++++------ src/rank/query_builder.rs | 4 ++- 5 files changed, 56 insertions(+), 42 deletions(-) diff --git a/examples/query-database.rs b/examples/query-database.rs index 0a8771a51..d1e6a0e17 100644 --- a/examples/query-database.rs +++ b/examples/query-database.rs @@ -70,12 +70,10 @@ fn create_highlight_areas(text: &str, matches: &[Match], attribute: SchemaAttr) let mut byte_indexes = BTreeMap::new(); for match_ in matches { - let match_attribute = match_.attribute.attribute(); + let match_attribute = match_.attribute; if SchemaAttr::new(match_attribute) == attribute { - let word_area = match_.word_area; - - let char_index = word_area.char_index() as usize; - let char_length = word_area.length() as usize; + let char_index = match_.char_index as usize; + let char_length = match_.char_length as usize; let (byte_index, byte_length) = char_to_byte_range(char_index, char_length, text); match byte_indexes.entry(byte_index) { @@ -151,7 +149,7 @@ fn main() -> Result<(), Box> { let mut matching_attributes = HashSet::new(); for _match in doc.matches { - let attr = SchemaAttr::new(_match.attribute.attribute()); + let attr = SchemaAttr::new(_match.attribute); let name = schema.attribute_name(attr); matching_attributes.insert(name); } diff --git a/src/data/doc_indexes.rs b/src/data/doc_indexes.rs index b760765bf..4919b9fa0 100644 --- a/src/data/doc_indexes.rs +++ b/src/data/doc_indexes.rs @@ -158,18 +158,24 @@ mod tests { fn builder_serialize_deserialize() -> Result<(), Box> { let a = DocIndex { document_id: DocumentId(0), - attribute: Attribute::new_faillible(3, 11), - word_area: WordArea::new_faillible(30, 4) + attribute: 3, + word_index: 11, + char_index: 30, + char_length: 4, }; let b = DocIndex { document_id: DocumentId(1), - attribute: Attribute::new_faillible(4, 21), - word_area: WordArea::new_faillible(35, 6) + attribute: 4, + word_index: 21, + char_index: 35, + char_length: 6, }; let c = DocIndex { document_id: DocumentId(2), - attribute: Attribute::new_faillible(8, 2), - word_area: WordArea::new_faillible(89, 6) + attribute: 8, + word_index: 2, + char_index: 89, + char_length: 6, }; let mut builder = DocIndexesBuilder::memory(); @@ -193,18 +199,24 @@ mod tests { fn serialize_deserialize() -> Result<(), Box> { let a = DocIndex { document_id: DocumentId(0), - attribute: Attribute::new_faillible(3, 11), - word_area: WordArea::new_faillible(30, 4) + attribute: 3, + word_index: 11, + char_index: 30, + char_length: 4, }; let b = DocIndex { document_id: DocumentId(1), - attribute: Attribute::new_faillible(4, 21), - word_area: WordArea::new_faillible(35, 6) + attribute: 4, + word_index: 21, + char_index: 35, + char_length: 6, }; let c = DocIndex { document_id: DocumentId(2), - attribute: Attribute::new_faillible(8, 2), - word_area: WordArea::new_faillible(89, 6) + attribute: 8, + word_index: 2, + char_index: 89, + char_length: 6, }; let mut builder = DocIndexesBuilder::memory(); diff --git a/src/database/serde/indexer_serializer.rs b/src/database/serde/indexer_serializer.rs index bdbfb281d..6271e1b7b 100644 --- a/src/database/serde/indexer_serializer.rs +++ 
b/src/database/serde/indexer_serializer.rs @@ -54,10 +54,8 @@ where B: TokenizerBuilder let document_id = self.document_id; // FIXME must u32::try_from instead - let attribute = match Attribute::new(self.attribute.0, word_index as u32) { - Ok(attribute) => attribute, - Err(_) => return Ok(()), - }; + let attribute = self.attribute.0; + let word_index = word_index as u32; // insert the exact representation let word_lower = word.to_lowercase(); @@ -68,21 +66,17 @@ where B: TokenizerBuilder // and the unidecoded lowercased version let word_unidecoded = unidecode::unidecode(word).to_lowercase(); if word_lower != word_unidecoded { - let word_area = match WordArea::new(char_index as u32, length) { - Ok(word_area) => word_area, - Err(_) => return Ok(()), - }; + let char_index = char_index as u32; + let char_length = length; - let doc_index = DocIndex { document_id, attribute, word_area }; + let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length }; self.update.insert_doc_index(word_unidecoded.into_bytes(), doc_index); } - let word_area = match WordArea::new(char_index as u32, length) { - Ok(word_area) => word_area, - Err(_) => return Ok(()), - }; + let char_index = char_index as u32; + let char_length = length; - let doc_index = DocIndex { document_id, attribute, word_area }; + let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length }; self.update.insert_doc_index(word_lower.into_bytes(), doc_index); } Ok(()) diff --git a/src/lib.rs b/src/lib.rs index 19e451f63..5f824b39a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,14 +36,16 @@ pub struct DocIndex { /// The attribute in the document where the word was found /// along with the index in it. - pub attribute: Attribute, + pub attribute: u16, + pub word_index: u32, /// The position in bytes where the word was found /// along with the length of it. /// /// It informs on the original word area in the text indexed /// without needing to run the tokenizer again. - pub word_area: WordArea, + pub char_index: u32, + pub char_length: u16, } /// This structure represent a matching word with informations @@ -68,7 +70,8 @@ pub struct Match { /// The attribute in the document where the word was found /// along with the index in it. - pub attribute: Attribute, + pub attribute: u16, + pub word_index: u32, /// Whether the word that match is an exact match or a prefix. pub is_exact: bool, @@ -78,7 +81,8 @@ pub struct Match { /// /// It informs on the original word area in the text indexed /// without needing to run the tokenizer again. 
- pub word_area: WordArea, + pub char_index: u32, + pub char_length: u16, } impl Match { @@ -86,9 +90,11 @@ impl Match { Match { query_index: 0, distance: 0, - attribute: Attribute::new_faillible(0, 0), + attribute: 0, + word_index: 0, is_exact: false, - word_area: WordArea::new_faillible(0, 0), + char_index: 0, + char_length: 0, } } @@ -96,9 +102,11 @@ impl Match { Match { query_index: u32::max_value(), distance: u8::max_value(), - attribute: Attribute::max_value(), + attribute: u16::max_value(), + word_index: u32::max_value(), is_exact: true, - word_area: WordArea::max_value(), + char_index: u32::max_value(), + char_length: u16::max_value(), } } } @@ -110,6 +118,6 @@ mod tests { #[test] fn docindex_mem_size() { - assert_eq!(mem::size_of::(), 16); + assert_eq!(mem::size_of::(), 24); } } diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index 8146fc7fa..91d645160 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -111,8 +111,10 @@ where D: Deref, query_index: iv.index as u32, distance: distance, attribute: doc_index.attribute, + word_index: doc_index.word_index, is_exact: is_exact, - word_area: doc_index.word_area, + char_index: doc_index.char_index, + char_length: doc_index.char_length, }; matches.entry(doc_index.document_id).or_insert_with(Vec::new).push(match_); } From 455cbf3bf46ad249da18b398086f52f728d513e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 2 Feb 2019 14:22:31 +0100 Subject: [PATCH 2/6] feat: Make the search algorithm become fully data oriented --- Cargo.toml | 1 + src/rank/criterion/exact.rs | 41 +++-- src/rank/criterion/mod.rs | 7 +- src/rank/criterion/number_of_words.rs | 26 +-- src/rank/criterion/sort_by.rs | 2 +- src/rank/criterion/sum_of_typos.rs | 180 ++++--------------- src/rank/criterion/sum_of_words_attribute.rs | 41 +++-- src/rank/criterion/sum_of_words_position.rs | 41 +++-- src/rank/criterion/words_proximity.rs | 96 ++++++---- src/rank/mod.rs | 180 +++++++++++++++++-- src/rank/query_builder.rs | 46 +++-- 11 files changed, 375 insertions(+), 286 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5e7bba1fb..572cbf2aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ serde_derive = "1.0" serde_json = { version = "1.0", features = ["preserve_order"] } slice-group-by = "0.2" unidecode = "0.3" +rayon = "1.0" [dependencies.toml] git = "https://github.com/Kerollmops/toml-rs.git" diff --git a/src/rank/criterion/exact.rs b/src/rank/criterion/exact.rs index 574649ed6..54b5b7b9f 100644 --- a/src/rank/criterion/exact.rs +++ b/src/rank/criterion/exact.rs @@ -1,33 +1,40 @@ use std::cmp::Ordering; -use std::ops::Deref; -use rocksdb::DB; use slice_group_by::GroupBy; -use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; -use crate::database::DatabaseView; -use crate::Match; +use crate::rank::RawDocument; #[inline] -fn contains_exact(matches: &&[Match]) -> bool { - matches.iter().any(|m| m.is_exact) -} +fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize { + let mut count = 0; + let mut index = 0; -#[inline] -fn number_exact_matches(matches: &[Match]) -> usize { - matches.linear_group_by(match_query_index).filter(contains_exact).count() + for group in query_index.linear_group_by(PartialEq::eq) { + let len = group.len(); + count += is_exact[index..index + len].contains(&true) as usize; + index += len; + } + + count } #[derive(Debug, Clone, Copy)] pub struct Exact; -impl Criterion for Exact -where D: Deref -{ - fn evaluate(&self, lhs: 
&Document, rhs: &Document, _: &DatabaseView) -> Ordering { - let lhs = number_exact_matches(&lhs.matches); - let rhs = number_exact_matches(&rhs.matches); +impl Criterion for Exact { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + let lhs = { + let query_index = lhs.query_index(); + let is_exact = lhs.is_exact(); + number_exact_matches(query_index, is_exact) + }; + + let rhs = { + let query_index = rhs.query_index(); + let is_exact = rhs.is_exact(); + number_exact_matches(query_index, is_exact) + }; lhs.cmp(&rhs).reverse() } diff --git a/src/rank/criterion/mod.rs b/src/rank/criterion/mod.rs index a5dc7ab26..c7c547851 100644 --- a/src/rank/criterion/mod.rs +++ b/src/rank/criterion/mod.rs @@ -4,16 +4,13 @@ mod words_proximity; mod sum_of_words_attribute; mod sum_of_words_position; mod exact; -mod sort_by; +// mod sort_by; mod document_id; use std::cmp::Ordering; -use std::ops::Deref; - -use rocksdb::DB; use crate::database::DatabaseView; -use crate::rank::Document; +use crate::rank::RawDocument; pub use self::{ sum_of_typos::SumOfTypos, diff --git a/src/rank/criterion/number_of_words.rs b/src/rank/criterion/number_of_words.rs index ac9ef9858..c8dd1edb4 100644 --- a/src/rank/criterion/number_of_words.rs +++ b/src/rank/criterion/number_of_words.rs @@ -1,28 +1,28 @@ use std::cmp::Ordering; -use std::ops::Deref; -use rocksdb::DB; use slice_group_by::GroupBy; -use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; -use crate::database::DatabaseView; -use crate::Match; +use crate::rank::RawDocument; #[inline] -fn number_of_query_words(matches: &[Match]) -> usize { - matches.linear_group_by(match_query_index).count() +fn number_of_query_words(query_index: &[u32]) -> usize { + query_index.linear_group_by(PartialEq::eq).count() } #[derive(Debug, Clone, Copy)] pub struct NumberOfWords; -impl Criterion for NumberOfWords -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { - let lhs = number_of_query_words(&lhs.matches); - let rhs = number_of_query_words(&rhs.matches); +impl Criterion for NumberOfWords { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + let lhs = { + let query_index = lhs.query_index(); + number_of_query_words(query_index) + }; + let rhs = { + let query_index = rhs.query_index(); + number_of_query_words(query_index) + }; lhs.cmp(&rhs).reverse() } diff --git a/src/rank/criterion/sort_by.rs b/src/rank/criterion/sort_by.rs index 8f1fef11c..53b8bcac1 100644 --- a/src/rank/criterion/sort_by.rs +++ b/src/rank/criterion/sort_by.rs @@ -7,7 +7,7 @@ use serde::de::DeserializeOwned; use crate::rank::criterion::Criterion; use crate::database::DatabaseView; -use crate::rank::Document; +use crate::rank::RawDocument; /// An helper struct that permit to sort documents by /// some of their stored attributes. 
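
All of the criteria rewritten in this patch follow the same data-oriented shape: instead of walking a `&[Match]`, they receive parallel column slices and use runs of equal `query_index` values as group boundaries. The snippet below restates the `number_exact_matches` helper from the exact.rs hunk above as a self-contained program, with a made-up input to show how the columns line up; the only dependency assumed is the slice-group-by 0.2 crate already listed in Cargo.toml.

    use slice_group_by::GroupBy;

    // Matches arrive as parallel column slices, sorted so that matches sharing
    // a query_index form consecutive runs. A criterion walks the query_index
    // column to find group boundaries, then reads the column it cares about
    // through a running offset.
    fn number_exact_matches(query_index: &[u32], is_exact: &[bool]) -> usize {
        let mut count = 0;
        let mut index = 0;

        for group in query_index.linear_group_by(PartialEq::eq) {
            let len = group.len();
            // a query word counts as exact if any of its matches is exact
            count += is_exact[index..index + len].contains(&true) as usize;
            index += len;
        }

        count
    }

    fn main() {
        // three query words; the middle one matched twice, but never exactly
        let query_index = [0u32, 1, 1, 2];
        let is_exact = [true, false, false, true];
        assert_eq!(number_exact_matches(&query_index, &is_exact), 2);
    }

The same offset-plus-group-length walk reappears below in sum_of_typos, sum_of_words_attribute, sum_of_words_position and words_proximity.
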
diff --git a/src/rank/criterion/sum_of_typos.rs b/src/rank/criterion/sum_of_typos.rs index be742e787..5d98a42e7 100644 --- a/src/rank/criterion/sum_of_typos.rs +++ b/src/rank/criterion/sum_of_typos.rs @@ -1,24 +1,20 @@ use std::cmp::Ordering; -use std::ops::Deref; -use rocksdb::DB; use slice_group_by::GroupBy; -use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; -use crate::database::DatabaseView; -use crate::Match; +use crate::rank::RawDocument; #[inline] -fn sum_matches_typos(matches: &[Match]) -> isize { +fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> isize { let mut sum_typos = 0; let mut number_words = 0; + let mut index = 0; - // note that GroupBy will never return an empty group - // so we can do this assumption safely - for group in matches.linear_group_by(match_query_index) { - sum_typos += unsafe { group.get_unchecked(0).distance as isize }; + for group in query_index.linear_group_by(PartialEq::eq) { + sum_typos += distance[index] as isize; number_words += 1; + index += group.len(); } sum_typos - number_words @@ -27,78 +23,42 @@ fn sum_matches_typos(matches: &[Match]) -> isize { #[derive(Debug, Clone, Copy)] pub struct SumOfTypos; -impl Criterion for SumOfTypos -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { - let lhs = sum_matches_typos(&lhs.matches); - let rhs = sum_matches_typos(&rhs.matches); +impl Criterion for SumOfTypos { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + let lhs = { + let query_index = lhs.query_index(); + let distance = lhs.distance(); + sum_matches_typos(query_index, distance) + }; + + let rhs = { + let query_index = rhs.query_index(); + let distance = rhs.distance(); + sum_matches_typos(query_index, distance) + }; lhs.cmp(&rhs) } } - #[cfg(test)] mod tests { use super::*; - use crate::{DocumentId, Attribute, WordArea}; - // typing: "Geox CEO" // // doc0: "Geox SpA: CEO and Executive" // doc1: "Mt. 
Gox CEO Resigns From Bitcoin Foundation" #[test] fn one_typo_reference() { - let doc0 = { - let matches = vec![ - Match { - query_index: 0, - distance: 0, - attribute: Attribute::new_faillible(0, 0), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - Match { - query_index: 1, - distance: 0, - attribute: Attribute::new_faillible(0, 2), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - ]; - Document { - id: DocumentId(0), - matches: matches, - } - }; + let query_index0 = &[0, 1]; + let distance0 = &[0, 0]; - let doc1 = { - let matches = vec![ - Match { - query_index: 0, - distance: 1, - attribute: Attribute::new_faillible(0, 0), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - Match { - query_index: 1, - distance: 0, - attribute: Attribute::new_faillible(0, 2), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - ]; - Document { - id: DocumentId(1), - matches: matches, - } - }; + let query_index1 = &[0, 1]; + let distance1 = &[1, 0]; - let lhs = sum_matches_typos(&doc0.matches); - let rhs = sum_matches_typos(&doc1.matches); + let lhs = sum_matches_typos(query_index0, distance0); + let rhs = sum_matches_typos(query_index1, distance1); assert_eq!(lhs.cmp(&rhs), Ordering::Less); } @@ -108,47 +68,14 @@ mod tests { // doc1: "bouton" #[test] fn no_typo() { - let doc0 = { - let matches = vec![ - Match { - query_index: 0, - distance: 0, - attribute: Attribute::new_faillible(0, 0), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - Match { - query_index: 1, - distance: 0, - attribute: Attribute::new_faillible(0, 1), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - ]; - Document { - id: DocumentId(0), - matches: matches, - } - }; + let query_index0 = &[0, 1]; + let distance0 = &[0, 0]; - let doc1 = { - let matches = vec![ - Match { - query_index: 0, - distance: 0, - attribute: Attribute::new_faillible(0, 0), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - ]; - Document { - id: DocumentId(1), - matches: matches, - } - }; + let query_index1 = &[0]; + let distance1 = &[0]; - let lhs = sum_matches_typos(&doc0.matches); - let rhs = sum_matches_typos(&doc1.matches); + let lhs = sum_matches_typos(query_index0, distance0); + let rhs = sum_matches_typos(query_index1, distance1); assert_eq!(lhs.cmp(&rhs), Ordering::Less); } @@ -158,47 +85,14 @@ mod tests { // doc1: "bouton" #[test] fn one_typo() { - let doc0 = { - let matches = vec![ - Match { - query_index: 0, - distance: 0, - attribute: Attribute::new_faillible(0, 0), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - Match { - query_index: 1, - distance: 1, - attribute: Attribute::new_faillible(0, 1), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - ]; - Document { - id: DocumentId(0), - matches: matches, - } - }; + let query_index0 = &[0, 1]; + let distance0 = &[0, 1]; - let doc1 = { - let matches = vec![ - Match { - query_index: 0, - distance: 0, - attribute: Attribute::new_faillible(0, 0), - is_exact: false, - word_area: WordArea::new_faillible(0, 6) - }, - ]; - Document { - id: DocumentId(1), - matches: matches, - } - }; + let query_index1 = &[0]; + let distance1 = &[0]; - let lhs = sum_matches_typos(&doc0.matches); - let rhs = sum_matches_typos(&doc1.matches); + let lhs = sum_matches_typos(query_index0, distance0); + let rhs = sum_matches_typos(query_index1, distance1); assert_eq!(lhs.cmp(&rhs), Ordering::Equal); } } diff --git a/src/rank/criterion/sum_of_words_attribute.rs 
b/src/rank/criterion/sum_of_words_attribute.rs index fb4910c51..5c42f8552 100644 --- a/src/rank/criterion/sum_of_words_attribute.rs +++ b/src/rank/criterion/sum_of_words_attribute.rs @@ -1,32 +1,39 @@ use std::cmp::Ordering; -use std::ops::Deref; -use rocksdb::DB; use slice_group_by::GroupBy; -use crate::database::DatabaseView; -use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; -use crate::Match; +use crate::rank::RawDocument; #[inline] -fn sum_matches_attributes(matches: &[Match]) -> usize { - // note that GroupBy will never return an empty group - // so we can do this assumption safely - matches.linear_group_by(match_query_index).map(|group| { - unsafe { group.get_unchecked(0).attribute.attribute() as usize } - }).sum() +fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize { + let mut sum_attributes = 0; + let mut index = 0; + + for group in query_index.linear_group_by(PartialEq::eq) { + sum_attributes += attribute[index] as usize; + index += group.len(); + } + + sum_attributes } #[derive(Debug, Clone, Copy)] pub struct SumOfWordsAttribute; -impl Criterion for SumOfWordsAttribute -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { - let lhs = sum_matches_attributes(&lhs.matches); - let rhs = sum_matches_attributes(&rhs.matches); +impl Criterion for SumOfWordsAttribute { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + let lhs = { + let query_index = lhs.query_index(); + let attribute = lhs.attribute(); + sum_matches_attributes(query_index, attribute) + }; + + let rhs = { + let query_index = rhs.query_index(); + let attribute = rhs.attribute(); + sum_matches_attributes(query_index, attribute) + }; lhs.cmp(&rhs) } diff --git a/src/rank/criterion/sum_of_words_position.rs b/src/rank/criterion/sum_of_words_position.rs index 0978ac5fd..ad93dc4a8 100644 --- a/src/rank/criterion/sum_of_words_position.rs +++ b/src/rank/criterion/sum_of_words_position.rs @@ -1,32 +1,39 @@ use std::cmp::Ordering; -use std::ops::Deref; -use rocksdb::DB; use slice_group_by::GroupBy; -use crate::database::DatabaseView; -use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; -use crate::Match; +use crate::rank::RawDocument; #[inline] -fn sum_matches_attribute_index(matches: &[Match]) -> usize { - // note that GroupBy will never return an empty group - // so we can do this assumption safely - matches.linear_group_by(match_query_index).map(|group| { - unsafe { group.get_unchecked(0).attribute.word_index() as usize } - }).sum() +fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u32]) -> usize { + let mut sum_word_index = 0; + let mut index = 0; + + for group in query_index.linear_group_by(PartialEq::eq) { + sum_word_index += word_index[index] as usize; + index += group.len(); + } + + sum_word_index } #[derive(Debug, Clone, Copy)] pub struct SumOfWordsPosition; -impl Criterion for SumOfWordsPosition -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { - let lhs = sum_matches_attribute_index(&lhs.matches); - let rhs = sum_matches_attribute_index(&rhs.matches); +impl Criterion for SumOfWordsPosition { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + let lhs = { + let query_index = lhs.query_index(); + let word_index = lhs.word_index(); + sum_matches_attribute_index(query_index, word_index) + }; + + let rhs = { + let query_index = rhs.query_index(); + let word_index = 
rhs.word_index(); + sum_matches_attribute_index(query_index, word_index) + }; lhs.cmp(&rhs) } diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index a61de6b62..6f101d4d0 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -1,16 +1,17 @@ use std::cmp::{self, Ordering}; -use std::ops::Deref; -use rocksdb::DB; use slice_group_by::GroupBy; -use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; -use crate::database::DatabaseView; -use crate::Match; +use crate::rank::RawDocument; const MAX_DISTANCE: u32 = 8; +#[inline] +fn clone_tuple((a, b): (&T, &U)) -> (T, U) { + (a.clone(), b.clone()) +} + fn index_proximity(lhs: u32, rhs: u32) -> u32 { if lhs < rhs { cmp::min(rhs - lhs, MAX_DISTANCE) @@ -19,30 +20,48 @@ fn index_proximity(lhs: u32, rhs: u32) -> u32 { } } -fn attribute_proximity(lhs: &Match, rhs: &Match) -> u32 { - if lhs.attribute.attribute() != rhs.attribute.attribute() { return MAX_DISTANCE } - index_proximity(lhs.attribute.word_index(), rhs.attribute.word_index()) +fn attribute_proximity((lattr, lwi): (u16, u32), (rattr, rwi): (u16, u32)) -> u32 { + if lattr != rattr { return MAX_DISTANCE } + index_proximity(lwi, rwi) } -fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 { +fn min_proximity((lattr, lwi): (&[u16], &[u32]), (rattr, rwi): (&[u16], &[u32])) -> u32 { let mut min_prox = u32::max_value(); - for a in lhs { - for b in rhs { + for a in lattr.iter().zip(lwi) { + for b in rattr.iter().zip(rwi) { + let a = clone_tuple(a); + let b = clone_tuple(b); min_prox = cmp::min(min_prox, attribute_proximity(a, b)); } } min_prox } -fn matches_proximity(matches: &[Match]) -> u32 { +fn matches_proximity(query_index: &[u32], attribute: &[u16], word_index: &[u32]) -> u32 { let mut proximity = 0; - let mut iter = matches.linear_group_by(match_query_index); - // iterate over groups by windows of size 2 - let mut last = iter.next(); + let mut index = 0; + let mut iter = query_index.linear_group_by(PartialEq::eq); + let mut last = iter.next().map(|group| { + let len = group.len(); + + let rattr = &attribute[index..index + len]; + let rwi = &word_index[index..index + len]; + index += len; + + (rattr, rwi) + }); + while let (Some(lhs), Some(rhs)) = (last, iter.next()) { + let len = rhs.len(); + + let rattr = &attribute[index..index + len]; + let rwi = &word_index[index..index + len]; + let rhs = (rattr, rwi); + proximity += min_proximity(lhs, rhs); last = Some(rhs); + index += len; } proximity @@ -51,18 +70,26 @@ fn matches_proximity(matches: &[Match]) -> u32 { #[derive(Debug, Clone, Copy)] pub struct WordsProximity; -impl Criterion for WordsProximity -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { - let lhs = matches_proximity(&lhs.matches); - let rhs = matches_proximity(&rhs.matches); +impl Criterion for WordsProximity { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + let lhs = { + let query_index = lhs.query_index(); + let attribute = lhs.attribute(); + let word_index = lhs.word_index(); + matches_proximity(query_index, attribute, word_index) + }; + + let rhs = { + let query_index = rhs.query_index(); + let attribute = rhs.attribute(); + let word_index = rhs.word_index(); + matches_proximity(query_index, attribute, word_index) + }; lhs.cmp(&rhs) } } - #[cfg(test)] mod tests { use super::*; @@ -80,18 +107,14 @@ mod tests { // { id: 2, attr: 2, attr_index: 0 } // { id: 3, attr: 3, attr_index: 1 } - 
let matches = &[ - Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() }, - Match { query_index: 1, attribute: Attribute::new_faillible(1, 0), ..Match::zero() }, - Match { query_index: 2, attribute: Attribute::new_faillible(1, 1), ..Match::zero() }, - Match { query_index: 2, attribute: Attribute::new_faillible(2, 0), ..Match::zero() }, - Match { query_index: 3, attribute: Attribute::new_faillible(3, 1), ..Match::zero() }, - ]; + let query_index = &[0, 1, 2, 2, 3]; + let attribute = &[0, 1, 1, 2, 3]; + let word_index = &[0, 0, 1, 0, 1]; // soup -> of = 8 // + of -> the = 1 // + the -> day = 8 (not 1) - assert_eq!(matches_proximity(matches), 17); + assert_eq!(matches_proximity(query_index, attribute, word_index), 17); } #[test] @@ -106,19 +129,14 @@ mod tests { // { id: 3, attr: 0, attr_index: 1 } // { id: 3, attr: 1, attr_index: 3 } - let matches = &[ - Match { query_index: 0, attribute: Attribute::new_faillible(0, 0), ..Match::zero() }, - Match { query_index: 0, attribute: Attribute::new_faillible(1, 0), ..Match::zero() }, - Match { query_index: 1, attribute: Attribute::new_faillible(1, 1), ..Match::zero() }, - Match { query_index: 2, attribute: Attribute::new_faillible(1, 2), ..Match::zero() }, - Match { query_index: 3, attribute: Attribute::new_faillible(0, 1), ..Match::zero() }, - Match { query_index: 3, attribute: Attribute::new_faillible(1, 3), ..Match::zero() }, - ]; + let query_index = &[0, 0, 1, 2, 3, 3]; + let attribute = &[0, 1, 1, 1, 0, 1]; + let word_index = &[0, 0, 1, 2, 1, 3]; // soup -> of = 1 // + of -> the = 1 // + the -> day = 1 - assert_eq!(matches_proximity(matches), 3); + assert_eq!(matches_proximity(query_index, attribute, word_index), 3); } } diff --git a/src/rank/mod.rs b/src/rank/mod.rs index 4d1b6b1ea..2c5a4bfc3 100644 --- a/src/rank/mod.rs +++ b/src/rank/mod.rs @@ -2,32 +2,182 @@ pub mod criterion; mod query_builder; mod distinct_map; +use std::sync::Arc; + +use slice_group_by::GroupBy; +use rayon::slice::ParallelSliceMut; + use crate::{Match, DocumentId}; pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder}; -#[inline] -fn match_query_index(a: &Match, b: &Match) -> bool { - a.query_index == b.query_index -} - -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Document { pub id: DocumentId, pub matches: Vec, } impl Document { - pub fn new(doc: DocumentId, match_: Match) -> Self { - unsafe { Self::from_sorted_matches(doc, vec![match_]) } - } + pub fn from_raw(raw: &RawDocument) -> Document { + let len = raw.matches.range.len(); + let mut matches = Vec::with_capacity(len); - pub fn from_matches(doc: DocumentId, mut matches: Vec) -> Self { - matches.sort_unstable(); - unsafe { Self::from_sorted_matches(doc, matches) } - } + let query_index = raw.query_index(); + let distance = raw.distance(); + let attribute = raw.attribute(); + let word_index = raw.word_index(); + let is_exact = raw.is_exact(); + let char_index = raw.char_index(); + let char_length = raw.char_length(); - pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec) -> Self { - Self { id, matches } + for i in 0..len { + let match_ = Match { + query_index: query_index[i], + distance: distance[i], + attribute: attribute[i], + word_index: word_index[i], + is_exact: is_exact[i], + char_index: char_index[i], + char_length: char_length[i], + }; + matches.push(match_); + } + + Document { id: raw.id, matches } + } +} + +#[derive(Clone)] +pub struct RawDocument { + pub id: DocumentId, + pub matches: 
SharedMatches, +} + +impl RawDocument { + fn new(id: DocumentId, range: Range, matches: Arc) -> RawDocument { + RawDocument { id, matches: SharedMatches { range, matches } } + } + + pub fn query_index(&self) -> &[u32] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.query_index.get_unchecked(r.start..r.end) } + } + + pub fn distance(&self) -> &[u8] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) } + } + + pub fn attribute(&self) -> &[u16] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) } + } + + pub fn word_index(&self) -> &[u32] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.word_index.get_unchecked(r.start..r.end) } + } + + pub fn is_exact(&self) -> &[bool] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) } + } + + pub fn char_index(&self) -> &[u32] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.char_index.get_unchecked(r.start..r.end) } + } + + pub fn char_length(&self) -> &[u16] { + let r = self.matches.range; + // it is safe because construction/modifications + // can only be done in this module + unsafe { &self.matches.matches.char_length.get_unchecked(r.start..r.end) } + } +} + +pub fn raw_documents_from_matches(mut matches: Vec<(DocumentId, Match)>) -> Vec { + let mut docs_ranges = Vec::<(DocumentId, Range)>::new(); + let mut matches2 = Matches::with_capacity(matches.len()); + + matches.par_sort_unstable(); + + for group in matches.linear_group_by(|(a, _), (b, _)| a == b) { + let id = group[0].0; + let start = docs_ranges.last().map(|(_, r)| r.end).unwrap_or(0); + let end = start + group.len(); + docs_ranges.push((id, Range { start, end })); + + matches2.extend_from_slice(group); + } + + let matches = Arc::new(matches2); + docs_ranges.into_iter().map(|(i, r)| RawDocument::new(i, r, matches.clone())).collect() +} + +#[derive(Debug, Copy, Clone)] +struct Range { + start: usize, + end: usize, +} + +impl Range { + fn len(self) -> usize { + self.end - self.start + } +} + +#[derive(Clone)] +pub struct SharedMatches { + range: Range, + matches: Arc, +} + +#[derive(Clone)] +struct Matches { + query_index: Vec, + distance: Vec, + attribute: Vec, + word_index: Vec, + is_exact: Vec, + char_index: Vec, + char_length: Vec, +} + +impl Matches { + fn with_capacity(cap: usize) -> Matches { + Matches { + query_index: Vec::with_capacity(cap), + distance: Vec::with_capacity(cap), + attribute: Vec::with_capacity(cap), + word_index: Vec::with_capacity(cap), + is_exact: Vec::with_capacity(cap), + char_index: Vec::with_capacity(cap), + char_length: Vec::with_capacity(cap), + } + } + + fn extend_from_slice(&mut self, matches: &[(DocumentId, Match)]) { + for (_, match_) in matches { + self.query_index.push(match_.query_index); + self.distance.push(match_.distance); + self.attribute.push(match_.attribute); + self.word_index.push(match_.word_index); + 
self.is_exact.push(match_.is_exact); + self.char_index.push(match_.char_index); + self.char_length.push(match_.char_length); + } } } diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index 91d645160..11fc75498 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -4,7 +4,9 @@ use std::error::Error; use std::hash::Hash; use std::rc::Rc; +use rayon::slice::ParallelSliceMut; use slice_group_by::GroupByMut; +use elapsed::measure_time; use hashbrown::HashMap; use fst::Streamer; use rocksdb::DB; @@ -15,7 +17,7 @@ use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap}; use crate::rank::criterion::Criteria; use crate::database::DatabaseView; use crate::{Match, DocumentId}; -use crate::rank::Document; +use crate::rank::{raw_documents_from_matches, RawDocument, Document}; fn split_whitespace_automatons(query: &str) -> Vec { let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace); @@ -81,7 +83,7 @@ where D: Deref, } } - fn query_all(&self, query: &str) -> Vec { + fn query_all(&self, query: &str) -> Vec { let automatons = split_whitespace_automatons(query); let mut stream = { @@ -94,7 +96,7 @@ where D: Deref, }; let mut number_matches = 0; - let mut matches = HashMap::new(); + let mut matches = Vec::new(); while let Some((input, indexed_values)) = stream.next() { for iv in indexed_values { @@ -105,7 +107,6 @@ where D: Deref, let doc_indexes = &self.view.index().positive.indexes(); let doc_indexes = &doc_indexes[iv.value as usize]; - number_matches += doc_indexes.len(); for doc_index in doc_indexes { let match_ = Match { query_index: iv.index as u32, @@ -116,15 +117,18 @@ where D: Deref, char_index: doc_index.char_index, char_length: doc_index.char_length, }; - matches.entry(doc_index.document_id).or_insert_with(Vec::new).push(match_); + matches.push((doc_index.document_id, match_)); } } } - info!("{} total documents to classify", matches.len()); - info!("{} total matches to classify", number_matches); + let total_matches = matches.len(); + let raw_documents = raw_documents_from_matches(matches); - matches.into_iter().map(|(i, m)| Document::from_matches(i, m)).collect() + info!("{} total documents to classify", raw_documents.len()); + info!("{} total matches to classify", total_matches); + + raw_documents } } @@ -140,7 +144,7 @@ where D: Deref, return builder.query(query, range); } - let (elapsed, mut documents) = elapsed::measure_time(|| self.query_all(query)); + let (elapsed, mut documents) = measure_time(|| self.query_all(query)); info!("query_all took {}", elapsed); let mut groups = vec![documents.as_mut_slice()]; @@ -177,12 +181,9 @@ where D: Deref, } } - // `drain` removes the documents efficiently using `ptr::copy` - // TODO it could be more efficient to have a custom iterator let offset = cmp::min(documents.len(), range.start); - documents.drain(0..offset); - documents.truncate(range.len()); - documents + let iter = documents.into_iter().skip(offset).take(range.len()); + iter.map(|d| Document::from_raw(&d)).collect() } } @@ -215,7 +216,9 @@ where D: Deref, K: Hash + Eq, { pub fn query(self, query: &str, range: Range) -> Vec { - let mut documents = self.inner.query_all(query); + let (elapsed, mut documents) = measure_time(|| self.inner.query_all(query)); + info!("query_all took {}", elapsed); + let mut groups = vec![documents.as_mut_slice()]; let mut key_cache = HashMap::new(); let view = &self.inner.view; @@ -227,12 +230,14 @@ where D: Deref, let mut distinct_map = DistinctMap::new(self.size); let mut distinct_raw_offset = 
0; - 'criteria: for criterion in self.inner.criteria.as_ref() { + 'criteria: for (ci, criterion) in self.inner.criteria.as_ref().iter().enumerate() { let tmp_groups = mem::replace(&mut groups, Vec::new()); let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map); let mut documents_seen = 0; for group in tmp_groups { + info!("criterion {}, documents group of size {}", ci, group.len()); + // if this group does not overlap with the requested range, // push it without sorting and splitting it if documents_seen + group.len() < distinct_raw_offset { @@ -241,9 +246,12 @@ where D: Deref, continue; } - group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view)); + let (elapsed, _) = measure_time(|| { + group.par_sort_unstable_by(|a, b| criterion.evaluate(a, b)); + }); + info!("criterion {} sort took {}", ci, elapsed); - for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) { + for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) { // we must compute the real distinguished len of this sub-group for document in group.iter() { let filter_accepted = match &self.inner.filter { @@ -302,7 +310,7 @@ where D: Deref, }; if distinct_accepted && seen.len() > range.start { - out_documents.push(document); + out_documents.push(Document::from_raw(&document)); if out_documents.len() == range.len() { break } } } From 4c0ad5f96460aa5f9482b0651e356582a426748f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 2 Feb 2019 14:23:18 +0100 Subject: [PATCH 3/6] feat: Simplify the Criterion Trait by removing the DatabaseView param --- src/rank/criterion/document_id.rs | 11 ++--- src/rank/criterion/mod.rs | 73 ++++++++++++------------------- src/rank/criterion/sort_by.rs | 2 +- src/rank/query_builder.rs | 10 ++--- 4 files changed, 38 insertions(+), 58 deletions(-) diff --git a/src/rank/criterion/document_id.rs b/src/rank/criterion/document_id.rs index 2d8ca34c2..a388cf2de 100644 --- a/src/rank/criterion/document_id.rs +++ b/src/rank/criterion/document_id.rs @@ -1,19 +1,14 @@ use std::cmp::Ordering; use std::ops::Deref; -use rocksdb::DB; - use crate::rank::criterion::Criterion; -use crate::database::DatabaseView; -use crate::rank::Document; +use crate::rank::RawDocument; #[derive(Debug, Clone, Copy)] pub struct DocumentId; -impl Criterion for DocumentId -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { +impl Criterion for DocumentId { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { lhs.id.cmp(&rhs.id) } } diff --git a/src/rank/criterion/mod.rs b/src/rank/criterion/mod.rs index c7c547851..46b41ea0f 100644 --- a/src/rank/criterion/mod.rs +++ b/src/rank/criterion/mod.rs @@ -19,60 +19,51 @@ pub use self::{ sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_position::SumOfWordsPosition, exact::Exact, - sort_by::SortBy, + // sort_by::SortBy, document_id::DocumentId, }; -pub trait Criterion -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering; +pub trait Criterion: Send + Sync { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering; #[inline] - fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { - self.evaluate(lhs, rhs, view) == Ordering::Equal + fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool { + self.evaluate(lhs, rhs) == Ordering::Equal } } -impl<'a, D, T: Criterion + ?Sized> Criterion for &'a T -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, view: 
&DatabaseView) -> Ordering { - (**self).evaluate(lhs, rhs, view) +impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + (**self).evaluate(lhs, rhs) } - fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { - (**self).eq(lhs, rhs, view) + fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool { + (**self).eq(lhs, rhs) } } -impl + ?Sized> Criterion for Box -where D: Deref -{ - fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { - (**self).evaluate(lhs, rhs, view) +impl Criterion for Box { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + (**self).evaluate(lhs, rhs) } - fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { - (**self).eq(lhs, rhs, view) + fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool { + (**self).eq(lhs, rhs) } } #[derive(Default)] -pub struct CriteriaBuilder -where D: Deref -{ - inner: Vec>> +pub struct CriteriaBuilder { + inner: Vec> } -impl CriteriaBuilder -where D: Deref +impl CriteriaBuilder { - pub fn new() -> CriteriaBuilder { + pub fn new() -> CriteriaBuilder { CriteriaBuilder { inner: Vec::new() } } - pub fn with_capacity(capacity: usize) -> CriteriaBuilder { + pub fn with_capacity(capacity: usize) -> CriteriaBuilder { CriteriaBuilder { inner: Vec::with_capacity(capacity) } } @@ -80,33 +71,29 @@ where D: Deref self.inner.reserve(additional) } - pub fn add(mut self, criterion: C) -> CriteriaBuilder - where C: 'static + Criterion, + pub fn add(mut self, criterion: C) -> CriteriaBuilder + where C: 'static + Criterion, { self.push(criterion); self } pub fn push(&mut self, criterion: C) - where C: 'static + Criterion, + where C: 'static + Criterion, { self.inner.push(Box::new(criterion)); } - pub fn build(self) -> Criteria { + pub fn build(self) -> Criteria { Criteria { inner: self.inner } } } -pub struct Criteria -where D: Deref -{ - inner: Vec>>, +pub struct Criteria { + inner: Vec>, } -impl Default for Criteria -where D: Deref -{ +impl Default for Criteria { fn default() -> Self { CriteriaBuilder::with_capacity(7) .add(SumOfTypos) @@ -120,10 +107,8 @@ where D: Deref } } -impl AsRef<[Box>]> for Criteria -where D: Deref -{ - fn as_ref(&self) -> &[Box>] { +impl AsRef<[Box]> for Criteria { + fn as_ref(&self) -> &[Box] { &self.inner } } diff --git a/src/rank/criterion/sort_by.rs b/src/rank/criterion/sort_by.rs index 53b8bcac1..1604a492a 100644 --- a/src/rank/criterion/sort_by.rs +++ b/src/rank/criterion/sort_by.rs @@ -66,7 +66,7 @@ impl Criterion for SortBy where D: Deref, T: DeserializeOwned + Ord, { - fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument, view: &DatabaseView) -> Ordering { let lhs = match view.document_by_id::(lhs.id) { Ok(doc) => Some(doc), Err(e) => { eprintln!("{}", e); None }, diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index 11fc75498..ff160da7f 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -43,7 +43,7 @@ pub struct QueryBuilder<'a, D, FI> where D: Deref { view: &'a DatabaseView, - criteria: Criteria, + criteria: Criteria, filter: Option, } @@ -58,7 +58,7 @@ where D: Deref impl<'a, D, FI> QueryBuilder<'a, D, FI> where D: Deref, { - pub fn with_criteria(view: &'a DatabaseView, criteria: Criteria) -> Result> { + pub fn with_criteria(view: &'a DatabaseView, criteria: Criteria) -> Result> { Ok(QueryBuilder { 
view, criteria, filter: None }) } @@ -165,12 +165,12 @@ where D: Deref, continue; } - let (elapsed, ()) = elapsed::measure_time(|| { - group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view)); + let (elapsed, _) = measure_time(|| { + group.par_sort_unstable_by(|a, b| criterion.evaluate(a, b)); }); info!("criterion {} sort took {}", ci, elapsed); - for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) { + for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) { documents_seen += group.len(); groups.push(group); From 2e905bac089b2f1cc0a2ed3d136446171c6bc6b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 2 Feb 2019 14:28:14 +0100 Subject: [PATCH 4/6] chore: Remove Attribute and WordArea structures --- src/attribute.rs | 105 ----------------------- src/data/doc_indexes.rs | 5 +- src/database/serde/indexer_serializer.rs | 2 +- src/lib.rs | 4 - src/rank/criterion/document_id.rs | 1 - src/rank/criterion/mod.rs | 2 - src/rank/criterion/words_proximity.rs | 2 - src/rank/query_builder.rs | 2 - src/word_area.rs | 102 ---------------------- 9 files changed, 2 insertions(+), 223 deletions(-) delete mode 100644 src/attribute.rs delete mode 100644 src/word_area.rs diff --git a/src/attribute.rs b/src/attribute.rs deleted file mode 100644 index 4c075e475..000000000 --- a/src/attribute.rs +++ /dev/null @@ -1,105 +0,0 @@ -use std::fmt; - -/// Represent an attribute number along with the word index -/// according to the tokenizer used. -/// -/// It can accept up to 1024 attributes and word positions -/// can be maximum 2^22. -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Attribute(u32); - -impl Attribute { - /// Construct an `Attribute` from an attribute number and - /// the word position of a match according to the tokenizer used. - pub(crate) fn new(attribute: u16, index: u32) -> Result { - if attribute & 0b1111_1100_0000_0000 != 0 { - return Err(AttributeError::AttributeTooBig) - } - - if index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 { - return Err(AttributeError::IndexTooBig) - } - - let attribute = u32::from(attribute) << 22; - Ok(Attribute(attribute | index)) - } - - /// Construct an `Attribute` from an attribute number and - /// the word position of a match according to the tokenizer used. - /// - /// # Panics - /// - /// The attribute must not be greater than 1024 - /// and the word index not greater than 2^22. - pub(crate) fn new_faillible(attribute: u16, index: u32) -> Attribute { - match Attribute::new(attribute, index) { - Ok(attribute) => attribute, - Err(AttributeError::AttributeTooBig) => { - panic!("attribute must not be greater than 1024") - }, - Err(AttributeError::IndexTooBig) => { - panic!("attribute word index must not be greater than 2^22") - }, - } - } - - pub(crate) fn max_value() -> Attribute { - Attribute(u32::max_value()) - } - - #[inline] - pub fn attribute(self) -> u16 { - (self.0 >> 22) as u16 - } - - #[inline] - pub fn word_index(self) -> u32 { - self.0 & 0b0000_0000_0011_1111_1111_1111_1111 - } -} - -impl fmt::Debug for Attribute { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("Attribute") - .field("attribute", &self.attribute()) - .field("word_index", &self.word_index()) - .finish() - } -} - -pub enum AttributeError { - AttributeTooBig, - IndexTooBig, -} - -#[cfg(test)] -mod tests { - use super::*; - use quickcheck::{quickcheck, TestResult}; - - quickcheck! 
{ - fn qc_attribute(gen_attr: u16, gen_index: u32) -> TestResult { - if gen_attr > 2_u16.pow(10) || gen_index > 2_u32.pow(22) { - return TestResult::discard() - } - - let attribute = Attribute::new_faillible(gen_attr, gen_index); - - let valid_attribute = attribute.attribute() == gen_attr; - let valid_index = attribute.word_index() == gen_index; - - TestResult::from_bool(valid_attribute && valid_index) - } - - fn qc_attribute_ord(gen_attr: u16, gen_index: u32) -> TestResult { - if gen_attr >= 2_u16.pow(10) || gen_index >= 2_u32.pow(22) { - return TestResult::discard() - } - - let a = Attribute::new_faillible(gen_attr, gen_index); - let b = Attribute::new_faillible(gen_attr + 1, gen_index + 1); - - TestResult::from_bool(a < b) - } - } -} diff --git a/src/data/doc_indexes.rs b/src/data/doc_indexes.rs index 4919b9fa0..67106a948 100644 --- a/src/data/doc_indexes.rs +++ b/src/data/doc_indexes.rs @@ -147,12 +147,9 @@ impl DocIndexesBuilder { #[cfg(test)] mod tests { - use super::*; - use std::error::Error; - use crate::{Attribute, WordArea}; - use crate::DocumentId; + use super::*; #[test] fn builder_serialize_deserialize() -> Result<(), Box> { diff --git a/src/database/serde/indexer_serializer.rs b/src/database/serde/indexer_serializer.rs index 6271e1b7b..63bb016d8 100644 --- a/src/database/serde/indexer_serializer.rs +++ b/src/database/serde/indexer_serializer.rs @@ -3,7 +3,7 @@ use crate::database::serde::SerializerError; use crate::database::schema::SchemaAttr; use crate::tokenizer::TokenizerBuilder; use crate::tokenizer::Token; -use crate::{DocumentId, DocIndex, Attribute, WordArea}; +use crate::{DocumentId, DocIndex}; use hashbrown::HashSet; use serde::Serialize; diff --git a/src/lib.rs b/src/lib.rs index 5f824b39a..bfa0b3cd9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,16 +5,12 @@ pub mod database; pub mod data; pub mod rank; pub mod tokenizer; -mod attribute; -mod word_area; mod common_words; pub use rocksdb; pub use self::tokenizer::Tokenizer; pub use self::common_words::CommonWords; -pub use self::attribute::{Attribute, AttributeError}; -pub use self::word_area::{WordArea, WordAreaError}; /// Represent an internally generated document unique identifier. 
/// diff --git a/src/rank/criterion/document_id.rs b/src/rank/criterion/document_id.rs index a388cf2de..8e4cf91b5 100644 --- a/src/rank/criterion/document_id.rs +++ b/src/rank/criterion/document_id.rs @@ -1,5 +1,4 @@ use std::cmp::Ordering; -use std::ops::Deref; use crate::rank::criterion::Criterion; use crate::rank::RawDocument; diff --git a/src/rank/criterion/mod.rs b/src/rank/criterion/mod.rs index 46b41ea0f..6272cf89d 100644 --- a/src/rank/criterion/mod.rs +++ b/src/rank/criterion/mod.rs @@ -8,8 +8,6 @@ mod exact; mod document_id; use std::cmp::Ordering; - -use crate::database::DatabaseView; use crate::rank::RawDocument; pub use self::{ diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index 6f101d4d0..b5d98e147 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -94,8 +94,6 @@ impl Criterion for WordsProximity { mod tests { use super::*; - use crate::Attribute; - #[test] fn three_different_attributes() { diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index ff160da7f..e6c49be6d 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -95,7 +95,6 @@ where D: Deref, op_builder.union() }; - let mut number_matches = 0; let mut matches = Vec::new(); while let Some((input, indexed_values)) = stream.next() { @@ -148,7 +147,6 @@ where D: Deref, info!("query_all took {}", elapsed); let mut groups = vec![documents.as_mut_slice()]; - let view = &self.view; 'criteria: for (ci, criterion) in self.criteria.as_ref().iter().enumerate() { let tmp_groups = mem::replace(&mut groups, Vec::new()); diff --git a/src/word_area.rs b/src/word_area.rs deleted file mode 100644 index 593b462a6..000000000 --- a/src/word_area.rs +++ /dev/null @@ -1,102 +0,0 @@ -use std::fmt; - -/// Represent a word position in bytes along with the length of it. -/// -/// It can represent words byte index to maximum 2^22 and -/// up to words of length 1024. -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct WordArea(u32); - -impl WordArea { - /// Construct a `WordArea` from a word position in expresed as - /// a number of characters and the length of it. - /// - /// # Panics - /// - /// The char index must not be greater than 2^22 - /// and the length not greater than 1024. 
- pub(crate) fn new(char_index: u32, length: u16) -> Result { - if char_index & 0b1111_1111_1100_0000_0000_0000_0000 != 0 { - return Err(WordAreaError::ByteIndexTooBig) - } - - if length & 0b1111_1100_0000_0000 != 0 { - return Err(WordAreaError::LengthTooBig) - } - - let char_index = char_index << 10; - Ok(WordArea(char_index | u32::from(length))) - } - - pub(crate) fn new_faillible(char_index: u32, length: u16) -> WordArea { - match WordArea::new(char_index, length) { - Ok(word_area) => word_area, - Err(WordAreaError::ByteIndexTooBig) => { - panic!("word area byte index must not be greater than 2^22") - }, - Err(WordAreaError::LengthTooBig) => { - panic!("word area length must not be greater than 1024") - }, - } - } - - pub(crate) fn max_value() -> WordArea { - WordArea(u32::max_value()) - } - - #[inline] - pub fn char_index(self) -> u32 { - self.0 >> 10 - } - - #[inline] - pub fn length(self) -> u16 { - (self.0 & 0b0000_0000_0000_0000_0011_1111_1111) as u16 - } -} - -impl fmt::Debug for WordArea { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("WordArea") - .field("char_index", &self.char_index()) - .field("length", &self.length()) - .finish() - } -} - -pub enum WordAreaError { - ByteIndexTooBig, - LengthTooBig, -} - -#[cfg(test)] -mod tests { - use super::*; - use quickcheck::{quickcheck, TestResult}; - - quickcheck! { - fn qc_word_area(gen_char_index: u32, gen_length: u16) -> TestResult { - if gen_char_index > 2_u32.pow(22) || gen_length > 2_u16.pow(10) { - return TestResult::discard() - } - - let word_area = WordArea::new_faillible(gen_char_index, gen_length); - - let valid_char_index = word_area.char_index() == gen_char_index; - let valid_length = word_area.length() == gen_length; - - TestResult::from_bool(valid_char_index && valid_length) - } - - fn qc_word_area_ord(gen_char_index: u32, gen_length: u16) -> TestResult { - if gen_char_index >= 2_u32.pow(22) || gen_length >= 2_u16.pow(10) { - return TestResult::discard() - } - - let a = WordArea::new_faillible(gen_char_index, gen_length); - let b = WordArea::new_faillible(gen_char_index + 1, gen_length + 1); - - TestResult::from_bool(a < b) - } - } -} From 5efbc5ceb3fc4f95064d3f4848a8bce4839d4a80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 2 Feb 2019 14:42:12 +0100 Subject: [PATCH 5/6] feat: Introduce the revisited SortBy criterion --- src/rank/criterion/mod.rs | 4 ++-- src/rank/criterion/sort_by.rs | 43 ++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/rank/criterion/mod.rs b/src/rank/criterion/mod.rs index 6272cf89d..07c6a37e1 100644 --- a/src/rank/criterion/mod.rs +++ b/src/rank/criterion/mod.rs @@ -4,7 +4,7 @@ mod words_proximity; mod sum_of_words_attribute; mod sum_of_words_position; mod exact; -// mod sort_by; +mod sort_by; mod document_id; use std::cmp::Ordering; @@ -17,7 +17,7 @@ pub use self::{ sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_position::SumOfWordsPosition, exact::Exact, - // sort_by::SortBy, + sort_by::SortBy, document_id::DocumentId, }; diff --git a/src/rank/criterion/sort_by.rs b/src/rank/criterion/sort_by.rs index 1604a492a..d1c7abf8c 100644 --- a/src/rank/criterion/sort_by.rs +++ b/src/rank/criterion/sort_by.rs @@ -24,7 +24,7 @@ use crate::rank::RawDocument; /// /// # Example /// -/// ```no-test +/// ```ignore /// use serde_derive::Deserialize; /// use meilidb::rank::criterion::*; /// @@ -40,39 +40,40 @@ use crate::rank::RawDocument; /// .add(SumOfWordsAttribute) /// 
.add(SumOfWordsPosition) /// .add(Exact) -/// .add(SortBy::::new()) +/// .add(SortBy::::new(&view)) /// .add(DocumentId); /// /// let criterion = builder.build(); /// /// ``` -pub struct SortBy { +pub struct SortBy<'a, T, D> +where D: Deref + Send + Sync, + T: Send + Sync +{ + view: &'a DatabaseView, _phantom: marker::PhantomData, } -impl SortBy { - pub fn new() -> Self { - SortBy::default() - } -} - -impl Default for SortBy { - fn default() -> SortBy { - SortBy { _phantom: marker::PhantomData } - } -} - -impl Criterion for SortBy -where D: Deref, - T: DeserializeOwned + Ord, +impl<'a, T, D> SortBy<'a, T, D> +where D: Deref + Send + Sync, + T: Send + Sync { - fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument, view: &DatabaseView) -> Ordering { - let lhs = match view.document_by_id::(lhs.id) { + pub fn new(view: &'a DatabaseView) -> Self { + SortBy { view, _phantom: marker::PhantomData } + } +} + +impl<'a, T, D> Criterion for SortBy<'a, T, D> +where D: Deref + Send + Sync, + T: DeserializeOwned + Ord + Send + Sync, +{ + fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering { + let lhs = match self.view.document_by_id::(lhs.id) { Ok(doc) => Some(doc), Err(e) => { eprintln!("{}", e); None }, }; - let rhs = match view.document_by_id::(rhs.id) { + let rhs = match self.view.document_by_id::(rhs.id) { Ok(doc) => Some(doc), Err(e) => { eprintln!("{}", e); None }, }; From 2bd5b4ab86875986733e1bad0af2700db95c0981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 2 Feb 2019 15:12:54 +0100 Subject: [PATCH 6/6] feat: Remove useless WordsProximity criterion benchmark --- src/rank/criterion/words_proximity.rs | 39 --------------------------- 1 file changed, 39 deletions(-) diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index b5d98e147..614d8f7ff 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -137,42 +137,3 @@ mod tests { assert_eq!(matches_proximity(query_index, attribute, word_index), 3); } } - -#[cfg(all(feature = "nightly", test))] -mod bench { - extern crate test; - - use super::*; - use std::error::Error; - use self::test::Bencher; - - use rand_xorshift::XorShiftRng; - use rand::{Rng, SeedableRng}; - - use crate::Attribute; - - #[bench] - fn evaluate_proximity(bench: &mut Bencher) -> Result<(), Box> { - let number_matches = 30_000; - let mut matches = Vec::with_capacity(number_matches); - let mut rng = XorShiftRng::seed_from_u64(42); - - for _ in 0..number_matches { - let query_index = rng.gen_range(0, 4); - - let attribute = rng.gen_range(0, 5); - let word_index = rng.gen_range(0, 15); - let attribute = Attribute::new_faillible(attribute, word_index); - - let match_ = Match { query_index, attribute, ..Match::zero() }; - matches.push(match_); - } - - bench.iter(|| { - let proximity = matches_proximity(&matches); - test::black_box(move || proximity) - }); - - Ok(()) - } -}
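
Taken together with patch 4, which deletes the `Attribute` and `WordArea` types, this series trades bit-packing for plain fields. Each removed type squeezed a pair of values into a single u32 (10 bits of attribute with 22 bits of word index, and 22 bits of char index with 10 bits of length), which is where the 1024 and 2^22 limits and the fallible constructors came from. The sketch below is illustrative only, not code from the patches; it reproduces the packing arithmetic being given up. With plain fields (8 + 2 + 4 + 4 + 2 = 20 bytes of payload, aligned up to 24), `DocIndex` grows from 16 to 24 bytes, which is exactly what the updated `docindex_mem_size` test asserts, in exchange for direct field access and no artificial bounds.

    fn pack_attribute(attribute: u16, word_index: u32) -> u32 {
        // 10 bits of attribute in the high part, 22 bits of word index below
        (u32::from(attribute) << 22) | word_index
    }

    fn pack_word_area(char_index: u32, length: u16) -> u32 {
        // 22 bits of char index in the high part, 10 bits of length below
        (char_index << 10) | u32::from(length)
    }

    fn main() {
        let packed = pack_attribute(3, 11);
        assert_eq!(packed >> 22, 3);          // what Attribute::attribute() returned
        assert_eq!(packed & 0x003F_FFFF, 11); // what Attribute::word_index() returned

        let packed = pack_word_area(30, 4);
        assert_eq!(packed >> 10, 30);         // what WordArea::char_index() returned
        assert_eq!(packed & 0x0000_03FF, 4);  // what WordArea::length() returned
    }
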
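
The core of patch 2 is the `RawDocument`/`Matches` structure-of-arrays introduced in src/rank/mod.rs: every match column is stored once behind an `Arc`, and each document only carries a range into it, so sorting documents by a criterion moves small handles rather than per-document `Vec<Match>` buffers, and the column slices can be handed straight to the criteria and to rayon's `par_sort_unstable_by` (which is why `Criterion` now requires `Send + Sync`). The program below is a deliberately simplified model of `raw_documents_from_matches`, keeping only two columns and using manual grouping; the real code keeps seven columns, groups with `linear_group_by`, and first sorts the flat match list in parallel.

    use std::sync::Arc;

    struct Columns {
        query_index: Vec<u32>,
        distance: Vec<u8>,
    }

    struct RawDoc {
        id: u64,
        range: (usize, usize),
        columns: Arc<Columns>,
    }

    impl RawDoc {
        fn distance(&self) -> &[u8] {
            &self.columns.distance[self.range.0..self.range.1]
        }
    }

    fn main() {
        // (document_id, query_index, distance) triples, already sorted by document id
        let matches = vec![(1u64, 0u32, 0u8), (1, 1, 1), (2, 0, 0)];

        let mut columns = Columns { query_index: Vec::new(), distance: Vec::new() };
        let mut ranges: Vec<(u64, (usize, usize))> = Vec::new();

        // group consecutive matches sharing a document id and record the
        // (start, end) range of the rows they occupy in the shared columns
        let mut i = 0;
        while i < matches.len() {
            let id = matches[i].0;
            let start = columns.query_index.len();
            while i < matches.len() && matches[i].0 == id {
                columns.query_index.push(matches[i].1);
                columns.distance.push(matches[i].2);
                i += 1;
            }
            ranges.push((id, (start, columns.query_index.len())));
        }

        // one Arc'ed copy of the columns, shared by every document handle
        let columns = Arc::new(columns);
        let docs: Vec<RawDoc> = ranges
            .into_iter()
            .map(|(id, range)| RawDoc { id, range, columns: columns.clone() })
            .collect();

        assert_eq!(docs[0].distance(), &[0u8, 1]);
        assert_eq!(docs[1].id, 2);
    }
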