From 34e0211567ab269b24a8d76a6c08d998b7a07cac Mon Sep 17 00:00:00 2001
From: Kerollmops <renault.cle@gmail.com>
Date: Sat, 25 Aug 2018 12:35:29 +0200
Subject: [PATCH] feat: Simplify the RankedStream code logic

---
 raptor/src/lib.rs      |   5 +-
 raptor/src/rank/mod.rs | 108 +++++++++++++++++++----------------------
 2 files changed, 53 insertions(+), 60 deletions(-)
diff --git a/raptor/src/lib.rs b/raptor/src/lib.rs
index 39bf59f2a..7e6de58e7 100644
--- a/raptor/src/lib.rs
+++ b/raptor/src/lib.rs
@@ -1,3 +1,5 @@
+#![feature(nll)]
+
 extern crate fst;
 extern crate fnv;
 extern crate group_by;
@@ -77,7 +79,8 @@ pub struct Match {
     /// (i.e. at the start or the end of the attribute).
     ///
     /// The index in the attribute is limited to a maximum of `2^32`
-    /// this is because we index only the first 1000 words in an attribute.
+    /// this is because we index only the first 1000 words
+    /// in an attribute.
     pub attribute_index: u32,
 
     /// Whether the word that match is an exact match or a prefix.
diff --git a/raptor/src/rank/mod.rs b/raptor/src/rank/mod.rs
index 50617d226..54ef0a391 100644
--- a/raptor/src/rank/mod.rs
+++ b/raptor/src/rank/mod.rs
@@ -60,20 +60,18 @@ fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize)
         exact,
     ];
 
-    {
-        let mut groups = vec![documents.as_mut_slice()];
+    let mut groups = vec![documents.as_mut_slice()];
 
-        for sort in sorts {
-            let mut temp = mem::replace(&mut groups, Vec::new());
-            let mut computed = 0;
+    for sort in sorts {
+        let temp = mem::replace(&mut groups, Vec::new());
+        let mut computed = 0;
 
-            for group in temp {
-                group.sort_unstable_by(sort);
-                for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) {
-                    computed += group.len();
-                    groups.push(group);
-                    if computed >= limit { break }
-                }
+        for group in temp {
+            group.sort_unstable_by(sort);
+            for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) {
+                computed += group.len();
+                groups.push(group);
+                if computed >= limit { break }
             }
         }
     }
@@ -82,7 +80,37 @@ fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize)
     documents.into_iter()
 }
 
-pub enum RankedStream<'m, 'v> {
+pub struct RankedStream<'m, 'v>(RankedStreamInner<'m, 'v>);
+
+impl<'m, 'v> RankedStream<'m, 'v> {
+    pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
+        let mut op = OpWithStateBuilder::new(indexes);
+
+        for automaton in automatons.iter().map(|l| l.dfa.clone()) {
+            let stream = map.search(automaton).with_state();
+            op.push(stream);
+        }
+
+        let inner = RankedStreamInner::Fed {
+            inner: op.union(),
+            automatons: automatons,
+            limit: limit,
+            matches: FnvHashMap::default(),
+        };
+
+        RankedStream(inner)
+    }
+}
+
+impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
+    type Item = Document;
+
+    fn next(&'a mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+enum RankedStreamInner<'m, 'v> {
     Fed {
         inner: UnionWithState<'m, 'v, u32>,
         automatons: Vec<Levenshtein>,
@@ -94,59 +122,27 @@ pub enum RankedStream<'m, 'v> {
     },
 }
 
-impl<'m, 'v> RankedStream<'m, 'v> {
-    pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
-        let mut op = OpWithStateBuilder::new(indexes);
-
-        for automaton in automatons.iter().map(|l| l.dfa.clone()) {
-            let stream = map.search(automaton).with_state();
-            op.push(stream);
-        }
-
-        RankedStream::Fed {
-            inner: op.union(),
-            automatons: automatons,
-            limit: limit,
-            matches: FnvHashMap::default(),
-        }
-    }
-}
-
-impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
+impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStreamInner<'m, 'v> {
     type Item = Document;
 
     fn next(&'a mut self) -> Option<Self::Item> {
         loop {
-            // TODO remove that when NLL are here !
-            let mut transfert_matches = None;
-            let mut transfert_limit = None;
-
             match self {
-                RankedStream::Fed { inner, automatons, limit, matches } => {
+                RankedStreamInner::Fed { inner, automatons, limit, matches } => {
                     match inner.next() {
                         Some((string, indexed_values)) => {
                             for iv in indexed_values {
 
-                                // TODO extend documents matches by batch of query_index
-                                //      that way it will be possible to discard matches that
-                                //      have an invalid distance *before* adding them
-                                //      to the matches of the documents and, that way, avoid a sort
-
                                 let automaton = &automatons[iv.index];
                                 let distance = automaton.dfa.distance(iv.state).to_u8();
 
-                                // TODO remove the Pool system !
-                                //      this is an internal Pool rule but
-                                //      it is more efficient to test that here
-                                // if pool.limitation.is_reached() && distance != 0 { continue }
-
                                 for di in iv.values {
                                     let match_ = Match {
                                         query_index: iv.index as u32,
                                         distance: distance,
                                         attribute: di.attribute,
                                         attribute_index: di.attribute_index,
-                                        is_exact: string.len() == automaton.query_len,
+                                        is_exact: distance == 0 && string.len() == automaton.query_len,
                                     };
                                     matches.entry(di.document)
                                             .or_insert_with(Vec::new)
@@ -155,23 +151,17 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
                             }
                         },
                         None => {
-                            // TODO remove this when NLL are here !
-                            transfert_matches = Some(mem::replace(matches, FnvHashMap::default()));
-                            transfert_limit = Some(mem::replace(limit, 0));
+                            let matches = mem::replace(matches, FnvHashMap::default());
+                            *self = RankedStreamInner::Pours {
+                                inner: matches_into_iter(matches, *limit).into_iter()
+                            };
                         },
                     }
                 },
-                RankedStream::Pours { inner } => {
+                RankedStreamInner::Pours { inner } => {
                     return inner.next()
                 },
             }
-
-            // transform the `RankedStream` into a `Pours`
-            if let (Some(matches), Some(limit)) = (transfert_matches, transfert_limit) {
-                *self = RankedStream::Pours {
-                    inner: matches_into_iter(matches, limit).into_iter(),
-                }
-            }
         }
     }
 }