feat: Simplify the RankedStream code logic

2024-11-26 03:55:07 +08:00 · 2018-08-25 12:35:29 +02:00 · 2018-08-25 12:35:29 +02:00 · 34e0211567
commit 34e0211567
parent 9dce74e9c8
2 changed files with 53 additions and 60 deletions
--- a/raptor/src/lib.rs
+++ b/raptor/src/lib.rs
@ -1,3 +1,5 @@
 #![feature(nll)]
 extern crate fst;
 extern crate fnv;
 extern crate group_by;
@ -77,7 +79,8 @@ pub struct Match {
    /// (i.e. at the start or the end of the attribute).
    ///
    /// The index in the attribute is limited to a maximum of `2^32`
-    /// this is because we index only the first 1000 words in an attribute.
+    /// this is because we index only the first 1000 words
    /// in an attribute.
    pub attribute_index: u32,
    /// Whether the word that match is an exact match or a prefix.
--- a/raptor/src/rank/mod.rs
+++ b/raptor/src/rank/mod.rs
@ -60,11 +60,10 @@ fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize)
        exact,
    ];
    {
    let mut groups = vec![documents.as_mut_slice()];
    for sort in sorts {
-            let mut temp = mem::replace(&mut groups, Vec::new());
+        let temp = mem::replace(&mut groups, Vec::new());
        let mut computed = 0;
        for group in temp {
@ -76,13 +75,42 @@ fn matches_into_iter(matches: FnvHashMap<DocumentId, Vec<Match>>, limit: usize)
            }
        }
    }
    }
    documents.truncate(limit);
    documents.into_iter()
 }
-pub enum RankedStream<'m, 'v> {
+pub struct RankedStream<'m, 'v>(RankedStreamInner<'m, 'v>);
 impl<'m, 'v> RankedStream<'m, 'v> {
    pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
        let mut op = OpWithStateBuilder::new(indexes);
        for automaton in automatons.iter().map(|l| l.dfa.clone()) {
            let stream = map.search(automaton).with_state();
            op.push(stream);
        }
        let inner = RankedStreamInner::Fed {
            inner: op.union(),
            automatons: automatons,
            limit: limit,
            matches: FnvHashMap::default(),
        };
        RankedStream(inner)
    }
 }
 impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
    type Item = Document;
    fn next(&'a mut self) -> Option<Self::Item> {
        self.0.next()
    }
 }
 enum RankedStreamInner<'m, 'v> {
    Fed {
        inner: UnionWithState<'m, 'v, u32>,
        automatons: Vec<Levenshtein>,
@ -94,59 +122,27 @@ pub enum RankedStream<'m, 'v> {
    },
 }
-impl<'m, 'v> RankedStream<'m, 'v> {
+impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStreamInner<'m, 'v> {
    pub fn new(map: &'m fst::Map, indexes: &'v DocIndexes, automatons: Vec<Levenshtein>, limit: usize) -> Self {
        let mut op = OpWithStateBuilder::new(indexes);
        for automaton in automatons.iter().map(|l| l.dfa.clone()) {
            let stream = map.search(automaton).with_state();
            op.push(stream);
        }
        RankedStream::Fed {
            inner: op.union(),
            automatons: automatons,
            limit: limit,
            matches: FnvHashMap::default(),
        }
    }
 }
 impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
    type Item = Document;
    fn next(&'a mut self) -> Option<Self::Item> {
        loop {
            // TODO remove that when NLL are here !
            let mut transfert_matches = None;
            let mut transfert_limit = None;
            match self {
-                RankedStream::Fed { inner, automatons, limit, matches } => {
+                RankedStreamInner::Fed { inner, automatons, limit, matches } => {
                    match inner.next() {
                        Some((string, indexed_values)) => {
                            for iv in indexed_values {
                                // TODO extend documents matches by batch of query_index
                                //      that way it will be possible to discard matches that
                                //      have an invalid distance *before* adding them
                                //      to the matches of the documents and, that way, avoid a sort
                                let automaton = &automatons[iv.index];
                                let distance = automaton.dfa.distance(iv.state).to_u8();
                                // TODO remove the Pool system !
                                //      this is an internal Pool rule but
                                //      it is more efficient to test that here
                                // if pool.limitation.is_reached() && distance != 0 { continue }
                                for di in iv.values {
                                    let match_ = Match {
                                        query_index: iv.index as u32,
                                        distance: distance,
                                        attribute: di.attribute,
                                        attribute_index: di.attribute_index,
-                                        is_exact: string.len() == automaton.query_len,
+                                        is_exact: distance == 0 && string.len() == automaton.query_len,
                                    };
                                    matches.entry(di.document)
                                            .or_insert_with(Vec::new)
@ -155,23 +151,17 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
                            }
                        },
                        None => {
-                            // TODO remove this when NLL are here !
+                            let matches = mem::replace(matches, FnvHashMap::default());
-                            transfert_matches = Some(mem::replace(matches, FnvHashMap::default()));
+                            *self = RankedStreamInner::Pours {
-                            transfert_limit = Some(mem::replace(limit, 0));
+                                inner: matches_into_iter(matches, *limit).into_iter()
                            };
                        },
                    }
                },
-                RankedStream::Pours { inner } => {
+                RankedStreamInner::Pours { inner } => {
                    return inner.next()
                },
            }
            // transform the `RankedStream` into a `Pours`
            if let (Some(matches), Some(limit)) = (transfert_matches, transfert_limit) {
                *self = RankedStream::Pours {
                    inner: matches_into_iter(matches, limit).into_iter(),
                }
            }
        }
    }
 }