From 3acac1458f84cf499ca81a6b03b9381e2b66a209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 11 Oct 2018 16:09:28 +0200 Subject: [PATCH] feat: Allow querying ranges in the results list --- examples/serve-console.rs | 2 +- examples/serve-http.rs | 2 +- src/lib.rs | 2 ++ src/rank/ranked_stream.rs | 36 +++++++++++++++++++++++++----------- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/examples/serve-console.rs b/examples/serve-console.rs index 9eff1fbac..59e2f4a8e 100644 --- a/examples/serve-console.rs +++ b/examples/serve-console.rs @@ -73,7 +73,7 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query: builder.criteria(criterion::default()); let mut stream = builder.build(); - let documents = stream.retrieve_documents(20); + let documents = stream.retrieve_documents(0..20); for document in documents { let id_key = format!("{}-id", document.id); diff --git a/examples/serve-http.rs b/examples/serve-http.rs index f059973f3..c5eafd3c4 100644 --- a/examples/serve-http.rs +++ b/examples/serve-http.rs @@ -103,7 +103,7 @@ where M: AsRef, builder.criteria(criterion::default()); let mut stream = builder.build(); - let documents = stream.retrieve_documents(20); + let documents = stream.retrieve_documents(0..20); let mut body = Vec::new(); write!(&mut body, "[")?; diff --git a/src/lib.rs b/src/lib.rs index ab4597301..9ee316a2e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(range_contains)] + #[macro_use] extern crate lazy_static; pub mod rank; diff --git a/src/rank/ranked_stream.rs b/src/rank/ranked_stream.rs index 3a658a6fc..c32861762 100644 --- a/src/rank/ranked_stream.rs +++ b/src/rank/ranked_stream.rs @@ -1,5 +1,6 @@ +use std::ops::Range; use std::rc::Rc; -use std::{mem, vec}; +use std::{mem, vec, cmp}; use fnv::FnvHashMap; use fst::Streamer; @@ -51,7 +52,7 @@ pub struct RankedStream<'a, 'm, C> { } impl<'a, 'm, C> RankedStream<'a, 'm, C> { - pub fn retrieve_documents(&mut self, limit: usize) -> Vec + pub fn retrieve_documents(&mut self, range: Range) -> Vec where C: Criterion { let mut matches = FnvHashMap::default(); @@ -84,20 +85,33 @@ impl<'a, 'm, C> RankedStream<'a, 'm, C> { let mut groups = vec![documents.as_mut_slice()]; for criterion in self.criteria { - let temp = mem::replace(&mut groups, Vec::new()); - let mut computed = 0; + let tmp_groups = mem::replace(&mut groups, Vec::new()); + let mut current_range = Range { start: 0, end: 0 }; - 'grp: for group in temp { - group.sort_unstable_by(|a, b| criterion.evaluate(a, b)); - for group in GroupByMut::new(group, |a, b| criterion.eq(a, b)) { - computed += group.len(); - groups.push(group); - if computed >= limit { break 'grp } + 'grp: for group in tmp_groups { + current_range.end += group.len(); + + // if a part of the current group is in the range returned + // we must sort it and emit the sub-groups + if current_range.contains(&range.start) { + group.sort_unstable_by(|a, b| criterion.evaluate(a, b)); + for group in GroupByMut::new(group, |a, b| criterion.eq(a, b)) { + groups.push(group); + if current_range.end >= range.end { break 'grp } + } + } else { + groups.push(group) } + + current_range.start = current_range.end; } } - documents.truncate(limit); + // TODO find a better algorithm, here we allocate for too many documents + // and we do a useless allocation, we should reuse the documents Vec + let start = cmp::min(range.start, documents.len()); + let mut documents = documents.split_off(start); + documents.truncate(range.len()); documents } }