feat: Allow querying ranges in the results list

This commit is contained in:
Clément Renault 2018-10-11 16:09:28 +02:00
parent 8cd07462aa
commit 3acac1458f
4 changed files with 29 additions and 13 deletions

View File

@ -73,7 +73,7 @@ fn search(metadata: &Metadata, database: &DB, common_words: &CommonWords, query:
builder.criteria(criterion::default()); builder.criteria(criterion::default());
let mut stream = builder.build(); let mut stream = builder.build();
let documents = stream.retrieve_documents(20); let documents = stream.retrieve_documents(0..20);
for document in documents { for document in documents {
let id_key = format!("{}-id", document.id); let id_key = format!("{}-id", document.id);

View File

@ -103,7 +103,7 @@ where M: AsRef<Metadata>,
builder.criteria(criterion::default()); builder.criteria(criterion::default());
let mut stream = builder.build(); let mut stream = builder.build();
let documents = stream.retrieve_documents(20); let documents = stream.retrieve_documents(0..20);
let mut body = Vec::new(); let mut body = Vec::new();
write!(&mut body, "[")?; write!(&mut body, "[")?;

View File

@ -1,3 +1,5 @@
#![feature(range_contains)]
#[macro_use] extern crate lazy_static; #[macro_use] extern crate lazy_static;
pub mod rank; pub mod rank;

View File

@ -1,5 +1,6 @@
use std::ops::Range;
use std::rc::Rc; use std::rc::Rc;
use std::{mem, vec}; use std::{mem, vec, cmp};
use fnv::FnvHashMap; use fnv::FnvHashMap;
use fst::Streamer; use fst::Streamer;
@ -51,7 +52,7 @@ pub struct RankedStream<'a, 'm, C> {
} }
impl<'a, 'm, C> RankedStream<'a, 'm, C> { impl<'a, 'm, C> RankedStream<'a, 'm, C> {
pub fn retrieve_documents(&mut self, limit: usize) -> Vec<Document> pub fn retrieve_documents(&mut self, range: Range<usize>) -> Vec<Document>
where C: Criterion where C: Criterion
{ {
let mut matches = FnvHashMap::default(); let mut matches = FnvHashMap::default();
@ -84,20 +85,33 @@ impl<'a, 'm, C> RankedStream<'a, 'm, C> {
let mut groups = vec![documents.as_mut_slice()]; let mut groups = vec![documents.as_mut_slice()];
for criterion in self.criteria { for criterion in self.criteria {
let temp = mem::replace(&mut groups, Vec::new()); let tmp_groups = mem::replace(&mut groups, Vec::new());
let mut computed = 0; let mut current_range = Range { start: 0, end: 0 };
'grp: for group in temp { 'grp: for group in tmp_groups {
current_range.end += group.len();
// if a part of the current group is in the range returned
// we must sort it and emit the sub-groups
if current_range.contains(&range.start) {
group.sort_unstable_by(|a, b| criterion.evaluate(a, b)); group.sort_unstable_by(|a, b| criterion.evaluate(a, b));
for group in GroupByMut::new(group, |a, b| criterion.eq(a, b)) { for group in GroupByMut::new(group, |a, b| criterion.eq(a, b)) {
computed += group.len();
groups.push(group); groups.push(group);
if computed >= limit { break 'grp } if current_range.end >= range.end { break 'grp }
} }
} else {
groups.push(group)
}
current_range.start = current_range.end;
} }
} }
documents.truncate(limit); // TODO: find a better algorithm; here we allocate room for too many documents
// and perform a useless allocation. We should reuse the `documents` Vec instead.
let start = cmp::min(range.start, documents.len());
let mut documents = documents.split_off(start);
documents.truncate(range.len());
documents documents
} }
} }