diff --git a/src/database/database_view.rs b/src/database/database_view.rs index 1db5047b0..576fd1f9b 100644 --- a/src/database/database_view.rs +++ b/src/database/database_view.rs @@ -7,7 +7,9 @@ use serde::de::DeserializeOwned; use crate::database::{retrieve_data_schema, DocumentKey, DocumentKeyAttr}; use crate::database::deserializer::Deserializer; +use crate::rank::criterion::Criterion; use crate::database::schema::Schema; +use crate::rank::QueryBuilder; use crate::DocumentId; pub struct DatabaseView<'a> { @@ -21,14 +23,26 @@ impl<'a> DatabaseView<'a> { Ok(DatabaseView { snapshot, schema }) } + pub fn schema(&self) -> &Schema { + &self.schema + } + pub fn into_snapshot(self) -> Snapshot<&'a DB> { self.snapshot } + pub fn snapshot(&self) -> &Snapshot<&'a DB> { + &self.snapshot + } + pub fn get(&self, key: &[u8]) -> Result, Box> { Ok(self.snapshot.get(key)?) } + pub fn query_builder(&self) -> Result>, Box> { + QueryBuilder::new(self) + } + // TODO create an enum error type pub fn retrieve_document(&self, id: DocumentId) -> Result> where D: DeserializeOwned diff --git a/src/database/mod.rs b/src/database/mod.rs index db864fa37..c347b11ce 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -8,7 +8,7 @@ use rocksdb::{DB, DBVector, MergeOperands, SeekKey}; use rocksdb::rocksdb::{Writable, Snapshot}; pub use self::document_key::{DocumentKey, DocumentKeyAttr}; -pub use self::database_view::DatabaseView; +pub use self::database_view::{DatabaseView, DocumentIter}; use self::blob::positive::PositiveBlob; use self::update::Update; use self::schema::Schema; @@ -126,7 +126,7 @@ impl fmt::Debug for Database { let mut iter = self.0.iter(); iter.seek(SeekKey::Start); let mut first = true; - for (key, value) in &mut iter { + for (key, _value) in &mut iter { if !first { write!(f, ", ")?; } first = false; let key = String::from_utf8_lossy(&key); diff --git a/src/database/update/positive/unordered_builder.rs b/src/database/update/positive/unordered_builder.rs index b44455360..7a2139c98 100644 --- a/src/database/update/positive/unordered_builder.rs +++ b/src/database/update/positive/unordered_builder.rs @@ -1,3 +1,5 @@ +#![allow(unused)] + use std::collections::BTreeMap; use std::error::Error; use std::io::Write; diff --git a/src/database/update/positive/update.rs b/src/database/update/positive/update.rs index 8924073aa..4b57405ab 100644 --- a/src/database/update/positive/update.rs +++ b/src/database/update/positive/update.rs @@ -107,7 +107,7 @@ struct Serializer<'a, B> { macro_rules! forward_to_unserializable_type { ($($ty:ident => $se_method:ident,)*) => { $( - fn $se_method(self, v: $ty) -> Result { + fn $se_method(self, _v: $ty) -> Result { Err(SerializerError::UnserializableType { name: "$ty" }) } )* @@ -145,11 +145,11 @@ where B: TokenizerBuilder f64 => serialize_f64, } - fn serialize_str(self, v: &str) -> Result { + fn serialize_str(self, _v: &str) -> Result { Err(SerializerError::UnserializableType { name: "str" }) } - fn serialize_bytes(self, v: &[u8]) -> Result { + fn serialize_bytes(self, _v: &[u8]) -> Result { Err(SerializerError::UnserializableType { name: "&[u8]" }) } @@ -375,7 +375,7 @@ where B: TokenizerBuilder Ok(()) } - fn serialize_bytes(self, v: &[u8]) -> Result { + fn serialize_bytes(self, _v: &[u8]) -> Result { Err(SerializerError::UnserializableType { name: "&[u8]" }) } diff --git a/src/rank/criterion/exact.rs b/src/rank/criterion/exact.rs index 2cdb9c0dd..9ea59eae3 100644 --- a/src/rank/criterion/exact.rs +++ b/src/rank/criterion/exact.rs @@ -1,8 +1,11 @@ use std::cmp::Ordering; + use group_by::GroupBy; -use crate::Match; + use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; +use crate::database::DatabaseView; +use crate::Match; #[inline] fn contains_exact(matches: &[Match]) -> bool { @@ -18,7 +21,7 @@ fn number_exact_matches(matches: &[Match]) -> usize { pub struct Exact; impl Criterion for Exact { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = number_exact_matches(&lhs.matches); let rhs = number_exact_matches(&rhs.matches); diff --git a/src/rank/criterion/mod.rs b/src/rank/criterion/mod.rs index bf54e9863..a4590ae90 100644 --- a/src/rank/criterion/mod.rs +++ b/src/rank/criterion/mod.rs @@ -7,6 +7,8 @@ mod exact; use std::vec; use std::cmp::Ordering; + +use crate::database::DatabaseView; use crate::rank::Document; pub use self::{ @@ -20,31 +22,31 @@ pub use self::{ pub trait Criterion { #[inline] - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering; + fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering; #[inline] - fn eq(&self, lhs: &Document, rhs: &Document) -> bool { - self.evaluate(lhs, rhs) == Ordering::Equal + fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { + self.evaluate(lhs, rhs, view) == Ordering::Equal } } impl<'a, T: Criterion + ?Sized> Criterion for &'a T { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { - (**self).evaluate(lhs, rhs) + fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { + (**self).evaluate(lhs, rhs, view) } - fn eq(&self, lhs: &Document, rhs: &Document) -> bool { - (**self).eq(lhs, rhs) + fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { + (**self).eq(lhs, rhs, view) } } impl Criterion for Box { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { - (**self).evaluate(lhs, rhs) + fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> Ordering { + (**self).evaluate(lhs, rhs, view) } - fn eq(&self, lhs: &Document, rhs: &Document) -> bool { - (**self).eq(lhs, rhs) + fn eq(&self, lhs: &Document, rhs: &Document, view: &DatabaseView) -> bool { + (**self).eq(lhs, rhs, view) } } @@ -52,7 +54,7 @@ impl Criterion for Box { pub struct DocumentId; impl Criterion for DocumentId { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { lhs.id.cmp(&rhs.id) } } diff --git a/src/rank/criterion/number_of_words.rs b/src/rank/criterion/number_of_words.rs index 902e49fc0..b20586f39 100644 --- a/src/rank/criterion/number_of_words.rs +++ b/src/rank/criterion/number_of_words.rs @@ -1,8 +1,11 @@ use std::cmp::Ordering; + use group_by::GroupBy; -use crate::Match; + use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; +use crate::database::DatabaseView; +use crate::Match; #[inline] fn number_of_query_words(matches: &[Match]) -> usize { @@ -13,7 +16,7 @@ fn number_of_query_words(matches: &[Match]) -> usize { pub struct NumberOfWords; impl Criterion for NumberOfWords { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = number_of_query_words(&lhs.matches); let rhs = number_of_query_words(&rhs.matches); diff --git a/src/rank/criterion/sum_of_typos.rs b/src/rank/criterion/sum_of_typos.rs index 911a6c67f..5cbd4bac1 100644 --- a/src/rank/criterion/sum_of_typos.rs +++ b/src/rank/criterion/sum_of_typos.rs @@ -1,8 +1,11 @@ use std::cmp::Ordering; + use group_by::GroupBy; -use crate::Match; + use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; +use crate::database::DatabaseView; +use crate::Match; #[inline] fn sum_matches_typos(matches: &[Match]) -> i8 { @@ -23,7 +26,7 @@ fn sum_matches_typos(matches: &[Match]) -> i8 { pub struct SumOfTypos; impl Criterion for SumOfTypos { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = sum_matches_typos(&lhs.matches); let rhs = sum_matches_typos(&rhs.matches); @@ -64,7 +67,9 @@ mod tests { } }; - assert_eq!(SumOfTypos.evaluate(&doc0, &doc1), Ordering::Less); + let lhs = sum_matches_typos(&doc0.matches); + let rhs = sum_matches_typos(&doc1.matches); + assert_eq!(lhs.cmp(&rhs), Ordering::Less); } // typing: "bouton manchette" @@ -94,7 +99,9 @@ mod tests { } }; - assert_eq!(SumOfTypos.evaluate(&doc0, &doc1), Ordering::Less); + let lhs = sum_matches_typos(&doc0.matches); + let rhs = sum_matches_typos(&doc1.matches); + assert_eq!(lhs.cmp(&rhs), Ordering::Less); } // typing: "bouton manchztte" @@ -124,6 +131,8 @@ mod tests { } }; - assert_eq!(SumOfTypos.evaluate(&doc0, &doc1), Ordering::Equal); + let lhs = sum_matches_typos(&doc0.matches); + let rhs = sum_matches_typos(&doc1.matches); + assert_eq!(lhs.cmp(&rhs), Ordering::Equal); } } diff --git a/src/rank/criterion/sum_of_words_attribute.rs b/src/rank/criterion/sum_of_words_attribute.rs index 95629e2b5..d373dedef 100644 --- a/src/rank/criterion/sum_of_words_attribute.rs +++ b/src/rank/criterion/sum_of_words_attribute.rs @@ -1,8 +1,11 @@ use std::cmp::Ordering; + use group_by::GroupBy; -use crate::Match; + +use crate::database::DatabaseView; use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; +use crate::Match; #[inline] fn sum_matches_attributes(matches: &[Match]) -> u8 { @@ -17,7 +20,7 @@ fn sum_matches_attributes(matches: &[Match]) -> u8 { pub struct SumOfWordsAttribute; impl Criterion for SumOfWordsAttribute { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = sum_matches_attributes(&lhs.matches); let rhs = sum_matches_attributes(&rhs.matches); diff --git a/src/rank/criterion/sum_of_words_position.rs b/src/rank/criterion/sum_of_words_position.rs index 5a230fed2..cd41d5b72 100644 --- a/src/rank/criterion/sum_of_words_position.rs +++ b/src/rank/criterion/sum_of_words_position.rs @@ -1,8 +1,11 @@ use std::cmp::Ordering; + use group_by::GroupBy; -use crate::Match; + +use crate::database::DatabaseView; use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; +use crate::Match; #[inline] fn sum_matches_attribute_index(matches: &[Match]) -> u32 { @@ -17,7 +20,7 @@ fn sum_matches_attribute_index(matches: &[Match]) -> u32 { pub struct SumOfWordsPosition; impl Criterion for SumOfWordsPosition { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = sum_matches_attribute_index(&lhs.matches); let rhs = sum_matches_attribute_index(&rhs.matches); diff --git a/src/rank/criterion/words_proximity.rs b/src/rank/criterion/words_proximity.rs index 8c2344595..abbd0e99f 100644 --- a/src/rank/criterion/words_proximity.rs +++ b/src/rank/criterion/words_proximity.rs @@ -1,8 +1,11 @@ use std::cmp::{self, Ordering}; + use group_by::GroupBy; -use crate::Match; + use crate::rank::{match_query_index, Document}; use crate::rank::criterion::Criterion; +use crate::database::DatabaseView; +use crate::Match; const MAX_DISTANCE: u32 = 8; @@ -47,7 +50,7 @@ fn matches_proximity(matches: &[Match]) -> u32 { pub struct WordsProximity; impl Criterion for WordsProximity { - fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { + fn evaluate(&self, lhs: &Document, rhs: &Document, _: &DatabaseView) -> Ordering { let lhs = matches_proximity(&lhs.matches); let rhs = matches_proximity(&rhs.matches); diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index 34ca4f212..6dc13eb9b 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -1,9 +1,8 @@ -use std::ops::{Deref, Range}; -use std::{mem, vec, str}; use std::error::Error; use std::hash::Hash; +use std::ops::Range; +use std::{mem, vec, str}; -use ::rocksdb::rocksdb::{DB, Snapshot}; use group_by::GroupByMut; use hashbrown::HashMap; use fst::Streamer; @@ -13,16 +12,10 @@ use crate::rank::criterion::{self, Criterion}; use crate::rank::distinct_map::DistinctMap; use crate::database::retrieve_data_index; use crate::database::blob::PositiveBlob; +use crate::database::DatabaseView; use crate::{Match, DocumentId}; use crate::rank::Document; -fn clamp_range(range: Range, big: Range) -> Range { - Range { - start: range.start.min(big.end).max(big.start), - end: range.end.min(big.end).max(big.start), - } -} - fn split_whitespace_automatons(query: &str) -> Vec { let mut automatons = Vec::new(); for query in query.split_whitespace().map(str::to_lowercase) { @@ -32,24 +25,22 @@ fn split_whitespace_automatons(query: &str) -> Vec { automatons } -pub struct QueryBuilder, C> { - snapshot: Snapshot, +pub struct QueryBuilder<'a, C> { + view: &'a DatabaseView<'a>, blob: PositiveBlob, criteria: Vec, } -impl> QueryBuilder> { - pub fn new(snapshot: Snapshot) -> Result> { - QueryBuilder::with_criteria(snapshot, criterion::default()) +impl<'a> QueryBuilder<'a, Box> { + pub fn new(view: &'a DatabaseView<'a>) -> Result> { + QueryBuilder::with_criteria(view, criterion::default()) } } -impl QueryBuilder -where T: Deref, -{ - pub fn with_criteria(snapshot: Snapshot, criteria: Vec) -> Result> { - let blob = retrieve_data_index(&snapshot)?; - Ok(QueryBuilder { snapshot, blob, criteria }) +impl<'a, C> QueryBuilder<'a, C> { + pub fn with_criteria(view: &'a DatabaseView<'a>, criteria: Vec) -> Result> { + let blob = retrieve_data_index(view.snapshot())?; + Ok(QueryBuilder { view, blob, criteria }) } pub fn criteria(&mut self, criteria: Vec) -> &mut Self { @@ -57,7 +48,7 @@ where T: Deref, self } - pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder { + pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'a, F, C> { DistinctQueryBuilder { inner: self, function: function, @@ -105,23 +96,21 @@ where T: Deref, } } -impl QueryBuilder -where T: Deref, - C: Criterion, +impl<'a, C> QueryBuilder<'a, C> +where C: Criterion { pub fn query(&self, query: &str, limit: usize) -> Vec { let mut documents = self.query_all(query); let mut groups = vec![documents.as_mut_slice()]; + let view = &self.view; 'group: for criterion in &self.criteria { let tmp_groups = mem::replace(&mut groups, Vec::new()); let mut computed = 0; for group in tmp_groups { - - group.sort_unstable_by(|a, b| criterion.evaluate(a, b)); - for group in GroupByMut::new(group, |a, b| criterion.eq(a, b)) { - + group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view)); + for group in GroupByMut::new(group, |a, b| criterion.eq(a, b, view)) { computed += group.len(); groups.push(group); if computed >= limit { break 'group } @@ -134,41 +123,38 @@ where T: Deref, } } -pub struct DistinctQueryBuilder, F, C> { - inner: QueryBuilder, +pub struct DistinctQueryBuilder<'a, F, C> { + inner: QueryBuilder<'a, C>, function: F, size: usize, } -pub struct DocDatabase; - -impl, F, K, C> DistinctQueryBuilder -where T: Deref, - F: Fn(DocumentId, &DocDatabase) -> Option, +impl<'a, F, K, C> DistinctQueryBuilder<'a, F, C> +where F: Fn(DocumentId, &DatabaseView) -> Option, K: Hash + Eq, C: Criterion, { pub fn query(&self, query: &str, range: Range) -> Vec { let mut documents = self.inner.query_all(query); let mut groups = vec![documents.as_mut_slice()]; + let view = &self.inner.view; for criterion in &self.inner.criteria { let tmp_groups = mem::replace(&mut groups, Vec::new()); for group in tmp_groups { - group.sort_unstable_by(|a, b| criterion.evaluate(a, b)); - for group in GroupByMut::new(group, |a, b| criterion.eq(a, b)) { + group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view)); + for group in GroupByMut::new(group, |a, b| criterion.eq(a, b, view)) { groups.push(group); } } } - let doc_database = DocDatabase; let mut out_documents = Vec::with_capacity(range.len()); let mut seen = DistinctMap::new(self.size); for document in documents { - let accepted = match (self.function)(document.id, &doc_database) { + let accepted = match (self.function)(document.id, &self.inner.view) { Some(key) => seen.digest(key), None => seen.accept_without_key(), };