feat: Introduce the Criteria type

This commit is contained in:
Clément Renault 2018-12-10 20:14:16 +01:00
parent d615f89c56
commit b5df87a403
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
3 changed files with 48 additions and 111 deletions

View File

@ -11,7 +11,6 @@ use crate::database::{DocumentKey, DocumentKeyAttr};
use crate::database::{retrieve_data_schema, retrieve_data_index}; use crate::database::{retrieve_data_schema, retrieve_data_index};
use crate::database::blob::positive::PositiveBlob; use crate::database::blob::positive::PositiveBlob;
use crate::database::deserializer::Deserializer; use crate::database::deserializer::Deserializer;
use crate::rank::criterion::Criterion;
use crate::database::schema::Schema; use crate::database::schema::Schema;
use crate::rank::QueryBuilder; use crate::rank::QueryBuilder;
use crate::DocumentId; use crate::DocumentId;
@ -72,7 +71,7 @@ where D: Deref<Target=DB>
Ok(()) Ok(())
} }
pub fn query_builder(&self) -> Result<QueryBuilder<D, Box<dyn Criterion<D>>>, Box<Error>> { pub fn query_builder(&self) -> Result<QueryBuilder<D>, Box<Error>> {
QueryBuilder::new(self) QueryBuilder::new(self)
} }

View File

@ -4,12 +4,12 @@ mod words_proximity;
mod sum_of_words_attribute; mod sum_of_words_attribute;
mod sum_of_words_position; mod sum_of_words_position;
mod exact; mod exact;
mod sort_by;
mod document_id;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use std::marker;
use serde::de::DeserializeOwned;
use rocksdb::DB; use rocksdb::DB;
use crate::database::DatabaseView; use crate::database::DatabaseView;
@ -22,6 +22,8 @@ pub use self::{
sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_attribute::SumOfWordsAttribute,
sum_of_words_position::SumOfWordsPosition, sum_of_words_position::SumOfWordsPosition,
exact::Exact, exact::Exact,
sort_by::SortBy,
document_id::DocumentId,
}; };
pub trait Criterion<D> pub trait Criterion<D>
@ -60,84 +62,6 @@ where D: Deref<Target=DB>
} }
} }
/// Criterion that orders two documents by their identifier.
///
/// This is a stateless unit struct; all the work happens in
/// its [`Criterion`] implementation.
#[derive(Debug, Clone, Copy)]
pub struct DocumentId;

impl<D> Criterion<D> for DocumentId
where
    D: Deref<Target = DB>,
{
    /// Compares the `id` field of `lhs` with the `id` field of `rhs`.
    ///
    /// The database view is part of the trait signature but is not used here.
    fn evaluate(&self, lhs: &Document, rhs: &Document, _view: &DatabaseView<D>) -> Ordering {
        Ord::cmp(&lhs.id, &rhs.id)
    }
}
/// A helper struct that permits sorting documents by
/// some of their stored attributes.
///
/// `T` is the type each document is deserialized into before being compared;
/// `SortBy` itself stores no value of type `T`.
///
/// # Note
///
/// If a document cannot be deserialized it will be considered [`None`][].
///
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
/// so you must check the [`Ord`] of `Option` implementation
/// (in particular, `None` orders before any `Some`).
///
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
///
/// # Example
///
/// ```
/// use serde_derive::Deserialize;
/// use meilidb::rank::criterion::*;
///
/// #[derive(Deserialize, PartialOrd, Ord, PartialEq, Eq)]
/// struct TimeOnly {
///     time: String,
/// }
///
/// let builder = CriteriaBuilder::with_capacity(7)
///     .add(SumOfTypos)
///     .add(NumberOfWords)
///     .add(WordsProximity)
///     .add(SumOfWordsAttribute)
///     .add(SumOfWordsPosition)
///     .add(Exact)
///     .add(SortBy::<TimeOnly>::new())
///     .add(DocumentId);
///
/// let criterion = builder.build();
/// ```
pub struct SortBy<T> {
    _phantom: marker::PhantomData<T>,
}

impl<T> SortBy<T> {
    /// Creates a new `SortBy` criterion for the document type `T`.
    pub fn new() -> Self {
        SortBy { _phantom: marker::PhantomData }
    }
}

// Implemented by hand on purpose: `#[derive(Default)]` would generate
// `impl<T: Default> Default for SortBy<T>`, needlessly requiring the
// document type to be `Default` even though no `T` is ever constructed.
impl<T> Default for SortBy<T> {
    /// Equivalent to [`SortBy::new`].
    fn default() -> Self {
        SortBy::new()
    }
}
impl<T, D> Criterion<D> for SortBy<T>
where
    D: Deref<Target = DB>,
    T: DeserializeOwned + Ord,
{
    /// Orders two documents by comparing their deserialized `T` values.
    ///
    /// Each document is retrieved from `view` by its `id`. When retrieval
    /// fails, the error is printed to stderr and that side is treated as
    /// `None`; the two `Option<T>` values are then compared with `Ord`.
    fn evaluate(&self, lhs: &Document, rhs: &Document, view: &DatabaseView<D>) -> Ordering {
        // The left-hand document is fetched first, then the right-hand one,
        // so error messages are emitted in that order.
        let left = view
            .retrieve_document::<T>(lhs.id)
            .map_err(|e| eprintln!("{}", e))
            .ok();

        let right = view
            .retrieve_document::<T>(rhs.id)
            .map_err(|e| eprintln!("{}", e))
            .ok();

        left.cmp(&right)
    }
}
pub struct CriteriaBuilder<D> pub struct CriteriaBuilder<D>
where D: Deref<Target=DB> where D: Deref<Target=DB>
{ {
@ -172,14 +96,21 @@ where D: Deref<Target=DB>
self.inner.push(Box::new(criterion)); self.inner.push(Box::new(criterion));
} }
pub fn build(self) -> Vec<Box<dyn Criterion<D>>> { pub fn build(self) -> Criteria<D> {
self.inner Criteria { inner: self.inner }
} }
} }
pub fn default<D>() -> Vec<Box<dyn Criterion<D>>> pub struct Criteria<D>
where D: Deref<Target=DB> where D: Deref<Target=DB>
{ {
inner: Vec<Box<dyn Criterion<D>>>,
}
impl<D> Default for Criteria<D>
where D: Deref<Target=DB>
{
fn default() -> Self {
CriteriaBuilder::with_capacity(7) CriteriaBuilder::with_capacity(7)
.add(SumOfTypos) .add(SumOfTypos)
.add(NumberOfWords) .add(NumberOfWords)
@ -189,4 +120,13 @@ where D: Deref<Target=DB>
.add(Exact) .add(Exact)
.add(DocumentId) .add(DocumentId)
.build() .build()
}
}
impl<D> AsRef<[Box<dyn Criterion<D>>]> for Criteria<D>
where D: Deref<Target=DB>
{
fn as_ref(&self) -> &[Box<dyn Criterion<D>>] {
&self.inner
}
} }

View File

@ -9,8 +9,8 @@ use fst::Streamer;
use rocksdb::DB; use rocksdb::DB;
use crate::automaton::{self, DfaExt, AutomatonExt}; use crate::automaton::{self, DfaExt, AutomatonExt};
use crate::rank::criterion::{self, Criterion};
use crate::rank::distinct_map::DistinctMap; use crate::rank::distinct_map::DistinctMap;
use crate::rank::criterion::Criteria;
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::{Match, DocumentId}; use crate::{Match, DocumentId};
use crate::rank::Document; use crate::rank::Document;
@ -28,34 +28,34 @@ fn split_whitespace_automatons(query: &str) -> Vec<DfaExt> {
automatons automatons
} }
pub struct QueryBuilder<'a, D, C> pub struct QueryBuilder<'a, D>
where D: Deref<Target=DB> where D: Deref<Target=DB>
{ {
view: &'a DatabaseView<D>, view: &'a DatabaseView<D>,
criteria: Vec<C>, criteria: Criteria<D>,
} }
impl<'a, D> QueryBuilder<'a, D, Box<dyn Criterion<D>>> impl<'a, D> QueryBuilder<'a, D>
where D: Deref<Target=DB> where D: Deref<Target=DB>
{ {
pub fn new(view: &'a DatabaseView<D>) -> Result<Self, Box<Error>> { pub fn new(view: &'a DatabaseView<D>) -> Result<Self, Box<Error>> {
QueryBuilder::with_criteria(view, criterion::default()) QueryBuilder::with_criteria(view, Criteria::default())
} }
} }
impl<'a, D, C> QueryBuilder<'a, D, C> impl<'a, D> QueryBuilder<'a, D>
where D: Deref<Target=DB> where D: Deref<Target=DB>
{ {
pub fn with_criteria(view: &'a DatabaseView<D>, criteria: Vec<C>) -> Result<Self, Box<Error>> { pub fn with_criteria(view: &'a DatabaseView<D>, criteria: Criteria<D>) -> Result<Self, Box<Error>> {
Ok(QueryBuilder { view, criteria }) Ok(QueryBuilder { view, criteria })
} }
pub fn criteria(&mut self, criteria: Vec<C>) -> &mut Self { pub fn criteria(&mut self, criteria: Criteria<D>) -> &mut Self {
self.criteria = criteria; self.criteria = criteria;
self self
} }
pub fn with_distinct<F>(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, F, C> { pub fn with_distinct<F>(self, function: F, size: usize) -> DistinctQueryBuilder<'a, D, F> {
DistinctQueryBuilder { DistinctQueryBuilder {
inner: self, inner: self,
function: function, function: function,
@ -103,16 +103,15 @@ where D: Deref<Target=DB>
} }
} }
impl<'a, D, C> QueryBuilder<'a, D, C> impl<'a, D> QueryBuilder<'a, D>
where D: Deref<Target=DB>, where D: Deref<Target=DB>,
C: Criterion<D>
{ {
pub fn query(&self, query: &str, limit: usize) -> Vec<Document> { pub fn query(&self, query: &str, limit: usize) -> Vec<Document> {
let mut documents = self.query_all(query); let mut documents = self.query_all(query);
let mut groups = vec![documents.as_mut_slice()]; let mut groups = vec![documents.as_mut_slice()];
let view = &self.view; let view = &self.view;
for criterion in &self.criteria { for criterion in self.criteria.as_ref() {
let tmp_groups = mem::replace(&mut groups, Vec::new()); let tmp_groups = mem::replace(&mut groups, Vec::new());
let mut computed = 0; let mut computed = 0;
@ -131,26 +130,25 @@ where D: Deref<Target=DB>,
} }
} }
pub struct DistinctQueryBuilder<'a, D, F, C> pub struct DistinctQueryBuilder<'a, D, F>
where D: Deref<Target=DB> where D: Deref<Target=DB>
{ {
inner: QueryBuilder<'a, D, C>, inner: QueryBuilder<'a, D>,
function: F, function: F,
size: usize, size: usize,
} }
impl<'a, D, F, K, C> DistinctQueryBuilder<'a, D, F, C> impl<'a, D, F, K> DistinctQueryBuilder<'a, D, F>
where D: Deref<Target=DB>, where D: Deref<Target=DB>,
F: Fn(DocumentId, &DatabaseView<D>) -> Option<K>, F: Fn(DocumentId, &DatabaseView<D>) -> Option<K>,
K: Hash + Eq, K: Hash + Eq,
C: Criterion<D>,
{ {
pub fn query(&self, query: &str, range: Range<usize>) -> Vec<Document> { pub fn query(&self, query: &str, range: Range<usize>) -> Vec<Document> {
let mut documents = self.inner.query_all(query); let mut documents = self.inner.query_all(query);
let mut groups = vec![documents.as_mut_slice()]; let mut groups = vec![documents.as_mut_slice()];
let view = &self.inner.view; let view = &self.inner.view;
for criterion in &self.inner.criteria { for criterion in self.inner.criteria.as_ref() {
let tmp_groups = mem::replace(&mut groups, Vec::new()); let tmp_groups = mem::replace(&mut groups, Vec::new());
for group in tmp_groups { for group in tmp_groups {