mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-31 15:31:53 +08:00
feat: Introduce the SortByAttr custom ranking helper
This commit is contained in:
parent
084c3a95b6
commit
83cd071827
@ -23,6 +23,8 @@ pub use self::serde::SerializerError;
|
||||
pub use self::schema::Schema;
|
||||
pub use self::index::Index;
|
||||
|
||||
/// Map from a `(DocumentId, SchemaAttr)` pair to an `i64` value.
///
/// NOTE(review): presumably the stored number is the value of a ranked
/// attribute for that document — confirm against the code that fills it.
pub type RankedMap = HashMap<(DocumentId, SchemaAttr), i64>;
|
||||
|
||||
const DATA_INDEX: &[u8] = b"data-index";
|
||||
const DATA_RANKED_MAP: &[u8] = b"data-ranked-map";
|
||||
const DATA_SCHEMA: &[u8] = b"data-schema";
|
||||
@ -65,9 +67,8 @@ where D: Deref<Target=DB>
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
fn retrieve_data_ranked_map<D>(snapshot: &Snapshot<D>)
|
||||
-> Result<HashMap<(DocumentId, SchemaAttr), i64>, Box<Error>>
|
||||
where D: Deref<Target=DB>
|
||||
fn retrieve_data_ranked_map<D>(snapshot: &Snapshot<D>) -> Result<RankedMap, Box<Error>>
|
||||
where D: Deref<Target=DB>,
|
||||
{
|
||||
match snapshot.get(DATA_RANKED_MAP)? {
|
||||
Some(vector) => Ok(bincode::deserialize(&*vector)?),
|
||||
@ -94,9 +95,9 @@ fn merge_indexes(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u
|
||||
}
|
||||
|
||||
fn merge_ranked_maps(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
|
||||
let mut ranked_map: Option<HashMap<_, _>> = None;
|
||||
let mut ranked_map: Option<RankedMap> = None;
|
||||
for bytes in existing.into_iter().chain(operands) {
|
||||
let operand: HashMap<(DocumentId, SchemaAttr), i64> = bincode::deserialize(bytes).unwrap();
|
||||
let operand: RankedMap = bincode::deserialize(bytes).unwrap();
|
||||
match ranked_map {
|
||||
Some(ref mut ranked_map) => ranked_map.extend(operand),
|
||||
None => { ranked_map.replace(operand); },
|
||||
@ -174,7 +175,6 @@ impl DatabaseIndex {
|
||||
let snapshot = Snapshot::new(db.clone());
|
||||
let view = ArcCell::new(Arc::new(DatabaseView::new(snapshot)?));
|
||||
|
||||
|
||||
Ok(DatabaseIndex {
|
||||
db: db,
|
||||
view: view,
|
||||
|
@ -16,8 +16,9 @@ use crate::tokenizer::TokenizerBuilder;
|
||||
use crate::data::{DocIds, DocIndexes};
|
||||
use crate::database::schema::Schema;
|
||||
use crate::database::index::Index;
|
||||
use crate::{DocumentId, DocIndex};
|
||||
use crate::database::RankedMap;
|
||||
use crate::database::{DATA_INDEX, DATA_RANKED_MAP};
|
||||
use crate::{DocumentId, DocIndex};
|
||||
|
||||
pub type Token = Vec<u8>; // TODO could be replaced by a SmallVec
|
||||
|
||||
@ -78,7 +79,7 @@ use UpdateType::{Updated, Deleted};
|
||||
|
||||
pub struct RawUpdateBuilder {
|
||||
documents_update: HashMap<DocumentId, UpdateType>,
|
||||
documents_ranked_fields: HashMap<(DocumentId, SchemaAttr), i64>,
|
||||
documents_ranked_fields: RankedMap,
|
||||
indexed_words: BTreeMap<Token, Vec<DocIndex>>,
|
||||
batch: WriteBatch,
|
||||
}
|
||||
|
@ -1,4 +1,3 @@
|
||||
use hashbrown::HashMap;
|
||||
use std::error::Error;
|
||||
use std::path::Path;
|
||||
use std::ops::Deref;
|
||||
@ -15,6 +14,7 @@ use crate::rank::{QueryBuilder, FilterFunc};
|
||||
use crate::database::schema::SchemaAttr;
|
||||
use crate::database::schema::Schema;
|
||||
use crate::database::index::Index;
|
||||
use crate::database::RankedMap;
|
||||
use crate::DocumentId;
|
||||
|
||||
pub struct DatabaseView<D>
|
||||
@ -22,7 +22,7 @@ where D: Deref<Target=DB>
|
||||
{
|
||||
snapshot: Snapshot<D>,
|
||||
index: Index,
|
||||
ranked_map: HashMap<(DocumentId, SchemaAttr), i64>,
|
||||
ranked_map: RankedMap,
|
||||
schema: Schema,
|
||||
}
|
||||
|
||||
@ -44,7 +44,7 @@ where D: Deref<Target=DB>
|
||||
&self.index
|
||||
}
|
||||
|
||||
pub fn ranked_map(&self) -> &HashMap<(DocumentId, SchemaAttr), i64> {
|
||||
pub fn ranked_map(&self) -> &RankedMap {
|
||||
&self.ranked_map
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,7 @@ mod words_proximity;
|
||||
mod sum_of_words_attribute;
|
||||
mod sum_of_words_position;
|
||||
mod exact;
|
||||
mod sort_by;
|
||||
mod sort_by_attr;
|
||||
mod document_id;
|
||||
|
||||
use std::cmp::Ordering;
|
||||
@ -17,7 +17,7 @@ pub use self::{
|
||||
sum_of_words_attribute::SumOfWordsAttribute,
|
||||
sum_of_words_position::SumOfWordsPosition,
|
||||
exact::Exact,
|
||||
sort_by::SortBy,
|
||||
sort_by_attr::SortByAttr,
|
||||
document_id::DocumentId,
|
||||
};
|
||||
|
||||
|
@ -1,83 +0,0 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::ops::Deref;
|
||||
use std::marker;
|
||||
|
||||
use rocksdb::DB;
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::database::DatabaseView;
|
||||
use crate::rank::RawDocument;
|
||||
|
||||
/// An helper struct that permit to sort documents by
|
||||
/// some of their stored attributes.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// If a document cannot be deserialized it will be considered [`None`][].
|
||||
///
|
||||
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
|
||||
/// so you must check the [`Ord`] of `Option` implementation.
|
||||
///
|
||||
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
|
||||
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// use serde_derive::Deserialize;
|
||||
/// use meilidb::rank::criterion::*;
|
||||
///
|
||||
/// #[derive(Deserialize, PartialOrd, Ord, PartialEq, Eq)]
|
||||
/// struct TimeOnly {
|
||||
/// time: String,
|
||||
/// }
|
||||
///
|
||||
/// let builder = CriteriaBuilder::with_capacity(8)
|
||||
/// .add(SumOfTypos)
|
||||
/// .add(NumberOfWords)
|
||||
/// .add(WordsProximity)
|
||||
/// .add(SumOfWordsAttribute)
|
||||
/// .add(SumOfWordsPosition)
|
||||
/// .add(Exact)
|
||||
/// .add(SortBy::<TimeOnly>::new(&view))
|
||||
/// .add(DocumentId);
|
||||
///
|
||||
/// let criterion = builder.build();
|
||||
///
|
||||
/// ```
|
||||
pub struct SortBy<'a, T, D>
|
||||
where D: Deref<Target=DB> + Send + Sync,
|
||||
T: Send + Sync
|
||||
{
|
||||
view: &'a DatabaseView<D>,
|
||||
_phantom: marker::PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<'a, T, D> SortBy<'a, T, D>
|
||||
where D: Deref<Target=DB> + Send + Sync,
|
||||
T: Send + Sync
|
||||
{
|
||||
pub fn new(view: &'a DatabaseView<D>) -> Self {
|
||||
SortBy { view, _phantom: marker::PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T, D> Criterion for SortBy<'a, T, D>
|
||||
where D: Deref<Target=DB> + Send + Sync,
|
||||
T: DeserializeOwned + Ord + Send + Sync,
|
||||
{
|
||||
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
let lhs = match self.view.document_by_id::<T>(lhs.id) {
|
||||
Ok(doc) => Some(doc),
|
||||
Err(e) => { eprintln!("{}", e); None },
|
||||
};
|
||||
|
||||
let rhs = match self.view.document_by_id::<T>(rhs.id) {
|
||||
Ok(doc) => Some(doc),
|
||||
Err(e) => { eprintln!("{}", e); None },
|
||||
};
|
||||
|
||||
lhs.cmp(&rhs)
|
||||
}
|
||||
}
|
122
src/rank/criterion/sort_by_attr.rs
Normal file
122
src/rank/criterion/sort_by_attr.rs
Normal file
@ -0,0 +1,122 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use crate::database::schema::{Schema, SchemaAttr};
|
||||
use crate::rank::criterion::Criterion;
|
||||
use crate::database::RankedMap;
|
||||
use crate::rank::RawDocument;
|
||||
|
||||
/// An helper struct that permit to sort documents by
|
||||
/// some of their stored attributes.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// If a document cannot be deserialized it will be considered [`None`][].
|
||||
///
|
||||
/// Deserialized documents are compared like `Some(doc0).cmp(&Some(doc1))`,
|
||||
/// so you must check the [`Ord`] of `Option` implementation.
|
||||
///
|
||||
/// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
|
||||
/// [`Ord`]: https://doc.rust-lang.org/std/option/enum.Option.html#impl-Ord
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// use serde_derive::Deserialize;
|
||||
/// use meilidb::rank::criterion::*;
|
||||
///
|
||||
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
|
||||
///
|
||||
/// let builder = CriteriaBuilder::with_capacity(8)
|
||||
/// .add(SumOfTypos)
|
||||
/// .add(NumberOfWords)
|
||||
/// .add(WordsProximity)
|
||||
/// .add(SumOfWordsAttribute)
|
||||
/// .add(SumOfWordsPosition)
|
||||
/// .add(Exact)
|
||||
/// .add(custom_ranking)
|
||||
/// .add(DocumentId);
|
||||
///
|
||||
/// let criterion = builder.build();
|
||||
///
|
||||
/// ```
|
||||
pub struct SortByAttr<'a> {
|
||||
ranked_map: &'a RankedMap,
|
||||
attr: SchemaAttr,
|
||||
reversed: bool,
|
||||
}
|
||||
|
||||
impl<'a> SortByAttr<'a> {
|
||||
pub fn lower_is_better(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError>
|
||||
{
|
||||
SortByAttr::new(ranked_map, schema, attr_name, false)
|
||||
}
|
||||
|
||||
pub fn higher_is_better(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError>
|
||||
{
|
||||
SortByAttr::new(ranked_map, schema, attr_name, true)
|
||||
}
|
||||
|
||||
fn new(
|
||||
ranked_map: &'a RankedMap,
|
||||
schema: &Schema,
|
||||
attr_name: &str,
|
||||
reversed: bool,
|
||||
) -> Result<SortByAttr<'a>, SortByAttrError>
|
||||
{
|
||||
let attr = match schema.attribute(attr_name) {
|
||||
Some(attr) => attr,
|
||||
None => return Err(SortByAttrError::AttributeNotFound),
|
||||
};
|
||||
|
||||
if schema.props(attr).is_ranked() {
|
||||
return Err(SortByAttrError::AttributeNotRegisteredForRanking);
|
||||
}
|
||||
|
||||
Ok(SortByAttr { ranked_map, attr, reversed })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Criterion for SortByAttr<'a> {
|
||||
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
let lhs = self.ranked_map.get(&(lhs.id, self.attr));
|
||||
let rhs = self.ranked_map.get(&(rhs.id, self.attr));
|
||||
|
||||
match (lhs, rhs) {
|
||||
(Some(lhs), Some(rhs)) => {
|
||||
let order = lhs.cmp(&rhs);
|
||||
if self.reversed { order.reverse() } else { order }
|
||||
},
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
(Some(_), None) => Ordering::Less,
|
||||
(None, None) => Ordering::Equal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reasons why a `SortByAttr` criterion could not be built.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SortByAttrError {
    /// The requested attribute name is not present in the schema.
    AttributeNotFound,
    /// The attribute exists but is not registered for ranking.
    AttributeNotRegisteredForRanking,
}

impl fmt::Display for SortByAttrError {
    /// Writes the human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match *self {
            SortByAttrError::AttributeNotFound => "attribute not found in the schema",
            SortByAttrError::AttributeNotRegisteredForRanking => {
                "attribute not registered for ranking"
            }
        };

        f.write_str(message)
    }
}

impl Error for SortByAttrError {}
|
Loading…
x
Reference in New Issue
Block a user