mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Implement the attribute ranking rule edge computation
This commit is contained in:
parent
e55efc419e
commit
d6a7c28e4d
@ -34,6 +34,9 @@ pub struct DatabaseCache<'ctx> {
|
|||||||
pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
|
pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
|
||||||
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
||||||
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
||||||
|
pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<&'ctx [u8]>>,
|
||||||
|
pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
|
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
|
||||||
}
|
}
|
||||||
impl<'ctx> DatabaseCache<'ctx> {
|
impl<'ctx> DatabaseCache<'ctx> {
|
||||||
fn get_value<'v, K1, KC>(
|
fn get_value<'v, K1, KC>(
|
||||||
@ -284,4 +287,68 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
.map(|bytes| CboRoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding.into()))
|
||||||
.transpose()
|
.transpose()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_db_word_prefix_fid_docids(
|
||||||
|
&mut self,
|
||||||
|
word_prefix: Interned<String>,
|
||||||
|
fid: u16,
|
||||||
|
) -> Result<Option<&'ctx [u8]>> {
|
||||||
|
DatabaseCache::get_value(
|
||||||
|
self.txn,
|
||||||
|
(word_prefix, fid),
|
||||||
|
&(self.word_interner.get(word_prefix).as_str(), fid),
|
||||||
|
&mut self.db_cache.word_prefix_fid_docids,
|
||||||
|
self.index.word_prefix_fid_docids.remap_data_type::<ByteSlice>(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
|
||||||
|
let fids = match self.db_cache.word_fids.entry(word) {
|
||||||
|
Entry::Occupied(fids) => fids.get().clone(),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
let key = self.word_interner.get(word).as_bytes();
|
||||||
|
let mut fids = vec![];
|
||||||
|
let remap_key_type = self
|
||||||
|
.index
|
||||||
|
.word_fid_docids
|
||||||
|
.remap_types::<ByteSlice, ByteSlice>()
|
||||||
|
.prefix_iter(self.txn, key)?
|
||||||
|
.remap_key_type::<StrBEU16Codec>();
|
||||||
|
for result in remap_key_type {
|
||||||
|
let ((_, fid), value) = result?;
|
||||||
|
// filling other caches to avoid searching for them again
|
||||||
|
self.db_cache.word_fid_docids.insert((word, fid), Some(value));
|
||||||
|
fids.push(fid);
|
||||||
|
}
|
||||||
|
entry.insert(fids.clone());
|
||||||
|
fids
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(fids)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_db_word_prefix_fids(&mut self, word_prefix: Interned<String>) -> Result<Vec<u16>> {
|
||||||
|
let fids = match self.db_cache.word_prefix_fids.entry(word_prefix) {
|
||||||
|
Entry::Occupied(fids) => fids.get().clone(),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
let key = self.word_interner.get(word_prefix).as_bytes();
|
||||||
|
let mut fids = vec![];
|
||||||
|
let remap_key_type = self
|
||||||
|
.index
|
||||||
|
.word_prefix_fid_docids
|
||||||
|
.remap_types::<ByteSlice, ByteSlice>()
|
||||||
|
.prefix_iter(self.txn, key)?
|
||||||
|
.remap_key_type::<StrBEU16Codec>();
|
||||||
|
for result in remap_key_type {
|
||||||
|
let ((_, fid), value) = result?;
|
||||||
|
// filling other caches to avoid searching for them again
|
||||||
|
self.db_cache.word_prefix_fid_docids.insert((word_prefix, fid), Some(value));
|
||||||
|
fids.push(fid);
|
||||||
|
}
|
||||||
|
entry.insert(fids.clone());
|
||||||
|
fids
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(fids)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,4 +13,8 @@ impl Interned<Phrase> {
|
|||||||
let p = ctx.phrase_interner.get(self);
|
let p = ctx.phrase_interner.get(self);
|
||||||
p.words.iter().flatten().map(|w| ctx.word_interner.get(*w)).join(" ")
|
p.words.iter().flatten().map(|w| ctx.word_interner.get(*w)).join(" ")
|
||||||
}
|
}
|
||||||
|
/// Returns an owned copy of the word list of the phrase this handle refers to.
///
/// The phrase is resolved through `ctx.phrase_interner`; its `words` vector is
/// cloned so the caller does not keep a borrow on the search context.
pub fn words(self, ctx: &SearchContext) -> Vec<Option<Interned<String>>> {
    ctx.phrase_interner.get(self).words.clone()
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use fxhash::FxHashSet;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
@ -10,7 +11,7 @@ use crate::Result;
|
|||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct AttributeCondition {
|
pub struct AttributeCondition {
|
||||||
term: LocatedQueryTermSubset,
|
term: LocatedQueryTermSubset,
|
||||||
nbr_typos: u8,
|
fid: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum AttributeGraph {}
|
pub enum AttributeGraph {}
|
||||||
@ -44,39 +45,37 @@ impl RankingRuleGraphTrait for AttributeGraph {
|
|||||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||||
let term = to_term;
|
let term = to_term;
|
||||||
|
|
||||||
let mut edges = vec![];
|
let mut all_fields = FxHashSet::default();
|
||||||
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
|
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
|
||||||
// ...
|
let fields = ctx.get_db_word_fids(word)?;
|
||||||
|
all_fields.extend(fields);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ngrams have a base typo cost
|
for phrase in term.term_subset.all_phrases(ctx)? {
|
||||||
// 2-gram -> equivalent to 1 typo
|
for &word in phrase.words(ctx).iter().flatten() {
|
||||||
// 3-gram -> equivalent to 2 typos
|
let fields = ctx.get_db_word_fids(word)?;
|
||||||
let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 };
|
all_fields.extend(fields);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for nbr_typos in 0..=term.term_subset.max_nbr_typos(ctx) {
|
if let Some(word_prefix) = term.term_subset.use_prefix_db(ctx) {
|
||||||
let mut term = term.clone();
|
let fields = ctx.get_db_word_prefix_fids(word_prefix)?;
|
||||||
match nbr_typos {
|
all_fields.extend(fields);
|
||||||
0 => {
|
|
||||||
term.term_subset.clear_one_typo_subset();
|
|
||||||
term.term_subset.clear_two_typo_subset();
|
|
||||||
}
|
}
|
||||||
1 => {
|
|
||||||
term.term_subset.clear_zero_typo_subset();
|
|
||||||
term.term_subset.clear_two_typo_subset();
|
|
||||||
}
|
|
||||||
2 => {
|
|
||||||
term.term_subset.clear_zero_typo_subset();
|
|
||||||
term.term_subset.clear_one_typo_subset();
|
|
||||||
}
|
|
||||||
_ => panic!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
|
let mut edges = vec![];
|
||||||
|
for fid in all_fields {
|
||||||
|
// TODO: We can improve performances and relevancy by storing
|
||||||
|
// the term subsets associated to each field ids fetched.
|
||||||
edges.push((
|
edges.push((
|
||||||
nbr_typos as u32 + base_cost,
|
fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
|
||||||
conditions_interner.insert(AttributeCondition { term, nbr_typos }),
|
conditions_interner.insert(AttributeCondition {
|
||||||
|
term: term.clone(), // TODO remove this ugly clone
|
||||||
|
fid,
|
||||||
|
}),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(edges)
|
Ok(edges)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,8 @@ mod exactness;
|
|||||||
mod proximity;
|
mod proximity;
|
||||||
/// Implementation of the `typo` ranking rule
|
/// Implementation of the `typo` ranking rule
|
||||||
mod typo;
|
mod typo;
|
||||||
|
/// Implementation of the `attribute` ranking rule
|
||||||
|
mod attribute;
|
||||||
|
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user