mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Introduce ContextMut and Context structs
This commit is contained in:
parent
d75339a271
commit
d93e35cace
@ -21,7 +21,7 @@ use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::automaton::{QueryEnhancer, QueryEnhancerBuilder};
|
||||
|
||||
use crate::criterion::Criteria;
|
||||
use crate::criterion::{Criteria, Context, ContextMut};
|
||||
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
|
||||
use crate::raw_document::RawDocument;
|
||||
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
|
||||
@ -61,7 +61,7 @@ where
|
||||
);
|
||||
}
|
||||
|
||||
let (automatons, query_enhancer) =
|
||||
let (mut automatons, mut query_enhancer) =
|
||||
construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
|
||||
|
||||
debug!("{:?}", query_enhancer);
|
||||
@ -102,14 +102,27 @@ where
|
||||
|
||||
for mut group in tmp_groups {
|
||||
let before_criterion_preparation = Instant::now();
|
||||
criterion.prepare(&mut group, &mut arena, &query_enhancer, &automatons);
|
||||
|
||||
let ctx = ContextMut {
|
||||
postings_lists: &mut arena,
|
||||
query_enhancer: &mut query_enhancer,
|
||||
automatons: &mut automatons,
|
||||
};
|
||||
|
||||
criterion.prepare(ctx, &mut group);
|
||||
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
|
||||
|
||||
let ctx = Context {
|
||||
postings_lists: &arena,
|
||||
query_enhancer: &query_enhancer,
|
||||
automatons: &automatons,
|
||||
};
|
||||
|
||||
let before_criterion_sort = Instant::now();
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, &arena));
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
|
||||
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
|
||||
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, &arena)) {
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
|
||||
debug!("{:?} produced a group of size {}", criterion.name(), group.len());
|
||||
|
||||
documents_seen += group.len();
|
||||
@ -147,7 +160,7 @@ where
|
||||
FI: Fn(DocumentId) -> bool,
|
||||
FD: Fn(DocumentId) -> Option<u64>,
|
||||
{
|
||||
let (automatons, query_enhancer) =
|
||||
let (mut automatons, mut query_enhancer) =
|
||||
construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
|
||||
|
||||
let before_postings_lists_fetching = Instant::now();
|
||||
@ -201,15 +214,27 @@ where
|
||||
continue;
|
||||
}
|
||||
|
||||
let ctx = ContextMut {
|
||||
postings_lists: &mut arena,
|
||||
query_enhancer: &mut query_enhancer,
|
||||
automatons: &mut automatons,
|
||||
};
|
||||
|
||||
let before_criterion_preparation = Instant::now();
|
||||
criterion.prepare(&mut group, &mut arena, &query_enhancer, &automatons);
|
||||
criterion.prepare(ctx, &mut group);
|
||||
debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
|
||||
|
||||
let ctx = Context {
|
||||
postings_lists: &arena,
|
||||
query_enhancer: &query_enhancer,
|
||||
automatons: &automatons,
|
||||
};
|
||||
|
||||
let before_criterion_sort = Instant::now();
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, &arena));
|
||||
group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
|
||||
debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
|
||||
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, &arena)) {
|
||||
for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
|
||||
// we must compute the real distinguished len of this sub-group
|
||||
for document in group.iter() {
|
||||
let filter_accepted = match &filter {
|
||||
|
@ -1,36 +1,23 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::automaton::QueryEnhancer;
|
||||
use crate::bucket_sort::{SimpleMatch, PostingsListView, QueryWordAutomaton};
|
||||
use crate::RawDocument;
|
||||
|
||||
use super::{Criterion, prepare_raw_matches};
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
||||
|
||||
pub struct Attribute;
|
||||
|
||||
impl Criterion for Attribute {
|
||||
fn name(&self) -> &str { "attribute" }
|
||||
|
||||
fn prepare<'a, 'tag, 'txn>(
|
||||
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) {
|
||||
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
|
||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
|
||||
}
|
||||
|
||||
fn evaluate<'a, 'tag, 'txn>(
|
||||
&self,
|
||||
lhs: &RawDocument<'a, 'tag>,
|
||||
rhs: &RawDocument<'a, 'tag>,
|
||||
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
) -> Ordering
|
||||
{
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn sum_of_attribute(matches: &[SimpleMatch]) -> usize {
|
||||
let mut sum_of_attribute = 0;
|
||||
|
@ -1,34 +1,14 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
|
||||
use crate::automaton::QueryEnhancer;
|
||||
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
|
||||
use crate::RawDocument;
|
||||
use super::Criterion;
|
||||
use super::{Criterion, Context};
|
||||
|
||||
pub struct DocumentId;
|
||||
|
||||
impl Criterion for DocumentId {
|
||||
fn name(&self) -> &str { "stable document id" }
|
||||
|
||||
fn prepare(
|
||||
&self,
|
||||
documents: &mut [RawDocument],
|
||||
postings_lists: &mut SmallArena<PostingsListView>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
) {
|
||||
// ...
|
||||
}
|
||||
|
||||
fn evaluate(
|
||||
&self,
|
||||
lhs: &RawDocument,
|
||||
rhs: &RawDocument,
|
||||
postings_lists: &SmallArena<PostingsListView>,
|
||||
) -> Ordering
|
||||
{
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
let lhs = &lhs.id;
|
||||
let rhs = &rhs.id;
|
||||
|
||||
|
@ -1,37 +1,21 @@
|
||||
use std::cmp::{Ordering, Reverse};
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::automaton::QueryEnhancer;
|
||||
use crate::bucket_sort::{PostingsListView, BareMatch, QueryWordAutomaton};
|
||||
use crate::RawDocument;
|
||||
use super::Criterion;
|
||||
use crate::bucket_sort::BareMatch;
|
||||
use super::{Criterion, Context, ContextMut};
|
||||
|
||||
pub struct Exact;
|
||||
|
||||
impl Criterion for Exact {
|
||||
fn name(&self) -> &str { "exact" }
|
||||
|
||||
fn prepare(
|
||||
&self,
|
||||
documents: &mut [RawDocument],
|
||||
postings_lists: &mut SmallArena<PostingsListView>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
) {
|
||||
fn prepare(&self, _ctx: ContextMut, documents: &mut [RawDocument]) {
|
||||
for document in documents {
|
||||
document.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate(
|
||||
&self,
|
||||
lhs: &RawDocument,
|
||||
rhs: &RawDocument,
|
||||
postings_lists: &SmallArena<PostingsListView>,
|
||||
) -> Ordering
|
||||
{
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
|
||||
let mut sum_exact_query_words = 0;
|
||||
|
@ -29,33 +29,45 @@ pub use self::sort_by_attr::SortByAttr;
|
||||
pub trait Criterion {
|
||||
fn name(&self) -> &str;
|
||||
|
||||
fn prepare<'a, 'tag, 'txn>(
|
||||
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
);
|
||||
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) {
|
||||
/* ... */
|
||||
}
|
||||
|
||||
fn evaluate<'a, 'tag, 'txn>(
|
||||
fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
lhs: &RawDocument<'a, 'tag>,
|
||||
rhs: &RawDocument<'a, 'tag>,
|
||||
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
|
||||
lhs: &RawDocument<'r, 'tag>,
|
||||
rhs: &RawDocument<'r, 'tag>,
|
||||
) -> Ordering;
|
||||
|
||||
#[inline]
|
||||
fn eq<'a, 'tag, 'txn>(
|
||||
fn eq<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
lhs: &RawDocument<'a, 'tag>,
|
||||
rhs: &RawDocument<'a, 'tag>,
|
||||
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
|
||||
lhs: &RawDocument<'r, 'tag>,
|
||||
rhs: &RawDocument<'r, 'tag>,
|
||||
) -> bool
|
||||
{
|
||||
self.evaluate(lhs, rhs, postings_lists) == Ordering::Equal
|
||||
self.evaluate(ctx, lhs, rhs) == Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
/// Bundle of exclusive (`&mut`) references that `Criterion::prepare`
/// receives by value: the postings lists arena, the query enhancer and
/// the slice of query word automatons.
///
/// `'p`, `'q` and `'a` are the borrow lifetimes of the three fields, in
/// that order; `'tag` is the lifetime parameter of the `SmallArena` and
/// `'txn` the one of its `PostingsListView` element type.
pub struct ContextMut<'p, 'tag, 'txn, 'q, 'a> {
|
||||
pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
pub query_enhancer: &'q mut QueryEnhancer,
|
||||
pub automatons: &'a mut [QueryWordAutomaton],
|
||||
}
|
||||
|
||||
/// Bundle of shared (`&`) references that `Criterion::evaluate` and
/// `Criterion::eq` receive by reference: the postings lists arena, the
/// query enhancer and the slice of query word automatons.
///
/// `'p`, `'q` and `'a` are the borrow lifetimes of the three fields, in
/// that order; `'tag` is the lifetime parameter of the `SmallArena` and
/// `'txn` the one of its `PostingsListView` element type.
pub struct Context<'p, 'tag, 'txn, 'q, 'a> {
|
||||
pub postings_lists: &'p SmallArena<'tag, PostingsListView<'txn>>,
|
||||
pub query_enhancer: &'q QueryEnhancer,
|
||||
pub automatons: &'a [QueryWordAutomaton],
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct CriteriaBuilder<'a> {
|
||||
inner: Vec<Box<dyn Criterion + 'a>>,
|
||||
|
@ -1,38 +1,25 @@
|
||||
use std::cmp::{self, Ordering};
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::automaton::QueryEnhancer;
|
||||
use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton};
|
||||
use crate::bucket_sort::{SimpleMatch};
|
||||
use crate::RawDocument;
|
||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
||||
|
||||
use super::{Criterion, prepare_raw_matches};
|
||||
const MAX_DISTANCE: u16 = 8;
|
||||
|
||||
pub struct Proximity;
|
||||
|
||||
impl Criterion for Proximity {
|
||||
fn name(&self) -> &str { "proximity" }
|
||||
|
||||
fn prepare<'a, 'tag, 'txn>(
|
||||
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) {
|
||||
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
|
||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
|
||||
}
|
||||
|
||||
fn evaluate<'a, 'tag, 'txn>(
|
||||
&self,
|
||||
lhs: &RawDocument<'a, 'tag>,
|
||||
rhs: &RawDocument<'a, 'tag>,
|
||||
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
) -> Ordering
|
||||
{
|
||||
const MAX_DISTANCE: u16 = 8;
|
||||
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
|
||||
if lhs < rhs {
|
||||
cmp::min(rhs - lhs, MAX_DISTANCE)
|
||||
|
@ -1,14 +1,9 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use meilisearch_schema::{Schema, SchemaAttr};
|
||||
|
||||
use crate::automaton::QueryEnhancer;
|
||||
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
|
||||
use crate::criterion::Criterion;
|
||||
use crate::{RankedMap, RawDocument};
|
||||
use super::{Criterion, Context};
|
||||
|
||||
/// A helper struct that permits sorting documents by
|
||||
/// some of their stored attributes.
|
||||
@ -95,23 +90,7 @@ impl Criterion for SortByAttr<'_> {
|
||||
"sort by attribute"
|
||||
}
|
||||
|
||||
fn prepare<'a, 'tag, 'txn>(
|
||||
&self,
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
) {
|
||||
// ...
|
||||
}
|
||||
|
||||
fn evaluate<'a, 'tag, 'txn>(
|
||||
&self,
|
||||
lhs: &RawDocument<'a, 'tag>,
|
||||
rhs: &RawDocument<'a, 'tag>,
|
||||
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
) -> Ordering
|
||||
{
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
let lhs = self.ranked_map.get(lhs.id, self.attr);
|
||||
let rhs = self.ranked_map.get(rhs.id, self.attr);
|
||||
|
||||
|
@ -6,30 +6,22 @@ use crate::automaton::QueryEnhancer;
|
||||
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
|
||||
use crate::RawDocument;
|
||||
|
||||
use super::{Criterion, prepare_query_distances};
|
||||
use super::{Criterion, Context, ContextMut, prepare_query_distances};
|
||||
|
||||
pub struct Typo;
|
||||
|
||||
impl Criterion for Typo {
|
||||
fn name(&self) -> &str { "typo" }
|
||||
|
||||
fn prepare<'a, 'tag, 'txn>(
|
||||
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) {
|
||||
prepare_query_distances(documents, query_enhancer, automatons, postings_lists);
|
||||
prepare_query_distances(documents, ctx.query_enhancer, ctx.automatons, ctx.postings_lists);
|
||||
}
|
||||
|
||||
fn evaluate(
|
||||
&self,
|
||||
lhs: &RawDocument,
|
||||
rhs: &RawDocument,
|
||||
postings_lists: &SmallArena<PostingsListView>,
|
||||
) -> Ordering
|
||||
{
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
// This function is not a true base-10 logarithm function.
|
||||
// It is safe to panic on an input number higher than 3,
|
||||
// the number of typos is never bigger than that.
|
||||
|
@ -1,35 +1,21 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
|
||||
use crate::automaton::QueryEnhancer;
|
||||
use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
|
||||
use crate::RawDocument;
|
||||
|
||||
use super::{Criterion, prepare_query_distances};
|
||||
use super::{Criterion, Context, ContextMut, prepare_query_distances};
|
||||
|
||||
pub struct Words;
|
||||
|
||||
impl Criterion for Words {
|
||||
fn name(&self) -> &str { "words" }
|
||||
|
||||
fn prepare<'a, 'tag, 'txn>(
|
||||
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) {
|
||||
prepare_query_distances(documents, query_enhancer, automatons, postings_lists);
|
||||
prepare_query_distances(documents, ctx.query_enhancer, ctx.automatons, ctx.postings_lists);
|
||||
}
|
||||
|
||||
fn evaluate(
|
||||
&self,
|
||||
lhs: &RawDocument,
|
||||
rhs: &RawDocument,
|
||||
postings_lists: &SmallArena<PostingsListView>,
|
||||
) -> Ordering
|
||||
{
|
||||
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
|
||||
#[inline]
|
||||
fn number_of_query_words(distances: &[Option<u8>]) -> usize {
|
||||
distances.iter().cloned().filter(Option::is_some).count()
|
||||
|
@ -1,34 +1,29 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use slice_group_by::GroupBy;
|
||||
|
||||
use crate::automaton::QueryEnhancer;
|
||||
use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton};
|
||||
use crate::RawDocument;
|
||||
|
||||
use super::{Criterion, prepare_raw_matches};
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
|
||||
|
||||
pub struct WordsPosition;
|
||||
|
||||
impl Criterion for WordsPosition {
|
||||
fn name(&self) -> &str { "words position" }
|
||||
|
||||
fn prepare<'a, 'tag, 'txn>(
|
||||
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
documents: &mut [RawDocument<'a, 'tag>],
|
||||
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
query_enhancer: &QueryEnhancer,
|
||||
automatons: &[QueryWordAutomaton],
|
||||
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
|
||||
documents: &mut [RawDocument<'r, 'tag>],
|
||||
) {
|
||||
prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
|
||||
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
|
||||
}
|
||||
|
||||
fn evaluate<'a, 'tag, 'txn>(
|
||||
fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>(
|
||||
&self,
|
||||
lhs: &RawDocument<'a, 'tag>,
|
||||
rhs: &RawDocument<'a, 'tag>,
|
||||
postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
|
||||
ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
|
||||
lhs: &RawDocument<'r, 'tag>,
|
||||
rhs: &RawDocument<'r, 'tag>,
|
||||
) -> Ordering
|
||||
{
|
||||
#[inline]
|
||||
|
Loading…
Reference in New Issue
Block a user