// Source: meilisearch/meilisearch-core/src/criterion/proximity.rs (67 lines, 2.1 KiB)

use std::cmp::{self, Ordering};
use slice_group_by::GroupBy;
use crate::bucket_sort::{SimpleMatch};
2019-12-12 00:02:10 +08:00
use crate::RawDocument;
use super::{Criterion, Context, ContextMut, prepare_raw_matches};
2019-12-12 00:02:10 +08:00
/// Cap applied to the word-index distance between two matches.
///
/// It is also the proximity assigned to two matches that belong to different
/// attributes. Note that a capped distance may still receive a `+ 1` penalty
/// when the words are not in ascending order, so an individual proximity can
/// reach `MAX_DISTANCE + 1`.
const MAX_DISTANCE: u16 = 8;
2019-12-12 00:02:10 +08:00
/// Ranking criterion, reported under the name `"proximity"`, that compares two
/// documents by the summed distance between their matched query words.
pub struct Proximity;
impl Criterion for Proximity {
fn name(&self) -> &str { "proximity" }
fn prepare<'p, 'tag, 'txn, 'q, 'a, 'r>(
2019-12-12 00:02:10 +08:00
&self,
ctx: ContextMut<'p, 'tag, 'txn, 'q, 'a>,
documents: &mut [RawDocument<'r, 'tag>],
2019-12-12 00:02:10 +08:00
) {
prepare_raw_matches(documents, ctx.postings_lists, ctx.query_enhancer, ctx.automatons);
2019-12-12 00:02:10 +08:00
}
fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
2019-12-12 00:02:10 +08:00
fn index_proximity(lhs: u16, rhs: u16) -> u16 {
if lhs < rhs {
cmp::min(rhs - lhs, MAX_DISTANCE)
} else {
cmp::min(lhs - rhs, MAX_DISTANCE) + 1
}
}
fn attribute_proximity(lhs: SimpleMatch, rhs: SimpleMatch) -> u16 {
if lhs.attribute != rhs.attribute { MAX_DISTANCE }
else { index_proximity(lhs.word_index, rhs.word_index) }
}
fn min_proximity(lhs: &[SimpleMatch], rhs: &[SimpleMatch]) -> u16 {
let mut min_prox = u16::max_value();
for a in lhs {
for b in rhs {
let prox = attribute_proximity(*a, *b);
min_prox = cmp::min(min_prox, prox);
}
}
min_prox
}
fn matches_proximity(matches: &[SimpleMatch],) -> u16 {
let mut proximity = 0;
let mut iter = matches.linear_group_by_key(|m| m.query_index);
// iterate over groups by windows of size 2
let mut last = iter.next();
while let (Some(lhs), Some(rhs)) = (last, iter.next()) {
proximity += min_proximity(lhs, rhs);
last = Some(rhs);
}
proximity
}
let lhs = matches_proximity(&lhs.processed_matches);
let rhs = matches_proximity(&rhs.processed_matches);
lhs.cmp(&rhs)
}
}