// meilisearch/meilisearch-core/src/raw_document.rs

use compact_arena::SmallArena;
use itertools::EitherOrBoth;
use sdset::SetBuf;
use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsListView};
/// A document candidate built from the raw matches that reference it.
///
/// Instances are created with [`RawDocument::new`], which sorts the raw
/// matches by `query_index` and filters the postings lists of phrase-query
/// words before handing back the document.
pub struct RawDocument<'a, 'tag> {
    /// Identifier of the document; `new` copies it from the first raw match.
    pub id: crate::DocumentId,
    /// The raw matches of this document, sorted by `query_index` by `new`.
    pub raw_matches: &'a mut [BareMatch<'tag>],
    /// Processed matches; left empty by `new`
    /// (presumably filled by a later processing step — confirm against callers).
    pub processed_matches: Vec<SimpleMatch>,
    /// The list of minimum `distance` found; left empty by `new`.
    pub processed_distances: Vec<Option<u8>>,
    /// Whether this document contains a field
    /// with one word that is exactly matching.
    /// Always initialised to `false` by `new`.
    pub contains_one_word_field: bool,
}
impl<'a, 'tag> RawDocument<'a, 'tag> {
pub fn new<'txn>(
raw_matches: &'a mut [BareMatch<'tag>],
automatons: &[QueryWordAutomaton],
postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
) -> Option<RawDocument<'a, 'tag>>
{
raw_matches.sort_unstable_by_key(|m| m.query_index);
let mut previous_word = None;
for i in 0..raw_matches.len() {
let a = &raw_matches[i];
let auta = &automatons[a.query_index as usize];
match auta.phrase_query {
Some((0, _)) => {
let b = match raw_matches.get(i + 1) {
Some(b) => b,
None => {
postings_lists[a.postings_list].rewrite_with(SetBuf::default());
continue;
}
};
if a.query_index + 1 != b.query_index {
postings_lists[a.postings_list].rewrite_with(SetBuf::default());
continue
}
let pla = &postings_lists[a.postings_list];
let plb = &postings_lists[b.postings_list];
let iter = itertools::merge_join_by(pla.iter(), plb.iter(), |a, b| {
2019-12-12 00:02:10 +08:00
a.attribute.cmp(&b.attribute).then((a.word_index + 1).cmp(&b.word_index))
});
let mut newa = Vec::new();
let mut newb = Vec::new();
for eb in iter {
if let EitherOrBoth::Both(a, b) = eb {
newa.push(*a);
newb.push(*b);
}
}
if !newa.is_empty() {
previous_word = Some(a.query_index);
}
postings_lists[a.postings_list].rewrite_with(SetBuf::new_unchecked(newa));
postings_lists[b.postings_list].rewrite_with(SetBuf::new_unchecked(newb));
},
Some((1, _)) => {
if previous_word.take() != Some(a.query_index - 1) {
postings_lists[a.postings_list].rewrite_with(SetBuf::default());
}
},
Some((_, _)) => unreachable!(),
None => (),
}
2019-10-18 19:05:28 +08:00
}
2019-12-12 00:02:10 +08:00
if raw_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
return None
2019-10-18 19:05:28 +08:00
}
2019-12-12 00:02:10 +08:00
Some(RawDocument {
id: raw_matches[0].document_id,
raw_matches,
processed_matches: Vec::new(),
processed_distances: Vec::new(),
2019-12-13 18:14:12 +08:00
contains_one_word_field: false,
2019-10-18 19:05:28 +08:00
})
}
}