mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
Merge pull request #64 from Kerollmops/precompute-query-index-groups
Precompute query index groups
This commit is contained in:
commit
510426c05c
@ -115,7 +115,7 @@ fn main() -> Result<(), Box<Error>> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
print!("{}: ", name);
|
print!("{}: ", name);
|
||||||
let areas = create_highlight_areas(&text, &doc.matches, attr);
|
let areas = create_highlight_areas(&text, doc.matches.as_matches(), attr);
|
||||||
display_highlights(&text, &areas)?;
|
display_highlights(&text, &areas)?;
|
||||||
println!();
|
println!();
|
||||||
}
|
}
|
||||||
@ -124,7 +124,7 @@ fn main() -> Result<(), Box<Error>> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut matching_attributes = HashSet::new();
|
let mut matching_attributes = HashSet::new();
|
||||||
for _match in doc.matches {
|
for _match in doc.matches.as_matches() {
|
||||||
let attr = SchemaAttr::new(_match.attribute.attribute());
|
let attr = SchemaAttr::new(_match.attribute.attribute());
|
||||||
let name = schema.attribute_name(attr);
|
let name = schema.attribute_name(attr);
|
||||||
matching_attributes.insert(name);
|
matching_attributes.insert(name);
|
||||||
|
@ -2,21 +2,20 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
|
||||||
|
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{Document, Matches};
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
use crate::database::DatabaseView;
|
use crate::database::DatabaseView;
|
||||||
use crate::Match;
|
use crate::Match;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn contains_exact(matches: &[Match]) -> bool {
|
fn contains_exact(matches: &&[Match]) -> bool {
|
||||||
matches.iter().any(|m| m.is_exact)
|
matches.iter().any(|m| m.is_exact)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn number_exact_matches(matches: &[Match]) -> usize {
|
fn number_exact_matches(matches: &Matches) -> usize {
|
||||||
GroupBy::new(matches, match_query_index).map(contains_exact).count()
|
matches.query_index_groups().filter(contains_exact).count()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
@ -2,16 +2,14 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
|
||||||
|
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{Document, Matches};
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
use crate::database::DatabaseView;
|
use crate::database::DatabaseView;
|
||||||
use crate::Match;
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn number_of_query_words(matches: &[Match]) -> usize {
|
fn number_of_query_words(matches: &Matches) -> usize {
|
||||||
GroupBy::new(matches, match_query_index).count()
|
matches.query_index_groups().count()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
@ -3,21 +3,18 @@ use std::ops::Deref;
|
|||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
|
|
||||||
use group_by::GroupBy;
|
use crate::rank::{Document, Matches};
|
||||||
|
|
||||||
use crate::rank::{match_query_index, Document};
|
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
use crate::database::DatabaseView;
|
use crate::database::DatabaseView;
|
||||||
use crate::Match;
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sum_matches_typos(matches: &[Match]) -> i8 {
|
fn sum_matches_typos(matches: &Matches) -> i8 {
|
||||||
let mut sum_typos = 0;
|
let mut sum_typos = 0;
|
||||||
let mut number_words = 0;
|
let mut number_words = 0;
|
||||||
|
|
||||||
// note that GroupBy will never return an empty group
|
// note that GroupBy will never return an empty group
|
||||||
// so we can do this assumption safely
|
// so we can do this assumption safely
|
||||||
for group in GroupBy::new(matches, match_query_index) {
|
for group in matches.query_index_groups() {
|
||||||
sum_typos += unsafe { group.get_unchecked(0).distance } as i8;
|
sum_typos += unsafe { group.get_unchecked(0).distance } as i8;
|
||||||
number_words += 1;
|
number_words += 1;
|
||||||
}
|
}
|
||||||
@ -44,7 +41,7 @@ where D: Deref<Target=DB>
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
use crate::{DocumentId, Attribute, WordArea};
|
use crate::{Match, DocumentId, Attribute, WordArea};
|
||||||
|
|
||||||
// typing: "Geox CEO"
|
// typing: "Geox CEO"
|
||||||
//
|
//
|
||||||
@ -69,10 +66,7 @@ mod tests {
|
|||||||
word_area: WordArea::new_faillible(0, 6)
|
word_area: WordArea::new_faillible(0, 6)
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
Document {
|
Document::from_unsorted_matches(DocumentId(0), matches)
|
||||||
id: DocumentId(0),
|
|
||||||
matches: matches,
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let doc1 = {
|
let doc1 = {
|
||||||
@ -92,10 +86,7 @@ mod tests {
|
|||||||
word_area: WordArea::new_faillible(0, 6)
|
word_area: WordArea::new_faillible(0, 6)
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
Document {
|
Document::from_unsorted_matches(DocumentId(1), matches)
|
||||||
id: DocumentId(1),
|
|
||||||
matches: matches,
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let lhs = sum_matches_typos(&doc0.matches);
|
let lhs = sum_matches_typos(&doc0.matches);
|
||||||
@ -126,10 +117,7 @@ mod tests {
|
|||||||
word_area: WordArea::new_faillible(0, 6)
|
word_area: WordArea::new_faillible(0, 6)
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
Document {
|
Document::from_unsorted_matches(DocumentId(0), matches)
|
||||||
id: DocumentId(0),
|
|
||||||
matches: matches,
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let doc1 = {
|
let doc1 = {
|
||||||
@ -142,10 +130,7 @@ mod tests {
|
|||||||
word_area: WordArea::new_faillible(0, 6)
|
word_area: WordArea::new_faillible(0, 6)
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
Document {
|
Document::from_unsorted_matches(DocumentId(1), matches)
|
||||||
id: DocumentId(1),
|
|
||||||
matches: matches,
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let lhs = sum_matches_typos(&doc0.matches);
|
let lhs = sum_matches_typos(&doc0.matches);
|
||||||
@ -176,10 +161,7 @@ mod tests {
|
|||||||
word_area: WordArea::new_faillible(0, 6)
|
word_area: WordArea::new_faillible(0, 6)
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
Document {
|
Document::from_unsorted_matches(DocumentId(0), matches)
|
||||||
id: DocumentId(0),
|
|
||||||
matches: matches,
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let doc1 = {
|
let doc1 = {
|
||||||
@ -192,10 +174,7 @@ mod tests {
|
|||||||
word_area: WordArea::new_faillible(0, 6)
|
word_area: WordArea::new_faillible(0, 6)
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
Document {
|
Document::from_unsorted_matches(DocumentId(1), matches)
|
||||||
id: DocumentId(1),
|
|
||||||
matches: matches,
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let lhs = sum_matches_typos(&doc0.matches);
|
let lhs = sum_matches_typos(&doc0.matches);
|
||||||
|
@ -2,19 +2,17 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
|
||||||
|
|
||||||
|
use crate::rank::{Document, Matches};
|
||||||
use crate::database::DatabaseView;
|
use crate::database::DatabaseView;
|
||||||
use crate::rank::{match_query_index, Document};
|
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
use crate::Match;
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sum_matches_attributes(matches: &[Match]) -> u16 {
|
fn sum_matches_attributes(matches: &Matches) -> usize {
|
||||||
// note that GroupBy will never return an empty group
|
// note that GroupBy will never return an empty group
|
||||||
// so we can do this assumption safely
|
// so we can do this assumption safely
|
||||||
GroupBy::new(matches, match_query_index).map(|group| unsafe {
|
matches.query_index_groups().map(|group| {
|
||||||
group.get_unchecked(0).attribute.attribute()
|
unsafe { group.get_unchecked(0).attribute.attribute() as usize }
|
||||||
}).sum()
|
}).sum()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,19 +2,17 @@ use std::cmp::Ordering;
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
|
||||||
|
|
||||||
use crate::database::DatabaseView;
|
use crate::rank::{Document, Matches};
|
||||||
use crate::rank::{match_query_index, Document};
|
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
use crate::Match;
|
use crate::database::DatabaseView;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sum_matches_attribute_index(matches: &[Match]) -> u32 {
|
fn sum_matches_attribute_index(matches: &Matches) -> usize {
|
||||||
// note that GroupBy will never return an empty group
|
// note that GroupBy will never return an empty group
|
||||||
// so we can do this assumption safely
|
// so we can do this assumption safely
|
||||||
GroupBy::new(matches, match_query_index).map(|group| unsafe {
|
matches.query_index_groups().map(|group| {
|
||||||
group.get_unchecked(0).attribute.word_index()
|
unsafe { group.get_unchecked(0).attribute.word_index() as usize }
|
||||||
}).sum()
|
}).sum()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,9 +2,8 @@ use std::cmp::{self, Ordering};
|
|||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use rocksdb::DB;
|
use rocksdb::DB;
|
||||||
use group_by::GroupBy;
|
|
||||||
|
|
||||||
use crate::rank::{match_query_index, Document};
|
use crate::rank::{Document, Matches};
|
||||||
use crate::rank::criterion::Criterion;
|
use crate::rank::criterion::Criterion;
|
||||||
use crate::database::DatabaseView;
|
use crate::database::DatabaseView;
|
||||||
use crate::Match;
|
use crate::Match;
|
||||||
@ -34,9 +33,9 @@ fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
|
|||||||
min_prox
|
min_prox
|
||||||
}
|
}
|
||||||
|
|
||||||
fn matches_proximity(matches: &[Match]) -> u32 {
|
fn matches_proximity(matches: &Matches) -> u32 {
|
||||||
let mut proximity = 0;
|
let mut proximity = 0;
|
||||||
let mut iter = GroupBy::new(matches, match_query_index);
|
let mut iter = matches.query_index_groups();
|
||||||
|
|
||||||
// iterate over groups by windows of size 2
|
// iterate over groups by windows of size 2
|
||||||
let mut last = iter.next();
|
let mut last = iter.next();
|
||||||
@ -91,7 +90,8 @@ mod tests {
|
|||||||
// soup -> of = 8
|
// soup -> of = 8
|
||||||
// + of -> the = 1
|
// + of -> the = 1
|
||||||
// + the -> day = 8 (not 1)
|
// + the -> day = 8 (not 1)
|
||||||
assert_eq!(matches_proximity(matches), 17);
|
let matches = Matches::from_unsorted(matches.to_vec());
|
||||||
|
assert_eq!(matches_proximity(&matches), 17);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -118,7 +118,8 @@ mod tests {
|
|||||||
// soup -> of = 1
|
// soup -> of = 1
|
||||||
// + of -> the = 1
|
// + of -> the = 1
|
||||||
// + the -> day = 1
|
// + the -> day = 1
|
||||||
assert_eq!(matches_proximity(matches), 3);
|
let matches = Matches::from_unsorted(matches.to_vec());
|
||||||
|
assert_eq!(matches_proximity(&matches), 3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -152,6 +153,8 @@ mod bench {
|
|||||||
matches.push(match_);
|
matches.push(match_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let matches = Matches::from_unsorted(matches.to_vec());
|
||||||
|
|
||||||
bench.iter(|| {
|
bench.iter(|| {
|
||||||
let proximity = matches_proximity(&matches);
|
let proximity = matches_proximity(&matches);
|
||||||
test::black_box(move || proximity)
|
test::black_box(move || proximity)
|
||||||
|
121
src/rank/mod.rs
121
src/rank/mod.rs
@ -2,6 +2,13 @@ pub mod criterion;
|
|||||||
mod query_builder;
|
mod query_builder;
|
||||||
mod distinct_map;
|
mod distinct_map;
|
||||||
|
|
||||||
|
use std::iter::FusedIterator;
|
||||||
|
use std::slice::Iter;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
use sdset::SetBuf;
|
||||||
|
use group_by::GroupBy;
|
||||||
|
|
||||||
use crate::{Match, DocumentId};
|
use crate::{Match, DocumentId};
|
||||||
|
|
||||||
pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};
|
pub use self::query_builder::{FilterFunc, QueryBuilder, DistinctQueryBuilder};
|
||||||
@ -14,20 +21,120 @@ fn match_query_index(a: &Match, b: &Match) -> bool {
|
|||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Document {
|
pub struct Document {
|
||||||
pub id: DocumentId,
|
pub id: DocumentId,
|
||||||
pub matches: Vec<Match>,
|
pub matches: Matches,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Document {
|
impl Document {
|
||||||
pub fn new(doc: DocumentId, match_: Match) -> Self {
|
pub fn new(id: DocumentId, match_: Match) -> Self {
|
||||||
unsafe { Self::from_sorted_matches(doc, vec![match_]) }
|
let matches = SetBuf::new_unchecked(vec![match_]);
|
||||||
|
Self::from_matches(id, matches)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_matches(doc: DocumentId, mut matches: Vec<Match>) -> Self {
|
pub fn from_matches(id: DocumentId, matches: SetBuf<Match>) -> Self {
|
||||||
matches.sort_unstable();
|
let matches = Matches::new(matches);
|
||||||
unsafe { Self::from_sorted_matches(doc, matches) }
|
Self { id, matches }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec<Match>) -> Self {
|
pub fn from_unsorted_matches(id: DocumentId, matches: Vec<Match>) -> Self {
|
||||||
|
let matches = Matches::from_unsorted(matches);
|
||||||
Self { id, matches }
|
Self { id, matches }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Matches {
|
||||||
|
matches: SetBuf<Match>,
|
||||||
|
slices: Vec<Range<usize>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Matches {
|
||||||
|
pub fn new(matches: SetBuf<Match>) -> Matches {
|
||||||
|
let mut last_end = 0;
|
||||||
|
let mut slices = Vec::new();
|
||||||
|
|
||||||
|
for group in GroupBy::new(&matches, match_query_index) {
|
||||||
|
let start = last_end;
|
||||||
|
let end = last_end + group.len();
|
||||||
|
slices.push(Range { start, end });
|
||||||
|
last_end = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
Matches { matches, slices }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_unsorted(mut matches: Vec<Match>) -> Matches {
|
||||||
|
matches.sort_unstable();
|
||||||
|
let matches = SetBuf::new_unchecked(matches);
|
||||||
|
Matches::new(matches)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn query_index_groups(&self) -> QueryIndexGroups {
|
||||||
|
QueryIndexGroups {
|
||||||
|
matches: &self.matches,
|
||||||
|
slices: self.slices.iter(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_matches(&self) -> &[Match] {
|
||||||
|
&self.matches
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct QueryIndexGroups<'a, 'b> {
|
||||||
|
matches: &'a [Match],
|
||||||
|
slices: Iter<'b, Range<usize>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for QueryIndexGroups<'a, '_> {
|
||||||
|
type Item = &'a [Match];
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.slices.next().cloned().map(|range| {
|
||||||
|
unsafe { self.matches.get_unchecked(range) }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
|
self.slices.size_hint()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn count(self) -> usize {
|
||||||
|
self.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
||||||
|
self.slices.nth(n).cloned().map(|range| {
|
||||||
|
unsafe { self.matches.get_unchecked(range) }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn last(self) -> Option<Self::Item> {
|
||||||
|
let (matches, slices) = (self.matches, self.slices);
|
||||||
|
slices.last().cloned().map(|range| {
|
||||||
|
unsafe { matches.get_unchecked(range) }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ExactSizeIterator for QueryIndexGroups<'_, '_> {
|
||||||
|
#[inline]
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
self.slices.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FusedIterator for QueryIndexGroups<'_, '_> { }
|
||||||
|
|
||||||
|
impl DoubleEndedIterator for QueryIndexGroups<'_, '_> {
|
||||||
|
#[inline]
|
||||||
|
fn next_back(&mut self) -> Option<Self::Item> {
|
||||||
|
self.slices.next_back().cloned().map(|range| {
|
||||||
|
unsafe { self.matches.get_unchecked(range) }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -116,7 +116,7 @@ where D: Deref<Target=DB>,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
matches.into_iter().map(|(id, matches)| Document::from_matches(id, matches)).collect()
|
matches.into_iter().map(|(i, m)| Document::from_unsorted_matches(i, m)).collect()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user