mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 17:11:15 +08:00
Do less useless intersections
This commit is contained in:
parent
0ca1a4e805
commit
41f51adbec
@ -1,3 +1,4 @@
|
||||
use heed::types::Bytes;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
|
||||
use super::query_graph::QueryGraph;
|
||||
@ -5,7 +6,7 @@ use super::ranking_rules::{RankingRule, RankingRuleOutput};
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::ExactTerm;
|
||||
use crate::{Result, SearchContext, SearchLogger};
|
||||
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger};
|
||||
|
||||
/// A ranking rule that produces 3 disjoint buckets:
|
||||
///
|
||||
@ -173,8 +174,7 @@ impl State {
|
||||
let bucketed_position = crate::bucketed_position(position + offset);
|
||||
let word_position_docids = ctx
|
||||
.get_db_word_position_docids(Some(universe), *word, bucketed_position)?
|
||||
.unwrap_or_default()
|
||||
& universe;
|
||||
.unwrap_or_default();
|
||||
candidates &= word_position_docids;
|
||||
if candidates.is_empty() {
|
||||
return Ok(State::Empty(query_graph.clone()));
|
||||
@ -205,16 +205,24 @@ impl State {
|
||||
.unwrap_or_default())
|
||||
}),
|
||||
)?;
|
||||
// TODO Why not doing this intersection in the MultiOps above?
|
||||
intersection &= &candidates;
|
||||
if !intersection.is_empty() {
|
||||
// Although not really worth it in terms of performance,
|
||||
// if would be good to put this in cache for the sake of consistency
|
||||
let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize {
|
||||
ctx.index
|
||||
let bitmap_bytes = ctx
|
||||
.index
|
||||
.field_id_word_count_docids
|
||||
.get(ctx.txn, &(fid, count_all_positions as u8))?
|
||||
.unwrap_or_default()
|
||||
& universe
|
||||
.remap_data_type::<Bytes>()
|
||||
.get(ctx.txn, &(fid, count_all_positions as u8))?;
|
||||
|
||||
match bitmap_bytes {
|
||||
Some(bytes) => {
|
||||
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
|
||||
}
|
||||
None => RoaringBitmap::default(),
|
||||
}
|
||||
} else {
|
||||
RoaringBitmap::default()
|
||||
};
|
||||
@ -237,6 +245,8 @@ impl State {
|
||||
let (state, output) = match state {
|
||||
State::Uninitialized => (state, None),
|
||||
State::ExactAttribute(query_graph, candidates_per_attribute) => {
|
||||
// TODO it can be much faster to do the intersections before the unions...
|
||||
// or maybe the candidates_per_attribute are not containing anything outside universe
|
||||
let mut candidates = MultiOps::union(candidates_per_attribute.iter().map(
|
||||
|FieldCandidates { start_with_exact, exact_word_count }| {
|
||||
start_with_exact & exact_word_count
|
||||
@ -255,6 +265,8 @@ impl State {
|
||||
)
|
||||
}
|
||||
State::AttributeStarts(query_graph, candidates_per_attribute) => {
|
||||
// TODO it can be much faster to do the intersections before the unions...
|
||||
// or maybe the candidates_per_attribute are not containing anything outside universe
|
||||
let mut candidates = MultiOps::union(candidates_per_attribute.into_iter().map(
|
||||
|FieldCandidates { mut start_with_exact, exact_word_count }| {
|
||||
start_with_exact -= exact_word_count;
|
||||
|
@ -29,14 +29,12 @@ impl RankingRuleGraphTrait for FidGraph {
|
||||
let FidCondition { term, .. } = condition;
|
||||
|
||||
let docids = if let Some(fid) = condition.fid {
|
||||
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
||||
let docids = compute_query_term_subset_docids_within_field_id(
|
||||
compute_query_term_subset_docids_within_field_id(
|
||||
ctx,
|
||||
Some(universe),
|
||||
&term.term_subset,
|
||||
fid,
|
||||
)?;
|
||||
docids & universe
|
||||
)?
|
||||
} else {
|
||||
RoaringBitmap::new()
|
||||
};
|
||||
|
@ -28,10 +28,10 @@ impl RankingRuleGraphTrait for PositionGraph {
|
||||
) -> Result<ComputedCondition> {
|
||||
let PositionCondition { term, positions } = condition;
|
||||
let mut docids = RoaringBitmap::new();
|
||||
// TODO use MultiOps to do the big union
|
||||
for position in positions {
|
||||
// maybe compute_query_term_subset_docids_within_position should accept a universe as argument
|
||||
docids |= universe
|
||||
& compute_query_term_subset_docids_within_position(
|
||||
docids |= compute_query_term_subset_docids_within_position(
|
||||
ctx,
|
||||
Some(universe),
|
||||
&term.term_subset,
|
||||
|
@ -143,7 +143,6 @@ fn compute_prefix_edges(
|
||||
right_prefix,
|
||||
forward_proximity,
|
||||
)? {
|
||||
let new_docids = &universe & new_docids;
|
||||
if !new_docids.is_empty() {
|
||||
used_left_words.insert(left_word);
|
||||
used_right_prefix.insert(right_prefix);
|
||||
@ -153,13 +152,13 @@ fn compute_prefix_edges(
|
||||
|
||||
// No swapping when computing the proximity between a phrase and a word
|
||||
if left_phrase.is_none() {
|
||||
// TODO check that the fact that the universe always changes is not an issue, e.g. caching stuff.
|
||||
if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids(
|
||||
Some(&universe),
|
||||
right_prefix,
|
||||
left_word,
|
||||
backward_proximity,
|
||||
)? {
|
||||
let new_docids = &universe & new_docids;
|
||||
if !new_docids.is_empty() {
|
||||
used_left_words.insert(left_word);
|
||||
used_right_prefix.insert(right_prefix);
|
||||
@ -185,9 +184,7 @@ fn compute_non_prefix_edges(
|
||||
let mut universe = universe.clone();
|
||||
|
||||
for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() {
|
||||
// TODO do the intersection in the method, again!
|
||||
let phrase_docids = ctx.get_phrase_docids(Some(&universe), phrase)?;
|
||||
universe &= phrase_docids;
|
||||
universe &= ctx.get_phrase_docids(Some(&universe), phrase)?;
|
||||
if universe.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -251,6 +251,7 @@ pub fn compute_phrase_docids(
|
||||
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
||||
bitmaps.sort_unstable_by_key(|a| a.len());
|
||||
|
||||
// TODO use MultiOps intersection which and remove the above sort
|
||||
for bitmap in bitmaps {
|
||||
candidates &= bitmap;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user