Improve performance of the position ranking rule

This commit is contained in:
Loïc Lecrenier 2023-05-01 12:06:10 +02:00
parent 2a7f9adf78
commit 3b2c8b9f25

View File

@ -1,4 +1,4 @@
use fxhash::FxHashSet; use fxhash::{FxHashMap, FxHashSet};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait}; use super::{ComputedCondition, RankingRuleGraphTrait};
@ -11,7 +11,7 @@ use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)] #[derive(Clone, PartialEq, Eq, Hash)]
pub struct PositionCondition { pub struct PositionCondition {
term: LocatedQueryTermSubset, term: LocatedQueryTermSubset,
position: u16, positions: Vec<u16>,
} }
pub enum PositionGraph {} pub enum PositionGraph {}
@ -24,14 +24,17 @@ impl RankingRuleGraphTrait for PositionGraph {
condition: &Self::Condition, condition: &Self::Condition,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<ComputedCondition> { ) -> Result<ComputedCondition> {
let PositionCondition { term, .. } = condition; let PositionCondition { term, positions } = condition;
// maybe compute_query_term_subset_docids_within_position_id should accept a universe as argument let mut docids = RoaringBitmap::new();
let mut docids = compute_query_term_subset_docids_within_position( for position in positions {
// maybe compute_query_term_subset_docids_within_position should accept a universe as argument
docids |= universe
& compute_query_term_subset_docids_within_position(
ctx, ctx,
&term.term_subset, &term.term_subset,
condition.position, *position,
)?; )?;
docids &= universe; }
Ok(ComputedCondition { Ok(ComputedCondition {
docids, docids,
@ -72,7 +75,8 @@ impl RankingRuleGraphTrait for PositionGraph {
all_positions.extend(positions); all_positions.extend(positions);
} }
let mut edges = vec![]; let mut positions_for_costs = FxHashMap::<u32, Vec<u16>>::default();
for position in all_positions { for position in all_positions {
let cost = { let cost = {
let mut cost = 0; let mut cost = 0;
@ -85,7 +89,11 @@ impl RankingRuleGraphTrait for PositionGraph {
} }
cost cost
}; };
positions_for_costs.entry(cost).or_default().push(position);
}
let mut edges = vec![];
for (cost, positions) in positions_for_costs {
// TODO: We can improve performance and relevancy by storing // TODO: We can improve performance and relevancy by storing
// the term subsets associated to each position fetched. // the term subsets associated to each position fetched.
// //
@ -94,7 +102,7 @@ impl RankingRuleGraphTrait for PositionGraph {
cost, cost,
conditions_interner.insert(PositionCondition { conditions_interner.insert(PositionCondition {
term: term.clone(), // TODO remove this ugly clone term: term.clone(), // TODO remove this ugly clone
position, positions,
}), }),
)); ));
} }