From e58426109a6fc1b3cb5ba544d8e3f1020a74fbd8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 5 Apr 2023 14:43:16 +0200 Subject: [PATCH] Fix panics and issues in exactness graph ranking rule --- .../new/ranking_rule_graph/exactness/mod.rs | 48 ++++++++----------- .../extract/extract_word_position_docids.rs | 5 +- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs index a1e19a015..3d558e87b 100644 --- a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs @@ -1,24 +1,11 @@ +use heed::BytesDecode; use roaring::RoaringBitmap; use super::{ComputedCondition, DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait}; use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; use crate::search::new::query_graph::{QueryGraph, QueryNode}; use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset}; -use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger}; - -/// - Exactness as first ranking rule: TermsMatchingStrategy? prefer a document that matches 1 word exactly and no other -/// word than a doc that matches 9 words non exactly but none exactly -/// - `TermsMatchingStrategy` as a word + exactness optimization: we could consider -/// -/// "naive vision" -/// condition from one node to another: -/// - word exactly present: cost 0 -/// - word typo/ngram/prefix/missing: cost 1, not remove from query graph, edge btwn the two nodes, return the universe without condition when resolving, destination query term is inside -/// -/// Three strategies: -/// 1. ExactAttribute: word position / word_fid_docid -/// 2. AttributeStart: -/// 3. AttributeContainsExact => implementable via `RankingRuleGraphTrait` +use crate::{Result, RoaringBitmapCodec, SearchContext, SearchLogger}; #[derive(Clone, PartialEq, Eq, Hash)] pub enum ExactnessCondition { @@ -42,7 +29,7 @@ fn compute_docids( ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(), ExactTerm::Word(word) => { if let Some(word_candidates) = ctx.get_db_word_docids(word)? { - CboRoaringBitmapCodec::deserialize_from(word_candidates)? + RoaringBitmapCodec::bytes_decode(word_candidates).ok_or(heed::Error::Decoding)? } else { return Ok(Default::default()); } @@ -86,22 +73,29 @@ impl RankingRuleGraphTrait for ExactnessGraph { let skip_condition = ExactnessCondition::Skip(dest_node.clone()); let skip_condition = conditions_interner.insert(skip_condition); - Ok(vec![(0, exact_condition), (1, skip_condition)]) + + Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)]) } fn log_state( - graph: &RankingRuleGraph, - paths: &[Vec>], - dead_ends_cache: &DeadEndsCache, - universe: &RoaringBitmap, - costs: &MappedInterner>, - cost: u64, - logger: &mut dyn SearchLogger, + _graph: &RankingRuleGraph, + _paths: &[Vec>], + _dead_ends_cache: &DeadEndsCache, + _niverse: &RoaringBitmap, + _costs: &MappedInterner>, + _cost: u64, + _logger: &mut dyn SearchLogger, ) { - todo!() } - fn label_for_condition(ctx: &mut SearchContext, condition: &Self::Condition) -> Result { - todo!() + fn label_for_condition( + _ctx: &mut SearchContext, + condition: &Self::Condition, + ) -> Result { + Ok(match condition { + ExactnessCondition::ExactInAttribute(_) => "exact", + ExactnessCondition::Skip(_) => "skip", + } + .to_owned()) } } diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs index cd3ec691b..eef5089bc 100644 --- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs @@ -7,10 +7,7 @@ use super::helpers::{ }; use crate::error::SerializationError; use crate::index::db_name::DOCID_WORD_POSITIONS; -use crate::{ - absolute_from_relative_position, bucketed_position, relative_from_absolute_position, - DocumentId, Result, -}; +use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Result}; /// Extracts the word positions and the documents ids where this word appear. ///