From 17f6d658408331370cddde1e209085d0f0ac1004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 8 May 2023 14:53:35 +0200 Subject: [PATCH] Reduce maximum indexed proximity for word pairs --- milli/src/proximity.rs | 2 +- milli/src/search/new/ranking_rule_graph/proximity/build.rs | 6 ++++-- .../extract/extract_word_pair_proximity_docids.rs | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/milli/src/proximity.rs b/milli/src/proximity.rs index 62f490119..8261015a3 100644 --- a/milli/src/proximity.rs +++ b/milli/src/proximity.rs @@ -2,7 +2,7 @@ use std::cmp; use crate::{relative_from_absolute_position, Position}; -pub const MAX_DISTANCE: u32 = 8; +pub const MAX_DISTANCE: u32 = 4; pub fn index_proximity(lhs: u32, rhs: u32) -> u32 { if lhs <= rhs { diff --git a/milli/src/search/new/ranking_rule_graph/proximity/build.rs b/milli/src/search/new/ranking_rule_graph/proximity/build.rs index 660d59b3e..11c68a68b 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs @@ -6,6 +6,8 @@ use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::SearchContext; use crate::Result; +const MAX_PROX: usize = crate::proximity::MAX_DISTANCE as usize; + pub fn build_edges( _ctx: &mut SearchContext, conditions_interner: &mut DedupInterner, @@ -35,7 +37,7 @@ pub fn build_edges( } let mut conditions = vec![]; - for cost in right_ngram_length..(7 + right_ngram_length) { + for cost in right_ngram_length..(MAX_PROX + right_ngram_length) { conditions.push(( cost as u32, conditions_interner.insert(ProximityCondition::Uninit { @@ -47,7 +49,7 @@ pub fn build_edges( } conditions.push(( - (7 + right_ngram_length) as u32, + (MAX_PROX + right_ngram_length) as u32, conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }), )); diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index 6707fc268..8efb8e08c 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -91,7 +91,7 @@ fn document_word_positions_into_sorter( while !word_positions_heap.is_empty() { while let Some(peeked_word_position) = word_positions_heap.pop() { ordered_peeked_word_positions.push(peeked_word_position); - if ordered_peeked_word_positions.len() == 7 { + if ordered_peeked_word_positions.len() == (MAX_DISTANCE - 1) as usize { break; } }