From 41dbdd2d1835beb631c5cc318fa8707af4ef627a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 19 Nov 2024 16:08:25 +0100 Subject: [PATCH] Fix filtered_placeholder_search_should_not_return_deleted_documents and word_scale_set_and_reset --- .../extract_word_pair_proximity_docids.rs | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs index bbc6365df..945f0b8b3 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs @@ -149,6 +149,15 @@ fn word_positions_into_word_pair_proximity( } } +fn drain_word_positions( + word_positions: &mut VecDeque<(Rc<str>, u16)>, + word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8), +) { + while !word_positions.is_empty() { + word_positions_into_word_pair_proximity(word_positions, word_pair_proximity); + } +} + fn process_document_tokens<'doc>( document: impl Document<'doc>, document_tokenizer: &DocumentTokenizer, @@ -156,7 +165,12 @@ fn process_document_tokens<'doc>( word_positions: &mut VecDeque<(Rc<str>, u16)>, word_pair_proximity: &mut impl FnMut((Rc<str>, Rc<str>), u8), ) -> Result<()> { - let mut token_fn = |_fname: &str, _fid: FieldId, pos: u16, word: &str| { + let mut field_id = None; + let mut token_fn = |_fname: &str, fid: FieldId, pos: u16, word: &str| { + if field_id != Some(fid) { + field_id = Some(fid); + drain_word_positions(word_positions, word_pair_proximity); + } // drain the proximity window until the head word is considered close to the word we are inserting. 
while word_positions .front() @@ -171,9 +185,6 @@ fn process_document_tokens<'doc>( }; document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?; - while !word_positions.is_empty() { - word_positions_into_word_pair_proximity(word_positions, word_pair_proximity); - } - + drain_word_positions(word_positions, word_pair_proximity); Ok(()) }