From 762e320c3594c9936fd6dd97a70438e5b3d37ff9 Mon Sep 17 00:00:00 2001 From: msvaljek Date: Fri, 7 Oct 2022 12:59:12 +0200 Subject: [PATCH] Add proximity calculation for the same word --- .../extract/extract_word_pair_proximity_docids.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index 6add9d980..9448f0e23 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -127,6 +127,17 @@ fn document_word_positions_into_sorter<'b>( // Advance the head and push it in the heap. if let Some(mut head) = ordered_peeked_word_positions.pop() { if let Some(next_position) = head.iter.next() { + let prox = positions_proximity(head.position, next_position); + + if prox > 0 && prox < MAX_DISTANCE { + word_pair_proximity + .entry((head.word.clone(), head.word.clone())) + .and_modify(|p| { + *p = cmp::min(*p, prox); + }) + .or_insert(prox); + } + word_positions_heap.push(PeekedWordPosition { word: head.word, position: next_position,