From e54280fbfc86b2c5b84361026e8a811715a5f347 Mon Sep 17 00:00:00 2001 From: many Date: Wed, 8 Sep 2021 15:24:52 +0200 Subject: [PATCH] Skip empty normalized words --- .../extract/extract_docid_word_positions.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 894a193bf..ca65f0874 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -67,14 +67,17 @@ pub fn extract_docid_word_positions( for (index, token) in tokens { let token = token.text().trim(); - key_buffer.truncate(mem::size_of::()); - key_buffer.extend_from_slice(token.as_bytes()); + if !token.is_empty() { + key_buffer.truncate(mem::size_of::()); + key_buffer.extend_from_slice(token.as_bytes()); - let position: u32 = index - .try_into() - .map_err(|_| SerializationError::InvalidNumberSerialization)?; - let position = field_id as u32 * ONE_ATTRIBUTE + position; - docid_word_positions_sorter.insert(&key_buffer, &position.to_ne_bytes())?; + let position: u32 = index + .try_into() + .map_err(|_| SerializationError::InvalidNumberSerialization)?; + let position = field_id as u32 * ONE_ATTRIBUTE + position; + docid_word_positions_sorter + .insert(&key_buffer, &position.to_ne_bytes())?; + } } } }