From d18ee58ab99b8dea7d632576cab8acf1e9a78926 Mon Sep 17 00:00:00 2001 From: many Date: Wed, 8 Sep 2021 15:24:32 +0200 Subject: [PATCH 1/2] Check if key are not empty in validator --- milli/src/update/index_documents/helpers/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs index 3f38d4f25..128288982 100644 --- a/milli/src/update/index_documents/helpers/mod.rs +++ b/milli/src/update/index_documents/helpers/mod.rs @@ -17,7 +17,7 @@ pub use merge_functions::{ }; pub fn valid_lmdb_key(key: impl AsRef<[u8]>) -> bool { - key.as_ref().len() <= 511 + key.as_ref().len() <= 511 && !key.as_ref().is_empty() } /// Divides one slice into two at an index, returns `None` if mid is out of bounds. From e54280fbfc86b2c5b84361026e8a811715a5f347 Mon Sep 17 00:00:00 2001 From: many Date: Wed, 8 Sep 2021 15:24:52 +0200 Subject: [PATCH 2/2] Skip empty normalized words --- .../extract/extract_docid_word_positions.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 894a193bf..ca65f0874 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -67,14 +67,17 @@ pub fn extract_docid_word_positions( for (index, token) in tokens { let token = token.text().trim(); - key_buffer.truncate(mem::size_of::<u32>()); - key_buffer.extend_from_slice(token.as_bytes()); + if !token.is_empty() { + key_buffer.truncate(mem::size_of::<u32>()); + key_buffer.extend_from_slice(token.as_bytes()); - let position: u32 = index - .try_into() - .map_err(|_| SerializationError::InvalidNumberSerialization)?; - let position = field_id as u32 * ONE_ATTRIBUTE + position; - docid_word_positions_sorter.insert(&key_buffer, 
&position.to_ne_bytes())?; + let position: u32 = index + .try_into() + .map_err(|_| SerializationError::InvalidNumberSerialization)?; + let position = field_id as u32 * ONE_ATTRIBUTE + position; + docid_word_positions_sorter + .insert(&key_buffer, &position.to_ne_bytes())?; + } } } }