From 476e4d3dbed3ccf91c3bb95249a557b92f035562 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 24 Oct 2023 10:19:32 +0200 Subject: [PATCH] Use value buffer instead of the initial value when writing the final result in the sorter --- .../extract/extract_docid_word_positions.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 36258b275..e5d95cbdb 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -115,6 +115,7 @@ pub fn extract_docid_word_positions( let (add_obkv, add_script_language_word_count) = add?; // merge deletions and additions. + // transforming two KV> into one KV>> value_buffer.clear(); del_add_from_two_obkvs( KvReader::::new(del_obkv), @@ -122,8 +123,8 @@ pub fn extract_docid_word_positions( &mut value_buffer, )?; - // write them into the sorter. - let obkv = KvReader::::new(value); + // write each KV> into the sorter, field by field. + let obkv = KvReader::::new(&value_buffer); for (field_id, value) in obkv.iter() { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&field_id.to_be_bytes()); @@ -151,6 +152,7 @@ pub fn extract_docid_word_positions( } } + // the returned sorter is serialized as: key: (DocId, FieldId), value: KV>. sorter_into_reader(docid_word_positions_sorter, indexer) .map(|reader| (documents_ids, reader, script_language_docids)) } @@ -266,6 +268,7 @@ fn lang_safe_tokens_from_document<'a>( } } + // returns a (KV>, HashMap>) Ok((&buffers.obkv_buffer, script_language_word_count)) } @@ -331,6 +334,7 @@ fn tokens_from_document<'a>( } } + // returns a KV> Ok(document_writer.into_inner().map(|v| v.as_slice())?) }