From 679c0b0f970c111b3c86309354bb82ce6be47e3a Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 6 Sep 2023 12:20:25 +0200 Subject: [PATCH] Extract the vectors from the non-flattened version of the documents --- .../src/update/index_documents/extract/mod.rs | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 6259c7272..6a3d6d972 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -55,7 +55,13 @@ pub(crate) fn data_from_obkv_documents( original_obkv_chunks .par_bridge() .map(|original_documents_chunk| { - send_original_documents_data(original_documents_chunk, lmdb_writer_sx.clone()) + send_original_documents_data( + original_documents_chunk, + indexer, + lmdb_writer_sx.clone(), + vectors_field_id, + primary_key_id, + ) }) .collect::>()?; @@ -72,7 +78,6 @@ pub(crate) fn data_from_obkv_documents( &faceted_fields, primary_key_id, geo_fields_ids, - vectors_field_id, &stop_words, max_positions_per_attributes, ) @@ -257,11 +262,33 @@ fn spawn_extraction_task( /// - documents fn send_original_documents_data( original_documents_chunk: Result>, + indexer: GrenadParameters, lmdb_writer_sx: Sender>, + vectors_field_id: Option, + primary_key_id: FieldId, ) -> Result<()> { let original_documents_chunk = original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?; + if let Some(vectors_field_id) = vectors_field_id { + let documents_chunk_cloned = original_documents_chunk.clone(); + let lmdb_writer_sx_cloned = lmdb_writer_sx.clone(); + rayon::spawn(move || { + let result = extract_vector_points( + documents_chunk_cloned, + indexer, + primary_key_id, + vectors_field_id, + ); + let _ = match result { + Ok(vector_points) => { + lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points))) + } + Err(error) => lmdb_writer_sx_cloned.send(Err(error)), + }; + }); + } + // TODO: create a custom internal error lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))).unwrap(); Ok(()) @@ -283,7 +310,6 @@ fn send_and_extract_flattened_documents_data( faceted_fields: &HashSet, primary_key_id: FieldId, geo_fields_ids: Option<(FieldId, FieldId)>, - vectors_field_id: Option, stop_words: &Option>, max_positions_per_attributes: Option, ) -> Result<( @@ -312,25 +338,6 @@ fn send_and_extract_flattened_documents_data( }); } - if let Some(vectors_field_id) = vectors_field_id { - let documents_chunk_cloned = flattened_documents_chunk.clone(); - let lmdb_writer_sx_cloned = lmdb_writer_sx.clone(); - rayon::spawn(move || { - let result = extract_vector_points( - documents_chunk_cloned, - indexer, - primary_key_id, - vectors_field_id, - ); - let _ = match result { - Ok(vector_points) => { - lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points))) - } - Err(error) => lmdb_writer_sx_cloned.send(Err(error)), - }; - }); - } - let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) = rayon::join( || {