diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index d97d1403c..3eb761bce 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -14,6 +14,7 @@ use roaring::RoaringBitmap; use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; +use crate::index::IndexEmbeddingConfig; use crate::prompt::Prompt; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; @@ -96,6 +97,7 @@ struct EmbedderVectorExtractor { pub fn extract_vector_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, + embedders_configs: &[IndexEmbeddingConfig], settings_diff: &InnerIndexSettingsDiff, ) -> Result> { let reindex_vectors = settings_diff.reindex_vectors(); diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 80214e7c8..6399b40f8 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -30,6 +30,7 @@ use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids use self::extract_word_position_docids::extract_word_position_docids; use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters}; use super::{helpers, TypedChunk}; +use crate::index::IndexEmbeddingConfig; use crate::update::settings::InnerIndexSettingsDiff; use crate::{FieldId, Result, ThreadPoolNoAbortBuilder}; @@ -43,6 +44,7 @@ pub(crate) fn data_from_obkv_documents( indexer: GrenadParameters, lmdb_writer_sx: Sender>, primary_key_id: FieldId, + embedders_configs: Arc>, settings_diff: Arc, max_positions_per_attributes: Option, ) -> Result<()> { @@ -55,6 +57,7 @@ pub(crate) fn data_from_obkv_documents( original_documents_chunk, indexer, lmdb_writer_sx.clone(), + embedders_configs.clone(), settings_diff.clone(), ) }) @@ -210,6 +213,7 @@ fn send_original_documents_data( original_documents_chunk: Result>>, indexer: GrenadParameters, lmdb_writer_sx: Sender>, + embedders_configs: Arc>, settings_diff: Arc, ) -> Result<()> { let original_documents_chunk = @@ -226,11 +230,17 @@ fn send_original_documents_data( if index_vectors { let settings_diff = settings_diff.clone(); + let embedders_configs = embedders_configs.clone(); let original_documents_chunk = original_documents_chunk.clone(); let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - match extract_vector_points(original_documents_chunk.clone(), indexer, &settings_diff) { + match extract_vector_points( + original_documents_chunk.clone(), + indexer, + &embedders_configs, + &settings_diff, + ) { Ok(extracted_vectors) => { for ExtractedVectorPoints { manual_vectors, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 2dc93f67a..907554753 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -286,6 +286,7 @@ where settings_diff.new.recompute_searchables(self.wtxn, self.index)?; let settings_diff = Arc::new(settings_diff); + let embedders_configs = Arc::new(self.index.embedding_configs(self.wtxn)?); let backup_pool; let pool = match self.indexer_config.thread_pool { @@ -399,6 +400,7 @@ where pool_params, lmdb_writer_sx.clone(), primary_key_id, + embedders_configs.clone(), settings_diff_cloned, max_positions_per_attributes, )