From 651c30899ed62260174cfa329dbe198778504690 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 19 Nov 2024 15:16:32 +0100 Subject: [PATCH] Allow fetching embedders from inside tests --- crates/milli/src/index.rs | 9 ++++++--- .../milli/src/update/index_documents/transform.rs | 2 +- crates/milli/src/update/mod.rs | 2 +- crates/milli/src/update/new/indexer/mod.rs | 2 +- crates/milli/src/update/settings.rs | 15 +++++++++++---- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index d77b6b1da..dd3a6f299 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1689,6 +1689,7 @@ pub(crate) mod tests { use crate::error::{Error, InternalError}; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::update::new::indexer; + use crate::update::settings::InnerIndexSettings; use crate::update::{ self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Setting, Settings, }; @@ -1749,8 +1750,8 @@ pub(crate) mod tests { let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?; let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = EmbeddingConfigs::default(); - /// TODO: fetch configs from the index + let embedders = + InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs; let mut indexer = indexer::DocumentOperation::new(self.index_documents_config.update_method); indexer.add_documents(&documents).unwrap(); @@ -1830,7 +1831,9 @@ pub(crate) mod tests { let db_fields_ids_map = self.inner.fields_ids_map(&rtxn)?; let mut new_fields_ids_map = db_fields_ids_map.clone(); - let embedders = EmbeddingConfigs::default(); + let embedders = + InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs; + let mut indexer = indexer::DocumentOperation::new(self.index_documents_config.update_method); let external_document_ids: Vec<_> = diff --git a/crates/milli/src/update/index_documents/transform.rs 
b/crates/milli/src/update/index_documents/transform.rs index 1b041afcc..38bf90435 100644 --- a/crates/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -620,7 +620,7 @@ impl<'a, 'i> Transform<'a, 'i> { fst_new_external_documents_ids_builder.insert(key, value) })?; - let old_inner_settings = InnerIndexSettings::from_index(self.index, wtxn)?; + let old_inner_settings = InnerIndexSettings::from_index(self.index, wtxn, None)?; let fields_ids_map = self.fields_ids_map; let primary_key_id = self.index.primary_key(wtxn)?.and_then(|name| fields_ids_map.id(name)); let mut new_inner_settings = old_inner_settings.clone(); diff --git a/crates/milli/src/update/mod.rs b/crates/milli/src/update/mod.rs index 772a73236..5888a20db 100644 --- a/crates/milli/src/update/mod.rs +++ b/crates/milli/src/update/mod.rs @@ -19,7 +19,7 @@ pub(crate) mod facet; mod index_documents; mod indexer_config; pub mod new; -mod settings; +pub(crate) mod settings; mod update_step; mod word_prefix_docids; mod words_prefix_integer_docids; diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index dfc3d9b02..18402da5f 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -475,7 +475,7 @@ where } // used to update the localized and weighted maps while sharing the update code with the settings pipeline. 
- let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn)?; + let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?; inner_index_settings.recompute_facets(wtxn, index)?; inner_index_settings.recompute_searchables(wtxn, index)?; index.put_field_distribution(wtxn, &field_distribution)?; diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 2a0f38457..9d550f9fb 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -1169,7 +1169,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { { self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; - let old_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?; + let old_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?; // never trigger re-indexing self.update_displayed()?; @@ -1199,7 +1199,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let embedding_config_updates = self.update_embedding_configs()?; - let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?; + let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?; new_inner_settings.recompute_facets(self.wtxn, self.index)?; let primary_key_id = self @@ -1427,7 +1427,11 @@ pub(crate) struct InnerIndexSettings { } impl InnerIndexSettings { - pub fn from_index(index: &Index, rtxn: &heed::RoTxn<'_>) -> Result<Self> { + pub fn from_index( + index: &Index, + rtxn: &heed::RoTxn<'_>, + embedding_configs: Option<EmbeddingConfigs>, + ) -> Result<Self> { let stop_words = index.stop_words(rtxn)?; let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap()); let allowed_separators = index.allowed_separators(rtxn)?; @@ -1441,7 +1445,10 @@ impl InnerIndexSettings { let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?; let exact_attributes = index.exact_attributes_ids(rtxn)?; let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default(); - let
embedding_configs = embedders(index.embedding_configs(rtxn)?)?; + let embedding_configs = match embedding_configs { + Some(embedding_configs) => embedding_configs, + None => embedders(index.embedding_configs(rtxn)?)?, + }; let existing_fields: HashSet<_> = index .field_distribution(rtxn)? .into_iter()