From 1005a60fb855ec18747601dd73fa3ffffde070ab Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Wed, 19 Feb 2025 11:03:48 +0100
Subject: [PATCH] Fixup dump settings

---
 crates/dump/src/reader/v6/mod.rs | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/crates/dump/src/reader/v6/mod.rs b/crates/dump/src/reader/v6/mod.rs
index 9e0d07c78..3f469a38d 100644
--- a/crates/dump/src/reader/v6/mod.rs
+++ b/crates/dump/src/reader/v6/mod.rs
@@ -3,6 +3,7 @@ use std::io::{BufRead, BufReader, ErrorKind};
 use std::path::Path;
 
 pub use meilisearch_types::milli;
+use meilisearch_types::milli::vector::hf::OverridePooling;
 use tempfile::TempDir;
 use time::OffsetDateTime;
 use tracing::debug;
@@ -252,7 +253,29 @@ impl V6IndexReader {
     }
 
     pub fn settings(&mut self) -> Result<Settings<Checked>> {
-        let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
+        let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
+        patch_embedders(&mut settings);
         Ok(settings.check())
     }
 }
+
+fn patch_embedders(settings: &mut Settings<Unchecked>) {
+    if let Setting::Set(embedders) = &mut settings.embedders {
+        for (_, settings) in embedders {
+            let Setting::Set(settings) = &mut settings.inner else {
+                continue;
+            };
+            if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace)
+            {
+                continue;
+            }
+            settings.pooling = match settings.pooling {
+                Setting::Set(pooling) => Setting::Set(pooling),
+                // if the pooling for a hugging face embedder is not set, force it to `forceMean`
+                // for backward compatibility with v1.13
+                // dumps created in v1.14 and up will have the setting set for hugging face embedders
+                Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean),
+            };
+        }
+    }
+}