From 517f5332d67b30f06056557daa4a8b32d43f9449 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 10:39:19 +0100 Subject: [PATCH] Allow actually passing `dimensions` for OpenAI source -> make sure the settings change is rejected or the settings task fails when the specified model doesn't support overriding `dimensions` and the passed `dimensions` differs from the model's default dimensions. --- meilisearch-types/src/error.rs | 1 + milli/src/error.rs | 7 +++++++ milli/src/update/settings.rs | 24 +++++++++++++++++++----- milli/src/vector/settings.rs | 24 ++++++++++++++++++++---- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 2182b1836..796eb5713 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -347,6 +347,7 @@ impl ErrorCode for milli::Error { UserError::InvalidFieldForSource { .. } | UserError::MissingFieldForSource { .. } | UserError::InvalidOpenAiModel { .. } + | UserError::InvalidOpenAiModelDimensions { .. } | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, diff --git a/milli/src/error.rs b/milli/src/error.rs index 5a4fbc7f5..9cb984db1 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -227,6 +227,13 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco source_: crate::vector::settings::EmbedderSource, embedder_name: String, }, + #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")] + InvalidOpenAiModelDimensions { + embedder_name: String, + model: &'static str, + dimensions: usize, + expected_dimensions: usize, + }, } impl From for Error { diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index d770bcd74..b8289626b 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -974,6 +974,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { crate::vector::settings::EmbeddingSettings::apply_default_source( &mut setting, ); + crate::vector::settings::EmbeddingSettings::apply_default_openai_model( + &mut setting, + ); let setting = validate_embedding_settings(setting, &name)?; changed = true; new_configs.insert(name, setting); @@ -1132,14 +1135,25 @@ pub fn validate_embedding_settings( match inferred_source { EmbedderSource::OpenAi => { check_unset(&revision, "revision", inferred_source, name)?; - check_unset(&dimensions, "dimensions", inferred_source, name)?; if let Setting::Set(model) = &model { - crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or( - crate::error::UserError::InvalidOpenAiModel { + let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str()) + .ok_or(crate::error::UserError::InvalidOpenAiModel { embedder_name: name.to_owned(), model: model.clone(), - }, - )?; + })?; + if let Setting::Set(dimensions) = dimensions { + if !model.supports_overriding_dimensions() + && dimensions != model.default_dimensions() + { + return Err(crate::error::UserError::InvalidOpenAiModelDimensions { + embedder_name: name.to_owned(), + model: model.name(), + dimensions, + expected_dimensions: model.default_dimensions(), + } + .into()); + } + } } } EmbedderSource::HuggingFace => { diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 6fe8eddc0..834a1c81d 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -1,6 +1,7 @@ use deserr::Deserr; use serde::{Deserialize, Serialize}; +use super::openai; use crate::prompt::PromptData; use crate::update::Setting; use crate::vector::EmbeddingConfig; @@ -82,7 +83,7 @@ impl EmbeddingSettings { Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], Self::REVISION => &[EmbedderSource::HuggingFace], Self::API_KEY => &[EmbedderSource::OpenAi], - Self::DIMENSIONS => &[EmbedderSource::UserProvided], + Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided], Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], _other => unreachable!("unknown field"), } @@ -90,9 +91,13 @@ impl EmbeddingSettings { pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] { match source { - EmbedderSource::OpenAi => { - &[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE] - } + EmbedderSource::OpenAi => &[ + Self::SOURCE, + Self::MODEL, + Self::API_KEY, + Self::DOCUMENT_TEMPLATE, + Self::DIMENSIONS, + ], EmbedderSource::HuggingFace => { &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] } @@ -109,6 +114,17 @@ impl EmbeddingSettings { *source = Setting::Set(EmbedderSource::default()) } } + + pub(crate) fn apply_default_openai_model(setting: &mut Setting) { + if let Setting::Set(EmbeddingSettings { + source: Setting::Set(EmbedderSource::OpenAi), + model: model @ (Setting::NotSet | Setting::Reset), + .. + }) = setting + { + *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) + } + } } #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]