Allow actually passing dimensions for OpenAI source

-> make sure the settings change is rejected or the settings task fails when the specified model doesn't support
overriding `dimensions` and the passed `dimensions` differs from the model's default dimensions.
This commit is contained in:
Louis Dureuil 2024-02-07 10:39:19 +01:00
parent 9ac5750096
commit 517f5332d6
No known key found for this signature in database
4 changed files with 47 additions and 9 deletions

View File

@ -347,6 +347,7 @@ impl ErrorCode for milli::Error {
UserError::InvalidFieldForSource { .. } UserError::InvalidFieldForSource { .. }
| UserError::MissingFieldForSource { .. } | UserError::MissingFieldForSource { .. }
| UserError::InvalidOpenAiModel { .. } | UserError::InvalidOpenAiModel { .. }
| UserError::InvalidOpenAiModelDimensions { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,

View File

@ -227,6 +227,13 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
source_: crate::vector::settings::EmbedderSource, source_: crate::vector::settings::EmbedderSource,
embedder_name: String, embedder_name: String,
}, },
#[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")]
InvalidOpenAiModelDimensions {
embedder_name: String,
model: &'static str,
dimensions: usize,
expected_dimensions: usize,
},
} }
impl From<crate::vector::Error> for Error { impl From<crate::vector::Error> for Error {

View File

@ -974,6 +974,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
crate::vector::settings::EmbeddingSettings::apply_default_source( crate::vector::settings::EmbeddingSettings::apply_default_source(
&mut setting, &mut setting,
); );
crate::vector::settings::EmbeddingSettings::apply_default_openai_model(
&mut setting,
);
let setting = validate_embedding_settings(setting, &name)?; let setting = validate_embedding_settings(setting, &name)?;
changed = true; changed = true;
new_configs.insert(name, setting); new_configs.insert(name, setting);
@ -1132,14 +1135,25 @@ pub fn validate_embedding_settings(
match inferred_source { match inferred_source {
EmbedderSource::OpenAi => { EmbedderSource::OpenAi => {
check_unset(&revision, "revision", inferred_source, name)?; check_unset(&revision, "revision", inferred_source, name)?;
check_unset(&dimensions, "dimensions", inferred_source, name)?;
if let Setting::Set(model) = &model { if let Setting::Set(model) = &model {
crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or( let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
crate::error::UserError::InvalidOpenAiModel { .ok_or(crate::error::UserError::InvalidOpenAiModel {
embedder_name: name.to_owned(), embedder_name: name.to_owned(),
model: model.clone(), model: model.clone(),
}, })?;
)?; if let Setting::Set(dimensions) = dimensions {
if !model.supports_overriding_dimensions()
&& dimensions != model.default_dimensions()
{
return Err(crate::error::UserError::InvalidOpenAiModelDimensions {
embedder_name: name.to_owned(),
model: model.name(),
dimensions,
expected_dimensions: model.default_dimensions(),
}
.into());
}
}
} }
} }
EmbedderSource::HuggingFace => { EmbedderSource::HuggingFace => {

View File

@ -1,6 +1,7 @@
use deserr::Deserr; use deserr::Deserr;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::openai;
use crate::prompt::PromptData; use crate::prompt::PromptData;
use crate::update::Setting; use crate::update::Setting;
use crate::vector::EmbeddingConfig; use crate::vector::EmbeddingConfig;
@ -82,7 +83,7 @@ impl EmbeddingSettings {
Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
Self::REVISION => &[EmbedderSource::HuggingFace], Self::REVISION => &[EmbedderSource::HuggingFace],
Self::API_KEY => &[EmbedderSource::OpenAi], Self::API_KEY => &[EmbedderSource::OpenAi],
Self::DIMENSIONS => &[EmbedderSource::UserProvided], Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided],
Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
_other => unreachable!("unknown field"), _other => unreachable!("unknown field"),
} }
@ -90,9 +91,13 @@ impl EmbeddingSettings {
pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] { pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] {
match source { match source {
EmbedderSource::OpenAi => { EmbedderSource::OpenAi => &[
&[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE] Self::SOURCE,
} Self::MODEL,
Self::API_KEY,
Self::DOCUMENT_TEMPLATE,
Self::DIMENSIONS,
],
EmbedderSource::HuggingFace => { EmbedderSource::HuggingFace => {
&[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
} }
@ -109,6 +114,17 @@ impl EmbeddingSettings {
*source = Setting::Set(EmbedderSource::default()) *source = Setting::Set(EmbedderSource::default())
} }
} }
pub(crate) fn apply_default_openai_model(setting: &mut Setting<EmbeddingSettings>) {
if let Setting::Set(EmbeddingSettings {
source: Setting::Set(EmbedderSource::OpenAi),
model: model @ (Setting::NotSet | Setting::Reset),
..
}) = setting
{
*model = Setting::Set(openai::EmbeddingModel::default().name().to_owned())
}
}
} }
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]