Error changes

This commit is contained in:
Louis Dureuil 2024-07-24 14:32:29 +02:00
parent ecee0c922f
commit 1a297c048e
No known key found for this signature in database

View File

@ -4,6 +4,7 @@ use std::path::PathBuf;
use hf_hub::api::sync::ApiError; use hf_hub::api::sync::ApiError;
use super::parsed_vectors::ParsedVectorsDiff; use super::parsed_vectors::ParsedVectorsDiff;
use super::rest::ConfigurationSource;
use crate::error::FaultSource; use crate::error::FaultSource;
use crate::{FieldDistribution, PanicCatched}; use crate::{FieldDistribution, PanicCatched};
@ -45,48 +46,57 @@ pub struct EmbedError {
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub enum EmbedErrorKind { pub enum EmbedErrorKind {
#[error("could not tokenize: {0}")] #[error("could not tokenize:\n - {0}")]
Tokenize(Box<dyn std::error::Error + Send + Sync>), Tokenize(Box<dyn std::error::Error + Send + Sync>),
#[error("unexpected tensor shape: {0}")] #[error("unexpected tensor shape:\n - {0}")]
TensorShape(candle_core::Error), TensorShape(candle_core::Error),
#[error("unexpected tensor value: {0}")] #[error("unexpected tensor value:\n - {0}")]
TensorValue(candle_core::Error), TensorValue(candle_core::Error),
#[error("could not run model: {0}")] #[error("could not run model:\n - {0}")]
ModelForward(candle_core::Error), ModelForward(candle_core::Error),
#[error("attempt to embed the following text in a configuration where embeddings must be user provided: {0:?}")] #[error("attempt to embed the following text in a configuration where embeddings must be user provided:\n - `{0}`")]
ManualEmbed(String), ManualEmbed(String),
#[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually: {0:?}")] #[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually{}", option_info(.0.as_deref(), "server replied with "))]
OllamaModelNotFoundError(Option<String>), OllamaModelNotFoundError(Option<String>),
#[error("error deserialization the response body as JSON: {0}")] #[error("error deserialization the response body as JSON:\n - {0}")]
RestResponseDeserialization(std::io::Error), RestResponseDeserialization(std::io::Error),
#[error("component `{0}` not found in path `{1}` in response: `{2}`")]
RestResponseMissingEmbeddings(String, String, String),
#[error("unexpected format of the embedding response: {0}")]
RestResponseFormat(serde_json::Error),
#[error("expected a response containing {0} embeddings, got only {1}")] #[error("expected a response containing {0} embeddings, got only {1}")]
RestResponseEmbeddingCount(usize, usize), RestResponseEmbeddingCount(usize, usize),
#[error("could not authenticate against embedding server: {0:?}")] #[error("could not authenticate against embedding server{}", option_info(.0.as_deref(), "server replied with "))]
RestUnauthorized(Option<String>), RestUnauthorized(Option<String>),
#[error("sent too many requests to embedding server: {0:?}")] #[error("sent too many requests to embedding server{}", option_info(.0.as_deref(), "server replied with "))]
RestTooManyRequests(Option<String>), RestTooManyRequests(Option<String>),
#[error("sent a bad request to embedding server: {0:?}")] #[error("sent a bad request to embedding server{}{}",
RestBadRequest(Option<String>), if ConfigurationSource::User == *.1 {
#[error("received internal error from embedding server: {0:?}")] "\n - Hint: check that the `request` in the embedder configuration matches the remote server's API"
} else {
""
},
option_info(.0.as_deref(), "server replied with "))]
RestBadRequest(Option<String>, ConfigurationSource),
#[error("received internal error HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))]
RestInternalServerError(u16, Option<String>), RestInternalServerError(u16, Option<String>),
#[error("received HTTP {0} from embedding server: {0:?}")] #[error("received unexpected HTTP {0} from embedding server{}", option_info(.1.as_deref(), "server replied with "))]
RestOtherStatusCode(u16, Option<String>), RestOtherStatusCode(u16, Option<String>),
#[error("could not reach embedding server: {0}")] #[error("could not reach embedding server:\n - {0}")]
RestNetwork(ureq::Transport), RestNetwork(ureq::Transport),
#[error("was expected '{}' to be an object in query '{0}'", .1.join("."))] #[error("error extracting embeddings from the response:\n - {0}")]
RestNotAnObject(serde_json::Value, Vec<String>), RestExtractionError(String),
#[error("while embedding tokenized, was expecting embeddings of dimension `{0}`, got embeddings of dimensions `{1}`")] #[error("was expecting embeddings of dimension `{0}`, got embeddings of dimensions `{1}`")]
OpenAiUnexpectedDimension(usize, usize), UnexpectedDimension(usize, usize),
#[error("no embedding was produced")] #[error("no embedding was produced")]
MissingEmbedding, MissingEmbedding,
#[error(transparent)] #[error(transparent)]
PanicInThreadPool(#[from] PanicCatched), PanicInThreadPool(#[from] PanicCatched),
} }
/// Renders an optional piece of detail as an extra bullet line for an error message.
///
/// Returns an empty string when `info` is `None`. Otherwise returns a line break,
/// the ` - ` bullet, `prefix`, and then `info` wrapped in backticks.
fn option_info(info: Option<&str>, prefix: &str) -> String {
    info.map(|detail| format!("\n - {prefix}`{detail}`")).unwrap_or_default()
}
impl EmbedError { impl EmbedError {
pub fn tokenize(inner: Box<dyn std::error::Error + Send + Sync>) -> Self { pub fn tokenize(inner: Box<dyn std::error::Error + Send + Sync>) -> Self {
Self { kind: EmbedErrorKind::Tokenize(inner), fault: FaultSource::Runtime } Self { kind: EmbedErrorKind::Tokenize(inner), fault: FaultSource::Runtime }
@ -119,28 +129,6 @@ impl EmbedError {
} }
} }
/// Builds the error reported when `component` cannot be found in an embedding response.
///
/// The segments of `response_field` are joined with `.` to form the displayed path, and
/// `response` is pretty-printed as JSON (falling back to an empty string if serialization
/// fails). The fault is left as `FaultSource::Undecided`.
pub(crate) fn rest_response_missing_embeddings<S: AsRef<str>>(
    response: serde_json::Value,
    component: &str,
    response_field: &[S],
) -> EmbedError {
    let path = response_field.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(".");
    let pretty_response = serde_json::to_string_pretty(&response).unwrap_or_default();
    let kind = EmbedErrorKind::RestResponseMissingEmbeddings(
        component.to_owned(),
        path,
        pretty_response,
    );
    Self { kind, fault: FaultSource::Undecided }
}
pub(crate) fn rest_response_format(error: serde_json::Error) -> EmbedError {
Self { kind: EmbedErrorKind::RestResponseFormat(error), fault: FaultSource::Undecided }
}
pub(crate) fn rest_response_embedding_count(expected: usize, got: usize) -> EmbedError { pub(crate) fn rest_response_embedding_count(expected: usize, got: usize) -> EmbedError {
Self { Self {
kind: EmbedErrorKind::RestResponseEmbeddingCount(expected, got), kind: EmbedErrorKind::RestResponseEmbeddingCount(expected, got),
@ -159,8 +147,14 @@ impl EmbedError {
} }
} }
pub(crate) fn rest_bad_request(error_response: Option<String>) -> EmbedError { pub(crate) fn rest_bad_request(
Self { kind: EmbedErrorKind::RestBadRequest(error_response), fault: FaultSource::User } error_response: Option<String>,
configuration_source: ConfigurationSource,
) -> EmbedError {
Self {
kind: EmbedErrorKind::RestBadRequest(error_response, configuration_source),
fault: FaultSource::User,
}
} }
pub(crate) fn rest_internal_server_error( pub(crate) fn rest_internal_server_error(
@ -184,22 +178,19 @@ impl EmbedError {
Self { kind: EmbedErrorKind::RestNetwork(transport), fault: FaultSource::Runtime } Self { kind: EmbedErrorKind::RestNetwork(transport), fault: FaultSource::Runtime }
} }
pub(crate) fn rest_not_an_object( pub(crate) fn rest_unexpected_dimension(expected: usize, got: usize) -> EmbedError {
query: serde_json::Value,
input_path: Vec<String>,
) -> EmbedError {
Self { kind: EmbedErrorKind::RestNotAnObject(query, input_path), fault: FaultSource::User }
}
pub(crate) fn openai_unexpected_dimension(expected: usize, got: usize) -> EmbedError {
Self { Self {
kind: EmbedErrorKind::OpenAiUnexpectedDimension(expected, got), kind: EmbedErrorKind::UnexpectedDimension(expected, got),
fault: FaultSource::Runtime, fault: FaultSource::Runtime,
} }
} }
pub(crate) fn missing_embedding() -> EmbedError { pub(crate) fn missing_embedding() -> EmbedError {
Self { kind: EmbedErrorKind::MissingEmbedding, fault: FaultSource::Undecided } Self { kind: EmbedErrorKind::MissingEmbedding, fault: FaultSource::Undecided }
} }
pub(crate) fn rest_extraction_error(error: String) -> EmbedError {
Self { kind: EmbedErrorKind::RestExtractionError(error), fault: FaultSource::Runtime }
}
} }
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
@ -290,10 +281,17 @@ impl NewEmbedderError {
fault: FaultSource::Runtime, fault: FaultSource::Runtime,
} }
} }
/// Builds a `NewEmbedderError` of kind `CouldNotParseTemplate` carrying `message`,
/// with the fault attributed to `FaultSource::User`.
pub(crate) fn rest_could_not_parse_template(message: String) -> NewEmbedderError {
    let kind = NewEmbedderErrorKind::CouldNotParseTemplate(message);
    Self { kind, fault: FaultSource::User }
}
} }
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
#[error("could not open config at {filename:?}: {inner}")] #[error("could not open config at {filename}: {inner}")]
pub struct OpenConfig { pub struct OpenConfig {
pub filename: PathBuf, pub filename: PathBuf,
pub inner: std::io::Error, pub inner: std::io::Error,
@ -339,18 +337,20 @@ pub enum NewEmbedderErrorKind {
UnsupportedModel(UnsupportedModel), UnsupportedModel(UnsupportedModel),
#[error(transparent)] #[error(transparent)]
OpenTokenizer(OpenTokenizer), OpenTokenizer(OpenTokenizer),
#[error("could not build weights from Pytorch weights: {0}")] #[error("could not build weights from Pytorch weights:\n - {0}")]
PytorchWeight(candle_core::Error), PytorchWeight(candle_core::Error),
#[error("could not build weights from Safetensor weights: {0}")] #[error("could not build weights from Safetensor weights:\n - {0}")]
SafetensorWeight(candle_core::Error), SafetensorWeight(candle_core::Error),
#[error("could not spawn HG_HUB API client: {0}")] #[error("could not spawn HG_HUB API client:\n - {0}")]
NewApiFail(ApiError), NewApiFail(ApiError),
#[error("fetching file from HG_HUB failed: {0}")] #[error("fetching file from HG_HUB failed:\n - {0}")]
ApiGet(ApiError), ApiGet(ApiError),
#[error("could not determine model dimensions: test embedding failed with {0}")] #[error("could not determine model dimensions:\n - test embedding failed with {0}")]
CouldNotDetermineDimension(EmbedError), CouldNotDetermineDimension(EmbedError),
#[error("loading model failed: {0}")] #[error("loading model failed:\n - {0}")]
LoadModel(candle_core::Error), LoadModel(candle_core::Error),
#[error("{0}")]
CouldNotParseTemplate(String),
} }
pub struct PossibleEmbeddingMistakes { pub struct PossibleEmbeddingMistakes {