diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 5689bb04f..82486f3a8 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -242,11 +242,9 @@ fn send_original_documents_data( let request_threads = rayon::ThreadPoolBuilder::new() .num_threads(crate::vector::REQUEST_PARALLELISM) .thread_name(|index| format!("embedding-request-{index}")) - .build() - .unwrap(); + .build()?; rayon::spawn(move || { - /// FIXME: unwrap for (name, (embedder, prompt)) in embedders { let result = extract_vector_points( documents_chunk_cloned.clone(), diff --git a/milli/src/vector/error.rs b/milli/src/vector/error.rs index 92f077924..1e0bcc7fb 100644 --- a/milli/src/vector/error.rs +++ b/milli/src/vector/error.rs @@ -52,8 +52,6 @@ pub enum EmbedErrorKind { ModelForward(candle_core::Error), #[error("attempt to embed the following text in a configuration where embeddings must be user provided: {0:?}")] ManualEmbed(String), - #[error("could not initialize asynchronous runtime: {0}")] - OpenAiRuntimeInit(std::io::Error), #[error("model not found. 
Meilisearch will not automatically download models from the Ollama library, please pull the model manually: {0:?}")] OllamaModelNotFoundError(Option<String>), #[error("error deserialization the response body as JSON: {0}")] @@ -76,6 +74,10 @@ pub enum EmbedErrorKind { RestOtherStatusCode(u16, Option<String>), #[error("could not reach embedding server: {0}")] RestNetwork(ureq::Transport), + #[error("was expected '{}' to be an object in query '{0}'", .1.join("."))] + RestNotAnObject(serde_json::Value, Vec<String>), + #[error("while embedding tokenized, was expecting embeddings of dimension `{0}`, got embeddings of dimensions `{1}`")] + OpenAiUnexpectedDimension(usize, usize), } impl EmbedError { @@ -174,6 +176,20 @@ impl EmbedError { pub(crate) fn rest_network(transport: ureq::Transport) -> EmbedError { Self { kind: EmbedErrorKind::RestNetwork(transport), fault: FaultSource::Runtime } } + + pub(crate) fn rest_not_an_object( + query: serde_json::Value, + input_path: Vec<String>, + ) -> EmbedError { + Self { kind: EmbedErrorKind::RestNotAnObject(query, input_path), fault: FaultSource::User } + } + + pub(crate) fn openai_unexpected_dimension(expected: usize, got: usize) -> EmbedError { + Self { + kind: EmbedErrorKind::OpenAiUnexpectedDimension(expected, got), + fault: FaultSource::Runtime, + } + } } #[derive(Debug, thiserror::Error)] diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index b2638966e..737878a1a 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -210,16 +210,19 @@ impl Embedder { while tokens.len() > max_token_count { let window = &tokens[..max_token_count]; let embedding = self.rest_embedder.embed_tokens(window)?; - /// FIXME: unwrap - embeddings_for_prompt.append(embedding.into_inner()).unwrap(); + embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| { + EmbedError::openai_unexpected_dimension(self.dimensions(), got.len()) + })?; tokens = &tokens[max_token_count - OVERLAP_SIZE..]; } // end of text let embedding = 
self.rest_embedder.embed_tokens(tokens)?; - /// FIXME: unwrap - embeddings_for_prompt.append(embedding.into_inner()).unwrap(); + + embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| { + EmbedError::openai_unexpected_dimension(self.dimensions(), got.len()) + })?; all_embeddings.push(embeddings_for_prompt); } diff --git a/milli/src/vector/rest.rs b/milli/src/vector/rest.rs index 6fd47d882..8650bb68d 100644 --- a/milli/src/vector/rest.rs +++ b/milli/src/vector/rest.rs @@ -189,19 +189,29 @@ where [input] => { let mut body = options.query.clone(); - /// FIXME unwrap - body.as_object_mut().unwrap().insert(input.clone(), input_value); + body.as_object_mut() + .ok_or_else(|| { + EmbedError::rest_not_an_object( + options.query.clone(), + options.input_field.clone(), + ) + })? + .insert(input.clone(), input_value); body } [path @ .., input] => { let mut body = options.query.clone(); - /// FIXME unwrap let mut current_value = &mut body; for component in path { current_value = current_value .as_object_mut() - .unwrap() + .ok_or_else(|| { + EmbedError::rest_not_an_object( + options.query.clone(), + options.input_field.clone(), + ) + })? .entry(component.clone()) .or_insert(serde_json::json!({})); }