diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index f6a4ecc04..82388172e 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -122,8 +122,11 @@ pub enum Error { Dump(#[from] dump::Error), #[error(transparent)] Heed(#[from] heed::Error), - #[error(transparent)] - Milli(#[from] milli::Error), + #[error("{}", match .index_name { + Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name), + _ => format!("{error}") + })] + Milli { error: milli::Error, index_name: Option }, #[error("An unexpected crash occurred when processing the task.")] ProcessBatchPanicked, #[error(transparent)] @@ -190,7 +193,7 @@ impl Error { | Error::AbortedTask | Error::Dump(_) | Error::Heed(_) - | Error::Milli(_) + | Error::Milli { .. } | Error::ProcessBatchPanicked | Error::FileStore(_) | Error::IoError(_) @@ -209,6 +212,10 @@ impl Error { pub fn with_custom_error_code(self, code: Code) -> Self { Self::WithCustomErrorCode(code, Box::new(self)) } + + pub fn from_milli(error: milli::Error, index_name: Option) -> Self { + Self::Milli { error, index_name } + } } impl ErrorCode for Error { @@ -236,7 +243,7 @@ impl ErrorCode for Error { // TODO: not sure of the Code to use Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice, Error::Dump(e) => e.error_code(), - Error::Milli(e) => e.error_code(), + Error::Milli { error, .. } => error.error_code(), Error::ProcessBatchPanicked => Code::Internal, Error::Heed(e) => e.error_code(), Error::HeedTransaction(e) => e.error_code(), diff --git a/crates/index-scheduler/src/index_mapper/index_map.rs b/crates/index-scheduler/src/index_mapper/index_map.rs index f8080d23b..c20782068 100644 --- a/crates/index-scheduler/src/index_mapper/index_map.rs +++ b/crates/index-scheduler/src/index_mapper/index_map.rs @@ -3,13 +3,13 @@ use std::path::Path; use std::time::Duration; use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions}; -use meilisearch_types::milli::Index; +use meilisearch_types::milli::{Index, Result}; use time::OffsetDateTime; use uuid::Uuid; use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing}; use crate::lru::{InsertionOutcome, LruMap}; -use crate::{clamp_to_page_size, Result}; +use crate::{clamp_to_page_size}; /// Keep an internally consistent view of the open indexes in memory. /// diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 3cccb5a69..500e4cf83 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use tracing::error; use uuid::Uuid; - +use meilisearch_types::milli; use self::index_map::IndexMap; use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; use crate::uuid_codec::UuidCodec; @@ -121,7 +121,7 @@ impl IndexStats { /// # Parameters /// /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. - pub fn new(index: &Index, rtxn: &RoTxn) -> Result { + pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result { Ok(IndexStats { number_of_documents: index.number_of_documents(rtxn)?, database_size: index.on_disk_size()?, @@ -189,7 +189,7 @@ impl IndexMapper { date, self.enable_mdb_writemap, self.index_base_map_size, - )?; + ).map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; wtxn.commit()?; @@ -357,7 +357,8 @@ impl IndexMapper { }; let index_path = self.base_path.join(uuid.to_string()); // take the lock to reopen the environment. - reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?; + reopen.reopen(&mut self.index_map.write().unwrap(), &index_path) + .map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; continue; } BeingDeleted => return Err(Error::IndexNotFound(name.to_string())), @@ -378,7 +379,7 @@ impl IndexMapper { None, self.enable_mdb_writemap, self.index_base_map_size, - )?; + ).map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; } Available(index) => break index, Closing(_) => { @@ -459,7 +460,7 @@ impl IndexMapper { None => { let index = self.index(rtxn, index_uid)?; let index_rtxn = index.read_txn()?; - IndexStats::new(&index, &index_rtxn) + IndexStats::new(&index, &index_rtxn).map_err(|e| Error::from_milli(e, Some(uuid.to_string()))) } } } diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index cef24c1ea..6d7558583 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -1678,9 +1678,9 @@ impl IndexScheduler { tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks."); } // If we have an abortion error we must stop the tick here and re-schedule tasks. - Err(Error::Milli(milli::Error::InternalError( - milli::InternalError::AbortedIndexation, - ))) + Err(Error::Milli{ + error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), .. + }) | Err(Error::AbortedTask) => { #[cfg(test)] self.breakpoint(Breakpoint::AbortedIndexation); @@ -1699,9 +1699,9 @@ impl IndexScheduler { // 2. close the associated environment // 3. resize it // 4. re-schedule tasks - Err(Error::Milli(milli::Error::UserError( - milli::UserError::MaxDatabaseSizeReached, - ))) if index_uid.is_some() => { + Err(Error::Milli { + error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), .. + }) if index_uid.is_some() => { // fixme: add index_uid to match to avoid the unwrap let index_uid = index_uid.unwrap(); // fixme: handle error more gracefully? not sure when this could happen @@ -1942,6 +1942,7 @@ impl IndexScheduler { // TODO: consider using a type alias or a struct embedder/template pub fn embedders( &self, + index_uid: String, embedding_configs: Vec, ) -> Result { let res: Result<_> = embedding_configs @@ -1953,7 +1954,10 @@ impl IndexScheduler { .. }| { let prompt = - Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?); + Arc::new(prompt.try_into() + .map_err(meilisearch_types::milli::Error::from) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))? + ); // optimistically return existing embedder { let embedders = self.embedders.read().unwrap(); @@ -1969,7 +1973,8 @@ impl IndexScheduler { let embedder = Arc::new( Embedder::new(embedder_options.clone()) .map_err(meilisearch_types::milli::vector::Error::from) - .map_err(meilisearch_types::milli::Error::from)?, + .map_err(meilisearch_types::milli::Error::from) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?, ); { let mut embedders = self.embedders.write().unwrap(); diff --git a/crates/meilisearch/src/error.rs b/crates/meilisearch/src/error.rs index 5c4ce171f..6e7283a18 100644 --- a/crates/meilisearch/src/error.rs +++ b/crates/meilisearch/src/error.rs @@ -7,6 +7,7 @@ use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; use meilisearch_types::milli::OrderBy; use serde_json::Value; use tokio::task::JoinError; +use meilisearch_types::milli; #[derive(Debug, thiserror::Error)] pub enum MeilisearchHttpError { @@ -62,8 +63,11 @@ pub enum MeilisearchHttpError { HeedError(#[from] meilisearch_types::heed::Error), #[error(transparent)] IndexScheduler(#[from] index_scheduler::Error), - #[error(transparent)] - Milli(#[from] meilisearch_types::milli::Error), + #[error("{}", match .index_name { + Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name), + _ => format!("{error}") + })] + Milli { error: meilisearch_types::milli::Error, index_name: Option }, #[error(transparent)] Payload(#[from] PayloadError), #[error(transparent)] @@ -76,6 +80,12 @@ pub enum MeilisearchHttpError { MissingSearchHybrid, } +impl MeilisearchHttpError { + pub(crate) fn from_milli(error: milli::Error, index_name: Option) -> Self { + Self::Milli { error, index_name } + } +} + impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { @@ -95,7 +105,7 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::SerdeJson(_) => Code::Internal, MeilisearchHttpError::HeedError(_) => Code::Internal, MeilisearchHttpError::IndexScheduler(e) => e.error_code(), - MeilisearchHttpError::Milli(e) => e.error_code(), + MeilisearchHttpError::Milli{error, ..} => error.error_code(), MeilisearchHttpError::Payload(e) => e.error_code(), MeilisearchHttpError::FileStore(_) => Code::Internal, MeilisearchHttpError::DocumentFormat(e) => e.error_code(), diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index 633ad2776..779af63f2 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -395,6 +395,7 @@ fn import_dump( for index_reader in dump_reader.indexes()? { let mut index_reader = index_reader?; let metadata = index_reader.metadata(); + let uid = metadata.uid.clone(); tracing::info!("Importing index `{}`.", metadata.uid); let date = Some((metadata.created_at, metadata.updated_at)); @@ -432,7 +433,7 @@ fn import_dump( let reader = DocumentsBatchReader::from_reader(reader)?; let embedder_configs = index.embedding_configs(&wtxn)?; - let embedders = index_scheduler.embedders(embedder_configs)?; + let embedders = index_scheduler.embedders(uid, embedder_configs)?; let builder = milli::update::IndexDocuments::new( &mut wtxn, diff --git a/crates/meilisearch/src/routes/indexes/facet_search.rs b/crates/meilisearch/src/routes/indexes/facet_search.rs index 99a4a4f28..fc29d3406 100644 --- a/crates/meilisearch/src/routes/indexes/facet_search.rs +++ b/crates/meilisearch/src/routes/indexes/facet_search.rs @@ -185,7 +185,7 @@ pub async fn search( let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); - let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?; + let search_kind = search_kind(&search_query, &index_scheduler, index_uid.to_string(), &index, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_facet_search( diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index 7d073ec5f..1dda27a98 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -5,7 +5,7 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::{DeserializeError, Deserr, ValuePointerRef}; -use index_scheduler::IndexScheduler; +use index_scheduler::{Error, IndexScheduler}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -107,7 +107,7 @@ pub async fn list_indexes( if !filters.is_index_authorized(uid) { return Ok(None); } - Ok(Some(IndexView::new(uid.to_string(), index)?)) + Ok(Some(IndexView::new(uid.to_string(), index).map_err(|e| Error::from_milli(e, Some(uid.to_string())))?)) })?; // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened. let indexes: Vec = indexes.into_iter().flatten().collect(); diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index 2f5cb4a36..609439b4a 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -243,11 +243,11 @@ pub async fn search_with_url_query( let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); - let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; + let search_kind = search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features()) + perform_search(index_uid.to_string(), &index, query, search_kind, retrieve_vector, index_scheduler.features()) }) .await; permit.drop().await; @@ -287,12 +287,12 @@ pub async fn search_with_post( let features = index_scheduler.features(); - let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; + let search_kind = search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features()) + perform_search(index_uid.to_string(), &index, query, search_kind, retrieve_vectors, index_scheduler.features()) }) .await; permit.drop().await; @@ -314,6 +314,7 @@ pub async fn search_with_post( pub fn search_kind( query: &SearchQuery, index_scheduler: &IndexScheduler, + index_uid: String, index: &milli::Index, features: RoFeatures, ) -> Result { @@ -332,7 +333,7 @@ pub fn search_kind( (None, _, None) => Ok(SearchKind::KeywordOnly), // hybrid.semantic_ratio == 1.0 => vector (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => { - SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len())) + SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len())) } // hybrid.semantic_ratio == 0.0 => keyword (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => { @@ -340,13 +341,14 @@ pub fn search_kind( } // no query, hybrid, vector => semantic (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => { - SearchKind::semantic(index_scheduler, index, embedder, Some(v.len())) + SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len())) } // query, no hybrid, no vector => keyword (Some(_), None, None) => Ok(SearchKind::KeywordOnly), // query, hybrid, maybe vector => hybrid (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid( index_scheduler, + index_uid, index, embedder, **semantic_ratio, diff --git a/crates/meilisearch/src/routes/indexes/similar.rs b/crates/meilisearch/src/routes/indexes/similar.rs index 79f42f0aa..a0fccff52 100644 --- a/crates/meilisearch/src/routes/indexes/similar.rs +++ b/crates/meilisearch/src/routes/indexes/similar.rs @@ -104,7 +104,7 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; let (embedder_name, embedder, quantized) = - SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; + SearchKind::embedder(&index_scheduler, index_uid.to_string(), &index, &query.embedder, None)?; tokio::task::spawn_blocking(move || { perform_similar( diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index f8b1bc6ee..c4496e41c 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -125,14 +125,16 @@ pub async fn multi_search_with_post( }) .with_index(query_index)?; + let index_uid_str = index_uid.to_string(); + let search_kind = - search_kind(&query, index_scheduler.get_ref(), &index, features) + search_kind(&query, index_scheduler.get_ref(), index_uid_str.clone(), &index, features) .with_index(query_index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features) .with_index(query_index)?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vector, features) + perform_search(index_uid_str.clone(), &index, query, search_kind, retrieve_vector, features) }) .await .with_index(query_index)?; diff --git a/crates/meilisearch/src/search/federated.rs b/crates/meilisearch/src/search/federated.rs index 5279c26bb..5aae82c66 100644 --- a/crates/meilisearch/src/search/federated.rs +++ b/crates/meilisearch/src/search/federated.rs @@ -560,7 +560,7 @@ pub fn perform_federated_search( // use an immediately invoked lambda to capture the result without returning from the function let res: Result<(), ResponseError> = (|| { - let search_kind = search_kind(&query, index_scheduler, &index, features)?; + let search_kind = search_kind(&query, index_scheduler, index_uid.to_string(), &index, features)?; let canonicalization_kind = match (&search_kind, &query.q) { (SearchKind::SemanticOnly { .. }, _) => { @@ -636,7 +636,7 @@ pub fn perform_federated_search( search.offset(0); search.limit(required_hit_count); - let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?; + let (result, _semantic_hit_count) = super::search_from_kind(index_uid.to_string(), search_kind, search)?; let format = AttributesFormat { attributes_to_retrieve: query.attributes_to_retrieve, retrieve_vectors, @@ -670,7 +670,8 @@ pub fn perform_federated_search( let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); - let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?; + let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder) + .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?; results_by_query.push(SearchResultByQuery { federation_options,