3263: Handle most io error instead of tagging everything as an internal r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/2255
Fix https://github.com/meilisearch/meilisearch/issues/2785
Close https://github.com/meilisearch/milli/pull/580

- [x] Find a way to catch the `io::Error` contained in `serde_json::Error`: We can't: https://docs.rs/serde_json/latest/serde_json/struct.Error.html
- [x] Check the `grenad::Error` as well => the `grenad::Error::Io` error are correctly converted to a `milli::Error::Io` error 
- [x] Ensure the error code mean the same thing under windows

Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2022-12-20 17:15:53 +00:00 committed by GitHub
commit 9925309492
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 78 additions and 82 deletions

2
Cargo.lock generated
View File

@ -2374,6 +2374,7 @@ dependencies = [
"csv", "csv",
"either", "either",
"enum-iterator", "enum-iterator",
"file-store",
"flate2", "flate2",
"fst", "fst",
"insta", "insta",
@ -2386,6 +2387,7 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"tar", "tar",
"tempfile",
"thiserror", "thiserror",
"time", "time",
"tokio", "tokio",

View File

@ -19,9 +19,10 @@ pub enum Error {
impl ErrorCode for Error { impl ErrorCode for Error {
fn error_code(&self) -> Code { fn error_code(&self) -> Code {
match self { match self {
// Are these three really Internal errors? Error::Io(e) => e.error_code(),
// TODO look at that later.
Error::Io(_) => Code::Internal, // These errors either happen when creating a dump and don't need any error code,
// or come from an internal bad deserialization.
Error::Serde(_) => Code::Internal, Error::Serde(_) => Code::Internal,
Error::Uuid(_) => Code::Internal, Error::Uuid(_) => Code::Internal,

View File

@ -1,42 +0,0 @@
use meilisearch_auth::error::AuthControllerError;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::internal_error;
use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
pub type Result<T> = std::result::Result<T, DumpError>;
#[derive(thiserror::Error, Debug)]
pub enum DumpError {
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
IndexResolver(Box<IndexResolverError>),
}
internal_error!(
DumpError: milli::heed::Error,
std::io::Error,
tokio::task::JoinError,
tokio::sync::oneshot::error::RecvError,
serde_json::error::Error,
tempfile::PersistError,
fs_extra::error::Error,
AuthControllerError,
TaskError
);
impl From<IndexResolverError> for DumpError {
fn from(e: IndexResolverError) -> Self {
Self::IndexResolver(Box::new(e))
}
}
impl ErrorCode for DumpError {
fn error_code(&self) -> Code {
match self {
DumpError::Internal(_) => Code::Internal,
DumpError::IndexResolver(e) => e.error_code(),
}
}
}

View File

@ -114,17 +114,17 @@ impl ErrorCode for Error {
Error::Dump(e) => e.error_code(), Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(), Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked => Code::Internal, Error::ProcessBatchPanicked => Code::Internal,
// TODO: TAMO: are all these errors really internal? Error::Heed(e) => e.error_code(),
Error::Heed(_) => Code::Internal, Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(_) => Code::Internal, Error::FileStore(e) => e.error_code(),
Error::IoError(_) => Code::Internal, Error::IoError(e) => e.error_code(),
Error::Persist(_) => Code::Internal, Error::Persist(e) => e.error_code(),
// Irrecoverable errors
Error::Anyhow(_) => Code::Internal, Error::Anyhow(_) => Code::Internal,
Error::CorruptedTaskQueue => Code::Internal, Error::CorruptedTaskQueue => Code::Internal,
Error::CorruptedDump => Code::Internal, Error::CorruptedDump => Code::Internal,
Error::TaskDatabaseUpdate(_) => Code::Internal, Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal, Error::CreateBatch(_) => Code::Internal,
Error::HeedTransaction(_) => Code::Internal,
} }
} }
} }

View File

@ -10,6 +10,7 @@ anyhow = "1.0.65"
csv = "1.1.6" csv = "1.1.6"
either = { version = "1.6.1", features = ["serde"] } either = { version = "1.6.1", features = ["serde"] }
enum-iterator = "1.1.3" enum-iterator = "1.1.3"
file-store = { path = "../file-store" }
flate2 = "1.0.24" flate2 = "1.0.24"
fst = "0.4.7" fst = "0.4.7"
memmap2 = "0.5.7" memmap2 = "0.5.7"
@ -20,6 +21,7 @@ roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.145", features = ["derive"] } serde = { version = "1.0.145", features = ["derive"] }
serde_json = "1.0.85" serde_json = "1.0.85"
tar = "0.4.38" tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30" thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.0" tokio = "1.0"

View File

@ -13,7 +13,6 @@ use serde::{Deserialize, Deserializer};
use serde_json::error::Category; use serde_json::error::Category;
use crate::error::{Code, ErrorCode}; use crate::error::{Code, ErrorCode};
use crate::internal_error;
type Result<T> = std::result::Result<T, DocumentFormatError>; type Result<T> = std::result::Result<T, DocumentFormatError>;
@ -36,14 +35,14 @@ impl fmt::Display for PayloadType {
#[derive(Debug)] #[derive(Debug)]
pub enum DocumentFormatError { pub enum DocumentFormatError {
Internal(Box<dyn std::error::Error + Send + Sync + 'static>), Io(io::Error),
MalformedPayload(Error, PayloadType), MalformedPayload(Error, PayloadType),
} }
impl Display for DocumentFormatError { impl Display for DocumentFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e), Self::Io(e) => write!(f, "{e}"),
Self::MalformedPayload(me, b) => match me.borrow() { Self::MalformedPayload(me, b) => match me.borrow() {
Error::Json(se) => { Error::Json(se) => {
let mut message = match se.classify() { let mut message = match se.classify() {
@ -85,23 +84,27 @@ impl std::error::Error for DocumentFormatError {}
impl From<(PayloadType, Error)> for DocumentFormatError { impl From<(PayloadType, Error)> for DocumentFormatError {
fn from((ty, error): (PayloadType, Error)) -> Self { fn from((ty, error): (PayloadType, Error)) -> Self {
match error { match error {
Error::Io(e) => Self::Internal(Box::new(e)), Error::Io(e) => Self::Io(e),
e => Self::MalformedPayload(e, ty), e => Self::MalformedPayload(e, ty),
} }
} }
} }
impl From<io::Error> for DocumentFormatError {
fn from(error: io::Error) -> Self {
Self::Io(error)
}
}
impl ErrorCode for DocumentFormatError { impl ErrorCode for DocumentFormatError {
fn error_code(&self) -> Code { fn error_code(&self) -> Code {
match self { match self {
DocumentFormatError::Internal(_) => Code::Internal, DocumentFormatError::Io(e) => e.error_code(),
DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload, DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload,
} }
} }
} }
internal_error!(DocumentFormatError: io::Error);
/// Reads CSV from input and write an obkv batch to writer. /// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> { pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer); let mut builder = DocumentsBatchBuilder::new(writer);
@ -110,7 +113,7 @@ pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?; builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
let count = builder.documents_count(); let count = builder.documents_count();
let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?; let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64) Ok(count as u64)
} }
@ -139,7 +142,7 @@ fn read_json_inner(
// The json data has been deserialized and does not need to be processed again. // The json data has been deserialized and does not need to be processed again.
// The data has been transferred to the writer during the deserialization process. // The data has been transferred to the writer during the deserialization process.
Ok(Ok(_)) => (), Ok(Ok(_)) => (),
Ok(Err(e)) => return Err(DocumentFormatError::Internal(Box::new(e))), Ok(Err(e)) => return Err(DocumentFormatError::Io(e)),
Err(_e) => { Err(_e) => {
// If we cannot deserialize the content as an array of object then we try // If we cannot deserialize the content as an array of object then we try
// to deserialize it with the original method to keep correct error messages. // to deserialize it with the original method to keep correct error messages.
@ -155,16 +158,13 @@ fn read_json_inner(
.map_err(|e| (payload_type, e))?; .map_err(|e| (payload_type, e))?;
for object in content.inner.map_right(|o| vec![o]).into_inner() { for object in content.inner.map_right(|o| vec![o]).into_inner() {
builder builder.append_json_object(&object).map_err(DocumentFormatError::Io)?;
.append_json_object(&object)
.map_err(Into::into)
.map_err(DocumentFormatError::Internal)?;
} }
} }
} }
let count = builder.documents_count(); let count = builder.documents_count();
let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?; let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
Ok(count as u64) Ok(count as u64)
} }

View File

@ -1,4 +1,4 @@
use std::fmt; use std::{fmt, io};
use actix_web::http::StatusCode; use actix_web::http::StatusCode;
use actix_web::{self as aweb, HttpResponseBuilder}; use actix_web::{self as aweb, HttpResponseBuilder};
@ -23,7 +23,10 @@ pub struct ResponseError {
} }
impl ResponseError { impl ResponseError {
pub fn from_msg(message: String, code: Code) -> Self { pub fn from_msg(mut message: String, code: Code) -> Self {
if code == Code::IoError {
message.push_str(". This error generally happens when you have no space left on device or when your database doesn't have read or write right.");
}
Self { Self {
code: code.http(), code: code.http(),
message, message,
@ -47,13 +50,7 @@ where
T: ErrorCode, T: ErrorCode,
{ {
fn from(other: T) -> Self { fn from(other: T) -> Self {
Self { Self::from_msg(other.to_string(), other.error_code())
code: other.http_status(),
message: other.to_string(),
error_code: other.error_name(),
error_type: other.error_type(),
error_link: other.error_url(),
}
} }
} }
@ -111,8 +108,13 @@ impl fmt::Display for ErrorType {
} }
} }
#[derive(Serialize, Deserialize, Debug, Clone, Copy)] #[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
pub enum Code { pub enum Code {
// error related to your setup
IoError,
NoSpaceLeftOnDevice,
TooManyOpenFiles,
// index related error // index related error
CreateIndex, CreateIndex,
IndexAlreadyExists, IndexAlreadyExists,
@ -145,7 +147,6 @@ pub enum Code {
InvalidToken, InvalidToken,
MissingAuthorizationHeader, MissingAuthorizationHeader,
MissingMasterKey, MissingMasterKey,
NoSpaceLeftOnDevice,
DumpNotFound, DumpNotFound,
InvalidTaskDateFilter, InvalidTaskDateFilter,
InvalidTaskStatusesFilter, InvalidTaskStatusesFilter,
@ -188,6 +189,15 @@ impl Code {
use Code::*; use Code::*;
match self { match self {
// related to the setup
IoError => ErrCode::invalid("io_error", StatusCode::UNPROCESSABLE_ENTITY),
TooManyOpenFiles => {
ErrCode::invalid("too_many_open_files", StatusCode::UNPROCESSABLE_ENTITY)
}
NoSpaceLeftOnDevice => {
ErrCode::invalid("no_space_left_on_device", StatusCode::UNPROCESSABLE_ENTITY)
}
// index related errors // index related errors
// create index is thrown on internal error while creating an index. // create index is thrown on internal error while creating an index.
CreateIndex => { CreateIndex => {
@ -266,9 +276,6 @@ impl Code {
ErrCode::invalid("missing_task_filters", StatusCode::BAD_REQUEST) ErrCode::invalid("missing_task_filters", StatusCode::BAD_REQUEST)
} }
DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND), DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND),
NoSpaceLeftOnDevice => {
ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR)
}
PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE), PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
RetrieveDocument => { RetrieveDocument => {
ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST) ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST)
@ -380,7 +387,7 @@ impl ErrorCode for milli::Error {
match self { match self {
Error::InternalError(_) => Code::Internal, Error::InternalError(_) => Code::Internal,
Error::IoError(_) => Code::Internal, Error::IoError(e) => e.error_code(),
Error::UserError(ref error) => { Error::UserError(ref error) => {
match error { match error {
// TODO: wait for spec for new error codes. // TODO: wait for spec for new error codes.
@ -415,13 +422,28 @@ impl ErrorCode for milli::Error {
} }
} }
impl ErrorCode for file_store::Error {
fn error_code(&self) -> Code {
match self {
Self::IoError(e) => e.error_code(),
Self::PersistError(e) => e.error_code(),
}
}
}
impl ErrorCode for tempfile::PersistError {
fn error_code(&self) -> Code {
self.error.error_code()
}
}
impl ErrorCode for HeedError { impl ErrorCode for HeedError {
fn error_code(&self) -> Code { fn error_code(&self) -> Code {
match self { match self {
HeedError::Mdb(MdbError::MapFull) => Code::DatabaseSizeLimitReached, HeedError::Mdb(MdbError::MapFull) => Code::DatabaseSizeLimitReached,
HeedError::Mdb(MdbError::Invalid) => Code::InvalidStore, HeedError::Mdb(MdbError::Invalid) => Code::InvalidStore,
HeedError::Io(_) HeedError::Io(e) => e.error_code(),
| HeedError::Mdb(_) HeedError::Mdb(_)
| HeedError::Encoding | HeedError::Encoding
| HeedError::Decoding | HeedError::Decoding
| HeedError::InvalidDatabaseTyping | HeedError::InvalidDatabaseTyping
@ -431,6 +453,17 @@ impl ErrorCode for HeedError {
} }
} }
impl ErrorCode for io::Error {
fn error_code(&self) -> Code {
match self.raw_os_error() {
Some(5) => Code::IoError,
Some(24) => Code::TooManyOpenFiles,
Some(28) => Code::NoSpaceLeftOnDevice,
_ => Code::Internal,
}
}
}
#[cfg(feature = "test-traits")] #[cfg(feature = "test-traits")]
mod strategy { mod strategy {
use proptest::strategy::Strategy; use proptest::strategy::Strategy;