From c5f22be6e1931c46f6cc6f56c799c9a4e68c789b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 9 Mar 2023 11:12:49 +0100 Subject: [PATCH 1/3] add boolean support for csv documents --- meilisearch/tests/documents/add_documents.rs | 109 +++++++++++++++++++ milli/src/documents/builder.rs | 28 ++++- milli/src/documents/mod.rs | 17 +++ 3 files changed, 151 insertions(+), 3 deletions(-) diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 612a2cdb6..164d68582 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -279,6 +279,81 @@ async fn add_csv_document() { "###); } +#[actix_rt::test] +async fn add_csv_document_with_types() { + let server = Server::new().await; + let index = server.index("pets"); + + let document = "#id:number,name:string,race:string,age:number,cute:boolean +0,jean,bernese mountain,2.5,true +1,,,, +2,lilou,pug,-2,false"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 0, + "indexUid": "pets", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 0, + "indexUid": "pets", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 3, + "indexedDocuments": 3 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "#id": 0, + "name": "jean", + "race": "bernese mountain", + "age": 2.5, + "cute": true + }, + { + "#id": 1, + "name": null, + "race": null, + "age": null, + "cute": null + }, + { + "#id": 2, + "name": "lilou", + "race": "pug", + "age": -2, + "cute": false + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); +} + #[actix_rt::test] async fn add_csv_document_with_custom_delimiter() { let server = Server::new().await; @@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() { "###); } +#[actix_rt::test] +async fn add_csv_document_with_types_error() { + let server = Server::new().await; + let index = server.index("pets"); + + let document = "#id:number,a:boolean,b:number +0,doggo,1"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.", + "code": "malformed_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#malformed_payload" + } + "###); + + let document = "#id:number,a:boolean,b:number +0,true,doggo"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.", + "code": "malformed_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#malformed_payload" + } + "###); +} + /// any other content-type is must be refused #[actix_rt::test] async fn error_add_documents_test_bad_content_types() { diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs index 1fa59168e..ace9340d7 100644 --- a/milli/src/documents/builder.rs +++ b/milli/src/documents/builder.rs @@ -116,12 +116,13 @@ impl DocumentsBatchBuilder { let value = &record[*i]; match type_ { AllowedType::Number => { - if value.trim().is_empty() { + let trimmed_value = value.trim(); + if trimmed_value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; - } else if let Ok(integer) = value.trim().parse::() { + } else if let Ok(integer) = trimmed_value.parse::() { to_writer(&mut self.value_buffer, &integer)?; } else { - match value.trim().parse::() { + match trimmed_value.parse::() { Ok(float) => { to_writer(&mut self.value_buffer, &float)?; } @@ -135,6 +136,25 @@ impl DocumentsBatchBuilder { } } } + AllowedType::Boolean => { + let trimmed_value = value.trim(); + if trimmed_value.is_empty() { + to_writer(&mut self.value_buffer, &Value::Null)?; + } else { + match trimmed_value.parse::() { + Ok(bool) => { + to_writer(&mut self.value_buffer, &bool)?; + } + Err(error) => { + return Err(Error::ParseBool { + error, + line, + value: value.to_string(), + }); + } + } + } + } AllowedType::String => { if value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; @@ -173,6 +193,7 @@ impl DocumentsBatchBuilder { #[derive(Debug)] enum AllowedType { String, + Boolean, Number, } @@ -181,6 +202,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) { match header.rsplit_once(':') { Some((field_name, field_type)) => match field_type { "string" => (field_name, AllowedType::String), + "boolean" => (field_name, AllowedType::Boolean), "number" => (field_name, AllowedType::Number), // if the pattern isn't reconized, we keep the whole field. _otherwise => (header, AllowedType::String), diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index da3a07942..67b99db9a 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -90,6 +90,7 @@ impl DocumentsBatchIndex { #[derive(Debug)] pub enum Error { ParseFloat { error: std::num::ParseFloatError, line: usize, value: String }, + ParseBool { error: std::str::ParseBoolError, line: usize, value: String }, InvalidDocumentFormat, InvalidEnrichedData, InvalidUtf8(Utf8Error), @@ -136,6 +137,9 @@ impl fmt::Display for Error { Error::ParseFloat { error, line, value } => { write!(f, "Error parsing number {:?} at line {}: {}", value, line, error) } + Error::ParseBool { error, line, value } => { + write!(f, "Error parsing boolean {:?} at line {}: {}", value, line, error) + } Error::InvalidDocumentFormat => { f.write_str("Invalid document addition format, missing the documents batch index.") } @@ -274,6 +278,19 @@ mod test { ]); } + #[test] + fn csv_types_dont_panic() { + let csv1_content = + "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5"; + let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content)); + + let mut builder = DocumentsBatchBuilder::new(Vec::new()); + builder.append_csv(csv1).unwrap(); + let vector = builder.into_inner().unwrap(); + + DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap(); + } + #[test] fn out_of_order_csv_fields() { let csv1_content = "id:number,b\n1,0"; From eddefb0e0fcb17ba45e23d581443a9f52926e010 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 9 Mar 2023 11:23:57 +0100 Subject: [PATCH 2/3] refactor the error type of the milli::document thing silence a warning --- milli/src/documents/mod.rs | 79 ++++++++------------------------------ 1 file changed, 17 insertions(+), 62 deletions(-) diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index 67b99db9a..43b31187d 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -3,7 +3,7 @@ mod enriched; mod reader; mod serde_impl; -use std::fmt::{self, Debug}; +use std::fmt::Debug; use std::io; use std::str::Utf8Error; @@ -87,75 +87,30 @@ impl DocumentsBatchIndex { } } -#[derive(Debug)] +#[derive(Debug, thiserror::Error)] pub enum Error { + #[error("Error parsing number {value:?} at line {line}: {error}")] ParseFloat { error: std::num::ParseFloatError, line: usize, value: String }, + #[error("Error parsing boolean {value:?} at line {line}: {error}")] ParseBool { error: std::str::ParseBoolError, line: usize, value: String }, + #[error("Invalid document addition format, missing the documents batch index.")] InvalidDocumentFormat, + #[error("Invalid enriched data.")] InvalidEnrichedData, - InvalidUtf8(Utf8Error), - Csv(csv::Error), - Json(serde_json::Error), + #[error(transparent)] + InvalidUtf8(#[from] Utf8Error), + #[error(transparent)] + Csv(#[from] csv::Error), + #[error(transparent)] + Json(#[from] serde_json::Error), + #[error(transparent)] Serialize(serde_json::Error), - Grenad(grenad::Error), - Io(io::Error), + #[error(transparent)] + Grenad(#[from] grenad::Error), + #[error(transparent)] + Io(#[from] io::Error), } -impl From for Error { - fn from(e: csv::Error) -> Self { - Self::Csv(e) - } -} - -impl From for Error { - fn from(other: io::Error) -> Self { - Self::Io(other) - } -} - -impl From for Error { - fn from(other: serde_json::Error) -> Self { - Self::Json(other) - } -} - -impl From for Error { - fn from(other: grenad::Error) -> Self { - Self::Grenad(other) - } -} - -impl From for Error { - fn from(other: Utf8Error) -> Self { - Self::InvalidUtf8(other) - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Error::ParseFloat { error, line, value } => { - write!(f, "Error parsing number {:?} at line {}: {}", value, line, error) - } - Error::ParseBool { error, line, value } => { - write!(f, "Error parsing boolean {:?} at line {}: {}", value, line, error) - } - Error::InvalidDocumentFormat => { - f.write_str("Invalid document addition format, missing the documents batch index.") - } - Error::InvalidEnrichedData => f.write_str("Invalid enriched data."), - Error::InvalidUtf8(e) => write!(f, "{}", e), - Error::Io(e) => write!(f, "{}", e), - Error::Serialize(e) => write!(f, "{}", e), - Error::Grenad(e) => write!(f, "{}", e), - Error::Csv(e) => write!(f, "{}", e), - Error::Json(e) => write!(f, "{}", e), - } - } -} - -impl std::error::Error for Error {} - #[cfg(test)] pub fn objects_from_json_value(json: serde_json::Value) -> Vec { let documents = match json { From 0f33a65468b94a43e8eebd1582e2a3263727791a Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 13 Mar 2023 16:51:11 +0100 Subject: [PATCH 3/3] makes kero happy --- milli/src/documents/builder.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs index ace9340d7..e5124f67f 100644 --- a/milli/src/documents/builder.rs +++ b/milli/src/documents/builder.rs @@ -114,9 +114,9 @@ impl DocumentsBatchBuilder { self.value_buffer.clear(); let value = &record[*i]; + let trimmed_value = value.trim(); match type_ { AllowedType::Number => { - let trimmed_value = value.trim(); if trimmed_value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; } else if let Ok(integer) = trimmed_value.parse::() { @@ -137,7 +137,6 @@ impl DocumentsBatchBuilder { } } AllowedType::Boolean => { - let trimmed_value = value.trim(); if trimmed_value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; } else {