add boolean support for csv documents

This commit is contained in:
Tamo 2023-03-09 11:12:49 +01:00
parent df3986cd83
commit c5f22be6e1
3 changed files with 151 additions and 3 deletions

View File

@ -279,6 +279,81 @@ async fn add_csv_document() {
"###);
}
#[actix_rt::test]
async fn add_csv_document_with_types() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id:number,name:string,race:string,age:number,cute:boolean
0,jean,bernese mountain,2.5,true
1,,,,
2,lilou,pug,-2,false";
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 3,
"indexedDocuments": 3
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": 0,
"name": "jean",
"race": "bernese mountain",
"age": 2.5,
"cute": true
},
{
"#id": 1,
"name": null,
"race": null,
"age": null,
"cute": null
},
{
"#id": 2,
"name": "lilou",
"race": "pug",
"age": -2,
"cute": false
}
],
"offset": 0,
"limit": 20,
"total": 3
}
"###);
}
#[actix_rt::test]
async fn add_csv_document_with_custom_delimiter() {
let server = Server::new().await;
@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() {
"###);
}
#[actix_rt::test]
async fn add_csv_document_with_types_error() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id:number,a:boolean,b:number
0,doggo,1";
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.",
"code": "malformed_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#malformed_payload"
}
"###);
let document = "#id:number,a:boolean,b:number
0,true,doggo";
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.",
"code": "malformed_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#malformed_payload"
}
"###);
}
/// any other content-type is must be refused
#[actix_rt::test]
async fn error_add_documents_test_bad_content_types() {

View File

@ -116,12 +116,13 @@ impl<W: Write> DocumentsBatchBuilder<W> {
let value = &record[*i];
match type_ {
AllowedType::Number => {
if value.trim().is_empty() {
let trimmed_value = value.trim();
if trimmed_value.is_empty() {
to_writer(&mut self.value_buffer, &Value::Null)?;
} else if let Ok(integer) = value.trim().parse::<i64>() {
} else if let Ok(integer) = trimmed_value.parse::<i64>() {
to_writer(&mut self.value_buffer, &integer)?;
} else {
match value.trim().parse::<f64>() {
match trimmed_value.parse::<f64>() {
Ok(float) => {
to_writer(&mut self.value_buffer, &float)?;
}
@ -135,6 +136,25 @@ impl<W: Write> DocumentsBatchBuilder<W> {
}
}
}
AllowedType::Boolean => {
let trimmed_value = value.trim();
if trimmed_value.is_empty() {
to_writer(&mut self.value_buffer, &Value::Null)?;
} else {
match trimmed_value.parse::<bool>() {
Ok(bool) => {
to_writer(&mut self.value_buffer, &bool)?;
}
Err(error) => {
return Err(Error::ParseBool {
error,
line,
value: value.to_string(),
});
}
}
}
}
AllowedType::String => {
if value.is_empty() {
to_writer(&mut self.value_buffer, &Value::Null)?;
@ -173,6 +193,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
#[derive(Debug)]
enum AllowedType {
String,
Boolean,
Number,
}
@ -181,6 +202,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
match header.rsplit_once(':') {
Some((field_name, field_type)) => match field_type {
"string" => (field_name, AllowedType::String),
"boolean" => (field_name, AllowedType::Boolean),
"number" => (field_name, AllowedType::Number),
// if the pattern isn't reconized, we keep the whole field.
_otherwise => (header, AllowedType::String),

View File

@ -90,6 +90,7 @@ impl DocumentsBatchIndex {
#[derive(Debug)]
pub enum Error {
ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
ParseBool { error: std::str::ParseBoolError, line: usize, value: String },
InvalidDocumentFormat,
InvalidEnrichedData,
InvalidUtf8(Utf8Error),
@ -136,6 +137,9 @@ impl fmt::Display for Error {
Error::ParseFloat { error, line, value } => {
write!(f, "Error parsing number {:?} at line {}: {}", value, line, error)
}
Error::ParseBool { error, line, value } => {
write!(f, "Error parsing boolean {:?} at line {}: {}", value, line, error)
}
Error::InvalidDocumentFormat => {
f.write_str("Invalid document addition format, missing the documents batch index.")
}
@ -274,6 +278,19 @@ mod test {
]);
}
#[test]
fn csv_types_dont_panic() {
let csv1_content =
"id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5";
let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));
let mut builder = DocumentsBatchBuilder::new(Vec::new());
builder.append_csv(csv1).unwrap();
let vector = builder.into_inner().unwrap();
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
}
#[test]
fn out_of_order_csv_fields() {
let csv1_content = "id:number,b\n1,0";