diff --git a/milli/src/error.rs b/milli/src/error.rs index a2d5219c1..47b159223 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -60,7 +60,7 @@ pub enum UserError { DocumentLimitReached, InvalidDocumentId { document_id: Value }, InvalidFacetsDistribution { invalid_facets_name: BTreeSet }, - InvalidGeoField { document_id: Value }, + InvalidGeoField(GeoError), InvalidFilter(String), InvalidSortableAttribute { field: String, valid_fields: BTreeSet }, SortRankingRuleMissing, @@ -76,6 +76,14 @@ pub enum UserError { InvalidMinTypoWordLenSetting(u8, u8), } +#[derive(Debug)] +pub enum GeoError { + MissingLatitude { document_id: Value }, + MissingLongitude { document_id: Value }, + BadLatitude { document_id: Value, value: Value }, + BadLongitude { document_id: Value, value: Value }, +} + impl From for Error { fn from(error: io::Error) -> Error { // TODO must be improved and more precise @@ -89,6 +97,12 @@ impl From for Error { } } +impl From for Error { + fn from(error: GeoError) -> Error { + Error::UserError(UserError::InvalidGeoField(error)) + } +} + impl From> for Error where Error: From, @@ -230,17 +244,7 @@ impl fmt::Display for UserError { name_list ) } - Self::InvalidGeoField { document_id } => { - let document_id = match document_id { - Value::String(id) => id.clone(), - _ => document_id.to_string(), - }; - write!( - f, - "The document with the id: `{}` contains an invalid `_geo` field.", - document_id - ) - }, + Self::InvalidGeoField(error) => write!(f, "{error}"), Self::InvalidDocumentId { document_id } => { let document_id = match document_id { Value::String(id) => id.clone(), @@ -314,6 +318,33 @@ impl fmt::Display for FieldIdMapMissingEntry { impl StdError for FieldIdMapMissingEntry {} +impl From for UserError { + fn from(geo_error: GeoError) -> Self { + UserError::InvalidGeoField(geo_error) + } +} + +impl fmt::Display for GeoError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + GeoError::MissingLatitude { document_id } => { + write!(f, "Could not find latitude in the document with the id: `{document_id}`. Was expecting a `_geo.lat` field.") + } + GeoError::MissingLongitude { document_id } => { + write!(f, "Could not find longitude in the document with the id: `{document_id}`. Was expecting a `_geo.lng` field.") + } + GeoError::BadLatitude { document_id, value } => { + write!(f, "Could not parse latitude in the document with the id: `{document_id}`. Was expecting a number but instead got `{value}`.") + } + GeoError::BadLongitude { document_id, value } => { + write!(f, "Could not parse longitude in the document with the id: `{document_id}`. Was expecting a number but instead got `{value}`.") + } + } + } +} + +impl StdError for GeoError {} + impl fmt::Display for SerializationError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index 65cb1c3ce..0ecb113b3 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -1,10 +1,13 @@ use std::fs::File; use std::io; +use std::result::Result as StdResult; use concat_arrays::concat_arrays; +use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; -use crate::{FieldId, InternalError, Result, UserError}; +use crate::error::GeoError; +use crate::{FieldId, InternalError, Result}; /// Extracts the geographical coordinates contained in each document under the `_geo` field. /// @@ -24,15 +27,31 @@ pub fn extract_geo_points( let mut cursor = obkv_documents.into_cursor()?; while let Some((docid_bytes, value)) = cursor.move_on_next()? { let obkv = obkv::KvReader::new(value); - let (lat, lng) = obkv.get(lat_fid).zip(obkv.get(lng_fid)).ok_or_else(|| { + // since we only needs the primary key when we throw an error we create this getter to + // lazily get it when needed + let primary_key = || -> Value { let primary_key = obkv.get(primary_key_id).unwrap(); - let primary_key = serde_json::from_slice(primary_key).unwrap(); - UserError::InvalidGeoField { document_id: primary_key } - })?; - let (lat, lng): (f64, f64) = ( + serde_json::from_slice(primary_key).unwrap() + }; + + // first we get the two fields + let lat = obkv + .get(lat_fid) + .ok_or_else(|| GeoError::MissingLatitude { document_id: primary_key() })?; + let lng = obkv + .get(lng_fid) + .ok_or_else(|| GeoError::MissingLongitude { document_id: primary_key() })?; + + // then we extract the values + let lat = extract_float_from_value( serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?, + ) + .map_err(|lat| GeoError::BadLatitude { document_id: primary_key(), value: lat })?; + + let lng = extract_float_from_value( serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?, - ); + ) + .map_err(|lng| GeoError::BadLongitude { document_id: primary_key(), value: lng })?; let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; writer.insert(docid_bytes, bytes)?; @@ -40,3 +59,11 @@ pub fn extract_geo_points( Ok(writer_into_reader(writer)?) } + +fn extract_float_from_value(value: Value) -> StdResult { + match value { + Value::Number(ref n) => n.as_f64().ok_or(value), + Value::String(ref s) => s.parse::().map_err(|_| value), + value => Err(value), + } +} diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index ed2347b25..794e64abc 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -1006,6 +1006,135 @@ mod tests { wtxn.commit().unwrap(); } + #[test] + fn index_all_flavour_of_geo() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let config = IndexerConfig::default(); + let mut wtxn = index.write_txn().unwrap(); + let mut builder = update::Settings::new(&mut wtxn, &index, &config); + + builder.set_filterable_fields(hashset!(S("_geo"))); + builder.execute(|_| ()).unwrap(); + wtxn.commit().unwrap(); + + let indexing_config = IndexDocumentsConfig { + update_method: IndexDocumentsMethod::ReplaceDocuments, + ..Default::default() + }; + let mut wtxn = index.write_txn().unwrap(); + + let documents = documents!([ + { "id": 0, "_geo": { "lat": 31, "lng": [42] } }, + { "id": 1, "_geo": { "lat": "31" }, "_geo.lng": 42 }, + { "id": 2, "_geo": { "lng": "42" }, "_geo.lat": "31" }, + { "id": 3, "_geo.lat": 31, "_geo.lng": "42" }, + ]); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) + .unwrap(); + builder.add_documents(documents).unwrap(); + builder.execute().unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + + let mut search = crate::Search::new(&rtxn, &index); + search.filter(crate::Filter::from_str("_geoRadius(31, 42, 0.000001)").unwrap().unwrap()); + let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); + assert_eq!(documents_ids, vec![0, 1, 2, 3]); + } + + #[test] + fn geo_error() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let config = IndexerConfig::default(); + let mut wtxn = index.write_txn().unwrap(); + let mut builder = update::Settings::new(&mut wtxn, &index, &config); + + builder.set_filterable_fields(hashset!(S("_geo"))); + builder.execute(|_| ()).unwrap(); + wtxn.commit().unwrap(); + + let indexing_config = IndexDocumentsConfig { + update_method: IndexDocumentsMethod::ReplaceDocuments, + ..Default::default() + }; + let mut wtxn = index.write_txn().unwrap(); + + let documents = documents!([ + { "id": 0, "_geo": { "lng": 42 } } + ]); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) + .unwrap(); + builder.add_documents(documents).unwrap(); + let error = builder.execute().unwrap_err(); + assert_eq!( + &error.to_string(), + r#"Could not find latitude in the document with the id: `0`. Was expecting a `_geo.lat` field."# + ); + + let documents = documents!([ + { "id": 0, "_geo": { "lat": 42 } } + ]); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) + .unwrap(); + builder.add_documents(documents).unwrap(); + let error = builder.execute().unwrap_err(); + assert_eq!( + &error.to_string(), + r#"Could not find longitude in the document with the id: `0`. Was expecting a `_geo.lng` field."# + ); + + let documents = documents!([ + { "id": 0, "_geo": { "lat": "lol", "lng": 42 } } + ]); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) + .unwrap(); + builder.add_documents(documents).unwrap(); + let error = builder.execute().unwrap_err(); + assert_eq!( + &error.to_string(), + r#"Could not parse latitude in the document with the id: `0`. Was expecting a number but instead got `"lol"`."# + ); + + let documents = documents!([ + { "id": 0, "_geo": { "lat": [12, 13], "lng": 42 } } + ]); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) + .unwrap(); + builder.add_documents(documents).unwrap(); + let error = builder.execute().unwrap_err(); + assert_eq!( + &error.to_string(), + r#"Could not parse latitude in the document with the id: `0`. Was expecting a number but instead got `[12,13]`."# + ); + + let documents = documents!([ + { "id": 0, "_geo": { "lat": 12, "lng": "hello" } } + ]); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) + .unwrap(); + builder.add_documents(documents).unwrap(); + let error = builder.execute().unwrap_err(); + assert_eq!( + &error.to_string(), + r#"Could not parse longitude in the document with the id: `0`. Was expecting a number but instead got `"hello"`."# + ); + } + #[test] fn delete_documents_then_insert() { let path = tempfile::tempdir().unwrap();