diff --git a/meilisearch/tests/search/geo.rs b/meilisearch/tests/search/geo.rs index 5c6bb78a1..8754453ba 100644 --- a/meilisearch/tests/search/geo.rs +++ b/meilisearch/tests/search/geo.rs @@ -117,3 +117,69 @@ async fn geo_bounding_box_with_string_and_number() { ) .await; } + +#[actix_rt::test] +async fn bug_4640() { + // https://github.com/meilisearch/meilisearch/issues/4640 + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.update_settings_filterable_attributes(json!(["_geo"])).await; + let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await; + index.wait_task(ret.uid()).await; + + // Sort the document with the second one first + index + .search( + json!({ + "sort": ["_geoPoint(45.4777599, 9.1967508):asc"], + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "id": 2, + "name": "La Bella Italia", + "address": "456 Elm Street, Townsville", + "type": "Italian", + "rating": 9, + "_geo": { + "lat": "45.4777599", + "lng": "9.1967508" + } + }, + { + "id": 1, + "name": "Taco Truck", + "address": "444 Salsa Street, Burritoville", + "type": "Mexican", + "rating": 9, + "_geo": { + "lat": 34.0522, + "lng": -118.2437 + }, + "_geoDistance": 9714063 + }, + { + "id": 3, + "name": "Crêpe Truck", + "address": "2 Billig Avenue, Rouenville", + "type": "French", + "rating": 10 + } + ], + "query": "", + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); + }, + ) + .await; +} diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index a2b060255..3cbd7e49e 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ 
b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -45,7 +45,6 @@ pub fn extract_fid_docid_facet_values( obkv_documents: grenad::Reader, indexer: GrenadParameters, settings_diff: &InnerIndexSettingsDiff, - geo_fields_ids: Option<(FieldId, FieldId)>, ) -> Result { let max_memory = indexer.max_memory_by_thread(); @@ -125,12 +124,18 @@ pub fn extract_fid_docid_facet_values( add_exists.insert(document); } - let geo_support = - geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng); + let del_geo_support = settings_diff + .old + .geo_fields_ids + .map_or(false, |(lat, lng)| field_id == lat || field_id == lng); + let add_geo_support = settings_diff + .new + .geo_fields_ids + .map_or(false, |(lat, lng)| field_id == lat || field_id == lng); let del_filterable_values = - del_value.map(|value| extract_facet_values(&value, geo_support)); + del_value.map(|value| extract_facet_values(&value, del_geo_support)); let add_filterable_values = - add_value.map(|value| extract_facet_values(&value, geo_support)); + add_value.map(|value| extract_facet_values(&value, add_geo_support)); // Those closures are just here to simplify things a bit. 
let mut insert_numbers_diff = |del_numbers, add_numbers| { diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index 3d7463fba..f997d6ab7 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -8,6 +8,7 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; use crate::error::GeoError; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::index_documents::extract_finite_float_from_value; +use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::{FieldId, InternalError, Result}; /// Extracts the geographical coordinates contained in each document under the `_geo` field. @@ -18,7 +19,7 @@ pub fn extract_geo_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, primary_key_id: FieldId, - (lat_fid, lng_fid): (FieldId, FieldId), + settings_diff: &InnerIndexSettingsDiff, ) -> Result>> { let mut writer = create_writer( indexer.chunk_compression_type, @@ -38,47 +39,27 @@ pub fn extract_geo_points( serde_json::from_slice(document_id).unwrap() }; - // first we get the two fields - match (obkv.get(lat_fid), obkv.get(lng_fid)) { - (Some(lat), Some(lng)) => { - let deladd_lat_obkv = KvReaderDelAdd::new(lat); - let deladd_lng_obkv = KvReaderDelAdd::new(lng); + // extract old version + let del_lat_lng = + extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?; + // extract new version + let add_lat_lng = + extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?; - // then we extract the values - let del_lat_lng = deladd_lat_obkv - .get(DelAdd::Deletion) - .zip(deladd_lng_obkv.get(DelAdd::Deletion)) - .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id)) - .transpose()?; - let add_lat_lng = deladd_lat_obkv - .get(DelAdd::Addition) - 
.zip(deladd_lng_obkv.get(DelAdd::Addition)) - .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id)) - .transpose()?; - - if del_lat_lng != add_lat_lng { - let mut obkv = KvWriterDelAdd::memory(); - if let Some([lat, lng]) = del_lat_lng { - #[allow(clippy::drop_non_drop)] - let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; - obkv.insert(DelAdd::Deletion, bytes)?; - } - if let Some([lat, lng]) = add_lat_lng { - #[allow(clippy::drop_non_drop)] - let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; - obkv.insert(DelAdd::Addition, bytes)?; - } - let bytes = obkv.into_inner()?; - writer.insert(docid_bytes, bytes)?; - } + if del_lat_lng != add_lat_lng { + let mut obkv = KvWriterDelAdd::memory(); + if let Some([lat, lng]) = del_lat_lng { + #[allow(clippy::drop_non_drop)] + let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; + obkv.insert(DelAdd::Deletion, bytes)?; } - (None, Some(_)) => { - return Err(GeoError::MissingLatitude { document_id: document_id() }.into()) + if let Some([lat, lng]) = add_lat_lng { + #[allow(clippy::drop_non_drop)] + let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; + obkv.insert(DelAdd::Addition, bytes)?; } - (Some(_), None) => { - return Err(GeoError::MissingLongitude { document_id: document_id() }.into()) - } - (None, None) => (), + let bytes = obkv.into_inner()?; + writer.insert(docid_bytes, bytes)?; } } @@ -86,16 +67,37 @@ pub fn extract_geo_points( } /// Extract the finite floats lat and lng from two bytes slices. 
-fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> { - let lat = extract_finite_float_from_value( - serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?, - ) - .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?; +fn extract_lat_lng( + document: &obkv::KvReader, + settings: &InnerIndexSettings, + deladd: DelAdd, + document_id: impl Fn() -> Value, +) -> Result> { + match settings.geo_fields_ids { + Some((lat_fid, lng_fid)) => { + let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd)); + let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd)); + let (lat, lng) = match (lat, lng) { + (Some(lat), Some(lng)) => (lat, lng), + (Some(_), None) => { + return Err(GeoError::MissingLongitude { document_id: document_id() }.into()) + } + (None, Some(_)) => { + return Err(GeoError::MissingLatitude { document_id: document_id() }.into()) + } + (None, None) => return Ok(None), + }; + let lat = extract_finite_float_from_value( + serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?, + ) + .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?; - let lng = extract_finite_float_from_value( - serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?, - ) - .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?; - - Ok([lat, lng]) + let lng = extract_finite_float_from_value( + serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?, + ) + .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?; + Ok(Some([lat, lng])) + } + None => Ok(None), + } } diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 90723bc4a..18340a3ae 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -43,7 +43,6 @@ pub(crate) fn 
data_from_obkv_documents( indexer: GrenadParameters, lmdb_writer_sx: Sender>, primary_key_id: FieldId, - geo_fields_ids: Option<(FieldId, FieldId)>, settings_diff: Arc, max_positions_per_attributes: Option, ) -> Result<()> { @@ -70,7 +69,6 @@ pub(crate) fn data_from_obkv_documents( indexer, lmdb_writer_sx.clone(), primary_key_id, - geo_fields_ids, settings_diff.clone(), max_positions_per_attributes, ) @@ -293,7 +291,6 @@ fn send_and_extract_flattened_documents_data( indexer: GrenadParameters, lmdb_writer_sx: Sender>, primary_key_id: FieldId, - geo_fields_ids: Option<(FieldId, FieldId)>, settings_diff: Arc, max_positions_per_attributes: Option, ) -> Result<( @@ -303,12 +300,13 @@ fn send_and_extract_flattened_documents_data( let flattened_documents_chunk = flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?; - if let Some(geo_fields_ids) = geo_fields_ids { + if settings_diff.run_geo_indexing() { let documents_chunk_cloned = flattened_documents_chunk.clone(); let lmdb_writer_sx_cloned = lmdb_writer_sx.clone(); + let settings_diff = settings_diff.clone(); rayon::spawn(move || { let result = - extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids); + extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff); let _ = match result { Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))), Err(error) => lmdb_writer_sx_cloned.send(Err(error)), @@ -347,7 +345,6 @@ fn send_and_extract_flattened_documents_data( flattened_documents_chunk.clone(), indexer, &settings_diff, - geo_fields_ids, )?; // send fid_docid_facet_numbers_chunk to DB writer diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 11caa91eb..afae8973a 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -315,28 +315,6 @@ where // get the primary key field id let primary_key_id = 
settings_diff.new.fields_ids_map.id(&primary_key).unwrap(); - // get the fid of the `_geo.lat` and `_geo.lng` fields. - let mut field_id_map = self.index.fields_ids_map(self.wtxn)?; - - // self.index.fields_ids_map($a)? ==>> field_id_map - let geo_fields_ids = match field_id_map.id("_geo") { - Some(gfid) => { - let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid); - let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid); - // if `_geo` is faceted then we get the `lat` and `lng` - if is_sortable || is_filterable { - let field_ids = field_id_map - .insert("_geo.lat") - .zip(field_id_map.insert("_geo.lng")) - .ok_or(UserError::AttributeLimitReached)?; - Some(field_ids) - } else { - None - } - } - None => None, - }; - let pool_params = GrenadParameters { chunk_compression_type: self.indexer_config.chunk_compression_type, chunk_compression_level: self.indexer_config.chunk_compression_level, @@ -420,7 +398,6 @@ where pool_params, lmdb_writer_sx.clone(), primary_key_id, - geo_fields_ids, settings_diff.clone(), max_positions_per_attributes, ) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 133f0e3a8..24b32b6fa 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1142,6 +1142,11 @@ impl InnerIndexSettingsDiff { self.settings_update_only } + pub fn run_geo_indexing(&self) -> bool { + self.old.geo_fields_ids != self.new.geo_fields_ids + || (!self.settings_update_only && self.new.geo_fields_ids.is_some()) + } + pub fn modified_faceted_fields(&self) -> HashSet { &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields } @@ -1161,6 +1166,7 @@ pub(crate) struct InnerIndexSettings { pub proximity_precision: ProximityPrecision, pub embedding_configs: EmbeddingConfigs, pub existing_fields: HashSet, + pub geo_fields_ids: Option<(FieldId, FieldId)>, } impl InnerIndexSettings { @@ -1169,7 +1175,7 @@ impl InnerIndexSettings { let stop_words = 
stop_words.map(|sw| sw.map_data(Vec::from).unwrap()); let allowed_separators = index.allowed_separators(rtxn)?; let dictionary = index.dictionary(rtxn)?; - let fields_ids_map = index.fields_ids_map(rtxn)?; + let mut fields_ids_map = index.fields_ids_map(rtxn)?; let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?; let user_defined_searchable_fields = user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect()); @@ -1184,6 +1190,24 @@ impl InnerIndexSettings { .into_iter() .filter_map(|(field, count)| (count != 0).then_some(field)) .collect(); + // index.fields_ids_map($a)? ==>> fields_ids_map + let geo_fields_ids = match fields_ids_map.id("_geo") { + Some(gfid) => { + let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid); + let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid); + // if `_geo` is faceted then we get the `lat` and `lng` + if is_sortable || is_filterable { + let field_ids = fields_ids_map + .insert("_geo.lat") + .zip(fields_ids_map.insert("_geo.lng")) + .ok_or(UserError::AttributeLimitReached)?; + Some(field_ids) + } else { + None + } + } + None => None, + }; Ok(Self { stop_words, @@ -1198,6 +1222,7 @@ impl InnerIndexSettings { proximity_precision, embedding_configs, existing_fields, + geo_fields_ids, }) }