diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs
index 49355593c..2027b4db8 100644
--- a/crates/milli/src/update/new/channel.rs
+++ b/crates/milli/src/update/new/channel.rs
@@ -8,7 +8,7 @@ use roaring::RoaringBitmap;
 
 use super::extract::FacetKind;
 use super::StdResult;
-use crate::index::main_key::GEO_RTREE_KEY;
+use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
 use crate::index::IndexEmbeddingConfig;
 use crate::update::new::KvReaderFieldId;
 use crate::vector::Embedding;
@@ -458,4 +458,21 @@ impl GeoSender<'_> {
             }))
             .map_err(|_| SendError(()))
     }
+
+    /// Serializes `bitmap` and sends it to the writer as the value stored under
+    /// `GEO_FACETED_DOCUMENTS_IDS_KEY` in the main database.
+    pub fn set_geo_faceted(&self, bitmap: &RoaringBitmap) -> StdResult<(), SendError<()>> {
+        let mut buffer = Vec::with_capacity(bitmap.serialized_size());
+        bitmap.serialize_into(&mut buffer).expect("writing into a `Vec` cannot fail");
+
+        self.0
+            .send(WriterOperation::DbOperation(DbOperation {
+                database: Database::Main,
+                entry: EntryOperation::Write(KeyValueEntry::from_small_key_value(
+                    GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(),
+                    &buffer,
+                )),
+            }))
+            .map_err(|_| SendError(()))
+    }
 }
diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs
index 692277597..ace6b236b 100644
--- a/crates/milli/src/update/new/document.rs
+++ b/crates/milli/src/update/new/document.rs
@@ -303,6 +303,8 @@ where
 {
     // will be used in 'inject_vectors
     let vectors_value: Box<RawValue>;
+    let geo_lat: Box<RawValue>;
+    let geo_lng: Box<RawValue>;
 
     document_buffer.clear();
     let mut unordered_field_buffer = Vec::new();
@@ -352,8 +354,29 @@ where
         unordered_field_buffer.push((vectors_fid, &vectors_value));
     }
 
+    if let Some(geo_value) = document.geo_field()? {
+        #[derive(serde::Deserialize)]
+        struct Geo {
+            lat: serde_json::Value,
+            lng: serde_json::Value,
+        }
+
+        // We must flatten and generate the fields ids for the geo sub fields
+        if let Ok(Geo { lat, lng }) = serde_json::from_str(geo_value.get()) {
+            let lat_id =
+                fields_ids_map.id_or_insert("_geo.lat").ok_or(UserError::AttributeLimitReached)?;
+            geo_lat = serde_json::value::to_raw_value(&lat).unwrap();
+            unordered_field_buffer.push((lat_id, &geo_lat));
+
+            let lng_id =
+                fields_ids_map.id_or_insert("_geo.lng").ok_or(UserError::AttributeLimitReached)?;
+            geo_lng = serde_json::value::to_raw_value(&lng).unwrap();
+            unordered_field_buffer.push((lng_id, &geo_lng));
+        }
+    }
+
     unordered_field_buffer.sort_by_key(|(fid, _)| *fid);
     for (fid, value) in unordered_field_buffer.iter() {
         writer.insert(*fid, value.get().as_bytes()).unwrap();
     }
 
@@ -406,6 +429,7 @@ impl<'doc> Versions<'doc> {
     pub fn is_empty(&self) -> bool {
         self.data.is_empty()
     }
+
     pub fn top_level_field(&self, k: &str) -> Option<&'doc RawValue> {
         if k == RESERVED_VECTORS_FIELD_NAME || k == "_geo" {
             return None;
diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs
index 67631cfc9..c81f84f43 100644
--- a/crates/milli/src/update/new/merger.rs
+++ b/crates/milli/src/update/new/merger.rs
@@ -27,6 +27,8 @@ where
     MSP: Fn() -> bool + Sync,
 {
     let mut rtree = index.geo_rtree(rtxn)?.unwrap_or_default();
+    let mut faceted = index.geo_faceted_documents_ids(rtxn)?;
+
     for data in datastore {
         if must_stop_processing() {
             return Err(InternalError::AbortedIndexation.into());
@@ -35,12 +37,19 @@ where
         let mut frozen = data.into_inner().freeze()?;
         for result in frozen.iter_and_clear_removed() {
             let extracted_geo_point = result?;
-            rtree.remove(&GeoPoint::from(extracted_geo_point));
+            // `debug_assert!` does not evaluate its argument in release builds, so the
+            // mutation must happen outside of the macro; only the check goes inside.
+            let removed = rtree.remove(&GeoPoint::from(extracted_geo_point));
+            debug_assert!(removed.is_some());
+            let removed = faceted.remove(extracted_geo_point.docid);
+            debug_assert!(removed);
         }
 
         for result in frozen.iter_and_clear_inserted() {
             let extracted_geo_point = result?;
             rtree.insert(GeoPoint::from(extracted_geo_point));
+            let inserted = faceted.insert(extracted_geo_point.docid);
+            debug_assert!(inserted);
         }
     }
 
@@ -51,6 +60,7 @@ where
 
     let rtree_mmap = unsafe { Mmap::map(&file)? };
     geo_sender.set_rtree(rtree_mmap).unwrap();
+    geo_sender.set_geo_faceted(&faceted).unwrap();
 
     Ok(())
 }