diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 5394a6e86..c9fd2dffb 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -14,6 +14,7 @@ use super::FacetKind; use crate::heed_codec::facet::OrderedF64Codec; use crate::update::del_add::DelAdd; use crate::update::new::channel::FieldIdDocidFacetSender; +use crate::update::new::extract::perm_json_p; use crate::update::new::indexer::document_changes::{ extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, }; @@ -81,7 +82,7 @@ impl FacetedDocidsExtractor { inner.current(rtxn, index, context.db_fields_ids_map)?, inner.external_document_id(), new_fields_ids_map.deref_mut(), - &mut |fid, value| { + &mut |fid, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), @@ -90,6 +91,7 @@ impl FacetedDocidsExtractor { DelAddFacetValue::insert_del, docid, fid, + depth, value, ) }, @@ -100,7 +102,7 @@ impl FacetedDocidsExtractor { inner.current(rtxn, index, context.db_fields_ids_map)?, inner.external_document_id(), new_fields_ids_map.deref_mut(), - &mut |fid, value| { + &mut |fid, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), @@ -109,6 +111,7 @@ impl FacetedDocidsExtractor { DelAddFacetValue::insert_del, docid, fid, + depth, value, ) }, @@ -119,7 +122,7 @@ impl FacetedDocidsExtractor { inner.merged(rtxn, index, context.db_fields_ids_map)?, inner.external_document_id(), new_fields_ids_map.deref_mut(), - &mut |fid, value| { + &mut |fid, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), @@ -128,6 +131,7 @@ impl FacetedDocidsExtractor { DelAddFacetValue::insert_add, docid, fid, + depth, value, ) }, @@ -138,7 +142,7 @@ impl FacetedDocidsExtractor { inner.inserted(), inner.external_document_id(), new_fields_ids_map.deref_mut(), - &mut |fid, value| { + &mut |fid, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), @@ -147,6 +151,7 @@ impl FacetedDocidsExtractor { DelAddFacetValue::insert_add, docid, fid, + depth, value, ) }, @@ -166,6 +171,7 @@ impl FacetedDocidsExtractor { facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind), docid: DocumentId, fid: FieldId, + depth: perm_json_p::Depth, value: &Value, ) -> Result<()> { let mut buffer = BVec::new_in(doc_alloc); @@ -217,7 +223,7 @@ impl FacetedDocidsExtractor { } // Null // key: fid - Value::Null => { + Value::Null if depth == perm_json_p::Depth::OnBaseKey => { buffer.clear(); buffer.push(FacetKind::Null as u8); buffer.extend_from_slice(&fid.to_be_bytes()); @@ -225,13 +231,13 @@ impl FacetedDocidsExtractor { } // Empty // key: fid - Value::Array(a) if a.is_empty() => { + Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); cache_fn(cached_sorter, &buffer, docid) } - Value::Object(o) if o.is_empty() => { + Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); diff --git a/crates/milli/src/update/new/extract/faceted/facet_document.rs b/crates/milli/src/update/new/extract/faceted/facet_document.rs index 141af7fbe..2820f04b9 100644 --- a/crates/milli/src/update/new/extract/faceted/facet_document.rs +++ b/crates/milli/src/update/new/extract/faceted/facet_document.rs @@ -10,15 +10,18 @@ pub fn extract_document_facets<'doc>( document: impl Document<'doc>, external_document_id: &str, field_id_map: &mut GlobalFieldsIdsMap, - facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>, + facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>, ) -> Result<()> { for res in document.iter_top_level_fields() { let (field_name, value) = res?; - let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) { - Some(field_id) => facet_fn(field_id, value), - None => Err(UserError::AttributeLimitReached.into()), - }; + let mut tokenize_field = + |name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map + .id_or_insert(name) + { + Some(field_id) => facet_fn(field_id, depth, value), + None => Err(UserError::AttributeLimitReached.into()), + }; // if the current field is searchable or contains a searchable attribute if perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]) { @@ -29,6 +32,7 @@ pub fn extract_document_facets<'doc>( Some(attributes_to_extract), &[], // skip no attributes field_name, + perm_json_p::Depth::OnBaseKey, &mut tokenize_field, )?, Value::Array(array) => perm_json_p::seek_leaf_values_in_array( @@ -36,9 +40,10 @@ pub fn extract_document_facets<'doc>( Some(attributes_to_extract), &[], // skip no attributes field_name, + perm_json_p::Depth::OnBaseKey, &mut tokenize_field, )?, - value => tokenize_field(field_name, &value)?, + value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?, } } } @@ -51,8 +56,8 @@ pub fn extract_document_facets<'doc>( .zip(field_id_map.id_or_insert("_geo.lng")) .ok_or(UserError::AttributeLimitReached)?; - facet_fn(lat_fid, &lat.into())?; - facet_fn(lng_fid, &lng.into())?; + facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?; + facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?; } } } diff --git a/crates/milli/src/update/new/extract/mod.rs b/crates/milli/src/update/new/extract/mod.rs index 7364434ee..17ee4209c 100644 --- a/crates/milli/src/update/new/extract/mod.rs +++ b/crates/milli/src/update/new/extract/mod.rs @@ -59,15 +59,24 @@ pub mod perm_json_p { && selector[key.len()..].chars().next().map(|c| c == SPLIT_SYMBOL).unwrap_or(true) } + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub enum Depth { + /// The perm json ptr is currently on the field of an object + OnBaseKey, + /// The perm json ptr is currently inside of an array + InsideArray, + } + pub fn seek_leaf_values_in_object( value: &Map, selectors: Option<&[&str]>, skip_selectors: &[&str], base_key: &str, - seeker: &mut impl FnMut(&str, &Value) -> Result<()>, + base_depth: Depth, + seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>, ) -> Result<()> { if value.is_empty() { - seeker(base_key, &Value::Object(Map::with_capacity(0)))?; + seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?; } for (key, value) in value.iter() { @@ -87,6 +96,7 @@ pub mod perm_json_p { selectors, skip_selectors, &base_key, + Depth::OnBaseKey, seeker, ), Value::Array(array) => seek_leaf_values_in_array( @@ -94,9 +104,10 @@ pub mod perm_json_p { selectors, skip_selectors, &base_key, + Depth::OnBaseKey, seeker, ), - value => seeker(&base_key, value), + value => seeker(&base_key, Depth::OnBaseKey, value), }?; } } @@ -109,21 +120,32 @@ pub mod perm_json_p { selectors: Option<&[&str]>, skip_selectors: &[&str], base_key: &str, - seeker: &mut impl FnMut(&str, &Value) -> Result<()>, + base_depth: Depth, + seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>, ) -> Result<()> { if values.is_empty() { - seeker(base_key, &Value::Array(vec![]))?; + seeker(base_key, base_depth, &Value::Array(vec![]))?; } for value in values { match value { - Value::Object(object) => { - seek_leaf_values_in_object(object, selectors, skip_selectors, base_key, seeker) - } - Value::Array(array) => { - seek_leaf_values_in_array(array, selectors, skip_selectors, base_key, seeker) - } - value => seeker(base_key, value), + Value::Object(object) => seek_leaf_values_in_object( + object, + selectors, + skip_selectors, + base_key, + Depth::InsideArray, + seeker, + ), + Value::Array(array) => seek_leaf_values_in_array( + array, + selectors, + skip_selectors, + base_key, + Depth::InsideArray, + seeker, + ), + value => seeker(base_key, Depth::InsideArray, value), }?; } diff --git a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs index bc7a2acd3..06f5479e4 100644 --- a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs +++ b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs @@ -5,7 +5,7 @@ use serde_json::Value; use crate::update::new::document::Document; use crate::update::new::extract::perm_json_p::{ - seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, + seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth, }; use crate::{ FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError, @@ -35,7 +35,7 @@ impl<'a> DocumentTokenizer<'a> { for entry in document.iter_top_level_fields() { let (field_name, value) = entry?; - let mut tokenize_field = |field_name: &str, value: &Value| { + let mut tokenize_field = |field_name: &str, _depth, value: &Value| { let Some(field_id) = field_id_map.id_or_insert(field_name) else { return Err(UserError::AttributeLimitReached.into()); }; @@ -96,6 +96,7 @@ impl<'a> DocumentTokenizer<'a> { self.attribute_to_extract, self.attribute_to_skip, field_name, + Depth::OnBaseKey, &mut tokenize_field, )?, Value::Array(array) => seek_leaf_values_in_array( @@ -103,9 +104,10 @@ impl<'a> DocumentTokenizer<'a> { self.attribute_to_extract, self.attribute_to_skip, field_name, + Depth::OnBaseKey, &mut tokenize_field, )?, - value => tokenize_field(field_name, &value)?, + value => tokenize_field(field_name, Depth::OnBaseKey, &value)?, } } }