From ea016d97afd2dfdae2fe15a12a7bfd6554d3a097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 14 Mar 2023 18:08:12 +0100 Subject: [PATCH] Implementing an IS EMPTY filter --- filter-parser/src/condition.rs | 17 +++++ filter-parser/src/error.rs | 6 +- filter-parser/src/lib.rs | 25 +++++-- filter-parser/src/value.rs | 1 + meilisearch/tests/search/errors.rs | 4 +- milli/src/index.rs | 20 +++++- milli/src/search/facet/filter.rs | 4 ++ milli/src/update/clear_documents.rs | 2 + milli/src/update/delete_documents.rs | 8 +++ .../extract/extract_fid_docid_facet_values.rs | 29 +++++++-- .../src/update/index_documents/extract/mod.rs | 65 +++++++++++++------ .../src/update/index_documents/typed_chunk.rs | 12 ++++ 12 files changed, 156 insertions(+), 37 deletions(-) diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index fe424539f..9abe1c6ea 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -21,6 +21,7 @@ pub enum Condition<'a> { Equal(Token<'a>), NotEqual(Token<'a>), Null, + Empty, Exists, LowerThan(Token<'a>), LowerThanOrEqual(Token<'a>), @@ -61,6 +62,22 @@ pub fn parse_is_not_null(input: Span) -> IResult { Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null })))) } +/// empty = value "IS" WS+ "EMPTY" +pub fn parse_is_empty(input: Span) -> IResult { + let (input, key) = parse_value(input)?; + + let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?; + Ok((input, FilterCondition::Condition { fid: key, op: Empty })) +} + +/// empty = value "IS" WS+ "NOT" WS+ "EMPTY" +pub fn parse_is_not_empty(input: Span) -> IResult { + let (input, key) = parse_value(input)?; + + let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?; + Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty })))) +} + /// exist = value "EXISTS" pub fn parse_exists(input: Span) -> IResult { let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?; diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index fc6ad8f6d..cbb83c841 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> { ErrorKind::MissingClosingDelimiter(c) => { writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? } - ErrorKind::InvalidPrimary if input.trim().is_empty() => { - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")? - } ErrorKind::InvalidPrimary => { - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)? + let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)? } ErrorKind::ExpectedEof => { writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index c75ada205..69eb6700f 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -47,7 +47,10 @@ mod value; use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; -use condition::{parse_exists, parse_is_not_null, parse_is_null, parse_not_exists}; +use condition::{ + parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null, + parse_not_exists, +}; use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; use nom::branch::alt; @@ -416,6 +419,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult { parse_condition, parse_is_null, parse_is_not_null, + parse_is_empty, + parse_is_not_empty, parse_exists, parse_not_exists, parse_to, @@ -509,6 +514,13 @@ pub mod tests { insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL"); insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)"); + // Test EMPTY + NOT EMPTY + insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY"); + insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)"); + insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)"); + insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY"); + insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)"); + // Test EXISTS + NOT EXITS insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS"); insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); @@ -587,7 +599,7 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("'OR'"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. 1:5 'OR' "###); @@ -597,12 +609,12 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("channel Ponce"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 1:14 channel Ponce "###); insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. 19:19 channel = Ponce OR "###); @@ -667,12 +679,12 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); @@ -812,6 +824,7 @@ impl<'a> std::fmt::Display for Condition<'a> { Condition::Equal(token) => write!(f, "= {token}"), Condition::NotEqual(token) => write!(f, "!= {token}"), Condition::Null => write!(f, "IS NULL"), + Condition::Empty => write!(f, "IS EMPTY"), Condition::Exists => write!(f, "EXISTS"), Condition::LowerThan(token) => write!(f, "< {token}"), Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index f8f1c43bc..518c0a25a 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -188,6 +188,7 @@ fn is_keyword(s: &str) -> bool { | "EXISTS" | "IS" | "NULL" + | "EMPTY" | "_geoRadius" | "_geoBoundingBox" ) diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index ab42700f3..2a0e4a39d 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() { index.wait_task(1).await; let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() { index.wait_task(1).await; let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/milli/src/index.rs b/milli/src/index.rs index 3316028df..c60857bd0 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -81,6 +81,7 @@ pub mod db_name { pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids"; pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids"; + pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids"; pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; @@ -130,9 +131,10 @@ pub struct Index { /// Maps the facet field id and the docids for which this field exists pub facet_id_exists_docids: Database, - /// Maps the facet field id and the docids for which this field is set as null pub facet_id_is_null_docids: Database, + /// Maps the facet field id and the docids for which this field is considered empty + pub facet_id_is_empty_docids: Database, /// Maps the facet field id and ranges of numbers with the docids that corresponds to them. pub facet_id_f64_docids: Database, FacetGroupValueCodec>, @@ -157,7 +159,7 @@ impl Index { ) -> Result { use db_name::*; - options.max_dbs(20); + options.max_dbs(21); unsafe { options.flag(Flags::MdbAlwaysFreePages) }; let env = options.open(path)?; @@ -180,6 +182,7 @@ impl Index { let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?; + let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?; let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; let field_id_docid_facet_strings = @@ -207,6 +210,7 @@ impl Index { facet_id_string_docids, facet_id_exists_docids, facet_id_is_null_docids, + facet_id_is_empty_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, documents, @@ -851,6 +855,18 @@ impl Index { } } + /// Retrieve all the documents which contain this field id and that is considered empty + pub fn empty_faceted_documents_ids( + &self, + rtxn: &RoTxn, + field_id: FieldId, + ) -> heed::Result { + match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? { + Some(docids) => Ok(docids), + None => Ok(RoaringBitmap::new()), + } + } + /// Retrieve all the documents which contain this field id pub fn exists_faceted_documents_ids( &self, diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index df42725c5..0c11b737e 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -223,6 +223,10 @@ impl<'a> Filter<'a> { let is_null = index.null_faceted_documents_ids(rtxn, field_id)?; return Ok(is_null); } + Condition::Empty => { + let is_empty = index.empty_faceted_documents_ids(rtxn, field_id)?; + return Ok(is_empty); + } Condition::Exists => { let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; return Ok(exist); diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 7ac09a785..326e0825d 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -35,6 +35,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { facet_id_string_docids, facet_id_exists_docids, facet_id_is_null_docids, + facet_id_is_empty_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, documents, @@ -88,6 +89,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { facet_id_f64_docids.clear(self.wtxn)?; facet_id_exists_docids.clear(self.wtxn)?; facet_id_is_null_docids.clear(self.wtxn)?; + facet_id_is_empty_docids.clear(self.wtxn)?; facet_id_string_docids.clear(self.wtxn)?; field_id_docid_facet_f64s.clear(self.wtxn)?; field_id_docid_facet_strings.clear(self.wtxn)?; diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index bb232d7cc..6f2fa5e5a 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -246,6 +246,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { script_language_docids, facet_id_exists_docids, facet_id_is_null_docids, + facet_id_is_empty_docids, documents, } = self.index; @@ -531,6 +532,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { &self.to_delete_docids, )?; + // We delete the documents ids that are under the facet field id values. + remove_docids_from_facet_id_docids( + self.wtxn, + facet_id_is_empty_docids, + &self.to_delete_docids, + )?; + self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; Ok(DetailedDocumentDeletionResult { diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 6460af812..8f3d9408d 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -21,6 +21,7 @@ pub struct ExtractedFacetValues { pub docid_fid_facet_numbers_chunk: grenad::Reader, pub docid_fid_facet_strings_chunk: grenad::Reader, pub fid_facet_is_null_docids_chunk: grenad::Reader, + pub fid_facet_is_empty_docids_chunk: grenad::Reader, pub fid_facet_exists_docids_chunk: grenad::Reader, } @@ -56,6 +57,7 @@ pub fn extract_fid_docid_facet_values( let mut facet_exists_docids = BTreeMap::::new(); let mut facet_is_null_docids = BTreeMap::::new(); + let mut facet_is_empty_docids = BTreeMap::::new(); let mut key_buffer = Vec::new(); let mut cursor = obkv_documents.into_cursor()?; @@ -80,10 +82,14 @@ pub fn extract_fid_docid_facet_values( key_buffer.extend_from_slice(docid_bytes); let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?; + match extract_facet_values(&value) { FilterableValues::Null => { facet_is_null_docids.entry(field_id).or_default().insert(document); } + FilterableValues::Empty => { + facet_is_empty_docids.entry(field_id).or_default().insert(document); + } FilterableValues::Values { numbers, strings } => { // insert facet numbers in sorter for number in numbers { @@ -140,22 +146,34 @@ pub fn extract_fid_docid_facet_values( } let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?; + let mut facet_is_empty_docids_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); + for (fid, bitmap) in facet_is_empty_docids.into_iter() { + let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap(); + facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?; + } + let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?; + Ok(ExtractedFacetValues { docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?, docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?, fid_facet_is_null_docids_chunk: facet_is_null_docids_reader, + fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader, fid_facet_exists_docids_chunk: facet_exists_docids_reader, }) } /// Represent what a document field contains. enum FilterableValues { + /// Corresponds to the JSON `null` value. Null, + /// Corresponds to either, an empty string `""`, an empty array `[]`, or an empty object `{}`. + Empty, /// Represents all the numbers and strings values found in this document field. - Values { - numbers: Vec, - strings: Vec<(String, String)>, - }, + Values { numbers: Vec, strings: Vec<(String, String)> }, } fn extract_facet_values(value: &Value) -> FilterableValues { @@ -192,6 +210,9 @@ fn extract_facet_values(value: &Value) -> FilterableValues { match value { Value::Null => FilterableValues::Null, + Value::String(s) if s.is_empty() => FilterableValues::Empty, + Value::Array(a) if a.is_empty() => FilterableValues::Empty, + Value::Object(o) if o.is_empty() => FilterableValues::Empty, otherwise => { let mut numbers = Vec::new(); let mut strings = Vec::new(); diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 4a5c9b64c..641a8a210 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -55,22 +55,23 @@ pub(crate) fn data_from_obkv_documents( .collect::>()?; #[allow(clippy::type_complexity)] - let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>))))> = flattened_obkv_chunks - .par_bridge() - .map(|flattened_obkv_chunks| { - send_and_extract_flattened_documents_data( - flattened_obkv_chunks, - indexer, - lmdb_writer_sx.clone(), - &searchable_fields, - &faceted_fields, - primary_key_id, - geo_fields_ids, - &stop_words, - max_positions_per_attributes, - ) - }) - .collect(); + let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))))> = + flattened_obkv_chunks + .par_bridge() + .map(|flattened_obkv_chunks| { + send_and_extract_flattened_documents_data( + flattened_obkv_chunks, + indexer, + lmdb_writer_sx.clone(), + &searchable_fields, + &faceted_fields, + primary_key_id, + geo_fields_ids, + &stop_words, + max_positions_per_attributes, + ) + }) + .collect(); let ( docid_word_positions_chunks, @@ -78,7 +79,10 @@ pub(crate) fn data_from_obkv_documents( docid_fid_facet_numbers_chunks, ( docid_fid_facet_strings_chunks, - (facet_is_null_docids_chunks, facet_exists_docids_chunks), + ( + facet_is_null_docids_chunks, + (facet_is_empty_docids_chunks, facet_exists_docids_chunks), + ), ), ), ) = result?; @@ -115,6 +119,22 @@ pub(crate) fn data_from_obkv_documents( }); } + // merge facet_is_empty_docids and send them as a typed chunk + { + let lmdb_writer_sx = lmdb_writer_sx.clone(); + rayon::spawn(move || { + debug!("merge {} database", "facet-id-is-empty-docids"); + match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) { + Ok(reader) => { + let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader))); + } + Err(e) => { + let _ = lmdb_writer_sx.send(Err(e)); + } + } + }); + } + spawn_extraction_task::<_, _, Vec>>( docid_word_positions_chunks.clone(), indexer, @@ -254,7 +274,10 @@ fn send_and_extract_flattened_documents_data( grenad::Reader, ( grenad::Reader, - (grenad::Reader, (grenad::Reader, grenad::Reader)), + ( + grenad::Reader, + (grenad::Reader, (grenad::Reader, grenad::Reader)), + ), ), )> { let flattened_documents_chunk = @@ -304,6 +327,7 @@ fn send_and_extract_flattened_documents_data( docid_fid_facet_numbers_chunk, docid_fid_facet_strings_chunk, fid_facet_is_null_docids_chunk, + fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk, } = extract_fid_docid_facet_values( flattened_documents_chunk.clone(), @@ -331,7 +355,10 @@ fn send_and_extract_flattened_documents_data( docid_fid_facet_numbers_chunk, ( docid_fid_facet_strings_chunk, - (fid_facet_is_null_docids_chunk, fid_facet_exists_docids_chunk), + ( + fid_facet_is_null_docids_chunk, + (fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk), + ), ), )) }, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 79f2e2c55..e1fc01ca9 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -40,6 +40,7 @@ pub(crate) enum TypedChunk { FieldIdFacetNumberDocids(grenad::Reader), FieldIdFacetExistsDocids(grenad::Reader), FieldIdFacetIsNullDocids(grenad::Reader), + FieldIdFacetIsEmptyDocids(grenad::Reader), GeoPoints(grenad::Reader), ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>), } @@ -173,6 +174,17 @@ pub(crate) fn write_typed_chunk_into_index( )?; is_merged_database = true; } + TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => { + append_entries_into_database( + facet_id_is_empty_docids, + &index.facet_id_is_empty_docids, + wtxn, + index_is_empty, + |value, _buffer| Ok(value), + merge_cbo_roaring_bitmaps, + )?; + is_merged_database = true; + } TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { append_entries_into_database( word_pair_proximity_docids_iter,