From 453d593ce804eac35af4f7abd1b3a88c218941d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 19 Jul 2022 09:30:19 +0200 Subject: [PATCH] Add a database containing the docids where each field exists --- infos/src/main.rs | 15 ++ milli/src/heed_codec/facet/mod.rs | 40 +++++ milli/src/index.rs | 24 ++- milli/src/update/clear_documents.rs | 2 + milli/src/update/delete_documents.rs | 11 +- .../extract/extract_facet_exists_docids.rs | 42 +++++ .../extract/extract_fid_docid_facet_values.rs | 29 +++- .../src/update/index_documents/extract/mod.rs | 47 ++++-- milli/src/update/index_documents/mod.rs | 149 ++++++++++++++++++ .../src/update/index_documents/typed_chunk.rs | 13 ++ 10 files changed, 350 insertions(+), 22 deletions(-) create mode 100644 milli/src/update/index_documents/extract/extract_facet_exists_docids.rs diff --git a/infos/src/main.rs b/infos/src/main.rs index 29a87cdcf..89aec6182 100644 --- a/infos/src/main.rs +++ b/infos/src/main.rs @@ -384,6 +384,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho field_id_word_count_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_exists_docids, exact_word_docids, exact_word_prefix_docids, field_id_docid_facet_f64s: _, @@ -402,6 +403,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho let field_id_word_count_docids_name = "field_id_word_count_docids"; let facet_id_f64_docids_name = "facet_id_f64_docids"; let facet_id_string_docids_name = "facet_id_string_docids"; + let facet_id_exists_docids_name = "facet_id_exists_docids"; let documents_name = "documents"; let mut heap = BinaryHeap::with_capacity(limit + 1); @@ -544,6 +546,17 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho heap.pop(); } } + + // List the docids where the facet exists + let db = facet_id_exists_docids.remap_data_type::(); + for result in facet_values_iter(rtxn, db, facet_id)? { + let (_fid, value) = result?; + let key = format!("{}", facet_name); + heap.push(Reverse((value.len(), key, facet_id_exists_docids_name))); + if heap.len() > limit { + heap.pop(); + } + } } for result in index.all_documents(rtxn)? { @@ -984,6 +997,7 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec) -> a facet_id_string_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, + facet_id_exists_docids, exact_word_prefix_docids, exact_word_docids, .. @@ -1007,6 +1021,7 @@ fn size_of_databases(index: &Index, rtxn: &heed::RoTxn, names: Vec) -> a FIELD_ID_WORD_COUNT_DOCIDS => field_id_word_count_docids.as_polymorph(), FACET_ID_F64_DOCIDS => facet_id_f64_docids.as_polymorph(), FACET_ID_STRING_DOCIDS => facet_id_string_docids.as_polymorph(), + FACET_ID_EXISTS_DOCIDS => facet_id_exists_docids.as_polymorph(), FIELD_ID_DOCID_FACET_F64S => field_id_docid_facet_f64s.as_polymorph(), FIELD_ID_DOCID_FACET_STRINGS => field_id_docid_facet_strings.as_polymorph(), EXACT_WORD_DOCIDS => exact_word_docids.as_polymorph(), diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs index e93fb57b9..79dbffa1d 100644 --- a/milli/src/heed_codec/facet/mod.rs +++ b/milli/src/heed_codec/facet/mod.rs @@ -25,3 +25,43 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> { None } } + +use crate::{try_split_array_at, DocumentId, FieldId}; +use std::borrow::Cow; +use std::convert::TryInto; + +pub struct FieldIdCodec; + +impl<'a> heed::BytesDecode<'a> for FieldIdCodec { + type DItem = FieldId; + + fn bytes_decode(bytes: &'a [u8]) -> Option { + let (field_id_bytes, _) = try_split_array_at(bytes)?; + let field_id = u16::from_be_bytes(field_id_bytes); + Some(field_id) + } +} + +impl<'a> heed::BytesEncode<'a> for FieldIdCodec { + type EItem = FieldId; + + fn bytes_encode(field_id: &Self::EItem) -> Option> { + Some(Cow::Owned(field_id.to_be_bytes().to_vec())) + } +} + +pub struct FieldIdDocIdCodec; + +impl<'a> heed::BytesDecode<'a> for FieldIdDocIdCodec { + type DItem = (FieldId, DocumentId); + + fn bytes_decode(bytes: &'a [u8]) -> Option { + let (field_id_bytes, bytes) = try_split_array_at(bytes)?; + let field_id = u16::from_be_bytes(field_id_bytes); + + let document_id_bytes = bytes[..4].try_into().ok()?; + let document_id = u32::from_be_bytes(document_id_bytes); + + Some((field_id, document_id)) + } +} diff --git a/milli/src/index.rs b/milli/src/index.rs index 9637b4103..816112178 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -15,7 +15,7 @@ use crate::error::{InternalError, UserError}; use crate::fields_ids_map::FieldsIdsMap; use crate::heed_codec::facet::{ FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, - FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, + FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec, }; use crate::{ default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, @@ -75,6 +75,7 @@ pub mod db_name { pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids"; pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids"; pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids"; + pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids"; pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; @@ -116,6 +117,9 @@ pub struct Index { /// Maps the position of a word prefix with all the docids where this prefix appears. pub word_prefix_position_docids: Database, + /// Maps the facet field id and the docids for which this field exists + pub facet_id_exists_docids: Database, + /// Maps the facet field id, level and the number with the docids that corresponds to it. pub facet_id_f64_docids: Database, /// Maps the facet field id and the string with the original string and docids that corresponds to it. @@ -134,7 +138,7 @@ impl Index { pub fn new>(mut options: heed::EnvOpenOptions, path: P) -> Result { use db_name::*; - options.max_dbs(16); + options.max_dbs(17); unsafe { options.flag(Flags::MdbAlwaysFreePages) }; let env = options.open(path)?; @@ -152,6 +156,9 @@ impl Index { let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?; let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?; let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?; + let facet_id_exists_docids: Database = + env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?; + let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; let field_id_docid_facet_strings = env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?; @@ -174,6 +181,7 @@ impl Index { field_id_word_count_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_exists_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, documents, @@ -806,6 +814,18 @@ impl Index { } } + /// Retrieve all the documents which contain this field id + pub fn exists_faceted_documents_ids( + &self, + rtxn: &RoTxn, + field_id: FieldId, + ) -> heed::Result { + match self.facet_id_exists_docids.get(rtxn, &field_id)? { + Some(docids) => Ok(docids), + None => Ok(RoaringBitmap::new()), + } + } + /* distinct field */ pub(crate) fn put_distinct_field( diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index d1939df7b..db438d019 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -30,6 +30,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { word_prefix_position_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_exists_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, documents, @@ -69,6 +70,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { field_id_word_count_docids.clear(self.wtxn)?; word_prefix_position_docids.clear(self.wtxn)?; facet_id_f64_docids.clear(self.wtxn)?; + facet_id_exists_docids.clear(self.wtxn)?; facet_id_string_docids.clear(self.wtxn)?; field_id_docid_facet_f64s.clear(self.wtxn)?; field_id_docid_facet_strings.clear(self.wtxn)?; diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 3b519c101..6dfdb9a7c 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -170,6 +170,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { word_position_docids, word_prefix_position_docids, facet_id_f64_docids, + facet_id_exists_docids, facet_id_string_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, @@ -424,11 +425,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { } // We delete the documents ids that are under the facet field id values. - remove_docids_from_facet_field_id_number_docids( + remove_docids_from_facet_field_id_docids( self.wtxn, facet_id_f64_docids, &self.to_delete_docids, )?; + // We delete the documents ids that are under the facet field id values. + remove_docids_from_facet_field_id_docids( + self.wtxn, + facet_id_exists_docids, + &self.to_delete_docids, + )?; remove_docids_from_facet_field_id_string_docids( self.wtxn, @@ -618,7 +625,7 @@ fn remove_docids_from_facet_field_id_string_docids<'a, C, D>( Ok(()) } -fn remove_docids_from_facet_field_id_number_docids<'a, C>( +fn remove_docids_from_facet_field_id_docids<'a, C>( wtxn: &'a mut heed::RwTxn, db: &heed::Database, to_remove: &RoaringBitmap, diff --git a/milli/src/update/index_documents/extract/extract_facet_exists_docids.rs b/milli/src/update/index_documents/extract/extract_facet_exists_docids.rs new file mode 100644 index 000000000..e7a001c08 --- /dev/null +++ b/milli/src/update/index_documents/extract/extract_facet_exists_docids.rs @@ -0,0 +1,42 @@ +use std::fs::File; +use std::io; + +use heed::{BytesDecode, BytesEncode}; + +use super::helpers::{ + create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters, +}; +use crate::heed_codec::facet::{FieldIdCodec, FieldIdDocIdCodec}; +use crate::Result; + +/// Extracts the documents ids where this field appears. +/// +/// Returns a grenad reader whose key is the field id encoded +/// with `FieldIdCodec` and the value is a document_id (u32) +/// encoded as native-endian bytes. +#[logging_timer::time] +pub fn extract_facet_exists_docids( + docid_fid_facet_number: grenad::Reader, + indexer: GrenadParameters, +) -> Result> { + let max_memory = indexer.max_memory_by_thread(); + + let mut facet_exists_docids_sorter = create_sorter( + merge_cbo_roaring_bitmaps, + indexer.chunk_compression_type, + indexer.chunk_compression_level, + indexer.max_nb_chunks, + max_memory, + ); + + let mut cursor = docid_fid_facet_number.into_cursor()?; + while let Some((key_bytes, _)) = cursor.move_on_next()? { + let (field_id, document_id) = FieldIdDocIdCodec::bytes_decode(key_bytes).unwrap(); + + let key_bytes = FieldIdCodec::bytes_encode(&field_id).unwrap(); + + facet_exists_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?; + } + + sorter_into_reader(facet_exists_docids_sorter, indexer) +} diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 628636f78..d93bde500 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -20,7 +20,7 @@ pub fn extract_fid_docid_facet_values( obkv_documents: grenad::Reader, indexer: GrenadParameters, faceted_fields: &HashSet, -) -> Result<(grenad::Reader, grenad::Reader)> { +) -> Result<(grenad::Reader, grenad::Reader, grenad::Reader)> { let max_memory = indexer.max_memory_by_thread(); let mut fid_docid_facet_numbers_sorter = create_sorter( @@ -28,7 +28,7 @@ pub fn extract_fid_docid_facet_values( indexer.chunk_compression_type, indexer.chunk_compression_level, indexer.max_nb_chunks, - max_memory.map(|m| m / 2), + max_memory.map(|m| m / 3), ); let mut fid_docid_facet_strings_sorter = create_sorter( @@ -36,7 +36,15 @@ pub fn extract_fid_docid_facet_values( indexer.chunk_compression_type, indexer.chunk_compression_level, indexer.max_nb_chunks, - max_memory.map(|m| m / 2), + max_memory.map(|m| m / 3), + ); + + let mut fid_docid_facet_exists_sorter = create_sorter( + keep_first, + indexer.chunk_compression_type, + indexer.chunk_compression_level, + indexer.max_nb_chunks, + max_memory.map(|m| m / 3), ); let mut key_buffer = Vec::new(); @@ -46,15 +54,19 @@ pub fn extract_fid_docid_facet_values( for (field_id, field_bytes) in obkv.iter() { if faceted_fields.contains(&field_id) { - let value = - serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?; - let (numbers, strings) = extract_facet_values(&value); - key_buffer.clear(); + // here, we know already that the document must be added to the “field id exists” database // prefix key with the field_id and the document_id + key_buffer.extend_from_slice(&field_id.to_be_bytes()); key_buffer.extend_from_slice(&docid_bytes); + fid_docid_facet_exists_sorter.insert(&key_buffer, ().as_bytes())?; + + let value = + serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?; + + let (numbers, strings) = extract_facet_values(&value); // insert facet numbers in sorter for number in numbers { @@ -79,7 +91,8 @@ pub fn extract_fid_docid_facet_values( Ok(( sorter_into_reader(fid_docid_facet_numbers_sorter, indexer.clone())?, - sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?, + sorter_into_reader(fid_docid_facet_strings_sorter, indexer.clone())?, + sorter_into_reader(fid_docid_facet_exists_sorter, indexer)?, )) } diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index c3c2033a6..7d26e0984 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -1,4 +1,5 @@ mod extract_docid_word_positions; +mod extract_facet_exists_docids; mod extract_facet_number_docids; mod extract_facet_string_docids; mod extract_fid_docid_facet_values; @@ -16,6 +17,7 @@ use log::debug; use rayon::prelude::*; use self::extract_docid_word_positions::extract_docid_word_positions; +use self::extract_facet_exists_docids::extract_facet_exists_docids; use self::extract_facet_number_docids::extract_facet_number_docids; use self::extract_facet_string_docids::extract_facet_string_docids; use self::extract_fid_docid_facet_values::extract_fid_docid_facet_values; @@ -53,7 +55,7 @@ pub(crate) fn data_from_obkv_documents( }) .collect::>()?; - let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = flattened_obkv_chunks + let result: Result<(Vec<_>, (Vec<_>, (Vec<_>, Vec<_>)))> = flattened_obkv_chunks .par_bridge() .map(|flattened_obkv_chunks| { send_and_extract_flattened_documents_data( @@ -72,7 +74,10 @@ pub(crate) fn data_from_obkv_documents( let ( docid_word_positions_chunks, - (docid_fid_facet_numbers_chunks, docid_fid_facet_strings_chunks), + ( + docid_fid_facet_numbers_chunks, + (docid_fid_facet_strings_chunks, docid_fid_facet_exists_chunks), + ), ) = result?; spawn_extraction_task::<_, _, Vec>>( @@ -137,6 +142,15 @@ pub(crate) fn data_from_obkv_documents( TypedChunk::FieldIdFacetNumberDocids, "field-id-facet-number-docids", ); + spawn_extraction_task::<_, _, Vec>>( + docid_fid_facet_exists_chunks.clone(), + indexer.clone(), + lmdb_writer_sx.clone(), + extract_facet_exists_docids, + merge_cbo_roaring_bitmaps, + TypedChunk::FieldIdFacetExistsDocids, + "field-id-facet-exists-docids", + ); Ok(()) } @@ -197,6 +211,7 @@ fn send_original_documents_data( /// - docid_word_positions /// - docid_fid_facet_numbers /// - docid_fid_facet_strings +/// - docid_fid_facet_exists fn send_and_extract_flattened_documents_data( flattened_documents_chunk: Result>, indexer: GrenadParameters, @@ -209,7 +224,10 @@ fn send_and_extract_flattened_documents_data( max_positions_per_attributes: Option, ) -> Result<( grenad::Reader, - (grenad::Reader, grenad::Reader), + ( + grenad::Reader, + (grenad::Reader, grenad::Reader), + ), )> { let flattened_documents_chunk = flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?; @@ -250,12 +268,15 @@ fn send_and_extract_flattened_documents_data( Ok(docid_word_positions_chunk) }, || { - let (docid_fid_facet_numbers_chunk, docid_fid_facet_strings_chunk) = - extract_fid_docid_facet_values( - flattened_documents_chunk.clone(), - indexer.clone(), - faceted_fields, - )?; + let ( + docid_fid_facet_numbers_chunk, + docid_fid_facet_strings_chunk, + docid_fid_facet_exists_chunk, + ) = extract_fid_docid_facet_values( + flattened_documents_chunk.clone(), + indexer.clone(), + faceted_fields, + )?; // send docid_fid_facet_numbers_chunk to DB writer let docid_fid_facet_numbers_chunk = @@ -273,7 +294,13 @@ fn send_and_extract_flattened_documents_data( docid_fid_facet_strings_chunk.clone(), ))); - Ok((docid_fid_facet_numbers_chunk, docid_fid_facet_strings_chunk)) + let docid_fid_facet_exists_chunk = + unsafe { as_cloneable_grenad(&docid_fid_facet_exists_chunk)? }; + + Ok(( + docid_fid_facet_numbers_chunk, + (docid_fid_facet_strings_chunk, docid_fid_facet_exists_chunk), + )) }, ); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index ba428f078..82457762e 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -1931,4 +1931,153 @@ mod tests { assert_eq!(ids.len(), map.len()); } + + #[test] + fn index_documents_check_exists_database_reindex() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let mut wtxn = index.write_txn().unwrap(); + let content = documents!([ + { + "id": 0, + "colour": 0, + }, + { + "id": 1, + "colour": [] + }, + { + "id": 2, + "colour": {} + }, + { + "id": 3, + "colour": null + }, + { + "id": 4, + "colour": [1] + }, + { + "id": 5 + }, + { + "id": 6, + "colour": { + "green": 1 + } + } + ]); + + let config = IndexerConfig::default(); + let indexing_config = IndexDocumentsConfig::default(); + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) + .unwrap(); + builder.add_documents(content).unwrap(); + builder.execute().unwrap(); + + wtxn.commit().unwrap(); + + let mut wtxn = index.write_txn().unwrap(); + let mut builder = update::Settings::new(&mut wtxn, &index, &config); + + let faceted_fields = hashset!(S("colour")); + builder.set_filterable_fields(faceted_fields); + builder.execute(|_| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + let facets = index.faceted_fields(&rtxn).unwrap(); + assert_eq!(facets, hashset!(S("colour"), S("colour.green"))); + + let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); + let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); + + let bitmap_colour = index.facet_id_exists_docids.get(&rtxn, &colour_id).unwrap().unwrap(); + assert_eq!(bitmap_colour.into_iter().collect::>(), vec![0, 1, 2, 3, 4, 6]); + + let bitmap_colour_green = + index.facet_id_exists_docids.get(&rtxn, &colour_green_id).unwrap().unwrap(); + assert_eq!(bitmap_colour_green.into_iter().collect::>(), vec![6]); + } + + #[test] + fn index_documents_check_exists_database() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let config = IndexerConfig::default(); + + let mut wtxn = index.write_txn().unwrap(); + let mut builder = update::Settings::new(&mut wtxn, &index, &config); + + let faceted_fields = hashset!(S("colour")); + builder.set_filterable_fields(faceted_fields); + builder.execute(|_| ()).unwrap(); + wtxn.commit().unwrap(); + + let content = documents!([ + { + "id": 0, + "colour": 0, + }, + { + "id": 1, + "colour": [] + }, + { + "id": 2, + "colour": {} + }, + { + "id": 3, + "colour": null + }, + { + "id": 4, + "colour": [1] + }, + { + "id": 5 + }, + { + "id": 6, + "colour": { + "green": 1 + } + } + ]); + + let indexing_config = IndexDocumentsConfig::default(); + + let mut wtxn = index.write_txn().unwrap(); + + let mut builder = + IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ()) + .unwrap(); + builder.add_documents(content).unwrap(); + builder.execute().unwrap(); + + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + let facets = index.faceted_fields(&rtxn).unwrap(); + assert_eq!(facets, hashset!(S("colour"), S("colour.green"))); + + let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); + let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); + + let bitmap_colour = index.facet_id_exists_docids.get(&rtxn, &colour_id).unwrap().unwrap(); + assert_eq!(bitmap_colour.into_iter().collect::>(), vec![0, 1, 2, 3, 4, 6]); + + let bitmap_colour_green = + index.facet_id_exists_docids.get(&rtxn, &colour_green_id).unwrap().unwrap(); + assert_eq!(bitmap_colour_green.into_iter().collect::>(), vec![6]); + } } diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 26b97c3a0..e501e5efd 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -35,6 +35,7 @@ pub(crate) enum TypedChunk { WordPairProximityDocids(grenad::Reader), FieldIdFacetStringDocids(grenad::Reader), FieldIdFacetNumberDocids(grenad::Reader), + FieldIdFacetExistsDocids(grenad::Reader), GeoPoints(grenad::Reader), } @@ -146,6 +147,18 @@ pub(crate) fn write_typed_chunk_into_index( )?; is_merged_database = true; } + TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids_iter) => { + append_entries_into_database( + facet_id_exists_docids_iter, + &index.facet_id_exists_docids, + wtxn, + index_is_empty, + |value, _buffer| Ok(value), + merge_cbo_roaring_bitmaps, + ) + .unwrap(); + is_merged_database = true; + } TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { append_entries_into_database( word_pair_proximity_docids_iter,