diff --git a/meilisearch/tests/search/facet_search.rs b/meilisearch/tests/search/facet_search.rs index 7628f2fed..446396856 100644 --- a/meilisearch/tests/search/facet_search.rs +++ b/meilisearch/tests/search/facet_search.rs @@ -1,3 +1,4 @@ +use meili_snap::snapshot; use once_cell::sync::Lazy; use serde_json::{json, Value}; @@ -56,6 +57,54 @@ async fn simple_facet_search() { assert_eq!(response["facetHits"].as_array().unwrap().len(), 1); } +#[actix_rt::test] +async fn advanced_facet_search() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.update_settings_filterable_attributes(json!(["genres"])).await; + index.update_settings_typo_tolerance(json!({ "enabled": false })).await; + index.add_documents(documents, None).await; + index.wait_task(2).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await; + + snapshot!(code, @"200 OK"); + snapshot!(response["facetHits"].as_array().unwrap().len(), @"0"); + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "àdventure"})).await; + + snapshot!(code, @"200 OK"); + snapshot!(response["facetHits"].as_array().unwrap().len(), @"1"); +} + +#[actix_rt::test] +async fn more_advanced_facet_search() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.update_settings_filterable_attributes(json!(["genres"])).await; + index.update_settings_typo_tolerance(json!({ "disableOnWords": ["adventre"] })).await; + index.add_documents(documents, None).await; + index.wait_task(2).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await; + + snapshot!(code, @"200 OK"); + snapshot!(response["facetHits"].as_array().unwrap().len(), @"0"); + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "adventure"})).await; + + 
snapshot!(code, @"200 OK"); + snapshot!(response["facetHits"].as_array().unwrap().len(), @"1"); +} + #[actix_rt::test] async fn non_filterable_facet_search_error() { let server = Server::new().await; diff --git a/milli/src/heed_codec/beu16_str_codec.rs b/milli/src/heed_codec/beu16_str_codec.rs new file mode 100644 index 000000000..d1b85d47f --- /dev/null +++ b/milli/src/heed_codec/beu16_str_codec.rs @@ -0,0 +1,27 @@ +use std::borrow::Cow; +use std::convert::TryInto; +use std::str; + +pub struct BEU16StrCodec; + +impl<'a> heed::BytesDecode<'a> for BEU16StrCodec { + type DItem = (u16, &'a str); + + fn bytes_decode(bytes: &'a [u8]) -> Option { + let (n_bytes, str_bytes) = bytes.split_at(2); + let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?; + let s = str::from_utf8(str_bytes).ok()?; + Some((n, s)) + } +} + +impl<'a> heed::BytesEncode<'a> for BEU16StrCodec { + type EItem = (u16, &'a str); + + fn bytes_encode((n, s): &Self::EItem) -> Option> { + let mut bytes = Vec::with_capacity(s.len() + 2); + bytes.extend_from_slice(&n.to_be_bytes()); + bytes.extend_from_slice(s.as_bytes()); + Some(Cow::Owned(bytes)) + } +} diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs index 666f68e28..d04eaa644 100644 --- a/milli/src/heed_codec/mod.rs +++ b/milli/src/heed_codec/mod.rs @@ -1,3 +1,4 @@ +mod beu16_str_codec; mod beu32_str_codec; mod byte_slice_ref; pub mod facet; @@ -14,6 +15,7 @@ mod str_str_u8_codec; pub use byte_slice_ref::ByteSliceRefCodec; pub use str_ref::StrRefCodec; +pub use self::beu16_str_codec::BEU16StrCodec; pub use self::beu32_str_codec::BEU32StrCodec; pub use self::field_id_word_count_codec::FieldIdWordCountCodec; pub use self::fst_set_codec::FstSetCodec; diff --git a/milli/src/index.rs b/milli/src/index.rs index 847ab0088..0ddfcda94 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeSet, HashMap, HashSet}; use 
std::fs::File; use std::mem::size_of; use std::path::Path; @@ -20,7 +20,9 @@ use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec, OrderedF64Codec, }; -use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec}; +use crate::heed_codec::{ + BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, +}; use crate::readable_slices::ReadableSlices; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, @@ -95,6 +97,7 @@ pub mod db_name { pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids"; pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids"; pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids"; + pub const FACET_ID_NORMALIZED_STRING_STRINGS: &str = "facet-id-normalized-string-strings"; pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst"; pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; @@ -156,6 +159,8 @@ pub struct Index { pub facet_id_f64_docids: Database, FacetGroupValueCodec>, /// Maps the facet field id and ranges of strings with the docids that corresponds to them. pub facet_id_string_docids: Database, FacetGroupValueCodec>, + /// Maps the facet field id and a normalized-for-search facet string with the set of original facet strings it was derived from. + pub facet_id_normalized_string_strings: Database>>, /// Maps the facet field id of the string facets with an FST containing all the facets values. 
pub facet_id_string_fst: Database, FstSetCodec>, @@ -180,7 +185,7 @@ impl Index { ) -> Result { use db_name::*; - options.max_dbs(24); + options.max_dbs(25); unsafe { options.flag(Flags::MdbAlwaysFreePages) }; let env = options.open(path)?; @@ -210,6 +215,8 @@ impl Index { let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?; let facet_id_string_docids = env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?; + let facet_id_normalized_string_strings = + env.create_database(&mut wtxn, Some(FACET_ID_NORMALIZED_STRING_STRINGS))?; let facet_id_string_fst = env.create_database(&mut wtxn, Some(FACET_ID_STRING_FST))?; let facet_id_exists_docids = env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?; @@ -245,6 +252,7 @@ impl Index { field_id_word_count_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_normalized_string_strings, facet_id_string_fst, facet_id_exists_docids, facet_id_is_null_docids, diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 55b283931..3e5f63fd5 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -51,9 +51,10 @@ pub use self::error::{ pub use self::external_documents_ids::ExternalDocumentsIds; pub use self::fields_ids_map::FieldsIdsMap; pub use self::heed_codec::{ - BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec, - CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec, - RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec, UncheckedU8StrStrCodec, + BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, + CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, + RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec, + UncheckedU8StrStrCodec, }; pub use self::index::Index; pub use self::search::{ diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 65e78caa9..e1ee37d79 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,5 
+1,8 @@ use std::fmt; +use std::ops::ControlFlow; +use charabia::normalizer::NormalizerOption; +use charabia::Normalize; use fst::automaton::{Automaton, Str}; use fst::{IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; @@ -14,8 +17,8 @@ use crate::error::UserError; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{ - execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, - Result, SearchContext, BEU16, + execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, + SearchContext, BEU16, }; // Building these factories is not free. @@ -301,29 +304,28 @@ impl<'a> SearchForFacetValues<'a> { match self.query.as_ref() { Some(query) => { - let query = normalize_facet(query); - let query = query.as_str(); + let options = NormalizerOption { lossy: true, ..Default::default() }; + let query = query.normalize(&options); + let query = query.as_ref(); + let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; let field_authorizes_typos = !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); if authorize_typos && field_authorizes_typos { - let mut results = vec![]; - let exact_words_fst = self.search_query.index.exact_words(rtxn)?; if exact_words_fst.map_or(false, |fst| fst.contains(query)) { - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query }; - if let Some(FacetGroupValue { bitmap, .. }) = - index.facet_id_string_docids.get(rtxn, &key)? - { - let count = search_candidates.intersection_len(&bitmap); - if count != 0 { - let value = self - .one_original_value_of(fid, query, bitmap.min().unwrap())? 
- .unwrap_or_else(|| query.to_string()); - results.push(FacetValueHit { value, count }); - } + let mut results = vec![]; + if fst.contains(query) { + self.fetch_original_facets_using_normalized( + fid, + query, + query, + &search_candidates, + &mut results, + )?; } + Ok(results) } else { let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; @@ -338,60 +340,41 @@ impl<'a> SearchForFacetValues<'a> { }; let mut stream = fst.search(automaton).into_stream(); - let mut length = 0; + let mut results = vec![]; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; - let docids = match index.facet_id_string_docids.get(rtxn, &key)? { - Some(FacetGroupValue { bitmap, .. }) => bitmap, - None => { - error!( - "the facet value is missing from the facet database: {key:?}" - ); - continue; - } - }; - let count = search_candidates.intersection_len(&docids); - if count != 0 { - let value = self - .one_original_value_of(fid, value, docids.min().unwrap())? - .unwrap_or_else(|| query.to_string()); - results.push(FacetValueHit { value, count }); - length += 1; - } - if length >= MAX_NUMBER_OF_FACETS { + if self + .fetch_original_facets_using_normalized( + fid, + value, + query, + &search_candidates, + &mut results, + )? + .is_break() + { break; } } - } - Ok(results) + Ok(results) + } } else { let automaton = Str::new(query).starts_with(); let mut stream = fst.search(automaton).into_stream(); let mut results = vec![]; - let mut length = 0; while let Some(facet_value) = stream.next() { let value = std::str::from_utf8(facet_value)?; - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; - let docids = match index.facet_id_string_docids.get(rtxn, &key)? { - Some(FacetGroupValue { bitmap, .. 
}) => bitmap, - None => { - error!( - "the facet value is missing from the facet database: {key:?}" - ); - continue; - } - }; - let count = search_candidates.intersection_len(&docids); - if count != 0 { - let value = self - .one_original_value_of(fid, value, docids.min().unwrap())? - .unwrap_or_else(|| query.to_string()); - results.push(FacetValueHit { value, count }); - length += 1; - } - if length >= MAX_NUMBER_OF_FACETS { + if self + .fetch_original_facets_using_normalized( + fid, + value, + query, + &search_candidates, + &mut results, + )? + .is_break() + { break; } } @@ -401,7 +384,6 @@ impl<'a> SearchForFacetValues<'a> { } None => { let mut results = vec![]; - let mut length = 0; let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = @@ -412,9 +394,8 @@ impl<'a> SearchForFacetValues<'a> { .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? .unwrap_or_else(|| left_bound.to_string()); results.push(FacetValueHit { value, count }); - length += 1; } - if length >= MAX_NUMBER_OF_FACETS { + if results.len() >= MAX_NUMBER_OF_FACETS { break; } } @@ -422,6 +403,50 @@ impl<'a> SearchForFacetValues<'a> { } } } + + fn fetch_original_facets_using_normalized( + &self, + fid: FieldId, + value: &str, + query: &str, + search_candidates: &RoaringBitmap, + results: &mut Vec, + ) -> Result> { + let index = self.search_query.index; + let rtxn = self.search_query.rtxn; + + let database = index.facet_id_normalized_string_strings; + let key = (fid, value); + let original_strings = match database.get(rtxn, &key)? 
{ + Some(original_strings) => original_strings, + None => { + error!("the facet value is missing from the facet database: {key:?}"); + return Ok(ControlFlow::Continue(())); + } + }; + for original in original_strings { + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!("the facet value is missing from the facet database: {key:?}"); + return Ok(ControlFlow::Continue(())); + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + let value = self + .one_original_value_of(fid, &original, docids.min().unwrap())? + .unwrap_or_else(|| query.to_string()); + results.push(FacetValueHit { value, count }); + } + if results.len() >= MAX_NUMBER_OF_FACETS { + return Ok(ControlFlow::Break(())); + } + } + + Ok(ControlFlow::Continue(())) + } } #[derive(Debug, Clone, serde::Serialize, PartialEq)] diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 5fdf8ef49..2d2f78d5e 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -34,6 +34,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { script_language_docids, facet_id_f64_docids, facet_id_string_docids, + facet_id_normalized_string_strings, facet_id_string_fst, facet_id_exists_docids, facet_id_is_null_docids, @@ -92,6 +93,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { word_prefix_fid_docids.clear(self.wtxn)?; script_language_docids.clear(self.wtxn)?; facet_id_f64_docids.clear(self.wtxn)?; + facet_id_normalized_string_strings.clear(self.wtxn)?; facet_id_string_fst.clear(self.wtxn)?; facet_id_exists_docids.clear(self.wtxn)?; facet_id_is_null_docids.clear(self.wtxn)?; diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 906d6922f..01fc50a13 100644 --- a/milli/src/update/delete_documents.rs +++ 
b/milli/src/update/delete_documents.rs @@ -236,6 +236,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { word_prefix_fid_docids, facet_id_f64_docids: _, facet_id_string_docids: _, + facet_id_normalized_string_strings: _, facet_id_string_fst: _, field_id_docid_facet_f64s: _, field_id_docid_facet_strings: _, diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 0e6fd494c..16fc1cd2f 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -76,9 +76,14 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8; pub const FACET_GROUP_SIZE: u8 = 4; pub const FACET_MIN_LEVEL_SIZE: u8 = 5; +use std::collections::BTreeSet; use std::fs::File; +use std::iter::FromIterator; -use heed::types::DecodeIgnore; +use charabia::normalizer::{Normalize, NormalizerOption}; +use grenad::{CompressionType, SortAlgorithm}; +use heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; +use heed::BytesEncode; use log::debug; use time::OffsetDateTime; @@ -87,7 +92,9 @@ use super::FacetsUpdateBulk; use crate::facet::FacetType; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::ByteSliceRefCodec; -use crate::{Index, Result, BEU16}; +use crate::update::index_documents::create_sorter; +use crate::update::merge_btreeset_string; +use crate::{BEU16StrCodec, Index, Result, BEU16}; pub mod bulk; pub mod delete; @@ -159,26 +166,69 @@ impl<'i> FacetsUpdate<'i> { incremental_update.execute(wtxn)?; } + // We clear the list of normalized-for-search facets + // and the previous FSTs to compute everything from scratch + self.index.facet_id_normalized_string_strings.clear(wtxn)?; + self.index.facet_id_string_fst.clear(wtxn)?; + + // As we can't use the same write transaction to read and write in two different databases + // we must create a temporary sorter that we will write into LMDB afterward. + // As multiple unnormalized facet values can become the same normalized facet value + // we must merge them together. 
+ let mut sorter = create_sorter( + SortAlgorithm::Unstable, + merge_btreeset_string, + CompressionType::None, + None, + None, + None, + ); + + // We iterate on the list of original, semi-normalized, facet values + // and normalize them for search, inserting them in the sorter in any given order. + let options = NormalizerOption { lossy: true, ..Default::default() }; + let database = self.index.facet_id_string_docids.remap_data_type::(); + for result in database.iter(wtxn)? { + let (facet_group_key, ()) = result?; + if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key { + let normalized_facet = left_bound.normalize(&options); + let set = BTreeSet::from_iter(std::iter::once(left_bound)); + let key = (field_id, normalized_facet.as_ref()); + let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?; + let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?; + sorter.insert(key, val)?; + } + } + + // In this loop we don't need to take care of merging the sets of original strings + // as the grenad sorter already merged them for us. + let mut merger_iter = sorter.into_stream_merger_iter()?; + while let Some((key_bytes, btreeset_bytes)) = merger_iter.next()? { + self.index + .facet_id_normalized_string_strings + .remap_types::() + .put(wtxn, key_bytes, btreeset_bytes)?; + } + // We compute one FST by string facet let mut text_fsts = vec![]; let mut current_fst: Option<(u16, fst::SetBuilder>)> = None; - let database = self.index.facet_id_string_docids.remap_data_type::(); + let database = + self.index.facet_id_normalized_string_strings.remap_data_type::(); for result in database.iter(wtxn)? 
{ - let (facet_group_key, _) = result?; - if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key { - current_fst = match current_fst.take() { - Some((fid, fst_builder)) if fid != field_id => { - let fst = fst_builder.into_set(); - text_fsts.push((fid, fst)); - Some((field_id, fst::SetBuilder::memory())) - } - Some((field_id, fst_builder)) => Some((field_id, fst_builder)), - None => Some((field_id, fst::SetBuilder::memory())), - }; - - if let Some((_, fst_builder)) = current_fst.as_mut() { - fst_builder.insert(left_bound)?; + let ((field_id, normalized_facet), _) = result?; + current_fst = match current_fst.take() { + Some((fid, fst_builder)) if fid != field_id => { + let fst = fst_builder.into_set(); + text_fsts.push((fid, fst)); + Some((field_id, fst::SetBuilder::memory())) } + Some((field_id, fst_builder)) => Some((field_id, fst_builder)), + None => Some((field_id, fst::SetBuilder::memory())), + }; + + if let Some((_, fst_builder)) = current_fst.as_mut() { + fst_builder.insert(normalized_facet)?; } } @@ -187,9 +237,6 @@ impl<'i> FacetsUpdate<'i> { text_fsts.push((field_id, fst)); } - // We remove all of the previous FSTs that were in this database - self.index.facet_id_string_fst.clear(wtxn)?; - // We write those FSTs in LMDB now for (field_id, fst) in text_fsts { self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?; diff --git a/milli/src/update/index_documents/helpers/merge_functions.rs b/milli/src/update/index_documents/helpers/merge_functions.rs index 64bee95df..5d111067a 100644 --- a/milli/src/update/index_documents/helpers/merge_functions.rs +++ b/milli/src/update/index_documents/helpers/merge_functions.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::collections::BTreeSet; use std::io; use std::result::Result as StdResult; @@ -44,6 +45,27 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul } } +pub fn merge_btreeset_string<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result> { + if 
values.len() == 1 { + Ok(values[0].clone()) + } else { + // TODO improve the perf by using a `#[borrow] Cow`. + let strings: BTreeSet = values + .iter() + .map(AsRef::as_ref) + .map(serde_json::from_slice::>) + .map(StdResult::unwrap) + .reduce(|mut current, new| { + for x in new { + current.insert(x); + } + current + }) + .unwrap(); + Ok(Cow::Owned(serde_json::to_vec(&strings).unwrap())) + } +} + pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result> { Ok(values[0].clone()) } diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs index 95e497af4..d59a3bc08 100644 --- a/milli/src/update/index_documents/helpers/mod.rs +++ b/milli/src/update/index_documents/helpers/mod.rs @@ -13,9 +13,9 @@ pub use grenad_helpers::{ GrenadParameters, MergeableReader, }; pub use merge_functions::{ - concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, - merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, serialize_roaring_bitmap, - MergeFn, + concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string, + merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, + serialize_roaring_bitmap, MergeFn, }; use crate::MAX_WORD_LENGTH; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 20a36237e..849e84035 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -26,7 +26,7 @@ pub use self::enrich::{ }; pub use self::helpers::{ as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset, - fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, + fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key, writer_into_reader, ClonableMmap, MergeFn, }; use self::helpers::{grenad_obkv_into_chunks, GrenadParameters}; diff --git 
a/milli/src/update/mod.rs b/milli/src/update/mod.rs index 32584825b..9982957e5 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -4,8 +4,9 @@ pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDele pub use self::facet::bulk::FacetsUpdateBulk; pub use self::facet::incremental::FacetsUpdateIncrementalInner; pub use self::index_documents::{ - merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, DocumentAdditionResult, DocumentId, - IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, MergeFn, + merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, + DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, + MergeFn, }; pub use self::indexer_config::IndexerConfig; pub use self::prefix_word_pairs::{