diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index 8bae99a20..1d7326db7 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::HashSet; use std::fmt; use anyhow::{Context, bail}; @@ -6,8 +6,6 @@ use regex::Regex; use serde::{Serialize, Deserialize}; use once_cell::sync::Lazy; -use crate::facet::FacetType; - static ASC_DESC_REGEX: Lazy = Lazy::new(|| { Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap() }); @@ -33,7 +31,7 @@ pub enum Criterion { } impl Criterion { - pub fn from_str(faceted_attributes: &HashMap, txt: &str) -> anyhow::Result { + pub fn from_str(faceted_attributes: &HashSet, txt: &str) -> anyhow::Result { match txt { "words" => Ok(Criterion::Words), "typo" => Ok(Criterion::Typo), @@ -44,7 +42,9 @@ impl Criterion { let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; let order = caps.get(1).unwrap().as_str(); let field_name = caps.get(2).unwrap().as_str(); - faceted_attributes.get(field_name).with_context(|| format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name))?; + faceted_attributes.get(field_name).with_context(|| { + format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name) + })?; match order { "asc" => Ok(Criterion::Asc(field_name.to_string())), "desc" => Ok(Criterion::Desc(field_name.to_string())), diff --git a/milli/src/index.rs b/milli/src/index.rs index 305d95cc7..14b153a2e 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::path::Path; use anyhow::Context; @@ -18,24 +18,24 @@ use crate::heed_codec::facet::{ FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FacetValueStringCodec, FacetLevelValueF64Codec, }; -use crate::facet::FacetType; use crate::fields_ids_map::FieldsIdsMap; pub const CRITERIA_KEY: &str = "criteria"; pub 
const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key"; pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; -pub const FACETED_DOCUMENTS_IDS_PREFIX: &str = "faceted-documents-ids"; pub const FACETED_FIELDS_KEY: &str = "faceted-fields"; -pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution"; +pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; +pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; +pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids"; pub const PRIMARY_KEY_KEY: &str = "primary-key"; pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields"; -pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; -pub const WORDS_FST_KEY: &str = "words-fst"; pub const STOP_WORDS_KEY: &str = "stop-words"; +pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; pub const SYNONYMS_KEY: &str = "synonyms"; +pub const WORDS_FST_KEY: &str = "words-fst"; pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; const CREATED_AT_KEY: &str = "created-at"; const UPDATED_AT_KEY: &str = "updated-at"; @@ -321,53 +321,97 @@ impl Index { /* faceted fields */ - /// Writes the facet fields associated with their facet type or `None` if - /// the facet type is currently unknown. - pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types) + /// Writes the facet fields names in the database. + pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields: &HashSet) -> heed::Result<()> { + self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields) } - /// Deletes the facet fields ids associated with their facet type. 
+ /// Deletes the facet fields ids in the database. pub fn delete_faceted_fields(&self, wtxn: &mut RwTxn) -> heed::Result { self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY) } - /// Returns the facet fields names associated with their facet type. - pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result> { + /// Returns the facet fields names. + pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result> { Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) } /// Same as `faceted_fields`, but returns ids instead. - pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result> { + pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result> { let faceted_fields = self.faceted_fields(rtxn)?; let fields_ids_map = self.fields_ids_map(rtxn)?; let faceted_fields = faceted_fields .iter() - .map(|(k, v)| { - let kid = fields_ids_map + .map(|k| { + fields_ids_map .id(k) .ok_or_else(|| format!("{:?} should be present in the field id map", k)) - .expect("corrupted data: "); - (kid, *v) + .expect("corrupted data: ") }) .collect(); + Ok(faceted_fields) } /* faceted documents ids */ - /// Writes the documents ids that are faceted under this field id. - pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: FieldId, docids: &RoaringBitmap) -> heed::Result<()> { - let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + /// Writes the documents ids that are faceted with numbers under this field id. 
+ pub fn put_number_faceted_documents_ids( + &self, + wtxn: &mut RwTxn, + field_id: FieldId, + docids: &RoaringBitmap, + ) -> heed::Result<()> + { + let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) } - /// Retrieve all the documents ids that faceted under this field id. - pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: FieldId) -> heed::Result { - let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + /// Retrieve all the documents ids that faceted with numbers under this field id. + pub fn number_faceted_documents_ids( + &self, + rtxn: &RoTxn, + field_id: FieldId, + ) -> heed::Result + { + let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + *buffer.last_mut().unwrap() = field_id; + match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? { + Some(docids) => Ok(docids), + None => Ok(RoaringBitmap::new()), + } + } + + /// Writes the documents ids that are faceted with strings under this field id. 
+ pub fn put_string_faceted_documents_ids( + &self, + wtxn: &mut RwTxn, + field_id: FieldId, + docids: &RoaringBitmap, + ) -> heed::Result<()> + { + let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + *buffer.last_mut().unwrap() = field_id; + self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) + } + + /// Retrieve all the documents ids that faceted with strings under this field id. + pub fn string_faceted_documents_ids( + &self, + rtxn: &RoTxn, + field_id: FieldId, + ) -> heed::Result + { + let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); *buffer.last_mut().unwrap() = field_id; match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? { Some(docids) => Ok(docids), None => Ok(RoaringBitmap::new()), } } diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 9e8bebb8f..32857b8d7 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::mem::take; -use anyhow::{bail, Context as _}; +use anyhow::Context; use itertools::Itertools; use log::debug; use ordered_float::OrderedFloat; @@ -23,7 +23,6 @@ pub struct AscDesc<'t> { rtxn: &'t heed::RoTxn<'t>, field_name: String, field_id: FieldId, - facet_type: FacetType, ascending: bool, query_tree: Option, candidates: Box> + 't>, @@ -51,6 +50,7 @@ impl<'t> AscDesc<'t> { Self::new(index, rtxn, parent, field_name, false) } + fn new( index: &'t Index, rtxn: &'t heed::RoTxn, @@ -60,19 +60,19 @@ impl<'t> AscDesc<'t> { ) -> anyhow::Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let faceted_fields = index.faceted_fields(rtxn)?; - let (field_id, facet_type) = - field_id_facet_type(&fields_ids_map, &faceted_fields, 
&field_name)?; + let field_id = fields_ids_map + .id(&field_name) + .with_context(|| format!("field {:?} isn't registered", field_name))?; Ok(AscDesc { index, rtxn, field_name, field_id, - facet_type, ascending, query_tree: None, candidates: Box::new(std::iter::empty()), - faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?, + faceted_candidates: index.number_faceted_documents_ids(rtxn, field_id)?, bucket_candidates: RoaringBitmap::new(), parent, }) @@ -165,27 +165,20 @@ fn facet_ordered<'t>( index: &'t Index, rtxn: &'t heed::RoTxn, field_id: FieldId, - facet_type: FacetType, ascending: bool, candidates: RoaringBitmap, ) -> anyhow::Result> + 't>> { - match facet_type { - FacetType::Number => { - if candidates.len() <= CANDIDATES_THRESHOLD { - let iter = - iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?; - Ok(Box::new(iter.map(Ok)) as Box>) - } else { - let facet_fn = if ascending { - FacetIter::new_reducing - } else { - FacetIter::new_reverse_reducing - }; - let iter = facet_fn(rtxn, index, field_id, candidates)?; - Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids)))) - } - } - FacetType::String => bail!("criteria facet type must be a number"), + if candidates.len() <= CANDIDATES_THRESHOLD { + let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?; + Ok(Box::new(iter.map(Ok)) as Box>) + } else { + let facet_fn = if ascending { + FacetIter::new_reducing + } else { + FacetIter::new_reverse_reducing + }; + let iter = facet_fn(rtxn, index, field_id, candidates)?; + Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids)))) } } diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index f3952e6f1..44dd6bc66 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -5,7 +5,7 @@ use roaring::RoaringBitmap; use super::{Distinct, DocIter}; use crate::heed_codec::facet::*; -use 
crate::{facet::FacetType, DocumentId, FieldId, Index}; +use crate::{DocumentId, FieldId, Index}; const FID_SIZE: usize = size_of::(); const DOCID_SIZE: usize = size_of::(); @@ -22,7 +22,6 @@ pub struct FacetDistinct<'a> { distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>, - facet_type: FacetType, } impl<'a> FacetDistinct<'a> { @@ -30,14 +29,9 @@ impl<'a> FacetDistinct<'a> { distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>, - facet_type: FacetType, - ) -> Self { - Self { - distinct, - index, - txn, - facet_type, - } + ) -> Self + { + Self { distinct, index, txn } } } @@ -45,7 +39,6 @@ pub struct FacetDistinctIter<'a> { candidates: RoaringBitmap, distinct: FieldId, excluded: RoaringBitmap, - facet_type: FacetType, index: &'a Index, iter_offset: usize, txn: &'a heed::RoTxn<'a>, @@ -117,6 +110,7 @@ impl<'a> FacetDistinctIter<'a> { // increasing the offset we make sure to get the first valid value for the next // distinct document to keep. self.iter_offset += 1; + Ok(Some(id)) } // no more candidate at this offset, return. 
@@ -188,7 +182,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> { candidates, distinct: self.distinct, excluded, - facet_type: self.facet_type, index: self.index, iter_offset: 0, txn: self.txn, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index be107bf72..640f081ba 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -145,7 +145,7 @@ impl<'a> Search<'a> { let faceted_fields = self.index.faceted_fields(self.rtxn)?; match faceted_fields.get(name) { Some(facet_type) => { - let distinct = FacetDistinct::new(id, self.index, self.rtxn, *facet_type); + let distinct = FacetDistinct::new(id, self.index, self.rtxn); self.perform_sort(distinct, matching_words, criteria) } None => { diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index ba0c9e58e..c163046ec 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -49,8 +49,10 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { self.index.put_fields_distribution(self.wtxn, &FieldsDistribution::default())?; // We clean all the faceted documents ids. - for (field_id, _) in faceted_fields { - self.index.put_faceted_documents_ids(self.wtxn, field_id, &RoaringBitmap::default())?; + let empty = RoaringBitmap::default(); + for field_id in faceted_fields { + self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &empty)?; + self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &empty)?; } // Clear the other databases. diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index b2b1e8410..e93ff9a0a 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -330,11 +330,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { )?; // Remove the documents ids from the faceted documents ids. 
- let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; - for (field_id, facet_type) in faceted_fields { - let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?; + for field_id in self.index.faceted_fields_ids(self.wtxn)? { + // Remove docids from the number faceted documents ids + let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?; docids.difference_with(&self.documents_ids); - self.index.put_faceted_documents_ids(self.wtxn, field_id, &docids)?; + self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?; remove_docids_from_field_id_docid_facet_value( self.wtxn, @@ -344,6 +344,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { |(_fid, docid, _value)| docid, )?; + // Remove docids from the string faceted documents ids + let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?; + docids.difference_with(&self.documents_ids); + self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?; + remove_docids_from_field_id_docid_facet_value( self.wtxn, field_id_docid_facet_strings, diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index b9e4d7488..af72133a2 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -9,7 +9,6 @@ use heed::{BytesEncode, Error}; use log::debug; use roaring::RoaringBitmap; -use crate::facet::FacetType; use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::Index; @@ -62,56 +61,51 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; debug!("Computing and writing the facet values levels docids into LMDB on disk..."); - for (field_id, facet_type) in faceted_fields { - let (content, documents_ids) = match facet_type { - FacetType::String => { - let documents_ids = compute_faceted_documents_ids( - self.wtxn, - self.index.facet_field_id_value_docids, - field_id, - )?; - (None, documents_ids) - }, - 
FacetType::Number => { - clear_field_number_levels( - self.wtxn, - self.index.facet_field_id_value_docids.remap_key_type::(), - field_id, - )?; + for field_id in faceted_fields { + // Compute and store the faceted strings documents ids. + let string_documents_ids = compute_faceted_documents_ids( + self.wtxn, + self.index.facet_id_string_docids.remap_key_type::(), + field_id, + )?; - let documents_ids = compute_faceted_documents_ids( - self.wtxn, - self.index.facet_field_id_value_docids, - field_id, - )?; + // Clear the facet number levels. + clear_field_number_levels( + self.wtxn, + self.index.facet_id_f64_docids, + field_id, + )?; - let content = compute_facet_number_levels( - self.wtxn, - self.index.facet_field_id_value_docids.remap_key_type::(), - self.chunk_compression_type, - self.chunk_compression_level, - self.chunk_fusing_shrink_size, - self.level_group_size, - self.min_level_size, - field_id, - )?; + // Compute and store the faceted numbers documents ids. + let number_documents_ids = compute_faceted_documents_ids( + self.wtxn, + self.index.facet_id_f64_docids.remap_key_type::(), + field_id, + )?; - (Some(content), documents_ids) - }, - }; + let content = compute_facet_number_levels( + self.wtxn, + self.index.facet_id_f64_docids, + self.chunk_compression_type, + self.chunk_compression_level, + self.chunk_fusing_shrink_size, + self.level_group_size, + self.min_level_size, + field_id, + )?; - if let Some(content) = content { - write_into_lmdb_database( - self.wtxn, - *self.index.facet_field_id_value_docids.as_polymorph(), - content, - |_, _| anyhow::bail!("invalid facet level merging"), - WriteMethod::GetMergePut, - )?; - } + self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &string_documents_ids)?; + self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &number_documents_ids)?; - self.index.put_faceted_documents_ids(self.wtxn, field_id, &documents_ids)?; + // Store the computed facet number levels into the database. + write_into_lmdb_database( + self.wtxn, + 
*self.index.facet_id_f64_docids.as_polymorph(), + content, + |_, _| anyhow::bail!("invalid facet number level merging"), + WriteMethod::GetMergePut, + )?; } Ok(()) @@ -205,10 +199,12 @@ fn compute_faceted_documents_ids( ) -> anyhow::Result { let mut documents_ids = RoaringBitmap::new(); + for result in db.prefix_iter(rtxn, &[field_id])? { let (_key, docids) = result?; - documents_ids.union_with(&docids); + documents_ids |= docids; } + Ok(documents_ids) } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 3acae7821..10c2e41e7 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -412,7 +412,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { Main, WordDocids, WordLevel0PositionDocids, - FacetLevel0ValuesDocids, + FacetLevel0NumbersDocids, } let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; @@ -478,8 +478,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { let mut docid_word_positions_readers = Vec::with_capacity(readers.len()); let mut words_pairs_proximities_docids_readers = Vec::with_capacity(readers.len()); let mut word_level_position_docids_readers = Vec::with_capacity(readers.len()); - let mut facet_field_value_docids_readers = Vec::with_capacity(readers.len()); - let mut field_id_docid_facet_values_readers = Vec::with_capacity(readers.len()); + let mut facet_field_numbers_docids_readers = Vec::with_capacity(readers.len()); + let mut facet_field_strings_docids_readers = Vec::with_capacity(readers.len()); + let mut field_id_docid_facet_numbers_readers = Vec::with_capacity(readers.len()); + let mut field_id_docid_facet_strings_readers = Vec::with_capacity(readers.len()); let mut documents_readers = Vec::with_capacity(readers.len()); readers.into_iter().for_each(|readers| { let Readers { @@ -488,17 +490,21 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { docid_word_positions, words_pairs_proximities_docids, 
word_level_position_docids, - facet_field_value_docids, - field_id_docid_facet_values, - documents + facet_field_numbers_docids, + facet_field_strings_docids, + field_id_docid_facet_numbers, + field_id_docid_facet_strings, + documents, } = readers; main_readers.push(main); word_docids_readers.push(word_docids); docid_word_positions_readers.push(docid_word_positions); words_pairs_proximities_docids_readers.push(words_pairs_proximities_docids); word_level_position_docids_readers.push(word_level_position_docids); - facet_field_value_docids_readers.push(facet_field_value_docids); - field_id_docid_facet_values_readers.push(field_id_docid_facet_values); + facet_field_numbers_docids_readers.push(facet_field_numbers_docids); + facet_field_strings_docids_readers.push(facet_field_strings_docids); + field_id_docid_facet_numbers_readers.push(field_id_docid_facet_numbers); + field_id_docid_facet_strings_readers.push(field_id_docid_facet_strings); documents_readers.push(documents); }); @@ -523,8 +529,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { (DatabaseType::Main, main_readers, main_merge as MergeFn), (DatabaseType::WordDocids, word_docids_readers, word_docids_merge), ( - DatabaseType::FacetLevel0ValuesDocids, - facet_field_value_docids_readers, + DatabaseType::FacetLevel0NumbersDocids, + facet_field_numbers_docids_readers, facet_field_value_docids_merge, ), ( @@ -547,7 +553,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { docid_word_positions_readers, documents_readers, words_pairs_proximities_docids_readers, - field_id_docid_facet_values_readers, + facet_field_numbers_docids_readers, + facet_field_strings_docids_readers, + field_id_docid_facet_numbers_readers, + field_id_docid_facet_strings_readers, )) as anyhow::Result<_> })?; @@ -556,7 +565,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { docid_word_positions_readers, documents_readers, words_pairs_proximities_docids_readers, - field_id_docid_facet_values_readers, + 
facet_field_numbers_docids_readers, + facet_field_strings_docids_readers, + field_id_docid_facet_numbers_readers, + field_id_docid_facet_strings_readers, ) = readers; let mut documents_ids = self.index.documents_ids(self.wtxn)?; @@ -624,11 +636,26 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { total_databases, }); - debug!("Writing the field id docid facet values into LMDB on disk..."); + debug!("Writing the field id docid facet numbers into LMDB on disk..."); merge_into_lmdb_database( self.wtxn, - *self.index.field_id_docid_facet_values.as_polymorph(), - field_id_docid_facet_values_readers, + *self.index.field_id_docid_facet_f64s.as_polymorph(), + field_id_docid_facet_numbers_readers, + field_id_docid_facet_values_merge, + write_method, + )?; + + database_count += 1; + progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase { + databases_seen: database_count, + total_databases, + }); + + debug!("Writing the field id docid facet strings into LMDB on disk..."); + merge_into_lmdb_database( + self.wtxn, + *self.index.field_id_docid_facet_strings.as_polymorph(), + field_id_docid_facet_strings_readers, field_id_docid_facet_values_merge, write_method, )?; @@ -678,9 +705,9 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { write_method, )?; }, - DatabaseType::FacetLevel0ValuesDocids => { - debug!("Writing the facet level 0 values docids into LMDB on disk..."); - let db = *self.index.facet_field_id_value_docids.as_polymorph(); + DatabaseType::FacetLevel0NumbersDocids => { + debug!("Writing the facet numbers docids into LMDB on disk..."); + let db = *self.index.facet_id_f64_docids.as_polymorph(); write_into_lmdb_database( self.wtxn, db, diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index 0f97476d9..ba8da6d16 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -6,25 +6,24 @@ use std::iter::FromIterator; use std::time::Instant; use std::{cmp, 
iter}; -use anyhow::{bail, Context}; +use anyhow::Context; use bstr::ByteSlice as _; use fst::Set; use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType}; use heed::BytesEncode; use linked_hash_map::LinkedHashMap; -use log::{debug, info, warn}; +use log::{debug, info}; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token, TokenKind, token::SeparatorKind}; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use serde_json::Value; use tempfile::tempfile; -use crate::facet::{FacetType, FacetValue}; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec}; use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; use crate::update::UpdateIndexingStep; -use crate::{json_to_string, SmallVec8, SmallVec32, Position, DocumentId, FieldId, FieldsIdsMap}; +use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId, FieldsIdsMap}; use super::{MergeFn, create_writer, create_sorter, writer_into_reader}; use super::merge_function::{ @@ -45,8 +44,10 @@ pub struct Readers { pub docid_word_positions: Reader, pub words_pairs_proximities_docids: Reader, pub word_level_position_docids: Reader, - pub facet_field_value_docids: Reader, - pub field_id_docid_facet_values: Reader, + pub facet_field_numbers_docids: Reader, + pub facet_field_strings_docids: Reader, + pub field_id_docid_facet_numbers: Reader, + pub field_id_docid_facet_strings: Reader, pub documents: Reader, } @@ -55,13 +56,14 @@ pub struct Store<'s, A> { primary_key: String, fields_ids_map: FieldsIdsMap, searchable_fields: HashSet, - faceted_fields: HashMap, + faceted_fields: HashSet, // Caches word_docids: LinkedHashMap, RoaringBitmap>, word_docids_limit: usize, words_pairs_proximities_docids: LinkedHashMap<(SmallVec32, SmallVec32, u8), RoaringBitmap>, words_pairs_proximities_docids_limit: usize, - facet_field_value_docids: LinkedHashMap<(u8, FacetValue), RoaringBitmap>, + 
facet_field_number_docids: LinkedHashMap<(FieldId, OrderedFloat), RoaringBitmap>, + facet_field_string_docids: LinkedHashMap<(FieldId, String), RoaringBitmap>, facet_field_value_docids_limit: usize, // MTBL parameters chunk_compression_type: CompressionType, @@ -72,8 +74,10 @@ pub struct Store<'s, A> { word_docids_sorter: Sorter, words_pairs_proximities_docids_sorter: Sorter, word_level_position_docids_sorter: Sorter, - facet_field_value_docids_sorter: Sorter, - field_id_docid_facet_values_sorter: Sorter, + facet_field_numbers_docids_sorter: Sorter, + facet_field_strings_docids_sorter: Sorter, + field_id_docid_facet_numbers_sorter: Sorter, + field_id_docid_facet_strings_sorter: Sorter, // MTBL writers docid_word_positions_writer: Writer, documents_writer: Writer, @@ -86,7 +90,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { primary_key: String, fields_ids_map: FieldsIdsMap, searchable_fields: HashSet, - faceted_fields: HashMap, + faceted_fields: HashSet, linked_hash_map_size: Option, max_nb_chunks: Option, max_memory: Option, @@ -132,7 +136,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_nb_chunks, max_memory, ); - let facet_field_value_docids_sorter = create_sorter( + let facet_field_numbers_docids_sorter = create_sorter( facet_field_value_docids_merge, chunk_compression_type, chunk_compression_level, @@ -140,7 +144,23 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { max_nb_chunks, max_memory, ); - let field_id_docid_facet_values_sorter = create_sorter( + let facet_field_strings_docids_sorter = create_sorter( + facet_field_value_docids_merge, + chunk_compression_type, + chunk_compression_level, + chunk_fusing_shrink_size, + max_nb_chunks, + max_memory, + ); + let field_id_docid_facet_numbers_sorter = create_sorter( + field_id_docid_facet_values_merge, + chunk_compression_type, + chunk_compression_level, + chunk_fusing_shrink_size, + max_nb_chunks, + Some(1024 * 1024 * 1024), // 1 GiB + ); + let field_id_docid_facet_strings_sorter = create_sorter( 
field_id_docid_facet_values_merge, chunk_compression_type, chunk_compression_level, @@ -173,7 +193,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { word_docids_limit: linked_hash_map_size, words_pairs_proximities_docids: LinkedHashMap::with_capacity(linked_hash_map_size), words_pairs_proximities_docids_limit: linked_hash_map_size, - facet_field_value_docids: LinkedHashMap::with_capacity(linked_hash_map_size), + facet_field_number_docids: LinkedHashMap::with_capacity(linked_hash_map_size), + facet_field_string_docids: LinkedHashMap::with_capacity(linked_hash_map_size), facet_field_value_docids_limit: linked_hash_map_size, // MTBL parameters chunk_compression_type, @@ -184,8 +205,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { word_docids_sorter, words_pairs_proximities_docids_sorter, word_level_position_docids_sorter, - facet_field_value_docids_sorter, - field_id_docid_facet_values_sorter, + facet_field_numbers_docids_sorter, + facet_field_strings_docids_sorter, + field_id_docid_facet_numbers_sorter, + field_id_docid_facet_strings_sorter, // MTBL writers docid_word_positions_writer, documents_writer, @@ -215,34 +238,68 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - // Save the documents ids under the facet field id and value we have seen it. - fn insert_facet_values_docid( + fn insert_facet_number_values_docid( &mut self, field_id: FieldId, - field_value: FacetValue, + value: OrderedFloat, id: DocumentId, ) -> anyhow::Result<()> { - Self::write_field_id_docid_facet_value(&mut self.field_id_docid_facet_values_sorter, field_id, id, &field_value)?; + let sorter = &mut self.field_id_docid_facet_numbers_sorter; + Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?; - let key = (field_id, field_value); + let key = (field_id, value); // if get_refresh finds the element it is assured to be at the end of the linked hash map. 
- match self.facet_field_value_docids.get_refresh(&key) { + match self.facet_field_number_docids.get_refresh(&key) { Some(old) => { old.insert(id); }, None => { // A newly inserted element is append at the end of the linked hash map. - self.facet_field_value_docids.insert(key, RoaringBitmap::from_iter(Some(id))); + self.facet_field_number_docids.insert(key, RoaringBitmap::from_iter(Some(id))); // If the word docids just reached it's capacity we must make sure to remove // one element, this way next time we insert we doesn't grow the capacity. - if self.facet_field_value_docids.len() == self.facet_field_value_docids_limit { + if self.facet_field_number_docids.len() == self.facet_field_value_docids_limit { // Removing the front element is equivalent to removing the LRU element. - Self::write_facet_field_value_docids( - &mut self.facet_field_value_docids_sorter, - self.facet_field_value_docids.pop_front(), + Self::write_facet_field_number_docids( + &mut self.facet_field_numbers_docids_sorter, + self.facet_field_number_docids.pop_front(), )?; } } } + + Ok(()) + } + + // Save the documents ids under the facet field id and value we have seen it. + fn insert_facet_string_values_docid( + &mut self, + field_id: FieldId, + value: String, + id: DocumentId, + ) -> anyhow::Result<()> + { + let sorter = &mut self.field_id_docid_facet_strings_sorter; + Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?; + + let key = (field_id, value); + // if get_refresh finds the element it is assured to be at the end of the linked hash map. + match self.facet_field_string_docids.get_refresh(&key) { + Some(old) => { old.insert(id); }, + None => { + // A newly inserted element is append at the end of the linked hash map. + self.facet_field_string_docids.insert(key, RoaringBitmap::from_iter(Some(id))); + // If the word docids just reached it's capacity we must make sure to remove + // one element, this way next time we insert we doesn't grow the capacity. 
+ if self.facet_field_string_docids.len() == self.facet_field_value_docids_limit { + // Removing the front element is equivalent to removing the LRU element. + Self::write_facet_field_string_docids( + &mut self.facet_field_strings_docids_sorter, + self.facet_field_string_docids.pop_front(), + )?; + } + } + } + Ok(()) } @@ -287,7 +344,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { &mut self, document_id: DocumentId, words_positions: &mut HashMap>, - facet_values: &mut HashMap>, + facet_numbers_values: &mut HashMap>, + facet_strings_values: &mut HashMap>, record: &[u8], ) -> anyhow::Result<()> { @@ -306,10 +364,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { words_positions.clear(); - // We store document_id associated with all the field id and values. - for (field, values) in facet_values.drain() { + // We store document_id associated with all the facet numbers fields ids and values. + for (field, values) in facet_numbers_values.drain() { for value in values { - self.insert_facet_values_docid(field, value, document_id)?; + let value = OrderedFloat::from(value); + self.insert_facet_number_values_docid(field, value, document_id)?; + } + } + + // We store document_id associated with all the facet strings fields ids and values. 
+ for (field, values) in facet_strings_values.drain() { + for value in values { + self.insert_facet_string_values_docid(field, value, document_id)?; } } @@ -409,20 +475,16 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_facet_field_value_docids( + fn write_facet_field_string_docids( sorter: &mut Sorter, iter: I, ) -> anyhow::Result<()> - where I: IntoIterator + where I: IntoIterator { - use FacetValue::*; - for ((field_id, value), docids) in iter { - let result = match value { - String(s) => FacetValueStringCodec::bytes_encode(&(field_id, &s)).map(Cow::into_owned), - Number(f) => FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *f, *f)).map(Cow::into_owned), - }; - let key = result.context("could not serialize facet key")?; + let key = FacetValueStringCodec::bytes_encode(&(field_id, &value)) + .map(Cow::into_owned) + .context("could not serialize facet key")?; let bytes = CboRoaringBitmapCodec::bytes_encode(&docids) .context("could not serialize docids")?; if lmdb_key_valid_size(&key) { @@ -433,21 +495,55 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_field_id_docid_facet_value( + fn write_facet_field_number_docids( + sorter: &mut Sorter, + iter: I, + ) -> anyhow::Result<()> + where I: IntoIterator), RoaringBitmap)> + { + for ((field_id, value), docids) in iter { + let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value)) + .map(Cow::into_owned) + .context("could not serialize facet key")?; + let bytes = CboRoaringBitmapCodec::bytes_encode(&docids) + .context("could not serialize docids")?; + if lmdb_key_valid_size(&key) { + sorter.insert(&key, &bytes)?; + } + } + + Ok(()) + } + + fn write_field_id_docid_facet_number_value( sorter: &mut Sorter, field_id: FieldId, document_id: DocumentId, - value: &FacetValue, + value: OrderedFloat, ) -> anyhow::Result<()> { - use FacetValue::*; + let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value)) + .map(Cow::into_owned) + .context("could not 
serialize facet key")?; - let result = match value { - String(s) => FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, s)).map(Cow::into_owned), - Number(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned), - }; + if lmdb_key_valid_size(&key) { + sorter.insert(&key, &[])?; + } + + Ok(()) + } + + fn write_field_id_docid_facet_string_value( + sorter: &mut Sorter, + field_id: FieldId, + document_id: DocumentId, + value: &str, + ) -> anyhow::Result<()> + { + let key = FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, value)) + .map(Cow::into_owned) + .context("could not serialize facet key")?; - let key = result.context("could not serialize facet key")?; if lmdb_key_valid_size(&key) { sorter.insert(&key, &[])?; } @@ -493,7 +589,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { let mut before = Instant::now(); let mut words_positions = HashMap::new(); - let mut facet_values = HashMap::new(); + let mut facet_numbers_values = HashMap::new(); + let mut facet_strings_values = HashMap::new(); let mut count: usize = 0; while let Some((key, value)) = documents.next()? { @@ -513,32 +610,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { } for (attr, content) in document.iter() { - if self.faceted_fields.contains_key(&attr) || self.searchable_fields.contains(&attr) { + if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) { let value = serde_json::from_slice(content)?; - if let Some(ftype) = self.faceted_fields.get(&attr) { - let mut values = match parse_facet_value(*ftype, &value) { - Ok(values) => values, - Err(e) => { - // We extract the name of the attribute and the document id - // to help users debug a facet type conversion. 
- let attr_name = self.fields_ids_map.name(attr).unwrap(); - let document_id: Value = self.fields_ids_map.id(&self.primary_key) - .and_then(|fid| document.get(fid)) - .map(serde_json::from_slice) - .unwrap()?; - - let context = format!( - "while extracting facet from the {:?} attribute in the {} document", - attr_name, document_id, - ); - warn!("{}", e.context(context)); - - SmallVec8::default() - }, - }; - facet_values.entry(attr).or_insert_with(SmallVec8::new).extend(values.drain(..)); - } + let (facet_numbers, facet_strings) = extract_facet_values(&value); + facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers); + facet_strings_values.entry(attr).or_insert_with(Vec::new).extend(facet_strings); if self.searchable_fields.contains(&attr) { let content = match json_to_string(&value) { @@ -558,7 +635,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { } // We write the document in the documents store. - self.write_document(document_id, &mut words_positions, &mut facet_values, value)?; + self.write_document( + document_id, + &mut words_positions, + &mut facet_numbers_values, + &mut facet_strings_values, + value, + )?; } // Compute the document id of the next document. 
@@ -585,9 +668,14 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { &mut self.words_pairs_proximities_docids_sorter, self.words_pairs_proximities_docids, )?; - Self::write_facet_field_value_docids( - &mut self.facet_field_value_docids_sorter, - self.facet_field_value_docids, + Self::write_facet_field_number_docids( + &mut self.facet_field_numbers_docids_sorter, + self.facet_field_number_docids, + )?; + + Self::write_facet_field_string_docids( + &mut self.facet_field_strings_docids_sorter, + self.facet_field_string_docids, )?; let mut word_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?; @@ -613,18 +701,26 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { let mut word_level_position_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?; self.word_level_position_docids_sorter.write_into(&mut word_level_position_docids_wtr)?; - let mut facet_field_value_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?; - self.facet_field_value_docids_sorter.write_into(&mut facet_field_value_docids_wtr)?; + let mut facet_field_numbers_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?; + self.facet_field_numbers_docids_sorter.write_into(&mut facet_field_numbers_docids_wtr)?; - let mut field_id_docid_facet_values_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?; - self.field_id_docid_facet_values_sorter.write_into(&mut field_id_docid_facet_values_wtr)?; + let mut facet_field_strings_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?; + self.facet_field_strings_docids_sorter.write_into(&mut facet_field_strings_docids_wtr)?; + + let mut field_id_docid_facet_numbers_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?; + self.field_id_docid_facet_numbers_sorter.write_into(&mut field_id_docid_facet_numbers_wtr)?; + + let mut field_id_docid_facet_strings_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, 
f))?; + self.field_id_docid_facet_strings_sorter.write_into(&mut field_id_docid_facet_strings_wtr)?; let main = writer_into_reader(main_wtr, shrink_size)?; let word_docids = writer_into_reader(word_docids_wtr, shrink_size)?; let words_pairs_proximities_docids = writer_into_reader(words_pairs_proximities_docids_wtr, shrink_size)?; let word_level_position_docids = writer_into_reader(word_level_position_docids_wtr, shrink_size)?; - let facet_field_value_docids = writer_into_reader(facet_field_value_docids_wtr, shrink_size)?; - let field_id_docid_facet_values = writer_into_reader(field_id_docid_facet_values_wtr, shrink_size)?; + let facet_field_numbers_docids = writer_into_reader(facet_field_numbers_docids_wtr, shrink_size)?; + let facet_field_strings_docids = writer_into_reader(facet_field_strings_docids_wtr, shrink_size)?; + let field_id_docid_facet_numbers = writer_into_reader(field_id_docid_facet_numbers_wtr, shrink_size)?; + let field_id_docid_facet_strings = writer_into_reader(field_id_docid_facet_strings_wtr, shrink_size)?; let docid_word_positions = writer_into_reader(self.docid_word_positions_writer, shrink_size)?; let documents = writer_into_reader(self.documents_writer, shrink_size)?; @@ -634,8 +730,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { docid_word_positions, words_pairs_proximities_docids, word_level_position_docids, - facet_field_value_docids, - field_id_docid_facet_values, + facet_field_numbers_docids, + facet_field_strings_docids, + field_id_docid_facet_numbers, + field_id_docid_facet_strings, documents, }) } @@ -710,71 +808,36 @@ fn process_tokens<'a>(tokens: impl Iterator>) -> impl Iterator< .filter(|(_, t)| t.is_word()) } -fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result> { - use FacetValue::*; - - fn inner_parse_facet_value( - ftype: FacetType, +fn extract_facet_values(value: &Value) -> (Vec, Vec) { + fn inner_extract_facet_values( value: &Value, can_recurse: bool, - output: &mut SmallVec8, - ) -> anyhow::Result<()> - { 
+ output_numbers: &mut Vec, + output_strings: &mut Vec, + ) { match value { - Value::Null => Ok(()), - Value::Bool(b) => match ftype { - FacetType::String => { - output.push(String(b.to_string())); - Ok(()) - }, - FacetType::Number => { - output.push(Number(OrderedFloat(if *b { 1.0 } else { 0.0 }))); - Ok(()) - }, - }, - Value::Number(number) => match ftype { - FacetType::String => { - output.push(String(number.to_string())); - Ok(()) - }, - FacetType::Number => match number.as_f64() { - Some(float) => { - output.push(Number(OrderedFloat(float))); - Ok(()) - }, - None => bail!("invalid facet type, expecting {} found number", ftype), - }, + Value::Null => (), + Value::Bool(b) => output_strings.push(b.to_string()), + Value::Number(number) => if let Some(float) = number.as_f64() { + output_numbers.push(float); }, Value::String(string) => { // TODO must be normalized and not only lowercased. let string = string.trim().to_lowercase(); - match ftype { - FacetType::String => { - output.push(String(string)); - Ok(()) - }, - FacetType::Number => match string.parse() { - Ok(float) => { - output.push(Number(OrderedFloat(float))); - Ok(()) - }, - Err(_err) => bail!("invalid facet type, expecting {} found string", ftype), - }, - } + output_strings.push(string); }, Value::Array(values) => if can_recurse { - values.iter().map(|v| inner_parse_facet_value(ftype, v, false, output)).collect() - } else { - bail!( - "invalid facet type, expecting {} found array (recursive arrays are not supported)", - ftype, - ); + for value in values { + inner_extract_facet_values(value, false, output_numbers, output_strings); + } }, - Value::Object(_) => bail!("invalid facet type, expecting {} found object", ftype), + Value::Object(_) => (), } } - let mut facet_values = SmallVec8::new(); - inner_parse_facet_value(ftype, value, true, &mut facet_values)?; - Ok(facet_values) + let mut facet_number_values = Vec::new(); + let mut facet_string_values = Vec::new(); + inner_extract_facet_values(value, true, 
&mut facet_number_values, &mut facet_string_values); + + (facet_number_values, facet_string_values) } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index c4d4fcfce..79c447834 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeSet, HashMap}; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::str::FromStr; use anyhow::Context; @@ -11,7 +11,6 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::{FieldsIdsMap, Index}; use crate::criterion::Criterion; -use crate::facet::FacetType; use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; use crate::update::index_documents::{IndexDocumentsMethod, Transform}; @@ -68,7 +67,7 @@ pub struct Settings<'a, 't, 'u, 'i> { searchable_fields: Setting>, displayed_fields: Setting>, - faceted_fields: Setting>, + faceted_fields: Setting>, criteria: Setting>, stop_words: Setting>, distinct_attribute: Setting, @@ -123,7 +122,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.faceted_fields = Setting::Reset; } - pub fn set_faceted_fields(&mut self, names_facet_types: HashMap) { + pub fn set_faceted_fields(&mut self, names_facet_types: HashSet) { self.faceted_fields = Setting::Set(names_facet_types); } @@ -387,11 +386,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { match self.faceted_fields { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; - let mut new_facets = HashMap::new(); - for (name, ty) in fields { + let mut new_facets = HashSet::new(); + for name in fields { fields_ids_map.insert(name).context("field id limit exceeded")?; - let ty = FacetType::from_str(&ty)?; - new_facets.insert(name.clone(), ty); + new_facets.insert(name.clone()); } self.index.put_faceted_fields(self.wtxn, &new_facets)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;