From 2a3f9b32ff2d928d77ea4281e800023e976ae681 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 12:19:55 +0200 Subject: [PATCH 01/11] Rename the faceted fields into filterable fields --- milli/src/index.rs | 38 ++++++++++---- milli/src/search/distinct/mod.rs | 2 +- milli/src/search/facet/facet_condition.rs | 52 ++++++++++---------- milli/src/search/facet/facet_distribution.rs | 4 +- milli/src/search/mod.rs | 4 +- milli/src/update/facets.rs | 16 +++--- milli/src/update/index_documents/mod.rs | 4 +- milli/src/update/settings.rs | 34 ++++++------- 8 files changed, 87 insertions(+), 67 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index bd057a02a..9a52188dc 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -25,7 +25,7 @@ pub const CRITERIA_KEY: &str = "criteria"; pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key"; pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; -pub const FACETED_FIELDS_KEY: &str = "faceted-fields"; +pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields"; pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution"; pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; @@ -324,19 +324,39 @@ impl Index { } } - /* faceted fields */ + /* filterable fields */ - /// Writes the facet fields names in the database. - pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields: &HashSet) -> heed::Result<()> { - self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields) + /// Writes the filterable fields names in the database. + pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet) -> heed::Result<()> { + self.main.put::<_, Str, SerdeJson<_>>(wtxn, FILTERABLE_FIELDS_KEY, fields) } - /// Deletes the facet fields ids in the database. - pub fn delete_faceted_fields(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY) + /// Deletes the filterable fields ids in the database. + pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(wtxn, FILTERABLE_FIELDS_KEY) } - /// Returns the facet fields names. + /// Returns the filterable fields names. + pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result> { + Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FILTERABLE_FIELDS_KEY)?.unwrap_or_default()) + } + + /// Same as `filterable_fields`, but returns ids instead. + pub fn filterable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result> { + let filterable_fields = self.filterable_fields(rtxn)?; + let fields_ids_map = self.fields_ids_map(rtxn)?; + let filterable_fields = filterable_fields + .iter() + .map(|k| { + fields_ids_map + .id(k) + .ok_or_else(|| format!("{:?} should be present in the field id map", k)) + .expect("corrupted data: ") + }) + .collect(); + + Ok(filterable_fields) + } pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result> { Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) } diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index 0dd628d5b..eed475863 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -83,7 +83,7 @@ mod test { let mut update = builder.settings(&mut txn, &index); update.set_distinct_attribute(distinct.to_string()); if !facets.is_empty() { - update.set_faceted_fields(facets) + update.set_filterable_fields(facets) } update.execute(|_, _| ()).unwrap(); diff --git a/milli/src/search/facet/facet_condition.rs b/milli/src/search/facet/facet_condition.rs index fd7053269..0c1f9e046 100644 --- a/milli/src/search/facet/facet_condition.rs +++ b/milli/src/search/facet/facet_condition.rs @@ -57,7 +57,7 @@ pub enum FacetCondition { fn field_id( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashSet, + filterable_fields: &HashSet, items: &mut Pairs, ) -> Result> { @@ -78,13 +78,13 @@ fn field_id( )), }; - if !faceted_fields.contains(&field_id) { + if !filterable_fields.contains(&field_id) { return Err(PestError::new_from_span( ErrorVariant::CustomError { message: format!( - "attribute `{}` is not faceted, available faceted attributes are: {}", + "attribute `{}` is not filterable, available filterable attributes are: {}", key.as_str(), - faceted_fields.iter().flat_map(|id| { + filterable_fields.iter().flat_map(|id| { fields_ids_map.name(*id) }).collect::>().join(", "), ), @@ -163,9 +163,9 @@ impl FacetCondition { ) -> anyhow::Result { let fields_ids_map = index.fields_ids_map(rtxn)?; - let faceted_fields = index.faceted_fields_ids(rtxn)?; + let filterable_fields = index.filterable_fields_ids(rtxn)?; let lexed = FilterParser::parse(Rule::prgm, expression)?; - FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed) + FacetCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) } fn from_pairs( @@ -212,12 +212,12 @@ impl FacetCondition { fn between( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> anyhow::Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let (lresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap()); @@ -230,12 +230,12 @@ impl FacetCondition { fn equal( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> anyhow::Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let value = items.next().unwrap(); let (result, svalue) = pest_parse(value); @@ -246,12 +246,12 @@ impl FacetCondition { fn greater_than( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> anyhow::Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -261,12 +261,12 @@ impl FacetCondition { fn greater_than_or_equal( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> anyhow::Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -276,12 +276,12 @@ impl FacetCondition { fn lower_than( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> anyhow::Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -291,12 +291,12 @@ impl FacetCondition { fn lower_than_or_equal( fields_ids_map: &FieldsIdsMap, - faceted_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> anyhow::Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -484,10 +484,10 @@ mod tests { options.map_size(10 * 1024 * 1024); // 10 MB let index = Index::new(options, &path).unwrap(); - // Set the faceted fields to be the channel. + // Set the filterable fields to be the channel. let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_faceted_fields(hashset!{ S("channel") }); + builder.set_filterable_fields(hashset!{ S("channel") }); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -513,10 +513,10 @@ mod tests { options.map_size(10 * 1024 * 1024); // 10 MB let index = Index::new(options, &path).unwrap(); - // Set the faceted fields to be the channel. + // Set the filterable fields to be the channel. let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_faceted_fields(hashset!{ "timestamp".into() }); + builder.set_filterable_fields(hashset!{ "timestamp".into() }); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -541,11 +541,11 @@ mod tests { options.map_size(10 * 1024 * 1024); // 10 MB let index = Index::new(options, &path).unwrap(); - // Set the faceted fields to be the channel. + // Set the filterable fields to be the channel. let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_faceted_fields(hashset!{ S("channel"), S("timestamp") }); + builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") }); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -588,11 +588,11 @@ mod tests { options.map_size(10 * 1024 * 1024); // 10 MB let index = Index::new(options, &path).unwrap(); - // Set the faceted fields to be the channel. + // Set the filterable fields to be the channel. let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_faceted_fields(hashset!{ S("channel"), S("timestamp") }); + builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") }); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index c6122cc77..565f4c6dd 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -197,10 +197,10 @@ impl<'a> FacetDistribution<'a> { pub fn execute(&self) -> anyhow::Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; - let faceted_fields = self.index.faceted_fields(self.rtxn)?; + let filterable_fields = self.index.filterable_fields(self.rtxn)?; let mut distribution = BTreeMap::new(); - for name in faceted_fields { + for name in filterable_fields { let fid = fields_ids_map.id(&name).with_context(|| { format!("missing field name {:?} from the fields id map", name) })?; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index fc64d020f..abf19844e 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -141,8 +141,8 @@ impl<'a> Search<'a> { Some(name) => { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; let id = field_ids_map.id(name).expect("distinct not present in field map"); - let faceted_fields = self.index.faceted_fields(self.rtxn)?; - if faceted_fields.contains(name) { + let filterable_fields = self.index.filterable_fields(self.rtxn)?; + if filterable_fields.contains(name) { let distinct = FacetDistinct::new(id, self.index, self.rtxn); self.perform_sort(distinct, matching_words, criteria) } else { diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index f0eab6023..1c235a509 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -57,14 +57,14 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { pub fn execute(self) -> anyhow::Result<()> { self.index.set_updated_at(self.wtxn, &Utc::now())?; - // We get the faceted fields to be able to create the facet levels. - let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; + // We get the filterable fields to be able to create the facet levels. + let filterable_fields = self.index.filterable_fields_ids(self.wtxn)?; debug!("Computing and writing the facet values levels docids into LMDB on disk..."); - for field_id in faceted_fields { - // Compute and store the faceted strings documents ids. - let string_documents_ids = compute_faceted_documents_ids( + for field_id in filterable_fields { + // Compute and store the filterable strings documents ids. + let string_documents_ids = compute_filterable_documents_ids( self.wtxn, self.index.facet_id_string_docids.remap_key_type::(), field_id, @@ -77,8 +77,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { field_id, )?; - // Compute and store the faceted numbers documents ids. - let number_documents_ids = compute_faceted_documents_ids( + // Compute and store the filterable numbers documents ids. + let number_documents_ids = compute_filterable_documents_ids( self.wtxn, self.index.facet_id_f64_docids.remap_key_type::(), field_id, @@ -191,7 +191,7 @@ fn compute_facet_number_levels<'t>( writer_into_reader(writer, shrink_size) } -fn compute_faceted_documents_ids( +fn compute_filterable_documents_ids( rtxn: &heed::RoTxn, db: heed::Database, field_id: u8, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 71f281e98..7efd3631c 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -417,7 +417,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { FacetLevel0NumbersDocids, } - let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; + let filterable_fields = self.index.filterable_fields_ids(self.wtxn)?; let searchable_fields: HashSet<_> = match self.index.searchable_fields_ids(self.wtxn)? { Some(fields) => fields.iter().copied().collect(), None => fields_ids_map.iter().map(|(id, _name)| id).collect(), @@ -453,7 +453,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { .map(|(i, documents)| { let store = Store::new( searchable_fields.clone(), - faceted_fields.clone(), + filterable_fields.clone(), linked_hash_map_size, max_nb_chunks, max_memory_by_job, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 1571f627d..10b6b8cbe 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -66,7 +66,7 @@ pub struct Settings<'a, 't, 'u, 'i> { searchable_fields: Setting>, displayed_fields: Setting>, - faceted_fields: Setting>, + filterable_fields: Setting>, criteria: Setting>, stop_words: Setting>, distinct_attribute: Setting, @@ -92,7 +92,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { thread_pool: None, searchable_fields: Setting::NotSet, displayed_fields: Setting::NotSet, - faceted_fields: Setting::NotSet, + filterable_fields: Setting::NotSet, criteria: Setting::NotSet, stop_words: Setting::NotSet, distinct_attribute: Setting::NotSet, @@ -117,12 +117,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.displayed_fields = Setting::Set(names); } - pub fn reset_faceted_fields(&mut self) { - self.faceted_fields = Setting::Reset; + pub fn reset_filterable_fields(&mut self) { + self.filterable_fields = Setting::Reset; } - pub fn set_faceted_fields(&mut self, names_facet_types: HashSet) { - self.faceted_fields = Setting::Set(names_facet_types); + pub fn set_filterable_fields(&mut self, names: HashSet) { + self.filterable_fields = Setting::Set(names); } pub fn reset_criteria(&mut self) { @@ -267,7 +267,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Setting::Set(ref fields) => { // every time the searchable attributes are updated, we need to update the // ids for any settings that uses the facets. (displayed_fields, - // faceted_fields) + // filterable_fields) let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut new_fields_ids_map = FieldsIdsMap::new(); @@ -382,7 +382,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } fn update_facets(&mut self) -> anyhow::Result { - match self.faceted_fields { + match self.filterable_fields { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut new_facets = HashSet::new(); @@ -390,10 +390,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fields_ids_map.insert(name).context("field id limit exceeded")?; new_facets.insert(name.clone()); } - self.index.put_faceted_fields(self.wtxn, &new_facets)?; + self.index.put_filterable_fields(self.wtxn, &new_facets)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } - Setting::Reset => { self.index.delete_faceted_fields(self.wtxn)?; } + Setting::Reset => { self.index.delete_filterable_fields(self.wtxn)?; } Setting::NotSet => return Ok(false) } Ok(true) @@ -402,10 +402,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_criteria(&mut self) -> anyhow::Result<()> { match self.criteria { Setting::Set(ref fields) => { - let faceted_fields = self.index.faceted_fields(&self.wtxn)?; + let filterable_fields = self.index.filterable_fields(&self.wtxn)?; let mut new_criteria = Vec::new(); for name in fields { - let criterion = Criterion::from_str(&faceted_fields, &name)?; + let criterion = Criterion::from_str(&filterable_fields, &name)?; new_criteria.push(criterion); } self.index.put_criteria(self.wtxn, &new_criteria)?; @@ -611,16 +611,16 @@ mod tests { } #[test] - fn set_faceted_fields() { + fn set_filterable_fields() { let path = tempfile::tempdir().unwrap(); let mut options = EnvOpenOptions::new(); options.map_size(10 * 1024 * 1024); // 10 MB let index = Index::new(options, &path).unwrap(); - // Set the faceted fields to be the age. + // Set the filterable fields to be the age. let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_faceted_fields(hashset!{ S("age") }); + builder.set_filterable_fields(hashset!{ S("age") }); builder.execute(|_, _| ()).unwrap(); // Then index some documents. @@ -637,7 +637,7 @@ mod tests { // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); - let fields_ids = index.faceted_fields(&rtxn).unwrap(); + let fields_ids = index.filterable_fields(&rtxn).unwrap(); assert_eq!(fields_ids, hashset!{ S("age") }); // Only count the field_id 0 and level 0 facet values. // TODO we must support typed CSVs for numbers to be understood. @@ -833,7 +833,7 @@ mod tests { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_displayed_fields(vec!["hello".to_string()]); - builder.set_faceted_fields(hashset!{ S("age"), S("toto") }); + builder.set_filterable_fields(hashset!{ S("age"), S("toto") }); builder.set_criteria(vec!["asc(toto)".to_string()]); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); From ff440c1d9d28e60e682f0352bd9c2b2b789a01a7 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 12:20:29 +0200 Subject: [PATCH 02/11] Introduce the faceted fields method to retrieve those that needs faceting --- milli/src/index.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 9a52188dc..9cfcd841c 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -357,8 +357,29 @@ impl Index { Ok(filterable_fields) } + + /* faceted documents ids */ + + /// Returns the faceted fields names. + /// + /// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields. pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result> { - Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) + let filterable_fields = self.filterable_fields(rtxn)?; + let distinct_field = self.distinct_attribute(rtxn)?; + let asc_desc_fields = self.criteria(rtxn)? + .into_iter() + .filter_map(|criterion| match criterion { + Criterion::Asc(field) | Criterion::Desc(field) => Some(field), + _otherwise => None, + }); + + let mut faceted_fields = filterable_fields; + faceted_fields.extend(asc_desc_fields); + if let Some(field) = distinct_field { + faceted_fields.insert(field.to_owned()); + } + + Ok(faceted_fields) } /// Same as `faceted_fields`, but returns ids instead. From 187c713de53b41a73d2ff696b71b3d32aa93e051 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 12:32:03 +0200 Subject: [PATCH 03/11] Remove the MapDistinct struct as now distinct attributes are faceted --- milli/src/search/distinct/facet_distinct.rs | 7 +- milli/src/search/distinct/map_distinct.rs | 138 -------------------- milli/src/search/distinct/mod.rs | 7 +- milli/src/search/mod.rs | 12 +- 4 files changed, 5 insertions(+), 159 deletions(-) delete mode 100644 milli/src/search/distinct/map_distinct.rs diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index 9485087d3..322843ee0 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -189,8 +189,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> { #[cfg(test)] mod test { - use std::collections::HashSet; - use super::super::test::{generate_index, validate_distinct_candidates}; use super::*; @@ -198,10 +196,7 @@ mod test { ($name:ident, $distinct:literal) => { #[test] fn $name() { - use std::iter::FromIterator; - - let facets = HashSet::from_iter(Some(($distinct.to_string()))); - let (index, fid, candidates) = generate_index($distinct, facets); + let (index, fid, candidates) = generate_index($distinct); let txn = index.read_txn().unwrap(); let mut map_distinct = FacetDistinct::new(fid, &index, &txn); let excluded = RoaringBitmap::new(); diff --git a/milli/src/search/distinct/map_distinct.rs b/milli/src/search/distinct/map_distinct.rs deleted file mode 100644 index 465af2c3b..000000000 --- a/milli/src/search/distinct/map_distinct.rs +++ /dev/null @@ -1,138 +0,0 @@ -use std::collections::HashMap; - -use roaring::RoaringBitmap; -use serde_json::Value; - -use super::{Distinct, DocIter}; -use crate::{DocumentId, FieldId, Index}; - -/// A distinct implementer that is backed by an `HashMap`. -/// -/// Each time a document is seen, the value -/// for its distinct field is added to the map. If the map already contains an entry for this -/// value, then the document is filtered out, and is added to the excluded set. -pub struct MapDistinct<'a> { - distinct: FieldId, - map: HashMap, - index: &'a Index, - txn: &'a heed::RoTxn<'a>, -} - -impl<'a> MapDistinct<'a> { - pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self { - Self { - distinct, - map: HashMap::new(), - index, - txn, - } - } -} - -pub struct MapDistinctIter<'a, 'b> { - distinct: FieldId, - map: &'b mut HashMap, - index: &'a Index, - txn: &'a heed::RoTxn<'a>, - candidates: roaring::bitmap::IntoIter, - excluded: RoaringBitmap, -} - -impl<'a, 'b> MapDistinctIter<'a, 'b> { - /// Performs the next iteration of the mafacetp distinct. This is a convenience method that is - /// called by the Iterator::next implementation that transposes the result. It makes error - /// handling easier. - fn next_inner(&mut self) -> anyhow::Result> { - let map = &mut self.map; - let mut filter = |value: Value| { - let entry = map.entry(value.to_string()).or_insert(0); - *entry += 1; - *entry <= 1 - }; - - while let Some(id) = self.candidates.next() { - let document = self.index.documents(&self.txn, Some(id))?[0].1; - let value = document - .get(self.distinct) - .map(serde_json::from_slice::) - .transpose()?; - - let accept = match value { - Some(Value::Array(values)) => { - let mut accept = true; - for value in values { - accept &= filter(value); - } - accept - } - Some(Value::Null) | Some(Value::Object(_)) | None => true, - Some(value) => filter(value), - }; - - if accept { - return Ok(Some(id)); - } else { - self.excluded.insert(id); - } - } - Ok(None) - } -} - -impl Iterator for MapDistinctIter<'_, '_> { - type Item = anyhow::Result; - - fn next(&mut self) -> Option { - self.next_inner().transpose() - } -} - -impl DocIter for MapDistinctIter<'_, '_> { - fn into_excluded(self) -> RoaringBitmap { - self.excluded - } -} - -impl<'a, 'b> Distinct<'b> for MapDistinct<'a> { - type Iter = MapDistinctIter<'a, 'b>; - - fn distinct(&'b mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter { - MapDistinctIter { - distinct: self.distinct, - map: &mut self.map, - index: &self.index, - txn: &self.txn, - candidates: candidates.into_iter(), - excluded, - } - } -} - -#[cfg(test)] -mod test { - use std::collections::HashSet; - - use super::*; - use super::super::test::{generate_index, validate_distinct_candidates}; - - macro_rules! test_map_distinct { - ($name:ident, $distinct:literal) => { - #[test] - fn $name() { - let (index, fid, candidates) = generate_index($distinct, HashSet::new()); - let txn = index.read_txn().unwrap(); - let mut map_distinct = MapDistinct::new(fid, &index, &txn); - let excluded = RoaringBitmap::new(); - let mut iter = map_distinct.distinct(candidates.clone(), excluded); - let count = validate_distinct_candidates(iter.by_ref(), fid, &index); - let excluded = iter.into_excluded(); - assert_eq!(count as u64 + excluded.len(), candidates.len()); - } - }; - } - - test_map_distinct!(test_string, "txt"); - test_map_distinct!(test_strings, "txts"); - test_map_distinct!(test_int, "cat-int"); - test_map_distinct!(test_ints, "cat-ints"); -} diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index eed475863..68a94ed48 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -1,12 +1,10 @@ mod facet_distinct; -mod map_distinct; mod noop_distinct; use roaring::RoaringBitmap; use crate::DocumentId; pub use facet_distinct::FacetDistinct; -pub use map_distinct::MapDistinct; pub use noop_distinct::NoopDistinct; /// A trait implemented by document interators that are returned by calls to `Distinct::distinct`. @@ -74,7 +72,7 @@ mod test { /// Returns a temporary index populated with random test documents, the FieldId for the /// distinct attribute, and the RoaringBitmap with the document ids. - pub(crate) fn generate_index(distinct: &str, facets: HashSet) -> (TempIndex, FieldId, RoaringBitmap) { + pub(crate) fn generate_index(distinct: &str) -> (TempIndex, FieldId, RoaringBitmap) { let index = TempIndex::new(); let mut txn = index.write_txn().unwrap(); @@ -82,9 +80,6 @@ mod test { let builder = UpdateBuilder::new(0); let mut update = builder.settings(&mut txn, &index); update.set_distinct_attribute(distinct.to_string()); - if !facets.is_empty() { - update.set_filterable_fields(facets) - } update.execute(|_, _| ()).unwrap(); // add documents to the index diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index abf19844e..36a155290 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -12,7 +12,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct}; +use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; use crate::search::criteria::r#final::{Final, FinalResult}; use crate::{Index, DocumentId}; @@ -141,14 +141,8 @@ impl<'a> Search<'a> { Some(name) => { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; let id = field_ids_map.id(name).expect("distinct not present in field map"); - let filterable_fields = self.index.filterable_fields(self.rtxn)?; - if filterable_fields.contains(name) { - let distinct = FacetDistinct::new(id, self.index, self.rtxn); - self.perform_sort(distinct, matching_words, criteria) - } else { - let distinct = MapDistinct::new(id, self.index, self.rtxn); - self.perform_sort(distinct, matching_words, criteria) - } + let distinct = FacetDistinct::new(id, self.index, self.rtxn); + self.perform_sort(distinct, matching_words, criteria) } } } From 1e366dae3e06094f9e764ef619f49b195001427e Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Tue, 1 Jun 2021 14:43:48 +0200 Subject: [PATCH 04/11] remove useless lifetime on Distinct Trait --- milli/src/search/distinct/facet_distinct.rs | 2 +- milli/src/search/distinct/mod.rs | 4 ++-- milli/src/search/distinct/noop_distinct.rs | 2 +- milli/src/search/mod.rs | 7 ++++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index 322843ee0..de7b28141 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -172,7 +172,7 @@ impl DocIter for FacetDistinctIter<'_> { } } -impl<'a> Distinct<'_> for FacetDistinct<'a> { +impl<'a> Distinct for FacetDistinct<'a> { type Iter = FacetDistinctIter<'a>; fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter { diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index 68a94ed48..945beb7e6 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -18,10 +18,10 @@ pub trait DocIter: Iterator> { /// must return an iterator containing only distinct documents, and add the discarded documents to /// the excluded set. The excluded set can later be retrieved by calling `DocIter::excluded` on the /// returned iterator. -pub trait Distinct<'a> { +pub trait Distinct { type Iter: DocIter; - fn distinct(&'a mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter; + fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter; } #[cfg(test)] diff --git a/milli/src/search/distinct/noop_distinct.rs b/milli/src/search/distinct/noop_distinct.rs index 3de9be631..bfaafed85 100644 --- a/milli/src/search/distinct/noop_distinct.rs +++ b/milli/src/search/distinct/noop_distinct.rs @@ -26,7 +26,7 @@ impl DocIter for NoopDistinctIter { } } -impl Distinct<'_> for NoopDistinct { +impl Distinct for NoopDistinct { type Iter = NoopDistinctIter; fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter { diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 36a155290..11f56b7a6 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -147,12 +147,13 @@ impl<'a> Search<'a> { } } - fn perform_sort( + fn perform_sort( &self, - mut distinct: impl for<'c> Distinct<'c>, + mut distinct: D, matching_words: MatchingWords, mut criteria: Final, - ) -> anyhow::Result { + ) -> anyhow::Result + { let mut offset = self.offset; let mut initial_candidates = RoaringBitmap::new(); let mut excluded_candidates = RoaringBitmap::new(); From c10469ddb6e6823b802d001e61a3c1a7bd66e721 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 15:10:34 +0200 Subject: [PATCH 05/11] Patch the http-ui crate to support filterable fields --- http-ui/src/main.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index da3b6204c..c232c0620 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -251,7 +251,7 @@ struct Settings { searchable_attributes: Setting>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] - faceted_attributes: Setting>, + filterable_attributes: Setting>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] criteria: Setting>, @@ -420,9 +420,9 @@ async fn main() -> anyhow::Result<()> { } // We transpose the settings JSON struct into a real setting update. - match settings.faceted_attributes { - Setting::Set(faceted_attributes) => builder.set_faceted_fields(faceted_attributes), - Setting::Reset => builder.reset_faceted_fields(), + match settings.filterable_attributes { + Setting::Set(filterable_attributes) => builder.set_filterable_fields(filterable_attributes), + Setting::Reset => builder.reset_filterable_fields(), Setting::NotSet => () } @@ -996,7 +996,7 @@ mod tests { let settings = Settings { displayed_attributes: Setting::Set(vec!["name".to_string()]), searchable_attributes: Setting::Set(vec!["age".to_string()]), - faceted_attributes: Setting::Set(hashset!{ "age".to_string() }), + filterable_attributes: Setting::Set(hashset!{ "age".to_string() }), criteria: Setting::Set(vec!["asc(age)".to_string()]), stop_words: Setting::Set(btreeset! { "and".to_string() }), synonyms: Setting::Set(hashmap!{ "alex".to_string() => vec!["alexey".to_string()] }) @@ -1047,7 +1047,7 @@ mod tests { let settings = Settings { displayed_attributes: Setting::Reset, searchable_attributes: Setting::Reset, - faceted_attributes: Setting::Reset, + filterable_attributes: Setting::Reset, criteria: Setting::Reset, stop_words: Setting::Reset, synonyms: Setting::Reset, @@ -1076,7 +1076,7 @@ mod tests { let settings = Settings { displayed_attributes: Setting::NotSet, searchable_attributes: Setting::NotSet, - faceted_attributes: Setting::NotSet, + filterable_attributes: Setting::NotSet, criteria: Setting::NotSet, stop_words: Setting::NotSet, synonyms: Setting::NotSet, From 6476827d3a576b1f20294d9d08d08a7c8f271598 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 15:22:05 +0200 Subject: [PATCH 06/11] Fix the indexer to be sure that distinct and Asc/Desc are also faceted --- milli/src/update/index_documents/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 7efd3631c..71f281e98 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -417,7 +417,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { FacetLevel0NumbersDocids, } - let filterable_fields = self.index.filterable_fields_ids(self.wtxn)?; + let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; let searchable_fields: HashSet<_> = match self.index.searchable_fields_ids(self.wtxn)? { Some(fields) => fields.iter().copied().collect(), None => fields_ids_map.iter().map(|(id, _name)| id).collect(), @@ -453,7 +453,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { .map(|(i, documents)| { let store = Store::new( searchable_fields.clone(), - filterable_fields.clone(), + faceted_fields.clone(), linked_hash_map_size, max_nb_chunks, max_memory_by_job, From c2afdbb1fb319edd1186081c90262a5cf43323b0 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 15:22:58 +0200 Subject: [PATCH 07/11] Move and comment some internal facet_condition helper functions --- milli/src/search/facet/facet_condition.rs | 121 ++++++++++++---------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/milli/src/search/facet/facet_condition.rs b/milli/src/search/facet/facet_condition.rs index 0c1f9e046..2ff997270 100644 --- a/milli/src/search/facet/facet_condition.rs +++ b/milli/src/search/facet/facet_condition.rs @@ -55,62 +55,6 @@ pub enum FacetCondition { And(Box, Box), } -fn field_id( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - items: &mut Pairs, -) -> Result> -{ - // lexing ensures that we at least have a key - let key = items.next().unwrap(); - - let field_id = match fields_ids_map.id(key.as_str()) { - Some(field_id) => field_id, - None => return Err(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!( - "attribute `{}` not found, available attributes are: {}", - key.as_str(), - fields_ids_map.iter().map(|(_, n)| n).collect::>().join(", "), - ), - }, - key.as_span(), - )), - }; - - if !filterable_fields.contains(&field_id) { - return Err(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!( - "attribute `{}` is not filterable, available filterable attributes are: {}", - key.as_str(), - filterable_fields.iter().flat_map(|id| { - fields_ids_map.name(*id) - }).collect::>().join(", "), - ), - }, - key.as_span(), - )); - } - - Ok(field_id) -} - -fn pest_parse(pair: Pair) -> (Result>, String) -where T: FromStr, - T::Err: ToString, -{ - let result = match pair.as_str().parse::() { - Ok(value) => Ok(value), - Err(e) => Err(PestError::::new_from_span( - ErrorVariant::CustomError { message: e.to_string() }, - pair.as_span(), - )), - }; - - (result, pair.as_str().to_string()) -} - impl FacetCondition { pub fn from_array( rtxn: &heed::RoTxn, @@ -469,6 +413,71 @@ impl FacetCondition { } } +/// Retrieve the field id base on the pest value, returns an error is +/// the field does not exist or is not filterable. +/// +/// The pest pair is simply a string associated with a span, a location to highlight in +/// the error message. +fn field_id( + fields_ids_map: &FieldsIdsMap, + filterable_fields: &HashSet, + items: &mut Pairs, +) -> Result> +{ + // lexing ensures that we at least have a key + let key = items.next().unwrap(); + + let field_id = match fields_ids_map.id(key.as_str()) { + Some(field_id) => field_id, + None => return Err(PestError::new_from_span( + ErrorVariant::CustomError { + message: format!( + "attribute `{}` not found, available attributes are: {}", + key.as_str(), + fields_ids_map.iter().map(|(_, n)| n).collect::>().join(", "), + ), + }, + key.as_span(), + )), + }; + + if !filterable_fields.contains(&field_id) { + return Err(PestError::new_from_span( + ErrorVariant::CustomError { + message: format!( + "attribute `{}` is not filterable, available filterable attributes are: {}", + key.as_str(), + filterable_fields.iter().flat_map(|id| { + fields_ids_map.name(*id) + }).collect::>().join(", "), + ), + }, + key.as_span(), + )); + } + + Ok(field_id) +} + +/// Tries to parse the pest pair into the type `T` specified, always returns +/// the original string that we tried to parse. +/// +/// Returns the parsing error associated with the span if the conversion fails. +fn pest_parse(pair: Pair) -> (Result>, String) +where T: FromStr, + T::Err: ToString, +{ + let result = match pair.as_str().parse::() { + Ok(value) => Ok(value), + Err(e) => Err(PestError::::new_from_span( + ErrorVariant::CustomError { message: e.to_string() }, + pair.as_span(), + )), + }; + + (result, pair.as_str().to_string()) +} + #[cfg(test)] mod tests { use super::*; From 3b1cd4c4b437baf1a80625ade4b4f01bae3ed91c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 15:25:17 +0200 Subject: [PATCH 08/11] Rename the FacetCondition into FilterCondition --- http-ui/src/main.rs | 10 ++-- milli/src/lib.rs | 2 +- ...facet_condition.rs => filter_condition.rs} | 50 +++++++++---------- milli/src/search/facet/mod.rs | 4 +- milli/src/search/mod.rs | 20 ++++---- 5 files changed, 43 insertions(+), 43 deletions(-) rename milli/src/search/facet/{facet_condition.rs => filter_condition.rs} (93%) diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index c232c0620..b6a894373 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -29,7 +29,7 @@ use tokio::sync::broadcast; use warp::{Filter, http::Response}; use warp::filters::ws::Message; -use milli::{FacetCondition, Index, MatchingWords, obkv_to_json, SearchResult, UpdateStore}; +use milli::{FilterCondition, Index, MatchingWords, obkv_to_json, SearchResult, UpdateStore}; use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat}; use milli::update::UpdateIndexingStep::*; @@ -690,7 +690,7 @@ async fn main() -> anyhow::Result<()> { let filters = match query.filters { Some(condition) if !condition.trim().is_empty() => { - Some(FacetCondition::from_str(&rtxn, &index, &condition).unwrap()) + Some(FilterCondition::from_str(&rtxn, &index, &condition).unwrap()) } _otherwise => None, }; @@ -698,21 +698,21 @@ async fn main() -> anyhow::Result<()> { let facet_filters = match query.facet_filters { Some(array) => { let eithers = array.into_iter().map(Into::into); - FacetCondition::from_array(&rtxn, &index, eithers).unwrap() + FilterCondition::from_array(&rtxn, &index, eithers).unwrap() } _otherwise => None, }; let condition = match (filters, facet_filters) { (Some(filters), Some(facet_filters)) => { - Some(FacetCondition::And(Box::new(filters), Box::new(facet_filters))) + Some(FilterCondition::And(Box::new(filters), Box::new(facet_filters))) } (Some(condition), None) | (None, Some(condition)) => Some(condition), _otherwise => None, }; if let Some(condition) = condition { - search.facet_condition(condition); + search.filter(condition); } let SearchResult { matching_words, candidates, documents_ids } = search.execute().unwrap(); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index e4b58765e..39e107073 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -27,7 +27,7 @@ pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec}; pub use self::index::Index; -pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords}; +pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords}; pub use self::tree_level::TreeLevel; pub use self::update_store::UpdateStore; diff --git a/milli/src/search/facet/facet_condition.rs b/milli/src/search/facet/filter_condition.rs similarity index 93% rename from milli/src/search/facet/facet_condition.rs rename to milli/src/search/facet/filter_condition.rs index 2ff997270..f58443b6f 100644 --- a/milli/src/search/facet/facet_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -18,7 +18,7 @@ use super::FacetRange; use super::parser::Rule; use super::parser::{PREC_CLIMBER, FilterParser}; -use self::FacetCondition::*; +use self::FilterCondition::*; use self::Operator::*; #[derive(Debug, Clone, PartialEq)] @@ -49,18 +49,18 @@ impl Operator { } #[derive(Debug, Clone, PartialEq)] -pub enum FacetCondition { +pub enum FilterCondition { Operator(FieldId, Operator), Or(Box, Box), And(Box, Box), } -impl FacetCondition { +impl FilterCondition { pub fn from_array( rtxn: &heed::RoTxn, index: &Index, array: I, - ) -> anyhow::Result> + ) -> anyhow::Result> where I: IntoIterator>, J: IntoIterator, A: AsRef, @@ -73,7 +73,7 @@ impl FacetCondition { Either::Left(array) => { let mut ors = None; for rule in array { - let condition = FacetCondition::from_str(rtxn, index, rule.as_ref())?; + let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; ors = match ors.take() { Some(ors) => Some(Or(Box::new(ors), Box::new(condition))), None => Some(condition), @@ -88,7 +88,7 @@ impl FacetCondition { } }, Either::Right(rule) => { - let condition = FacetCondition::from_str(rtxn, index, rule.as_ref())?; + let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; ands = match ands.take() { Some(ands) => Some(And(Box::new(ands), Box::new(condition))), None => Some(condition), @@ -104,12 +104,12 @@ impl FacetCondition { rtxn: &heed::RoTxn, index: &Index, expression: &str, - ) -> anyhow::Result + ) -> anyhow::Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = index.filterable_fields_ids(rtxn)?; let lexed = FilterParser::parse(Rule::prgm, expression)?; - FacetCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) + FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) } fn from_pairs( @@ -143,7 +143,7 @@ impl FacetCondition { ) } - fn negate(self) -> FacetCondition { + fn negate(self) -> FilterCondition { match self { Operator(fid, op) => match op.negate() { (op, None) => Operator(fid, op), @@ -158,7 +158,7 @@ impl FacetCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> anyhow::Result { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; @@ -176,7 +176,7 @@ impl FacetCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> anyhow::Result { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; @@ -192,7 +192,7 @@ impl FacetCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> anyhow::Result { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; @@ -207,7 +207,7 @@ impl FacetCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> anyhow::Result { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; @@ -222,7 +222,7 @@ impl FacetCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> anyhow::Result { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; @@ -237,7 +237,7 @@ impl FacetCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> anyhow::Result { let mut items = item.into_inner(); let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; @@ -249,7 +249,7 @@ impl FacetCondition { } } -impl FacetCondition { +impl FilterCondition { /// Aggregates the documents ids that are part of the specified range automatically /// going deeper through the levels. fn explore_facet_number_levels( @@ -502,15 +502,15 @@ mod tests { // Test that the facet condition is correctly generated. let rtxn = index.read_txn().unwrap(); - let condition = FacetCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap(); let expected = Operator(0, Operator::Equal(None, S("ponce"))); assert_eq!(condition, expected); - let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); let expected = Operator(0, Operator::NotEqual(None, S("ponce"))); assert_eq!(condition, expected); - let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); let expected = Operator(0, Operator::NotEqual(None, S("ponce"))); assert_eq!(condition, expected); } @@ -531,11 +531,11 @@ mod tests { // Test that the facet condition is correctly generated. let rtxn = index.read_txn().unwrap(); - let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); let expected = Operator(0, Between(22.0, 44.0)); assert_eq!(condition, expected); - let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); let expected = Or( Box::new(Operator(0, LowerThan(22.0))), Box::new(Operator(0, GreaterThan(44.0))), @@ -560,7 +560,7 @@ mod tests { // Test that the facet condition is correctly generated. let rtxn = index.read_txn().unwrap(); - let condition = FacetCondition::from_str( + let condition = FilterCondition::from_str( &rtxn, &index, "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", ).unwrap(); @@ -573,7 +573,7 @@ mod tests { ); assert_eq!(condition, expected); - let condition = FacetCondition::from_str( + let condition = FilterCondition::from_str( &rtxn, &index, "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", ).unwrap(); @@ -607,11 +607,11 @@ mod tests { // Test that the facet condition is correctly generated. let rtxn = index.read_txn().unwrap(); - let condition = FacetCondition::from_array( + let condition = FilterCondition::from_array( &rtxn, &index, vec![Either::Right("channel = gotaga"), Either::Left(vec!["timestamp = 44", "channel != ponce"])], ).unwrap().unwrap(); - let expected = FacetCondition::from_str( + let expected = FilterCondition::from_str( &rtxn, &index, "channel = gotaga AND (timestamp = 44 OR channel != ponce)", ).unwrap(); diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index fff1d14a8..a5e02fc9f 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -9,10 +9,10 @@ use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::{Index, FieldId}; -pub use self::facet_condition::{FacetCondition, Operator}; +pub use self::filter_condition::{FilterCondition, Operator}; pub use self::facet_distribution::FacetDistribution; -mod facet_condition; +mod filter_condition; mod facet_distribution; mod parser; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 11f56b7a6..c152d47a4 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -16,7 +16,7 @@ use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; use crate::search::criteria::r#final::{Final, FinalResult}; use crate::{Index, DocumentId}; -pub use self::facet::{FacetCondition, FacetDistribution, FacetIter, Operator}; +pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; @@ -33,7 +33,7 @@ mod matching_words; pub struct Search<'a> { query: Option, - facet_condition: Option, + filter: Option, offset: usize, limit: usize, optional_words: bool, @@ -47,7 +47,7 @@ impl<'a> Search<'a> { pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> { Search { query: None, - facet_condition: None, + filter: None, offset: 0, limit: 20, optional_words: true, @@ -88,8 +88,8 @@ impl<'a> Search<'a> { self } - pub fn facet_condition(&mut self, condition: FacetCondition) -> &mut Search<'a> { - self.facet_condition = Some(condition); + pub fn filter(&mut self, condition: FilterCondition) -> &mut Search<'a> { + self.filter = Some(condition); self } @@ -121,12 +121,12 @@ impl<'a> Search<'a> { // We create the original candidates with the facet conditions results. let before = Instant::now(); - let facet_candidates = match &self.facet_condition { + let filtered_candidates = match &self.filter { Some(condition) => Some(condition.evaluate(self.rtxn, self.index)?), None => None, }; - debug!("facet candidates: {:?} took {:.02?}", facet_candidates, before.elapsed()); + debug!("facet candidates: {:?} took {:.02?}", filtered_candidates, before.elapsed()); let matching_words = match query_tree.as_ref() { Some(query_tree) => MatchingWords::from_query_tree(&query_tree), @@ -134,7 +134,7 @@ impl<'a> Search<'a> { }; let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?; - let criteria = criteria_builder.build(query_tree, primitive_query, facet_candidates)?; + let criteria = criteria_builder.build(query_tree, primitive_query, filtered_candidates)?; match self.index.distinct_attribute(self.rtxn)? { None => self.perform_sort(NoopDistinct, matching_words, criteria), @@ -188,7 +188,7 @@ impl fmt::Debug for Search<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let Search { query, - facet_condition, + filter, offset, limit, optional_words, @@ -199,7 +199,7 @@ impl fmt::Debug for Search<'_> { } = self; f.debug_struct("Search") .field("query", query) - .field("facet_condition", facet_condition) + .field("filter", filter) .field("offset", offset) .field("limit", limit) .field("optional_words", optional_words) From b0c0490e857be4e9cd36ad74514e000e6c50158f Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 15:48:38 +0200 Subject: [PATCH 09/11] Make sure that we can add a Asc/Desc field without it being filterable --- milli/src/criterion.rs | 11 +++++------ milli/src/update/settings.rs | 25 ++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index 1d7326db7..81a2878b3 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -1,5 +1,5 @@ -use std::collections::HashSet; use std::fmt; +use std::str::FromStr; use anyhow::{Context, bail}; use regex::Regex; @@ -30,8 +30,10 @@ pub enum Criterion { Desc(String), } -impl Criterion { - pub fn from_str(faceted_attributes: &HashSet, txt: &str) -> anyhow::Result { +impl FromStr for Criterion { + type Err = anyhow::Error; + + fn from_str(txt: &str) -> Result { match txt { "words" => Ok(Criterion::Words), "typo" => Ok(Criterion::Typo), @@ -42,9 +44,6 @@ impl Criterion { let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; let order = caps.get(1).unwrap().as_str(); let field_name = caps.get(2).unwrap().as_str(); - faceted_attributes.get(field_name).with_context(|| { - format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name) - })?; match order { "asc" => Ok(Criterion::Asc(field_name.to_string())), "desc" => Ok(Criterion::Desc(field_name.to_string())), diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 10b6b8cbe..eba14a17c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -9,7 +9,6 @@ use rayon::ThreadPool; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::{FieldsIdsMap, Index}; -use crate::criterion::Criterion; use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; use crate::update::index_documents::{IndexDocumentsMethod, Transform}; @@ -402,10 +401,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_criteria(&mut self) -> anyhow::Result<()> { match self.criteria { Setting::Set(ref fields) => { - let filterable_fields = self.index.filterable_fields(&self.wtxn)?; let mut new_criteria = Vec::new(); for name in fields { - let criterion = Criterion::from_str(&filterable_fields, &name)?; + let criterion = name.parse()?; new_criteria.push(criterion); } self.index.put_criteria(self.wtxn, &new_criteria)?; @@ -446,6 +444,7 @@ mod tests { use maplit::{btreeset, hashmap, hashset}; use big_s::S; + use crate::{Criterion, FilterCondition}; use crate::update::{IndexDocuments, UpdateFormat}; use super::*; @@ -858,4 +857,24 @@ mod tests { assert!(index.primary_key(&rtxn).unwrap().is_none()); assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); } + + #[test] + fn setting_not_filterable_cant_filter() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set all the settings except searchable + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_displayed_fields(vec!["hello".to_string()]); + // It is only Asc(toto), there is a facet database but it is denied to filter with toto. + builder.set_criteria(vec!["asc(toto)".to_string()]); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + FilterCondition::from_str(&rtxn, &index, "toto = 32").unwrap_err(); + } } From 3c304c89d4885fd3fd0cbef58826bf0fa3aace80 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 1 Jun 2021 16:29:14 +0200 Subject: [PATCH 10/11] Make sure that we generate the faceted database when required --- milli/src/index.rs | 18 ++-- milli/src/search/distinct/mod.rs | 2 +- milli/src/search/mod.rs | 2 +- milli/src/update/facets.rs | 16 ++-- milli/src/update/settings.rs | 139 ++++++++++++++++++++++++++----- 5 files changed, 135 insertions(+), 42 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 9cfcd841c..4e32f673a 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -23,7 +23,7 @@ use crate::fields_ids_map::FieldsIdsMap; pub const CRITERIA_KEY: &str = "criteria"; pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; -pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key"; +pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key"; pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields"; pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution"; @@ -365,7 +365,7 @@ impl Index { /// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields. pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result> { let filterable_fields = self.filterable_fields(rtxn)?; - let distinct_field = self.distinct_attribute(rtxn)?; + let distinct_field = self.distinct_field(rtxn)?; let asc_desc_fields = self.criteria(rtxn)? .into_iter() .filter_map(|criterion| match criterion { @@ -465,18 +465,18 @@ impl Index { } } - /* Distinct attribute */ + /* distinct field */ - pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> { - self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute) + pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> { + self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field) } - pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result> { - self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY) + pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result> { + self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY) } - pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY) + pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY) } /* criteria */ diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index 945beb7e6..1b7c69c7a 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -79,7 +79,7 @@ mod test { // set distinct and faceted attributes for the index. let builder = UpdateBuilder::new(0); let mut update = builder.settings(&mut txn, &index); - update.set_distinct_attribute(distinct.to_string()); + update.set_distinct_field(distinct.to_string()); update.execute(|_, _| ()).unwrap(); // add documents to the index diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index c152d47a4..872ebfca6 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -136,7 +136,7 @@ impl<'a> Search<'a> { let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?; let criteria = criteria_builder.build(query_tree, primitive_query, filtered_candidates)?; - match self.index.distinct_attribute(self.rtxn)? { + match self.index.distinct_field(self.rtxn)? { None => self.perform_sort(NoopDistinct, matching_words, criteria), Some(name) => { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index 1c235a509..f0eab6023 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -57,14 +57,14 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { pub fn execute(self) -> anyhow::Result<()> { self.index.set_updated_at(self.wtxn, &Utc::now())?; - // We get the filterable fields to be able to create the facet levels. - let filterable_fields = self.index.filterable_fields_ids(self.wtxn)?; + // We get the faceted fields to be able to create the facet levels. + let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; debug!("Computing and writing the facet values levels docids into LMDB on disk..."); - for field_id in filterable_fields { - // Compute and store the filterable strings documents ids. - let string_documents_ids = compute_filterable_documents_ids( + for field_id in faceted_fields { + // Compute and store the faceted strings documents ids. + let string_documents_ids = compute_faceted_documents_ids( self.wtxn, self.index.facet_id_string_docids.remap_key_type::(), field_id, @@ -77,8 +77,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { field_id, )?; - // Compute and store the filterable numbers documents ids. - let number_documents_ids = compute_filterable_documents_ids( + // Compute and store the faceted numbers documents ids. + let number_documents_ids = compute_faceted_documents_ids( self.wtxn, self.index.facet_id_f64_docids.remap_key_type::(), field_id, @@ -191,7 +191,7 @@ fn compute_facet_number_levels<'t>( writer_into_reader(writer, shrink_size) } -fn compute_filterable_documents_ids( +fn compute_faceted_documents_ids( rtxn: &heed::RoTxn, db: heed::Database, field_id: u8, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index eba14a17c..ef32c5c44 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -68,7 +68,7 @@ pub struct Settings<'a, 't, 'u, 'i> { filterable_fields: Setting>, criteria: Setting>, stop_words: Setting>, - distinct_attribute: Setting, + distinct_field: Setting, synonyms: Setting>>, } @@ -94,7 +94,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { filterable_fields: Setting::NotSet, criteria: Setting::NotSet, stop_words: Setting::NotSet, - distinct_attribute: Setting::NotSet, + distinct_field: Setting::NotSet, synonyms: Setting::NotSet, update_id, } @@ -144,12 +144,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } - pub fn reset_distinct_attribute(&mut self) { - self.distinct_attribute = Setting::Reset; + pub fn reset_distinct_field(&mut self) { + self.distinct_field = Setting::Reset; } - pub fn set_distinct_attribute(&mut self, distinct_attribute: String) { - self.distinct_attribute = Setting::Set(distinct_attribute); + pub fn set_distinct_field(&mut self, distinct_field: String) { + self.distinct_field = Setting::Set(distinct_field); } pub fn reset_synonyms(&mut self) { @@ -165,8 +165,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> - where - F: Fn(UpdateIndexingStep, u64) + Sync + where + F: Fn(UpdateIndexingStep, u64) + Sync { let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let update_id = self.update_id; @@ -197,7 +197,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { let output = transform.remap_index_documents( primary_key.to_string(), old_fields_ids_map, - fields_ids_map.clone())?; + fields_ids_map.clone(), + )?; // We clear the full database (words-fst, documents ids and documents content). ClearDocuments::new(self.wtxn, self.index, self.update_id).execute()?; @@ -214,6 +215,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size; indexing_builder.thread_pool = self.thread_pool; indexing_builder.execute_raw(output, &cb)?; + Ok(()) } @@ -242,18 +244,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(true) } - fn update_distinct_attribute(&mut self) -> anyhow::Result { - match self.distinct_attribute { + fn update_distinct_field(&mut self) -> anyhow::Result { + match self.distinct_field { Setting::Set(ref attr) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; fields_ids_map .insert(attr) .context("field id limit exceeded")?; - self.index.put_distinct_attribute(self.wtxn, &attr)?; + self.index.put_distinct_field(self.wtxn, &attr)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } - Setting::Reset => { self.index.delete_distinct_attribute(self.wtxn)?; }, + Setting::Reset => { self.index.delete_distinct_field(self.wtxn)?; }, Setting::NotSet => return Ok(false), } Ok(true) @@ -380,7 +382,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } - fn update_facets(&mut self) -> anyhow::Result { + fn update_filterable(&mut self) -> anyhow::Result<()> { match self.filterable_fields { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; @@ -393,9 +395,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } Setting::Reset => { self.index.delete_filterable_fields(self.wtxn)?; } - Setting::NotSet => return Ok(false) + Setting::NotSet => (), } - Ok(true) + Ok(()) } fn update_criteria(&mut self) -> anyhow::Result<()> { @@ -419,20 +421,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { F: Fn(UpdateIndexingStep, u64) + Sync { self.index.set_updated_at(self.wtxn, &Utc::now())?; + + let old_faceted_fields = self.index.faceted_fields(&self.wtxn)?; let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?; + self.update_displayed()?; - let stop_words_updated = self.update_stop_words()?; - let facets_updated = self.update_facets()?; - self.update_distinct_attribute()?; - // update_criteria MUST be called after update_facets, since criterion fields must be set - // as facets. + self.update_filterable()?; + self.update_distinct_field()?; self.update_criteria()?; + + // If there is new faceted fields we indicate that we must reindex as we must + // index new fields as facets. It means that the distinct attribute, + // an Asc/Desc criterion or a filtered attribute as be added or removed. + let new_faceted_fields = self.index.faceted_fields(&self.wtxn)?; + let faceted_updated = old_faceted_fields != new_faceted_fields; + + let stop_words_updated = self.update_stop_words()?; let synonyms_updated = self.update_synonyms()?; let searchable_updated = self.update_searchable()?; - if stop_words_updated || facets_updated || synonyms_updated || searchable_updated { + if stop_words_updated || faceted_updated || synonyms_updated || searchable_updated { self.reindex(&progress_callback, old_fields_ids_map)?; } + Ok(()) } } @@ -444,7 +455,7 @@ mod tests { use maplit::{btreeset, hashmap, hashset}; use big_s::S; - use crate::{Criterion, FilterCondition}; + use crate::{Criterion, FilterCondition, SearchResult}; use crate::update::{IndexDocuments, UpdateFormat}; use super::*; @@ -669,6 +680,88 @@ mod tests { assert_eq!(count, 4); } + #[test] + fn set_asc_desc_field() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the age. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + // Don't display the generated `id` field. + builder.set_displayed_fields(vec![S("name"), S("age")]); + builder.set_criteria(vec![S("asc(age)")]); + builder.execute(|_, _| ()).unwrap(); + + // Then index some documents. + let content = &br#"[ + { "name": "kevin", "age": 23 }, + { "name": "kevina", "age": 21 }, + { "name": "benoit", "age": 34 } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); + builder.update_format(UpdateFormat::Json); + builder.enable_autogenerate_docids(); + builder.execute(content, |_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Run an empty query just to ensure that the search results are ordered. + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); + let documents = index.documents(&rtxn, documents_ids).unwrap(); + + // Fetch the documents "age" field in the ordre in which the documents appear. + let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap(); + let iter = documents.into_iter().map(|(_, doc)| { + let bytes = doc.get(age_field_id).unwrap(); + let string = std::str::from_utf8(bytes).unwrap(); + string.parse::().unwrap() + }); + + assert_eq!(iter.collect::>(), vec![21, 23, 34]); + } + + #[test] + fn set_distinct_field() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the age. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + // Don't display the generated `id` field. + builder.set_displayed_fields(vec![S("name"), S("age")]); + builder.set_distinct_field(S("age")); + builder.execute(|_, _| ()).unwrap(); + + // Then index some documents. + let content = &br#"[ + { "name": "kevin", "age": 23 }, + { "name": "kevina", "age": 21 }, + { "name": "benoit", "age": 34 }, + { "name": "bernard", "age": 34 }, + { "name": "bertrand", "age": 34 }, + { "name": "bernie", "age": 34 }, + { "name": "ben", "age": 34 } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); + builder.update_format(UpdateFormat::Json); + builder.enable_autogenerate_docids(); + builder.execute(content, |_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Run an empty query just to ensure that the search results are ordered. + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); + + // There must be at least one document with a 34 as the age. + assert_eq!(documents_ids.len(), 3); + } + #[test] fn default_stop_words() { let path = tempfile::tempdir().unwrap(); From 26a9974667fc10cec2e27557f52e2e00c953642b Mon Sep 17 00:00:00 2001 From: many Date: Wed, 2 Jun 2021 16:30:56 +0200 Subject: [PATCH 11/11] Make asc/desc criterion return resting documents Fix #161.2 --- milli/src/search/criteria/asc_desc.rs | 24 +++++++++++++++++++----- milli/src/search/criteria/mod.rs | 4 ++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index c80bb38f1..f90f3e421 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -24,6 +24,7 @@ pub struct AscDesc<'t> { ascending: bool, query_tree: Option, candidates: Box> + 't>, + allowed_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap, faceted_candidates: RoaringBitmap, parent: Box, @@ -68,6 +69,7 @@ impl<'t> AscDesc<'t> { ascending, query_tree: None, candidates: Box::new(std::iter::empty()), + allowed_candidates: RoaringBitmap::new(), faceted_candidates: index.number_faceted_documents_ids(rtxn, field_id)?, bucket_candidates: RoaringBitmap::new(), parent, @@ -78,6 +80,9 @@ impl<'t> AscDesc<'t> { impl<'t> Criterion for AscDesc<'t> { #[logging_timer::time("AscDesc::{}")] fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + // remove excluded candidates when next is called, instead of doing it in the loop. + self.allowed_candidates -= params.excluded_candidates; + loop { debug!( "Facet {}({}) iteration", @@ -86,18 +91,25 @@ impl<'t> Criterion for AscDesc<'t> { ); match self.candidates.next().transpose()? { + None if !self.allowed_candidates.is_empty() => { + return Ok(Some(CriterionResult { + query_tree: self.query_tree.clone(), + candidates: Some(take(&mut self.allowed_candidates)), + filtered_candidates: None, + bucket_candidates: Some(take(&mut self.bucket_candidates)), + })); + }, None => { match self.parent.next(params)? { Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => { self.query_tree = query_tree; let mut candidates = match (&self.query_tree, candidates) { - (_, Some(candidates)) => candidates & &self.faceted_candidates, + (_, Some(candidates)) => candidates, (Some(qt), None) => { let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; - let candidates = resolve_query_tree(&context, qt, params.wdcache)?; - candidates & &self.faceted_candidates + resolve_query_tree(&context, qt, params.wdcache)? }, - (None, None) => take(&mut self.faceted_candidates), + (None, None) => self.index.documents_ids(self.rtxn)?, }; if let Some(filtered_candidates) = filtered_candidates { @@ -113,12 +125,13 @@ impl<'t> Criterion for AscDesc<'t> { continue; } + self.allowed_candidates = &candidates - params.excluded_candidates; self.candidates = facet_ordered( self.index, self.rtxn, self.field_id, self.ascending, - candidates, + candidates & &self.faceted_candidates, )?; }, None => return Ok(None), @@ -126,6 +139,7 @@ impl<'t> Criterion for AscDesc<'t> { }, Some(mut candidates) => { candidates -= params.excluded_candidates; + self.allowed_candidates -= &candidates; return Ok(Some(CriterionResult { query_tree: self.query_tree.clone(), candidates: Some(candidates), diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 456d16e1a..e4ca66b2c 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -203,14 +203,14 @@ impl<'t> CriteriaBuilder<'t> { &'t self, query_tree: Option, primitive_query: Option>, - facet_candidates: Option, + filtered_candidates: Option, ) -> anyhow::Result> { use crate::criterion::Criterion as Name; let primitive_query = primitive_query.unwrap_or_default(); - let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box; + let mut criterion = Box::new(Initial::new(query_tree, filtered_candidates)) as Box; for name in self.index.criteria(&self.rtxn)? { criterion = match name { Name::Typo => Box::new(Typo::new(self, criterion)),