diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index 9485087d3..322843ee0 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -189,8 +189,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> { #[cfg(test)] mod test { - use std::collections::HashSet; - use super::super::test::{generate_index, validate_distinct_candidates}; use super::*; @@ -198,10 +196,7 @@ mod test { ($name:ident, $distinct:literal) => { #[test] fn $name() { - use std::iter::FromIterator; - - let facets = HashSet::from_iter(Some(($distinct.to_string()))); - let (index, fid, candidates) = generate_index($distinct, facets); + let (index, fid, candidates) = generate_index($distinct); let txn = index.read_txn().unwrap(); let mut map_distinct = FacetDistinct::new(fid, &index, &txn); let excluded = RoaringBitmap::new(); diff --git a/milli/src/search/distinct/map_distinct.rs b/milli/src/search/distinct/map_distinct.rs deleted file mode 100644 index 465af2c3b..000000000 --- a/milli/src/search/distinct/map_distinct.rs +++ /dev/null @@ -1,138 +0,0 @@ -use std::collections::HashMap; - -use roaring::RoaringBitmap; -use serde_json::Value; - -use super::{Distinct, DocIter}; -use crate::{DocumentId, FieldId, Index}; - -/// A distinct implementer that is backed by an `HashMap`. -/// -/// Each time a document is seen, the value -/// for its distinct field is added to the map. If the map already contains an entry for this -/// value, then the document is filtered out, and is added to the excluded set. -pub struct MapDistinct<'a> { - distinct: FieldId, - map: HashMap, - index: &'a Index, - txn: &'a heed::RoTxn<'a>, -} - -impl<'a> MapDistinct<'a> { - pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self { - Self { - distinct, - map: HashMap::new(), - index, - txn, - } - } -} - -pub struct MapDistinctIter<'a, 'b> { - distinct: FieldId, - map: &'b mut HashMap, - index: &'a Index, - txn: &'a heed::RoTxn<'a>, - candidates: roaring::bitmap::IntoIter, - excluded: RoaringBitmap, -} - -impl<'a, 'b> MapDistinctIter<'a, 'b> { - /// Performs the next iteration of the mafacetp distinct. This is a convenience method that is - /// called by the Iterator::next implementation that transposes the result. It makes error - /// handling easier. - fn next_inner(&mut self) -> anyhow::Result> { - let map = &mut self.map; - let mut filter = |value: Value| { - let entry = map.entry(value.to_string()).or_insert(0); - *entry += 1; - *entry <= 1 - }; - - while let Some(id) = self.candidates.next() { - let document = self.index.documents(&self.txn, Some(id))?[0].1; - let value = document - .get(self.distinct) - .map(serde_json::from_slice::) - .transpose()?; - - let accept = match value { - Some(Value::Array(values)) => { - let mut accept = true; - for value in values { - accept &= filter(value); - } - accept - } - Some(Value::Null) | Some(Value::Object(_)) | None => true, - Some(value) => filter(value), - }; - - if accept { - return Ok(Some(id)); - } else { - self.excluded.insert(id); - } - } - Ok(None) - } -} - -impl Iterator for MapDistinctIter<'_, '_> { - type Item = anyhow::Result; - - fn next(&mut self) -> Option { - self.next_inner().transpose() - } -} - -impl DocIter for MapDistinctIter<'_, '_> { - fn into_excluded(self) -> RoaringBitmap { - self.excluded - } -} - -impl<'a, 'b> Distinct<'b> for MapDistinct<'a> { - type Iter = MapDistinctIter<'a, 'b>; - - fn distinct(&'b mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter { - MapDistinctIter { - distinct: self.distinct, - map: &mut self.map, - index: &self.index, - txn: &self.txn, - candidates: candidates.into_iter(), - excluded, - } - } -} - -#[cfg(test)] -mod test { - use std::collections::HashSet; - - use super::*; - use super::super::test::{generate_index, validate_distinct_candidates}; - - macro_rules! test_map_distinct { - ($name:ident, $distinct:literal) => { - #[test] - fn $name() { - let (index, fid, candidates) = generate_index($distinct, HashSet::new()); - let txn = index.read_txn().unwrap(); - let mut map_distinct = MapDistinct::new(fid, &index, &txn); - let excluded = RoaringBitmap::new(); - let mut iter = map_distinct.distinct(candidates.clone(), excluded); - let count = validate_distinct_candidates(iter.by_ref(), fid, &index); - let excluded = iter.into_excluded(); - assert_eq!(count as u64 + excluded.len(), candidates.len()); - } - }; - } - - test_map_distinct!(test_string, "txt"); - test_map_distinct!(test_strings, "txts"); - test_map_distinct!(test_int, "cat-int"); - test_map_distinct!(test_ints, "cat-ints"); -} diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index eed475863..68a94ed48 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -1,12 +1,10 @@ mod facet_distinct; -mod map_distinct; mod noop_distinct; use roaring::RoaringBitmap; use crate::DocumentId; pub use facet_distinct::FacetDistinct; -pub use map_distinct::MapDistinct; pub use noop_distinct::NoopDistinct; /// A trait implemented by document interators that are returned by calls to `Distinct::distinct`. @@ -74,7 +72,7 @@ mod test { /// Returns a temporary index populated with random test documents, the FieldId for the /// distinct attribute, and the RoaringBitmap with the document ids. - pub(crate) fn generate_index(distinct: &str, facets: HashSet) -> (TempIndex, FieldId, RoaringBitmap) { + pub(crate) fn generate_index(distinct: &str) -> (TempIndex, FieldId, RoaringBitmap) { let index = TempIndex::new(); let mut txn = index.write_txn().unwrap(); @@ -82,9 +80,6 @@ mod test { let builder = UpdateBuilder::new(0); let mut update = builder.settings(&mut txn, &index); update.set_distinct_attribute(distinct.to_string()); - if !facets.is_empty() { - update.set_filterable_fields(facets) - } update.execute(|_, _| ()).unwrap(); // add documents to the index diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index abf19844e..36a155290 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -12,7 +12,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct}; +use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; use crate::search::criteria::r#final::{Final, FinalResult}; use crate::{Index, DocumentId}; @@ -141,14 +141,8 @@ impl<'a> Search<'a> { Some(name) => { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; let id = field_ids_map.id(name).expect("distinct not present in field map"); - let filterable_fields = self.index.filterable_fields(self.rtxn)?; - if filterable_fields.contains(name) { - let distinct = FacetDistinct::new(id, self.index, self.rtxn); - self.perform_sort(distinct, matching_words, criteria) - } else { - let distinct = MapDistinct::new(id, self.index, self.rtxn); - self.perform_sort(distinct, matching_words, criteria) - } + let distinct = FacetDistinct::new(id, self.index, self.rtxn); + self.perform_sort(distinct, matching_words, criteria) } } }