use std::collections::{HashSet, HashMap}; use std::{cmp, fmt}; use std::ops::Bound::Unbounded; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use serde_json::Value; use crate::facet::FacetType; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec}; use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; use crate::search::facet::FacetRange; use crate::{Index, FieldId}; pub struct FacetDistribution<'a> { facets: Option>, candidates: Option, max_values_by_facet: usize, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, } impl<'a> FacetDistribution<'a> { pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> { FacetDistribution { facets: None, candidates: None, max_values_by_facet: 100, rtxn, index } } pub fn facets, A: AsRef>(&mut self, names: I) -> &mut Self { self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect()); self } pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self { self.candidates = Some(candidates); self } pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self { self.max_values_by_facet = cmp::min(max, 1000); self } fn facet_values(&self, field_id: FieldId, facet_type: FacetType) -> heed::Result> { if let Some(candidates) = self.candidates.as_ref().filter(|c| c.len() <= 1000) { let mut key_buffer = vec![field_id]; match facet_type { FacetType::Float => { let mut facet_values = HashSet::new(); for docid in candidates { key_buffer.truncate(1); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = self.index.field_id_docid_facet_values .prefix_iter(self.rtxn, &key_buffer)? .remap_key_type::(); for result in iter { let ((_, _, value), ()) = result?; facet_values.insert(OrderedFloat(value)); } } Ok(facet_values.into_iter().map(|f| Value::from(*f)).collect()) }, FacetType::Integer => { let mut facet_values = HashSet::new(); for docid in candidates { key_buffer.truncate(1); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = self.index.field_id_docid_facet_values .prefix_iter(self.rtxn, &key_buffer)? .remap_key_type::(); for result in iter { let ((_, _, value), ()) = result?; facet_values.insert(value); } } Ok(facet_values.into_iter().map(Value::from).collect()) }, FacetType::String => { let mut facet_values = HashSet::new(); for docid in candidates { key_buffer.truncate(1); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = self.index.field_id_docid_facet_values .prefix_iter(self.rtxn, &key_buffer)? .remap_key_type::(); for result in iter { let ((_, _, value), ()) = result?; facet_values.insert(value); } } Ok(facet_values.into_iter().map(Value::from).collect()) }, } } else { let db = self.index.facet_field_id_value_docids; let iter = match facet_type { FacetType::String => { let iter = db .prefix_iter(&self.rtxn, &[field_id])? .remap_key_type::() .map(|r| r.map(|((_, v), docids)| (Value::from(v), docids))); Box::new(iter) as Box::> }, FacetType::Integer => { let db = db.remap_key_type::(); let range = FacetRange::::new( self.rtxn, db, field_id, 0, Unbounded, Unbounded, )?; Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids)))) }, FacetType::Float => { let db = db.remap_key_type::(); let range = FacetRange::::new( self.rtxn, db, field_id, 0, Unbounded, Unbounded, )?; Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (Value::from(v), docids)))) }, }; let mut facet_values = Vec::new(); for result in iter { let (value, docids) = result?; match &self.candidates { Some(candidates) => if !docids.is_disjoint(candidates) { facet_values.push(value); }, None => facet_values.push(value), } if facet_values.len() == self.max_values_by_facet { break; } } Ok(facet_values) } } pub fn execute(&self) -> heed::Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let faceted_fields = self.index.faceted_fields(self.rtxn)?; let fields_ids: Vec<_> = match &self.facets { Some(names) => { names.iter().filter_map(|n| { let id = fields_ids_map.id(n)?; faceted_fields.get(&id).cloned().map(|t| (id, t)) }).collect() }, None => faceted_fields.iter().map(|(id, t)| (*id, *t)).collect(), }; let mut facets_values = HashMap::new(); for (fid, ftype) in fields_ids { let facet_name = fields_ids_map.name(fid).unwrap(); let values = self.facet_values(fid, ftype)?; facets_values.insert(facet_name.to_string(), values); } Ok(facets_values) } } impl fmt::Debug for FacetDistribution<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _, } = self; f.debug_struct("FacetDistribution") .field("facets", facets) .field("candidates", candidates) .field("max_values_by_facet", max_values_by_facet) .finish() } }