Reintroduce facet distribution functionality

This commit is contained in:
Loïc Lecrenier 2022-08-31 07:50:18 +02:00 committed by Loïc Lecrenier
parent b8a1caad5e
commit 5a904cf29d
3 changed files with 72 additions and 70 deletions

View File

@ -1,13 +1,18 @@
use std::collections::{BTreeMap, HashSet}; use std::collections::{BTreeMap, HashSet};
use std::ops::Bound::Unbounded; use std::ops::ControlFlow;
use std::{fmt, mem}; use std::{fmt, mem};
use heed::types::ByteSlice; use heed::types::ByteSlice;
use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::UserError; use crate::error::UserError;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
use crate::search::facet::facet_distribution_iter;
// use crate::search::facet::FacetStringIter; // use crate::search::facet::FacetStringIter;
use crate::{FieldId, Index, Result}; use crate::{FieldId, Index, Result};
@ -131,22 +136,21 @@ impl<'a> FacetDistribution<'a> {
candidates: &RoaringBitmap, candidates: &RoaringBitmap,
distribution: &mut BTreeMap<String, u64>, distribution: &mut BTreeMap<String, u64>,
) -> heed::Result<()> { ) -> heed::Result<()> {
todo!() facet_distribution_iter::iterate_over_facet_distribution(
// let iter = self.rtxn,
// FacetNumberIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?; &self.index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
field_id,
// for result in iter { candidates,
// let (value, mut docids) = result?; |facet_key, nbr_docids| {
// docids &= candidates; let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap();
// if !docids.is_empty() { distribution.insert(facet_key.to_string(), nbr_docids);
// distribution.insert(value.to_string(), docids.len()); if distribution.len() == self.max_values_per_facet {
// } ControlFlow::Break(())
// if distribution.len() == self.max_values_per_facet { } else {
// break; ControlFlow::Continue(())
// } }
// } },
)
// Ok(())
} }
fn facet_strings_distribution_from_facet_levels( fn facet_strings_distribution_from_facet_levels(
@ -155,22 +159,21 @@ impl<'a> FacetDistribution<'a> {
candidates: &RoaringBitmap, candidates: &RoaringBitmap,
distribution: &mut BTreeMap<String, u64>, distribution: &mut BTreeMap<String, u64>,
) -> heed::Result<()> { ) -> heed::Result<()> {
todo!() facet_distribution_iter::iterate_over_facet_distribution(
// let iter = self.rtxn,
// FacetStringIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?; &self.index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
field_id,
// for result in iter { candidates,
// let (_normalized, original, mut docids) = result?; |facet_key, nbr_docids| {
// docids &= candidates; let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
// if !docids.is_empty() { distribution.insert(facet_key.to_string(), nbr_docids);
// distribution.insert(original.to_string(), docids.len()); if distribution.len() == self.max_values_per_facet {
// } ControlFlow::Break(())
// if distribution.len() == self.max_values_per_facet { } else {
// break; ControlFlow::Continue(())
// } }
// } },
)
// Ok(())
} }
/// Placeholder search, a.k.a. no candidates were specified. We iterate through the /// Placeholder search, a.k.a. no candidates were specified. We iterate through the
@ -179,43 +182,42 @@ impl<'a> FacetDistribution<'a> {
&self, &self,
field_id: FieldId, field_id: FieldId,
) -> heed::Result<BTreeMap<String, u64>> { ) -> heed::Result<BTreeMap<String, u64>> {
todo!() let mut distribution = BTreeMap::new();
// let mut distribution = BTreeMap::new();
// let db = self.index.facet_id_f64_docids; let db = self.index.facet_id_f64_docids;
// let range = FacetNumberRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?; let mut prefix = vec![];
prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(0);
let iter = db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
.remap_types::<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
// for result in range { for result in iter {
// let ((_, _, value, _), docids) = result?; let (key, value) = result?;
// distribution.insert(value.to_string(), docids.len()); distribution.insert(key.left_bound.to_string(), value.bitmap.len());
// if distribution.len() == self.max_values_per_facet { if distribution.len() == self.max_values_per_facet {
// break; break;
// } }
// } }
// let iter = self let iter = self
// .index .index
// .facet_id_string_docids .facet_id_string_docids
// .remap_key_type::<ByteSlice>() .as_polymorph()
// .prefix_iter(self.rtxn, &field_id.to_be_bytes())? .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
// .remap_key_type::<FacetStringLevelZeroCodec>(); .remap_types::<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
// let mut normalized_distribution = BTreeMap::new(); // TODO: get the original value of the facet somewhere (in the documents DB?)
// for result in iter { for result in iter {
// let ((_, normalized_value), group_value) = result?; let (key, value) = result?;
// normalized_distribution distribution.insert(key.left_bound.to_owned(), value.bitmap.len());
// .insert(normalized_value, (normalized_value, group_value.bitmap.len())); if distribution.len() == self.max_values_per_facet {
// if normalized_distribution.len() == self.max_values_per_facet { break;
// break; }
// } }
// }
// let iter = normalized_distribution Ok(distribution)
// .into_iter()
// .map(|(_normalized, (original, count))| (original.to_string(), count));
// distribution.extend(iter);
// Ok(distribution)
} }
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> { fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {

View File

@ -1,5 +1,5 @@
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
use crate::Result; use heed::Result;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use std::ops::ControlFlow; use std::ops::ControlFlow;
@ -20,7 +20,7 @@ where
get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?; get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX); fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
return Ok(()); return Ok(());
} else { } else {
return Ok(()); return Ok(());

View File

@ -20,7 +20,7 @@ pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> crate::Result<Option<BoundCodec::DItem>> ) -> heed::Result<Option<BoundCodec::DItem>>
where where
BoundCodec: BytesDecode<'t>, BoundCodec: BytesDecode<'t>,
{ {
@ -42,7 +42,7 @@ pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> crate::Result<Option<BoundCodec::DItem>> ) -> heed::Result<Option<BoundCodec::DItem>>
where where
BoundCodec: BytesDecode<'t>, BoundCodec: BytesDecode<'t>,
{ {
@ -65,7 +65,7 @@ pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>, txn: &'t RoTxn<'t>,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> crate::Result<u8> { ) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes(); let field_id_prefix = &field_id.to_be_bytes();
Ok(db Ok(db
.as_polymorph() .as_polymorph()