mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Reintroduce facet distribution functionality
This commit is contained in:
parent
b8a1caad5e
commit
5a904cf29d
@ -1,13 +1,18 @@
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::ops::Bound::Unbounded;
|
||||
use std::ops::ControlFlow;
|
||||
use std::{fmt, mem};
|
||||
|
||||
use heed::types::ByteSlice;
|
||||
use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::error::UserError;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
||||
use crate::search::facet::facet_distribution_iter;
|
||||
// use crate::search::facet::FacetStringIter;
|
||||
use crate::{FieldId, Index, Result};
|
||||
|
||||
@ -131,22 +136,21 @@ impl<'a> FacetDistribution<'a> {
|
||||
candidates: &RoaringBitmap,
|
||||
distribution: &mut BTreeMap<String, u64>,
|
||||
) -> heed::Result<()> {
|
||||
todo!()
|
||||
// let iter =
|
||||
// FacetNumberIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
||||
|
||||
// for result in iter {
|
||||
// let (value, mut docids) = result?;
|
||||
// docids &= candidates;
|
||||
// if !docids.is_empty() {
|
||||
// distribution.insert(value.to_string(), docids.len());
|
||||
// }
|
||||
// if distribution.len() == self.max_values_per_facet {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// Ok(())
|
||||
facet_distribution_iter::iterate_over_facet_distribution(
|
||||
self.rtxn,
|
||||
&self.index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids| {
|
||||
let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap();
|
||||
distribution.insert(facet_key.to_string(), nbr_docids);
|
||||
if distribution.len() == self.max_values_per_facet {
|
||||
ControlFlow::Break(())
|
||||
} else {
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
fn facet_strings_distribution_from_facet_levels(
|
||||
@ -155,22 +159,21 @@ impl<'a> FacetDistribution<'a> {
|
||||
candidates: &RoaringBitmap,
|
||||
distribution: &mut BTreeMap<String, u64>,
|
||||
) -> heed::Result<()> {
|
||||
todo!()
|
||||
// let iter =
|
||||
// FacetStringIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
||||
|
||||
// for result in iter {
|
||||
// let (_normalized, original, mut docids) = result?;
|
||||
// docids &= candidates;
|
||||
// if !docids.is_empty() {
|
||||
// distribution.insert(original.to_string(), docids.len());
|
||||
// }
|
||||
// if distribution.len() == self.max_values_per_facet {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// Ok(())
|
||||
facet_distribution_iter::iterate_over_facet_distribution(
|
||||
self.rtxn,
|
||||
&self.index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids| {
|
||||
let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
|
||||
distribution.insert(facet_key.to_string(), nbr_docids);
|
||||
if distribution.len() == self.max_values_per_facet {
|
||||
ControlFlow::Break(())
|
||||
} else {
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
/// Placeholder search, a.k.a. no candidates were specified. We iterate throught the
|
||||
@ -179,43 +182,42 @@ impl<'a> FacetDistribution<'a> {
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<BTreeMap<String, u64>> {
|
||||
todo!()
|
||||
// let mut distribution = BTreeMap::new();
|
||||
let mut distribution = BTreeMap::new();
|
||||
|
||||
// let db = self.index.facet_id_f64_docids;
|
||||
// let range = FacetNumberRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?;
|
||||
let db = self.index.facet_id_f64_docids;
|
||||
let mut prefix = vec![];
|
||||
prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
prefix.push(0);
|
||||
let iter = db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||
.remap_types::<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
|
||||
|
||||
// for result in range {
|
||||
// let ((_, _, value, _), docids) = result?;
|
||||
// distribution.insert(value.to_string(), docids.len());
|
||||
// if distribution.len() == self.max_values_per_facet {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
for result in iter {
|
||||
let (key, value) = result?;
|
||||
distribution.insert(key.left_bound.to_string(), value.bitmap.len());
|
||||
if distribution.len() == self.max_values_per_facet {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// let iter = self
|
||||
// .index
|
||||
// .facet_id_string_docids
|
||||
// .remap_key_type::<ByteSlice>()
|
||||
// .prefix_iter(self.rtxn, &field_id.to_be_bytes())?
|
||||
// .remap_key_type::<FacetStringLevelZeroCodec>();
|
||||
let iter = self
|
||||
.index
|
||||
.facet_id_string_docids
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||
.remap_types::<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
|
||||
|
||||
// let mut normalized_distribution = BTreeMap::new();
|
||||
// for result in iter {
|
||||
// let ((_, normalized_value), group_value) = result?;
|
||||
// normalized_distribution
|
||||
// .insert(normalized_value, (normalized_value, group_value.bitmap.len()));
|
||||
// if normalized_distribution.len() == self.max_values_per_facet {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// TODO: get the original value of the facet somewhere (in the documents DB?)
|
||||
for result in iter {
|
||||
let (key, value) = result?;
|
||||
distribution.insert(key.left_bound.to_owned(), value.bitmap.len());
|
||||
if distribution.len() == self.max_values_per_facet {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// let iter = normalized_distribution
|
||||
// .into_iter()
|
||||
// .map(|(_normalized, (original, count))| (original.to_string(), count));
|
||||
// distribution.extend(iter);
|
||||
|
||||
// Ok(distribution)
|
||||
Ok(distribution)
|
||||
}
|
||||
|
||||
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
|
||||
use crate::Result;
|
||||
use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
@ -20,7 +20,7 @@ where
|
||||
get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX);
|
||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||
return Ok(());
|
||||
} else {
|
||||
return Ok(());
|
||||
|
@ -20,7 +20,7 @@ pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
||||
txn: &'t RoTxn,
|
||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> crate::Result<Option<BoundCodec::DItem>>
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
BoundCodec: BytesDecode<'t>,
|
||||
{
|
||||
@ -42,7 +42,7 @@ pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
||||
txn: &'t RoTxn,
|
||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> crate::Result<Option<BoundCodec::DItem>>
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
BoundCodec: BytesDecode<'t>,
|
||||
{
|
||||
@ -65,7 +65,7 @@ pub(crate) fn get_highest_level<'t>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> crate::Result<u8> {
|
||||
) -> heed::Result<u8> {
|
||||
let field_id_prefix = &field_id.to_be_bytes();
|
||||
Ok(db
|
||||
.as_polymorph()
|
||||
|
Loading…
Reference in New Issue
Block a user