mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Reintroduce facet distribution functionality
This commit is contained in:
parent
b8a1caad5e
commit
5a904cf29d
@ -1,13 +1,18 @@
|
|||||||
use std::collections::{BTreeMap, HashSet};
|
use std::collections::{BTreeMap, HashSet};
|
||||||
use std::ops::Bound::Unbounded;
|
use std::ops::ControlFlow;
|
||||||
use std::{fmt, mem};
|
use std::{fmt, mem};
|
||||||
|
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
|
use heed::BytesDecode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
|
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||||
|
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||||
|
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
||||||
|
use crate::search::facet::facet_distribution_iter;
|
||||||
// use crate::search::facet::FacetStringIter;
|
// use crate::search::facet::FacetStringIter;
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
@ -131,22 +136,21 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
distribution: &mut BTreeMap<String, u64>,
|
distribution: &mut BTreeMap<String, u64>,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
todo!()
|
facet_distribution_iter::iterate_over_facet_distribution(
|
||||||
// let iter =
|
self.rtxn,
|
||||||
// FacetNumberIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
&self.index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||||
|
field_id,
|
||||||
// for result in iter {
|
candidates,
|
||||||
// let (value, mut docids) = result?;
|
|facet_key, nbr_docids| {
|
||||||
// docids &= candidates;
|
let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap();
|
||||||
// if !docids.is_empty() {
|
distribution.insert(facet_key.to_string(), nbr_docids);
|
||||||
// distribution.insert(value.to_string(), docids.len());
|
if distribution.len() == self.max_values_per_facet {
|
||||||
// }
|
ControlFlow::Break(())
|
||||||
// if distribution.len() == self.max_values_per_facet {
|
} else {
|
||||||
// break;
|
ControlFlow::Continue(())
|
||||||
// }
|
}
|
||||||
// }
|
},
|
||||||
|
)
|
||||||
// Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn facet_strings_distribution_from_facet_levels(
|
fn facet_strings_distribution_from_facet_levels(
|
||||||
@ -155,22 +159,21 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
distribution: &mut BTreeMap<String, u64>,
|
distribution: &mut BTreeMap<String, u64>,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
todo!()
|
facet_distribution_iter::iterate_over_facet_distribution(
|
||||||
// let iter =
|
self.rtxn,
|
||||||
// FacetStringIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
&self.index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||||
|
field_id,
|
||||||
// for result in iter {
|
candidates,
|
||||||
// let (_normalized, original, mut docids) = result?;
|
|facet_key, nbr_docids| {
|
||||||
// docids &= candidates;
|
let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
|
||||||
// if !docids.is_empty() {
|
distribution.insert(facet_key.to_string(), nbr_docids);
|
||||||
// distribution.insert(original.to_string(), docids.len());
|
if distribution.len() == self.max_values_per_facet {
|
||||||
// }
|
ControlFlow::Break(())
|
||||||
// if distribution.len() == self.max_values_per_facet {
|
} else {
|
||||||
// break;
|
ControlFlow::Continue(())
|
||||||
// }
|
}
|
||||||
// }
|
},
|
||||||
|
)
|
||||||
// Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Placeholder search, a.k.a. no candidates were specified. We iterate through the
|
/// Placeholder search, a.k.a. no candidates were specified. We iterate through the
|
||||||
@ -179,43 +182,42 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
&self,
|
&self,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
) -> heed::Result<BTreeMap<String, u64>> {
|
) -> heed::Result<BTreeMap<String, u64>> {
|
||||||
todo!()
|
let mut distribution = BTreeMap::new();
|
||||||
// let mut distribution = BTreeMap::new();
|
|
||||||
|
|
||||||
// let db = self.index.facet_id_f64_docids;
|
let db = self.index.facet_id_f64_docids;
|
||||||
// let range = FacetNumberRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?;
|
let mut prefix = vec![];
|
||||||
|
prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||||
|
prefix.push(0);
|
||||||
|
let iter = db
|
||||||
|
.as_polymorph()
|
||||||
|
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||||
|
.remap_types::<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
|
||||||
|
|
||||||
// for result in range {
|
for result in iter {
|
||||||
// let ((_, _, value, _), docids) = result?;
|
let (key, value) = result?;
|
||||||
// distribution.insert(value.to_string(), docids.len());
|
distribution.insert(key.left_bound.to_string(), value.bitmap.len());
|
||||||
// if distribution.len() == self.max_values_per_facet {
|
if distribution.len() == self.max_values_per_facet {
|
||||||
// break;
|
break;
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
|
|
||||||
// let iter = self
|
let iter = self
|
||||||
// .index
|
.index
|
||||||
// .facet_id_string_docids
|
.facet_id_string_docids
|
||||||
// .remap_key_type::<ByteSlice>()
|
.as_polymorph()
|
||||||
// .prefix_iter(self.rtxn, &field_id.to_be_bytes())?
|
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||||
// .remap_key_type::<FacetStringLevelZeroCodec>();
|
.remap_types::<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
|
||||||
|
|
||||||
// let mut normalized_distribution = BTreeMap::new();
|
// TODO: get the original value of the facet somewhere (in the documents DB?)
|
||||||
// for result in iter {
|
for result in iter {
|
||||||
// let ((_, normalized_value), group_value) = result?;
|
let (key, value) = result?;
|
||||||
// normalized_distribution
|
distribution.insert(key.left_bound.to_owned(), value.bitmap.len());
|
||||||
// .insert(normalized_value, (normalized_value, group_value.bitmap.len()));
|
if distribution.len() == self.max_values_per_facet {
|
||||||
// if normalized_distribution.len() == self.max_values_per_facet {
|
break;
|
||||||
// break;
|
}
|
||||||
// }
|
}
|
||||||
// }
|
|
||||||
|
|
||||||
// let iter = normalized_distribution
|
Ok(distribution)
|
||||||
// .into_iter()
|
|
||||||
// .map(|(_normalized, (original, count))| (original.to_string(), count));
|
|
||||||
// distribution.extend(iter);
|
|
||||||
|
|
||||||
// Ok(distribution)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
|
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
|
||||||
use crate::Result;
|
use heed::Result;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
@ -20,7 +20,7 @@ where
|
|||||||
get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
|
get_highest_level(rtxn, &db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
|
||||||
|
|
||||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
||||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX);
|
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
} else {
|
} else {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
@ -20,7 +20,7 @@ pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
|||||||
txn: &'t RoTxn,
|
txn: &'t RoTxn,
|
||||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> crate::Result<Option<BoundCodec::DItem>>
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||||
where
|
where
|
||||||
BoundCodec: BytesDecode<'t>,
|
BoundCodec: BytesDecode<'t>,
|
||||||
{
|
{
|
||||||
@ -42,7 +42,7 @@ pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
|||||||
txn: &'t RoTxn,
|
txn: &'t RoTxn,
|
||||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> crate::Result<Option<BoundCodec::DItem>>
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||||
where
|
where
|
||||||
BoundCodec: BytesDecode<'t>,
|
BoundCodec: BytesDecode<'t>,
|
||||||
{
|
{
|
||||||
@ -65,7 +65,7 @@ pub(crate) fn get_highest_level<'t>(
|
|||||||
txn: &'t RoTxn<'t>,
|
txn: &'t RoTxn<'t>,
|
||||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> crate::Result<u8> {
|
) -> heed::Result<u8> {
|
||||||
let field_id_prefix = &field_id.to_be_bytes();
|
let field_id_prefix = &field_id.to_be_bytes();
|
||||||
Ok(db
|
Ok(db
|
||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
|
Loading…
Reference in New Issue
Block a user