mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Refactor facet-related codecs
This commit is contained in:
parent
9b55e582cd
commit
485a72306d
@ -1,25 +1,19 @@
|
||||
// mod facet_level_value_f64_codec;
|
||||
// mod facet_level_value_u32_codec;
|
||||
// mod facet_string_level_zero_codec;
|
||||
// mod facet_string_level_zero_value_codec;
|
||||
// mod facet_string_zero_bounds_value_codec;
|
||||
mod field_doc_id_facet_f64_codec;
|
||||
mod field_doc_id_facet_string_codec;
|
||||
mod ordered_f64_codec;
|
||||
mod str_ref;
|
||||
|
||||
pub mod new;
|
||||
|
||||
use heed::types::OwnedType;
|
||||
|
||||
// pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
|
||||
// pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
|
||||
// pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
|
||||
// pub use self::facet_string_level_zero_value_codec::{
|
||||
// decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec,
|
||||
// };
|
||||
// pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
|
||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
||||
use crate::BEU16;
|
||||
pub use self::ordered_f64_codec::OrderedF64Codec;
|
||||
pub use self::str_ref::StrRefCodec;
|
||||
use crate::{CboRoaringBitmapCodec, BEU16};
|
||||
use heed::types::OwnedType;
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub type FieldIdCodec = OwnedType<BEU16>;
|
||||
|
||||
@ -32,3 +26,109 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Key of a facet group: a field id, a level and the left bound of the group.
///
/// The derived ordering compares `field_id` first, then `level`, then
/// `left_bound`, following the declaration order of the fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetGroupKey<T> {
    /// Identifier of the field this key belongs to.
    pub field_id: u16,
    /// Level of the group.
    pub level: u8,
    /// Left bound of the group, in whatever representation `T` provides.
    pub left_bound: T,
}

impl<'a> FacetGroupKey<&'a [u8]> {
    /// Converts a key that borrows its left bound into one that owns it.
    ///
    /// The bound bytes are copied into a fresh `Vec<u8>`; `field_id` and
    /// `level` are carried over unchanged.
    pub fn into_owned(self) -> FacetGroupKey<Vec<u8>> {
        let FacetGroupKey { field_id, level, left_bound } = self;
        FacetGroupKey { field_id, level, left_bound: left_bound.to_vec() }
    }
}

// No lifetime parameter is needed here: the returned key borrows from `&self`
// through lifetime elision.
impl FacetGroupKey<Vec<u8>> {
    /// Returns a key that borrows the left bound of `self` as a byte slice.
    pub fn as_ref(&self) -> FacetGroupKey<&[u8]> {
        FacetGroupKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}
|
||||
|
||||
/// Value stored next to a facet group key: a one-byte `size` and a bitmap.
// NOTE(review): `size` presumably is the number of elements covered by the
// group and `bitmap` the matching document ids — confirm against the writers.
#[derive(Debug)]
|
||||
pub struct FacetGroupValue {
|
||||
pub size: u8,
|
||||
pub bitmap: RoaringBitmap,
|
||||
}
|
||||
|
||||
/// Codec for `FacetGroupKey` values, parameterised by the codec `T` used for
/// the left bound.
///
/// This is a marker type: it only carries `T` through `PhantomData`.
pub struct FacetGroupKeyCodec<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec<T>
|
||||
where
|
||||
T: BytesEncode<'a>,
|
||||
T::EItem: Sized,
|
||||
{
|
||||
type EItem = FacetGroupKey<T::EItem>;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut v = vec![];
|
||||
v.extend_from_slice(&value.field_id.to_be_bytes());
|
||||
v.extend_from_slice(&[value.level]);
|
||||
|
||||
let bound = T::bytes_encode(&value.left_bound)?;
|
||||
v.extend_from_slice(&bound);
|
||||
|
||||
Some(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
|
||||
where
|
||||
T: BytesDecode<'a>,
|
||||
{
|
||||
type DItem = FacetGroupKey<T::DItem>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
|
||||
let level = bytes[2];
|
||||
let bound = T::bytes_decode(&bytes[3..])?;
|
||||
Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FacetGroupValueCodec;
|
||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
type EItem = FacetGroupValue;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut v = vec![];
|
||||
v.push(value.size);
|
||||
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
|
||||
Some(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
type DItem = FacetGroupValue;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let size = bytes[0];
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
|
||||
Some(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ByteSliceRef;
|
||||
|
||||
impl<'a> BytesEncode<'a> for ByteSliceRef {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for ByteSliceRef {
|
||||
type DItem = &'a [u8];
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(bytes)
|
||||
}
|
||||
}
|
||||
|
@ -1,120 +0,0 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::CboRoaringBitmapCodec;
|
||||
|
||||
pub mod ordered_f64_codec;
|
||||
pub mod str_ref;
|
||||
// TODO: these codecs were quickly written and not fast/resilient enough
|
||||
|
||||
/// Key of a facet entry: a field id, a level and a left bound.
///
/// The derived ordering compares `field_id` first, then `level`, then
/// `left_bound`, following the declaration order of the fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetKey<T> {
    /// Identifier of the field this key belongs to.
    pub field_id: u16,
    /// Level of the entry.
    pub level: u8,
    /// Left bound, in whatever representation `T` provides.
    pub left_bound: T,
}

impl<'a> FacetKey<&'a [u8]> {
    /// Converts a key that borrows its left bound into one that owns it.
    ///
    /// The bound bytes are copied into a fresh `Vec<u8>`; `field_id` and
    /// `level` are carried over unchanged.
    pub fn into_owned(self) -> FacetKey<Vec<u8>> {
        let FacetKey { field_id, level, left_bound } = self;
        FacetKey { field_id, level, left_bound: left_bound.to_vec() }
    }
}

// No lifetime parameter is needed here: the returned key borrows from `&self`
// through lifetime elision.
impl FacetKey<Vec<u8>> {
    /// Returns a key that borrows the left bound of `self` as a byte slice.
    pub fn as_ref(&self) -> FacetKey<&[u8]> {
        FacetKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}
|
||||
|
||||
/// Value stored next to a facet key: a one-byte `size` and a bitmap.
// NOTE(review): `size` presumably is the number of elements covered by the
// group and `bitmap` the matching document ids — confirm against the writers.
#[derive(Debug)]
|
||||
pub struct FacetGroupValue {
|
||||
pub size: u8,
|
||||
pub bitmap: RoaringBitmap,
|
||||
}
|
||||
|
||||
/// Codec for `FacetKey` values, parameterised by the codec `T` used for the
/// left bound.
///
/// This is a marker type: it only carries `T` through `PhantomData`.
pub struct FacetKeyCodec<T> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<'a, T> heed::BytesEncode<'a> for FacetKeyCodec<T>
|
||||
where
|
||||
T: BytesEncode<'a>,
|
||||
T::EItem: Sized,
|
||||
{
|
||||
type EItem = FacetKey<T::EItem>;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut v = vec![];
|
||||
v.extend_from_slice(&value.field_id.to_be_bytes());
|
||||
v.extend_from_slice(&[value.level]);
|
||||
|
||||
let bound = T::bytes_encode(&value.left_bound)?;
|
||||
v.extend_from_slice(&bound);
|
||||
|
||||
Some(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a, T> heed::BytesDecode<'a> for FacetKeyCodec<T>
|
||||
where
|
||||
T: BytesDecode<'a>,
|
||||
{
|
||||
type DItem = FacetKey<T::DItem>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
|
||||
let level = bytes[2];
|
||||
let bound = T::bytes_decode(&bytes[3..])?;
|
||||
Some(FacetKey { field_id: fid, level, left_bound: bound })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FacetGroupValueCodec;
|
||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
type EItem = FacetGroupValue;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut v = vec![];
|
||||
v.push(value.size);
|
||||
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
|
||||
Some(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
type DItem = FacetGroupValue;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let size = bytes[0];
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
|
||||
Some(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: get rid of this codec as it is named confusingly + should really be part of heed
|
||||
// or even replace the current ByteSlice codec
|
||||
pub struct MyByteSlice;
|
||||
|
||||
impl<'a> BytesEncode<'a> for MyByteSlice {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for MyByteSlice {
|
||||
type DItem = &'a [u8];
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(bytes)
|
||||
}
|
||||
}
|
@ -14,15 +14,10 @@ use time::OffsetDateTime;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::facet::FacetType;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec};
|
||||
use crate::heed_codec::facet::{
|
||||
// FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
||||
FieldDocIdFacetF64Codec,
|
||||
FieldDocIdFacetStringCodec,
|
||||
FieldIdCodec,
|
||||
};
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::StrRefCodec;
|
||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec};
|
||||
use crate::{
|
||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||
@ -130,9 +125,9 @@ pub struct Index {
|
||||
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the facet field id and ranges of numbers to the docids that correspond to them.
|
||||
pub facet_id_f64_docids: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id and ranges of strings to the docids that correspond to them.
|
||||
pub facet_id_string_docids: Database<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||
|
||||
/// Maps the document id, the facet field id and the numbers.
|
||||
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
|
||||
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||
@ -196,14 +196,14 @@ fn facet_ordered<'t>(
|
||||
|
||||
let number_iter = make_iter(
|
||||
rtxn,
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
field_id,
|
||||
candidates.clone(),
|
||||
)?;
|
||||
|
||||
let string_iter = make_iter(
|
||||
rtxn,
|
||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
)?;
|
||||
|
@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Distinct, DocIter};
|
||||
use crate::error::InternalError;
|
||||
use crate::heed_codec::facet::new::FacetKey;
|
||||
use crate::heed_codec::facet::FacetGroupKey;
|
||||
use crate::heed_codec::facet::*;
|
||||
use crate::index::db_name;
|
||||
use crate::{DocumentId, FieldId, Index, Result};
|
||||
@ -48,7 +48,7 @@ impl<'a> FacetDistinctIter<'a> {
|
||||
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||
self.index
|
||||
.facet_id_string_docids
|
||||
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key })
|
||||
.get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
|
||||
.map(|opt| opt.map(|v| v.bitmap))
|
||||
}
|
||||
|
||||
@ -56,7 +56,7 @@ impl<'a> FacetDistinctIter<'a> {
|
||||
// get facet docids on level 0
|
||||
self.index
|
||||
.facet_id_f64_docids
|
||||
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key })
|
||||
.get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
|
||||
.map(|opt| opt.map(|v| v.bitmap))
|
||||
}
|
||||
|
||||
|
@ -8,12 +8,11 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use crate::error::UserError;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::StrRefCodec;
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
||||
use crate::search::facet::facet_distribution_iter;
|
||||
// use crate::search::facet::FacetStringIter;
|
||||
use crate::{FieldId, Index, Result};
|
||||
|
||||
/// The default number of values by facets that will
|
||||
@ -138,7 +137,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
) -> heed::Result<()> {
|
||||
facet_distribution_iter::iterate_over_facet_distribution(
|
||||
self.rtxn,
|
||||
self.index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids| {
|
||||
@ -161,7 +160,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
) -> heed::Result<()> {
|
||||
facet_distribution_iter::iterate_over_facet_distribution(
|
||||
self.rtxn,
|
||||
self.index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids| {
|
||||
@ -191,7 +190,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
let iter = db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||
.remap_types::<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
|
||||
.remap_types::<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
|
||||
|
||||
for result in iter {
|
||||
let (key, value) = result?;
|
||||
@ -206,7 +205,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
.facet_id_string_docids
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||
.remap_types::<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
|
||||
.remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
|
||||
|
||||
// TODO: get the original value of the facet somewhere (in the documents DB?)
|
||||
for result in iter {
|
||||
|
@ -4,11 +4,11 @@ use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupValueCodec, FacetGroupKeyCodec};
|
||||
|
||||
pub fn iterate_over_facet_distribution<'t, CB>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: &RoaringBitmap,
|
||||
callback: CB,
|
||||
@ -18,9 +18,9 @@ where
|
||||
{
|
||||
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
|
||||
let highest_level =
|
||||
get_highest_level(rtxn, db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
|
||||
get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||
return Ok(());
|
||||
} else {
|
||||
@ -33,7 +33,7 @@ where
|
||||
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
|
||||
{
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
callback: CB,
|
||||
}
|
||||
@ -49,7 +49,7 @@ where
|
||||
group_size: usize,
|
||||
) -> Result<ControlFlow<()>> {
|
||||
let starting_key =
|
||||
FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
|
||||
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
|
||||
let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size);
|
||||
for el in iter {
|
||||
let (key, value) = el?;
|
||||
@ -78,7 +78,7 @@ where
|
||||
if level == 0 {
|
||||
return self.iterate_level_0(candidates, starting_bound, group_size);
|
||||
}
|
||||
let starting_key = FacetKey { field_id: self.field_id, level, left_bound: starting_bound };
|
||||
let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
|
||||
let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size);
|
||||
|
||||
for el in iter {
|
||||
@ -116,7 +116,7 @@ mod tests {
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::iterate_over_facet_distribution;
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
|
||||
|
@ -4,12 +4,12 @@ use heed::BytesEncode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef};
|
||||
use crate::Result;
|
||||
|
||||
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
@ -42,13 +42,13 @@ where
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
|
||||
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||
let mut docids = RoaringBitmap::new();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
||||
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
|
||||
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
|
||||
Ok(docids)
|
||||
} else {
|
||||
@ -59,7 +59,7 @@ where
|
||||
/// Fetch the document ids that have a facet with a value between the two given bounds
|
||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
left: Bound<&'b [u8]>,
|
||||
right: Bound<&'b [u8]>,
|
||||
@ -68,7 +68,7 @@ struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
|
||||
let left_key =
|
||||
FacetKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
|
||||
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
|
||||
let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
|
||||
for el in iter {
|
||||
let (key, value) = el?;
|
||||
@ -117,7 +117,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
return self.run_level_0(starting_left_bound, group_size);
|
||||
}
|
||||
|
||||
let left_key = FacetKey { field_id: self.field_id, level, left_bound: starting_left_bound };
|
||||
let left_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound };
|
||||
let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
|
||||
|
||||
let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
|
||||
@ -258,8 +258,8 @@ mod tests {
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::find_docids_of_facet_within_bounds;
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::FacetKeyCodec;
|
||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
@ -310,7 +310,7 @@ mod tests {
|
||||
let end = Bound::Included(i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -326,7 +326,7 @@ mod tests {
|
||||
let end = Bound::Excluded(i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -352,7 +352,7 @@ mod tests {
|
||||
let end = Bound::Included(255.);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -371,7 +371,7 @@ mod tests {
|
||||
let end = Bound::Excluded(255.);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -399,7 +399,7 @@ mod tests {
|
||||
let end = Bound::Included(255. - i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
@ -418,7 +418,7 @@ mod tests {
|
||||
let end = Bound::Excluded(255. - i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
||||
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
|
@ -2,19 +2,19 @@ use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::new::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||
};
|
||||
|
||||
pub fn ascending_facet_sort<'t>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
||||
let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
||||
|
||||
Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
|
||||
@ -25,11 +25,11 @@ pub fn ascending_facet_sort<'t>(
|
||||
|
||||
struct AscendingFacetSort<'t, 'e> {
|
||||
rtxn: &'t heed::RoTxn<'e>,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
stack: Vec<(
|
||||
RoaringBitmap,
|
||||
std::iter::Take<heed::RoRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>,
|
||||
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
|
||||
)>,
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
|
||||
let (documents_ids, deepest_iter) = self.stack.last_mut()?;
|
||||
for result in deepest_iter {
|
||||
let (
|
||||
FacetKey { level, left_bound, field_id },
|
||||
FacetGroupKey { level, left_bound, field_id },
|
||||
FacetGroupValue { size: group_size, mut bitmap },
|
||||
) = result.unwrap();
|
||||
// The range is unbounded on the right and the group size for the highest level is MAX,
|
||||
@ -65,7 +65,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
|
||||
return Some(Ok(bitmap));
|
||||
}
|
||||
let starting_key_below =
|
||||
FacetKey { field_id: self.field_id, level: level - 1, left_bound };
|
||||
FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
|
||||
let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) {
|
||||
Ok(iter) => iter,
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
@ -86,7 +86,7 @@ mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
|
@ -4,21 +4,21 @@ use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::new::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||
};
|
||||
|
||||
pub fn descending_facet_sort<'t>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
||||
let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
|
||||
let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound };
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
|
||||
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||
Ok(Box::new(DescendingFacetSort {
|
||||
rtxn,
|
||||
@ -33,11 +33,11 @@ pub fn descending_facet_sort<'t>(
|
||||
|
||||
struct DescendingFacetSort<'t> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
stack: Vec<(
|
||||
RoaringBitmap,
|
||||
std::iter::Take<heed::RoRevRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>,
|
||||
std::iter::Take<heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
|
||||
Bound<&'t [u8]>,
|
||||
)>,
|
||||
}
|
||||
@ -50,7 +50,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?;
|
||||
while let Some(result) = deepest_iter.next() {
|
||||
let (
|
||||
FacetKey { level, left_bound, field_id },
|
||||
FacetGroupKey { level, left_bound, field_id },
|
||||
FacetGroupValue { size: group_size, mut bitmap },
|
||||
) = result.unwrap();
|
||||
// The range is unbounded on the right and the group size for the highest level is MAX,
|
||||
@ -72,15 +72,15 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
if level == 0 {
|
||||
return Some(Ok(bitmap));
|
||||
}
|
||||
let starting_key_below = FacetKey { field_id, level: level - 1, left_bound };
|
||||
let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound };
|
||||
|
||||
let end_key_kelow = match *right_bound {
|
||||
Bound::Included(right) => Bound::Included(FacetKey {
|
||||
Bound::Included(right) => Bound::Included(FacetGroupKey {
|
||||
field_id,
|
||||
level: level - 1,
|
||||
left_bound: right,
|
||||
}),
|
||||
Bound::Excluded(right) => Bound::Excluded(FacetKey {
|
||||
Bound::Excluded(right) => Bound::Excluded(FacetGroupKey {
|
||||
field_id,
|
||||
level: level - 1,
|
||||
left_bound: right,
|
||||
@ -90,7 +90,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
let prev_right_bound = *right_bound;
|
||||
*right_bound = Bound::Excluded(left_bound);
|
||||
let iter =
|
||||
match self.db.remap_key_type::<FacetKeyCodec<MyByteSlice>>().rev_range(
|
||||
match self.db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>().rev_range(
|
||||
&self.rtxn,
|
||||
&(Bound::Included(starting_key_below), end_key_kelow),
|
||||
) {
|
||||
@ -114,8 +114,8 @@ mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
@ -162,7 +162,7 @@ mod tests {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let db = index.db.content.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
|
||||
let db = index.db.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
||||
for el in iter {
|
||||
let docids = el.unwrap();
|
||||
|
@ -9,8 +9,8 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::facet_range_search;
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
||||
|
||||
/// The maximum number of filters the filter AST can process.
|
||||
@ -180,7 +180,11 @@ impl<'a> Filter<'a> {
|
||||
let string_docids = strings_db
|
||||
.get(
|
||||
rtxn,
|
||||
&FacetKey { field_id, level: 0, left_bound: &val.value().to_lowercase() },
|
||||
&FacetGroupKey {
|
||||
field_id,
|
||||
level: 0,
|
||||
left_bound: &val.value().to_lowercase(),
|
||||
},
|
||||
)?
|
||||
.map(|v| v.bitmap)
|
||||
.unwrap_or_default();
|
||||
@ -218,10 +222,10 @@ impl<'a> Filter<'a> {
|
||||
.remap_data_type::<DecodeIgnore>()
|
||||
.get_lower_than_or_equal_to(
|
||||
rtxn,
|
||||
&FacetKey { field_id, level: u8::MAX, left_bound: f64::MAX },
|
||||
&FacetGroupKey { field_id, level: u8::MAX, left_bound: f64::MAX },
|
||||
)?
|
||||
.and_then(
|
||||
|(FacetKey { field_id: id, level, .. }, _)| {
|
||||
|(FacetGroupKey { field_id: id, level, .. }, _)| {
|
||||
if id == field_id {
|
||||
Some(level)
|
||||
} else {
|
||||
@ -252,7 +256,7 @@ impl<'a> Filter<'a> {
|
||||
/// going deeper through the levels.
|
||||
fn explore_facet_number_levels(
|
||||
rtxn: &heed::RoTxn,
|
||||
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
field_id: FieldId,
|
||||
level: u8,
|
||||
left: Bound<f64>,
|
||||
|
@ -3,7 +3,7 @@ use heed::{BytesDecode, RoTxn};
|
||||
|
||||
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::filter::Filter;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
||||
|
||||
mod facet_distribution;
|
||||
mod facet_distribution_iter;
|
||||
@ -14,7 +14,7 @@ mod filter;
|
||||
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -28,7 +28,7 @@ where
|
||||
if let Some(first) = level0_iter_forward.next() {
|
||||
let (first_key, _) = first?;
|
||||
let first_key =
|
||||
FacetKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
|
||||
FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
|
||||
Ok(Some(first_key.left_bound))
|
||||
} else {
|
||||
Ok(None)
|
||||
@ -36,7 +36,7 @@ where
|
||||
}
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -51,7 +51,7 @@ where
|
||||
if let Some(last) = level0_iter_backward.next() {
|
||||
let (last_key, _) = last?;
|
||||
let last_key =
|
||||
FacetKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
|
||||
FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
|
||||
Ok(Some(last_key.left_bound))
|
||||
} else {
|
||||
Ok(None)
|
||||
@ -59,7 +59,7 @@ where
|
||||
}
|
||||
pub(crate) fn get_highest_level<'t>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<u8> {
|
||||
let field_id_prefix = &field_id.to_be_bytes();
|
||||
@ -69,7 +69,7 @@ pub(crate) fn get_highest_level<'t>(
|
||||
.next()
|
||||
.map(|el| {
|
||||
let (key, _) = el.unwrap();
|
||||
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(key).unwrap();
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap();
|
||||
key.level
|
||||
})
|
||||
.unwrap_or(0))
|
||||
@ -84,8 +84,8 @@ pub mod test {
|
||||
use heed::{BytesDecode, BytesEncode, Env, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::new::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||
};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::FacetsUpdateIncremental;
|
||||
@ -101,7 +101,7 @@ pub mod test {
|
||||
}
|
||||
|
||||
pub struct Database {
|
||||
pub content: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
pub group_size: usize,
|
||||
pub max_group_size: usize,
|
||||
_tempdir: Rc<tempfile::TempDir>,
|
||||
@ -184,7 +184,7 @@ pub mod test {
|
||||
let mut iter = self.db.content.iter(&txn).unwrap();
|
||||
while let Some(el) = iter.next() {
|
||||
let (key, value) = el.unwrap();
|
||||
let FacetKey { field_id, level, left_bound: bound } = key;
|
||||
let FacetGroupKey { field_id, level, left_bound: bound } = key;
|
||||
let bound = BoundCodec::bytes_decode(bound).unwrap();
|
||||
let FacetGroupValue { size, bitmap } = value;
|
||||
writeln!(
|
||||
|
@ -5,7 +5,7 @@ use std::path::Path;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValue, FacetKey};
|
||||
use crate::heed_codec::facet::{FacetGroupValue, FacetGroupKey};
|
||||
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
|
||||
|
||||
#[track_caller]
|
||||
@ -280,7 +280,7 @@ pub fn snap_word_prefix_position_docids(index: &Index) -> String {
|
||||
}
|
||||
pub fn snap_facet_id_f64_docids(index: &Index) -> String {
|
||||
let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |(
|
||||
FacetKey { field_id, level, left_bound },
|
||||
FacetGroupKey { field_id, level, left_bound },
|
||||
FacetGroupValue { size, bitmap },
|
||||
)| {
|
||||
&format!("{field_id:<3} {level:<2} {left_bound:<6} {size:<2} {}", display_bitmap(&bitmap))
|
||||
@ -289,7 +289,7 @@ pub fn snap_facet_id_f64_docids(index: &Index) -> String {
|
||||
}
|
||||
pub fn snap_facet_id_string_docids(index: &Index) -> String {
|
||||
let snap = make_db_snap_from_iter!(index, facet_id_string_docids, |(
|
||||
FacetKey { field_id, level, left_bound },
|
||||
FacetGroupKey { field_id, level, left_bound },
|
||||
FacetGroupValue { size, bitmap },
|
||||
)| {
|
||||
&format!("{field_id:<3} {level:<2} {left_bound:<12} {size:<2} {}", display_bitmap(&bitmap))
|
||||
|
@ -11,7 +11,7 @@ use time::OffsetDateTime;
|
||||
use super::{ClearDocuments, FacetsUpdateBulk};
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::index::{db_name, main_key};
|
||||
use crate::{
|
||||
@ -626,10 +626,10 @@ fn remove_docids_from_facet_id_docids<'a>(
|
||||
) -> Result<()> {
|
||||
let db = match facet_type {
|
||||
FacetType::String => {
|
||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
FacetType::Number => {
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
};
|
||||
let mut modified = false;
|
||||
|
@ -12,8 +12,8 @@ use time::OffsetDateTime;
|
||||
|
||||
use crate::error::InternalError;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::new::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||
};
|
||||
use crate::update::index_documents::{
|
||||
create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
|
||||
@ -22,7 +22,7 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||
|
||||
pub struct FacetsUpdateBulk<'i> {
|
||||
index: &'i Index,
|
||||
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
level_group_size: usize,
|
||||
min_level_size: usize,
|
||||
facet_type: FacetType,
|
||||
@ -40,10 +40,10 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
index,
|
||||
database: match facet_type {
|
||||
FacetType::String => {
|
||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
FacetType::Number => {
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
},
|
||||
level_group_size: 4,
|
||||
@ -61,10 +61,10 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
index,
|
||||
database: match facet_type {
|
||||
FacetType::String => {
|
||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
FacetType::Number => {
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
},
|
||||
level_group_size: 4,
|
||||
@ -89,8 +89,8 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
}
|
||||
|
||||
fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> {
|
||||
let left = FacetKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
|
||||
let right = FacetKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
|
||||
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
|
||||
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
|
||||
let range = left..=right;
|
||||
self.database.delete_range(wtxn, &range).map(drop)?;
|
||||
Ok(())
|
||||
@ -119,7 +119,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
for level_reader in level_readers {
|
||||
let mut cursor = level_reader.into_cursor()?;
|
||||
while let Some((k, v)) = cursor.move_on_next()? {
|
||||
let key = FacetKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
|
||||
let key = FacetGroupKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
|
||||
let value = FacetGroupValueCodec::bytes_decode(v).unwrap();
|
||||
println!("inserting {key:?} {value:?}");
|
||||
|
||||
@ -210,7 +210,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
|
||||
struct ComputeHigherLevels<'t> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
level_group_size: usize,
|
||||
min_level_size: usize,
|
||||
@ -233,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> {
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())?
|
||||
.remap_types::<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>();
|
||||
.remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
|
||||
|
||||
let mut left_bound: &[u8] = &[];
|
||||
let mut first_iteration_for_new_group = true;
|
||||
@ -311,9 +311,9 @@ impl<'t> ComputeHigherLevels<'t> {
|
||||
for ((bitmap, left_bound), group_size) in
|
||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||
{
|
||||
let key = FacetKey { field_id: self.field_id, level, left_bound };
|
||||
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
|
||||
let key =
|
||||
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||
let value = FacetGroupValue { size: group_size, bitmap };
|
||||
let value =
|
||||
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||
@ -329,9 +329,9 @@ impl<'t> ComputeHigherLevels<'t> {
|
||||
for ((bitmap, left_bound), group_size) in
|
||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||
{
|
||||
let key = FacetKey { field_id: self.field_id, level, left_bound };
|
||||
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
|
||||
let key =
|
||||
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||
let value = FacetGroupValue { size: group_size, bitmap };
|
||||
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||
cur_writer.insert(key, value)?;
|
||||
|
@ -2,8 +2,8 @@ use heed::types::ByteSlice;
|
||||
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::new::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||
};
|
||||
use crate::search::facet::get_highest_level;
|
||||
use crate::Result;
|
||||
@ -19,13 +19,13 @@ enum DeletionResult {
|
||||
}
|
||||
|
||||
pub struct FacetsUpdateIncremental {
|
||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
group_size: usize,
|
||||
min_level_size: usize,
|
||||
max_group_size: usize,
|
||||
}
|
||||
impl FacetsUpdateIncremental {
|
||||
pub fn new(db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>) -> Self {
|
||||
pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
|
||||
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
|
||||
}
|
||||
}
|
||||
@ -36,7 +36,7 @@ impl FacetsUpdateIncremental {
|
||||
level: u8,
|
||||
search_key: &[u8],
|
||||
txn: &RoTxn,
|
||||
) -> Result<(FacetKey<Vec<u8>>, FacetGroupValue)> {
|
||||
) -> Result<(FacetGroupKey<Vec<u8>>, FacetGroupValue)> {
|
||||
let mut prefix = vec![];
|
||||
prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
prefix.push(level);
|
||||
@ -45,17 +45,17 @@ impl FacetsUpdateIncremental {
|
||||
let mut prefix_iter = self
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
|
||||
.prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
|
||||
if let Some(e) = prefix_iter.next() {
|
||||
let (key_bytes, value) = e?;
|
||||
Ok((
|
||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||
.ok_or(Error::Encoding)?
|
||||
.into_owned(),
|
||||
value,
|
||||
))
|
||||
} else {
|
||||
let key = FacetKey { field_id, level, left_bound: search_key };
|
||||
let key = FacetGroupKey { field_id, level, left_bound: search_key };
|
||||
match self.db.get_lower_than(txn, &key)? {
|
||||
Some((key, value)) => {
|
||||
if key.level != level || key.field_id != field_id {
|
||||
@ -66,13 +66,13 @@ impl FacetsUpdateIncremental {
|
||||
let mut iter = self
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(
|
||||
.prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(
|
||||
txn,
|
||||
&prefix.as_slice(),
|
||||
)?;
|
||||
let (key_bytes, value) = iter.next().unwrap()?;
|
||||
Ok((
|
||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||
.ok_or(Error::Encoding)?
|
||||
.into_owned(),
|
||||
value,
|
||||
@ -93,7 +93,7 @@ impl FacetsUpdateIncremental {
|
||||
new_key: &[u8],
|
||||
new_values: &RoaringBitmap,
|
||||
) -> Result<InsertionResult> {
|
||||
let key = FacetKey { field_id, level: 0, left_bound: new_key };
|
||||
let key = FacetGroupKey { field_id, level: 0, left_bound: new_key };
|
||||
let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 };
|
||||
|
||||
let mut level0_prefix = vec![];
|
||||
@ -193,7 +193,7 @@ impl FacetsUpdateIncremental {
|
||||
.db
|
||||
.get_greater_than_or_equal_to(
|
||||
&txn,
|
||||
&FacetKey {
|
||||
&FacetGroupKey {
|
||||
field_id,
|
||||
level: level_below,
|
||||
left_bound: insertion_key.left_bound.as_slice(),
|
||||
@ -217,7 +217,7 @@ impl FacetsUpdateIncremental {
|
||||
}
|
||||
|
||||
let key =
|
||||
FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
|
||||
FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
|
||||
let value = FacetGroupValue { size: size_left as u8, bitmap: values_left };
|
||||
(key, value)
|
||||
};
|
||||
@ -235,7 +235,7 @@ impl FacetsUpdateIncremental {
|
||||
}
|
||||
|
||||
let key =
|
||||
FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
|
||||
FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
|
||||
let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
|
||||
(key, value)
|
||||
};
|
||||
@ -303,7 +303,7 @@ impl FacetsUpdateIncremental {
|
||||
let mut values = RoaringBitmap::new();
|
||||
for _ in 0..group_size {
|
||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||
let key_i = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
|
||||
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||
.ok_or(Error::Encoding)?;
|
||||
|
||||
if first_key.is_none() {
|
||||
@ -311,7 +311,7 @@ impl FacetsUpdateIncremental {
|
||||
}
|
||||
values |= value_i.bitmap;
|
||||
}
|
||||
let key = FacetKey {
|
||||
let key = FacetGroupKey {
|
||||
field_id,
|
||||
level: highest_level + 1,
|
||||
left_bound: first_key.unwrap().left_bound,
|
||||
@ -384,7 +384,7 @@ impl FacetsUpdateIncremental {
|
||||
key: &[u8],
|
||||
value: u32,
|
||||
) -> Result<DeletionResult> {
|
||||
let key = FacetKey { field_id, level: 0, left_bound: key };
|
||||
let key = FacetGroupKey { field_id, level: 0, left_bound: key };
|
||||
let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
|
||||
bitmap.remove(value);
|
||||
|
||||
@ -415,7 +415,7 @@ impl FacetsUpdateIncremental {
|
||||
key: &[u8],
|
||||
value: u32,
|
||||
) -> Result<()> {
|
||||
if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() {
|
||||
if self.db.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: key })?.is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
let highest_level = get_highest_level(&txn, self.db, field_id)?;
|
||||
@ -450,7 +450,7 @@ impl FacetsUpdateIncremental {
|
||||
while let Some(el) = iter.next() {
|
||||
let (k, _) = el?;
|
||||
to_delete.push(
|
||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
|
||||
);
|
||||
}
|
||||
drop(iter);
|
||||
@ -469,9 +469,9 @@ mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::str_ref::StrRefCodec;
|
||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::get_highest_level;
|
||||
use crate::search::facet::test::FacetIndex;
|
||||
@ -502,7 +502,7 @@ mod tests {
|
||||
.unwrap();
|
||||
while let Some(el) = iter.next() {
|
||||
let (key, value) = el.unwrap();
|
||||
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key).unwrap();
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
|
||||
|
||||
let mut prefix_start_below = vec![];
|
||||
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
|
||||
@ -519,7 +519,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
|
||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes).unwrap()
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
|
||||
};
|
||||
|
||||
assert!(value.size > 0 && (value.size as usize) < db.max_group_size);
|
||||
@ -996,7 +996,7 @@ mod tests {
|
||||
|
||||
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
||||
// let (group_key, group_values) = group.unwrap();
|
||||
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
||||
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
||||
// assert_eq!(key, &group_key.left_bound);
|
||||
// assert_eq!(values, &group_values.bitmap);
|
||||
// }
|
||||
@ -1014,7 +1014,7 @@ mod tests {
|
||||
|
||||
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
||||
// let (group_key, group_values) = group.unwrap();
|
||||
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
||||
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
||||
// assert_eq!(key, &group_key.left_bound);
|
||||
// assert_eq!(values, &group_values.bitmap);
|
||||
// }
|
||||
|
@ -1,23 +1,20 @@
|
||||
use std::{collections::HashMap, fs::File};
|
||||
|
||||
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
|
||||
use crate::{
|
||||
facet::FacetType,
|
||||
heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||
CboRoaringBitmapCodec, FieldId, Index, Result,
|
||||
};
|
||||
use grenad::CompressionType;
|
||||
use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{
|
||||
facet::FacetType,
|
||||
heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice},
|
||||
CboRoaringBitmapCodec, FieldId, Index, Result,
|
||||
};
|
||||
|
||||
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
|
||||
use std::{collections::HashMap, fs::File};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod incremental;
|
||||
|
||||
pub struct FacetsUpdate<'i> {
|
||||
index: &'i Index,
|
||||
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
level_group_size: u8,
|
||||
max_level_group_size: u8,
|
||||
min_level_size: u8,
|
||||
@ -28,10 +25,10 @@ impl<'i> FacetsUpdate<'i> {
|
||||
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
||||
let database = match facet_type {
|
||||
FacetType::String => {
|
||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
FacetType::Number => {
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
};
|
||||
Self {
|
||||
@ -70,8 +67,8 @@ impl<'i> FacetsUpdate<'i> {
|
||||
|
||||
let mut cursor = self.new_data.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
let key =
|
||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
|
||||
.ok_or(heed::Error::Encoding)?;
|
||||
let docids =
|
||||
CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
||||
indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
||||
|
@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode};
|
||||
use super::helpers::{
|
||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
||||
};
|
||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
|
||||
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
||||
use crate::Result;
|
||||
|
||||
/// Extracts the facet number and the documents ids where this facet number appears.
|
||||
@ -36,8 +36,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
let (field_id, document_id, number) =
|
||||
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||
|
||||
let key = FacetKey { field_id, level: 0, left_bound: number };
|
||||
let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
|
||||
let key = FacetGroupKey { field_id, level: 0, left_bound: number };
|
||||
let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
|
||||
|
||||
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
||||
}
|
||||
|
@ -4,8 +4,8 @@ use std::io;
|
||||
use heed::BytesEncode;
|
||||
|
||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
|
||||
use crate::heed_codec::facet::StrRefCodec;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||
use crate::{FieldId, Result};
|
||||
|
||||
@ -43,8 +43,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||
|
||||
let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
|
||||
let key = FacetKey { field_id, level: 0, left_bound: normalised_value };
|
||||
let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
|
||||
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
|
||||
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
|
||||
|
||||
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
|
||||
}
|
||||
|
@ -0,0 +1,4 @@
|
||||
---
|
||||
source: milli/src/update/word_prefix_pair_proximity_docids.rs
|
||||
---
|
||||
6873ff1f78d08f2b1a13bb9e37349c01
|
Loading…
Reference in New Issue
Block a user