mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 12:35:05 +08:00
Refactor facet-related codecs
This commit is contained in:
parent
9b55e582cd
commit
485a72306d
@ -1,25 +1,19 @@
|
|||||||
// mod facet_level_value_f64_codec;
|
|
||||||
// mod facet_level_value_u32_codec;
|
|
||||||
// mod facet_string_level_zero_codec;
|
|
||||||
// mod facet_string_level_zero_value_codec;
|
|
||||||
// mod facet_string_zero_bounds_value_codec;
|
|
||||||
mod field_doc_id_facet_f64_codec;
|
mod field_doc_id_facet_f64_codec;
|
||||||
mod field_doc_id_facet_string_codec;
|
mod field_doc_id_facet_string_codec;
|
||||||
|
mod ordered_f64_codec;
|
||||||
|
mod str_ref;
|
||||||
|
|
||||||
pub mod new;
|
|
||||||
|
|
||||||
use heed::types::OwnedType;
|
|
||||||
|
|
||||||
// pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
|
|
||||||
// pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
|
|
||||||
// pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
|
|
||||||
// pub use self::facet_string_level_zero_value_codec::{
|
|
||||||
// decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec,
|
|
||||||
// };
|
|
||||||
// pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
|
|
||||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
||||||
use crate::BEU16;
|
pub use self::ordered_f64_codec::OrderedF64Codec;
|
||||||
|
pub use self::str_ref::StrRefCodec;
|
||||||
|
use crate::{CboRoaringBitmapCodec, BEU16};
|
||||||
|
use heed::types::OwnedType;
|
||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::convert::TryFrom;
|
||||||
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
pub type FieldIdCodec = OwnedType<BEU16>;
|
pub type FieldIdCodec = OwnedType<BEU16>;
|
||||||
|
|
||||||
@ -32,3 +26,109 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Key of an entry in a facet database: a field id, a level and the left bound of a group.
///
/// `T` is the in-memory representation of the left bound (for example `&[u8]` or `Vec<u8>`).
/// The derived ordering compares `field_id`, then `level`, then `left_bound`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetGroupKey<T> {
    /// Identifier of the faceted field this key belongs to.
    pub field_id: u16,
    /// Level of the entry; callers in this crate look up individual facet values at level 0.
    pub level: u8,
    /// Left bound of the group described by this key.
    pub left_bound: T,
}

impl<'a> FacetGroupKey<&'a [u8]> {
    /// Converts a key borrowing its left bound into one that owns it.
    ///
    /// The bound bytes are copied into a freshly allocated `Vec<u8>`; the field id and
    /// level are carried over unchanged.
    pub fn into_owned(self) -> FacetGroupKey<Vec<u8>> {
        let FacetGroupKey { field_id, level, left_bound } = self;
        FacetGroupKey { field_id, level, left_bound: left_bound.to_vec() }
    }
}

// No lifetime parameter here: the borrow of `self` in `as_ref` is the only lifetime involved.
impl FacetGroupKey<Vec<u8>> {
    /// Returns a key that borrows the left bound of `self` instead of owning it.
    ///
    /// This is the inverse of [`FacetGroupKey::into_owned`] and does not allocate.
    pub fn as_ref(&self) -> FacetGroupKey<&[u8]> {
        FacetGroupKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}
|
||||||
|
|
||||||
|
/// Value stored next to a facet group key: a one-byte `size` and a bitmap.
///
/// `FacetGroupValueCodec` encodes it as the `size` byte followed by the bitmap
/// serialized with `CboRoaringBitmapCodec`.
#[derive(Debug)]
|
||||||
|
pub struct FacetGroupValue {
|
||||||
|
// NOTE(review): presumably the number of entries grouped under this key — confirm with the
// code that builds the facet levels.
pub size: u8,
|
||||||
|
// Bitmap associated with the group; callers read it as the set of document ids.
pub bitmap: RoaringBitmap,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FacetGroupKeyCodec<T> {
|
||||||
|
_phantom: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec<T>
|
||||||
|
where
|
||||||
|
T: BytesEncode<'a>,
|
||||||
|
T::EItem: Sized,
|
||||||
|
{
|
||||||
|
type EItem = FacetGroupKey<T::EItem>;
|
||||||
|
|
||||||
|
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
let mut v = vec![];
|
||||||
|
v.extend_from_slice(&value.field_id.to_be_bytes());
|
||||||
|
v.extend_from_slice(&[value.level]);
|
||||||
|
|
||||||
|
let bound = T::bytes_encode(&value.left_bound)?;
|
||||||
|
v.extend_from_slice(&bound);
|
||||||
|
|
||||||
|
Some(Cow::Owned(v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
|
||||||
|
where
|
||||||
|
T: BytesDecode<'a>,
|
||||||
|
{
|
||||||
|
type DItem = FacetGroupKey<T::DItem>;
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
|
||||||
|
let level = bytes[2];
|
||||||
|
let bound = T::bytes_decode(&bytes[3..])?;
|
||||||
|
Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FacetGroupValueCodec;
|
||||||
|
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||||
|
type EItem = FacetGroupValue;
|
||||||
|
|
||||||
|
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
let mut v = vec![];
|
||||||
|
v.push(value.size);
|
||||||
|
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
|
||||||
|
Some(Cow::Owned(v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||||
|
type DItem = FacetGroupValue;
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let size = bytes[0];
|
||||||
|
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
|
||||||
|
Some(FacetGroupValue { size, bitmap })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ByteSliceRef;
|
||||||
|
|
||||||
|
impl<'a> BytesEncode<'a> for ByteSliceRef {
|
||||||
|
type EItem = &'a [u8];
|
||||||
|
|
||||||
|
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
Some(Cow::Borrowed(item))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BytesDecode<'a> for ByteSliceRef {
|
||||||
|
type DItem = &'a [u8];
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
Some(bytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,120 +0,0 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::convert::TryFrom;
|
|
||||||
use std::marker::PhantomData;
|
|
||||||
|
|
||||||
use heed::{BytesDecode, BytesEncode};
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
use crate::CboRoaringBitmapCodec;
|
|
||||||
|
|
||||||
pub mod ordered_f64_codec;
|
|
||||||
pub mod str_ref;
|
|
||||||
// TODO: these codecs were quickly written and not fast/resilient enough
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
|
||||||
pub struct FacetKey<T> {
|
|
||||||
pub field_id: u16,
|
|
||||||
pub level: u8,
|
|
||||||
pub left_bound: T,
|
|
||||||
}
|
|
||||||
impl<'a> FacetKey<&'a [u8]> {
|
|
||||||
pub fn into_owned(self) -> FacetKey<Vec<u8>> {
|
|
||||||
FacetKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: self.level,
|
|
||||||
left_bound: self.left_bound.to_vec(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> FacetKey<Vec<u8>> {
|
|
||||||
pub fn as_ref(&self) -> FacetKey<&[u8]> {
|
|
||||||
FacetKey {
|
|
||||||
field_id: self.field_id,
|
|
||||||
level: self.level,
|
|
||||||
left_bound: self.left_bound.as_slice(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct FacetGroupValue {
|
|
||||||
pub size: u8,
|
|
||||||
pub bitmap: RoaringBitmap,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FacetKeyCodec<T> {
|
|
||||||
_phantom: PhantomData<T>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, T> heed::BytesEncode<'a> for FacetKeyCodec<T>
|
|
||||||
where
|
|
||||||
T: BytesEncode<'a>,
|
|
||||||
T::EItem: Sized,
|
|
||||||
{
|
|
||||||
type EItem = FacetKey<T::EItem>;
|
|
||||||
|
|
||||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
|
||||||
let mut v = vec![];
|
|
||||||
v.extend_from_slice(&value.field_id.to_be_bytes());
|
|
||||||
v.extend_from_slice(&[value.level]);
|
|
||||||
|
|
||||||
let bound = T::bytes_encode(&value.left_bound)?;
|
|
||||||
v.extend_from_slice(&bound);
|
|
||||||
|
|
||||||
Some(Cow::Owned(v))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<'a, T> heed::BytesDecode<'a> for FacetKeyCodec<T>
|
|
||||||
where
|
|
||||||
T: BytesDecode<'a>,
|
|
||||||
{
|
|
||||||
type DItem = FacetKey<T::DItem>;
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
|
|
||||||
let level = bytes[2];
|
|
||||||
let bound = T::bytes_decode(&bytes[3..])?;
|
|
||||||
Some(FacetKey { field_id: fid, level, left_bound: bound })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FacetGroupValueCodec;
|
|
||||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
|
||||||
type EItem = FacetGroupValue;
|
|
||||||
|
|
||||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
|
||||||
let mut v = vec![];
|
|
||||||
v.push(value.size);
|
|
||||||
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
|
|
||||||
Some(Cow::Owned(v))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
|
||||||
type DItem = FacetGroupValue;
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let size = bytes[0];
|
|
||||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
|
|
||||||
Some(FacetGroupValue { size, bitmap })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: get rid of this codec as it is named confusingly + should really be part of heed
|
|
||||||
// or even replace the current ByteSlice codec
|
|
||||||
pub struct MyByteSlice;
|
|
||||||
|
|
||||||
impl<'a> BytesEncode<'a> for MyByteSlice {
|
|
||||||
type EItem = &'a [u8];
|
|
||||||
|
|
||||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
|
||||||
Some(Cow::Borrowed(item))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> BytesDecode<'a> for MyByteSlice {
|
|
||||||
type DItem = &'a [u8];
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
Some(bytes)
|
|
||||||
}
|
|
||||||
}
|
|
@ -14,15 +14,10 @@ use time::OffsetDateTime;
|
|||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
use crate::heed_codec::facet::StrRefCodec;
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec};
|
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec};
|
||||||
// FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
|
||||||
FieldDocIdFacetF64Codec,
|
|
||||||
FieldDocIdFacetStringCodec,
|
|
||||||
FieldIdCodec,
|
|
||||||
};
|
|
||||||
use crate::{
|
use crate::{
|
||||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||||
@ -130,9 +125,9 @@ pub struct Index {
|
|||||||
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||||
|
|
||||||
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
|
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
|
||||||
pub facet_id_f64_docids: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
|
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
|
||||||
pub facet_id_string_docids: Database<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||||
|
|
||||||
/// Maps the document id, the facet field id and the numbers.
|
/// Maps the document id, the facet field id and the numbers.
|
||||||
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
|
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
|
||||||
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||||
@ -196,14 +196,14 @@ fn facet_ordered<'t>(
|
|||||||
|
|
||||||
let number_iter = make_iter(
|
let number_iter = make_iter(
|
||||||
rtxn,
|
rtxn,
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||||
field_id,
|
field_id,
|
||||||
candidates.clone(),
|
candidates.clone(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let string_iter = make_iter(
|
let string_iter = make_iter(
|
||||||
rtxn,
|
rtxn,
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||||
field_id,
|
field_id,
|
||||||
candidates,
|
candidates,
|
||||||
)?;
|
)?;
|
||||||
|
@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{Distinct, DocIter};
|
use super::{Distinct, DocIter};
|
||||||
use crate::error::InternalError;
|
use crate::error::InternalError;
|
||||||
use crate::heed_codec::facet::new::FacetKey;
|
use crate::heed_codec::facet::FacetGroupKey;
|
||||||
use crate::heed_codec::facet::*;
|
use crate::heed_codec::facet::*;
|
||||||
use crate::index::db_name;
|
use crate::index::db_name;
|
||||||
use crate::{DocumentId, FieldId, Index, Result};
|
use crate::{DocumentId, FieldId, Index, Result};
|
||||||
@ -48,7 +48,7 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
self.index
|
self.index
|
||||||
.facet_id_string_docids
|
.facet_id_string_docids
|
||||||
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key })
|
.get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
|
||||||
.map(|opt| opt.map(|v| v.bitmap))
|
.map(|opt| opt.map(|v| v.bitmap))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,7 +56,7 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
// get facet docids on level 0
|
// get facet docids on level 0
|
||||||
self.index
|
self.index
|
||||||
.facet_id_f64_docids
|
.facet_id_f64_docids
|
||||||
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key })
|
.get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
|
||||||
.map(|opt| opt.map(|v| v.bitmap))
|
.map(|opt| opt.map(|v| v.bitmap))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,12 +8,11 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
use crate::heed_codec::facet::StrRefCodec;
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
||||||
use crate::search::facet::facet_distribution_iter;
|
use crate::search::facet::facet_distribution_iter;
|
||||||
// use crate::search::facet::FacetStringIter;
|
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
/// The default number of values by facets that will
|
/// The default number of values by facets that will
|
||||||
@ -138,7 +137,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
facet_distribution_iter::iterate_over_facet_distribution(
|
facet_distribution_iter::iterate_over_facet_distribution(
|
||||||
self.rtxn,
|
self.rtxn,
|
||||||
self.index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||||
field_id,
|
field_id,
|
||||||
candidates,
|
candidates,
|
||||||
|facet_key, nbr_docids| {
|
|facet_key, nbr_docids| {
|
||||||
@ -161,7 +160,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
facet_distribution_iter::iterate_over_facet_distribution(
|
facet_distribution_iter::iterate_over_facet_distribution(
|
||||||
self.rtxn,
|
self.rtxn,
|
||||||
self.index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||||
field_id,
|
field_id,
|
||||||
candidates,
|
candidates,
|
||||||
|facet_key, nbr_docids| {
|
|facet_key, nbr_docids| {
|
||||||
@ -191,7 +190,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
let iter = db
|
let iter = db
|
||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||||
.remap_types::<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
|
.remap_types::<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
|
||||||
|
|
||||||
for result in iter {
|
for result in iter {
|
||||||
let (key, value) = result?;
|
let (key, value) = result?;
|
||||||
@ -206,7 +205,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
.facet_id_string_docids
|
.facet_id_string_docids
|
||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||||
.remap_types::<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
|
.remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
|
||||||
|
|
||||||
// TODO: get the original value of the facet somewhere (in the documents DB?)
|
// TODO: get the original value of the facet somewhere (in the documents DB?)
|
||||||
for result in iter {
|
for result in iter {
|
||||||
|
@ -4,11 +4,11 @@ use heed::Result;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level};
|
use super::{get_first_facet_value, get_highest_level};
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupValueCodec, FacetGroupKeyCodec};
|
||||||
|
|
||||||
pub fn iterate_over_facet_distribution<'t, CB>(
|
pub fn iterate_over_facet_distribution<'t, CB>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
callback: CB,
|
callback: CB,
|
||||||
@ -18,9 +18,9 @@ where
|
|||||||
{
|
{
|
||||||
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
|
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
|
||||||
let highest_level =
|
let highest_level =
|
||||||
get_highest_level(rtxn, db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
|
get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?;
|
||||||
|
|
||||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
} else {
|
} else {
|
||||||
@ -33,7 +33,7 @@ where
|
|||||||
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
|
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
|
||||||
{
|
{
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
callback: CB,
|
callback: CB,
|
||||||
}
|
}
|
||||||
@ -49,7 +49,7 @@ where
|
|||||||
group_size: usize,
|
group_size: usize,
|
||||||
) -> Result<ControlFlow<()>> {
|
) -> Result<ControlFlow<()>> {
|
||||||
let starting_key =
|
let starting_key =
|
||||||
FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
|
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
|
||||||
let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size);
|
let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size);
|
||||||
for el in iter {
|
for el in iter {
|
||||||
let (key, value) = el?;
|
let (key, value) = el?;
|
||||||
@ -78,7 +78,7 @@ where
|
|||||||
if level == 0 {
|
if level == 0 {
|
||||||
return self.iterate_level_0(candidates, starting_bound, group_size);
|
return self.iterate_level_0(candidates, starting_bound, group_size);
|
||||||
}
|
}
|
||||||
let starting_key = FacetKey { field_id: self.field_id, level, left_bound: starting_bound };
|
let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
|
||||||
let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size);
|
let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size);
|
||||||
|
|
||||||
for el in iter {
|
for el in iter {
|
||||||
@ -116,7 +116,7 @@ mod tests {
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::iterate_over_facet_distribution;
|
use super::iterate_over_facet_distribution;
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
|
|
||||||
|
@ -4,12 +4,12 @@ use heed::BytesEncode;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||||
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||||
@ -42,13 +42,13 @@ where
|
|||||||
}
|
}
|
||||||
Bound::Unbounded => Bound::Unbounded,
|
Bound::Unbounded => Bound::Unbounded,
|
||||||
};
|
};
|
||||||
let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
|
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
|
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
|
|
||||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||||
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
|
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
|
||||||
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
|
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
} else {
|
} else {
|
||||||
@ -59,7 +59,7 @@ where
|
|||||||
/// Fetch the document ids that have a facet with a value between the two given bounds
|
/// Fetch the document ids that have a facet with a value between the two given bounds
|
||||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
left: Bound<&'b [u8]>,
|
left: Bound<&'b [u8]>,
|
||||||
right: Bound<&'b [u8]>,
|
right: Bound<&'b [u8]>,
|
||||||
@ -68,7 +68,7 @@ struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
|||||||
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||||
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
|
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
|
||||||
let left_key =
|
let left_key =
|
||||||
FacetKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
|
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
|
||||||
let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
|
let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
|
||||||
for el in iter {
|
for el in iter {
|
||||||
let (key, value) = el?;
|
let (key, value) = el?;
|
||||||
@ -117,7 +117,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
|||||||
return self.run_level_0(starting_left_bound, group_size);
|
return self.run_level_0(starting_left_bound, group_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
let left_key = FacetKey { field_id: self.field_id, level, left_bound: starting_left_bound };
|
let left_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound };
|
||||||
let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
|
let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
|
||||||
|
|
||||||
let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
|
let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
|
||||||
@ -258,8 +258,8 @@ mod tests {
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::find_docids_of_facet_within_bounds;
|
use super::find_docids_of_facet_within_bounds;
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::new::FacetKeyCodec;
|
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
use crate::snapshot_tests::display_bitmap;
|
use crate::snapshot_tests::display_bitmap;
|
||||||
@ -310,7 +310,7 @@ mod tests {
|
|||||||
let end = Bound::Included(i);
|
let end = Bound::Included(i);
|
||||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||||
&txn,
|
&txn,
|
||||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
@ -326,7 +326,7 @@ mod tests {
|
|||||||
let end = Bound::Excluded(i);
|
let end = Bound::Excluded(i);
|
||||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||||
&txn,
|
&txn,
|
||||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
@ -352,7 +352,7 @@ mod tests {
|
|||||||
let end = Bound::Included(255.);
|
let end = Bound::Included(255.);
|
||||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||||
&txn,
|
&txn,
|
||||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
@ -371,7 +371,7 @@ mod tests {
|
|||||||
let end = Bound::Excluded(255.);
|
let end = Bound::Excluded(255.);
|
||||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||||
&txn,
|
&txn,
|
||||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
@ -399,7 +399,7 @@ mod tests {
|
|||||||
let end = Bound::Included(255. - i);
|
let end = Bound::Included(255. - i);
|
||||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||||
&txn,
|
&txn,
|
||||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
@ -418,7 +418,7 @@ mod tests {
|
|||||||
let end = Bound::Excluded(255. - i);
|
let end = Bound::Excluded(255. - i);
|
||||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||||
&txn,
|
&txn,
|
||||||
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
|
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||||
0,
|
0,
|
||||||
&start,
|
&start,
|
||||||
&end,
|
&end,
|
||||||
|
@ -2,19 +2,19 @@ use heed::Result;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level};
|
use super::{get_first_facet_value, get_highest_level};
|
||||||
use crate::heed_codec::facet::new::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn ascending_facet_sort<'t>(
|
pub fn ascending_facet_sort<'t>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||||
let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound };
|
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||||
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
||||||
|
|
||||||
Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
|
Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
|
||||||
@ -25,11 +25,11 @@ pub fn ascending_facet_sort<'t>(
|
|||||||
|
|
||||||
struct AscendingFacetSort<'t, 'e> {
|
struct AscendingFacetSort<'t, 'e> {
|
||||||
rtxn: &'t heed::RoTxn<'e>,
|
rtxn: &'t heed::RoTxn<'e>,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
stack: Vec<(
|
stack: Vec<(
|
||||||
RoaringBitmap,
|
RoaringBitmap,
|
||||||
std::iter::Take<heed::RoRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>,
|
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
|
||||||
)>,
|
)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,7 +41,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
|
|||||||
let (documents_ids, deepest_iter) = self.stack.last_mut()?;
|
let (documents_ids, deepest_iter) = self.stack.last_mut()?;
|
||||||
for result in deepest_iter {
|
for result in deepest_iter {
|
||||||
let (
|
let (
|
||||||
FacetKey { level, left_bound, field_id },
|
FacetGroupKey { level, left_bound, field_id },
|
||||||
FacetGroupValue { size: group_size, mut bitmap },
|
FacetGroupValue { size: group_size, mut bitmap },
|
||||||
) = result.unwrap();
|
) = result.unwrap();
|
||||||
// The range is unbounded on the right and the group size for the highest level is MAX,
|
// The range is unbounded on the right and the group size for the highest level is MAX,
|
||||||
@ -65,7 +65,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
|
|||||||
return Some(Ok(bitmap));
|
return Some(Ok(bitmap));
|
||||||
}
|
}
|
||||||
let starting_key_below =
|
let starting_key_below =
|
||||||
FacetKey { field_id: self.field_id, level: level - 1, left_bound };
|
FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
|
||||||
let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) {
|
let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) {
|
||||||
Ok(iter) => iter,
|
Ok(iter) => iter,
|
||||||
Err(e) => return Some(Err(e.into())),
|
Err(e) => return Some(Err(e.into())),
|
||||||
@ -86,7 +86,7 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
|
@ -4,21 +4,21 @@ use heed::Result;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||||
use crate::heed_codec::facet::new::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn descending_facet_sort<'t>(
|
pub fn descending_facet_sort<'t>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||||
let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound };
|
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||||
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
|
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
|
||||||
let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound };
|
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||||
Ok(Box::new(DescendingFacetSort {
|
Ok(Box::new(DescendingFacetSort {
|
||||||
rtxn,
|
rtxn,
|
||||||
@ -33,11 +33,11 @@ pub fn descending_facet_sort<'t>(
|
|||||||
|
|
||||||
struct DescendingFacetSort<'t> {
|
struct DescendingFacetSort<'t> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
stack: Vec<(
|
stack: Vec<(
|
||||||
RoaringBitmap,
|
RoaringBitmap,
|
||||||
std::iter::Take<heed::RoRevRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>,
|
std::iter::Take<heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
|
||||||
Bound<&'t [u8]>,
|
Bound<&'t [u8]>,
|
||||||
)>,
|
)>,
|
||||||
}
|
}
|
||||||
@ -50,7 +50,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
|||||||
let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?;
|
let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?;
|
||||||
while let Some(result) = deepest_iter.next() {
|
while let Some(result) = deepest_iter.next() {
|
||||||
let (
|
let (
|
||||||
FacetKey { level, left_bound, field_id },
|
FacetGroupKey { level, left_bound, field_id },
|
||||||
FacetGroupValue { size: group_size, mut bitmap },
|
FacetGroupValue { size: group_size, mut bitmap },
|
||||||
) = result.unwrap();
|
) = result.unwrap();
|
||||||
// The range is unbounded on the right and the group size for the highest level is MAX,
|
// The range is unbounded on the right and the group size for the highest level is MAX,
|
||||||
@ -72,15 +72,15 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
|||||||
if level == 0 {
|
if level == 0 {
|
||||||
return Some(Ok(bitmap));
|
return Some(Ok(bitmap));
|
||||||
}
|
}
|
||||||
let starting_key_below = FacetKey { field_id, level: level - 1, left_bound };
|
let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound };
|
||||||
|
|
||||||
let end_key_kelow = match *right_bound {
|
let end_key_kelow = match *right_bound {
|
||||||
Bound::Included(right) => Bound::Included(FacetKey {
|
Bound::Included(right) => Bound::Included(FacetGroupKey {
|
||||||
field_id,
|
field_id,
|
||||||
level: level - 1,
|
level: level - 1,
|
||||||
left_bound: right,
|
left_bound: right,
|
||||||
}),
|
}),
|
||||||
Bound::Excluded(right) => Bound::Excluded(FacetKey {
|
Bound::Excluded(right) => Bound::Excluded(FacetGroupKey {
|
||||||
field_id,
|
field_id,
|
||||||
level: level - 1,
|
level: level - 1,
|
||||||
left_bound: right,
|
left_bound: right,
|
||||||
@ -90,7 +90,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
|||||||
let prev_right_bound = *right_bound;
|
let prev_right_bound = *right_bound;
|
||||||
*right_bound = Bound::Excluded(left_bound);
|
*right_bound = Bound::Excluded(left_bound);
|
||||||
let iter =
|
let iter =
|
||||||
match self.db.remap_key_type::<FacetKeyCodec<MyByteSlice>>().rev_range(
|
match self.db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>().rev_range(
|
||||||
&self.rtxn,
|
&self.rtxn,
|
||||||
&(Bound::Included(starting_key_below), end_key_kelow),
|
&(Bound::Included(starting_key_below), end_key_kelow),
|
||||||
) {
|
) {
|
||||||
@ -114,8 +114,8 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
@ -162,7 +162,7 @@ mod tests {
|
|||||||
let txn = index.env.read_txn().unwrap();
|
let txn = index.env.read_txn().unwrap();
|
||||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
let db = index.db.content.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
|
let db = index.db.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||||
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
||||||
for el in iter {
|
for el in iter {
|
||||||
let docids = el.unwrap();
|
let docids = el.unwrap();
|
||||||
|
@ -9,8 +9,8 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::facet_range_search;
|
use super::facet_range_search;
|
||||||
use crate::error::{Error, UserError};
|
use crate::error::{Error, UserError};
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
||||||
|
|
||||||
/// The maximum number of filters the filter AST can process.
|
/// The maximum number of filters the filter AST can process.
|
||||||
@ -180,7 +180,11 @@ impl<'a> Filter<'a> {
|
|||||||
let string_docids = strings_db
|
let string_docids = strings_db
|
||||||
.get(
|
.get(
|
||||||
rtxn,
|
rtxn,
|
||||||
&FacetKey { field_id, level: 0, left_bound: &val.value().to_lowercase() },
|
&FacetGroupKey {
|
||||||
|
field_id,
|
||||||
|
level: 0,
|
||||||
|
left_bound: &val.value().to_lowercase(),
|
||||||
|
},
|
||||||
)?
|
)?
|
||||||
.map(|v| v.bitmap)
|
.map(|v| v.bitmap)
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
@ -218,10 +222,10 @@ impl<'a> Filter<'a> {
|
|||||||
.remap_data_type::<DecodeIgnore>()
|
.remap_data_type::<DecodeIgnore>()
|
||||||
.get_lower_than_or_equal_to(
|
.get_lower_than_or_equal_to(
|
||||||
rtxn,
|
rtxn,
|
||||||
&FacetKey { field_id, level: u8::MAX, left_bound: f64::MAX },
|
&FacetGroupKey { field_id, level: u8::MAX, left_bound: f64::MAX },
|
||||||
)?
|
)?
|
||||||
.and_then(
|
.and_then(
|
||||||
|(FacetKey { field_id: id, level, .. }, _)| {
|
|(FacetGroupKey { field_id: id, level, .. }, _)| {
|
||||||
if id == field_id {
|
if id == field_id {
|
||||||
Some(level)
|
Some(level)
|
||||||
} else {
|
} else {
|
||||||
@ -252,7 +256,7 @@ impl<'a> Filter<'a> {
|
|||||||
/// going deeper through the levels.
|
/// going deeper through the levels.
|
||||||
fn explore_facet_number_levels(
|
fn explore_facet_number_levels(
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
level: u8,
|
level: u8,
|
||||||
left: Bound<f64>,
|
left: Bound<f64>,
|
||||||
|
@ -3,7 +3,7 @@ use heed::{BytesDecode, RoTxn};
|
|||||||
|
|
||||||
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
||||||
pub use self::filter::Filter;
|
pub use self::filter::Filter;
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
||||||
|
|
||||||
mod facet_distribution;
|
mod facet_distribution;
|
||||||
mod facet_distribution_iter;
|
mod facet_distribution_iter;
|
||||||
@ -14,7 +14,7 @@ mod filter;
|
|||||||
|
|
||||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
||||||
txn: &'t RoTxn,
|
txn: &'t RoTxn,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||||
where
|
where
|
||||||
@ -28,7 +28,7 @@ where
|
|||||||
if let Some(first) = level0_iter_forward.next() {
|
if let Some(first) = level0_iter_forward.next() {
|
||||||
let (first_key, _) = first?;
|
let (first_key, _) = first?;
|
||||||
let first_key =
|
let first_key =
|
||||||
FacetKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
|
FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
|
||||||
Ok(Some(first_key.left_bound))
|
Ok(Some(first_key.left_bound))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
@ -36,7 +36,7 @@ where
|
|||||||
}
|
}
|
||||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
||||||
txn: &'t RoTxn,
|
txn: &'t RoTxn,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||||
where
|
where
|
||||||
@ -51,7 +51,7 @@ where
|
|||||||
if let Some(last) = level0_iter_backward.next() {
|
if let Some(last) = level0_iter_backward.next() {
|
||||||
let (last_key, _) = last?;
|
let (last_key, _) = last?;
|
||||||
let last_key =
|
let last_key =
|
||||||
FacetKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
|
FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
|
||||||
Ok(Some(last_key.left_bound))
|
Ok(Some(last_key.left_bound))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
@ -59,7 +59,7 @@ where
|
|||||||
}
|
}
|
||||||
pub(crate) fn get_highest_level<'t>(
|
pub(crate) fn get_highest_level<'t>(
|
||||||
txn: &'t RoTxn<'t>,
|
txn: &'t RoTxn<'t>,
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<u8> {
|
) -> heed::Result<u8> {
|
||||||
let field_id_prefix = &field_id.to_be_bytes();
|
let field_id_prefix = &field_id.to_be_bytes();
|
||||||
@ -69,7 +69,7 @@ pub(crate) fn get_highest_level<'t>(
|
|||||||
.next()
|
.next()
|
||||||
.map(|el| {
|
.map(|el| {
|
||||||
let (key, _) = el.unwrap();
|
let (key, _) = el.unwrap();
|
||||||
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(key).unwrap();
|
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap();
|
||||||
key.level
|
key.level
|
||||||
})
|
})
|
||||||
.unwrap_or(0))
|
.unwrap_or(0))
|
||||||
@ -84,8 +84,8 @@ pub mod test {
|
|||||||
use heed::{BytesDecode, BytesEncode, Env, RwTxn};
|
use heed::{BytesDecode, BytesEncode, Env, RwTxn};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::new::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||||
};
|
};
|
||||||
use crate::snapshot_tests::display_bitmap;
|
use crate::snapshot_tests::display_bitmap;
|
||||||
use crate::update::FacetsUpdateIncremental;
|
use crate::update::FacetsUpdateIncremental;
|
||||||
@ -101,7 +101,7 @@ pub mod test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct Database {
|
pub struct Database {
|
||||||
pub content: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
pub group_size: usize,
|
pub group_size: usize,
|
||||||
pub max_group_size: usize,
|
pub max_group_size: usize,
|
||||||
_tempdir: Rc<tempfile::TempDir>,
|
_tempdir: Rc<tempfile::TempDir>,
|
||||||
@ -184,7 +184,7 @@ pub mod test {
|
|||||||
let mut iter = self.db.content.iter(&txn).unwrap();
|
let mut iter = self.db.content.iter(&txn).unwrap();
|
||||||
while let Some(el) = iter.next() {
|
while let Some(el) = iter.next() {
|
||||||
let (key, value) = el.unwrap();
|
let (key, value) = el.unwrap();
|
||||||
let FacetKey { field_id, level, left_bound: bound } = key;
|
let FacetGroupKey { field_id, level, left_bound: bound } = key;
|
||||||
let bound = BoundCodec::bytes_decode(bound).unwrap();
|
let bound = BoundCodec::bytes_decode(bound).unwrap();
|
||||||
let FacetGroupValue { size, bitmap } = value;
|
let FacetGroupValue { size, bitmap } = value;
|
||||||
writeln!(
|
writeln!(
|
||||||
|
@ -5,7 +5,7 @@ use std::path::Path;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValue, FacetKey};
|
use crate::heed_codec::facet::{FacetGroupValue, FacetGroupKey};
|
||||||
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
|
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
|
||||||
|
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
@ -280,7 +280,7 @@ pub fn snap_word_prefix_position_docids(index: &Index) -> String {
|
|||||||
}
|
}
|
||||||
pub fn snap_facet_id_f64_docids(index: &Index) -> String {
|
pub fn snap_facet_id_f64_docids(index: &Index) -> String {
|
||||||
let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |(
|
let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |(
|
||||||
FacetKey { field_id, level, left_bound },
|
FacetGroupKey { field_id, level, left_bound },
|
||||||
FacetGroupValue { size, bitmap },
|
FacetGroupValue { size, bitmap },
|
||||||
)| {
|
)| {
|
||||||
&format!("{field_id:<3} {level:<2} {left_bound:<6} {size:<2} {}", display_bitmap(&bitmap))
|
&format!("{field_id:<3} {level:<2} {left_bound:<6} {size:<2} {}", display_bitmap(&bitmap))
|
||||||
@ -289,7 +289,7 @@ pub fn snap_facet_id_f64_docids(index: &Index) -> String {
|
|||||||
}
|
}
|
||||||
pub fn snap_facet_id_string_docids(index: &Index) -> String {
|
pub fn snap_facet_id_string_docids(index: &Index) -> String {
|
||||||
let snap = make_db_snap_from_iter!(index, facet_id_string_docids, |(
|
let snap = make_db_snap_from_iter!(index, facet_id_string_docids, |(
|
||||||
FacetKey { field_id, level, left_bound },
|
FacetGroupKey { field_id, level, left_bound },
|
||||||
FacetGroupValue { size, bitmap },
|
FacetGroupValue { size, bitmap },
|
||||||
)| {
|
)| {
|
||||||
&format!("{field_id:<3} {level:<2} {left_bound:<12} {size:<2} {}", display_bitmap(&bitmap))
|
&format!("{field_id:<3} {level:<2} {left_bound:<12} {size:<2} {}", display_bitmap(&bitmap))
|
||||||
|
@ -11,7 +11,7 @@ use time::OffsetDateTime;
|
|||||||
use super::{ClearDocuments, FacetsUpdateBulk};
|
use super::{ClearDocuments, FacetsUpdateBulk};
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::index::{db_name, main_key};
|
use crate::index::{db_name, main_key};
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -626,10 +626,10 @@ fn remove_docids_from_facet_id_docids<'a>(
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let db = match facet_type {
|
let db = match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let mut modified = false;
|
let mut modified = false;
|
||||||
|
@ -12,8 +12,8 @@ use time::OffsetDateTime;
|
|||||||
|
|
||||||
use crate::error::InternalError;
|
use crate::error::InternalError;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::new::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||||
};
|
};
|
||||||
use crate::update::index_documents::{
|
use crate::update::index_documents::{
|
||||||
create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
|
create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
|
||||||
@ -22,7 +22,7 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
|||||||
|
|
||||||
pub struct FacetsUpdateBulk<'i> {
|
pub struct FacetsUpdateBulk<'i> {
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
level_group_size: usize,
|
level_group_size: usize,
|
||||||
min_level_size: usize,
|
min_level_size: usize,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
@ -40,10 +40,10 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
index,
|
index,
|
||||||
database: match facet_type {
|
database: match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
level_group_size: 4,
|
level_group_size: 4,
|
||||||
@ -61,10 +61,10 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
index,
|
index,
|
||||||
database: match facet_type {
|
database: match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
level_group_size: 4,
|
level_group_size: 4,
|
||||||
@ -89,8 +89,8 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> {
|
fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> {
|
||||||
let left = FacetKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
|
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
|
||||||
let right = FacetKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
|
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
|
||||||
let range = left..=right;
|
let range = left..=right;
|
||||||
self.database.delete_range(wtxn, &range).map(drop)?;
|
self.database.delete_range(wtxn, &range).map(drop)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -119,7 +119,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
for level_reader in level_readers {
|
for level_reader in level_readers {
|
||||||
let mut cursor = level_reader.into_cursor()?;
|
let mut cursor = level_reader.into_cursor()?;
|
||||||
while let Some((k, v)) = cursor.move_on_next()? {
|
while let Some((k, v)) = cursor.move_on_next()? {
|
||||||
let key = FacetKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
|
let key = FacetGroupKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
|
||||||
let value = FacetGroupValueCodec::bytes_decode(v).unwrap();
|
let value = FacetGroupValueCodec::bytes_decode(v).unwrap();
|
||||||
println!("inserting {key:?} {value:?}");
|
println!("inserting {key:?} {value:?}");
|
||||||
|
|
||||||
@ -210,7 +210,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
|
|
||||||
struct ComputeHigherLevels<'t> {
|
struct ComputeHigherLevels<'t> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
level_group_size: usize,
|
level_group_size: usize,
|
||||||
min_level_size: usize,
|
min_level_size: usize,
|
||||||
@ -233,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
.db
|
.db
|
||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())?
|
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())?
|
||||||
.remap_types::<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>();
|
.remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
|
||||||
|
|
||||||
let mut left_bound: &[u8] = &[];
|
let mut left_bound: &[u8] = &[];
|
||||||
let mut first_iteration_for_new_group = true;
|
let mut first_iteration_for_new_group = true;
|
||||||
@ -311,9 +311,9 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
for ((bitmap, left_bound), group_size) in
|
for ((bitmap, left_bound), group_size) in
|
||||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||||
{
|
{
|
||||||
let key = FacetKey { field_id: self.field_id, level, left_bound };
|
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
|
||||||
let key =
|
let key =
|
||||||
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||||
let value = FacetGroupValue { size: group_size, bitmap };
|
let value = FacetGroupValue { size: group_size, bitmap };
|
||||||
let value =
|
let value =
|
||||||
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||||
@ -329,9 +329,9 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
for ((bitmap, left_bound), group_size) in
|
for ((bitmap, left_bound), group_size) in
|
||||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||||
{
|
{
|
||||||
let key = FacetKey { field_id: self.field_id, level, left_bound };
|
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
|
||||||
let key =
|
let key =
|
||||||
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||||
let value = FacetGroupValue { size: group_size, bitmap };
|
let value = FacetGroupValue { size: group_size, bitmap };
|
||||||
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||||
cur_writer.insert(key, value)?;
|
cur_writer.insert(key, value)?;
|
||||||
|
@ -2,8 +2,8 @@ use heed::types::ByteSlice;
|
|||||||
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::new::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
|
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
||||||
};
|
};
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -19,13 +19,13 @@ enum DeletionResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct FacetsUpdateIncremental {
|
pub struct FacetsUpdateIncremental {
|
||||||
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
group_size: usize,
|
group_size: usize,
|
||||||
min_level_size: usize,
|
min_level_size: usize,
|
||||||
max_group_size: usize,
|
max_group_size: usize,
|
||||||
}
|
}
|
||||||
impl FacetsUpdateIncremental {
|
impl FacetsUpdateIncremental {
|
||||||
pub fn new(db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>) -> Self {
|
pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
|
||||||
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
|
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -36,7 +36,7 @@ impl FacetsUpdateIncremental {
|
|||||||
level: u8,
|
level: u8,
|
||||||
search_key: &[u8],
|
search_key: &[u8],
|
||||||
txn: &RoTxn,
|
txn: &RoTxn,
|
||||||
) -> Result<(FacetKey<Vec<u8>>, FacetGroupValue)> {
|
) -> Result<(FacetGroupKey<Vec<u8>>, FacetGroupValue)> {
|
||||||
let mut prefix = vec![];
|
let mut prefix = vec![];
|
||||||
prefix.extend_from_slice(&field_id.to_be_bytes());
|
prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||||
prefix.push(level);
|
prefix.push(level);
|
||||||
@ -45,17 +45,17 @@ impl FacetsUpdateIncremental {
|
|||||||
let mut prefix_iter = self
|
let mut prefix_iter = self
|
||||||
.db
|
.db
|
||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
|
.prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
|
||||||
if let Some(e) = prefix_iter.next() {
|
if let Some(e) = prefix_iter.next() {
|
||||||
let (key_bytes, value) = e?;
|
let (key_bytes, value) = e?;
|
||||||
Ok((
|
Ok((
|
||||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||||
.ok_or(Error::Encoding)?
|
.ok_or(Error::Encoding)?
|
||||||
.into_owned(),
|
.into_owned(),
|
||||||
value,
|
value,
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
let key = FacetKey { field_id, level, left_bound: search_key };
|
let key = FacetGroupKey { field_id, level, left_bound: search_key };
|
||||||
match self.db.get_lower_than(txn, &key)? {
|
match self.db.get_lower_than(txn, &key)? {
|
||||||
Some((key, value)) => {
|
Some((key, value)) => {
|
||||||
if key.level != level || key.field_id != field_id {
|
if key.level != level || key.field_id != field_id {
|
||||||
@ -66,13 +66,13 @@ impl FacetsUpdateIncremental {
|
|||||||
let mut iter = self
|
let mut iter = self
|
||||||
.db
|
.db
|
||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(
|
.prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(
|
||||||
txn,
|
txn,
|
||||||
&prefix.as_slice(),
|
&prefix.as_slice(),
|
||||||
)?;
|
)?;
|
||||||
let (key_bytes, value) = iter.next().unwrap()?;
|
let (key_bytes, value) = iter.next().unwrap()?;
|
||||||
Ok((
|
Ok((
|
||||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||||
.ok_or(Error::Encoding)?
|
.ok_or(Error::Encoding)?
|
||||||
.into_owned(),
|
.into_owned(),
|
||||||
value,
|
value,
|
||||||
@ -93,7 +93,7 @@ impl FacetsUpdateIncremental {
|
|||||||
new_key: &[u8],
|
new_key: &[u8],
|
||||||
new_values: &RoaringBitmap,
|
new_values: &RoaringBitmap,
|
||||||
) -> Result<InsertionResult> {
|
) -> Result<InsertionResult> {
|
||||||
let key = FacetKey { field_id, level: 0, left_bound: new_key };
|
let key = FacetGroupKey { field_id, level: 0, left_bound: new_key };
|
||||||
let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 };
|
let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 };
|
||||||
|
|
||||||
let mut level0_prefix = vec![];
|
let mut level0_prefix = vec![];
|
||||||
@ -193,7 +193,7 @@ impl FacetsUpdateIncremental {
|
|||||||
.db
|
.db
|
||||||
.get_greater_than_or_equal_to(
|
.get_greater_than_or_equal_to(
|
||||||
&txn,
|
&txn,
|
||||||
&FacetKey {
|
&FacetGroupKey {
|
||||||
field_id,
|
field_id,
|
||||||
level: level_below,
|
level: level_below,
|
||||||
left_bound: insertion_key.left_bound.as_slice(),
|
left_bound: insertion_key.left_bound.as_slice(),
|
||||||
@ -217,7 +217,7 @@ impl FacetsUpdateIncremental {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let key =
|
let key =
|
||||||
FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
|
FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
|
||||||
let value = FacetGroupValue { size: size_left as u8, bitmap: values_left };
|
let value = FacetGroupValue { size: size_left as u8, bitmap: values_left };
|
||||||
(key, value)
|
(key, value)
|
||||||
};
|
};
|
||||||
@ -235,7 +235,7 @@ impl FacetsUpdateIncremental {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let key =
|
let key =
|
||||||
FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
|
FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
|
||||||
let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
|
let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
|
||||||
(key, value)
|
(key, value)
|
||||||
};
|
};
|
||||||
@ -303,7 +303,7 @@ impl FacetsUpdateIncremental {
|
|||||||
let mut values = RoaringBitmap::new();
|
let mut values = RoaringBitmap::new();
|
||||||
for _ in 0..group_size {
|
for _ in 0..group_size {
|
||||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||||
let key_i = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
|
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||||
.ok_or(Error::Encoding)?;
|
.ok_or(Error::Encoding)?;
|
||||||
|
|
||||||
if first_key.is_none() {
|
if first_key.is_none() {
|
||||||
@ -311,7 +311,7 @@ impl FacetsUpdateIncremental {
|
|||||||
}
|
}
|
||||||
values |= value_i.bitmap;
|
values |= value_i.bitmap;
|
||||||
}
|
}
|
||||||
let key = FacetKey {
|
let key = FacetGroupKey {
|
||||||
field_id,
|
field_id,
|
||||||
level: highest_level + 1,
|
level: highest_level + 1,
|
||||||
left_bound: first_key.unwrap().left_bound,
|
left_bound: first_key.unwrap().left_bound,
|
||||||
@ -384,7 +384,7 @@ impl FacetsUpdateIncremental {
|
|||||||
key: &[u8],
|
key: &[u8],
|
||||||
value: u32,
|
value: u32,
|
||||||
) -> Result<DeletionResult> {
|
) -> Result<DeletionResult> {
|
||||||
let key = FacetKey { field_id, level: 0, left_bound: key };
|
let key = FacetGroupKey { field_id, level: 0, left_bound: key };
|
||||||
let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
|
let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
|
||||||
bitmap.remove(value);
|
bitmap.remove(value);
|
||||||
|
|
||||||
@ -415,7 +415,7 @@ impl FacetsUpdateIncremental {
|
|||||||
key: &[u8],
|
key: &[u8],
|
||||||
value: u32,
|
value: u32,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() {
|
if self.db.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: key })?.is_none() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
let highest_level = get_highest_level(&txn, self.db, field_id)?;
|
let highest_level = get_highest_level(&txn, self.db, field_id)?;
|
||||||
@ -450,7 +450,7 @@ impl FacetsUpdateIncremental {
|
|||||||
while let Some(el) = iter.next() {
|
while let Some(el) = iter.next() {
|
||||||
let (k, _) = el?;
|
let (k, _) = el?;
|
||||||
to_delete.push(
|
to_delete.push(
|
||||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
drop(iter);
|
drop(iter);
|
||||||
@ -469,9 +469,9 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
use crate::heed_codec::facet::str_ref::StrRefCodec;
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
@ -502,7 +502,7 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
while let Some(el) = iter.next() {
|
while let Some(el) = iter.next() {
|
||||||
let (key, value) = el.unwrap();
|
let (key, value) = el.unwrap();
|
||||||
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key).unwrap();
|
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
|
||||||
|
|
||||||
let mut prefix_start_below = vec![];
|
let mut prefix_start_below = vec![];
|
||||||
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
|
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
|
||||||
@ -519,7 +519,7 @@ mod tests {
|
|||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
|
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
|
||||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes).unwrap()
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(value.size > 0 && (value.size as usize) < db.max_group_size);
|
assert!(value.size > 0 && (value.size as usize) < db.max_group_size);
|
||||||
@ -996,7 +996,7 @@ mod tests {
|
|||||||
|
|
||||||
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
||||||
// let (group_key, group_values) = group.unwrap();
|
// let (group_key, group_values) = group.unwrap();
|
||||||
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
||||||
// assert_eq!(key, &group_key.left_bound);
|
// assert_eq!(key, &group_key.left_bound);
|
||||||
// assert_eq!(values, &group_values.bitmap);
|
// assert_eq!(values, &group_values.bitmap);
|
||||||
// }
|
// }
|
||||||
@ -1014,7 +1014,7 @@ mod tests {
|
|||||||
|
|
||||||
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
||||||
// let (group_key, group_values) = group.unwrap();
|
// let (group_key, group_values) = group.unwrap();
|
||||||
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
||||||
// assert_eq!(key, &group_key.left_bound);
|
// assert_eq!(key, &group_key.left_bound);
|
||||||
// assert_eq!(values, &group_values.bitmap);
|
// assert_eq!(values, &group_values.bitmap);
|
||||||
// }
|
// }
|
||||||
|
@ -1,23 +1,20 @@
|
|||||||
use std::{collections::HashMap, fs::File};
|
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
|
||||||
|
use crate::{
|
||||||
|
facet::FacetType,
|
||||||
|
heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||||
|
CboRoaringBitmapCodec, FieldId, Index, Result,
|
||||||
|
};
|
||||||
use grenad::CompressionType;
|
use grenad::CompressionType;
|
||||||
use heed::BytesDecode;
|
use heed::BytesDecode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
use std::{collections::HashMap, fs::File};
|
||||||
use crate::{
|
|
||||||
facet::FacetType,
|
|
||||||
heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice},
|
|
||||||
CboRoaringBitmapCodec, FieldId, Index, Result,
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
|
|
||||||
|
|
||||||
pub mod bulk;
|
pub mod bulk;
|
||||||
pub mod incremental;
|
pub mod incremental;
|
||||||
|
|
||||||
pub struct FacetsUpdate<'i> {
|
pub struct FacetsUpdate<'i> {
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
level_group_size: u8,
|
level_group_size: u8,
|
||||||
max_level_group_size: u8,
|
max_level_group_size: u8,
|
||||||
min_level_size: u8,
|
min_level_size: u8,
|
||||||
@ -28,10 +25,10 @@ impl<'i> FacetsUpdate<'i> {
|
|||||||
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
||||||
let database = match facet_type {
|
let database = match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Self {
|
Self {
|
||||||
@ -70,8 +67,8 @@ impl<'i> FacetsUpdate<'i> {
|
|||||||
|
|
||||||
let mut cursor = self.new_data.into_cursor()?;
|
let mut cursor = self.new_data.into_cursor()?;
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
let key =
|
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
|
||||||
FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
|
.ok_or(heed::Error::Encoding)?;
|
||||||
let docids =
|
let docids =
|
||||||
CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
||||||
indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
||||||
|
@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode};
|
|||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
||||||
};
|
};
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
|
||||||
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
|
|
||||||
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
||||||
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// Extracts the facet number and the documents ids where this facet number appear.
|
/// Extracts the facet number and the documents ids where this facet number appear.
|
||||||
@ -36,8 +36,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
|||||||
let (field_id, document_id, number) =
|
let (field_id, document_id, number) =
|
||||||
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||||
|
|
||||||
let key = FacetKey { field_id, level: 0, left_bound: number };
|
let key = FacetGroupKey { field_id, level: 0, left_bound: number };
|
||||||
let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
|
let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
|
||||||
|
|
||||||
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,8 @@ use std::io;
|
|||||||
use heed::BytesEncode;
|
use heed::BytesEncode;
|
||||||
|
|
||||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||||
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
use crate::heed_codec::facet::StrRefCodec;
|
||||||
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
||||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||||
use crate::{FieldId, Result};
|
use crate::{FieldId, Result};
|
||||||
|
|
||||||
@ -43,8 +43,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||||
|
|
||||||
let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
|
let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
|
||||||
let key = FacetKey { field_id, level: 0, left_bound: normalised_value };
|
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
|
||||||
let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
|
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
|
||||||
|
|
||||||
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
|
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
---
|
||||||
|
source: milli/src/update/word_prefix_pair_proximity_docids.rs
|
||||||
|
---
|
||||||
|
6873ff1f78d08f2b1a13bb9e37349c01
|
Loading…
Reference in New Issue
Block a user