mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Reintroduce filter range search and facet extractors
This commit is contained in:
parent
22d80eeaf9
commit
39a4a0a362
@ -15,7 +15,7 @@ use super::get_last_facet_value;
|
|||||||
|
|
||||||
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
db: &'t heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||||
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||||
@ -48,13 +48,13 @@ where
|
|||||||
}
|
}
|
||||||
Bound::Unbounded => Bound::Unbounded,
|
Bound::Unbounded => Bound::Unbounded,
|
||||||
};
|
};
|
||||||
|
let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
|
let mut f = FacetRangeSearch { rtxn, db: &db, field_id, left, right, docids: &mut docids };
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, &db, field_id)?;
|
||||||
|
|
||||||
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, &db, field_id)? {
|
||||||
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
|
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, &db, field_id)?.unwrap();
|
||||||
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
|
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
} else {
|
} else {
|
||||||
|
@ -1,22 +1,17 @@
|
|||||||
use std::collections::HashSet;
|
|
||||||
use std::fmt::{Debug, Display};
|
|
||||||
use std::ops::Bound::{self, Excluded, Included};
|
|
||||||
use std::ops::RangeBounds;
|
|
||||||
|
|
||||||
use either::Either;
|
use either::Either;
|
||||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
|
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
|
||||||
use heed::types::DecodeIgnore;
|
use heed::types::DecodeIgnore;
|
||||||
use heed::LazyDecode;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::fmt::{Debug, Display};
|
||||||
|
use std::ops::Bound::{self, Excluded, Included};
|
||||||
|
|
||||||
// use super::FacetNumberRange;
|
|
||||||
use crate::error::{Error, UserError};
|
use crate::error::{Error, UserError};
|
||||||
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
|
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
|
||||||
// use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
||||||
use crate::{
|
|
||||||
distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result,
|
use super::facet_range_search;
|
||||||
};
|
|
||||||
|
|
||||||
/// The maximum number of filters the filter AST can process.
|
/// The maximum number of filters the filter AST can process.
|
||||||
const MAX_FILTER_DEPTH: usize = 2000;
|
const MAX_FILTER_DEPTH: usize = 2000;
|
||||||
@ -147,158 +142,15 @@ impl<'a> Filter<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn explore_facet_number_levels(
|
|
||||||
rtxn: &heed::RoTxn,
|
|
||||||
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
|
||||||
field_id: FieldId,
|
|
||||||
) {
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Filter<'a> {
|
impl<'a> Filter<'a> {
|
||||||
/// Aggregates the documents ids that are part of the specified range automatically
|
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
||||||
/// going deeper through the levels.
|
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||||
fn explore_facet_number_levels(
|
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
|
||||||
rtxn: &heed::RoTxn,
|
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||||
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, CboRoaringBitmapCodec>,
|
|
||||||
field_id: FieldId,
|
|
||||||
level: u8,
|
|
||||||
left: Bound<f64>,
|
|
||||||
right: Bound<f64>,
|
|
||||||
output: &mut RoaringBitmap,
|
|
||||||
) -> Result<()> {
|
|
||||||
// level must be > 0, I'll create a separate function for level 0
|
|
||||||
// if level == 0 {
|
|
||||||
// call that function
|
|
||||||
//}
|
|
||||||
match (left, right) {
|
|
||||||
// If the request is an exact value we must go directly to the deepest level.
|
|
||||||
(Included(l), Included(r)) if l == r && level > 0 => {
|
|
||||||
return Self::explore_facet_number_levels(
|
|
||||||
rtxn, db, field_id, 0, left, right, output,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// lower TO upper when lower > upper must return no result
|
|
||||||
(Included(l), Included(r)) if l > r => return Ok(()),
|
|
||||||
(Included(l), Excluded(r)) if l >= r => return Ok(()),
|
|
||||||
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
|
|
||||||
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
|
||||||
(_, _) => (),
|
|
||||||
}
|
|
||||||
let range_start_key = FacetKey {
|
|
||||||
field_id,
|
|
||||||
level,
|
|
||||||
left_bound: match left {
|
|
||||||
Included(l) => l,
|
|
||||||
Excluded(l) => l,
|
|
||||||
Bound::Unbounded => f64::MIN,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
let mut range_iter = db
|
|
||||||
.remap_data_type::<LazyDecode<FacetGroupValueCodec>>()
|
|
||||||
.range(rtxn, &(range_start_key..))?;
|
|
||||||
|
|
||||||
let (mut previous_facet_key, mut previous_value) = range_iter.next().unwrap()?;
|
// and finally we delete all the soft_deleted_documents, again, only once at the very end
|
||||||
while let Some(el) = range_iter.next() {
|
self.inner_evaluate(rtxn, index, &filterable_fields)
|
||||||
let (facet_key, value) = el?;
|
.map(|result| result - soft_deleted_documents)
|
||||||
let range = (Included(previous_facet_key.left_bound), Excluded(facet_key.left_bound));
|
|
||||||
// if the current range intersects with the query range, then go deeper
|
|
||||||
// what does it mean for two ranges to intersect?
|
|
||||||
let gte_left = match left {
|
|
||||||
Included(l) => previous_facet_key.left_bound >= l,
|
|
||||||
Excluded(l) => previous_facet_key.left_bound > l, // TODO: not true?
|
|
||||||
Bound::Unbounded => true,
|
|
||||||
};
|
|
||||||
let lte_right = match right {
|
|
||||||
Included(r) => facet_key.left_bound <= r,
|
|
||||||
Excluded(r) => facet_key.left_bound < r,
|
|
||||||
Bound::Unbounded => true,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
// at this point, previous_facet_key and previous_value are the last groups in the level
|
|
||||||
// we must also check whether we should visit this group
|
|
||||||
|
|
||||||
todo!();
|
|
||||||
|
|
||||||
// let mut left_found = None;
|
|
||||||
// let mut right_found = None;
|
|
||||||
|
|
||||||
// // We must create a custom iterator to be able to iterate over the
|
|
||||||
// // requested range as the range iterator cannot express some conditions.
|
|
||||||
// let iter = FacetNumberRange::new(rtxn, db, field_id, level, left, right)?;
|
|
||||||
|
|
||||||
// debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
|
|
||||||
|
|
||||||
// for (i, result) in iter.enumerate() {
|
|
||||||
// let ((_fid, level, l, r), docids) = result?;
|
|
||||||
// debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
|
||||||
// *output |= docids;
|
|
||||||
// // We save the leftest and rightest bounds we actually found at this level.
|
|
||||||
// if i == 0 {
|
|
||||||
// left_found = Some(l);
|
|
||||||
// }
|
|
||||||
// right_found = Some(r);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // Can we go deeper?
|
|
||||||
// let deeper_level = match level.checked_sub(1) {
|
|
||||||
// Some(level) => level,
|
|
||||||
// None => return Ok(()),
|
|
||||||
// };
|
|
||||||
|
|
||||||
// // We must refine the left and right bounds of this range by retrieving the
|
|
||||||
// // missing part in a deeper level.
|
|
||||||
// match left_found.zip(right_found) {
|
|
||||||
// Some((left_found, right_found)) => {
|
|
||||||
// // If the bound is satisfied we avoid calling this function again.
|
|
||||||
// if !matches!(left, Included(l) if l == left_found) {
|
|
||||||
// let sub_right = Excluded(left_found);
|
|
||||||
// debug!(
|
|
||||||
// "calling left with {:?} to {:?} (level {})",
|
|
||||||
// left, sub_right, deeper_level
|
|
||||||
// );
|
|
||||||
// Self::explore_facet_number_levels(
|
|
||||||
// rtxn,
|
|
||||||
// db,
|
|
||||||
// field_id,
|
|
||||||
// deeper_level,
|
|
||||||
// left,
|
|
||||||
// sub_right,
|
|
||||||
// output,
|
|
||||||
// )?;
|
|
||||||
// }
|
|
||||||
// if !matches!(right, Included(r) if r == right_found) {
|
|
||||||
// let sub_left = Excluded(right_found);
|
|
||||||
// debug!(
|
|
||||||
// "calling right with {:?} to {:?} (level {})",
|
|
||||||
// sub_left, right, deeper_level
|
|
||||||
// );
|
|
||||||
// Self::explore_facet_number_levels(
|
|
||||||
// rtxn,
|
|
||||||
// db,
|
|
||||||
// field_id,
|
|
||||||
// deeper_level,
|
|
||||||
// sub_left,
|
|
||||||
// right,
|
|
||||||
// output,
|
|
||||||
// )?;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// None => {
|
|
||||||
// // If we found nothing at this level it means that we must find
|
|
||||||
// // the same bounds but at a deeper, more precise level.
|
|
||||||
// Self::explore_facet_number_levels(
|
|
||||||
// rtxn,
|
|
||||||
// db,
|
|
||||||
// field_id,
|
|
||||||
// deeper_level,
|
|
||||||
// left,
|
|
||||||
// right,
|
|
||||||
// output,
|
|
||||||
// )?;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn evaluate_operator(
|
fn evaluate_operator(
|
||||||
@ -337,15 +189,15 @@ impl<'a> Filter<'a> {
|
|||||||
Some(n) => {
|
Some(n) => {
|
||||||
let n = Included(n);
|
let n = Included(n);
|
||||||
let mut output = RoaringBitmap::new();
|
let mut output = RoaringBitmap::new();
|
||||||
// Self::explore_facet_number_levels(
|
Self::explore_facet_number_levels(
|
||||||
// rtxn,
|
rtxn,
|
||||||
// numbers_db,
|
numbers_db,
|
||||||
// field_id,
|
field_id,
|
||||||
// 0,
|
0,
|
||||||
// n,
|
n,
|
||||||
// n,
|
n,
|
||||||
// &mut output,
|
&mut output,
|
||||||
// )?;
|
)?;
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
None => RoaringBitmap::new(),
|
None => RoaringBitmap::new(),
|
||||||
@ -381,29 +233,53 @@ impl<'a> Filter<'a> {
|
|||||||
match biggest_level {
|
match biggest_level {
|
||||||
Some(level) => {
|
Some(level) => {
|
||||||
let mut output = RoaringBitmap::new();
|
let mut output = RoaringBitmap::new();
|
||||||
// Self::explore_facet_number_levels(
|
Self::explore_facet_number_levels(
|
||||||
// rtxn,
|
rtxn,
|
||||||
// numbers_db,
|
numbers_db,
|
||||||
// field_id,
|
field_id,
|
||||||
// level,
|
level,
|
||||||
// left,
|
left,
|
||||||
// right,
|
right,
|
||||||
// &mut output,
|
&mut output,
|
||||||
// )?;
|
)?;
|
||||||
Ok(output)
|
Ok(output)
|
||||||
}
|
}
|
||||||
None => Ok(RoaringBitmap::new()),
|
None => Ok(RoaringBitmap::new()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
|
/// Aggregates the documents ids that are part of the specified range automatically
|
||||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
/// going deeper through the levels.
|
||||||
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
|
fn explore_facet_number_levels(
|
||||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
rtxn: &heed::RoTxn,
|
||||||
|
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
|
field_id: FieldId,
|
||||||
|
level: u8,
|
||||||
|
left: Bound<f64>,
|
||||||
|
right: Bound<f64>,
|
||||||
|
output: &mut RoaringBitmap,
|
||||||
|
) -> Result<()> {
|
||||||
|
match (left, right) {
|
||||||
|
// If the request is an exact value we must go directly to the deepest level.
|
||||||
|
(Included(l), Included(r)) if l == r && level > 0 => {
|
||||||
|
return Self::explore_facet_number_levels(
|
||||||
|
rtxn, db, field_id, 0, left, right, output,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// lower TO upper when lower > upper must return no result
|
||||||
|
(Included(l), Included(r)) if l > r => return Ok(()),
|
||||||
|
(Included(l), Excluded(r)) if l >= r => return Ok(()),
|
||||||
|
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
|
||||||
|
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
||||||
|
(_, _) => (),
|
||||||
|
}
|
||||||
|
let x = facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||||
|
rtxn, &db, field_id, &left, &right,
|
||||||
|
)?;
|
||||||
|
// TODO: the facet range search should take a mutable roaring bitmap as argument
|
||||||
|
*output = x;
|
||||||
|
|
||||||
// and finally we delete all the soft_deleted_documents, again, only once at the very end
|
Ok(())
|
||||||
self.inner_evaluate(rtxn, index, &filterable_fields)
|
|
||||||
.map(|result| result - soft_deleted_documents)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn inner_evaluate(
|
fn inner_evaluate(
|
||||||
|
@ -2,22 +2,20 @@ use std::collections::btree_map::Entry;
|
|||||||
|
|
||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
use heed::types::{ByteSlice, Str};
|
use heed::types::{ByteSlice, Str};
|
||||||
use heed::{BytesDecode, BytesEncode, Database};
|
use heed::Database;
|
||||||
use obkv::Key;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use super::{ClearDocuments, Facets};
|
use super::{ClearDocuments, Facets};
|
||||||
use crate::error::{InternalError, SerializationError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
// use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec;
|
|
||||||
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::index::{db_name, main_key};
|
use crate::index::{db_name, main_key};
|
||||||
use crate::{
|
use crate::{
|
||||||
fields_ids_map, DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry,
|
DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
||||||
FieldsIdsMap, Index, Result, RoaringBitmapCodec, SmallString32, BEU32,
|
RoaringBitmapCodec, SmallString32, BEU32,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||||
|
@ -6,6 +6,8 @@ use heed::{BytesDecode, BytesEncode};
|
|||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
||||||
};
|
};
|
||||||
|
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||||
|
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
|
||||||
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
@ -31,14 +33,13 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
let mut cursor = docid_fid_facet_number.into_cursor()?;
|
let mut cursor = docid_fid_facet_number.into_cursor()?;
|
||||||
while let Some((key_bytes, _)) = cursor.move_on_next()? {
|
while let Some((key_bytes, _)) = cursor.move_on_next()? {
|
||||||
todo!()
|
let (field_id, document_id, number) =
|
||||||
// let (field_id, document_id, number) =
|
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||||
// FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
|
||||||
|
|
||||||
// let key = (field_id, 0, number, number);
|
let key = FacetKey { field_id, level: 0, left_bound: number };
|
||||||
// // let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
|
let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
|
||||||
|
|
||||||
// facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
sorter_into_reader(facet_number_docids_sorter, indexer)
|
sorter_into_reader(facet_number_docids_sorter, indexer)
|
||||||
|
@ -1,13 +1,11 @@
|
|||||||
use std::fs::File;
|
|
||||||
use std::iter::FromIterator;
|
|
||||||
use std::{io, str};
|
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||||
|
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||||
|
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
|
||||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||||
// use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
|
|
||||||
use crate::{FieldId, Result};
|
use crate::{FieldId, Result};
|
||||||
|
use heed::BytesEncode;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io;
|
||||||
|
|
||||||
/// Extracts the facet string and the documents ids where this facet string appears.
|
/// Extracts the facet string and the documents ids where this facet string appears.
|
||||||
///
|
///
|
||||||
@ -22,38 +20,26 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
let mut facet_string_docids_sorter = create_sorter(
|
let mut facet_string_docids_sorter = create_sorter(
|
||||||
grenad::SortAlgorithm::Stable,
|
grenad::SortAlgorithm::Stable,
|
||||||
merge_cbo_roaring_bitmaps, // TODO: check
|
merge_cbo_roaring_bitmaps, // TODO: check that it is correct
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut key_buffer = Vec::new();
|
|
||||||
let mut value_buffer = Vec::new();
|
|
||||||
let mut cursor = docid_fid_facet_string.into_cursor()?;
|
let mut cursor = docid_fid_facet_string.into_cursor()?;
|
||||||
while let Some((key, original_value_bytes)) = cursor.move_on_next()? {
|
while let Some((key, _original_value_bytes)) = cursor.move_on_next()? {
|
||||||
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
|
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
|
||||||
let field_id = FieldId::from_be_bytes(field_id_bytes);
|
let field_id = FieldId::from_be_bytes(field_id_bytes);
|
||||||
let (document_id_bytes, normalized_value_bytes) = try_split_array_at(bytes).unwrap();
|
|
||||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
|
||||||
let original_value = str::from_utf8(original_value_bytes)?;
|
|
||||||
|
|
||||||
key_buffer.clear();
|
let (document_id_bytes, normalized_value_bytes) =
|
||||||
// TODO
|
try_split_array_at::<_, 4>(bytes).unwrap();
|
||||||
// FacetStringLevelZeroCodec::serialize_into(
|
|
||||||
// field_id,
|
|
||||||
// str::from_utf8(normalized_value_bytes)?,
|
|
||||||
// &mut key_buffer,
|
|
||||||
// );
|
|
||||||
|
|
||||||
value_buffer.clear();
|
let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
|
||||||
// TODO
|
let key = FacetKey { field_id, level: 0, left_bound: normalised_value };
|
||||||
// encode_prefix_string(original_value, &mut value_buffer)?;
|
let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
|
||||||
let bitmap = RoaringBitmap::from_iter(Some(document_id));
|
|
||||||
bitmap.serialize_into(&mut value_buffer)?;
|
|
||||||
|
|
||||||
facet_string_docids_sorter.insert(&key_buffer, &value_buffer)?;
|
facet_string_docids_sorter.insert(&key_bytes, &document_id_bytes)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
sorter_into_reader(facet_string_docids_sorter, indexer)
|
sorter_into_reader(facet_string_docids_sorter, indexer)
|
||||||
|
Loading…
Reference in New Issue
Block a user