mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 01:57:41 +08:00
Extract and write facet databases
This commit is contained in:
parent
f7652186e1
commit
7ba49b849e
@ -6,6 +6,7 @@ use grenad::Merger;
|
||||
use heed::types::Bytes;
|
||||
use memmap2::Mmap;
|
||||
|
||||
use super::extract::FacetKind;
|
||||
use super::StdResult;
|
||||
use crate::index::main_key::{DOCUMENTS_IDS_KEY, WORDS_FST_KEY};
|
||||
use crate::update::new::KvReaderFieldId;
|
||||
@ -120,11 +121,16 @@ pub enum Database {
|
||||
WordFidDocids,
|
||||
WordPairProximityDocids,
|
||||
WordPositionDocids,
|
||||
FacetIdIsNullDocids,
|
||||
FacetIdIsEmptyDocids,
|
||||
FacetIdExistsDocids,
|
||||
FacetIdF64NumberDocids,
|
||||
FacetIdStringDocids,
|
||||
}
|
||||
|
||||
impl WriterOperation {
|
||||
impl Database {
|
||||
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
|
||||
match self.database {
|
||||
match self {
|
||||
Database::Documents => index.documents.remap_types(),
|
||||
Database::ExactWordDocids => index.exact_word_docids.remap_types(),
|
||||
Database::Main => index.main.remap_types(),
|
||||
@ -133,8 +139,19 @@ impl WriterOperation {
|
||||
Database::WordPositionDocids => index.word_position_docids.remap_types(),
|
||||
Database::FidWordCountDocids => index.field_id_word_count_docids.remap_types(),
|
||||
Database::WordPairProximityDocids => index.word_pair_proximity_docids.remap_types(),
|
||||
Database::FacetIdIsNullDocids => index.facet_id_is_null_docids.remap_types(),
|
||||
Database::FacetIdIsEmptyDocids => index.facet_id_is_empty_docids.remap_types(),
|
||||
Database::FacetIdExistsDocids => index.facet_id_exists_docids.remap_types(),
|
||||
Database::FacetIdF64NumberDocids => index.facet_id_f64_docids.remap_types(),
|
||||
Database::FacetIdStringDocids => index.facet_id_string_docids.remap_types(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WriterOperation {
|
||||
pub fn database(&self, index: &Index) -> heed::Database<Bytes, Bytes> {
|
||||
self.database.database(index)
|
||||
}
|
||||
|
||||
pub fn entry(self) -> EntryOperation {
|
||||
self.entry
|
||||
@ -159,8 +176,12 @@ impl MergerSender {
|
||||
MainSender(&self.0)
|
||||
}
|
||||
|
||||
pub fn docids<D: DatabaseType>(&self) -> DocidsSender<'_, D> {
|
||||
DocidsSender { sender: &self.0, _marker: PhantomData }
|
||||
pub fn docids<D: DatabaseType>(&self) -> WordDocidsSender<'_, D> {
|
||||
WordDocidsSender { sender: &self.0, _marker: PhantomData }
|
||||
}
|
||||
|
||||
pub fn facet_docids(&self) -> FacetDocidsSender<'_> {
|
||||
FacetDocidsSender { sender: &self.0 }
|
||||
}
|
||||
|
||||
pub fn documents(&self) -> DocumentsSender<'_> {
|
||||
@ -208,16 +229,21 @@ pub enum WordDocids {}
|
||||
pub enum WordFidDocids {}
|
||||
pub enum WordPairProximityDocids {}
|
||||
pub enum WordPositionDocids {}
|
||||
pub enum FacetDocids {}
|
||||
|
||||
pub trait DatabaseType {
|
||||
const DATABASE: Database;
|
||||
}
|
||||
|
||||
pub trait MergerOperationType {
|
||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation;
|
||||
}
|
||||
|
||||
impl DatabaseType for ExactWordDocids {
|
||||
const DATABASE: Database = Database::ExactWordDocids;
|
||||
}
|
||||
|
||||
impl MergerOperationType for ExactWordDocids {
|
||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||
MergerOperation::ExactWordDocidsMerger(merger)
|
||||
}
|
||||
@ -225,7 +251,9 @@ impl DatabaseType for ExactWordDocids {
|
||||
|
||||
impl DatabaseType for FidWordCountDocids {
|
||||
const DATABASE: Database = Database::FidWordCountDocids;
|
||||
}
|
||||
|
||||
impl MergerOperationType for FidWordCountDocids {
|
||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||
MergerOperation::FidWordCountDocidsMerger(merger)
|
||||
}
|
||||
@ -233,7 +261,9 @@ impl DatabaseType for FidWordCountDocids {
|
||||
|
||||
impl DatabaseType for WordDocids {
|
||||
const DATABASE: Database = Database::WordDocids;
|
||||
}
|
||||
|
||||
impl MergerOperationType for WordDocids {
|
||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||
MergerOperation::WordDocidsMerger(merger)
|
||||
}
|
||||
@ -241,7 +271,9 @@ impl DatabaseType for WordDocids {
|
||||
|
||||
impl DatabaseType for WordFidDocids {
|
||||
const DATABASE: Database = Database::WordFidDocids;
|
||||
}
|
||||
|
||||
impl MergerOperationType for WordFidDocids {
|
||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||
MergerOperation::WordFidDocidsMerger(merger)
|
||||
}
|
||||
@ -249,7 +281,9 @@ impl DatabaseType for WordFidDocids {
|
||||
|
||||
impl DatabaseType for WordPairProximityDocids {
|
||||
const DATABASE: Database = Database::WordPairProximityDocids;
|
||||
}
|
||||
|
||||
impl MergerOperationType for WordPairProximityDocids {
|
||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||
MergerOperation::WordPairProximityDocidsMerger(merger)
|
||||
}
|
||||
@ -257,19 +291,32 @@ impl DatabaseType for WordPairProximityDocids {
|
||||
|
||||
impl DatabaseType for WordPositionDocids {
|
||||
const DATABASE: Database = Database::WordPositionDocids;
|
||||
}
|
||||
|
||||
impl MergerOperationType for WordPositionDocids {
|
||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||
MergerOperation::WordPositionDocidsMerger(merger)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocidsSender<'a, D> {
|
||||
impl MergerOperationType for FacetDocids {
|
||||
fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation {
|
||||
MergerOperation::FacetDocidsMerger(merger)
|
||||
}
|
||||
}
|
||||
|
||||
pub trait DocidsSender {
|
||||
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>;
|
||||
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>;
|
||||
}
|
||||
|
||||
pub struct WordDocidsSender<'a, D> {
|
||||
sender: &'a Sender<WriterOperation>,
|
||||
_marker: PhantomData<D>,
|
||||
}
|
||||
|
||||
impl<D: DatabaseType> DocidsSender<'_, D> {
|
||||
pub fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
|
||||
impl<D: DatabaseType> DocidsSender for WordDocidsSender<'_, D> {
|
||||
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
|
||||
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
|
||||
match self.sender.send(WriterOperation { database: D::DATABASE, entry }) {
|
||||
Ok(()) => Ok(()),
|
||||
@ -277,7 +324,7 @@ impl<D: DatabaseType> DocidsSender<'_, D> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
|
||||
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
|
||||
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
|
||||
match self.sender.send(WriterOperation { database: D::DATABASE, entry }) {
|
||||
Ok(()) => Ok(()),
|
||||
@ -286,6 +333,43 @@ impl<D: DatabaseType> DocidsSender<'_, D> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FacetDocidsSender<'a> {
|
||||
sender: &'a Sender<WriterOperation>,
|
||||
}
|
||||
|
||||
impl DocidsSender for FacetDocidsSender<'_> {
|
||||
fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> {
|
||||
let (database, key) = self.extract_database(key);
|
||||
let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value));
|
||||
match self.sender.send(WriterOperation { database, entry }) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(SendError(_)) => Err(SendError(())),
|
||||
}
|
||||
}
|
||||
|
||||
fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> {
|
||||
let (database, key) = self.extract_database(key);
|
||||
let entry = EntryOperation::Delete(KeyEntry::from_key(key));
|
||||
match self.sender.send(WriterOperation { database, entry }) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(SendError(_)) => Err(SendError(())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetDocidsSender<'_> {
|
||||
fn extract_database<'a>(&self, key: &'a [u8]) -> (Database, &'a [u8]) {
|
||||
let database = match FacetKind::from(key[0]) {
|
||||
FacetKind::Number => Database::FacetIdF64NumberDocids,
|
||||
FacetKind::String => Database::FacetIdStringDocids,
|
||||
FacetKind::Null => Database::FacetIdIsNullDocids,
|
||||
FacetKind::Empty => Database::FacetIdIsEmptyDocids,
|
||||
FacetKind::Exists => Database::FacetIdExistsDocids,
|
||||
};
|
||||
(database, &key[1..])
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentsSender<'a>(&'a Sender<WriterOperation>);
|
||||
|
||||
impl DocumentsSender<'_> {
|
||||
@ -321,6 +405,7 @@ pub enum MergerOperation {
|
||||
WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||
WordPairProximityDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||
WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||
FacetDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>),
|
||||
DeleteDocument { docid: DocumentId },
|
||||
InsertDocument { docid: DocumentId, document: Box<KvReaderFieldId> },
|
||||
FinishedDocument,
|
||||
@ -344,7 +429,7 @@ impl ExtractorSender {
|
||||
DocumentSender(&self.0)
|
||||
}
|
||||
|
||||
pub fn send_searchable<D: DatabaseType>(
|
||||
pub fn send_searchable<D: MergerOperationType>(
|
||||
&self,
|
||||
merger: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
||||
) -> StdResult<(), SendError<()>> {
|
||||
|
@ -1,61 +1,180 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::Debug;
|
||||
use std::fs::File;
|
||||
|
||||
use grenad::{MergeFunction, Merger};
|
||||
use heed::RoTxn;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
|
||||
use serde_json::Value;
|
||||
|
||||
use super::FacetedExtractor;
|
||||
use super::super::cache::CboCachedSorter;
|
||||
use super::facet_document::extract_document_facets;
|
||||
use super::FacetKind;
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::{normalize_facet, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
|
||||
use crate::update::new::extract::DocidsExtractor;
|
||||
use crate::update::new::{DocumentChange, ItemsPool};
|
||||
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||
use crate::{DocumentId, FieldId, GlobalFieldsIdsMap, Index, Result, MAX_FACET_VALUE_LENGTH};
|
||||
pub struct FacetedDocidsExtractor;
|
||||
|
||||
pub struct FieldIdFacetNumberDocidsExtractor;
|
||||
impl FacetedDocidsExtractor {
|
||||
fn extract_document_change(
|
||||
rtxn: &RoTxn,
|
||||
index: &Index,
|
||||
buffer: &mut Vec<u8>,
|
||||
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||
attributes_to_extract: &[&str],
|
||||
cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
|
||||
document_change: DocumentChange,
|
||||
) -> Result<()> {
|
||||
match document_change {
|
||||
DocumentChange::Deletion(inner) => extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index)?.unwrap(),
|
||||
fields_ids_map,
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
buffer,
|
||||
cached_sorter,
|
||||
CboCachedSorter::insert_del_u32,
|
||||
inner.docid(),
|
||||
fid,
|
||||
value,
|
||||
)
|
||||
},
|
||||
),
|
||||
DocumentChange::Update(inner) => {
|
||||
extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index)?.unwrap(),
|
||||
fields_ids_map,
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
buffer,
|
||||
cached_sorter,
|
||||
CboCachedSorter::insert_del_u32,
|
||||
inner.docid(),
|
||||
fid,
|
||||
value,
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.new(),
|
||||
fields_ids_map,
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
buffer,
|
||||
cached_sorter,
|
||||
CboCachedSorter::insert_add_u32,
|
||||
inner.docid(),
|
||||
fid,
|
||||
value,
|
||||
)
|
||||
},
|
||||
)
|
||||
}
|
||||
DocumentChange::Insertion(inner) => extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.new(),
|
||||
fields_ids_map,
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
buffer,
|
||||
cached_sorter,
|
||||
CboCachedSorter::insert_add_u32,
|
||||
inner.docid(),
|
||||
fid,
|
||||
value,
|
||||
)
|
||||
},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn facet_fn_with_options<MF>(
|
||||
buffer: &mut Vec<u8>,
|
||||
cached_sorter: &mut CboCachedSorter<MF>,
|
||||
cache_fn: impl Fn(&mut CboCachedSorter<MF>, &[u8], u32) -> grenad::Result<(), MF::Error>,
|
||||
docid: DocumentId,
|
||||
fid: FieldId,
|
||||
value: &Value,
|
||||
) -> Result<()>
|
||||
where
|
||||
MF: MergeFunction,
|
||||
MF::Error: Debug,
|
||||
grenad::Error<MF::Error>: Into<crate::Error>,
|
||||
{
|
||||
// Exists
|
||||
// key: fid
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Exists as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into)?;
|
||||
|
||||
match value {
|
||||
// Number
|
||||
// key: fid - level - orderedf64 - orignalf64
|
||||
Value::Number(number) => {
|
||||
if let Some((n, ordered)) =
|
||||
number.as_f64().and_then(|n| f64_into_bytes(n).map(|ordered| (n, ordered)))
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Number as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
buffer.push(1); // level 0
|
||||
buffer.extend_from_slice(&ordered);
|
||||
buffer.extend_from_slice(&n.to_be_bytes());
|
||||
|
||||
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
// String
|
||||
// key: fid - level - truncated_string
|
||||
Value::String(s) => {
|
||||
let truncated = truncate_str(s);
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::String as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
buffer.push(1); // level 0
|
||||
buffer.extend_from_slice(truncated.as_bytes());
|
||||
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into)
|
||||
}
|
||||
// Null
|
||||
// key: fid
|
||||
Value::Null => {
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Null as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into)
|
||||
}
|
||||
// Empty
|
||||
// key: fid
|
||||
Value::Array(a) if a.is_empty() => {
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Empty as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into)
|
||||
}
|
||||
Value::Object(o) if o.is_empty() => {
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Empty as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
cache_fn(cached_sorter, &*buffer, docid).map_err(Into::into)
|
||||
}
|
||||
// Otherwise, do nothing
|
||||
/// TODO: What about Value::Bool?
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetedExtractor for FieldIdFacetNumberDocidsExtractor {
|
||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||
index.user_defined_faceted_fields(rtxn)
|
||||
}
|
||||
|
||||
fn build_key<'b>(
|
||||
field_id: FieldId,
|
||||
value: &Value,
|
||||
output: &'b mut Vec<u8>,
|
||||
) -> Option<&'b [u8]> {
|
||||
let number = value.as_number()?;
|
||||
let n = number.as_f64()?;
|
||||
let ordered = f64_into_bytes(n)?;
|
||||
|
||||
// fid - level - orderedf64 - orignalf64
|
||||
output.extend_from_slice(&field_id.to_be_bytes());
|
||||
output.push(1); // level 0
|
||||
output.extend_from_slice(&ordered);
|
||||
output.extend_from_slice(&n.to_be_bytes());
|
||||
|
||||
Some(&*output)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FieldIdFacetStringDocidsExtractor;
|
||||
|
||||
impl FacetedExtractor for FieldIdFacetStringDocidsExtractor {
|
||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||
index.user_defined_faceted_fields(rtxn)
|
||||
}
|
||||
|
||||
fn build_key<'b>(
|
||||
field_id: FieldId,
|
||||
value: &Value,
|
||||
output: &'b mut Vec<u8>,
|
||||
) -> Option<&'b [u8]> {
|
||||
let string = value.as_str()?;
|
||||
let normalize = normalize_facet(string);
|
||||
let truncated = truncate_str(&normalize);
|
||||
|
||||
// fid - level - normalized string
|
||||
output.extend_from_slice(&field_id.to_be_bytes());
|
||||
output.push(1); // level 0
|
||||
output.extend_from_slice(truncated.as_bytes());
|
||||
|
||||
Some(&*output)
|
||||
}
|
||||
}
|
||||
|
||||
/// Truncates a string to the biggest valid LMDB key size.
|
||||
@ -70,68 +189,77 @@ fn truncate_str(s: &str) -> &str {
|
||||
&s[..index.unwrap_or(0)]
|
||||
}
|
||||
|
||||
pub struct FieldIdFacetIsNullDocidsExtractor;
|
||||
impl DocidsExtractor for FacetedDocidsExtractor {
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
|
||||
fn run_extraction(
|
||||
index: &Index,
|
||||
fields_ids_map: &GlobalFieldsIdsMap,
|
||||
indexer: GrenadParameters,
|
||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
impl FacetedExtractor for FieldIdFacetIsNullDocidsExtractor {
|
||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||
index.user_defined_faceted_fields(rtxn)
|
||||
}
|
||||
let rtxn = index.read_txn()?;
|
||||
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
|
||||
let attributes_to_extract: Vec<_> =
|
||||
attributes_to_extract.iter().map(|s| s.as_ref()).collect();
|
||||
|
||||
fn build_key<'b>(
|
||||
field_id: FieldId,
|
||||
value: &Value,
|
||||
output: &'b mut Vec<u8>,
|
||||
) -> Option<&'b [u8]> {
|
||||
if value.is_null() {
|
||||
output.extend_from_slice(&field_id.to_be_bytes());
|
||||
Some(&*output)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FieldIdFacetExistsDocidsExtractor;
|
||||
|
||||
impl FacetedExtractor for FieldIdFacetExistsDocidsExtractor {
|
||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||
index.user_defined_faceted_fields(rtxn)
|
||||
}
|
||||
|
||||
fn build_key<'b>(
|
||||
field_id: FieldId,
|
||||
_value: &Value,
|
||||
output: &'b mut Vec<u8>,
|
||||
) -> Option<&'b [u8]> {
|
||||
output.extend_from_slice(&field_id.to_be_bytes());
|
||||
Some(&*output)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FieldIdFacetIsEmptyDocidsExtractor;
|
||||
|
||||
impl FacetedExtractor for FieldIdFacetIsEmptyDocidsExtractor {
|
||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||
index.user_defined_faceted_fields(rtxn)
|
||||
}
|
||||
|
||||
fn build_key<'b>(
|
||||
field_id: FieldId,
|
||||
value: &Value,
|
||||
output: &'b mut Vec<u8>,
|
||||
) -> Option<&'b [u8]> {
|
||||
let is_empty = match value {
|
||||
Value::Null | Value::Bool(_) | Value::Number(_) => false,
|
||||
Value::String(s) => s.is_empty(),
|
||||
Value::Array(a) => a.is_empty(),
|
||||
Value::Object(o) => o.is_empty(),
|
||||
};
|
||||
|
||||
if is_empty {
|
||||
output.extend_from_slice(&field_id.to_be_bytes());
|
||||
Some(&*output)
|
||||
} else {
|
||||
None
|
||||
let context_pool = ItemsPool::new(|| {
|
||||
Ok((
|
||||
index.read_txn()?,
|
||||
fields_ids_map.clone(),
|
||||
Vec::new(),
|
||||
CboCachedSorter::new(
|
||||
// TODO use a better value
|
||||
100.try_into().unwrap(),
|
||||
create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
MergeDeladdCboRoaringBitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
indexer.max_nb_chunks,
|
||||
max_memory,
|
||||
),
|
||||
),
|
||||
))
|
||||
});
|
||||
|
||||
{
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
|
||||
let _entered = span.enter();
|
||||
document_changes.into_par_iter().try_for_each(|document_change| {
|
||||
context_pool.with(|(rtxn, fields_ids_map, buffer, cached_sorter)| {
|
||||
Self::extract_document_change(
|
||||
&*rtxn,
|
||||
index,
|
||||
buffer,
|
||||
fields_ids_map,
|
||||
&attributes_to_extract,
|
||||
cached_sorter,
|
||||
document_change?,
|
||||
)
|
||||
})
|
||||
})?;
|
||||
}
|
||||
{
|
||||
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
|
||||
let _entered = span.enter();
|
||||
|
||||
let readers: Vec<_> = context_pool
|
||||
.into_items()
|
||||
.par_bridge()
|
||||
.map(|(_rtxn, _tokenizer, _fields_ids_map, cached_sorter)| {
|
||||
let sorter = cached_sorter.into_sorter()?;
|
||||
sorter.into_reader_cursors()
|
||||
})
|
||||
.collect();
|
||||
for reader in readers {
|
||||
builder.extend(reader?);
|
||||
}
|
||||
Ok(builder.build())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,180 +1,26 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::Debug;
|
||||
use std::fs::File;
|
||||
|
||||
pub use extract_facets::*;
|
||||
use grenad::{MergeFunction, Merger};
|
||||
use heed::RoTxn;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
use serde_json::Value;
|
||||
|
||||
use super::cache::CboCachedSorter;
|
||||
use crate::update::new::{DocumentChange, ItemsPool};
|
||||
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||
use crate::{DocumentId, FieldId, GlobalFieldsIdsMap, Index, Result};
|
||||
|
||||
mod extract_facets;
|
||||
mod facet_document;
|
||||
|
||||
pub trait FacetedExtractor {
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
|
||||
fn run_extraction(
|
||||
index: &Index,
|
||||
fields_ids_map: &GlobalFieldsIdsMap,
|
||||
indexer: GrenadParameters,
|
||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
pub use extract_facets::FacetedDocidsExtractor;
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
|
||||
let attributes_to_extract: Vec<_> =
|
||||
attributes_to_extract.iter().map(|s| s.as_ref()).collect();
|
||||
|
||||
let context_pool = ItemsPool::new(|| {
|
||||
Ok((
|
||||
index.read_txn()?,
|
||||
fields_ids_map.clone(),
|
||||
Vec::new(),
|
||||
CboCachedSorter::new(
|
||||
// TODO use a better value
|
||||
100.try_into().unwrap(),
|
||||
create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
MergeDeladdCboRoaringBitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
indexer.max_nb_chunks,
|
||||
max_memory,
|
||||
),
|
||||
),
|
||||
))
|
||||
});
|
||||
|
||||
document_changes.into_par_iter().try_for_each(|document_change| {
|
||||
context_pool.with(|(rtxn, fields_ids_map, buffer, cached_sorter)| {
|
||||
Self::extract_document_change(
|
||||
&*rtxn,
|
||||
index,
|
||||
buffer,
|
||||
fields_ids_map,
|
||||
&attributes_to_extract,
|
||||
cached_sorter,
|
||||
document_change?,
|
||||
)
|
||||
})
|
||||
})?;
|
||||
|
||||
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
|
||||
for (_rtxn, _fields_ids_map, _buffer, cache) in context_pool.into_items() {
|
||||
let sorter = cache.into_sorter()?;
|
||||
let readers = sorter.into_reader_cursors()?;
|
||||
builder.extend(readers);
|
||||
}
|
||||
|
||||
Ok(builder.build())
|
||||
}
|
||||
|
||||
// TODO Shorten this
|
||||
fn facet_fn_with_options<MF>(
|
||||
buffer: &mut Vec<u8>,
|
||||
cached_sorter: &mut CboCachedSorter<MF>,
|
||||
cache_fn: impl Fn(&mut CboCachedSorter<MF>, &[u8], u32) -> grenad::Result<(), MF::Error>,
|
||||
docid: DocumentId,
|
||||
fid: FieldId,
|
||||
value: &Value,
|
||||
) -> Result<()>
|
||||
where
|
||||
MF: MergeFunction,
|
||||
MF::Error: Debug,
|
||||
grenad::Error<MF::Error>: Into<crate::Error>,
|
||||
{
|
||||
buffer.clear();
|
||||
match Self::build_key(fid, value, buffer) {
|
||||
Some(key) => cache_fn(cached_sorter, &key, docid).map_err(Into::into),
|
||||
None => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_document_change(
|
||||
rtxn: &RoTxn,
|
||||
index: &Index,
|
||||
buffer: &mut Vec<u8>,
|
||||
fields_ids_map: &mut GlobalFieldsIdsMap,
|
||||
attributes_to_extract: &[&str],
|
||||
cached_sorter: &mut CboCachedSorter<MergeDeladdCboRoaringBitmaps>,
|
||||
document_change: DocumentChange,
|
||||
) -> Result<()> {
|
||||
match document_change {
|
||||
DocumentChange::Deletion(inner) => facet_document::extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index)?.unwrap(),
|
||||
fields_ids_map,
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
buffer,
|
||||
cached_sorter,
|
||||
CboCachedSorter::insert_del_u32,
|
||||
inner.docid(),
|
||||
fid,
|
||||
value,
|
||||
)
|
||||
},
|
||||
),
|
||||
DocumentChange::Update(inner) => {
|
||||
facet_document::extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index)?.unwrap(),
|
||||
fields_ids_map,
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
buffer,
|
||||
cached_sorter,
|
||||
CboCachedSorter::insert_del_u32,
|
||||
inner.docid(),
|
||||
fid,
|
||||
value,
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
facet_document::extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.new(),
|
||||
fields_ids_map,
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
buffer,
|
||||
cached_sorter,
|
||||
CboCachedSorter::insert_add_u32,
|
||||
inner.docid(),
|
||||
fid,
|
||||
value,
|
||||
)
|
||||
},
|
||||
)
|
||||
}
|
||||
DocumentChange::Insertion(inner) => facet_document::extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.new(),
|
||||
fields_ids_map,
|
||||
&mut |fid, value| {
|
||||
Self::facet_fn_with_options(
|
||||
buffer,
|
||||
cached_sorter,
|
||||
CboCachedSorter::insert_add_u32,
|
||||
inner.docid(),
|
||||
fid,
|
||||
value,
|
||||
)
|
||||
},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// TODO avoid owning the strings here.
|
||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>>;
|
||||
|
||||
fn build_key<'b>(field_id: FieldId, value: &Value, output: &'b mut Vec<u8>)
|
||||
-> Option<&'b [u8]>;
|
||||
#[repr(u8)]
|
||||
pub enum FacetKind {
|
||||
Number = 0,
|
||||
String = 1,
|
||||
Null = 2,
|
||||
Empty = 3,
|
||||
Exists,
|
||||
}
|
||||
|
||||
impl From<u8> for FacetKind {
|
||||
fn from(value: u8) -> Self {
|
||||
match value {
|
||||
0 => Self::Number,
|
||||
1 => Self::String,
|
||||
2 => Self::Null,
|
||||
3 => Self::Empty,
|
||||
4 => Self::Exists,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,9 +2,29 @@ mod cache;
|
||||
mod faceted;
|
||||
mod searchable;
|
||||
|
||||
use std::fs::File;
|
||||
|
||||
pub use faceted::*;
|
||||
use grenad::Merger;
|
||||
use rayon::iter::IntoParallelIterator;
|
||||
pub use searchable::*;
|
||||
|
||||
use crate::{
|
||||
update::{GrenadParameters, MergeDeladdCboRoaringBitmaps},
|
||||
GlobalFieldsIdsMap, Index, Result,
|
||||
};
|
||||
|
||||
use super::DocumentChange;
|
||||
|
||||
pub trait DocidsExtractor {
|
||||
fn run_extraction(
|
||||
index: &Index,
|
||||
fields_ids_map: &GlobalFieldsIdsMap,
|
||||
indexer: GrenadParameters,
|
||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>>;
|
||||
}
|
||||
|
||||
/// TODO move in permissive json pointer
|
||||
pub mod perm_json_p {
|
||||
use serde_json::{Map, Value};
|
||||
@ -39,6 +59,10 @@ pub mod perm_json_p {
|
||||
base_key: &str,
|
||||
seeker: &mut impl FnMut(&str, &Value) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
if value.is_empty() {
|
||||
seeker(&base_key, &Value::Object(Map::with_capacity(0)))?;
|
||||
}
|
||||
|
||||
for (key, value) in value.iter() {
|
||||
let base_key = if base_key.is_empty() {
|
||||
key.to_string()
|
||||
@ -80,6 +104,10 @@ pub mod perm_json_p {
|
||||
base_key: &str,
|
||||
seeker: &mut impl FnMut(&str, &Value) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
seeker(&base_key, &Value::Array(vec![]))?;
|
||||
}
|
||||
|
||||
for value in values {
|
||||
match value {
|
||||
Value::Object(object) => {
|
||||
|
@ -17,6 +17,7 @@ use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
|
||||
use tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
||||
|
||||
use super::cache::CboCachedSorter;
|
||||
use super::DocidsExtractor;
|
||||
use crate::update::new::{DocumentChange, ItemsPool};
|
||||
use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
|
||||
use crate::{GlobalFieldsIdsMap, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
|
||||
@ -130,3 +131,14 @@ pub trait SearchableExtractor {
|
||||
|
||||
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
|
||||
}
|
||||
|
||||
impl<T: SearchableExtractor> DocidsExtractor for T {
|
||||
fn run_extraction(
|
||||
index: &Index,
|
||||
fields_ids_map: &GlobalFieldsIdsMap,
|
||||
indexer: GrenadParameters,
|
||||
document_changes: impl IntoParallelIterator<Item = Result<DocumentChange>>,
|
||||
) -> Result<Merger<File, MergeDeladdCboRoaringBitmaps>> {
|
||||
Self::run_extraction(index, fields_ids_map, indexer, document_changes)
|
||||
}
|
||||
}
|
||||
|
@ -101,6 +101,22 @@ where
|
||||
max_memory: Some(max_memory),
|
||||
..GrenadParameters::default()
|
||||
};
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted");
|
||||
let _entered = span.enter();
|
||||
extract_and_send_docids::<
|
||||
FacetedDocidsExtractor,
|
||||
FacetDocids,
|
||||
>(
|
||||
index,
|
||||
&global_fields_ids_map,
|
||||
grenad_parameters,
|
||||
document_changes.clone(),
|
||||
&extractor_sender,
|
||||
)?;
|
||||
}
|
||||
|
||||
{
|
||||
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
|
||||
let _entered = span.enter();
|
||||
@ -176,19 +192,19 @@ where
|
||||
}
|
||||
|
||||
// TODO THIS IS TOO MUCH
|
||||
// Extract fieldid docid facet number
|
||||
// Extract fieldid docid facet string
|
||||
// Extract facetid string fst
|
||||
// Extract facetid normalized string strings
|
||||
// - [ ] Extract fieldid docid facet number
|
||||
// - [ ] Extract fieldid docid facet string
|
||||
// - [ ] Extract facetid string fst
|
||||
// - [ ] Extract facetid normalized string strings
|
||||
|
||||
// TODO Inverted Indexes again
|
||||
// Extract fieldid facet isempty docids
|
||||
// Extract fieldid facet isnull docids
|
||||
// Extract fieldid facet exists docids
|
||||
// - [x] Extract fieldid facet isempty docids
|
||||
// - [x] Extract fieldid facet isnull docids
|
||||
// - [x] Extract fieldid facet exists docids
|
||||
|
||||
// TODO This is the normal system
|
||||
// Extract fieldid facet number docids
|
||||
// Extract fieldid facet string docids
|
||||
// - [x] Extract fieldid facet number docids
|
||||
// - [x] Extract fieldid facet string docids
|
||||
|
||||
Ok(()) as Result<_>
|
||||
})
|
||||
@ -238,7 +254,7 @@ where
|
||||
/// TODO: GrenadParameters::default() should be removed in favor a passed parameter
|
||||
/// TODO: manage the errors correctly
|
||||
/// TODO: we must have a single trait that also gives the extractor type
|
||||
fn extract_and_send_docids<E: SearchableExtractor, D: DatabaseType>(
|
||||
fn extract_and_send_docids<E: DocidsExtractor, D: MergerOperationType>(
|
||||
index: &Index,
|
||||
fields_ids_map: &GlobalFieldsIdsMap,
|
||||
indexer: GrenadParameters,
|
||||
|
@ -12,6 +12,7 @@ use tempfile::tempfile;
|
||||
|
||||
use super::channel::*;
|
||||
use super::{Deletion, DocumentChange, Insertion, KvReaderDelAdd, KvReaderFieldId, Update};
|
||||
use super::extract::FacetKind;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::update::new::channel::MergerOperation;
|
||||
use crate::update::MergeDeladdCboRoaringBitmaps;
|
||||
@ -63,26 +64,33 @@ pub fn merge_grenad_entries(
|
||||
)?;
|
||||
}
|
||||
MergerOperation::WordDocidsMerger(merger) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
|
||||
let _entered = span.enter();
|
||||
let mut add_words_fst = SetBuilder::new(BufWriter::new(tempfile()?))?;
|
||||
let mut del_words_fst = SetBuilder::new(BufWriter::new(tempfile()?))?;
|
||||
{
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
|
||||
let _entered = span.enter();
|
||||
|
||||
merge_and_send_docids(
|
||||
merger,
|
||||
index.word_docids.remap_types(),
|
||||
rtxn,
|
||||
&mut buffer,
|
||||
sender.docids::<WordDocids>(),
|
||||
|key| add_words_fst.insert(key),
|
||||
|key| del_words_fst.insert(key),
|
||||
)?;
|
||||
merge_and_send_docids(
|
||||
merger,
|
||||
index.word_docids.remap_types(),
|
||||
rtxn,
|
||||
&mut buffer,
|
||||
sender.docids::<WordDocids>(),
|
||||
|key| add_words_fst.insert(key),
|
||||
|key| del_words_fst.insert(key),
|
||||
)?;
|
||||
}
|
||||
|
||||
// Move that into a dedicated function
|
||||
let words_fst = index.words_fst(rtxn)?;
|
||||
let mmap = compute_new_words_fst(add_words_fst, del_words_fst, words_fst)?;
|
||||
sender.main().write_words_fst(mmap).unwrap();
|
||||
{
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "words_fst");
|
||||
let _entered = span.enter();
|
||||
// Move that into a dedicated function
|
||||
let words_fst = index.words_fst(rtxn)?;
|
||||
let mmap = compute_new_words_fst(add_words_fst, del_words_fst, words_fst)?;
|
||||
sender.main().write_words_fst(mmap).unwrap();
|
||||
}
|
||||
}
|
||||
MergerOperation::WordFidDocidsMerger(merger) => {
|
||||
let span =
|
||||
@ -161,6 +169,18 @@ pub fn merge_grenad_entries(
|
||||
MergerOperation::FinishedDocument => {
|
||||
// send the rtree
|
||||
}
|
||||
MergerOperation::FacetDocidsMerger(merger) => {
|
||||
let span =
|
||||
tracing::trace_span!(target: "indexing::documents::merge", "facet_docids");
|
||||
let _entered = span.enter();
|
||||
merge_and_send_facet_docids(
|
||||
merger,
|
||||
FacetDatabases::new(index),
|
||||
rtxn,
|
||||
&mut buffer,
|
||||
sender.facet_docids(),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -252,12 +272,12 @@ fn compute_new_words_fst(
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
|
||||
fn merge_and_send_docids<D: DatabaseType>(
|
||||
fn merge_and_send_docids(
|
||||
merger: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
||||
database: Database<Bytes, Bytes>,
|
||||
rtxn: &RoTxn<'_>,
|
||||
buffer: &mut Vec<u8>,
|
||||
word_docids_sender: DocidsSender<'_, D>,
|
||||
docids_sender: impl DocidsSender,
|
||||
mut add_key: impl FnMut(&[u8]) -> fst::Result<()>,
|
||||
mut del_key: impl FnMut(&[u8]) -> fst::Result<()>,
|
||||
) -> Result<()> {
|
||||
@ -271,11 +291,11 @@ fn merge_and_send_docids<D: DatabaseType>(
|
||||
match merge_cbo_bitmaps(current, del, add)? {
|
||||
Operation::Write(bitmap) => {
|
||||
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
|
||||
word_docids_sender.write(key, value).unwrap();
|
||||
docids_sender.write(key, value).unwrap();
|
||||
add_key(key)?;
|
||||
}
|
||||
Operation::Delete => {
|
||||
word_docids_sender.delete(key).unwrap();
|
||||
docids_sender.delete(key).unwrap();
|
||||
del_key(key)?;
|
||||
}
|
||||
Operation::Ignore => (),
|
||||
@ -285,6 +305,76 @@ fn merge_and_send_docids<D: DatabaseType>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
|
||||
fn merge_and_send_facet_docids(
|
||||
merger: Merger<File, MergeDeladdCboRoaringBitmaps>,
|
||||
database: FacetDatabases,
|
||||
rtxn: &RoTxn<'_>,
|
||||
buffer: &mut Vec<u8>,
|
||||
docids_sender: impl DocidsSender,
|
||||
) -> Result<()> {
|
||||
let mut merger_iter = merger.into_stream_merger_iter().unwrap();
|
||||
while let Some((key, deladd)) = merger_iter.next().unwrap() {
|
||||
let current = database.get(rtxn, key)?;
|
||||
let deladd: &KvReaderDelAdd = deladd.into();
|
||||
let del = deladd.get(DelAdd::Deletion);
|
||||
let add = deladd.get(DelAdd::Addition);
|
||||
|
||||
match merge_cbo_bitmaps(current, del, add)? {
|
||||
Operation::Write(bitmap) => {
|
||||
let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer);
|
||||
docids_sender.write(key, value).unwrap();
|
||||
}
|
||||
Operation::Delete => {
|
||||
docids_sender.delete(key).unwrap();
|
||||
}
|
||||
Operation::Ignore => (),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct FacetDatabases {
|
||||
/// Maps the facet field id and the docids for which this field exists
|
||||
facet_id_exists_docids: Database<Bytes, Bytes>,
|
||||
/// Maps the facet field id and the docids for which this field is set as null
|
||||
facet_id_is_null_docids: Database<Bytes, Bytes>,
|
||||
/// Maps the facet field id and the docids for which this field is considered empty
|
||||
facet_id_is_empty_docids: Database<Bytes, Bytes>,
|
||||
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
|
||||
facet_id_f64_docids: Database<Bytes, Bytes>,
|
||||
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
|
||||
facet_id_string_docids: Database<Bytes, Bytes>,
|
||||
}
|
||||
|
||||
impl FacetDatabases {
|
||||
fn new(index: &Index) -> Self {
|
||||
Self {
|
||||
facet_id_exists_docids: index.facet_id_exists_docids.remap_types(),
|
||||
facet_id_is_null_docids: index.facet_id_is_null_docids.remap_types(),
|
||||
facet_id_is_empty_docids: index.facet_id_is_empty_docids.remap_types(),
|
||||
facet_id_f64_docids: index.facet_id_f64_docids.remap_types(),
|
||||
facet_id_string_docids: index.facet_id_string_docids.remap_types(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get<'a>(&self, rtxn: &'a RoTxn<'_>, key: &[u8]) -> heed::Result<Option<&'a [u8]>> {
|
||||
let (facet_kind, key) = self.extract_facet_kind(key);
|
||||
match facet_kind {
|
||||
FacetKind::Exists => self.facet_id_exists_docids.get(rtxn, key),
|
||||
FacetKind::Null => self.facet_id_is_null_docids.get(rtxn, key),
|
||||
FacetKind::Empty => self.facet_id_is_empty_docids.get(rtxn, key),
|
||||
FacetKind::Number => self.facet_id_f64_docids.get(rtxn, key),
|
||||
FacetKind::String => self.facet_id_string_docids.get(rtxn, key),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_facet_kind<'a>(&self, key: &'a [u8]) -> (FacetKind, &'a [u8]) {
|
||||
(FacetKind::from(key[0]), &key[1..])
|
||||
}
|
||||
}
|
||||
|
||||
enum Operation {
|
||||
Write(RoaringBitmap),
|
||||
Delete,
|
||||
|
Loading…
Reference in New Issue
Block a user