Facet Incremental update

This commit is contained in:
Louis Dureuil 2023-10-19 12:01:12 +02:00
parent f67ff3a738
commit 04ec293024
No known key found for this signature in database

View File

@ -4,6 +4,7 @@ use std::io::BufReader;
use heed::types::{ByteSlice, DecodeIgnore}; use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn}; use heed::{BytesDecode, Error, RoTxn, RwTxn};
use obkv::KvReader;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
@ -12,6 +13,7 @@ use crate::heed_codec::facet::{
}; };
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level; use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key; use crate::update::index_documents::valid_lmdb_key;
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
@ -35,14 +37,14 @@ pub struct FacetsUpdateIncremental<'i> {
index: &'i Index, index: &'i Index,
inner: FacetsUpdateIncrementalInner, inner: FacetsUpdateIncrementalInner,
facet_type: FacetType, facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>, delta_data: grenad::Reader<BufReader<File>>,
} }
impl<'i> FacetsUpdateIncremental<'i> { impl<'i> FacetsUpdateIncremental<'i> {
pub fn new( pub fn new(
index: &'i Index, index: &'i Index,
facet_type: FacetType, facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>, delta_data: grenad::Reader<BufReader<File>>,
group_size: u8, group_size: u8,
min_level_size: u8, min_level_size: u8,
max_group_size: u8, max_group_size: u8,
@ -63,29 +65,82 @@ impl<'i> FacetsUpdateIncremental<'i> {
min_level_size, min_level_size,
}, },
facet_type, facet_type,
new_data, delta_data,
} }
} }
pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> { pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> {
let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default(); #[derive(Default)]
struct DeltaDocids {
deleted: RoaringBitmap,
added: RoaringBitmap,
}
impl DeltaDocids {
fn add(&mut self, added: &RoaringBitmap) {
self.deleted -= added;
self.added |= added;
}
fn delete(&mut self, deleted: &RoaringBitmap) {
self.deleted |= deleted;
self.added -= deleted;
}
fn applied(self, mut docids: RoaringBitmap) -> RoaringBitmap {
docids -= self.deleted;
docids |= self.added;
docids
}
}
let mut cursor = self.new_data.into_cursor()?; let mut new_faceted_docids = HashMap::<FieldId, DeltaDocids>::default();
let mut cursor = self.delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) { if !valid_lmdb_key(key) {
continue; continue;
} }
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key) let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?; .ok_or(heed::Error::Encoding)?;
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; let value = KvReader::new(value);
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
*new_faceted_docids.entry(key.field_id).or_default() |= docids; let entry = new_faceted_docids.entry(key.field_id).or_default();
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
let docids_to_add = value
.get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
if let Some(docids_to_delete) = docids_to_delete {
let docids_to_delete = docids_to_delete?;
self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?;
entry.delete(&docids_to_delete);
}
if let Some(docids_to_add) = docids_to_add {
let docids_to_add = docids_to_add?;
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?;
entry.add(&docids_to_add);
}
} }
// FIXME: broken for multi-value facets?
//
// Consider an incremental update: `facet="tags", facet_value="Action", {Del: Some([0, 1]), Add: None }`
// The current code will unconditionally remove docs 0 and 1 from faceted docs for "tags".
// For doc 0 (`"tags": "Action"`), this is the correct behavior;
// for doc 1 (`"tags": "Action, Adventure"`), it is incorrect behavior.
for (field_id, new_docids) in new_faceted_docids { for (field_id, new_docids) in new_faceted_docids {
let mut docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?; let old_docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?;
docids |= new_docids; self.index.put_faceted_documents_ids(
self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?; wtxn,
field_id,
self.facet_type,
&new_docids.applied(old_docids),
)?;
} }
Ok(()) Ok(())
} }