2022-09-05 17:31:26 +02:00
|
|
|
use self::incremental::FacetsUpdateIncremental;
|
|
|
|
use super::FacetsUpdateBulk;
|
|
|
|
use crate::facet::FacetType;
|
|
|
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
|
|
|
use crate::{Index, Result};
|
|
|
|
use std::fs::File;
|
2022-09-05 12:52:05 +02:00
|
|
|
|
2022-08-31 13:03:36 +02:00
|
|
|
pub mod bulk;
|
|
|
|
pub mod incremental;
|
2022-09-05 12:52:05 +02:00
|
|
|
|
|
|
|
/// A builder used to merge new facet entries into the `facet_id_string_docids`
/// or `facet_id_f64_docids` database of an index.
///
/// Depending on how large `new_data` is relative to the existing database, the
/// update is executed either with the bulk method ([`FacetsUpdateBulk`]) or the
/// incremental method ([`FacetsUpdateIncremental`]); see `execute`.
pub struct FacetsUpdate<'i> {
    index: &'i Index,
    // The facet database being written to, with its key type erased to a raw
    // byte slice so that the string and f64 databases can be handled uniformly.
    database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
    facet_type: FacetType,
    // The sorted new facet entries to merge into `database`.
    new_data: grenad::Reader<File>,
    // Options:
    // there's no way to change these for now
    level_group_size: u8,
    max_level_group_size: u8,
    min_level_size: u8,
}
|
|
|
|
impl<'i> FacetsUpdate<'i> {
|
|
|
|
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
|
|
|
let database = match facet_type {
|
|
|
|
FacetType::String => {
|
2022-09-05 13:01:36 +02:00
|
|
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
2022-09-05 12:52:05 +02:00
|
|
|
}
|
|
|
|
FacetType::Number => {
|
2022-09-05 13:01:36 +02:00
|
|
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
2022-09-05 12:52:05 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
Self {
|
|
|
|
index,
|
|
|
|
database,
|
|
|
|
level_group_size: 4,
|
|
|
|
max_level_group_size: 8,
|
|
|
|
min_level_size: 5,
|
|
|
|
facet_type,
|
|
|
|
new_data,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
|
2022-09-05 17:31:26 +02:00
|
|
|
if self.new_data.is_empty() {
|
|
|
|
return Ok(());
|
|
|
|
}
|
2022-09-05 13:49:52 +02:00
|
|
|
// here, come up with a better condition!
|
2022-09-05 17:31:26 +02:00
|
|
|
// ideally we'd choose which method to use for each field id individually
|
|
|
|
// but I dont' think it's worth the effort yet
|
|
|
|
// As a first requirement, we ask that the length of the new data is less
|
|
|
|
// than a 1/50th of the length of the database in order to use the incremental
|
|
|
|
// method.
|
|
|
|
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
|
2022-09-05 13:49:52 +02:00
|
|
|
let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data)
|
|
|
|
.level_group_size(self.level_group_size)
|
|
|
|
.min_level_size(self.min_level_size);
|
2022-09-05 12:52:05 +02:00
|
|
|
bulk_update.execute(wtxn)?;
|
|
|
|
} else {
|
2022-09-05 17:31:26 +02:00
|
|
|
let incremental_update =
|
|
|
|
FacetsUpdateIncremental::new(self.index, self.facet_type, self.new_data)
|
|
|
|
.group_size(self.level_group_size)
|
|
|
|
.max_group_size(self.max_level_group_size)
|
|
|
|
.min_level_size(self.min_level_size);
|
|
|
|
incremental_update.execute(wtxn)?;
|
2022-09-05 12:52:05 +02:00
|
|
|
}
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
2022-09-05 17:31:26 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    // TODO(benchmark): measure bulk vs. incremental insertion to determine the
    // database-size threshold at which the incremental method becomes faster,
    // instead of the hard-coded 1/50 heuristic used in `FacetsUpdate::execute`.

    #[test]
    fn update() {}
}
|