2022-09-05 17:31:26 +02:00
|
|
|
use self::incremental::FacetsUpdateIncremental;
|
|
|
|
use super::FacetsUpdateBulk;
|
|
|
|
use crate::facet::FacetType;
|
|
|
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
|
|
|
use crate::{Index, Result};
|
|
|
|
use std::fs::File;
|
2022-09-05 12:52:05 +02:00
|
|
|
|
2022-08-31 13:03:36 +02:00
|
|
|
pub mod bulk;
|
|
|
|
pub mod incremental;
|
2022-09-05 12:52:05 +02:00
|
|
|
|
|
|
|
/// A builder used to merge new facet entries into the `facet_id_string_docids`
/// or `facet_id_f64_docids` database of an index.
///
/// Depending on how large `new_data` is relative to the existing database, the
/// update is executed either with the bulk method ([`FacetsUpdateBulk`]) or the
/// incremental method ([`FacetsUpdateIncremental`]); see `execute`.
pub struct FacetsUpdate<'i> {
    index: &'i Index,
    // The facet database being written to, with its key type erased to a raw
    // byte slice so that the string and f64 databases can be handled uniformly.
    database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
    facet_type: FacetType,
    // The sorted new facet entries to merge into `database`.
    new_data: grenad::Reader<File>,
    // Options:
    // there's no way to change these for now
    level_group_size: u8,
    max_level_group_size: u8,
    min_level_size: u8,
}
|
|
|
|
impl<'i> FacetsUpdate<'i> {
|
|
|
|
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
|
|
|
let database = match facet_type {
|
|
|
|
FacetType::String => {
|
2022-09-05 13:01:36 +02:00
|
|
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
2022-09-05 12:52:05 +02:00
|
|
|
}
|
|
|
|
FacetType::Number => {
|
2022-09-05 13:01:36 +02:00
|
|
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
2022-09-05 12:52:05 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
Self {
|
|
|
|
index,
|
|
|
|
database,
|
|
|
|
level_group_size: 4,
|
|
|
|
max_level_group_size: 8,
|
|
|
|
min_level_size: 5,
|
|
|
|
facet_type,
|
|
|
|
new_data,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
|
2022-09-05 17:31:26 +02:00
|
|
|
if self.new_data.is_empty() {
|
|
|
|
return Ok(());
|
|
|
|
}
|
2022-09-05 13:49:52 +02:00
|
|
|
// here, come up with a better condition!
|
2022-09-05 17:31:26 +02:00
|
|
|
// ideally we'd choose which method to use for each field id individually
|
|
|
|
// but I dont' think it's worth the effort yet
|
|
|
|
// As a first requirement, we ask that the length of the new data is less
|
|
|
|
// than a 1/50th of the length of the database in order to use the incremental
|
|
|
|
// method.
|
|
|
|
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
|
2022-09-05 13:49:52 +02:00
|
|
|
let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data)
|
|
|
|
.level_group_size(self.level_group_size)
|
|
|
|
.min_level_size(self.min_level_size);
|
2022-09-05 12:52:05 +02:00
|
|
|
bulk_update.execute(wtxn)?;
|
|
|
|
} else {
|
2022-09-05 17:31:26 +02:00
|
|
|
let incremental_update =
|
|
|
|
FacetsUpdateIncremental::new(self.index, self.facet_type, self.new_data)
|
|
|
|
.group_size(self.level_group_size)
|
|
|
|
.max_group_size(self.max_level_group_size)
|
|
|
|
.min_level_size(self.min_level_size);
|
|
|
|
incremental_update.execute(wtxn)?;
|
2022-09-05 12:52:05 +02:00
|
|
|
}
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
2022-09-05 17:31:26 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    // TODO(benchmark): measure bulk vs. incremental insertion to determine the
    // database-size threshold at which the incremental method becomes faster,
    // instead of the hard-coded 1/50 heuristic used in `FacetsUpdate::execute`.

    #[test]
    fn update() {}
}
|