From e8a156d68287db90841109328f8dd3ba70f10433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 31 Aug 2022 13:03:36 +0200 Subject: [PATCH] Reorganise facets database indexing code --- http-ui/src/main.rs | 1 + milli/src/search/facet/mod.rs | 3 -- milli/src/search/mod.rs | 2 +- milli/src/update/delete_documents.rs | 4 +- milli/src/update/{facets.rs => facet/bulk.rs} | 37 +++++++++---------- .../facet/incremental.rs} | 11 +++--- milli/src/update/facet/mod.rs | 2 + milli/src/update/index_documents/mod.rs | 6 +-- milli/src/update/mod.rs | 4 +- 9 files changed, 33 insertions(+), 37 deletions(-) create mode 100644 http-ui/src/main.rs rename milli/src/update/{facets.rs => facet/bulk.rs} (97%) rename milli/src/{search/facet/incremental_update.rs => update/facet/incremental.rs} (98%) create mode 100644 milli/src/update/facet/mod.rs diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/http-ui/src/main.rs @@ -0,0 +1 @@ + diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 8405c0141..12074cc12 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -4,8 +4,6 @@ use heed::{BytesDecode, RoTxn}; use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; -// pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; -// pub use self::facet_string::FacetStringIter; pub use self::filter::Filter; mod facet_distribution; @@ -14,7 +12,6 @@ mod facet_range_search; pub mod facet_sort_ascending; pub mod facet_sort_descending; mod filter; -mod incremental_update; pub(crate) fn get_first_facet_value<'t, BoundCodec>( txn: &'t RoTxn, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index d05e807df..e6651737c 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -32,7 +32,7 @@ static LEVDIST2: Lazy = Lazy::new(|| LevBuilder::new(2, true)); mod criteria; mod distinct; -mod facet; +pub mod facet; mod fst_utils; mod matches; mod query_tree; diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index e16d98e74..1d1745d82 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use time::OffsetDateTime; -use super::{ClearDocuments, Facets}; +use super::{ClearDocuments, FacetsUpdateBulk}; use crate::error::{InternalError, UserError}; use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::CboRoaringBitmapCodec; @@ -643,7 +643,7 @@ fn remove_docids_from_facet_id_docids<'a>( if !modified { return Ok(()); } - let builder = Facets::new(index, db); + let builder = FacetsUpdateBulk::new(index, db); builder.execute(wtxn)?; Ok(()) diff --git a/milli/src/update/facets.rs b/milli/src/update/facet/bulk.rs similarity index 97% rename from milli/src/update/facets.rs rename to milli/src/update/facet/bulk.rs index fe8c2855e..587dc95ab 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facet/bulk.rs @@ -1,23 +1,20 @@ -use std::cmp; -use std::fs::File; -use std::num::NonZeroUsize; - +use crate::error::InternalError; +use crate::heed_codec::facet::new::{ + FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, +}; +use crate::update::index_documents::{create_writer, write_into_lmdb_database, writer_into_reader}; +use crate::{FieldId, Index, Result}; use grenad::CompressionType; use heed::types::ByteSlice; use heed::{BytesEncode, Error, RoTxn}; use log::debug; use roaring::RoaringBitmap; +use std::cmp; +use std::fs::File; +use std::num::NonZeroUsize; use time::OffsetDateTime; -use crate::error::InternalError; -use crate::heed_codec::facet::new::{ - FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, -}; -// use crate::heed_codec::CboRoaringBitmapCodec; -use crate::update::index_documents::{create_writer, write_into_lmdb_database, writer_into_reader}; -use crate::{FieldId, Index, Result}; - -pub struct Facets<'i> { +pub struct FacetsUpdateBulk<'i> { index: &'i Index, database: heed::Database, FacetGroupValueCodec>, pub(crate) chunk_compression_type: CompressionType, @@ -26,12 +23,12 @@ pub struct Facets<'i> { min_level_size: usize, } -impl<'i> Facets<'i> { +impl<'i> FacetsUpdateBulk<'i> { pub fn new( index: &'i Index, database: heed::Database, FacetGroupValueCodec>, - ) -> Facets<'i> { - Facets { + ) -> FacetsUpdateBulk<'i> { + FacetsUpdateBulk { index, database, chunk_compression_type: CompressionType::None, @@ -63,7 +60,7 @@ impl<'i> Facets<'i> { Ok(()) } - #[logging_timer::time("Facets::{}")] + #[logging_timer::time("FacetsUpdateBulk::{}")] pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> { self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; // We get the faceted fields to be able to create the facet levels. @@ -105,7 +102,7 @@ impl<'i> Facets<'i> { field_id: FieldId, txn: &RoTxn, ) -> Result<(Vec>, RoaringBitmap)> { - let algo = CreateFacetsAlgo { + let algo = FacetsUpdateBulkAlgorithm { rtxn: txn, db: &self.database, field_id, @@ -129,7 +126,7 @@ impl<'i> Facets<'i> { } } -pub struct CreateFacetsAlgo<'t> { +pub struct FacetsUpdateBulkAlgorithm<'t> { rtxn: &'t heed::RoTxn<'t>, db: &'t heed::Database, FacetGroupValueCodec>, chunk_compression_type: CompressionType, @@ -138,7 +135,7 @@ pub struct CreateFacetsAlgo<'t> { level_group_size: usize, min_level_size: usize, } -impl<'t> CreateFacetsAlgo<'t> { +impl<'t> FacetsUpdateBulkAlgorithm<'t> { fn read_level_0( &self, handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>, diff --git a/milli/src/search/facet/incremental_update.rs b/milli/src/update/facet/incremental.rs similarity index 98% rename from milli/src/search/facet/incremental_update.rs rename to milli/src/update/facet/incremental.rs index fd4e1eeb5..d2fb3755f 100644 --- a/milli/src/search/facet/incremental_update.rs +++ b/milli/src/update/facet/incremental.rs @@ -1,13 +1,12 @@ use crate::heed_codec::facet::new::{ FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, }; +use crate::search::facet::get_highest_level; use crate::Result; use heed::Error; use heed::{types::ByteSlice, BytesDecode, RoTxn, RwTxn}; use roaring::RoaringBitmap; -use super::get_highest_level; - enum InsertionResult { InPlace, Insert, @@ -18,14 +17,14 @@ enum DeletionResult { Remove { prev: Option>, next: Option> }, } -struct IncrementalFacetUpdate<'i> { - db: &'i heed::Database, FacetGroupValueCodec>, +struct FacetUpdateIncremental { + db: heed::Database, FacetGroupValueCodec>, group_size: usize, min_level_size: usize, max_group_size: usize, } -impl<'i> IncrementalFacetUpdate<'i> { - fn find_insertion_key_value<'a>( +impl FacetUpdateIncremental { + fn find_insertion_key_value( &self, field_id: u16, level: u8, diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs new file mode 100644 index 000000000..ecde3a248 --- /dev/null +++ b/milli/src/update/facet/mod.rs @@ -0,0 +1,2 @@ +pub mod bulk; +pub mod incremental; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 5a9066eba..be9b1e3c5 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -37,8 +37,8 @@ use crate::error::UserError; use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice}; pub use crate::update::index_documents::helpers::CursorClonableMmap; use crate::update::{ - self, Facets, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, - WordPrefixDocids, WordPrefixPositionDocids, WordsPrefixesFst, + self, FacetsUpdateBulk, IndexerConfig, UpdateIndexingStep, WordPrefixDocids, + PrefixWordPairsProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst, }; use crate::{Index, Result, RoaringBitmapCodec}; @@ -436,7 +436,7 @@ where (&self.index.facet_id_string_docids).remap_key_type::>(), (&self.index.facet_id_f64_docids).remap_key_type::>(), ] { - let mut builder = Facets::new(self.index, facet_db); + let mut builder = FacetsUpdateBulk::new(self.index, facet_db); builder.chunk_compression_type = self.indexer_config.chunk_compression_type; builder.chunk_compression_level = self.indexer_config.chunk_compression_level; if let Some(value) = self.config.facet_level_group_size { diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index 3ddc01cef..cd96d3e88 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -1,7 +1,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::clear_documents::ClearDocuments; pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult}; -pub use self::facets::Facets; +pub use self::facet::bulk::FacetsUpdateBulk; pub use self::index_documents::{ DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, }; @@ -16,7 +16,7 @@ pub use self::words_prefixes_fst::WordsPrefixesFst; mod available_documents_ids; mod clear_documents; mod delete_documents; -mod facets; +mod facet; mod index_documents; mod indexer_config; mod prefix_word_pairs;