From 59f88c14b3087eb78324d36e679bb4f64799f277 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 23 Oct 2023 15:19:33 +0200 Subject: [PATCH] Simplify facet update after removing `Index::faceted_documents_ids` --- milli/src/index.rs | 2 -- milli/src/update/clear_documents.rs | 2 -- milli/src/update/facet/bulk.rs | 31 +++++-------------- milli/src/update/facet/incremental.rs | 15 +++------ milli/src/update/facet/mod.rs | 1 - .../src/update/index_documents/typed_chunk.rs | 5 +-- 6 files changed, 13 insertions(+), 43 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index f8be55545..eb9e153ec 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1,7 +1,6 @@ use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fs::File; -use std::mem::size_of; use std::path::Path; use charabia::{Language, Script}; @@ -14,7 +13,6 @@ use time::OffsetDateTime; use crate::distance::NDotProductPoint; use crate::error::{InternalError, UserError}; -use crate::facet::FacetType; use crate::fields_ids_map::FieldsIdsMap; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 52f3e80db..3eb7e0910 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -1,7 +1,6 @@ use roaring::RoaringBitmap; use time::OffsetDateTime; -use crate::facet::FacetType; use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result}; pub struct ClearDocuments<'t, 'u, 'i> { @@ -51,7 +50,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { // We retrieve the number of documents ids that we are deleting. let number_of_documents = self.index.number_of_documents(self.wtxn)?; - let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; // We clean some of the main engine datastructures. self.index.put_words_fst(self.wtxn, &fst::Set::default())?; diff --git a/milli/src/update/facet/bulk.rs b/milli/src/update/facet/bulk.rs index 5247298a4..d2205f9d6 100644 --- a/milli/src/update/facet/bulk.rs +++ b/milli/src/update/facet/bulk.rs @@ -1,8 +1,7 @@ -use std::borrow::Cow; use std::fs::File; use std::io::BufReader; -use grenad::{CompressionType, Reader}; +use grenad::CompressionType; use heed::types::ByteSlice; use heed::{BytesEncode, Error, RoTxn, RwTxn}; use obkv::KvReader; @@ -82,10 +81,7 @@ impl<'i> FacetsUpdateBulk<'i> { let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size }; - inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| { - // TODO: remove the lambda altogether - Ok(()) - })?; + inner.update(wtxn, &field_ids)?; Ok(()) } @@ -99,21 +95,14 @@ pub(crate) struct FacetsUpdateBulkInner { pub min_level_size: u8, } impl FacetsUpdateBulkInner { - pub fn update( - mut self, - wtxn: &mut RwTxn, - field_ids: &[u16], - mut handle_all_docids: impl FnMut(&mut RwTxn, FieldId, RoaringBitmap) -> Result<()>, - ) -> Result<()> { + pub fn update(mut self, wtxn: &mut RwTxn, field_ids: &[u16]) -> Result<()> { self.update_level0(wtxn)?; for &field_id in field_ids.iter() { self.clear_levels(wtxn, field_id)?; } for &field_id in field_ids.iter() { - let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, wtxn)?; - - handle_all_docids(wtxn, field_id, all_docids)?; + let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?; for level_reader in level_readers { let mut cursor = level_reader.into_cursor()?; @@ -201,16 +190,10 @@ impl FacetsUpdateBulkInner { &self, field_id: FieldId, txn: &RoTxn, - ) -> Result<(Vec>>, RoaringBitmap)> { - let mut all_docids = RoaringBitmap::new(); - let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| { - for bitmap in bitmaps { - all_docids |= bitmap; - } - Ok(()) - })?; + ) -> Result>>> { + let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |_, _| Ok(()))?; - Ok((subwriters, all_docids)) + Ok(subwriters) } #[allow(clippy::type_complexity)] fn read_level_0<'t>( diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index 77e9874f6..e241c499c 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::fs::File; use std::io::BufReader; @@ -15,7 +14,7 @@ use crate::heed_codec::ByteSliceRefCodec; use crate::search::facet::get_highest_level; use crate::update::del_add::DelAdd; use crate::update::index_documents::valid_lmdb_key; -use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; +use crate::{CboRoaringBitmapCodec, Index, Result}; enum InsertionResult { InPlace, @@ -30,16 +29,14 @@ enum DeletionResult { /// Algorithm to incrementally insert and delete elememts into the /// `facet_id_(string/f64)_docids` databases. -pub struct FacetsUpdateIncremental<'i> { - index: &'i Index, +pub struct FacetsUpdateIncremental { inner: FacetsUpdateIncrementalInner, - facet_type: FacetType, delta_data: grenad::Reader>, } -impl<'i> FacetsUpdateIncremental<'i> { +impl FacetsUpdateIncremental { pub fn new( - index: &'i Index, + index: &Index, facet_type: FacetType, delta_data: grenad::Reader>, group_size: u8, @@ -47,7 +44,6 @@ impl<'i> FacetsUpdateIncremental<'i> { max_group_size: u8, ) -> Self { FacetsUpdateIncremental { - index, inner: FacetsUpdateIncrementalInner { db: match facet_type { FacetType::String => index @@ -61,12 +57,11 @@ impl<'i> FacetsUpdateIncremental<'i> { max_group_size, min_level_size, }, - facet_type, delta_data, } } - pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> { + pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> { let mut cursor = self.delta_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { if !valid_lmdb_key(key) { diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index e3c632983..3465e5437 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -115,7 +115,6 @@ pub struct FacetsUpdate<'i> { min_level_size: u8, } impl<'i> FacetsUpdate<'i> { - // TODO grenad::Reader> pub fn new( index: &'i Index, facet_type: FacetType, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index faeee944f..0d618ad28 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -1,4 +1,3 @@ -use std::borrow::Cow; use std::collections::HashMap; use std::convert::TryInto; use std::fs::File; @@ -11,9 +10,7 @@ use heed::types::ByteSlice; use heed::RwTxn; use roaring::RoaringBitmap; -use super::helpers::{ - self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap, -}; +use super::helpers::{self, merge_ignore_values, valid_lmdb_key, CursorClonableMmap}; use super::{ClonableMmap, MergeFn}; use crate::distance::NDotProductPoint; use crate::error::UserError;