diff --git a/crates/milli/src/update/new/extract/documents/mod.rs b/crates/milli/src/update/new/extract/documents/mod.rs index b4fe84952..dc1025489 100644 --- a/crates/milli/src/update/new/extract/documents/mod.rs +++ b/crates/milli/src/update/new/extract/documents/mod.rs @@ -7,8 +7,8 @@ use zstd::bulk::Compressor; use zstd::dict::EncoderDictionary; use super::DelAddRoaringBitmap; -use crate::heed_codec::CompressedObkvU16; use crate::constants::RESERVED_GEO_FIELD_NAME; +use crate::heed_codec::CompressedObkvU16; use crate::update::new::channel::DocumentsSender; use crate::update::new::document::{write_to_obkv, Document as _}; use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor}; @@ -126,6 +126,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> { &context.rtxn, context.index, &context.db_fields_ids_map, + context.db_document_decompression_dictionary, &context.doc_alloc, )?; let geo_iter = content diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 3ecdd53c6..5cfea80ce 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -7,6 +7,7 @@ use big_s::S; use bumpalo::Bump; pub use document_changes::{extract, DocumentChanges, IndexingContext}; use document_changes::{DocumentChangeContext, Extractor}; +use bumparaw_collections::RawMap; pub use document_deletion::DocumentDeletion; pub use document_operation::{DocumentOperation, PayloadStats}; use hashbrown::HashMap; diff --git a/crates/milli/src/update/new/reindex.rs b/crates/milli/src/update/new/reindex.rs index effbfa0cb..036e0c838 100644 --- a/crates/milli/src/update/new/reindex.rs +++ b/crates/milli/src/update/new/reindex.rs @@ -1,4 +1,5 @@ use heed::RwTxn; +use zstd::dict::DecoderDictionary; use super::document::{Document, DocumentFromDb}; use crate::progress::{self, AtomicSubStep, Progress}; @@ -17,10 +18,23 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre let docids = index.documents_ids(wtxn)?; let mut doc_alloc = bumpalo::Bump::new(); + let db_document_decompression_dictionary = + match index.document_compression_raw_dictionary(wtxn)? { + Some(raw) => Some(DecoderDictionary::copy(raw)), + None => None, + }; + for docid in docids { update_document_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed); - let Some(document) = DocumentFromDb::new(docid, wtxn, index, &field_id_map, &doc_alloc)? + let Some(document) = DocumentFromDb::new( + docid, + wtxn, + index, + &field_id_map, + db_document_decompression_dictionary.as_ref(), + &doc_alloc, + )? else { continue; }; @@ -38,5 +52,6 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre } index.put_field_distribution(wtxn, &distribution)?; + Ok(()) }