From 83616bc03ed8c3cda1271fa1a7797df78f42b159 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 20 Jan 2025 16:26:50 +0100 Subject: [PATCH] Expose a new indexer parameter to enable the creation of a document dictionary --- crates/milli/src/update/new/extract/documents/compression.rs | 1 + crates/milli/src/update/new/indexer/document_changes.rs | 2 ++ crates/milli/src/update/new/indexer/mod.rs | 1 + 3 files changed, 4 insertions(+) diff --git a/crates/milli/src/update/new/extract/documents/compression.rs b/crates/milli/src/update/new/extract/documents/compression.rs index db64c01c9..af3fd0395 100644 --- a/crates/milli/src/update/new/extract/documents/compression.rs +++ b/crates/milli/src/update/new/extract/documents/compression.rs @@ -60,6 +60,7 @@ where let number_of_documents = index.number_of_documents(wtxn)? as usize; match index.document_compression_raw_dictionary(wtxn)? { Some(dict) => Ok(Some(EncoderDictionary::copy(dict, COMPRESSION_LEVEL))), + None if !indexing_context.allow_creating_compression_dictionary => Ok(None), None if number_of_documents >= COMPRESS_LIMIT => Ok(None), None if number_of_documents + document_changes.len() < SAMPLE_SIZE => Ok(None), None => { diff --git a/crates/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs index 4d426927c..27f94c224 100644 --- a/crates/milli/src/update/new/indexer/document_changes.rs +++ b/crates/milli/src/update/new/indexer/document_changes.rs @@ -146,6 +146,7 @@ pub struct IndexingContext< { pub index: &'index Index, pub db_fields_ids_map: &'indexer FieldsIdsMap, + pub allow_creating_compression_dictionary: bool, pub db_document_decompression_dictionary: Option<&'indexer DecoderDictionary<'static>>, pub new_fields_ids_map: &'fid RwLock, pub doc_allocs: &'indexer ThreadLocal>>, @@ -210,6 +211,7 @@ pub fn extract< IndexingContext { index, db_fields_ids_map, + allow_creating_compression_dictionary, db_document_decompression_dictionary, new_fields_ids_map, doc_allocs, diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 8af54dd9b..61b91a8c8 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -108,6 +108,7 @@ where let indexing_context = IndexingContext { index, db_fields_ids_map, + allow_creating_compression_dictionary, db_document_decompression_dictionary: db_document_decompression_dictionary.as_ref(), new_fields_ids_map: &new_fields_ids_map, doc_allocs: &doc_allocs,