Expose a new indexer parameter to allow the creation of a document compression dictionary

This commit is contained in:
Kerollmops 2025-01-20 16:26:50 +01:00
parent bbbc4410ac
commit 83616bc03e
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 4 additions and 0 deletions

View File

@ -60,6 +60,7 @@ where
let number_of_documents = index.number_of_documents(wtxn)? as usize;
match index.document_compression_raw_dictionary(wtxn)? {
Some(dict) => Ok(Some(EncoderDictionary::copy(dict, COMPRESSION_LEVEL))),
None if !indexing_context.allow_creating_compression_dictionary => Ok(None),
None if number_of_documents >= COMPRESS_LIMIT => Ok(None),
None if number_of_documents + document_changes.len() < SAMPLE_SIZE => Ok(None),
None => {

View File

@ -146,6 +146,7 @@ pub struct IndexingContext<
{
pub index: &'index Index,
pub db_fields_ids_map: &'indexer FieldsIdsMap,
pub allow_creating_compression_dictionary: bool,
pub db_document_decompression_dictionary: Option<&'indexer DecoderDictionary<'static>>,
pub new_fields_ids_map: &'fid RwLock<FieldIdMapWithMetadata>,
pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
@ -210,6 +211,7 @@ pub fn extract<
IndexingContext {
index,
db_fields_ids_map,
allow_creating_compression_dictionary,
db_document_decompression_dictionary,
new_fields_ids_map,
doc_allocs,

View File

@ -108,6 +108,7 @@ where
let indexing_context = IndexingContext {
index,
db_fields_ids_map,
allow_creating_compression_dictionary,
db_document_decompression_dictionary: db_document_decompression_dictionary.as_ref(),
new_fields_ids_map: &new_fields_ids_map,
doc_allocs: &doc_allocs,