From bbbc4410acaf083a0553eaafae93835cc7236119 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 20 Jan 2025 15:37:08 +0100 Subject: [PATCH] Fix the dump creation process --- .../src/scheduler/process_dump_creation.rs | 8 +++++++- crates/milli/src/update/new/reindex.rs | 5 +---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/crates/index-scheduler/src/scheduler/process_dump_creation.rs b/crates/index-scheduler/src/scheduler/process_dump_creation.rs index 3fd5c795b..92464eac7 100644 --- a/crates/index-scheduler/src/scheduler/process_dump_creation.rs +++ b/crates/index-scheduler/src/scheduler/process_dump_creation.rs @@ -128,6 +128,7 @@ impl IndexScheduler { let embedding_configs = index .embedding_configs(&rtxn) .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + let decompression_dictionary = index.document_decompression_dictionary(&rtxn)?; let nb_documents = index .number_of_documents(&rtxn) @@ -135,8 +136,9 @@ impl IndexScheduler { as u32; let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents); progress.update_progress(update_document_progress); + let doc_alloc = bumpalo::Bump::new(); let documents = index - .all_documents(&rtxn) + .all_compressed_documents(&rtxn) .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; // 3.1. Dump the documents for ret in documents { @@ -145,6 +147,10 @@ impl IndexScheduler { } let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + let doc = match decompression_dictionary.as_ref() { + Some(dict) => doc.decompress_into_bump(&doc_alloc, dict)?, + None => doc.as_non_compressed(), + }; let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc) .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; diff --git a/crates/milli/src/update/new/reindex.rs b/crates/milli/src/update/new/reindex.rs index 036e0c838..8215b46c4 100644 --- a/crates/milli/src/update/new/reindex.rs +++ b/crates/milli/src/update/new/reindex.rs @@ -19,10 +19,7 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre let mut doc_alloc = bumpalo::Bump::new(); let db_document_decompression_dictionary = - match index.document_compression_raw_dictionary(wtxn)? { - Some(raw) => Some(DecoderDictionary::copy(raw)), - None => None, - }; + index.document_compression_raw_dictionary(wtxn)?.map(|raw| DecoderDictionary::copy(raw)); for docid in docids { update_document_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);