Fix the dump creation process

This commit is contained in:
Kerollmops 2025-01-20 15:37:08 +01:00
parent 46dfa9f7c1
commit bbbc4410ac
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
2 changed files with 8 additions and 5 deletions

View File

@@ -128,6 +128,7 @@ impl IndexScheduler {
let embedding_configs = index let embedding_configs = index
.embedding_configs(&rtxn) .embedding_configs(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let decompression_dictionary = index.document_decompression_dictionary(&rtxn)?;
let nb_documents = index let nb_documents = index
.number_of_documents(&rtxn) .number_of_documents(&rtxn)
@@ -135,8 +136,9 @@ impl IndexScheduler {
as u32; as u32;
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents); let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
progress.update_progress(update_document_progress); progress.update_progress(update_document_progress);
let doc_alloc = bumpalo::Bump::new();
let documents = index let documents = index
.all_documents(&rtxn) .all_compressed_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// 3.1. Dump the documents // 3.1. Dump the documents
for ret in documents { for ret in documents {
@@ -145,6 +147,10 @@ impl IndexScheduler {
} }
let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let doc = match decompression_dictionary.as_ref() {
Some(dict) => doc.decompress_into_bump(&doc_alloc, dict)?,
None => doc.as_non_compressed(),
};
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc) let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;

View File

@@ -19,10 +19,7 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre
let mut doc_alloc = bumpalo::Bump::new(); let mut doc_alloc = bumpalo::Bump::new();
let db_document_decompression_dictionary = let db_document_decompression_dictionary =
match index.document_compression_raw_dictionary(wtxn)? { index.document_compression_raw_dictionary(wtxn)?.map(|raw| DecoderDictionary::copy(raw));
Some(raw) => Some(DecoderDictionary::copy(raw)),
None => None,
};
for docid in docids { for docid in docids {
update_document_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed); update_document_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);