Remove log in chunk generator

This commit is contained in:
many 2021-09-02 16:57:46 +02:00
parent 7f7fafb857
commit 741a4444a9
No known key found for this signature in database
GPG Key ID: 2CEF23B75189EACA
3 changed files with 2 additions and 19 deletions

View File

@@ -129,10 +129,8 @@ impl GrenadParameters {
pub fn grenad_obkv_into_chunks<R: io::Read>( pub fn grenad_obkv_into_chunks<R: io::Read>(
mut reader: grenad::Reader<R>, mut reader: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,
log_frequency: Option<usize>,
documents_chunk_size: usize, documents_chunk_size: usize,
) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> { ) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
let mut document_count = 0;
let mut continue_reading = true; let mut continue_reading = true;
let indexer_clone = indexer.clone(); let indexer_clone = indexer.clone();
@@ -154,11 +152,6 @@ pub fn grenad_obkv_into_chunks<R: io::Read>(
obkv_documents.insert(document_id, obkv)?; obkv_documents.insert(document_id, obkv)?;
current_chunk_size += document_id.len() as u64 + obkv.len() as u64; current_chunk_size += document_id.len() as u64 + obkv.len() as u64;
document_count += 1;
if log_frequency.map_or(false, |log_frequency| document_count % log_frequency == 0) {
debug!("reached {} chunked documents", document_count);
}
if current_chunk_size >= documents_chunk_size as u64 { if current_chunk_size >= documents_chunk_size as u64 {
return writer_into_reader(obkv_documents).map(Some); return writer_into_reader(obkv_documents).map(Some);
} }
@@ -168,16 +161,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read>(
writer_into_reader(obkv_documents).map(Some) writer_into_reader(obkv_documents).map(Some)
}; };
Ok(std::iter::from_fn(move || { Ok(std::iter::from_fn(move || transposer().transpose()))
let result = transposer().transpose();
if result.as_ref().map_or(false, |r| r.is_ok()) {
debug!(
"A new chunk of approximately {:.2} MiB has been generated",
documents_chunk_size as f64 / 1024.0 / 1024.0,
);
}
result
}))
} }
pub fn write_into_lmdb_database( pub fn write_into_lmdb_database(

View File

@@ -250,7 +250,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
let chunk_iter = grenad_obkv_into_chunks( let chunk_iter = grenad_obkv_into_chunks(
documents_file, documents_file,
params.clone(), params.clone(),
self.log_every_n,
self.documents_chunk_size.unwrap_or(1024 * 1024 * 128), // 128MiB self.documents_chunk_size.unwrap_or(1024 * 1024 * 128), // 128MiB
); );

View File

@@ -5,7 +5,7 @@ use big_s::S;
use either::{Either, Left, Right}; use either::{Either, Left, Right};
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use maplit::{hashmap, hashset}; use maplit::{hashmap, hashset};
use milli::update::{IndexDocuments, Settings, UpdateBuilder, UpdateFormat}; use milli::update::{Settings, UpdateBuilder, UpdateFormat};
use milli::{AscDesc, Criterion, DocumentId, Index}; use milli::{AscDesc, Criterion, DocumentId, Index};
use serde::Deserialize; use serde::Deserialize;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;