Don't sort in parallel in sorters of the new indexer

This commit is contained in:
Louis Dureuil 2024-10-17 09:30:18 +02:00
parent 0647f75e6b
commit 0749633618
No known key found for this signature in database
15 changed files with 32 additions and 1 deletions

View File

@ -40,6 +40,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
true,
);
// initialize buffers.

View File

@ -32,6 +32,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
true,
);
let mut buffer = Vec::new();

View File

@ -61,6 +61,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 2),
true,
);
let mut normalized_facet_string_docids_sorter = create_sorter(
@ -70,6 +71,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 2),
true,
);
let mut buffer = Vec::new();
@ -149,6 +151,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 2),
true,
);
let mut normalized_facet_string_docids_sorter = create_sorter(
@ -158,6 +161,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 2),
true,
);
let mut buffer = Vec::new();

View File

@ -53,6 +53,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 2),
true,
);
let mut fid_docid_facet_strings_sorter = create_sorter(
@ -62,6 +63,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 2),
true,
);
// The tuples represent the Del and Add sides for a bitmap

View File

@ -35,6 +35,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
true,
);
let mut key_buffer = Vec::new();

View File

@ -44,6 +44,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 3),
true,
);
let mut key_buffer = Vec::new();
let mut del_words = BTreeSet::new();
@ -98,6 +99,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 3),
true,
);
let mut exact_word_docids_sorter = create_sorter(
@ -107,6 +109,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 3),
true,
);
let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;

View File

@ -49,6 +49,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / MAX_DISTANCE as usize),
true,
)
})
.collect();

View File

@ -33,6 +33,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
true,
);
let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();

View File

@ -37,6 +37,7 @@ pub fn create_sorter<MF: MergeFunction>(
chunk_compression_level: Option<u32>,
max_nb_chunks: Option<usize>,
max_memory: Option<usize>,
sort_in_parallel: bool,
) -> grenad::Sorter<MF> {
let mut builder = grenad::Sorter::builder(merge);
builder.chunk_compression_type(chunk_compression_type);
@ -51,7 +52,7 @@ pub fn create_sorter<MF: MergeFunction>(
builder.allow_realloc(false);
}
builder.sort_algorithm(sort_algorithm);
builder.sort_in_parallel(true);
builder.sort_in_parallel(sort_in_parallel);
builder.build()
}

View File

@ -127,6 +127,7 @@ impl<'a, 'i> Transform<'a, 'i> {
indexer_settings.chunk_compression_level,
indexer_settings.max_nb_chunks,
indexer_settings.max_memory.map(|mem| mem / 2),
true,
);
// We initialize the sorter with the user indexing settings.
@ -137,6 +138,7 @@ impl<'a, 'i> Transform<'a, 'i> {
indexer_settings.chunk_compression_level,
indexer_settings.max_nb_chunks,
indexer_settings.max_memory.map(|mem| mem / 2),
true,
);
let documents_ids = index.documents_ids(wtxn)?;
@ -988,6 +990,7 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2),
true,
))
} else {
None
@ -1030,6 +1033,7 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2),
true,
))
} else {
None

View File

@ -46,6 +46,10 @@ impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> {
self.grenad_parameters.chunk_compression_level,
self.grenad_parameters.max_nb_chunks,
self.max_memory,
// *NOTE*: this must not be set to true:
// 1. we're already using max parallelism in the pool, so it wouldn't help
// 2. it creates correctness issues if it causes a task that holds a mutable borrow to yield
false,
),
))))
}

View File

@ -48,6 +48,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
false,
),
);
let word_docids = CboCachedSorter::new(
@ -59,6 +60,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
false,
),
);
let exact_word_docids = CboCachedSorter::new(
@ -70,6 +72,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
false,
),
);
let word_position_docids = CboCachedSorter::new(
@ -81,6 +84,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
false,
),
);
let fid_word_count_docids = CboCachedSorter::new(
@ -92,6 +96,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
false,
),
);

View File

@ -50,6 +50,7 @@ impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
self.grenad_parameters.chunk_compression_level,
self.grenad_parameters.max_nb_chunks,
self.max_memory,
false,
),
))))
}

View File

@ -60,6 +60,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
self.chunk_compression_level,
self.max_nb_chunks,
self.max_memory,
true,
);
if !common_prefix_fst_words.is_empty() {

View File

@ -65,6 +65,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
self.chunk_compression_level,
self.max_nb_chunks,
self.max_memory,
true,
);
if !common_prefix_fst_words.is_empty() {