Don't sort in parallel in sorters of the new indexer

This commit is contained in:
Louis Dureuil 2024-10-17 09:30:18 +02:00
parent 0647f75e6b
commit 0749633618
No known key found for this signature in database
15 changed files with 32 additions and 1 deletion

View File

@ -40,6 +40,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
true,
); );
// initialize buffers. // initialize buffers.

View File

@ -32,6 +32,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
true,
); );
let mut buffer = Vec::new(); let mut buffer = Vec::new();

View File

@ -61,6 +61,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 2), max_memory.map(|m| m / 2),
true,
); );
let mut normalized_facet_string_docids_sorter = create_sorter( let mut normalized_facet_string_docids_sorter = create_sorter(
@ -70,6 +71,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 2), max_memory.map(|m| m / 2),
true,
); );
let mut buffer = Vec::new(); let mut buffer = Vec::new();
@ -149,6 +151,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 2), max_memory.map(|m| m / 2),
true,
); );
let mut normalized_facet_string_docids_sorter = create_sorter( let mut normalized_facet_string_docids_sorter = create_sorter(
@ -158,6 +161,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 2), max_memory.map(|m| m / 2),
true,
); );
let mut buffer = Vec::new(); let mut buffer = Vec::new();

View File

@ -53,6 +53,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 2), max_memory.map(|m| m / 2),
true,
); );
let mut fid_docid_facet_strings_sorter = create_sorter( let mut fid_docid_facet_strings_sorter = create_sorter(
@ -62,6 +63,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 2), max_memory.map(|m| m / 2),
true,
); );
// The tuples represent the Del and Add side for a bitmap // The tuples represent the Del and Add side for a bitmap

View File

@ -35,6 +35,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
true,
); );
let mut key_buffer = Vec::new(); let mut key_buffer = Vec::new();

View File

@ -44,6 +44,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 3), max_memory.map(|m| m / 3),
true,
); );
let mut key_buffer = Vec::new(); let mut key_buffer = Vec::new();
let mut del_words = BTreeSet::new(); let mut del_words = BTreeSet::new();
@ -98,6 +99,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 3), max_memory.map(|m| m / 3),
true,
); );
let mut exact_word_docids_sorter = create_sorter( let mut exact_word_docids_sorter = create_sorter(
@ -107,6 +109,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / 3), max_memory.map(|m| m / 3),
true,
); );
let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?; let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;

View File

@ -49,6 +49,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory.map(|m| m / MAX_DISTANCE as usize), max_memory.map(|m| m / MAX_DISTANCE as usize),
true,
) )
}) })
.collect(); .collect();

View File

@ -33,6 +33,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
true,
); );
let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new(); let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();

View File

@ -37,6 +37,7 @@ pub fn create_sorter<MF: MergeFunction>(
chunk_compression_level: Option<u32>, chunk_compression_level: Option<u32>,
max_nb_chunks: Option<usize>, max_nb_chunks: Option<usize>,
max_memory: Option<usize>, max_memory: Option<usize>,
sort_in_parallel: bool,
) -> grenad::Sorter<MF> { ) -> grenad::Sorter<MF> {
let mut builder = grenad::Sorter::builder(merge); let mut builder = grenad::Sorter::builder(merge);
builder.chunk_compression_type(chunk_compression_type); builder.chunk_compression_type(chunk_compression_type);
@ -51,7 +52,7 @@ pub fn create_sorter<MF: MergeFunction>(
builder.allow_realloc(false); builder.allow_realloc(false);
} }
builder.sort_algorithm(sort_algorithm); builder.sort_algorithm(sort_algorithm);
builder.sort_in_parallel(true); builder.sort_in_parallel(sort_in_parallel);
builder.build() builder.build()
} }

View File

@ -127,6 +127,7 @@ impl<'a, 'i> Transform<'a, 'i> {
indexer_settings.chunk_compression_level, indexer_settings.chunk_compression_level,
indexer_settings.max_nb_chunks, indexer_settings.max_nb_chunks,
indexer_settings.max_memory.map(|mem| mem / 2), indexer_settings.max_memory.map(|mem| mem / 2),
true,
); );
// We initialize the sorter with the user indexing settings. // We initialize the sorter with the user indexing settings.
@ -137,6 +138,7 @@ impl<'a, 'i> Transform<'a, 'i> {
indexer_settings.chunk_compression_level, indexer_settings.chunk_compression_level,
indexer_settings.max_nb_chunks, indexer_settings.max_nb_chunks,
indexer_settings.max_memory.map(|mem| mem / 2), indexer_settings.max_memory.map(|mem| mem / 2),
true,
); );
let documents_ids = index.documents_ids(wtxn)?; let documents_ids = index.documents_ids(wtxn)?;
@ -988,6 +990,7 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level, self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks, self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2), self.indexer_settings.max_memory.map(|mem| mem / 2),
true,
)) ))
} else { } else {
None None
@ -1030,6 +1033,7 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level, self.indexer_settings.chunk_compression_level,
self.indexer_settings.max_nb_chunks, self.indexer_settings.max_nb_chunks,
self.indexer_settings.max_memory.map(|mem| mem / 2), self.indexer_settings.max_memory.map(|mem| mem / 2),
true,
)) ))
} else { } else {
None None

View File

@ -46,6 +46,10 @@ impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> {
self.grenad_parameters.chunk_compression_level, self.grenad_parameters.chunk_compression_level,
self.grenad_parameters.max_nb_chunks, self.grenad_parameters.max_nb_chunks,
self.max_memory, self.max_memory,
// *NOTE*: this must not be set to true:
// 1. we're already using max parallelism in the pool, so it wouldn't help
// 2. it creates correctness issues if it causes a task that holds a mutable borrow (`borrow_mut`) to yield
false,
), ),
)))) ))))
} }

View File

@ -48,6 +48,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
false,
), ),
); );
let word_docids = CboCachedSorter::new( let word_docids = CboCachedSorter::new(
@ -59,6 +60,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
false,
), ),
); );
let exact_word_docids = CboCachedSorter::new( let exact_word_docids = CboCachedSorter::new(
@ -70,6 +72,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
false,
), ),
); );
let word_position_docids = CboCachedSorter::new( let word_position_docids = CboCachedSorter::new(
@ -81,6 +84,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
false,
), ),
); );
let fid_word_count_docids = CboCachedSorter::new( let fid_word_count_docids = CboCachedSorter::new(
@ -92,6 +96,7 @@ impl WordDocidsCachedSorters {
indexer.chunk_compression_level, indexer.chunk_compression_level,
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
false,
), ),
); );

View File

@ -50,6 +50,7 @@ impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
self.grenad_parameters.chunk_compression_level, self.grenad_parameters.chunk_compression_level,
self.grenad_parameters.max_nb_chunks, self.grenad_parameters.max_nb_chunks,
self.max_memory, self.max_memory,
false,
), ),
)))) ))))
} }

View File

@ -60,6 +60,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
self.chunk_compression_level, self.chunk_compression_level,
self.max_nb_chunks, self.max_nb_chunks,
self.max_memory, self.max_memory,
true,
); );
if !common_prefix_fst_words.is_empty() { if !common_prefix_fst_words.is_empty() {

View File

@ -65,6 +65,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
self.chunk_compression_level, self.chunk_compression_level,
self.max_nb_chunks, self.max_nb_chunks,
self.max_memory, self.max_memory,
true,
); );
if !common_prefix_fst_words.is_empty() { if !common_prefix_fst_words.is_empty() {