diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 716e4dd6b..b1e6f24be 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -40,6 +40,7 @@ pub fn extract_docid_word_positions( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + true, ); // initialize buffers. diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs index 8a5a93270..34bece989 100644 --- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs @@ -32,6 +32,7 @@ pub fn extract_facet_number_docids( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + true, ); let mut buffer = Vec::new(); diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index f7bdcbb56..e0d7e1386 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -61,6 +61,7 @@ fn extract_facet_string_docids_document_update( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 2), + true, ); let mut normalized_facet_string_docids_sorter = create_sorter( @@ -70,6 +71,7 @@ fn extract_facet_string_docids_document_update( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 2), + true, ); let mut buffer = Vec::new(); @@ -149,6 +151,7 @@ fn extract_facet_string_docids_settings( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 2), + true, ); let mut normalized_facet_string_docids_sorter = create_sorter( 
@@ -158,6 +161,7 @@ fn extract_facet_string_docids_settings( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 2), + true, ); let mut buffer = Vec::new(); diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index f7f447ca9..047669521 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -53,6 +53,7 @@ pub fn extract_fid_docid_facet_values( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 2), + true, ); let mut fid_docid_facet_strings_sorter = create_sorter( @@ -62,6 +63,7 @@ pub fn extract_fid_docid_facet_values( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 2), + true, ); // The tuples represents the Del and Add side for a bitmap diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index 784de5d94..5739a5e15 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -35,6 +35,7 @@ pub fn extract_fid_word_count_docids( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + true, ); let mut key_buffer = Vec::new(); diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index 70db9d759..829da768c 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -44,6 +44,7 @@ pub fn extract_word_docids( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 3), + true, ); let mut key_buffer = Vec::new(); let mut del_words = 
BTreeSet::new(); @@ -98,6 +99,7 @@ pub fn extract_word_docids( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 3), + true, ); let mut exact_word_docids_sorter = create_sorter( @@ -107,6 +109,7 @@ pub fn extract_word_docids( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / 3), + true, ); let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?; diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index 705a5c96f..6194da23d 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -49,6 +49,7 @@ pub fn extract_word_pair_proximity_docids( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory.map(|m| m / MAX_DISTANCE as usize), + true, ) }) .collect(); diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs index bee510bfb..f870fbe1b 100644 --- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs @@ -33,6 +33,7 @@ pub fn extract_word_position_docids( indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + true, ); let mut del_word_positions: BTreeSet<(u16, Vec)> = BTreeSet::new(); diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index 1f8f7eddf..220567208 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -37,6 +37,7 @@ pub fn create_sorter( chunk_compression_level: Option, max_nb_chunks: Option, max_memory: Option, + sort_in_parallel: bool, ) -> grenad::Sorter { 
let mut builder = grenad::Sorter::builder(merge); builder.chunk_compression_type(chunk_compression_type); @@ -51,7 +52,7 @@ pub fn create_sorter( builder.allow_realloc(false); } builder.sort_algorithm(sort_algorithm); - builder.sort_in_parallel(true); + builder.sort_in_parallel(sort_in_parallel); builder.build() } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 65007aa32..84135ff24 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -127,6 +127,7 @@ impl<'a, 'i> Transform<'a, 'i> { indexer_settings.chunk_compression_level, indexer_settings.max_nb_chunks, indexer_settings.max_memory.map(|mem| mem / 2), + true, ); // We initialize the sorter with the user indexing settings. @@ -137,6 +138,7 @@ impl<'a, 'i> Transform<'a, 'i> { indexer_settings.chunk_compression_level, indexer_settings.max_nb_chunks, indexer_settings.max_memory.map(|mem| mem / 2), + true, ); let documents_ids = index.documents_ids(wtxn)?; @@ -988,6 +990,7 @@ impl<'a, 'i> Transform<'a, 'i> { self.indexer_settings.chunk_compression_level, self.indexer_settings.max_nb_chunks, self.indexer_settings.max_memory.map(|mem| mem / 2), + true, )) } else { None @@ -1030,6 +1033,7 @@ impl<'a, 'i> Transform<'a, 'i> { self.indexer_settings.chunk_compression_level, self.indexer_settings.max_nb_chunks, self.indexer_settings.max_memory.map(|mem| mem / 2), + true, )) } else { None diff --git a/milli/src/update/new/extract/faceted/extract_facets.rs b/milli/src/update/new/extract/faceted/extract_facets.rs index 82f80c7b5..9f3ed18d8 100644 --- a/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/milli/src/update/new/extract/faceted/extract_facets.rs @@ -46,6 +46,10 @@ impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> { self.grenad_parameters.chunk_compression_level, self.grenad_parameters.max_nb_chunks, self.max_memory, + // *NOTE*: this must not be set to true: + 
// 1. we're already using max parallelism in the pool, so it wouldn't help + // 2. it creates correctness issues if it causes a task holding a mutable borrow to yield + false, ), )))) } diff --git a/milli/src/update/new/extract/searchable/extract_word_docids.rs b/milli/src/update/new/extract/searchable/extract_word_docids.rs index 5d70408bb..c76ab49d0 100644 --- a/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -48,6 +48,7 @@ impl WordDocidsCachedSorters { indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + false, ), ); let word_docids = CboCachedSorter::new( @@ -59,6 +60,7 @@ impl WordDocidsCachedSorters { indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + false, ), ); let exact_word_docids = CboCachedSorter::new( @@ -70,6 +72,7 @@ impl WordDocidsCachedSorters { indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + false, ), ); let word_position_docids = CboCachedSorter::new( @@ -81,6 +84,7 @@ impl WordDocidsCachedSorters { indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + false, ), ); let fid_word_count_docids = CboCachedSorter::new( @@ -92,6 +96,7 @@ impl WordDocidsCachedSorters { indexer.chunk_compression_level, indexer.max_nb_chunks, max_memory, + false, ), ); diff --git a/milli/src/update/new/extract/searchable/mod.rs b/milli/src/update/new/extract/searchable/mod.rs index 1edeec8b4..8934ee892 100644 --- a/milli/src/update/new/extract/searchable/mod.rs +++ b/milli/src/update/new/extract/searchable/mod.rs @@ -50,6 +50,7 @@ impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> self.grenad_parameters.chunk_compression_level, self.grenad_parameters.max_nb_chunks, self.max_memory, + false, ), )))) } diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index f683146cf..d129d485e 100644 --- a/milli/src/update/word_prefix_docids.rs +++ 
b/milli/src/update/word_prefix_docids.rs @@ -60,6 +60,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> { self.chunk_compression_level, self.max_nb_chunks, self.max_memory, + true, ); if !common_prefix_fst_words.is_empty() { diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index 28b9b1523..ff974b797 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -65,6 +65,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { self.chunk_compression_level, self.max_nb_chunks, self.max_memory, + true, ); if !common_prefix_fst_words.is_empty() {