mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Merge #635
635: Use an unstable algorithm for `grenad::Sorter` when possible r=Kerollmops a=loiclec

# Pull Request

## What does this PR do?

Use an unstable algorithm to sort the internal vector used by `grenad::Sorter` whenever possible to speed up indexing.

In practice, every time the merge function creates a `RoaringBitmap`, we use an unstable sort. For every other merge function, such as `keep_first`, `keep_last`, etc., a stable sort is used.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
This commit is contained in:
commit
15d478cf4d
@ -17,7 +17,7 @@ flatten-serde-json = { path = "../flatten-serde-json" }
|
|||||||
fst = "0.4.7"
|
fst = "0.4.7"
|
||||||
fxhash = "0.2.1"
|
fxhash = "0.2.1"
|
||||||
geoutils = "0.4.1"
|
geoutils = "0.4.1"
|
||||||
grenad = { version = "0.4.2", default-features = false, features = ["tempfile"] }
|
grenad = { version = "0.4.3", default-features = false, features = ["tempfile"] }
|
||||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.3", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.3", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||||
json-depth-checker = { path = "../json-depth-checker" }
|
json-depth-checker = { path = "../json-depth-checker" }
|
||||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||||
|
@ -32,6 +32,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
let mut documents_ids = RoaringBitmap::new();
|
let mut documents_ids = RoaringBitmap::new();
|
||||||
let mut docid_word_positions_sorter = create_sorter(
|
let mut docid_word_positions_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Stable,
|
||||||
concat_u32s_array,
|
concat_u32s_array,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -21,6 +21,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
|||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut facet_number_docids_sorter = create_sorter(
|
let mut facet_number_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -23,6 +23,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut facet_string_docids_sorter = create_sorter(
|
let mut facet_string_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Stable,
|
||||||
keep_first_prefix_value_merge_roaring_bitmaps,
|
keep_first_prefix_value_merge_roaring_bitmaps,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -28,6 +28,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut fid_docid_facet_numbers_sorter = create_sorter(
|
let mut fid_docid_facet_numbers_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Stable,
|
||||||
keep_first,
|
keep_first,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
@ -36,6 +37,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
let mut fid_docid_facet_strings_sorter = create_sorter(
|
let mut fid_docid_facet_strings_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Stable,
|
||||||
keep_first,
|
keep_first,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -25,6 +25,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
|||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut fid_word_count_docids_sorter = create_sorter(
|
let mut fid_word_count_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -30,6 +30,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut word_docids_sorter = create_sorter(
|
let mut word_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_roaring_bitmaps,
|
merge_roaring_bitmaps,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
@ -38,6 +39,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
);
|
);
|
||||||
|
|
||||||
let mut exact_word_docids_sorter = create_sorter(
|
let mut exact_word_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_roaring_bitmaps,
|
merge_roaring_bitmaps,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -24,6 +24,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
|||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut word_pair_proximity_docids_sorter = create_sorter(
|
let mut word_pair_proximity_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -21,6 +21,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
|||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut word_position_docids_sorter = create_sorter(
|
let mut word_position_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -27,6 +27,7 @@ pub fn create_writer<R: io::Write>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn create_sorter(
|
pub fn create_sorter(
|
||||||
|
sort_algorithm: grenad::SortAlgorithm,
|
||||||
merge: MergeFn,
|
merge: MergeFn,
|
||||||
chunk_compression_type: grenad::CompressionType,
|
chunk_compression_type: grenad::CompressionType,
|
||||||
chunk_compression_level: Option<u32>,
|
chunk_compression_level: Option<u32>,
|
||||||
@ -45,6 +46,7 @@ pub fn create_sorter(
|
|||||||
builder.dump_threshold(memory);
|
builder.dump_threshold(memory);
|
||||||
builder.allow_realloc(false);
|
builder.allow_realloc(false);
|
||||||
}
|
}
|
||||||
|
builder.sort_algorithm(sort_algorithm);
|
||||||
builder.build()
|
builder.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1488,6 +1488,7 @@ mod tests {
|
|||||||
assert_eq!(count, 4);
|
assert_eq!(count, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "default")]
|
||||||
#[test]
|
#[test]
|
||||||
fn test_meilisearch_1714() {
|
fn test_meilisearch_1714() {
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
|
@ -99,6 +99,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
|
|
||||||
// We initialize the sorter with the user indexing settings.
|
// We initialize the sorter with the user indexing settings.
|
||||||
let original_sorter = create_sorter(
|
let original_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Stable,
|
||||||
merge_function,
|
merge_function,
|
||||||
indexer_settings.chunk_compression_type,
|
indexer_settings.chunk_compression_type,
|
||||||
indexer_settings.chunk_compression_level,
|
indexer_settings.chunk_compression_level,
|
||||||
@ -108,6 +109,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
|
|
||||||
// We initialize the sorter with the user indexing settings.
|
// We initialize the sorter with the user indexing settings.
|
||||||
let flattened_sorter = create_sorter(
|
let flattened_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Stable,
|
||||||
merge_function,
|
merge_function,
|
||||||
indexer_settings.chunk_compression_type,
|
indexer_settings.chunk_compression_type,
|
||||||
indexer_settings.chunk_compression_level,
|
indexer_settings.chunk_compression_level,
|
||||||
|
@ -48,6 +48,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
|||||||
// It is forbidden to keep a mutable reference into the database
|
// It is forbidden to keep a mutable reference into the database
|
||||||
// and write into it at the same time, therefore we write into another file.
|
// and write into it at the same time, therefore we write into another file.
|
||||||
let mut prefix_docids_sorter = create_sorter(
|
let mut prefix_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_roaring_bitmaps,
|
merge_roaring_bitmaps,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
|
@ -65,6 +65,7 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
|
|||||||
debug!("Computing and writing the word levels positions docids into LMDB on disk...");
|
debug!("Computing and writing the word levels positions docids into LMDB on disk...");
|
||||||
|
|
||||||
let mut prefix_position_docids_sorter = create_sorter(
|
let mut prefix_position_docids_sorter = create_sorter(
|
||||||
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
|
Loading…
Reference in New Issue
Block a user