Sort entries using rayon in the transform sorters

This commit is contained in:
Clément Renault 2023-11-01 10:39:16 +01:00
parent 0fc446c62f
commit c71b1d33ae
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 40 additions and 19 deletions

5
Cargo.lock generated
View File

@ -1664,11 +1664,12 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "grenad"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5232b2d157b7bf63d7abe1b12177039e58db2f29e377517c0cdee1578cca4c93"
source = "git+https://github.com/meilisearch/grenad?branch=parallel-sorter#eafb6ae795af6078e087edf77e7cd31a26238707"
dependencies = [
"bytemuck",
"byteorder",
"crossbeam-channel",
"rayon",
"tempfile",
]

View File

@ -26,7 +26,8 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.4", default-features = false, features = [
grenad = { git = "https://github.com/meilisearch/grenad", branch = "parallel-sorter", default-features = false, features = [
"rayon",
"tempfile",
] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [

View File

@ -114,24 +114,43 @@ impl<'a, 'i> Transform<'a, 'i> {
};
// We initialize the original sorter with the user indexing settings, giving it half of the memory budget.
let original_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_function,
indexer_settings.chunk_compression_type,
indexer_settings.chunk_compression_level,
indexer_settings.max_nb_chunks,
indexer_settings.max_memory.map(|mem| mem / 2),
);
let original_sorter = {
let mut builder = grenad::Sorter::builder(merge_function);
builder.chunk_compression_type(indexer_settings.chunk_compression_type);
if let Some(level) = indexer_settings.chunk_compression_level {
builder.chunk_compression_level(level);
}
if let Some(nb_chunks) = indexer_settings.max_nb_chunks {
builder.max_nb_chunks(nb_chunks);
}
if let Some(memory) = indexer_settings.max_memory.map(|mem| mem / 2) {
builder.dump_threshold(memory);
builder.allow_realloc(false);
}
builder.sort_algorithm(grenad::SortAlgorithm::Stable);
builder.sort_in_parallel(true);
builder.build()
};
// We initialize the flattened sorter with the user indexing settings, giving it half of the memory budget.
let flattened_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_function,
indexer_settings.chunk_compression_type,
indexer_settings.chunk_compression_level,
indexer_settings.max_nb_chunks,
indexer_settings.max_memory.map(|mem| mem / 2),
);
let flattened_sorter = {
let mut builder = grenad::Sorter::builder(merge_function);
builder.chunk_compression_type(indexer_settings.chunk_compression_type);
if let Some(level) = indexer_settings.chunk_compression_level {
builder.chunk_compression_level(level);
}
if let Some(nb_chunks) = indexer_settings.max_nb_chunks {
builder.max_nb_chunks(nb_chunks);
}
if let Some(memory) = indexer_settings.max_memory.map(|mem| mem / 2) {
builder.dump_threshold(memory);
builder.allow_realloc(false);
}
builder.sort_algorithm(grenad::SortAlgorithm::Stable);
builder.sort_in_parallel(true);
builder.build()
};
let documents_ids = index.documents_ids(wtxn)?;
Ok(Transform {