mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-31 15:31:53 +08:00
Merge #5124
5124: Optimize Prefixes and Merges r=ManyTheFish a=Kerollmops In this PR, we plan to optimize LMDB reads by reading the entries in lexicographic order, which makes better use of the OS memory-mapping cache: - Optimize the prefix generation for word position docids (`@manythefish`) - Optimize the parallel merging of the caches by sorting the entries before merging the caches (`@kerollmops`) ## Benchmarks on 1cpu 2gb gpo3 (5k IOps) Before, on the tag meilisearch-v1.12.0-rc.3. ``` word_position_docids:merge_and_send_docids: 988s compute_word_fst: 23.3s word_pair_proximity_docids:merge_and_send_docids: 428s compute_word_prefix_fid_docids:recompute_modified_prefixes: 76.3s compute_word_prefix_position_docids:recompute_modified_prefixes:from_prefixes: 429s ``` After sorting the whole `HashMap`s in a `Vec` on this branch. ``` word_position_docids:merge_and_send_docids: 202s compute_word_fst: 20.4s word_pair_proximity_docids:merge_and_send_docids: 427s compute_word_prefix_fid_docids:recompute_modified_prefixes: 65.5s compute_word_prefix_position_docids:recompute_modified_prefixes:from_prefixes: 62.5s ``` Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
commit
cac355bfa7
@ -466,12 +466,13 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
|
|||||||
Ok(bucket_caches)
|
Ok(bucket_caches)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Merges the caches that must be all associated to the same bucket.
|
/// Merges the caches that must be all associated to the same bucket
|
||||||
|
/// but makes sure to sort the different buckets before performing the merges.
|
||||||
///
|
///
|
||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// - If the bucket IDs in these frozen caches are not exactly the same.
|
/// - If the bucket IDs in these frozen caches are not exactly the same.
|
||||||
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
|
pub fn merge_caches_sorted<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
|
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
|
||||||
{
|
{
|
||||||
@ -543,12 +544,12 @@ where
|
|||||||
|
|
||||||
// Then manage the content on the HashMap entries that weren't taken (mem::take).
|
// Then manage the content on the HashMap entries that weren't taken (mem::take).
|
||||||
while let Some(mut map) = maps.pop() {
|
while let Some(mut map) = maps.pop() {
|
||||||
for (key, bbbul) in map.iter_mut() {
|
// Make sure we don't try to work with entries already managed by the spilled
|
||||||
// Make sure we don't try to work with entries already managed by the spilled
|
let mut ordered_entries: Vec<_> =
|
||||||
if bbbul.is_empty() {
|
map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect();
|
||||||
continue;
|
ordered_entries.sort_unstable_by_key(|(key, _)| *key);
|
||||||
}
|
|
||||||
|
|
||||||
|
for (key, bbbul) in ordered_entries {
|
||||||
let mut output = DelAddRoaringBitmap::empty();
|
let mut output = DelAddRoaringBitmap::empty();
|
||||||
output.union_and_clear_bbbul(bbbul);
|
output.union_and_clear_bbbul(bbbul);
|
||||||
|
|
||||||
|
@ -6,7 +6,9 @@ mod searchable;
|
|||||||
mod vectors;
|
mod vectors;
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
pub use cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap};
|
pub use cache::{
|
||||||
|
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
|
||||||
|
};
|
||||||
pub use documents::*;
|
pub use documents::*;
|
||||||
pub use faceted::*;
|
pub use faceted::*;
|
||||||
pub use geo::*;
|
pub use geo::*;
|
||||||
|
@ -9,8 +9,8 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::channel::*;
|
use super::channel::*;
|
||||||
use super::extract::{
|
use super::extract::{
|
||||||
merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind,
|
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
|
||||||
GeoExtractorData,
|
FacetKind, GeoExtractorData,
|
||||||
};
|
};
|
||||||
use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
|
use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ where
|
|||||||
if must_stop_processing() {
|
if must_stop_processing() {
|
||||||
return Err(InternalError::AbortedIndexation.into());
|
return Err(InternalError::AbortedIndexation.into());
|
||||||
}
|
}
|
||||||
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
||||||
let current = database.get(&rtxn, key)?;
|
let current = database.get(&rtxn, key)?;
|
||||||
match merge_cbo_bitmaps(current, del, add)? {
|
match merge_cbo_bitmaps(current, del, add)? {
|
||||||
Operation::Write(bitmap) => {
|
Operation::Write(bitmap) => {
|
||||||
@ -107,7 +107,7 @@ pub fn merge_and_send_facet_docids<'extractor>(
|
|||||||
.map(|frozen| {
|
.map(|frozen| {
|
||||||
let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
|
let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
|
||||||
let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
|
let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
|
||||||
match merge_cbo_bitmaps(current, del, add)? {
|
match merge_cbo_bitmaps(current, del, add)? {
|
||||||
Operation::Write(bitmap) => {
|
Operation::Write(bitmap) => {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::BTreeSet;
|
||||||
use std::io::BufWriter;
|
use std::io::BufWriter;
|
||||||
|
|
||||||
use fst::{Set, SetBuilder, Streamer};
|
use fst::{Set, SetBuilder, Streamer};
|
||||||
@ -75,8 +75,8 @@ pub struct PrefixData {
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct PrefixDelta {
|
pub struct PrefixDelta {
|
||||||
pub modified: HashSet<Prefix>,
|
pub modified: BTreeSet<Prefix>,
|
||||||
pub deleted: HashSet<Prefix>,
|
pub deleted: BTreeSet<Prefix>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct PrefixFstBuilder {
|
struct PrefixFstBuilder {
|
||||||
@ -86,7 +86,7 @@ struct PrefixFstBuilder {
|
|||||||
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
|
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
|
||||||
current_prefix: Vec<Prefix>,
|
current_prefix: Vec<Prefix>,
|
||||||
current_prefix_count: Vec<usize>,
|
current_prefix_count: Vec<usize>,
|
||||||
modified_prefixes: HashSet<Prefix>,
|
modified_prefixes: BTreeSet<Prefix>,
|
||||||
current_prefix_is_modified: Vec<bool>,
|
current_prefix_is_modified: Vec<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,7 +110,7 @@ impl PrefixFstBuilder {
|
|||||||
prefix_fst_builders,
|
prefix_fst_builders,
|
||||||
current_prefix: vec![Prefix::new(); max_prefix_length],
|
current_prefix: vec![Prefix::new(); max_prefix_length],
|
||||||
current_prefix_count: vec![0; max_prefix_length],
|
current_prefix_count: vec![0; max_prefix_length],
|
||||||
modified_prefixes: HashSet::new(),
|
modified_prefixes: BTreeSet::new(),
|
||||||
current_prefix_is_modified: vec![false; max_prefix_length],
|
current_prefix_is_modified: vec![false; max_prefix_length],
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -180,7 +180,7 @@ impl PrefixFstBuilder {
|
|||||||
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
|
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
|
||||||
let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
|
let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
|
||||||
let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
|
let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
|
||||||
let mut deleted_prefixes = HashSet::new();
|
let mut deleted_prefixes = BTreeSet::new();
|
||||||
{
|
{
|
||||||
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
|
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
|
||||||
while let Some(prefix) = deleted_prefixes_stream.next() {
|
while let Some(prefix) = deleted_prefixes_stream.next() {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::collections::HashSet;
|
use std::collections::BTreeSet;
|
||||||
use std::io::{BufReader, BufWriter, Read, Seek, Write};
|
use std::io::{BufReader, BufWriter, Read, Seek, Write};
|
||||||
|
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
@ -37,8 +37,8 @@ impl WordPrefixDocids {
|
|||||||
fn execute(
|
fn execute(
|
||||||
self,
|
self,
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
||||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||||
@ -48,7 +48,7 @@ impl WordPrefixDocids {
|
|||||||
fn recompute_modified_prefixes(
|
fn recompute_modified_prefixes(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
prefixes: &HashSet<Prefix>,
|
prefixes: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// We fetch the docids associated to the newly added word prefix fst only.
|
// We fetch the docids associated to the newly added word prefix fst only.
|
||||||
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
||||||
@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
|
|||||||
pub fn from_prefixes(
|
pub fn from_prefixes(
|
||||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||||
rtxn: &'rtxn RoTxn,
|
rtxn: &'rtxn RoTxn,
|
||||||
prefixes: &'a HashSet<Prefix>,
|
prefixes: &'a BTreeSet<Prefix>,
|
||||||
) -> heed::Result<Self> {
|
) -> heed::Result<Self> {
|
||||||
let database = database.remap_data_type::<Bytes>();
|
let database = database.remap_data_type::<Bytes>();
|
||||||
|
|
||||||
@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
|
|||||||
fn execute(
|
fn execute(
|
||||||
self,
|
self,
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
||||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||||
@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
|
|||||||
fn recompute_modified_prefixes(
|
fn recompute_modified_prefixes(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
prefixes: &HashSet<Prefix>,
|
prefixes: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// We fetch the docids associated to the newly added word prefix fst only.
|
// We fetch the docids associated to the newly added word prefix fst only.
|
||||||
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
||||||
@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
|
|||||||
pub fn from_prefixes(
|
pub fn from_prefixes(
|
||||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||||
rtxn: &'rtxn RoTxn,
|
rtxn: &'rtxn RoTxn,
|
||||||
prefixes: &'a HashSet<Prefix>,
|
prefixes: &'a BTreeSet<Prefix>,
|
||||||
) -> heed::Result<Self> {
|
) -> heed::Result<Self> {
|
||||||
let database = database.remap_data_type::<Bytes>();
|
let database = database.remap_data_type::<Bytes>();
|
||||||
|
|
||||||
@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
|
|||||||
fn delete_prefixes(
|
fn delete_prefixes(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
|
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
|
||||||
prefixes: &HashSet<Prefix>,
|
prefixes: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// We remove all the entries that are no more required in this word prefix docids database.
|
// We remove all the entries that are no more required in this word prefix docids database.
|
||||||
for prefix in prefixes {
|
for prefix in prefixes {
|
||||||
@ -309,8 +309,8 @@ fn delete_prefixes(
|
|||||||
pub fn compute_word_prefix_docids(
|
pub fn compute_word_prefix_docids(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
WordPrefixDocids::new(
|
WordPrefixDocids::new(
|
||||||
@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
|
|||||||
pub fn compute_exact_word_prefix_docids(
|
pub fn compute_exact_word_prefix_docids(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
WordPrefixDocids::new(
|
WordPrefixDocids::new(
|
||||||
@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
|
|||||||
pub fn compute_word_prefix_fid_docids(
|
pub fn compute_word_prefix_fid_docids(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
WordPrefixIntegerDocids::new(
|
WordPrefixIntegerDocids::new(
|
||||||
@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
|
|||||||
pub fn compute_word_prefix_position_docids(
|
pub fn compute_word_prefix_position_docids(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
WordPrefixIntegerDocids::new(
|
WordPrefixIntegerDocids::new(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user