454: Reintroduce appending sorted entries when possible r=Kerollmops a=Kerollmops

This PR modifies the `sorter_into_lmdb_database` function so that, when the database is empty, sorted entries are appended directly instead of being get-put-merged one by one. This should improve indexing speed for empty databases.

```txt
group                                             indexing_main_25123af3                 indexing_reintroduce-appending-sorted-values_c05e42a8
-----                                             ----------------------                 -----------------------------------------------------
indexing/Indexing movies with default settings    1.07      17.8±0.99s        ? ?/sec    1.00      16.6±1.04s        ? ?/sec
indexing/Indexing songs with default settings     1.00      57.0±6.01s        ? ?/sec    1.05      60.1±7.07s        ? ?/sec
indexing/Indexing songs without any facets        1.10      51.8±5.36s        ? ?/sec    1.00      47.3±3.30s        ? ?/sec
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2022-02-28 14:55:37 +00:00 committed by GitHub
commit 21898ffc60
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -3,7 +3,7 @@ use std::fs::File;
use std::io::{self, Seek, SeekFrom}; use std::io::{self, Seek, SeekFrom};
use std::time::Instant; use std::time::Instant;
use grenad::{CompressionType, MergerIter, Reader, Sorter}; use grenad::{CompressionType, Reader, Sorter};
use heed::types::ByteSlice; use heed::types::ByteSlice;
use log::debug; use log::debug;
@ -209,36 +209,34 @@ pub fn sorter_into_lmdb_database(
debug!("Writing MTBL sorter..."); debug!("Writing MTBL sorter...");
let before = Instant::now(); let before = Instant::now();
merger_iter_into_lmdb_database(wtxn, database, sorter.into_stream_merger_iter()?, merge)?; let mut merger_iter = sorter.into_stream_merger_iter()?;
if database.is_empty(wtxn)? {
debug!("MTBL sorter writen in {:.02?}!", before.elapsed()); let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
Ok(()) while let Some((k, v)) = merger_iter.next()? {
} // safety: we don't keep references from inside the LMDB database.
unsafe { out_iter.append(k, v)? };
fn merger_iter_into_lmdb_database<R: io::Read + io::Seek>( }
wtxn: &mut heed::RwTxn, } else {
database: heed::PolyDatabase, while let Some((k, v)) = merger_iter.next()? {
mut merger_iter: MergerIter<R, MergeFn>, let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?;
merge: MergeFn, match iter.next().transpose()? {
) -> Result<()> { Some((key, old_val)) if key == k => {
while let Some((k, v)) = merger_iter.next()? { let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?; let val = merge(k, &vals).map_err(|_| {
match iter.next().transpose()? { // TODO just wrap this error?
Some((key, old_val)) if key == k => { InternalError::IndexingMergingKeys { process: "get-put-merge" }
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)]; })?;
let val = merge(k, &vals).map_err(|_| { // safety: we don't keep references from inside the LMDB database.
// TODO just wrap this error? unsafe { iter.put_current(k, &val)? };
InternalError::IndexingMergingKeys { process: "get-put-merge" } }
})?; _ => {
// safety: we don't keep references from inside the LMDB database. drop(iter);
unsafe { iter.put_current(k, &val)? }; database.put::<_, ByteSlice, ByteSlice>(wtxn, k, v)?;
} }
_ => {
drop(iter);
database.put::<_, ByteSlice, ByteSlice>(wtxn, k, v)?;
} }
} }
} }
debug!("MTBL sorter writen in {:.02?}!", before.elapsed());
Ok(()) Ok(())
} }