push a first version of the benchmark for the typo

This commit is contained in:
tamo 2021-04-01 18:54:14 +02:00 committed by Tamo
parent 270da98c46
commit 4fdbfd6048
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
4 changed files with 69 additions and 15 deletions

View File

@ -61,5 +61,5 @@ rand = "0.8.3"
default = [] default = []
[[bench]] [[bench]]
name = "search" name = "typo"
harness = false harness = false

8
milli/benches/README.md Normal file
View File

@ -0,0 +1,8 @@
Benchmarks
==========
For our benchmark we are using a small subset of the dataset songs.csv. It was generated with this command:
```
xsv sample --seed 42 songs.csv -o smol_songs.csv
```
The original songs.csv dataset is available [here](https://meili-datasets.s3.fr-par.scw.cloud/songs.csv.gz).

View File

@ -1,22 +1,27 @@
use std::time::Duration; mod utils;
use heed::EnvOpenOptions; use std::time::Duration;
use milli::Index;
use criterion::{criterion_group, criterion_main, BenchmarkId}; use criterion::{criterion_group, criterion_main, BenchmarkId};
fn bench_search(c: &mut criterion::Criterion) { fn bench_typo(c: &mut criterion::Criterion) {
let database = "books-4cpu.mmdb"; let index = utils::base_setup(Some(vec!["typo".to_string()]));
let queries = [ let queries = [
"minogue kylie", "mongus ",
"minogue kylie live", "thelonius monk ",
"Disnaylande ",
"the white striper ",
"indochie ",
"indochien ",
"klub des loopers ",
"fear of the duck ",
"michel depech ",
"stromal ",
"dire straights ",
"Arethla Franklin ",
]; ];
let mut options = EnvOpenOptions::new(); let mut group = c.benchmark_group("typo");
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(10);
let index = Index::new(options, database).unwrap();
let mut group = c.benchmark_group("search");
group.sample_size(10); group.sample_size(10);
group.measurement_time(Duration::from_secs(12)); group.measurement_time(Duration::from_secs(12));
@ -32,5 +37,5 @@ fn bench_search(c: &mut criterion::Criterion) {
group.finish(); group.finish();
} }
criterion_group!(benches, bench_search); criterion_group!(benches, bench_typo);
criterion_main!(benches); criterion_main!(benches);

41
milli/benches/utils.rs Normal file
View File

@ -0,0 +1,41 @@
use std::{fs::{File, create_dir_all}};
use heed::EnvOpenOptions;
use milli::{Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
/// Builds the index shared by the benchmarks and returns it.
///
/// The function creates the `songs.mmdb` directory if it is missing, opens an
/// [`Index`] in it, runs a settings update and then indexes the CSV file
/// `benches/smol_songs.csv`, replacing documents that already exist.
///
/// When `criteria` is `Some`, the faceted fields, the criteria and the stop
/// words are reset before the given criteria are set. When it is `None`, the
/// settings update is executed without any of those calls.
///
/// # Panics
///
/// Every fallible step is unwrapped: creating the directory, opening the
/// index, opening or committing a write transaction, executing either update,
/// and opening the CSV file.
pub fn base_setup(criteria: Option<Vec<String>>) -> Index {
    let db_path = "songs.mmdb";
    create_dir_all(&db_path).unwrap();

    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
    env_options.max_readers(10);
    let index = Index::new(env_options, db_path).unwrap();

    // First write transaction: the settings update.
    let settings_update = UpdateBuilder::new(0);
    let mut settings_txn = index.write_txn().unwrap();
    let mut settings = settings_update.settings(&mut settings_txn, &index);
    if let Some(requested_criteria) = criteria {
        settings.reset_faceted_fields();
        settings.reset_criteria();
        settings.reset_stop_words();
        settings.set_criteria(requested_criteria);
    }
    settings.execute(|_, _| ()).unwrap();
    settings_txn.commit().unwrap();

    // Second write transaction: the documents, read as CSV.
    let documents_update = UpdateBuilder::new(0);
    let mut documents_txn = index.write_txn().unwrap();
    let mut indexing = documents_update.index_documents(&mut documents_txn, &index);
    indexing.update_format(UpdateFormat::Csv);
    indexing.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
    // The path is relative: when run through cargo, the current directory is
    // supposed to be milli/milli.
    let csv_file = File::open("benches/smol_songs.csv").unwrap();
    indexing.execute(csv_file, |_, _| ()).unwrap();
    documents_txn.commit().unwrap();

    index
}