From 4fdbfd6048531c0cc2666062f8fdf7325480d5a8 Mon Sep 17 00:00:00 2001 From: tamo Date: Thu, 1 Apr 2021 18:54:14 +0200 Subject: [PATCH] push a first version of the benchmark for the typo --- milli/Cargo.toml | 2 +- milli/benches/README.md | 8 ++++++ milli/benches/{search.rs => typo.rs} | 33 ++++++++++++---------- milli/benches/utils.rs | 41 ++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 15 deletions(-) create mode 100644 milli/benches/README.md rename milli/benches/{search.rs => typo.rs} (52%) create mode 100644 milli/benches/utils.rs diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 3b25bb268..175c15679 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -61,5 +61,5 @@ rand = "0.8.3" default = [] [[bench]] -name = "search" +name = "typo" harness = false diff --git a/milli/benches/README.md b/milli/benches/README.md new file mode 100644 index 000000000..c02af0084 --- /dev/null +++ b/milli/benches/README.md @@ -0,0 +1,8 @@ +Benchmarks +========== + +For our benchmark we are using a small subset of the dataset songs.csv. 
It was generated with this command: +``` +xsv sample --seed 42 songs.csv -o smol_songs.csv +``` +The original songs.csv dataset is available [here](https://meili-datasets.s3.fr-par.scw.cloud/songs.csv.gz) diff --git a/milli/benches/search.rs b/milli/benches/typo.rs similarity index 52% rename from milli/benches/search.rs rename to milli/benches/typo.rs index a201e241c..9fbce8038 100644 --- a/milli/benches/search.rs +++ b/milli/benches/typo.rs @@ -1,22 +1,27 @@ -use std::time::Duration; +mod utils; -use heed::EnvOpenOptions; -use milli::Index; +use std::time::Duration; use criterion::{criterion_group, criterion_main, BenchmarkId}; -fn bench_search(c: &mut criterion::Criterion) { - let database = "books-4cpu.mmdb"; +fn bench_typo(c: &mut criterion::Criterion) { + let index = utils::base_setup(Some(vec!["typo".to_string()])); + let queries = [ - "minogue kylie", - "minogue kylie live", + "mongus ", + "thelonius monk ", + "Disnaylande ", + "the white striper ", + "indochie ", + "indochien ", + "klub des loopers ", + "fear of the duck ", + "michel depech ", + "stromal ", + "dire straights ", + "Arethla Franklin ", ]; - let mut options = EnvOpenOptions::new(); - options.map_size(100 * 1024 * 1024 * 1024); // 100 GB - options.max_readers(10); - let index = Index::new(options, database).unwrap(); - - let mut group = c.benchmark_group("search"); + let mut group = c.benchmark_group("typo"); group.sample_size(10); group.measurement_time(Duration::from_secs(12)); @@ -32,5 +37,5 @@ fn bench_search(c: &mut criterion::Criterion) { group.finish(); } -criterion_group!(benches, bench_search); +criterion_group!(benches, bench_typo); criterion_main!(benches); diff --git a/milli/benches/utils.rs b/milli/benches/utils.rs new file mode 100644 index 000000000..23c47ea76 --- /dev/null +++ b/milli/benches/utils.rs @@ -0,0 +1,41 @@ +use std::{fs::{File, create_dir_all}}; + +use heed::EnvOpenOptions; +use milli::{Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; + +pub fn 
base_setup(criteria: Option<Vec<String>>) -> Index { + let database = "songs.mmdb"; + create_dir_all(&database).unwrap(); + + let mut options = EnvOpenOptions::new(); + options.map_size(100 * 1024 * 1024 * 1024); // 100 GB + options.max_readers(10); + let index = Index::new(options, database).unwrap(); + + let update_builder = UpdateBuilder::new(0); + let mut wtxn = index.write_txn().unwrap(); + let mut builder = update_builder.settings(&mut wtxn, &index); + + if let Some(criteria) = criteria { + builder.reset_faceted_fields(); + builder.reset_criteria(); + builder.reset_stop_words(); + + builder.set_criteria(criteria); + } + + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let update_builder = UpdateBuilder::new(0); + let mut wtxn = index.write_txn().unwrap(); + let mut builder = update_builder.index_documents(&mut wtxn, &index); + builder.update_format(UpdateFormat::Csv); + builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); + // we are called from cargo, so the current directory is supposed to be milli/milli + let reader = File::open("benches/smol_songs.csv").unwrap(); + builder.execute(reader, |_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + index +}