add a bunch of queries and start the introduction of the filters and the new dataset

This commit is contained in:
tamo 2021-04-13 10:44:27 +02:00 committed by Tamo
parent 3def42abd8
commit ea0c6d8c40
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
3 changed files with 132 additions and 10 deletions

View File

@ -3,6 +3,24 @@ mod utils;
use criterion::{criterion_group, criterion_main}; use criterion::{criterion_group, criterion_main};
fn bench_criterion(c: &mut criterion::Criterion) { fn bench_criterion(c: &mut criterion::Criterion) {
let songs_base_queries = &[
"mingus ",
"thelonious monk ",
"Disneyland ",
"the white stripes ",
"indochine ",
"klub des loosers ",
"fear of the dark ",
"michel delpech ",
"stromae ",
"dire straits ",
"aretha franklin ",
];
let default_criterion: Vec<String> = milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
let default_criterion = default_criterion.iter().map(|s| s.as_str());
let asc_default: Vec<&str> = std::iter::once("asc").chain(default_criterion.clone()).collect();
let desc_default: Vec<&str> = std::iter::once("desc").chain(default_criterion.clone()).collect();
let confs = &[ let confs = &[
utils::Conf { utils::Conf {
group_name: "proximity", group_name: "proximity",
@ -15,6 +33,7 @@ fn bench_criterion(c: &mut criterion::Criterion) {
], ],
criterion: Some(&["proximity"]), criterion: Some(&["proximity"]),
optional_words: false, optional_words: false,
..utils::Conf::BASE
}, },
utils::Conf { utils::Conf {
group_name: "typo", group_name: "typo",
@ -34,6 +53,7 @@ fn bench_criterion(c: &mut criterion::Criterion) {
], ],
criterion: Some(&["typo"]), criterion: Some(&["typo"]),
optional_words: false, optional_words: false,
..utils::Conf::BASE
}, },
utils::Conf { utils::Conf {
group_name: "words", group_name: "words",
@ -47,8 +67,32 @@ fn bench_criterion(c: &mut criterion::Criterion) {
"whathavenotnsuchforth and then a good amount of words tot pop in order to match the first one ", // 16 "whathavenotnsuchforth and then a good amount of words tot pop in order to match the first one ", // 16
], ],
criterion: Some(&["words"]), criterion: Some(&["words"]),
optional_words: true, ..utils::Conf::BASE
} },
utils::Conf {
group_name: "asc",
queries: songs_base_queries,
criterion: Some(&["asc"]),
..utils::Conf::BASE
},
utils::Conf {
group_name: "desc",
queries: songs_base_queries,
criterion: Some(&["desc"]),
..utils::Conf::BASE
},
utils::Conf {
group_name: "asc + default",
queries: songs_base_queries,
criterion: Some(&asc_default[..]),
..utils::Conf::BASE
},
utils::Conf {
group_name: "desc + default",
queries: songs_base_queries,
criterion: Some(&desc_default[..]),
..utils::Conf::BASE
},
]; ];
utils::run_benches(c, confs); utils::run_benches(c, confs);

View File

@ -0,0 +1,51 @@
mod utils;
use criterion::{criterion_group, criterion_main};
/// Registers the baseline search benchmark groups with criterion.
///
/// Four groups are declared: a placeholder (empty) query, plain multi-word
/// queries, the same queries with every word quoted, and single-letter
/// queries. Every field that is not set explicitly is taken from
/// `utils::Conf::BASE`; the resulting list is handed to `utils::run_benches`.
///
/// NOTE(review): no entry overrides `dataset` or `database_name`, so both
/// come from `utils::Conf::BASE` — confirm that BASE points at the CSV these
/// queries were written for.
fn bench_normal(c: &mut criterion::Criterion) {
    // The trailing numbers on the queries below are kept from the original
    // annotations; presumably they are the expected hit counts.
    let placeholder_queries: &[&str] = &[""];

    let unquoted_queries: &[&str] = &[
        "david bowie",      // 1200
        "michael jackson",  // 600
        "marcus miller",    // 60
        "Notstandskomitee", // 4
    ];

    let quoted_queries: &[&str] = &[
        "\"david\" \"bowie\"",     // 1200
        "\"michael\" \"jackson\"", // 600
        "\"marcus\" \"miller\"",   // 60
        "\"Notstandskomitee\"",    // 4
    ];

    let prefix_queries: &[&str] = &[
        "s", // 500k+ results
        "a",
        "b",
        "i",
        "x", // only 7k results
    ];

    // Declaration order is preserved: groups are run in the order listed.
    let confs = [
        utils::Conf {
            group_name: "basic placeholder",
            queries: placeholder_queries,
            ..utils::Conf::BASE
        },
        utils::Conf {
            group_name: "basic without quote",
            queries: unquoted_queries,
            ..utils::Conf::BASE
        },
        utils::Conf {
            group_name: "basic with quote",
            queries: quoted_queries,
            ..utils::Conf::BASE
        },
        utils::Conf {
            group_name: "prefix search",
            queries: prefix_queries,
            ..utils::Conf::BASE
        },
    ];

    utils::run_benches(c, &confs);
}
criterion_group!(benches, bench_normal);
criterion_main!(benches);

View File

@ -1,18 +1,40 @@
use std::{fs::{File, create_dir_all}, time::Duration}; use std::{fs::{File, create_dir_all, remove_dir_all}, time::Duration};
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use criterion::BenchmarkId; use criterion::BenchmarkId;
use milli::{Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; use milli::{FacetCondition, Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
pub struct Conf<'a> { pub struct Conf<'a> {
/// where we are going to create our database.mmdb directory
/// each benchmark will first try to delete it and then recreate it
pub database_name: &'a str,
/// the dataset to be used, it must be an uncompressed csv
pub dataset: &'a str,
pub group_name: &'a str, pub group_name: &'a str,
pub queries: &'a[&'a str], pub queries: &'a[&'a str],
pub criterion: Option<&'a [&'a str]>, pub criterion: Option<&'a [&'a str]>,
pub facet_condition: Option<FacetCondition>,
pub optional_words: bool, pub optional_words: bool,
} }
pub fn base_setup(criterion: Option<Vec<String>>) -> Index { impl Conf<'_> {
let database = "songs.mmdb"; pub const BASE: Self = Conf {
database_name: "benches.mmdb",
dataset: "",
group_name: "",
queries: &[],
criterion: None,
facet_condition: None,
optional_words: true,
};
}
pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>) -> Index {
match remove_dir_all(&database) {
Ok(_) => (),
Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
Err(e) => panic!("{}", e),
}
create_dir_all(&database).unwrap(); create_dir_all(&database).unwrap();
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
@ -41,7 +63,7 @@ pub fn base_setup(criterion: Option<Vec<String>>) -> Index {
builder.update_format(UpdateFormat::Csv); builder.update_format(UpdateFormat::Csv);
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
// we called from cargo the current directory is supposed to be milli/milli // we called from cargo the current directory is supposed to be milli/milli
let reader = File::open("benches/smol_songs.csv").unwrap(); let reader = File::open(dataset).unwrap();
builder.execute(reader, |_, _| ()).unwrap(); builder.execute(reader, |_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -51,16 +73,21 @@ pub fn base_setup(criterion: Option<Vec<String>>) -> Index {
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
for conf in confs { for conf in confs {
let criterion = conf.criterion.map(|s| s.iter().map(|s| s.to_string()).collect()); let criterion = conf.criterion.map(|s| s.iter().map(|s| s.to_string()).collect());
let index = base_setup(criterion); let index = base_setup(conf.database_name, conf.dataset, criterion);
let mut group = c.benchmark_group(conf.group_name); let mut group = c.benchmark_group(&format!("{}: {}", conf.dataset, conf.group_name));
group.measurement_time(Duration::from_secs(10)); group.measurement_time(Duration::from_secs(10));
for &query in conf.queries { for &query in conf.queries {
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| { group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
b.iter(|| { b.iter(|| {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let _documents_ids = index.search(&rtxn).query(query).optional_words(conf.optional_words).execute().unwrap(); let mut search = index.search(&rtxn);
search.query(query).optional_words(conf.optional_words);
if let Some(facet_condition) = conf.facet_condition.clone() {
search.facet_condition(facet_condition);
}
let _ids = search.execute().unwrap();
}); });
}); });
} }