From ea0c6d8c401a3ee37c14a62878e4b1641e08d726 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 13 Apr 2021 10:44:27 +0200 Subject: [PATCH] add a bunch of queries and start the introduction of the filters and the new dataset --- milli/benches/criterion.rs | 48 ++++++++++++++++++++++++++++++-- milli/benches/normal_search.rs | 51 ++++++++++++++++++++++++++++++++++ milli/benches/utils.rs | 43 ++++++++++++++++++++++------ 3 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 milli/benches/normal_search.rs diff --git a/milli/benches/criterion.rs b/milli/benches/criterion.rs index 3f0b6d6b7..bdfe3d478 100644 --- a/milli/benches/criterion.rs +++ b/milli/benches/criterion.rs @@ -3,6 +3,24 @@ mod utils; use criterion::{criterion_group, criterion_main}; fn bench_criterion(c: &mut criterion::Criterion) { + let songs_base_queries = &[ + "mingus ", + "thelonious monk ", + "Disneyland ", + "the white stripes ", + "indochine ", + "klub des loosers ", + "fear of the dark ", + "michel delpech ", + "stromae ", + "dire straits ", + "aretha franklin ", + ]; + let default_criterion: Vec<String> = milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect(); + let default_criterion = default_criterion.iter().map(|s| s.as_str()); + let asc_default: Vec<&str> = std::iter::once("asc").chain(default_criterion.clone()).collect(); + let desc_default: Vec<&str> = std::iter::once("desc").chain(default_criterion.clone()).collect(); + let confs = &[ utils::Conf { group_name: "proximity", @@ -15,6 +33,7 @@ fn bench_criterion(c: &mut criterion::Criterion) { ], criterion: Some(&["proximity"]), optional_words: false, + ..utils::Conf::BASE }, utils::Conf { group_name: "typo", @@ -34,6 +53,7 @@ fn bench_criterion(c: &mut criterion::Criterion) { ], criterion: Some(&["typo"]), optional_words: false, + ..utils::Conf::BASE }, utils::Conf { group_name: "words", @@ -47,8 +67,32 @@ fn bench_criterion(c: &mut criterion::Criterion) { "whathavenotnsuchforth and then a good amount of 
words tot pop in order to match the first one ", // 16 ], criterion: Some(&["words"]), - optional_words: true, - } + ..utils::Conf::BASE + }, + utils::Conf { + group_name: "asc", + queries: songs_base_queries, + criterion: Some(&["asc"]), + ..utils::Conf::BASE + }, + utils::Conf { + group_name: "desc", + queries: songs_base_queries, + criterion: Some(&["desc"]), + ..utils::Conf::BASE + }, + utils::Conf { + group_name: "asc + default", + queries: songs_base_queries, + criterion: Some(&asc_default[..]), + ..utils::Conf::BASE + }, + utils::Conf { + group_name: "desc + default", + queries: songs_base_queries, + criterion: Some(&desc_default[..]), + ..utils::Conf::BASE + }, ]; utils::run_benches(c, confs); diff --git a/milli/benches/normal_search.rs b/milli/benches/normal_search.rs new file mode 100644 index 000000000..39a343cf0 --- /dev/null +++ b/milli/benches/normal_search.rs @@ -0,0 +1,51 @@ +mod utils; + +use criterion::{criterion_group, criterion_main}; + +fn bench_normal(c: &mut criterion::Criterion) { + let confs = &[ + utils::Conf { + group_name: "basic placeholder", + queries: &[ + "", + ], + ..utils::Conf::BASE + }, + utils::Conf { + group_name: "basic without quote", + queries: &[ + "david bowie", // 1200 + "michael jackson", // 600 + "marcus miller", // 60 + "Notstandskomitee", // 4 + ], + ..utils::Conf::BASE + }, + utils::Conf { + group_name: "basic with quote", + queries: &[ + "\"david\" \"bowie\"", // 1200 + "\"michael\" \"jackson\"", // 600 + "\"marcus\" \"miller\"", // 60 + "\"Notstandskomitee\"", // 4 + ], + ..utils::Conf::BASE + }, + utils::Conf { + group_name: "prefix search", + queries: &[ + "s", // 500k+ results + "a", + "b", + "i", + "x", // only 7k results + ], + ..utils::Conf::BASE + }, + ]; + + utils::run_benches(c, confs); +} + +criterion_group!(benches, bench_normal); +criterion_main!(benches); diff --git a/milli/benches/utils.rs b/milli/benches/utils.rs index c608a3ef3..6c8360fe2 100644 --- a/milli/benches/utils.rs +++ 
b/milli/benches/utils.rs @@ -1,18 +1,40 @@ -use std::{fs::{File, create_dir_all}, time::Duration}; +use std::{fs::{File, create_dir_all, remove_dir_all}, time::Duration}; use heed::EnvOpenOptions; use criterion::BenchmarkId; -use milli::{Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; +use milli::{FacetCondition, Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; pub struct Conf<'a> { + /// where we are going to create our database.mmdb directory + /// each benchmark will first try to delete it and then recreate it + pub database_name: &'a str, + /// the dataset to be used, it must be an uncompressed csv + pub dataset: &'a str, pub group_name: &'a str, pub queries: &'a[&'a str], pub criterion: Option<&'a [&'a str]>, + pub facet_condition: Option<FacetCondition>, pub optional_words: bool, } -pub fn base_setup(criterion: Option<Vec<String>>) -> Index { - let database = "songs.mmdb"; +impl Conf<'_> { + pub const BASE: Self = Conf { + database_name: "benches.mmdb", + dataset: "", + group_name: "", + queries: &[], + criterion: None, + facet_condition: None, + optional_words: true, + }; +} + +pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>) -> Index { + match remove_dir_all(&database) { + Ok(_) => (), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => (), + Err(e) => panic!("{}", e), + } create_dir_all(&database).unwrap(); let mut options = EnvOpenOptions::new(); @@ -41,7 +63,7 @@ pub fn base_setup(criterion: Option<Vec<String>>) -> Index { builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); // we called from cargo the current directory is supposed to be milli/milli - let reader = File::open("benches/smol_songs.csv").unwrap(); + let reader = File::open(dataset).unwrap(); builder.execute(reader, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -51,16 +73,21 @@ pub fn base_setup(criterion: Option<Vec<String>>) -> Index { pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { for conf in 
confs { let criterion = conf.criterion.map(|s| s.iter().map(|s| s.to_string()).collect()); - let index = base_setup(criterion); + let index = base_setup(conf.database_name, conf.dataset, criterion); - let mut group = c.benchmark_group(conf.group_name); + let mut group = c.benchmark_group(&format!("{}: {}", conf.dataset, conf.group_name)); group.measurement_time(Duration::from_secs(10)); for &query in conf.queries { group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| { b.iter(|| { let rtxn = index.read_txn().unwrap(); - let _documents_ids = index.search(&rtxn).query(query).optional_words(conf.optional_words).execute().unwrap(); + let mut search = index.search(&rtxn); + search.query(query).optional_words(conf.optional_words); + if let Some(facet_condition) = conf.facet_condition.clone() { + search.facet_condition(facet_condition); + } + let _ids = search.execute().unwrap(); }); }); }