diff --git a/milli/benches/criterion.rs b/milli/benches/criterion.rs index bdfe3d478..fb79a597d 100644 --- a/milli/benches/criterion.rs +++ b/milli/benches/criterion.rs @@ -33,7 +33,7 @@ fn bench_criterion(c: &mut criterion::Criterion) { ], criterion: Some(&["proximity"]), optional_words: false, - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "typo", @@ -53,7 +53,7 @@ fn bench_criterion(c: &mut criterion::Criterion) { ], criterion: Some(&["typo"]), optional_words: false, - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "words", @@ -67,31 +67,31 @@ fn bench_criterion(c: &mut criterion::Criterion) { "whathavenotnsuchforth and then a good amount of words tot pop in order to match the first one ", // 16 ], criterion: Some(&["words"]), - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "asc", queries: songs_base_queries, criterion: Some(&["asc"]), - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "desc", queries: songs_base_queries, criterion: Some(&["desc"]), - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "asc + default", queries: songs_base_queries, criterion: Some(&asc_default[..]), - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "desc + default", queries: songs_base_queries, criterion: Some(&desc_default[..]), - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, ]; diff --git a/milli/benches/normal_search.rs b/milli/benches/normal_search.rs index 39a343cf0..bd57a8c45 100644 --- a/milli/benches/normal_search.rs +++ b/milli/benches/normal_search.rs @@ -9,7 +9,7 @@ fn bench_normal(c: &mut criterion::Criterion) { queries: &[ "", ], - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "basic without quote", @@ -19,7 +19,7 @@ fn bench_normal(c: &mut criterion::Criterion) { "marcus miller", // 60 "Notstandskomitee", // 4 ], - ..utils::Conf::BASE + 
..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "basic with quote", @@ -29,7 +29,7 @@ fn bench_normal(c: &mut criterion::Criterion) { "\"marcus\" \"miller\"", // 60 "\"Notstandskomitee\"", // 4 ], - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, utils::Conf { group_name: "prefix search", @@ -40,7 +40,7 @@ fn bench_normal(c: &mut criterion::Criterion) { "i", "x", // only 7k results ], - ..utils::Conf::BASE + ..utils::Conf::BASE_SONGS }, ]; diff --git a/milli/benches/utils.rs b/milli/benches/utils.rs index 6c8360fe2..2eb067a02 100644 --- a/milli/benches/utils.rs +++ b/milli/benches/utils.rs @@ -2,7 +2,7 @@ use std::{fs::{File, create_dir_all, remove_dir_all}, time::Duration}; use heed::EnvOpenOptions; use criterion::BenchmarkId; -use milli::{FacetCondition, Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; +use milli::{FacetCondition, Index, update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat}}; pub struct Conf<'a> { /// where we are going to create our database.mmdb directory @@ -12,48 +12,82 @@ pub struct Conf<'a> { pub dataset: &'a str, pub group_name: &'a str, pub queries: &'a[&'a str], + /// here you can change which criteria are used and in which order. 
+ /// - if you specify something all the base configuration will be thrown out + /// - if you don't specify anything (None) the default configuration will be kept pub criterion: Option<&'a [&'a str]>, + /// the last chance to configure your database as you want + pub configure: fn(&mut Settings), pub facet_condition: Option<FacetCondition>, + /// enable or disable the optional words on the query pub optional_words: bool, } impl Conf<'_> { + fn nop(_builder: &mut Settings) {} + + fn songs_conf(builder: &mut Settings) { + let displayed_fields = [ + "id", "title", "album", "artist", "genre", "country", "released", "duration", + ] + .iter() + .map(|s| s.to_string()) + .collect(); + builder.set_displayed_fields(displayed_fields); + + let searchable_fields = ["title", "album", "artist"] + .iter() + .map(|s| s.to_string()) + .collect(); + builder.set_searchable_fields(searchable_fields); + } + pub const BASE: Self = Conf { database_name: "benches.mmdb", dataset: "", group_name: "", queries: &[], criterion: None, + configure: Self::nop, facet_condition: None, optional_words: true, }; + + pub const BASE_SONGS: Self = Conf { + dataset: "smol-songs", + configure: Self::songs_conf, + ..Self::BASE + }; } -pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>) -> Index { - match remove_dir_all(&database) { +pub fn base_setup(conf: &Conf) -> Index { + match remove_dir_all(&conf.database_name) { Ok(_) => (), Err(e) if e.kind() == std::io::ErrorKind::NotFound => (), Err(e) => panic!("{}", e), } - create_dir_all(&database).unwrap(); + create_dir_all(&conf.database_name).unwrap(); let mut options = EnvOpenOptions::new(); options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.max_readers(10); - let index = Index::new(options, database).unwrap(); + let index = Index::new(options, conf.database_name).unwrap(); let update_builder = UpdateBuilder::new(0); let mut wtxn = index.write_txn().unwrap(); let mut builder = update_builder.settings(&mut wtxn, &index); - if let Some(criterion) 
= criterion { + if let Some(criterion) = conf.criterion { builder.reset_faceted_fields(); builder.reset_criteria(); builder.reset_stop_words(); + let criterion = criterion.iter().map(|s| s.to_string()).collect(); builder.set_criteria(criterion); } + (conf.configure)(&mut builder); + builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -63,7 +97,7 @@ pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>) builder.update_format(UpdateFormat::Csv); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); // we called from cargo the current directory is supposed to be milli/milli - let reader = File::open(dataset).unwrap(); + let reader = File::open(conf.dataset).unwrap(); builder.execute(reader, |_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -72,8 +106,7 @@ pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>) pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { for conf in confs { - let criterion = conf.criterion.map(|s| s.iter().map(|s| s.to_string()).collect()); - let index = base_setup(conf.database_name, conf.dataset, criterion); + let index = base_setup(conf); let mut group = c.benchmark_group(&format!("{}: {}", conf.dataset, conf.group_name)); group.measurement_time(Duration::from_secs(10));