Add configuration of the searchable and displayed fields, and a default configuration for the songs dataset

This commit is contained in:
tamo 2021-04-13 11:40:16 +02:00 committed by Tamo
parent ea0c6d8c40
commit 4b78ef31b6
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
3 changed files with 53 additions and 20 deletions

View File

@ -33,7 +33,7 @@ fn bench_criterion(c: &mut criterion::Criterion) {
], ],
criterion: Some(&["proximity"]), criterion: Some(&["proximity"]),
optional_words: false, optional_words: false,
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "typo", group_name: "typo",
@ -53,7 +53,7 @@ fn bench_criterion(c: &mut criterion::Criterion) {
], ],
criterion: Some(&["typo"]), criterion: Some(&["typo"]),
optional_words: false, optional_words: false,
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "words", group_name: "words",
@ -67,31 +67,31 @@ fn bench_criterion(c: &mut criterion::Criterion) {
"whathavenotnsuchforth and then a good amount of words tot pop in order to match the first one ", // 16 "whathavenotnsuchforth and then a good amount of words tot pop in order to match the first one ", // 16
], ],
criterion: Some(&["words"]), criterion: Some(&["words"]),
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "asc", group_name: "asc",
queries: songs_base_queries, queries: songs_base_queries,
criterion: Some(&["asc"]), criterion: Some(&["asc"]),
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "desc", group_name: "desc",
queries: songs_base_queries, queries: songs_base_queries,
criterion: Some(&["desc"]), criterion: Some(&["desc"]),
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "asc + default", group_name: "asc + default",
queries: songs_base_queries, queries: songs_base_queries,
criterion: Some(&asc_default[..]), criterion: Some(&asc_default[..]),
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "desc + default", group_name: "desc + default",
queries: songs_base_queries, queries: songs_base_queries,
criterion: Some(&desc_default[..]), criterion: Some(&desc_default[..]),
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
]; ];

View File

@ -9,7 +9,7 @@ fn bench_normal(c: &mut criterion::Criterion) {
queries: &[ queries: &[
"", "",
], ],
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "basic without quote", group_name: "basic without quote",
@ -19,7 +19,7 @@ fn bench_normal(c: &mut criterion::Criterion) {
"marcus miller", // 60 "marcus miller", // 60
"Notstandskomitee", // 4 "Notstandskomitee", // 4
], ],
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "basic with quote", group_name: "basic with quote",
@ -29,7 +29,7 @@ fn bench_normal(c: &mut criterion::Criterion) {
"\"marcus\" \"miller\"", // 60 "\"marcus\" \"miller\"", // 60
"\"Notstandskomitee\"", // 4 "\"Notstandskomitee\"", // 4
], ],
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
utils::Conf { utils::Conf {
group_name: "prefix search", group_name: "prefix search",
@ -40,7 +40,7 @@ fn bench_normal(c: &mut criterion::Criterion) {
"i", "i",
"x", // only 7k results "x", // only 7k results
], ],
..utils::Conf::BASE ..utils::Conf::BASE_SONGS
}, },
]; ];

View File

@ -2,7 +2,7 @@ use std::{fs::{File, create_dir_all, remove_dir_all}, time::Duration};
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use criterion::BenchmarkId; use criterion::BenchmarkId;
use milli::{FacetCondition, Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}}; use milli::{FacetCondition, Index, update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat}};
pub struct Conf<'a> { pub struct Conf<'a> {
/// where we are going to create our database.mmdb directory /// where we are going to create our database.mmdb directory
@ -12,48 +12,82 @@ pub struct Conf<'a> {
pub dataset: &'a str, pub dataset: &'a str,
pub group_name: &'a str, pub group_name: &'a str,
pub queries: &'a[&'a str], pub queries: &'a[&'a str],
/// here you can change which criteria are used and in which order.
/// - if you specify something, the whole base configuration will be thrown out
/// - if you don't specify anything (None), the default configuration will be kept
pub criterion: Option<&'a [&'a str]>, pub criterion: Option<&'a [&'a str]>,
/// the last chance to configure your database as you want
pub configure: fn(&mut Settings),
pub facet_condition: Option<FacetCondition>, pub facet_condition: Option<FacetCondition>,
/// enable or disable the optional words on the query
pub optional_words: bool, pub optional_words: bool,
} }
impl Conf<'_> { impl Conf<'_> {
/// Default `configure` hook: leaves the settings builder untouched.
fn nop(_settings: &mut Settings) {}
/// `configure` hook for the songs dataset.
///
/// Sets the displayed fields first, then the searchable fields, on the given
/// settings builder.
fn songs_conf(builder: &mut Settings) {
    // Field names handed to `set_displayed_fields`.
    const DISPLAYED: [&str; 8] = [
        "id", "title", "album", "artist", "genre", "country", "released", "duration",
    ];
    // Field names handed to `set_searchable_fields`.
    const SEARCHABLE: [&str; 3] = ["title", "album", "artist"];

    builder.set_displayed_fields(DISPLAYED.iter().map(|name| name.to_string()).collect());
    builder.set_searchable_fields(SEARCHABLE.iter().map(|name| name.to_string()).collect());
}
pub const BASE: Self = Conf { pub const BASE: Self = Conf {
database_name: "benches.mmdb", database_name: "benches.mmdb",
dataset: "", dataset: "",
group_name: "", group_name: "",
queries: &[], queries: &[],
criterion: None, criterion: None,
configure: Self::nop,
facet_condition: None, facet_condition: None,
optional_words: true, optional_words: true,
}; };
/// Base configuration for the songs benchmarks: selects the `smol-songs`
/// dataset and installs `songs_conf` as the `configure` hook; every other
/// field is taken from `BASE`.
pub const BASE_SONGS: Self = Conf {
    configure: Self::songs_conf,
    dataset: "smol-songs",
    ..Self::BASE
};
} }
pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>) -> Index { pub fn base_setup(conf: &Conf) -> Index {
match remove_dir_all(&database) { match remove_dir_all(&conf.database_name) {
Ok(_) => (), Ok(_) => (),
Err(e) if e.kind() == std::io::ErrorKind::NotFound => (), Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
Err(e) => panic!("{}", e), Err(e) => panic!("{}", e),
} }
create_dir_all(&database).unwrap(); create_dir_all(&conf.database_name).unwrap();
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(10); options.max_readers(10);
let index = Index::new(options, database).unwrap(); let index = Index::new(options, conf.database_name).unwrap();
let update_builder = UpdateBuilder::new(0); let update_builder = UpdateBuilder::new(0);
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = update_builder.settings(&mut wtxn, &index); let mut builder = update_builder.settings(&mut wtxn, &index);
if let Some(criterion) = criterion { if let Some(criterion) = conf.criterion {
builder.reset_faceted_fields(); builder.reset_faceted_fields();
builder.reset_criteria(); builder.reset_criteria();
builder.reset_stop_words(); builder.reset_stop_words();
let criterion = criterion.iter().map(|s| s.to_string()).collect();
builder.set_criteria(criterion); builder.set_criteria(criterion);
} }
(conf.configure)(&mut builder);
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -63,7 +97,7 @@ pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>)
builder.update_format(UpdateFormat::Csv); builder.update_format(UpdateFormat::Csv);
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
// we called from cargo the current directory is supposed to be milli/milli // we called from cargo the current directory is supposed to be milli/milli
let reader = File::open(dataset).unwrap(); let reader = File::open(conf.dataset).unwrap();
builder.execute(reader, |_, _| ()).unwrap(); builder.execute(reader, |_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -72,8 +106,7 @@ pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>)
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
for conf in confs { for conf in confs {
let criterion = conf.criterion.map(|s| s.iter().map(|s| s.to_string()).collect()); let index = base_setup(conf);
let index = base_setup(conf.database_name, conf.dataset, criterion);
let mut group = c.benchmark_group(&format!("{}: {}", conf.dataset, conf.group_name)); let mut group = c.benchmark_group(&format!("{}: {}", conf.dataset, conf.group_name));
group.measurement_time(Duration::from_secs(10)); group.measurement_time(Duration::from_secs(10));