From 5132a106a160641513084f6a880bf7ba09a03d18 Mon Sep 17 00:00:00 2001 From: tamo Date: Tue, 13 Apr 2021 18:34:00 +0200 Subject: [PATCH] refactorize everything related to the songs dataset in a songs benchmark file --- milli/Cargo.toml | 2 +- milli/benches/criterion.rs | 102 ------------------ milli/benches/normal_search.rs | 51 --------- milli/benches/songs.rs | 185 +++++++++++++++++++++++++++++++++ milli/benches/utils.rs | 33 ------ 5 files changed, 186 insertions(+), 187 deletions(-) delete mode 100644 milli/benches/criterion.rs delete mode 100644 milli/benches/normal_search.rs create mode 100644 milli/benches/songs.rs diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 399b04428..2bdb3f4dc 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -61,5 +61,5 @@ rand = "0.8.3" default = [] [[bench]] -name = "criterion" +name = "songs" harness = false diff --git a/milli/benches/criterion.rs b/milli/benches/criterion.rs deleted file mode 100644 index 3049557f0..000000000 --- a/milli/benches/criterion.rs +++ /dev/null @@ -1,102 +0,0 @@ -mod utils; - -use criterion::{criterion_group, criterion_main}; - -fn bench_criterion(c: &mut criterion::Criterion) { - let songs_base_queries = &[ - "mingus ", - "thelonious monk ", - "Disneyland ", - "the white stripes ", - "indochine ", - "klub des loosers ", - "fear of the dark ", - "michel delpech ", - "stromae ", - "dire straits ", - "aretha franklin ", - ]; - let default_criterion: Vec<String> = milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect(); - let default_criterion = default_criterion.iter().map(|s| s.as_str()); - let asc_default: Vec<&str> = std::iter::once("asc(released-timestamp)").chain(default_criterion.clone()).collect(); - let desc_default: Vec<&str> = std::iter::once("desc(released-timestamp)").chain(default_criterion.clone()).collect(); - - let confs = &[ - utils::Conf { - group_name: "proximity", - queries: &[ - "black saint sinner lady ", - "les dangeureuses 1960 ", - "The Disneyland 
Sing-Along Chorus ", - "Under Great Northern Lights ", - "7000 Danses Un Jour Dans Notre Vie", - ], - criterion: Some(&["proximity"]), - optional_words: false, - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "typo", - queries: &[ - "mongus ", - "thelonius monk ", - "Disnaylande ", - "the white striper ", - "indochie ", - "indochien ", - "klub des loopers ", - "fear of the duck ", - "michel depech ", - "stromal ", - "dire straights ", - "Arethla Franklin ", - ], - criterion: Some(&["typo"]), - optional_words: false, - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "words", - queries: &[ - "the black saint and the sinner lady and the good doggo ", // four words to pop - "les liaisons dangeureuses 1793 ", // one word to pop - "The Disneyland Children's Sing-Alone song ", // two words to pop - "seven nation mummy ", // one word to pop - "7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop - "Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop - "whathavenotnsuchforth and then a good amount of words tot pop in order to match the first one ", // 16 - ], - criterion: Some(&["words"]), - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "asc", - queries: songs_base_queries, - criterion: Some(&["asc(released-timestamp)"]), - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "desc", - queries: songs_base_queries, - criterion: Some(&["desc(released-timestamp)"]), - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "asc + default", - queries: songs_base_queries, - criterion: Some(&asc_default[..]), - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "desc + default", - queries: songs_base_queries, - criterion: Some(&desc_default[..]), - ..utils::Conf::BASE_SONGS - }, - ]; - - utils::run_benches(c, confs); -} - -criterion_group!(benches, bench_criterion); -criterion_main!(benches); diff --git a/milli/benches/normal_search.rs 
b/milli/benches/normal_search.rs deleted file mode 100644 index bd57a8c45..000000000 --- a/milli/benches/normal_search.rs +++ /dev/null @@ -1,51 +0,0 @@ -mod utils; - -use criterion::{criterion_group, criterion_main}; - -fn bench_normal(c: &mut criterion::Criterion) { - let confs = &[ - utils::Conf { - group_name: "basic placeholder", - queries: &[ - "", - ], - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "basic without quote", - queries: &[ - "david bowie", // 1200 - "michael jackson", // 600 - "marcus miller", // 60 - "Notstandskomitee", // 4 - ], - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "basic with quote", - queries: &[ - "\"david\" \"bowie\"", // 1200 - "\"michael\" \"jackson\"", // 600 - "\"marcus\" \"miller\"", // 60 - "\"Notstandskomitee\"", // 4 - ], - ..utils::Conf::BASE_SONGS - }, - utils::Conf { - group_name: "prefix search", - queries: &[ - "s", // 500k+ results - "a", - "b", - "i", - "x", // only 7k results - ], - ..utils::Conf::BASE_SONGS - }, - ]; - - utils::run_benches(c, confs); -} - -criterion_group!(benches, bench_normal); -criterion_main!(benches); diff --git a/milli/benches/songs.rs b/milli/benches/songs.rs new file mode 100644 index 000000000..586b8d4ef --- /dev/null +++ b/milli/benches/songs.rs @@ -0,0 +1,185 @@ +mod utils; + +use criterion::{criterion_group, criterion_main}; +use milli::update::Settings; +use utils::Conf; + +fn base_conf(builder: &mut Settings) { + let displayed_fields = [ + "id", "title", "album", "artist", "genre", "country", "released", "duration", + ] + .iter() + .map(|s| s.to_string()) + .collect(); + builder.set_displayed_fields(displayed_fields); + + let searchable_fields = ["title", "album", "artist"] + .iter() + .map(|s| s.to_string()) + .collect(); + builder.set_searchable_fields(searchable_fields); + + let faceted_fields = [ + ("released-timestamp", "integer"), + ("duration-float", "float"), + ("genre", "string"), + ("country", "string"), + ("artist", "string"), + ] + .iter() 
+ .map(|(a, b)| (a.to_string(), b.to_string())) + .collect(); + builder.set_faceted_fields(faceted_fields); +} + +const BASE_CONF: Conf = Conf { + dataset: "smol-songs.csv", + queries: &[ + "mingus ", + "thelonious monk ", + "Disneyland ", + "the white stripes ", + "indochine ", + "klub des loosers ", + "fear of the dark ", + "michel delpech ", + "stromae ", + "dire straits ", + "aretha franklin ", + ], + configure: base_conf, + ..Conf::BASE +}; + +fn bench_songs(c: &mut criterion::Criterion) { + let default_criterion: Vec<String> = milli::default_criteria() + .iter() + .map(|criteria| criteria.to_string()) + .collect(); + let default_criterion = default_criterion.iter().map(|s| s.as_str()); + let asc_default: Vec<&str> = std::iter::once("asc(released-timestamp)") + .chain(default_criterion.clone()) + .collect(); + let desc_default: Vec<&str> = std::iter::once("desc(released-timestamp)") + .chain(default_criterion.clone()) + .collect(); + + let confs = &[ + /* first we bench each criterion alone */ + utils::Conf { + group_name: "proximity", + queries: &[ + "black saint sinner lady ", + "les dangeureuses 1960 ", + "The Disneyland Sing-Along Chorus ", + "Under Great Northern Lights ", + "7000 Danses Un Jour Dans Notre Vie", + ], + criterion: Some(&["proximity"]), + optional_words: false, + ..BASE_CONF + }, + utils::Conf { + group_name: "typo", + queries: &[ + "mongus ", + "thelonius monk ", + "Disnaylande ", + "the white striper ", + "indochie ", + "indochien ", + "klub des loopers ", + "fear of the duck ", + "michel depech ", + "stromal ", + "dire straights ", + "Arethla Franklin ", + ], + criterion: Some(&["typo"]), + optional_words: false, + ..BASE_CONF + }, + utils::Conf { + group_name: "words", + queries: &[ + "the black saint and the sinner lady and the good doggo ", // four words to pop + "les liaisons dangeureuses 1793 ", // one word to pop + "The Disneyland Children's Sing-Alone song ", // two words to pop + "seven nation mummy ", // one word to pop + "7000 Danses / 
Le Baiser / je me trompe de mots ", // four words to pop + "Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop + "whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 16 + ], + criterion: Some(&["words"]), + ..BASE_CONF + }, + utils::Conf { + group_name: "asc", + criterion: Some(&["asc(released-timestamp)"]), + ..BASE_CONF + }, + utils::Conf { + group_name: "desc", + criterion: Some(&["desc(released-timestamp)"]), + ..BASE_CONF + }, + + /* then we bench the asc and desc criterion on top of the default criterion */ + utils::Conf { + group_name: "asc + default", + criterion: Some(&asc_default[..]), + ..BASE_CONF + }, + utils::Conf { + group_name: "desc + default", + criterion: Some(&desc_default[..]), + ..BASE_CONF + }, + + /* then we bench some global / normal search with all the default criterion in the default + * order */ + utils::Conf { + group_name: "basic placeholder", + queries: &[ + "", + ], + ..BASE_CONF + }, + utils::Conf { + group_name: "basic without quote", + queries: &[ + "david bowie", // 1200 + "michael jackson", // 600 + "marcus miller", // 60 + "Notstandskomitee", // 4 + ], + ..BASE_CONF + }, + utils::Conf { + group_name: "basic with quote", + queries: &[ + "\"david\" \"bowie\"", // 1200 + "\"michael\" \"jackson\"", // 600 + "\"marcus\" \"miller\"", // 60 + "\"Notstandskomitee\"", // 4 + ], + ..BASE_CONF + }, + utils::Conf { + group_name: "prefix search", + queries: &[ + "s", // 500k+ results + "a", + "b", + "i", + "x", // only 7k results + ], + ..BASE_CONF + }, + ]; + + utils::run_benches(c, confs); +} + +criterion_group!(benches, bench_songs); +criterion_main!(benches); diff --git a/milli/benches/utils.rs b/milli/benches/utils.rs index 9b58b54b8..b101adb63 100644 --- a/milli/benches/utils.rs +++ b/milli/benches/utils.rs @@ -26,34 +26,6 @@ pub struct Conf<'a> { impl Conf<'_> { fn nop(_builder: &mut Settings) {} - fn songs_conf(builder: &mut Settings) { - let 
displayed_fields = [ - "id", "title", "album", "artist", "genre", "country", "released", "duration", - ] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_displayed_fields(displayed_fields); - - let searchable_fields = ["title", "album", "artist"] - .iter() - .map(|s| s.to_string()) - .collect(); - builder.set_searchable_fields(searchable_fields); - - let faceted_fields = [ - ("released-timestamp", "integer"), - ("duration-float", "float"), - ("genre", "string"), - ("country", "string"), - ("artist", "string"), - ] - .iter() - .map(|(a, b)| (a.to_string(), b.to_string())) - .collect(); - builder.set_faceted_fields(faceted_fields); - } - pub const BASE: Self = Conf { database_name: "benches.mmdb", dataset: "", @@ -65,11 +37,6 @@ impl Conf<'_> { optional_words: true, }; - pub const BASE_SONGS: Self = Conf { - dataset: "smol-songs.csv", - configure: Self::songs_conf, - ..Self::BASE - }; } pub fn base_setup(conf: &Conf) -> Index {