Refactor everything related to the songs dataset into a dedicated songs benchmark file

This commit is contained in:
tamo 2021-04-13 18:34:00 +02:00 committed by Tamo
parent 136efd6b53
commit 5132a106a1
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
5 changed files with 186 additions and 187 deletions

View File

@ -61,5 +61,5 @@ rand = "0.8.3"
default = [] default = []
[[bench]] [[bench]]
name = "criterion" name = "songs"
harness = false harness = false

View File

@ -1,102 +0,0 @@
mod utils;
use criterion::{criterion_group, criterion_main};
/// Declares the per-criterion benchmark groups for the songs dataset and hands
/// them to `utils::run_benches`.
///
/// Each group starts from `utils::Conf::BASE_SONGS` and overrides the group
/// name, the queries and the criterion list; some groups also disable
/// `optional_words`.
fn bench_criterion(c: &mut criterion::Criterion) {
// Queries reused by the asc/desc groups below.
let songs_base_queries = &[
"mingus ",
"thelonious monk ",
"Disneyland ",
"the white stripes ",
"indochine ",
"klub des loosers ",
"fear of the dark ",
"michel delpech ",
"stromae ",
"dire straits ",
"aretha franklin ",
];
// Render the default criteria as strings, then build two lists that put an
// asc/desc rule on `released-timestamp` in front of those defaults.
let default_criterion: Vec<String> = milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
let default_criterion = default_criterion.iter().map(|s| s.as_str());
let asc_default: Vec<&str> = std::iter::once("asc(released-timestamp)").chain(default_criterion.clone()).collect();
let desc_default: Vec<&str> = std::iter::once("desc(released-timestamp)").chain(default_criterion.clone()).collect();
let confs = &[
// Groups that use a single criterion.
utils::Conf {
group_name: "proximity",
queries: &[
"black saint sinner lady ",
"les dangeureuses 1960 ",
"The Disneyland Sing-Along Chorus ",
"Under Great Northern Lights ",
"7000 Danses Un Jour Dans Notre Vie",
],
criterion: Some(&["proximity"]),
optional_words: false,
..utils::Conf::BASE_SONGS
},
utils::Conf {
group_name: "typo",
queries: &[
"mongus ",
"thelonius monk ",
"Disnaylande ",
"the white striper ",
"indochie ",
"indochien ",
"klub des loopers ",
"fear of the duck ",
"michel depech ",
"stromal ",
"dire straights ",
"Arethla Franklin ",
],
criterion: Some(&["typo"]),
optional_words: false,
..utils::Conf::BASE_SONGS
},
utils::Conf {
group_name: "words",
queries: &[
"the black saint and the sinner lady and the good doggo ", // four words to pop
"les liaisons dangeureuses 1793 ", // one word to pop
"The Disneyland Children's Sing-Alone song ", // two words to pop
"seven nation mummy ", // one word to pop
"7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop
"Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop
"whathavenotnsuchforth and then a good amount of words tot pop in order to match the first one ", // 16
],
criterion: Some(&["words"]),
..utils::Conf::BASE_SONGS
},
utils::Conf {
group_name: "asc",
queries: songs_base_queries,
criterion: Some(&["asc(released-timestamp)"]),
..utils::Conf::BASE_SONGS
},
utils::Conf {
group_name: "desc",
queries: songs_base_queries,
criterion: Some(&["desc(released-timestamp)"]),
..utils::Conf::BASE_SONGS
},
// Groups that put asc/desc in front of the default criteria.
utils::Conf {
group_name: "asc + default",
queries: songs_base_queries,
criterion: Some(&asc_default[..]),
..utils::Conf::BASE_SONGS
},
utils::Conf {
group_name: "desc + default",
queries: songs_base_queries,
criterion: Some(&desc_default[..]),
..utils::Conf::BASE_SONGS
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_criterion);
criterion_main!(benches);

View File

@ -1,51 +0,0 @@
mod utils;
use criterion::{criterion_group, criterion_main};
/// Declares the general-purpose search benchmark groups for the songs dataset
/// (placeholder, plain, quoted and single-letter prefix queries) and hands
/// them to `utils::run_benches`.
///
/// Every group only overrides the group name and the queries; all other
/// fields come from `utils::Conf::BASE_SONGS`.
fn bench_normal(c: &mut criterion::Criterion) {
let confs = &[
utils::Conf {
group_name: "basic placeholder",
// A single empty query.
queries: &[
"",
],
..utils::Conf::BASE_SONGS
},
utils::Conf {
group_name: "basic without quote",
queries: &[
"david bowie", // 1200
"michael jackson", // 600
"marcus miller", // 60
"Notstandskomitee", // 4
],
..utils::Conf::BASE_SONGS
},
utils::Conf {
group_name: "basic with quote",
// Same queries as the previous group, with each word wrapped in double quotes.
queries: &[
"\"david\" \"bowie\"", // 1200
"\"michael\" \"jackson\"", // 600
"\"marcus\" \"miller\"", // 60
"\"Notstandskomitee\"", // 4
],
..utils::Conf::BASE_SONGS
},
utils::Conf {
group_name: "prefix search",
queries: &[
"s", // 500k+ results
"a",
"b",
"i",
"x", // only 7k results
],
..utils::Conf::BASE_SONGS
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_normal);
criterion_main!(benches);

185
milli/benches/songs.rs Normal file
View File

@ -0,0 +1,185 @@
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
/// Applies the songs dataset settings to `builder`.
///
/// Three settings are written, in this order: the displayed fields, the
/// searchable fields, and the faceted fields together with the facet type
/// declared for each of them.
fn base_conf(builder: &mut Settings) {
    const DISPLAYED: [&str; 8] = [
        "id", "title", "album", "artist", "genre", "country", "released", "duration",
    ];
    const SEARCHABLE: [&str; 3] = ["title", "album", "artist"];
    // (field name, facet type) pairs.
    const FACETED: [(&str, &str); 5] = [
        ("released-timestamp", "integer"),
        ("duration-float", "float"),
        ("genre", "string"),
        ("country", "string"),
        ("artist", "string"),
    ];

    // The target collection type of each `collect()` is dictated by the setter it feeds.
    builder.set_displayed_fields(DISPLAYED.iter().copied().map(String::from).collect());
    builder.set_searchable_fields(SEARCHABLE.iter().copied().map(String::from).collect());
    builder.set_faceted_fields(
        FACETED
            .iter()
            .map(|&(field, facet_type)| (String::from(field), String::from(facet_type)))
            .collect(),
    );
}
/// Configuration every songs benchmark group starts from.
///
/// It selects the `smol-songs.csv` dataset, provides the query set used by
/// groups that do not define their own, and configures the index through
/// `base_conf`. Every other field is inherited from `Conf::BASE`.
const BASE_CONF: Conf = Conf {
    dataset: "smol-songs.csv",
    queries: &[
        "mingus ",
        "thelonious monk ",
        "Disneyland ",
        "the white stripes ",
        "indochine ",
        "klub des loosers ",
        "fear of the dark ",
        "michel delpech ",
        "stromae ",
        "dire straits ",
        "aretha franklin ",
    ],
    configure: base_conf,
    ..Conf::BASE
};
fn bench_songs(c: &mut criterion::Criterion) {
let default_criterion: Vec<String> = milli::default_criteria()
.iter()
.map(|criteria| criteria.to_string())
.collect();
let default_criterion = default_criterion.iter().map(|s| s.as_str());
let asc_default: Vec<&str> = std::iter::once("asc(released-timestamp)")
.chain(default_criterion.clone())
.collect();
let desc_default: Vec<&str> = std::iter::once("desc(released-timestamp)")
.chain(default_criterion.clone())
.collect();
let confs = &[
/* first we bench each criterion alone */
utils::Conf {
group_name: "proximity",
queries: &[
"black saint sinner lady ",
"les dangeureuses 1960 ",
"The Disneyland Sing-Along Chorus ",
"Under Great Northern Lights ",
"7000 Danses Un Jour Dans Notre Vie",
],
criterion: Some(&["proximity"]),
optional_words: false,
..BASE_CONF
},
utils::Conf {
group_name: "typo",
queries: &[
"mongus ",
"thelonius monk ",
"Disnaylande ",
"the white striper ",
"indochie ",
"indochien ",
"klub des loopers ",
"fear of the duck ",
"michel depech ",
"stromal ",
"dire straights ",
"Arethla Franklin ",
],
criterion: Some(&["typo"]),
optional_words: false,
..BASE_CONF
},
utils::Conf {
group_name: "words",
queries: &[
"the black saint and the sinner lady and the good doggo ", // four words to pop
"les liaisons dangeureuses 1793 ", // one word to pop
"The Disneyland Children's Sing-Alone song ", // two words to pop
"seven nation mummy ", // one word to pop
"7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop
"Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop
"whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 16
],
criterion: Some(&["words"]),
..BASE_CONF
},
utils::Conf {
group_name: "asc",
criterion: Some(&["asc(released-timestamp)"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc",
criterion: Some(&["desc(released-timestamp)"]),
..BASE_CONF
},
/* then we bench the asc and desc criterion on top of the default criterion */
utils::Conf {
group_name: "asc + default",
criterion: Some(&asc_default[..]),
..BASE_CONF
},
utils::Conf {
group_name: "desc + default",
criterion: Some(&desc_default[..]),
..BASE_CONF
},
/* the we bench some global / normal search with all the default criterion in the default
* order */
utils::Conf {
group_name: "basic placeholder",
queries: &[
"",
],
..BASE_CONF
},
utils::Conf {
group_name: "basic without quote",
queries: &[
"david bowie", // 1200
"michael jackson", // 600
"marcus miller", // 60
"Notstandskomitee", // 4
],
..BASE_CONF
},
utils::Conf {
group_name: "basic with quote",
queries: &[
"\"david\" \"bowie\"", // 1200
"\"michael\" \"jackson\"", // 600
"\"marcus\" \"miller\"", // 60
"\"Notstandskomitee\"", // 4
],
..BASE_CONF
},
utils::Conf {
group_name: "prefix search",
queries: &[
"s", // 500k+ results
"a",
"b",
"i",
"x", // only 7k results
],
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_songs);
criterion_main!(benches);

View File

@ -26,34 +26,6 @@ pub struct Conf<'a> {
impl Conf<'_> { impl Conf<'_> {
fn nop(_builder: &mut Settings) {} fn nop(_builder: &mut Settings) {}
/// Applies the songs dataset settings to `builder`: the displayed fields, the
/// searchable fields, and the faceted fields with the facet type declared for
/// each of them.
fn songs_conf(builder: &mut Settings) {
let displayed_fields = [
"id", "title", "album", "artist", "genre", "country", "released", "duration",
]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_displayed_fields(displayed_fields);
let searchable_fields = ["title", "album", "artist"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_searchable_fields(searchable_fields);
// (field name, facet type) pairs.
let faceted_fields = [
("released-timestamp", "integer"),
("duration-float", "float"),
("genre", "string"),
("country", "string"),
("artist", "string"),
]
.iter()
.map(|(a, b)| (a.to_string(), b.to_string()))
.collect();
builder.set_faceted_fields(faceted_fields);
}
pub const BASE: Self = Conf { pub const BASE: Self = Conf {
database_name: "benches.mmdb", database_name: "benches.mmdb",
dataset: "", dataset: "",
@ -65,11 +37,6 @@ impl Conf<'_> {
optional_words: true, optional_words: true,
}; };
/// `BASE` specialised for the songs dataset: the dataset is set to
/// `smol-songs.csv` and the settings are applied by `songs_conf`; every other
/// field keeps the value from `BASE`.
pub const BASE_SONGS: Self = Conf {
dataset: "smol-songs.csv",
configure: Self::songs_conf,
..Self::BASE
};
} }
pub fn base_setup(conf: &Conf) -> Index { pub fn base_setup(conf: &Conf) -> Index {