From ea0c6d8c401a3ee37c14a62878e4b1641e08d726 Mon Sep 17 00:00:00 2001
From: tamo <tamo@meilisearch.com>
Date: Tue, 13 Apr 2021 10:44:27 +0200
Subject: [PATCH] add a bunch of queries and start the introduction of the
 filters and the new dataset

---
 milli/benches/criterion.rs     | 48 ++++++++++++++++++++++++++++++--
 milli/benches/normal_search.rs | 51 ++++++++++++++++++++++++++++++++++
 milli/benches/utils.rs         | 43 ++++++++++++++++++++++------
 3 files changed, 132 insertions(+), 10 deletions(-)
 create mode 100644 milli/benches/normal_search.rs
diff --git a/milli/benches/criterion.rs b/milli/benches/criterion.rs
index 3f0b6d6b7..bdfe3d478 100644
--- a/milli/benches/criterion.rs
+++ b/milli/benches/criterion.rs
@@ -3,6 +3,24 @@ mod utils;
 use criterion::{criterion_group, criterion_main};
 
 fn bench_criterion(c: &mut criterion::Criterion) {
+    let songs_base_queries = &[
+                "mingus ",
+                "thelonious monk ",
+                "Disneyland ",
+                "the white stripes ",
+                "indochine ",
+                "klub des loosers ",
+                "fear of the dark ",
+                "michel delpech ",
+                "stromae ",
+                "dire straits ",
+                "aretha franklin ",
+    ];
+    let default_criterion: Vec<String> = milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
+    let default_criterion = default_criterion.iter().map(|s| s.as_str());
+    let asc_default: Vec<&str> = std::iter::once("asc").chain(default_criterion.clone()).collect();
+    let desc_default: Vec<&str> = std::iter::once("desc").chain(default_criterion.clone()).collect();
+
     let confs = &[
         utils::Conf {
             group_name: "proximity",
@@ -15,6 +33,7 @@ fn bench_criterion(c: &mut criterion::Criterion) {
             ],
             criterion: Some(&["proximity"]),
             optional_words: false,
+            ..utils::Conf::BASE
         },
         utils::Conf {
             group_name: "typo",
@@ -34,6 +53,7 @@ fn bench_criterion(c: &mut criterion::Criterion) {
             ],
             criterion: Some(&["typo"]),
             optional_words: false,
+            ..utils::Conf::BASE
         },
         utils::Conf {
             group_name: "words",
@@ -47,8 +67,32 @@ fn bench_criterion(c: &mut criterion::Criterion) {
                 "whathavenotnsuchforth and then a good amount of words tot pop in order to match the first one ", // 16
             ],
             criterion: Some(&["words"]),
-            optional_words: true,
-        }
+            ..utils::Conf::BASE
+        },
+        utils::Conf {
+            group_name: "asc",
+            queries: songs_base_queries,
+            criterion: Some(&["asc"]),
+            ..utils::Conf::BASE
+        },
+        utils::Conf {
+            group_name: "desc",
+            queries: songs_base_queries,
+            criterion: Some(&["desc"]),
+            ..utils::Conf::BASE
+        },
+        utils::Conf {
+            group_name: "asc + default",
+            queries: songs_base_queries,
+            criterion: Some(&asc_default[..]),
+            ..utils::Conf::BASE
+        },
+        utils::Conf {
+            group_name: "desc + default",
+            queries: songs_base_queries,
+            criterion: Some(&desc_default[..]),
+            ..utils::Conf::BASE
+        },
     ];
 
     utils::run_benches(c, confs);
diff --git a/milli/benches/normal_search.rs b/milli/benches/normal_search.rs
new file mode 100644
index 000000000..39a343cf0
--- /dev/null
+++ b/milli/benches/normal_search.rs
@@ -0,0 +1,51 @@
+mod utils;
+
+use criterion::{criterion_group, criterion_main};
+
+fn bench_normal(c: &mut criterion::Criterion) {
+    let confs = &[ // one `Conf` per benchmark group; fields not listed come from `utils::Conf::BASE`
+        utils::Conf {
+            group_name: "basic placeholder",
+            queries: &[
+                "", // empty query string: the "placeholder" case this group is named after
+            ],
+            ..utils::Conf::BASE
+        },
+        utils::Conf {
+            group_name: "basic without quote",
+            queries: &[ // trailing numbers are presumably result counts -- TODO confirm against the dataset
+                "david bowie", // 1200
+                "michael jackson", // 600
+                "marcus miller", // 60
+                "Notstandskomitee", // 4
+            ],
+            ..utils::Conf::BASE
+        },
+        utils::Conf {
+            group_name: "basic with quote",
+            queries: &[ // same terms as the "basic without quote" group, each word wrapped in double quotes
+                "\"david\" \"bowie\"", // 1200
+                "\"michael\" \"jackson\"", // 600
+                "\"marcus\" \"miller\"", // 60
+                "\"Notstandskomitee\"", // 4
+            ],
+            ..utils::Conf::BASE
+        },
+        utils::Conf {
+            group_name: "prefix search",
+            queries: &[ // single-letter queries
+                "s", // 500k+ results
+                "a",
+                "b",
+                "i",
+                "x", // only 7k results
+            ],
+            ..utils::Conf::BASE
+        },
+    ];
+
+    utils::run_benches(c, confs); // builds an index per group and benchmarks each of its queries
+}
+
+criterion_group!(benches, bench_normal);
+criterion_main!(benches);
diff --git a/milli/benches/utils.rs b/milli/benches/utils.rs
index c608a3ef3..6c8360fe2 100644
--- a/milli/benches/utils.rs
+++ b/milli/benches/utils.rs
@@ -1,18 +1,40 @@
-use std::{fs::{File, create_dir_all}, time::Duration};
+use std::{fs::{File, create_dir_all, remove_dir_all}, time::Duration};
 
 use heed::EnvOpenOptions;
 use criterion::BenchmarkId;
-use milli::{Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
+use milli::{FacetCondition, Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
 
 pub struct Conf<'a> {
+    /// Directory in which the benchmark database is created;
+    /// `base_setup` deletes it if it already exists and recreates it before indexing.
+    pub database_name: &'a str,
+    /// Path of the dataset file opened by `base_setup`; it must be an uncompressed CSV.
+    pub dataset: &'a str,
     pub group_name: &'a str,
     pub queries: &'a[&'a str],
     pub criterion: Option<&'a [&'a str]>,
+    pub facet_condition: Option<FacetCondition>, // when `Some`, applied to every search run by `run_benches`
     pub optional_words: bool,
 }
 
-pub fn base_setup(criterion: Option<Vec<String>>) -> Index {
-    let database = "songs.mmdb";
+impl Conf<'_> {
+    pub const BASE: Self = Conf { // shared defaults; build a conf with `Conf { ..., ..Conf::BASE }`
+        database_name: "benches.mmdb",
+        dataset: "benches/smol_songs.csv", // must name a real file: `base_setup` unwraps `File::open(dataset)`, so "" would panic
+        group_name: "",
+        queries: &[],
+        criterion: None,
+        facet_condition: None, // no facet filter unless a conf overrides it
+        optional_words: true,
+    };
+}
+
+pub fn base_setup(database: &str, dataset: &str, criterion: Option<Vec<String>>) -> Index {
+    match remove_dir_all(&database) {
+        Ok(_) => (),
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
+        Err(e) => panic!("{}", e),
+    }
     create_dir_all(&database).unwrap();
 
     let mut options = EnvOpenOptions::new();
@@ -41,7 +63,7 @@ pub fn base_setup(criterion: Option<Vec<String>>) -> Index {
     builder.update_format(UpdateFormat::Csv);
     builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
     // we called from cargo the current directory is supposed to be milli/milli
-    let reader = File::open("benches/smol_songs.csv").unwrap();
+    let reader = File::open(dataset).unwrap();
     builder.execute(reader, |_, _| ()).unwrap();
     wtxn.commit().unwrap();
 
@@ -51,16 +73,21 @@ pub fn base_setup(criterion: Option<Vec<String>>) -> Index {
 pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
     for conf in confs {
         let criterion = conf.criterion.map(|s| s.iter().map(|s| s.to_string()).collect());
-        let index = base_setup(criterion);
+        let index = base_setup(conf.database_name, conf.dataset, criterion);
 
-        let mut group = c.benchmark_group(conf.group_name);
+        let mut group = c.benchmark_group(&format!("{}: {}", conf.dataset, conf.group_name));
         group.measurement_time(Duration::from_secs(10));
 
         for &query in conf.queries {
             group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
                 b.iter(|| {
                     let rtxn = index.read_txn().unwrap();
-                    let _documents_ids = index.search(&rtxn).query(query).optional_words(conf.optional_words).execute().unwrap();
+                    let mut search = index.search(&rtxn);
+                    search.query(query).optional_words(conf.optional_words);
+                    if let Some(facet_condition) = conf.facet_condition.clone() {
+                        search.facet_condition(facet_condition);
+                    }
+                    let _ids = search.execute().unwrap();
                 });
             });
         }