mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
uses an env variable to find the datasets
This commit is contained in:
parent
4969abeaab
commit
3c84075d2d
@ -13,3 +13,15 @@ You can run the following command from the root of this git repository
|
|||||||
```
|
```
|
||||||
wget https://meili-datasets.s3.fr-par.scw.cloud/benchmarks/smol-songs.csv.gz -O milli/benches/smol-songs.csv.gz
|
wget https://meili-datasets.s3.fr-par.scw.cloud/benchmarks/smol-songs.csv.gz -O milli/benches/smol-songs.csv.gz
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- To run all the benchmarks we recommend using `cargo bench`; this should take around 4h
|
||||||
|
- You can also run the benchmarks on the `songs` dataset with `cargo bench --bench songs`; it should take around 1h
|
||||||
|
- And on the `wiki` dataset with `cargo bench --bench wiki`; it should take around 3h
|
||||||
|
|
||||||
|
By default the benchmarks expect the datasets to be uncompressed and present in `milli/milli/benches`, but you can also specify your own path with the environment variable `MILLI_BENCH_DATASETS_PATH`, like this:
|
||||||
|
```
|
||||||
|
MILLI_BENCH_DATASETS_PATH=~/Downloads/datasets cargo bench --bench songs
|
||||||
|
```
|
||||||
|
|
||||||
|
Our benchmarking suite uses Criterion, which allows you to do a lot of configuration; see the documentation [here](https://bheisler.github.io/criterion.rs/book/user_guide/user_guide.html).
|
||||||
|
|
||||||
|
@ -7,6 +7,15 @@ use milli::{
|
|||||||
FacetCondition, Index,
|
FacetCondition, Index,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// The name of the environment variable used to select the path
|
||||||
|
/// of the directory containing the datasets
|
||||||
|
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
|
||||||
|
|
||||||
|
/// The default path for the dataset if nothing is specified
|
||||||
|
/// By default we choose `milli/benches` because any cargo command run in `milli/milli/**` will be
|
||||||
|
/// executed with a pwd of `milli/milli`
|
||||||
|
const DEFAULT_DATASETS_PATH: &str = "milli/benches";
|
||||||
|
|
||||||
pub struct Conf<'a> {
|
pub struct Conf<'a> {
|
||||||
/// where we are going to create our database.mmdb directory
|
/// where we are going to create our database.mmdb directory
|
||||||
/// each benchmark will first try to delete it and then recreate it
|
/// each benchmark will first try to delete it and then recreate it
|
||||||
@ -78,7 +87,10 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
builder.update_format(UpdateFormat::Csv);
|
builder.update_format(UpdateFormat::Csv);
|
||||||
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||||
// we called from cargo the current directory is supposed to be milli/milli
|
// we called from cargo the current directory is supposed to be milli/milli
|
||||||
let dataset_path = format!("benches/{}", conf.dataset);
|
let base_dataset_path = std::env::vars()
|
||||||
|
.find(|var| var.0 == BASE_DATASETS_PATH_KEY)
|
||||||
|
.map_or(DEFAULT_DATASETS_PATH.to_owned(), |(_key, value)| value);
|
||||||
|
let dataset_path = format!("{}/{}", base_dataset_path, conf.dataset);
|
||||||
let reader = File::open(&dataset_path)
|
let reader = File::open(&dataset_path)
|
||||||
.expect(&format!("could not find the dataset in: {}", &dataset_path));
|
.expect(&format!("could not find the dataset in: {}", &dataset_path));
|
||||||
builder.execute(reader, |_, _| ()).unwrap();
|
builder.execute(reader, |_, _| ()).unwrap();
|
||||||
@ -100,7 +112,8 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
|||||||
let mut search = index.search(&rtxn);
|
let mut search = index.search(&rtxn);
|
||||||
search.query(query).optional_words(conf.optional_words);
|
search.query(query).optional_words(conf.optional_words);
|
||||||
if let Some(facet_condition) = conf.facet_condition {
|
if let Some(facet_condition) = conf.facet_condition {
|
||||||
let facet_condition = FacetCondition::from_str(&rtxn, &index, facet_condition).unwrap();
|
let facet_condition =
|
||||||
|
FacetCondition::from_str(&rtxn, &index, facet_condition).unwrap();
|
||||||
search.facet_condition(facet_condition);
|
search.facet_condition(facet_condition);
|
||||||
}
|
}
|
||||||
let _ids = search.execute().unwrap();
|
let _ids = search.execute().unwrap();
|
||||||
|
Loading…
Reference in New Issue
Block a user