604: Speed up debug builds r=Kerollmops a=loiclec

Note: this draft PR is based on https://github.com/meilisearch/milli/pull/601, for no particular reason.

## What does this PR do?
Make a series of changes with the goal of speeding up debug builds:

1. Add an `all_languages` feature which compiles charabia with its `default` features activated.
The `all_languages` feature is activated by default, but running:
```
cargo build --no-default-features
```
on `milli` is now much faster.

2. Reduce the `opt-level` of the `dev` profile (debug builds) from 3 to 0, except for a few critical dependencies.

3. Compile the build dependencies more quickly as well. Previously, all build dependencies were compiled with `opt-level = 3`. Now, only the critical build dependencies are compiled with optimisations.

4. Reduce the amount of code generated by the `documents!` macro.

5. Make the "progress update" closure provided to indexing functions a trait object instead of a generic parameter. This avoids monomorphising the indexing code multiple times needlessly.

## Results
Initial build times on my computer before and after these changes:
|        | cargo check | cargo check --no-default-features | cargo test | cargo test --lib | cargo test --no-default-features | cargo test --lib --no-default-features |
|--------|-------------|-----------------------------------|------------|------------------|----------------------------------|----------------------------------------|
| before | 1m05s       | 1m05s                             | 2m06s      | 1m47s            | 2m06s                            | 1m47s                                  |
| after  | 28.9s       | 13.1s                             | 40s        | 38s              | 23s                              | 21s                                    |



Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
This commit is contained in:
bors[bot] 2022-10-12 08:54:48 +00:00 committed by GitHub
commit 2000f7958d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 54 additions and 29 deletions

View File

@ -4,19 +4,34 @@ members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker",
default-members = ["milli"] default-members = ["milli"]
[profile.dev] [profile.dev]
opt-level = 3 opt-level = 0
[profile.release] [profile.release]
debug = true debug = true
codegen-units = 1 codegen-units = 1
# Make sure that the build scripts and proc-macros are compiled with [profile.dev.package.grenad]
# all the optimizations. It speeds up the zip crate that we use in the build.rs.
[profile.dev.build-override]
opt-level = 3 opt-level = 3
[profile.release.build-override] [profile.dev.package.roaring]
opt-level = 3 opt-level = 3
[profile.bench.build-override]
[profile.dev.package.lindera-ipadic-builder]
opt-level = 3 opt-level = 3
[profile.test.build-override] [profile.dev.package.encoding]
opt-level = 3
[profile.dev.package.yada]
opt-level = 3
[profile.release.package.lindera-ipadic-builder]
opt-level = 3
[profile.release.package.encoding]
opt-level = 3
[profile.release.package.yada]
opt-level = 3
[profile.bench.package.lindera-ipadic-builder]
opt-level = 3
[profile.bench.package.encoding]
opt-level = 3
[profile.bench.package.yada]
opt-level = 3 opt-level = 3

View File

@ -7,7 +7,7 @@ publish = false
[dependencies] [dependencies]
anyhow = "1.0.56" anyhow = "1.0.56"
csv = "1.1.6" csv = "1.1.6"
milli = { path = "../milli" } milli = { path = "../milli", default-features = false }
mimalloc = { version = "0.1.29", default-features = false } mimalloc = { version = "0.1.29", default-features = false }
serde_json = { version = "1.0.79", features = ["preserve_order"] } serde_json = { version = "1.0.79", features = ["preserve_order"] }
@ -24,6 +24,9 @@ convert_case = "0.5.0"
flate2 = "1.0.22" flate2 = "1.0.22"
reqwest = { version = "0.11.9", features = ["blocking", "rustls-tls"], default-features = false } reqwest = { version = "0.11.9", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/default"]
[[bench]] [[bench]]
name = "search_songs" name = "search_songs"
harness = false harness = false

View File

@ -12,9 +12,12 @@ color-eyre = "0.6.1"
csv = "1.1.6" csv = "1.1.6"
eyre = "0.6.7" eyre = "0.6.7"
indicatif = "0.16.2" indicatif = "0.16.2"
milli = { path = "../milli" } milli = { path = "../milli", default-features = false }
mimalloc = { version = "0.1.29", default-features = false } mimalloc = { version = "0.1.29", default-features = false }
serde = "1.0.136" serde = "1.0.136"
serde_json = "1.0.79" serde_json = "1.0.79"
stderrlog = "0.5.1" stderrlog = "0.5.1"
structopt = "0.3.26" structopt = "0.3.26"
[features]
default = ["milli/default"]

View File

@ -152,30 +152,33 @@ impl fmt::Display for Error {
impl std::error::Error for Error {} impl std::error::Error for Error {}
/// Macro used to generate documents, with the same syntax as `serde_json::json`
#[cfg(test)] #[cfg(test)]
macro_rules! documents { pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
($data:tt) => {{ let documents = match json {
let documents = serde_json::json!($data);
let documents = match documents {
object @ serde_json::Value::Object(_) => vec![object], object @ serde_json::Value::Object(_) => vec![object],
serde_json::Value::Array(objects) => objects, serde_json::Value::Array(objects) => objects,
invalid => { invalid => {
panic!("an array of objects must be specified, {:#?} is not an array", invalid) panic!("an array of objects must be specified, {:#?} is not an array", invalid)
} }
}; };
let mut objects = vec![];
let mut builder = crate::documents::DocumentsBatchBuilder::new(Vec::new());
for document in documents { for document in documents {
let object = match document { let object = match document {
serde_json::Value::Object(object) => object, serde_json::Value::Object(object) => object,
invalid => panic!("an object must be specified, {:#?} is not an object", invalid), invalid => panic!("an object must be specified, {:#?} is not an object", invalid),
}; };
builder.append_json_object(&object).unwrap(); objects.push(object);
}
objects
} }
let vector = builder.into_inner().unwrap(); /// Macro used to generate documents, with the same syntax as `serde_json::json`
crate::documents::DocumentsBatchReader::from_reader(std::io::Cursor::new(vector)).unwrap() #[cfg(test)]
macro_rules! documents {
($data:tt) => {{
let documents = serde_json::json!($data);
let documents = $crate::documents::objects_from_json_value(documents);
$crate::documents::documents_batch_reader_from_objects(documents)
}}; }};
} }
@ -187,7 +190,8 @@ pub fn documents_batch_reader_from_objects(
for object in objects { for object in objects {
builder.append_json_object(&object).unwrap(); builder.append_json_object(&object).unwrap();
} }
DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap() let vector = builder.into_inner().unwrap();
DocumentsBatchReader::from_reader(std::io::Cursor::new(vector)).unwrap()
} }
#[cfg(test)] #[cfg(test)]