496: Improve the performances of the flattening subcrate r=irevoire a=Kerollmops

This PR adds some benchmarks to the _flatten-serde-json_ crate, this crate is responsible for transforming the original documents into flat versions that the engine can understand. It can probably be sped up, and this is why I added benchmarks to it.

I made some interesting performance improvements when I replaced the `json!` macro calls.

```
flatten/simple          time:   [452.44 ns 453.31 ns 454.18 ns]
                        change: [-15.036% -14.751% -14.473%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 2 outliers among 100 measurements (2.00%)
  2 (2.00%) high mild

Benchmarking flatten/complex: Collecting 100 samples in estimated 5.0007 s (4.9M i                                                                                  flatten/complex         time:   [1.0101 us 1.0131 us 1.0160 us]
                        change: [-18.001% -17.775% -17.536%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  5 (5.00%) high mild
  1 (1.00%) high severe
```

---

_I removed this particular commit from this PR._ The reason is that the two other commits were enough for this PR to have enough impact and be merged. We will continue to explore where we can gain performance later.

But when I changed the flattening function to accept an owned version of the objects, we lost a lot of performance. Yes, I rewrote the benchmarks (locally) to clone the input object (and measured both the previous and new versions with the cloning benchmarks). Maybe cloning the benchmark inputs is not the right thing to do...

```
Benchmarking flatten/simple: Collecting 100 samples in estimated 5.0005 s (6.7M it                                                                                  flatten/simple          time:   [746.46 ns 749.59 ns 752.70 ns]
                        change: [+40.082% +40.714% +41.347%] (p = 0.00 < 0.05)
                        Performance has regressed.

Benchmarking flatten/complex: Collecting 100 samples in estimated 5.0047 s (2.9M i                                                                                  flatten/complex         time:   [1.7311 us 1.7342 us 1.7368 us]
                        change: [+40.976% +41.398% +41.807%] (p = 0.00 < 0.05)
                        Performance has regressed.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) low mild
```

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2022-04-13 11:14:29 +00:00 committed by GitHub
commit 456887a54a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 6 deletions

View File

@ -5,7 +5,12 @@ edition = "2021"
description = "Flatten serde-json objects like elastic search" description = "Flatten serde-json objects like elastic search"
readme = "README.md" readme = "README.md"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
serde_json = "1.0" serde_json = "1.0"
[dev-dependencies]
criterion = { version = "0.3", features = ["html_reports"] }
[[bench]]
name = "benchmarks"
harness = false

View File

@ -0,0 +1,42 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use flatten_serde_json::flatten;
use serde_json::json;
/// Benchmarks `flatten` on a small object with one level of nesting.
///
/// The input is built once outside the measured closure; criterion's
/// `bench_with_input` hands it to the benchmark by reference so setup
/// cost is excluded from the measurement.
pub fn flatten_simple(c: &mut Criterion) {
    // `flatten` takes `&Map<String, Value>`, so a shared borrow suffices:
    // no need for `mut input` / `as_object_mut()` here.
    let input = json!({
        "a": {
            "b": "c",
            "d": "e",
            "f": "g"
        }
    });
    let object = input.as_object().unwrap();

    c.bench_with_input(BenchmarkId::new("flatten", "simple"), &object, |b, input| {
        b.iter(|| flatten(input))
    });
}
/// Benchmarks `flatten` on a deeply nested object mixing arrays,
/// nested arrays, and objects inside arrays — the worst-case shapes the
/// flattening code has to handle.
///
/// As in `flatten_simple`, the JSON value is constructed once up front
/// and passed by reference so only the flattening itself is timed.
pub fn flatten_complex(c: &mut Criterion) {
    // `flatten` only reads the map, so use the immutable accessor and
    // drop the unnecessary `mut` binding.
    let input = json!({
        "a": [
            "b",
            ["c", "d"],
            { "e": ["f", "g"] },
            [
                { "h": "i" },
                { "e": ["j", { "z": "y" }] },
            ],
            ["l"],
            "m",
        ]
    });
    let object = input.as_object().unwrap();

    c.bench_with_input(BenchmarkId::new("flatten", "complex"), &object, |b, input| {
        b.iter(|| flatten(input))
    });
}
// Register both benchmark functions under the `benches` group and let
// criterion generate the `main` entry point (the Cargo manifest sets
// `harness = false` for this bench target, so criterion supplies it).
criterion_group!(benches, flatten_simple, flatten_complex);
criterion_main!(benches);

View File

@ -1,6 +1,6 @@
#![doc = include_str!("../README.md")] #![doc = include_str!("../README.md")]
use serde_json::{json, Map, Value}; use serde_json::{Map, Value};
pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> { pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
let mut obj = Map::new(); let mut obj = Map::new();
@ -42,7 +42,7 @@ fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value)
debug_assert!(!to_insert.is_object()); debug_assert!(!to_insert.is_object());
debug_assert!(!to_insert.is_array()); debug_assert!(!to_insert.is_array());
// does the field aleardy exists? // does the field already exists?
if let Some(value) = base_json.get_mut(key) { if let Some(value) = base_json.get_mut(key) {
// is it already an array // is it already an array
if let Some(array) = value.as_array_mut() { if let Some(array) = value.as_array_mut() {
@ -50,16 +50,18 @@ fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value)
// or is there a collision // or is there a collision
} else { } else {
let value = std::mem::take(value); let value = std::mem::take(value);
base_json[key] = json!([value, to_insert]); base_json[key] = Value::Array(vec![value, to_insert]);
} }
// if it does not exist we can push the value untouched // if it does not exist we can push the value untouched
} else { } else {
base_json.insert(key.to_string(), json!(to_insert)); base_json.insert(key.to_string(), to_insert);
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use serde_json::json;
use super::*; use super::*;
#[test] #[test]