mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
move the fuzzer to its own crate
This commit is contained in:
parent
002f42875f
commit
6c6387d05e
708
Cargo.lock
generated
708
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -13,7 +13,8 @@ members = [
|
|||||||
"filter-parser",
|
"filter-parser",
|
||||||
"flatten-serde-json",
|
"flatten-serde-json",
|
||||||
"json-depth-checker",
|
"json-depth-checker",
|
||||||
"benchmarks"
|
"benchmarks",
|
||||||
|
"fuzzers",
|
||||||
]
|
]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
|
20
fuzzers/Cargo.toml
Normal file
20
fuzzers/Cargo.toml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
[package]
|
||||||
|
name = "fuzzers"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
version.workspace = true
|
||||||
|
authors.workspace = true
|
||||||
|
description.workspace = true
|
||||||
|
homepage.workspace = true
|
||||||
|
readme.workspace = true
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
arbitrary = { version = "1.3.0", features = ["derive"] }
|
||||||
|
clap = { version = "4.3.0", features = ["derive"] }
|
||||||
|
fastrand = "1.9.0"
|
||||||
|
milli = { path = "../milli" }
|
||||||
|
serde = { version = "1.0.160", features = ["derive"] }
|
||||||
|
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||||
|
tempfile = "3.5.0"
|
3
fuzzers/README.md
Normal file
3
fuzzers/README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Fuzzers
|
||||||
|
|
||||||
|
The purpose of this crate is to contain all the handmade "fuzzers" we may need.
|
136
fuzzers/src/bin/fuzz.rs
Normal file
136
fuzzers/src/bin/fuzz.rs
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
use std::num::NonZeroUsize;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use arbitrary::{Arbitrary, Unstructured};
|
||||||
|
use clap::Parser;
|
||||||
|
use fuzzers::Operation;
|
||||||
|
use milli::heed::EnvOpenOptions;
|
||||||
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
|
||||||
|
use milli::Index;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
|
||||||
|
#[derive(Debug, Arbitrary)]
|
||||||
|
struct Batch([Operation; 5]);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Parser)]
|
||||||
|
struct Opt {
|
||||||
|
/// The number of fuzzer to run in parallel.
|
||||||
|
#[clap(long)]
|
||||||
|
par: Option<NonZeroUsize>,
|
||||||
|
// We need to put a lot of newlines in the following documentation or else everything gets collapsed on one line
|
||||||
|
/// The path in which the databases will be created.
|
||||||
|
/// Using a ramdisk is recommended.
|
||||||
|
///
|
||||||
|
/// Linux:
|
||||||
|
///
|
||||||
|
/// sudo mount -t tmpfs -o size=2g tmpfs ramdisk # to create it
|
||||||
|
///
|
||||||
|
/// sudo umount ramdisk # to remove it
|
||||||
|
///
|
||||||
|
/// MacOS:
|
||||||
|
///
|
||||||
|
/// diskutil erasevolume HFS+ 'RAM Disk' `hdiutil attach -nobrowse -nomount ram://4194304 # create it
|
||||||
|
///
|
||||||
|
/// hdiutil detach /dev/:the_disk
|
||||||
|
///
|
||||||
|
#[clap(long)]
|
||||||
|
path: Option<PathBuf>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let opt = Opt::parse();
|
||||||
|
let progression: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
|
||||||
|
|
||||||
|
let par = opt.par.unwrap_or_else(|| std::thread::available_parallelism().unwrap()).get();
|
||||||
|
let mut handles = Vec::with_capacity(par);
|
||||||
|
|
||||||
|
for _ in 0..par {
|
||||||
|
let opt = opt.clone();
|
||||||
|
|
||||||
|
let handle = std::thread::spawn(move || {
|
||||||
|
let mut options = EnvOpenOptions::new();
|
||||||
|
options.map_size(1024 * 1024 * 1024 * 1024);
|
||||||
|
let tempdir = match opt.path {
|
||||||
|
Some(path) => TempDir::new_in(path).unwrap(),
|
||||||
|
None => TempDir::new().unwrap(),
|
||||||
|
};
|
||||||
|
let index = Index::new(options, tempdir.path()).unwrap();
|
||||||
|
let indexer_config = IndexerConfig::default();
|
||||||
|
let index_documents_config = IndexDocumentsConfig::default();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let v: Vec<u8> = std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();
|
||||||
|
|
||||||
|
let mut data = Unstructured::new(&v);
|
||||||
|
let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
|
||||||
|
// will be used to display the error once a thread crashes
|
||||||
|
let dbg_input = format!("{:#?}", batches);
|
||||||
|
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
for batch in batches {
|
||||||
|
let mut builder = IndexDocuments::new(
|
||||||
|
&mut wtxn,
|
||||||
|
&index,
|
||||||
|
&indexer_config,
|
||||||
|
index_documents_config.clone(),
|
||||||
|
|_| (),
|
||||||
|
|| false,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
for op in batch.0 {
|
||||||
|
match op {
|
||||||
|
Operation::AddDoc(doc) => {
|
||||||
|
let documents =
|
||||||
|
milli::documents::objects_from_json_value(doc.to_d());
|
||||||
|
let documents =
|
||||||
|
milli::documents::documents_batch_reader_from_objects(
|
||||||
|
documents,
|
||||||
|
);
|
||||||
|
let (b, _added) =
|
||||||
|
builder.add_documents(documents).expect(&dbg_input);
|
||||||
|
builder = b;
|
||||||
|
}
|
||||||
|
Operation::DeleteDoc(id) => {
|
||||||
|
let (b, _removed) =
|
||||||
|
builder.remove_documents(vec![id.to_s()]).unwrap();
|
||||||
|
builder = b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
builder.execute().expect(&dbg_input);
|
||||||
|
|
||||||
|
// after executing a batch we check if the database is corrupted
|
||||||
|
let res = index.search(&wtxn).execute().expect(&dbg_input);
|
||||||
|
index.documents(&wtxn, res.documents_ids).expect(&dbg_input);
|
||||||
|
progression.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
wtxn.abort().unwrap();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
handles.push(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::thread::spawn(|| {
|
||||||
|
let mut last_value = 0;
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
loop {
|
||||||
|
let total = progression.load(Ordering::Relaxed);
|
||||||
|
println!(
|
||||||
|
"Has been running for {:?}. Tested {} new values for a total of {}.",
|
||||||
|
start.elapsed(),
|
||||||
|
total - last_value,
|
||||||
|
total
|
||||||
|
);
|
||||||
|
last_value = total;
|
||||||
|
std::thread::sleep(Duration::from_secs(1));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
for handle in handles {
|
||||||
|
handle.join().unwrap();
|
||||||
|
}
|
||||||
|
}
|
46
fuzzers/src/lib.rs
Normal file
46
fuzzers/src/lib.rs
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
use arbitrary::Arbitrary;
|
||||||
|
use serde_json::{json, Value};
|
||||||
|
|
||||||
|
#[derive(Debug, Arbitrary)]
|
||||||
|
pub enum Document {
|
||||||
|
One,
|
||||||
|
Two,
|
||||||
|
Three,
|
||||||
|
Four,
|
||||||
|
Five,
|
||||||
|
Six,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Document {
|
||||||
|
pub fn to_d(&self) -> Value {
|
||||||
|
match self {
|
||||||
|
Document::One => json!({ "id": 0, "doggo": "bernese" }),
|
||||||
|
Document::Two => json!({ "id": 0, "doggo": "golden" }),
|
||||||
|
Document::Three => json!({ "id": 0, "catto": "jorts" }),
|
||||||
|
Document::Four => json!({ "id": 1, "doggo": "bernese" }),
|
||||||
|
Document::Five => json!({ "id": 1, "doggo": "golden" }),
|
||||||
|
Document::Six => json!({ "id": 1, "catto": "jorts" }),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Arbitrary)]
|
||||||
|
pub enum DocId {
|
||||||
|
Zero,
|
||||||
|
One,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DocId {
|
||||||
|
pub fn to_s(&self) -> String {
|
||||||
|
match self {
|
||||||
|
DocId::Zero => "0".to_string(),
|
||||||
|
DocId::One => "1".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Arbitrary)]
|
||||||
|
pub enum Operation {
|
||||||
|
AddDoc(Document),
|
||||||
|
DeleteDoc(DocId),
|
||||||
|
}
|
@ -65,13 +65,6 @@ maplit = "1.0.2"
|
|||||||
md5 = "0.7.0"
|
md5 = "0.7.0"
|
||||||
rand = {version = "0.8.5", features = ["small_rng"] }
|
rand = {version = "0.8.5", features = ["small_rng"] }
|
||||||
|
|
||||||
# fuzzing
|
|
||||||
arbitrary = { version = "1.3.0", features = ["derive"] }
|
|
||||||
fastrand = "1.9.0"
|
|
||||||
|
|
||||||
[target.'cfg(fuzzing)'.dev-dependencies]
|
|
||||||
fuzzcheck = "0.12.1"
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
all-tokenizations = ["charabia/default"]
|
all-tokenizations = ["charabia/default"]
|
||||||
|
|
||||||
|
@ -52,7 +52,9 @@ enum Operation {
|
|||||||
#[derive(Debug, Arbitrary)]
|
#[derive(Debug, Arbitrary)]
|
||||||
struct Batch([Operation; 5]);
|
struct Batch([Operation; 5]);
|
||||||
|
|
||||||
fn main() {
|
#[test]
|
||||||
|
#[ignore]
|
||||||
|
fn fuzz() {
|
||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(1024 * 1024 * 1024 * 1024);
|
options.map_size(1024 * 1024 * 1024 * 1024);
|
||||||
let _tempdir = TempDir::new().unwrap();
|
let _tempdir = TempDir::new().unwrap();
|
Loading…
Reference in New Issue
Block a user