diff --git a/milli/README.md b/milli/README.md new file mode 100644 index 000000000..7479eff45 --- /dev/null +++ b/milli/README.md @@ -0,0 +1,26 @@ +# Milli + +## Fuzzing milli + +Currently you can only fuzz indexing. +To execute the fuzzer, run: +``` +cargo fuzz run indexing +``` + +To execute the fuzzer on multiple threads, you can also run: +``` +cargo fuzz run -j4 indexing +``` + +Since the fuzzer is going to create a lot of temporary files to let milli index its documents, +I would also recommend executing it on a ramdisk. +Here is how to set up a ramdisk on Linux: +``` +sudo mount -t tmpfs none path/to/your/ramdisk +``` +And then set the [TMPDIR](https://doc.rust-lang.org/std/env/fn.temp_dir.html) environment variable +to make the fuzzer create its files in it: +``` +export TMPDIR=path/to/your/ramdisk +``` diff --git a/milli/fuzz/.gitignore b/milli/fuzz/.gitignore new file mode 100644 index 000000000..cb73742e4 --- /dev/null +++ b/milli/fuzz/.gitignore @@ -0,0 +1,2 @@ +/corpus/ +/artifacts/ diff --git a/milli/fuzz/Cargo.toml b/milli/fuzz/Cargo.toml new file mode 100644 index 000000000..04b329600 --- /dev/null +++ b/milli/fuzz/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "milli-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" } +serde_json = { version = "1.0.62", features = ["preserve_order"] } +anyhow = "1.0" +tempfile = "3.3" +arbitrary-json = { path = "../../../arbitrary-json" } + +[target.'cfg(target_os = "linux")'.dependencies] +jemallocator = "0.3.2" + +[dependencies.milli] +path = ".." 
+ +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[profile.release] +debug = true + +[[bin]] +name = "indexing" +path = "fuzz_targets/indexing.rs" +test = false +doc = false diff --git a/milli/fuzz/fuzz_targets/indexing.rs b/milli/fuzz/fuzz_targets/indexing.rs new file mode 100644 index 000000000..179ccf757 --- /dev/null +++ b/milli/fuzz/fuzz_targets/indexing.rs @@ -0,0 +1,76 @@ +#![no_main] + +use std::io::{BufWriter, Cursor, Read, Seek, Write}; + +use anyhow::{bail, Result}; +use arbitrary_json::ArbitraryValue; +use heed::EnvOpenOptions; +use libfuzzer_sys::fuzz_target; +use milli::documents::{DocumentBatchBuilder, DocumentBatchReader}; +use milli::update::UpdateBuilder; +use milli::Index; +use serde_json::Value; + +#[cfg(target_os = "linux")] +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + +/// Reads JSON from `input`, writes an obkv batch to `writer`; fails on an empty payload. +pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { + let writer = BufWriter::new(writer); + let mut builder = DocumentBatchBuilder::new(writer)?; + builder.extend_from_json(input)?; + + if builder.len() == 0 { + bail!("Empty payload"); + } + + let count = builder.finish()?; + + Ok(count) +} + +fn index_documents( + index: &mut milli::Index, + documents: DocumentBatchReader>>, +) -> Result<()> { + let update_builder = UpdateBuilder::new(); + let mut wtxn = index.write_txn()?; + let builder = update_builder.index_documents(&mut wtxn, &index); + + builder.execute(documents, |_| ())?; + wtxn.commit()?; + Ok(()) +} + +fn create_index() -> Result { + let dir = tempfile::tempdir().unwrap(); // NOTE(review): `dir` is dropped on return; confirm the index stays usable + let mut options = EnvOpenOptions::new(); + options.map_size(100 * 1024 * 1024 * 1024); // 100 GiB + options.max_readers(1); + Ok(Index::new(options, dir.path())?) 
+} + +fuzz_target!(|batches: Vec>| { + if let Ok(mut index) = create_index() { + for batch in batches { + let documents: Vec = + batch.into_iter().map(|value| serde_json::Value::from(value)).collect(); + let json = Value::Array(documents); + let json = serde_json::to_string(&json).unwrap(); + + let mut documents = Cursor::new(Vec::new()); + + // Skip any batch that `read_json` rejects (badly generated documents). + if let Ok(_count) = read_json(json.as_bytes(), &mut documents) { + let documents = DocumentBatchReader::from_reader(documents).unwrap(); + match index_documents(&mut index, documents) { + // NOTE(review): disabled arm, so every outcome is ignored: Err(e @ InternalError(_) | e @ IoError(_)) => panic!("{:?}", e), + _ => (), + } + } + } + + index.prepare_for_closing().wait(); + } +});