From 2295e0e3ce32d72c9960d1ebfc04b637d07b5047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 12 Oct 2022 10:23:40 +0200 Subject: [PATCH] Use real delete function in facet indexing fuzz tests By deleting multiple docids at once instead of one-by-one --- milli/src/update/facet/incremental.rs | 49 ++++++++++++++++++--------- milli/src/update/facet/mod.rs | 18 ++++------ 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index 9dda86a46..a4c756aec 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -1018,25 +1018,26 @@ mod tests { txn.commit().unwrap(); milli_snap!(format!("{index}"), "after_delete"); } - - // fuzz tests } +// fuzz tests #[cfg(all(test, fuzzing))] mod fuzz { use std::borrow::Cow; use std::collections::{BTreeMap, HashMap}; use std::convert::TryFrom; + use std::iter::FromIterator; use std::rc::Rc; + use fuzzcheck::mutators::integer::U8Mutator; use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator}; + use fuzzcheck::mutators::vector::VecMutator; use fuzzcheck::DefaultMutator; use heed::BytesEncode; use roaring::RoaringBitmap; use tempfile::TempDir; use super::*; - use crate::milli_snap; use crate::update::facet::tests::FacetIndex; struct NEU16Codec; @@ -1074,10 +1075,10 @@ mod fuzz { *values |= new_values; } #[no_coverage] - pub fn delete(&mut self, field_id: u16, key: T, value: u32) { + pub fn delete(&mut self, field_id: u16, key: T, values_to_remove: &RoaringBitmap) { if let Some(values_field_id) = self.elements.get_mut(&field_id) { if let Some(values) = values_field_id.get_mut(&key) { - values.remove(value); + *values -= values_to_remove; if values.is_empty() { values_field_id.remove(&key); } @@ -1103,8 +1104,14 @@ mod fuzz { } #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] enum OperationKind { - Insert(Vec), - Delete(u8), + Insert( + #[field_mutator(VecMutator = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] + Vec, + ), + Delete( + #[field_mutator(VecMutator = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] + Vec, + ), } #[no_coverage] @@ -1131,13 +1138,23 @@ mod fuzz { index.insert(&mut txn, *field_id, key, &bitmap); trivial_db.insert(*field_id, *key, &bitmap); } - OperationKind::Delete(value) => { - if let Some(keys) = value_to_keys.get(value) { - for key in keys { - index.delete_single_docid(&mut txn, *field_id, key, *value as u32); - trivial_db.delete(*field_id, *key, *value as u32); + OperationKind::Delete(values) => { + let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32)); + let mut values_per_key = HashMap::new(); + + for value in values { + if let Some(keys) = value_to_keys.get(&(value as u8)) { + for key in keys { + let values: &mut RoaringBitmap = + values_per_key.entry(key).or_default(); + values.insert(value); + } } } + for (key, values) in values_per_key { + index.delete(&mut txn, *field_id, &key, &values); + trivial_db.delete(*field_id, *key, &values); + } } } } @@ -1221,7 +1238,7 @@ mod fuzz { {"key":166, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[67]}}, {"key":64, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[61]}}, {"key":183, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[210]}}, - {"key":250, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":50}} + {"key":250, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":[50]}} ] "#; let operations: Vec> = serde_json::from_str(operations).unwrap(); @@ -1250,7 +1267,7 @@ mod fuzz { {"key":200, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}}, {"key":93, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[98]}}, {"key":162, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}}, - {"key":80, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":210}} + {"key":80, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":[210]}} ] "#; let operations: Vec> = serde_json::from_str(operations).unwrap(); @@ -1285,7 +1302,7 @@ mod fuzz { let operations = r#"[ {"key":63499, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[87]}}, {"key":25374, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[14]}}, - {"key":64481, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Delete":87}}, + {"key":64481, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Delete":[87]}}, {"key":23038, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[173]}}, {"key":14862, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[8]}}, {"key":13145, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[5,64]}}, @@ -1337,7 +1354,7 @@ mod fuzz { "max_group_size":4, "min_level_size":25, "field_id":3, - "kind":{"Delete":11} + "kind":{"Delete":[11]} } ] "#; diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index a5d527282..5fb5c9e48 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -76,13 +76,14 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8; pub const FACET_GROUP_SIZE: u8 = 4; pub const FACET_MIN_LEVEL_SIZE: u8 = 5; +use std::fs::File; + use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; use crate::facet::FacetType; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::ByteSliceRefCodec; use crate::{Index, Result}; -use std::fs::File; pub mod bulk; pub mod delete; @@ -153,6 +154,7 @@ impl<'i> FacetsUpdate<'i> { pub(crate) mod tests { use std::cell::Cell; use std::fmt::Display; + use std::iter::FromIterator; use std::marker::PhantomData; use std::rc::Rc; @@ -170,7 +172,7 @@ pub(crate) mod tests { use crate::update::FacetsUpdateIncrementalInner; use crate::CboRoaringBitmapCodec; - // A dummy index that only contains the facet database, used for testing + /// A dummy index that only contains the facet database, used for testing pub struct FacetIndex where for<'a> BoundCodec: @@ -287,17 +289,9 @@ pub(crate) mod tests { key: &'a >::EItem, docid: u32, ) { - let update = FacetsUpdateIncrementalInner { - db: self.content, - group_size: self.group_size.get(), - min_level_size: self.min_level_size.get(), - max_group_size: self.max_group_size.get(), - }; - let key_bytes = BoundCodec::bytes_encode(&key).unwrap(); - let mut docids = RoaringBitmap::new(); - docids.insert(docid); - update.delete(wtxn, field_id, &key_bytes, &docids).unwrap(); + self.delete(wtxn, field_id, key, &RoaringBitmap::from_iter(std::iter::once(docid))) } + pub fn delete<'a>( &self, wtxn: &'a mut RwTxn,