use crate::facet::FacetType; use crate::heed_codec::facet::{ ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, }; use crate::search::facet::get_highest_level; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; use heed::types::{ByteSlice, DecodeIgnore}; use heed::{BytesDecode, Error, RoTxn, RwTxn}; use roaring::RoaringBitmap; use std::collections::HashMap; use std::fs::File; enum InsertionResult { InPlace, Insert, } enum DeletionResult { InPlace, Reduce { next: Option> }, Remove { next: Option> }, } /// Algorithm to incrementally insert and delete elememts into the /// `facet_id_(string/f64)_docids` databases. /// /// Rhe `faceted_documents_ids` value in the main database of `Index` /// is also updated to contain the new set of faceted documents. pub struct FacetsUpdateIncremental<'i> { index: &'i Index, inner: FacetsUpdateIncrementalInner, facet_type: FacetType, new_data: grenad::Reader, } impl<'i> FacetsUpdateIncremental<'i> { pub fn new( index: &'i Index, facet_type: FacetType, new_data: grenad::Reader, group_size: u8, min_level_size: u8, max_group_size: u8, ) -> Self { FacetsUpdateIncremental { index, inner: FacetsUpdateIncrementalInner { db: match facet_type { FacetType::String => index .facet_id_string_docids .remap_key_type::>(), FacetType::Number => index .facet_id_f64_docids .remap_key_type::>(), }, group_size, max_group_size, min_level_size, }, facet_type, new_data, } } pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> { let mut new_faceted_docids = HashMap::::default(); let mut cursor = self.new_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? 
{ let key = FacetGroupKeyCodec::::bytes_decode(key) .ok_or(heed::Error::Encoding)?; let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?; *new_faceted_docids.entry(key.field_id).or_default() |= docids; } for (field_id, new_docids) in new_faceted_docids { let mut docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?; docids |= new_docids; self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?; } Ok(()) } } /// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type pub struct FacetsUpdateIncrementalInner { pub db: heed::Database, FacetGroupValueCodec>, pub group_size: u8, pub min_level_size: u8, pub max_group_size: u8, } impl FacetsUpdateIncrementalInner { /// Find the `FacetGroupKey`/`FacetGroupValue` in the database that /// should be used to insert the new `facet_value` for the given `field_id` and `level` /// where `level` must be strictly greater than 0. /// /// For example, when inserting the facet value `4`, there are two possibilities: /// /// 1. We find a key whose lower bound is 3 followed by a key whose lower bound is 6. Therefore, /// we know that the implicit range of the first key is 3..6, which contains 4. /// So the new facet value belongs in that first key/value pair. /// /// 2. The first key of the level has a lower bound of `5`. We return this key/value pair /// but will need to change the lowerbound of this key to `4` in order to insert this facet value. 
fn find_insertion_key_value( &self, field_id: u16, level: u8, facet_value: &[u8], txn: &RoTxn, ) -> Result<(FacetGroupKey>, FacetGroupValue)> { assert!(level > 0); let mut prefix = vec![]; prefix.extend_from_slice(&field_id.to_be_bytes()); prefix.push(level); prefix.extend_from_slice(facet_value); let mut prefix_iter = self .db .as_polymorph() .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, prefix.as_slice())?; if let Some(e) = prefix_iter.next() { let (key_bytes, value) = e?; Ok(( FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)? .into_owned(), value, )) } else { let key = FacetGroupKey { field_id, level, left_bound: facet_value }; match self.db.get_lower_than(txn, &key)? { Some((key, value)) => { if key.level != level { let mut prefix = vec![]; prefix.extend_from_slice(&field_id.to_be_bytes()); prefix.push(level); let mut iter = self .db .as_polymorph() .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( txn, &prefix.as_slice(), )?; let (key_bytes, value) = iter.next().unwrap()?; Ok(( FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)? 
.into_owned(), value, )) } else { Ok((key.into_owned(), value)) } } None => panic!(), } } } /// Insert the given facet value and corresponding document ids in the level 0 of the database /// /// ## Return /// See documentation of `insert_in_level` fn insert_in_level_0<'t>( &self, txn: &'t mut RwTxn, field_id: u16, facet_value: &[u8], docids: &RoaringBitmap, ) -> Result { let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value }; let value = FacetGroupValue { bitmap: docids.clone(), size: 1 }; let mut level0_prefix = vec![]; level0_prefix.extend_from_slice(&field_id.to_be_bytes()); level0_prefix.push(0); let mut iter = self .db .as_polymorph() .prefix_iter::<_, ByteSlice, DecodeIgnore>(&txn, &level0_prefix)?; if iter.next().is_none() { drop(iter); self.db.put(txn, &key, &value)?; return Ok(InsertionResult::Insert); } else { drop(iter); let old_value = self.db.get(&txn, &key)?; match old_value { Some(mut updated_value) => { // now merge the two updated_value.bitmap |= value.bitmap; self.db.put(txn, &key, &updated_value)?; Ok(InsertionResult::InPlace) } None => { self.db.put(txn, &key, &value)?; Ok(InsertionResult::Insert) } } } } /// Insert the given facet value and corresponding document ids in all the levels of the database up to the given `level`. /// This function works recursively. /// /// ## Return /// Returns the effect of adding the facet value to the database on the given `level`. /// /// - `InsertionResult::InPlace` means that inserting the `facet_value` into the `level` did not have /// an effect on the number of keys in that level. Therefore, it did not increase the number of children /// of the parent node. /// /// - `InsertionResult::Insert` means that inserting the `facet_value` into the `level` resulted /// in the addition of a new key in that level, and that therefore the number of children /// of the parent node should be incremented. 
fn insert_in_level<'t>( &self, txn: &'t mut RwTxn, field_id: u16, level: u8, facet_value: &[u8], docids: &RoaringBitmap, ) -> Result { if level == 0 { return self.insert_in_level_0(txn, field_id, facet_value, docids); } let max_group_size = self.max_group_size; let result = self.insert_in_level(txn, field_id, level - 1, facet_value.clone(), docids)?; // level below inserted an element let (insertion_key, insertion_value) = self.find_insertion_key_value(field_id, level, facet_value, txn)?; match result { // because we know that we inserted in place, the facet_value is not a new one // thus it doesn't extend a group, and thus the insertion key computed above is // still correct InsertionResult::InPlace => { let mut updated_value = insertion_value; updated_value.bitmap |= docids; self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; return Ok(InsertionResult::InPlace); } InsertionResult::Insert => {} } // Here we know that inserting the facet value in the level below resulted in the creation // of a new key. Therefore, it may be the case that we need to modify the left bound of the // insertion key (see documentation of `find_insertion_key_value` for an example of when that // could happen). let insertion_key = { let mut new_insertion_key = insertion_key.clone(); let mut key_should_be_modified = false; if facet_value < insertion_key.left_bound.as_slice() { new_insertion_key.left_bound = facet_value.to_vec(); key_should_be_modified = true; } if key_should_be_modified { let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?; assert!(is_deleted); self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?; } new_insertion_key }; // Now we know that the insertion key contains the `facet_value`. // We still need to update the insertion value by: // 1. Incrementing the number of children (since the recursive call returned `InsertionResult::Insert`) // 2. 
Merge the previous docids with the new one let mut updated_value = insertion_value; updated_value.size += 1; if updated_value.size < max_group_size { updated_value.bitmap |= docids; self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; return Ok(InsertionResult::InPlace); } // We've increased the group size of the value and realised it has become greater than or equal to `max_group_size` // Therefore it must be split into two nodes. let size_left = max_group_size / 2; let size_right = max_group_size - size_left; let level_below = level - 1; let start_key = FacetGroupKey { field_id, level: level_below, left_bound: insertion_key.left_bound.as_slice(), }; let mut iter = self.db.range(&txn, &(start_key..))?.take(max_group_size as usize); let group_left = { let mut values_left = RoaringBitmap::new(); let mut i = 0; while let Some(next) = iter.next() { let (_key, value) = next?; i += 1; values_left |= &value.bitmap; if i == size_left { break; } } let key = FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() }; let value = FacetGroupValue { size: size_left as u8, bitmap: values_left }; (key, value) }; let group_right = { let ( FacetGroupKey { left_bound: right_left_bound, .. }, FacetGroupValue { bitmap: mut values_right, .. }, ) = iter.next().unwrap()?; while let Some(next) = iter.next() { let (_, value) = next?; values_right |= &value.bitmap; } let key = FacetGroupKey { field_id, level, left_bound: right_left_bound.to_vec() }; let value = FacetGroupValue { size: size_right as u8, bitmap: values_right }; (key, value) }; drop(iter); let _ = self.db.delete(txn, &insertion_key.as_ref())?; self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?; self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?; Ok(InsertionResult::Insert) } /// Insert the given facet value and corresponding document ids in the database. 
pub fn insert<'a, 't>( &self, txn: &'t mut RwTxn, field_id: u16, facet_value: &[u8], docids: &RoaringBitmap, ) -> Result<()> { if docids.is_empty() { return Ok(()); } let group_size = self.group_size; let highest_level = get_highest_level(&txn, self.db, field_id)?; let result = self.insert_in_level(txn, field_id, highest_level as u8, facet_value, docids)?; match result { InsertionResult::InPlace => return Ok(()), InsertionResult::Insert => {} } // Here we check whether the highest level has exceeded `min_level_size` * `self.group_size`. // If it has, we must build an addition level above it. let mut highest_level_prefix = vec![]; highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); highest_level_prefix.push(highest_level); let size_highest_level = self .db .as_polymorph() .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)? .count(); if size_highest_level < self.group_size as usize * self.min_level_size as usize { return Ok(()); } let mut groups_iter = self .db .as_polymorph() .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &highest_level_prefix)?; let mut to_add = vec![]; for _ in 0..self.min_level_size { let mut first_key = None; let mut values = RoaringBitmap::new(); for _ in 0..group_size { let (key_bytes, value_i) = groups_iter.next().unwrap()?; let key_i = FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)?; if first_key.is_none() { first_key = Some(key_i); } values |= value_i.bitmap; } let key = FacetGroupKey { field_id, level: highest_level + 1, left_bound: first_key.unwrap().left_bound, }; let value = FacetGroupValue { size: group_size as u8, bitmap: values }; to_add.push((key.into_owned(), value)); } drop(groups_iter); for (key, value) in to_add { self.db.put(txn, &key.as_ref(), &value)?; } Ok(()) } /// Delete the given document id from the given facet value in the database, from level 0 to the /// the given level. 
///
/// ## Return
/// Returns the effect of removing the document id from the database on the given `level`.
///
/// - `DeletionResult::InPlace` means that deleting the document id did not have
/// an effect on the keys in that level.
///
/// - `DeletionResult::Remove` means that deleting the document id resulted in a change in the
/// number of keys in the level. For example, removing a document id from the facet value `3` could
/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
/// entirely. In that case, `DeletionResult::Remove` is returned. The parent of the deleted key must
/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
///
/// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the
/// bounds of the keys of the level. For example, removing a document id from the facet value
/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
/// In that case `DeletionResult::Reduce` is returned. The parent of the reduced key may need to adjust
/// its left bound as well.
fn delete_in_level<'t>(
    &self,
    txn: &'t mut RwTxn,
    field_id: u16,
    level: u8,
    facet_value: &[u8],
    docid: u32,
) -> Result<DeletionResult> {
    if level == 0 {
        return self.delete_in_level_0(txn, field_id, facet_value, docid);
    }
    // The group of this level whose range contains `facet_value`.
    let (deletion_key, mut group_value) =
        self.find_insertion_key_value(field_id, level, facet_value, txn)?;

    let result = self.delete_in_level(txn, field_id, level - 1, facet_value, docid)?;

    let mut decrease_size = false;
    let next_key = match result {
        DeletionResult::InPlace => {
            // NOTE(review): the docid is removed from the group bitmap even if another
            // facet value of the same group still contains it — confirm that callers
            // never rely on a document having several values for one field.
            group_value.bitmap.remove(docid);
            self.db.put(txn, &deletion_key.as_ref(), &group_value)?;
            return Ok(DeletionResult::InPlace);
        }
        DeletionResult::Reduce { next } => next,
        DeletionResult::Remove { next } => {
            decrease_size = true;
            next
        }
    };
    // If either DeletionResult::Reduce or DeletionResult::Remove was returned,
    // then we may need to adjust the left_bound of the deletion key.

    // If DeletionResult::Remove was returned, then we need to decrease the group
    // size of the deletion key.
    let mut updated_value = group_value;
    if decrease_size {
        updated_value.size -= 1;
    }

    if updated_value.size == 0 {
        // The group lost its last child: remove it and let the parent adjust itself.
        self.db.delete(txn, &deletion_key.as_ref())?;
        return Ok(DeletionResult::Remove { next: next_key });
    }

    let mut updated_deletion_key = deletion_key.clone();
    // The group's left bound was the deleted facet value: it must move to the next one.
    let reduced_range = facet_value == deletion_key.left_bound;
    if reduced_range {
        updated_deletion_key.left_bound = next_key
            .clone()
            .expect("a non-empty group whose left bound was removed must have a next key");
    }
    updated_value.bitmap.remove(docid);
    let _ = self.db.delete(txn, &deletion_key.as_ref())?;
    self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?;
    if reduced_range {
        Ok(DeletionResult::Reduce { next: next_key })
    } else {
        Ok(DeletionResult::InPlace)
    }
}

/// Delete the given document id from the given facet value in level 0 of the database.
///
/// Returns `DeletionResult::Remove` when the facet value has no document left and its key
/// was deleted; `next` is then the left bound of the following level 0 key of the same
/// field, if any. Returns `DeletionResult::InPlace` otherwise.
///
/// ## Panics
/// Panics if the facet value has no key in level 0.
fn delete_in_level_0<'t>(
    &self,
    txn: &'t mut RwTxn,
    field_id: u16,
    facet_value: &[u8],
    docid: u32,
) -> Result<DeletionResult> {
    let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
    let mut bitmap = self
        .db
        .get(&txn, &key)?
        .expect("the facet value must exist in level 0 before a deletion")
        .bitmap;
    bitmap.remove(docid);

    if bitmap.is_empty() {
        // Only a key of the same field and of level 0 counts as the next facet value.
        let next_key = self
            .db
            .remap_data_type::<DecodeIgnore>()
            .get_greater_than(&txn, &key)?
            .filter(|(next, _)| next.field_id == field_id && next.level == 0)
            .map(|(next, _)| next.left_bound.to_vec());
        self.db.delete(txn, &key)?;
        Ok(DeletionResult::Remove { next: next_key })
    } else {
        self.db.put(txn, &key, &FacetGroupValue { size: 1, bitmap })?;
        Ok(DeletionResult::InPlace)
    }
}

/// Delete the given document id from the given facet value in the database.
///
/// Does nothing if the facet value has no key in level 0. If the deletion removed a key
/// from the highest level and that level now holds fewer than `min_level_size` keys,
/// the whole level is removed (level 0 is never removed this way).
pub fn delete<'a, 't>(
    &self,
    txn: &'t mut RwTxn,
    field_id: u16,
    facet_value: &[u8],
    docid: u32,
) -> Result<()> {
    if self
        .db
        .remap_data_type::<DecodeIgnore>()
        .get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })?
        .is_none()
    {
        return Ok(());
    }
    let highest_level = get_highest_level(&txn, self.db, field_id)?;

    let result = self.delete_in_level(txn, field_id, highest_level, facet_value, docid)?;
    match result {
        DeletionResult::InPlace | DeletionResult::Reduce { .. } => return Ok(()),
        DeletionResult::Remove { .. } => {}
    }

    // if we removed a key from the highest level, its size may have fallen
    // below `min_level_size`, in which case we need to remove the entire level

    let mut highest_level_prefix = vec![];
    highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
    highest_level_prefix.push(highest_level);

    if highest_level == 0
        || self
            .db
            .as_polymorph()
            .prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)?
            .count()
            >= usize::from(self.min_level_size)
    {
        return Ok(());
    }

    // Collect the keys first: the iterator borrows the transaction needed to delete them.
    let mut to_delete = vec![];
    let iter = self
        .db
        .as_polymorph()
        .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?;
    for el in iter {
        let (k, _) = el?;
        to_delete.push(
            FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k)
                .ok_or(Error::Encoding)?
                .into_owned(),
        );
    }
    for k in to_delete {
        self.db.delete(txn, &k.as_ref())?;
    }
    Ok(())
}
} // end of `impl FacetsUpdateIncrementalInner`

impl<'a> FacetGroupKey<&'a [u8]> {
    /// Copy the borrowed left bound into an owned key.
    pub fn into_owned(self) -> FacetGroupKey<Vec<u8>> {
        FacetGroupKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.to_vec(),
        }
    }
}

impl FacetGroupKey<Vec<u8>> {
    /// Borrow this owned key as a key over a byte slice.
    pub fn as_ref(&self) -> FacetGroupKey<&[u8]> {
        FacetGroupKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
    use crate::milli_snap;
    use crate::update::facet::tests::FacetIndex;
    use rand::seq::SliceRandom;
    use rand::{Rng, SeedableRng};
    use roaring::RoaringBitmap;

    #[test]
    fn append() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        for i in 0..256u16 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            let mut txn = index.env.write_txn().unwrap();
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
            txn.commit().unwrap();
        }
        let txn = index.env.read_txn().unwrap();
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"));
    }

    #[test]
    fn many_field_ids_append() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        for i in 0..256u16 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            let mut txn = index.env.write_txn().unwrap();
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
            txn.commit().unwrap();
        }
        for i in 0..256u16 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            let mut txn = index.env.write_txn().unwrap();
            index.insert(&mut txn, 2, &(i as f64), &bitmap);
            txn.commit().unwrap();
        }
        for i in 0..256u16 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            let mut txn = index.env.write_txn().unwrap();
            index.insert(&mut txn, 1, &(i as f64), &bitmap);
            txn.commit().unwrap();
        }
        let txn = index.env.read_txn().unwrap();
        index.verify_structure_validity(&txn, 0);
        index.verify_structure_validity(&txn, 1);
        index.verify_structure_validity(&txn, 2);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"));
    }

    #[test]
    fn many_field_ids_prepend() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        for i in (0..256).rev() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            let mut txn = index.env.write_txn().unwrap();
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
            txn.commit().unwrap();
        }
        for i in (0..256).rev() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            let mut txn = index.env.write_txn().unwrap();
            index.insert(&mut txn, 2, &(i as f64), &bitmap);
            txn.commit().unwrap();
        }
        for i in (0..256).rev() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i as u32);
            let mut txn = index.env.write_txn().unwrap();
            index.insert(&mut txn, 1, &(i as f64), &bitmap);
            txn.commit().unwrap();
        }
        let txn = index.env.read_txn().unwrap();
        index.verify_structure_validity(&txn, 0);
        index.verify_structure_validity(&txn, 1);
        index.verify_structure_validity(&txn, 2);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"));
    }

    #[test]
    fn prepend() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        for i in (0..256).rev() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i);
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
        }

        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"));
    }

    #[test]
    fn shuffled() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        let mut keys = (0..256).collect::<Vec<_>>();
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        keys.shuffle(&mut rng);

        for key in keys {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            index.insert(&mut txn, 0, &(key as f64), &bitmap);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"));
    }

    #[test]
    fn merge_values() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        let mut keys = (0..256).collect::<Vec<_>>();
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        keys.shuffle(&mut rng);

        for key in keys {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            bitmap.insert(rng.gen_range(256..512));
            index.verify_structure_validity(&txn, 0);
            index.insert(&mut txn, 0, &(key as f64), &bitmap);
        }

        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"));
    }

    #[test]
    fn delete_from_end() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();
        for i in 0..256 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i);
            index.verify_structure_validity(&txn, 0);
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
        }

        for i in (200..256).rev() {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 200);
        let mut txn = index.env.write_txn().unwrap();

        for i in (150..200).rev() {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 150);
        let mut txn = index.env.write_txn().unwrap();
        for i in (100..150).rev() {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 100);
        let mut txn = index.env.write_txn().unwrap();
        for i in (17..100).rev() {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 17);
        let mut txn = index.env.write_txn().unwrap();
        for i in (15..17).rev() {
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 15);
        let mut txn = index.env.write_txn().unwrap();
        for i in (0..15).rev() {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 0);
    }

    #[test]
    fn delete_from_start() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        for i in 0..256 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i);
            index.verify_structure_validity(&txn, 0);
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
        }

        for i in 0..128 {
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 127);
        let mut txn = index.env.write_txn().unwrap();
        for i in 128..216 {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 215);
        let mut txn = index.env.write_txn().unwrap();
        for i in 216..256 {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(i as f64), i as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 255);
    }

    #[test]
    fn delete_shuffled() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();
        for i in 0..256 {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(i);
            index.verify_structure_validity(&txn, 0);
            index.insert(&mut txn, 0, &(i as f64), &bitmap);
        }

        let mut keys = (0..256).collect::<Vec<_>>();
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        keys.shuffle(&mut rng);

        for i in 0..128 {
            let key = keys[i];
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(key as f64), key as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 127);
        let mut txn = index.env.write_txn().unwrap();
        for i in 128..216 {
            let key = keys[i];
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(key as f64), key as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        let mut txn = index.env.write_txn().unwrap();
        milli_snap!(format!("{index}"), 215);
        for i in 216..256 {
            let key = keys[i];
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(key as f64), key as u32);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), 255);
    }

    #[test]
    fn in_place_level0_insert() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        let mut keys = (0..16).collect::<Vec<_>>();
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        keys.shuffle(&mut rng);
        for i in 0..4 {
            for &key in keys.iter() {
                let mut bitmap = RoaringBitmap::new();
                bitmap.insert(rng.gen_range(i * 256..(i + 1) * 256));
                index.verify_structure_validity(&txn, 0);
                index.insert(&mut txn, 0, &(key as f64), &bitmap);
            }
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"));
    }

    #[test]
    fn in_place_level0_delete() {
        let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        let mut keys = (0..64).collect::<Vec<_>>();
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        keys.shuffle(&mut rng);

        for &key in keys.iter() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            bitmap.insert(key + 100);
            index.verify_structure_validity(&txn, 0);

            index.insert(&mut txn, 0, &(key as f64), &bitmap);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), "before_delete");

        let mut txn = index.env.write_txn().unwrap();

        for &key in keys.iter() {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &(key as f64), key + 100);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), "after_delete");
    }

    #[test]
    fn shuffle_merge_string_and_delete() {
        let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
        let mut txn = index.env.write_txn().unwrap();

        let mut keys = (1000..1064).collect::<Vec<_>>();
        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
        keys.shuffle(&mut rng);

        for &key in keys.iter() {
            let mut bitmap = RoaringBitmap::new();
            bitmap.insert(key);
            bitmap.insert(key + 100);
            index.verify_structure_validity(&txn, 0);
            index.insert(&mut txn, 0, &format!("{key:x}").as_str(), &bitmap);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), "before_delete");

        let mut txn = index.env.write_txn().unwrap();

        for &key in keys.iter() {
            index.verify_structure_validity(&txn, 0);
            index.delete(&mut txn, 0, &format!("{key:x}").as_str(), key + 100);
        }
        index.verify_structure_validity(&txn, 0);
        txn.commit().unwrap();
        milli_snap!(format!("{index}"), "after_delete");
    }

    // fuzz tests
}

// #[cfg(all(test, fuzzing))]
// mod fuzz {
//     use crate::codec::U16Codec;
//     use super::tests::verify_structure_validity;
//     use super::*;
//     use fuzzcheck::mutators::integer_within_range::U16WithinRangeMutator;
//     use fuzzcheck::DefaultMutator;
//     use roaring::RoaringBitmap;
//     use std::collections::BTreeMap;
//     use std::collections::HashMap;
//     #[derive(Default)]
//     pub struct TrivialDatabase {
//         pub elements: BTreeMap>,
//     }
//     impl TrivialDatabase
//     where
//         T: Ord + Clone + Copy + Eq + std::fmt::Debug,
//     {
//         pub fn insert(&mut self, field_id: u16, new_key: T, new_values: &RoaringBitmap) {
//             if new_values.is_empty() {
//                 return;
//             }
//             let values_field_id = self.elements.entry(field_id).or_default();
//             let values = values_field_id.entry(new_key).or_default();
//             *values |= new_values;
//         }
//         pub fn delete(&mut self, field_id: u16, key: T, value: u32) {
//             if let Some(values_field_id) = self.elements.get_mut(&field_id) {
//                 if let Some(values) = values_field_id.get_mut(&key) {
//
values.remove(value); // if values.is_empty() { // values_field_id.remove(&key); // } // } // if values_field_id.is_empty() { // self.elements.remove(&field_id); // } // } // } // } // #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] // struct Operation { // key: Key, // #[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })] // field_id: u16, // kind: OperationKind, // } // #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] // enum OperationKind { // Insert(Vec), // Delete(u8), // } // fn compare_with_trivial_database( // tempdir: Rc, // group_size: u8, // max_group_size: u8, // operations: &[Operation], // ) { // let index = FacetIndex::::open_from_tempdir(tempdir, group_size, max_group_size); // let mut trivial_db = TrivialDatabase::::default(); // let mut value_to_keys = HashMap::>::new(); // let mut txn = index.env.write_txn().unwrap(); // for Operation { key, field_id, kind } in operations { // match kind { // OperationKind::Insert(values) => { // let mut bitmap = RoaringBitmap::new(); // for value in values { // bitmap.insert(*value as u32); // value_to_keys.entry(*value).or_default().push(*key); // } // index.insert(&mut txn, *field_id, key, &bitmap); // trivial_db.insert(*field_id, *key, &bitmap); // } // OperationKind::Delete(value) => { // if let Some(keys) = value_to_keys.get(value) { // for key in keys { // index.delete(&mut txn, *field_id, key, *value as u32); // trivial_db.delete(*field_id, *key, *value as u32); // } // } // } // } // } // for (field_id, values_field_id) in trivial_db.elements.iter() { // let level0iter = index // .db // .content // .as_polymorph() // .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( // &mut txn, // &field_id.to_be_bytes(), // ) // .unwrap(); // for ((key, values), group) in values_field_id.iter().zip(level0iter) { // let (group_key, group_values) = group.unwrap(); // let group_key = FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); // 
assert_eq!(key, &group_key.left_bound); // assert_eq!(values, &group_values.bitmap); // } // } // txn.commit().unwrap(); // let mut txn = index.env.write_txn().unwrap(); // for (field_id, values_field_id) in trivial_db.elements.iter() { // let level0iter = index // .db // .content // .as_polymorph() // .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) // .unwrap(); // for ((key, values), group) in values_field_id.iter().zip(level0iter) { // let (group_key, group_values) = group.unwrap(); // let group_key = FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); // assert_eq!(key, &group_key.left_bound); // assert_eq!(values, &group_values.bitmap); // } // index.verify_structure_validity(*field_id); // } // index.db.content.clear(&mut txn).unwrap(); // txn.commit().unwrap(); // } // #[test] // fn fuzz() { // let tempdir = Rc::new(TempDir::new().unwrap()); // let tempdir_cloned = tempdir.clone(); // let result = fuzzcheck::fuzz_test(move |x: &(u8, u8, Vec>)| { // compare_with_trivial_database(tempdir_cloned.clone(), x.0, x.1, &x.2) // }) // .default_mutator() // .serde_serializer() // .default_sensor_and_pool_with_custom_filter(|file, function| { // if file.is_relative() // && !function.contains("serde") // && !function.contains("tests::") // && !function.contains("fuzz::") // && !function.contains("display_bitmap") // { // true // } else { // false // } // }) // .arguments_from_cargo_fuzzcheck() // .launch(); // assert!(!result.found_test_failure); // } // #[test] // fn reproduce_bug() { // let operations = r#" // [ // {"key":0, "field_id": 0, "kind":{"Insert":[109]}}, // {"key":143, "field_id": 0, "kind":{"Insert":[243]}}, // {"key":90, "field_id": 0, "kind":{"Insert":[217]}}, // {"key":172, "field_id": 0, "kind":{"Insert":[94]}}, // {"key":27, "field_id": 0, "kind":{"Insert":[4]}}, // {"key":124, "field_id": 0, "kind":{"Insert":[0]}}, // {"key":123, "field_id": 0, "kind":{"Insert":[0]}}, // {"key":67, "field_id": 0, 
"kind":{"Insert":[109]}}, // {"key":13, "field_id": 0, "kind":{"Insert":[0]}}, // {"key":162, "field_id": 0, "kind":{"Insert":[213]}}, // {"key":235, "field_id": 0, "kind":{"Insert":[67]}}, // {"key":251, "field_id": 0, "kind":{"Insert":[50]}}, // {"key":218, "field_id": 0, "kind":{"Insert":[164]}}, // {"key":166, "field_id": 0, "kind":{"Insert":[67]}}, // {"key":64, "field_id": 0, "kind":{"Insert":[61]}}, // {"key":183, "field_id": 0, "kind":{"Insert":[210]}}, // {"key":250, "field_id": 0, "kind":{"Delete":50}} // ] // "#; // let operations: Vec> = serde_json::from_str(operations).unwrap(); // let tempdir = TempDir::new().unwrap(); // compare_with_trivial_database(Rc::new(tempdir), 4, 8, &operations); // } // #[test] // fn reproduce_bug2() { // let operations = r#" // [ // {"key":102, "field_id": 0, "kind":{"Insert":[122]}}, // {"key":73, "field_id": 0, "kind":{"Insert":[132]}}, // {"key":20, "field_id": 0, "kind":{"Insert":[215]}}, // {"key":39, "field_id": 0, "kind":{"Insert":[152]}}, // {"key":151, "field_id": 0, "kind":{"Insert":[226]}}, // {"key":17, "field_id": 0, "kind":{"Insert":[101]}}, // {"key":74, "field_id": 0, "kind":{"Insert":[210]}}, // {"key":2, "field_id": 0, "kind":{"Insert":[130]}}, // {"key":64, "field_id": 0, "kind":{"Insert":[180]}}, // {"key":83, "field_id": 0, "kind":{"Insert":[250]}}, // {"key":80, "field_id": 0, "kind":{"Insert":[210]}}, // {"key":113, "field_id": 0, "kind":{"Insert":[63]}}, // {"key":201, "field_id": 0, "kind":{"Insert":[210]}}, // {"key":200, "field_id": 0, "kind":{"Insert":[5]}}, // {"key":93, "field_id": 0, "kind":{"Insert":[98]}}, // {"key":162, "field_id": 0, "kind":{"Insert":[5]}}, // {"key":80, "field_id": 0, "kind":{"Delete":210}} // ] // "#; // let operations: Vec> = serde_json::from_str(operations).unwrap(); // let tempdir = TempDir::new().unwrap(); // compare_with_trivial_database(Rc::new(tempdir), 4, 8, &operations); // } // #[test] // fn reproduce_bug3() { // let operations = r#" // [ // {"key":27488, 
"field_id": 0, "kind":{"Insert":[206]}}, // {"key":64716, "field_id": 0, "kind":{"Insert":[216]}}, // {"key":60886, "field_id": 0, "kind":{"Insert":[206]}}, // {"key":59509, "field_id": 0, "kind":{"Insert":[187,231]}}, // {"key":55057, "field_id": 0, "kind":{"Insert":[37]}}, // {"key":45200, "field_id": 0, "kind":{"Insert":[206]}}, // {"key":55056, "field_id": 0, "kind":{"Insert":[37]}}, // {"key":63679, "field_id": 0, "kind":{"Insert":[206]}}, // {"key":52155, "field_id": 0, "kind":{"Insert":[74]}}, // {"key":20648, "field_id": 0, "kind":{"Insert":[47,138,157]}} // ] // "#; // let operations: Vec> = serde_json::from_str(operations).unwrap(); // let tempdir = TempDir::new().unwrap(); // compare_with_trivial_database(Rc::new(tempdir), 0, 7, &operations); // } // #[test] // fn reproduce_bug4() { // let operations = r#" // [{"key":63499, "field_id": 0, "kind":{"Insert":[87]}},{"key":25374, "field_id": 0, "kind":{"Insert":[14]}},{"key":64481, "field_id": 0, "kind":{"Delete":87}},{"key":23038, "field_id": 0, "kind":{"Insert":[173]}},{"key":14862, "field_id": 0, "kind":{"Insert":[8]}},{"key":13145, "field_id": 0, "kind":{"Insert":[5,64]}},{"key":23446, "field_id": 0, "kind":{"Insert":[86,59]}},{"key":17972, "field_id": 0, "kind":{"Insert":[58,137]}},{"key":21273, "field_id": 0, "kind":{"Insert":[121,132,81,147]}},{"key":28264, "field_id": 0, "kind":{"Insert":[36]}},{"key":46659, "field_id": 0, "kind":{"Insert":[]}}] // "#; // let operations: Vec> = serde_json::from_str(operations).unwrap(); // let tempdir = TempDir::new().unwrap(); // compare_with_trivial_database(Rc::new(tempdir), 2, 1, &operations); // } // }