From 507bce791b50d2913d01b78581d715cf0ecd823c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 21 Jul 2024 15:40:32 +0200 Subject: [PATCH] Make sure all keys are prefixed --- milli/src/update/index_documents/cache.rs | 12 ++++++------ .../extract/extract_docid_word_positions.rs | 4 +++- .../extract/extract_facet_string_docids.rs | 4 +++- .../extract/extract_fid_docid_facet_values.rs | 16 ++++++++++++---- .../extract/extract_word_docids.rs | 4 +++- milli/src/update/index_documents/extract/mod.rs | 4 ++-- 6 files changed, 29 insertions(+), 15 deletions(-) diff --git a/milli/src/update/index_documents/cache.rs b/milli/src/update/index_documents/cache.rs index dc4404ab2..7c57c0b3e 100644 --- a/milli/src/update/index_documents/cache.rs +++ b/milli/src/update/index_documents/cache.rs @@ -11,7 +11,7 @@ use smallvec::SmallVec; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; use crate::CboRoaringBitmapCodec; -const DISABLED: bool = true; +const ENABLED: bool = true; pub struct SorterCacheDelAddCboRoaringBitmap { cache: ArcCache, DelAddRoaringBitmap>, @@ -45,7 +45,7 @@ where MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> Result, U>, { pub fn insert_del_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error> { - if DISABLED { + if !ENABLED { return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_del_u32(n)); } @@ -73,7 +73,7 @@ where key: &[u8], bitmap: RoaringBitmap, ) -> Result<(), grenad::Error> { - if DISABLED { + if !ENABLED { return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_del(bitmap)); } @@ -97,7 +97,7 @@ where } pub fn insert_add_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error> { - if DISABLED { + if !ENABLED { return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_add_u32(n)); } @@ -125,7 +125,7 @@ where key: &[u8], bitmap: RoaringBitmap, ) -> Result<(), grenad::Error> { - if DISABLED { + if !ENABLED { return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_add(bitmap)); } @@ -149,7 +149,7 @@ where } pub fn insert_del_add_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error> { - if DISABLED { + if !ENABLED { return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_del_add_u32(n)); } diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 76d74bcd4..51b901f87 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -150,7 +150,9 @@ pub fn extract_docid_word_positions( for (field_id, value) in obkv.iter() { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&field_id.to_be_bytes()); - conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap(); + let mut key = b"dwp".to_vec(); + key.extend_from_slice(&key_buffer); + conn.merge(key, 1u32.to_ne_bytes()).unwrap(); docid_word_positions_sorter.insert(&key_buffer, value)?; } diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 52e734269..8adfd6ae0 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -106,7 +106,9 @@ pub fn extract_facet_string_docids( let key = (field_id, hyper_normalized_value.as_ref()); let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; - conn.merge(key_bytes.as_ref(), 1u32.to_ne_bytes()).unwrap(); + let mut key = b"nfs".to_vec(); + key.extend_from_slice(&key_bytes); + conn.merge(key, 1u32.to_ne_bytes()).unwrap(); normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?; } diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index c3b21c9e2..e99cb537f 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -366,7 +366,9 @@ where let mut obkv = KvWriterDelAdd::memory(); obkv.insert(DelAdd::Deletion, bytes_of(&()))?; let bytes = obkv.into_inner()?; - conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap(); + let mut key = b"dfn".to_vec(); + key.extend_from_slice(key_buffer); + conn.merge(key, 1u32.to_ne_bytes()).unwrap(); fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; } } @@ -380,7 +382,9 @@ where let mut obkv = KvWriterDelAdd::memory(); obkv.insert(DelAdd::Addition, bytes_of(&()))?; let bytes = obkv.into_inner()?; - conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap(); + let mut key = b"dfn".to_vec(); + key.extend_from_slice(key_buffer); + conn.merge(key, 1u32.to_ne_bytes()).unwrap(); fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; } } @@ -426,7 +430,9 @@ where let mut obkv = KvWriterDelAdd::memory(); obkv.insert(DelAdd::Deletion, original)?; let bytes = obkv.into_inner()?; - conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap(); + let mut key = b"dfs".to_vec(); + key.extend_from_slice(key_buffer); + conn.merge(key, 1u32.to_ne_bytes()).unwrap(); fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?; } EitherOrBoth::Right((normalized, original)) => { @@ -436,7 +442,9 @@ where let mut obkv = KvWriterDelAdd::memory(); obkv.insert(DelAdd::Addition, original)?; let bytes = obkv.into_inner()?; - conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap(); + let mut key = b"dfs".to_vec(); + key.extend_from_slice(key_buffer); + conn.merge(key, 1u32.to_ne_bytes()).unwrap(); fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?; } } diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index 15d165513..1e38706f2 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -253,7 +253,9 @@ where } // insert everything in the same writer. - conn.merge(word.as_bytes(), 1u32.to_ne_bytes()).unwrap(); + let mut key = b"wod".to_vec(); + key.extend_from_slice(word.as_bytes()); + conn.merge(key, 1u32.to_ne_bytes()).unwrap(); writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?; Ok(()) diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 1ad6ec84d..73f9981be 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -43,11 +43,11 @@ pub static SLED_DB: once_cell::sync::Lazy = once_cell::sync::Lazy::new ) -> Option> { let current_count = old_value.map_or(0, |b| b.try_into().map(u32::from_ne_bytes).unwrap()); let new_count = merged_bytes.try_into().map(u32::from_ne_bytes).unwrap(); - let count = current_count.saturating_add(new_count).to_be_bytes(); + let count = current_count.saturating_add(new_count).to_ne_bytes(); Some(count.to_vec()) } - let db = sled::open("write-stats").unwrap(); + let db = sled::open("write-stats.sled").unwrap(); db.set_merge_operator(increment_u32); db });