Make sure all keys are prefixed

This commit is contained in:
Clément Renault 2024-07-21 15:40:32 +02:00
parent 7adc715783
commit 507bce791b
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
6 changed files with 29 additions and 15 deletions

View File

@ -11,7 +11,7 @@ use smallvec::SmallVec;
  use crate::update::del_add::{DelAdd, KvWriterDelAdd};
  use crate::CboRoaringBitmapCodec;
- const DISABLED: bool = true;
+ const ENABLED: bool = true;
  pub struct SorterCacheDelAddCboRoaringBitmap<const N: usize, MF> {
  cache: ArcCache<SmallVec<[u8; N]>, DelAddRoaringBitmap>,
@ -45,7 +45,7 @@ where
  MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>, U>,
  {
  pub fn insert_del_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error<U>> {
- if DISABLED {
+ if !ENABLED {
  return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_del_u32(n));
  }
@ -73,7 +73,7 @@ where
  key: &[u8],
  bitmap: RoaringBitmap,
  ) -> Result<(), grenad::Error<U>> {
- if DISABLED {
+ if !ENABLED {
  return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_del(bitmap));
  }
@ -97,7 +97,7 @@ where
  }
  pub fn insert_add_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error<U>> {
- if DISABLED {
+ if !ENABLED {
  return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_add_u32(n));
  }
@ -125,7 +125,7 @@ where
  key: &[u8],
  bitmap: RoaringBitmap,
  ) -> Result<(), grenad::Error<U>> {
- if DISABLED {
+ if !ENABLED {
  return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_add(bitmap));
  }
@ -149,7 +149,7 @@ where
  }
  pub fn insert_del_add_u32(&mut self, key: &[u8], n: u32) -> Result<(), grenad::Error<U>> {
- if DISABLED {
+ if !ENABLED {
  return self.write_entry_to_sorter(key, DelAddRoaringBitmap::new_del_add_u32(n));
  }

View File

@ -150,7 +150,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
  for (field_id, value) in obkv.iter() {
  key_buffer.truncate(mem::size_of::<u32>());
  key_buffer.extend_from_slice(&field_id.to_be_bytes());
- conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
+ let mut key = b"dwp".to_vec();
+ key.extend_from_slice(&key_buffer);
+ conn.merge(key, 1u32.to_ne_bytes()).unwrap();
  docid_word_positions_sorter.insert(&key_buffer, value)?;
  }

View File

@ -106,7 +106,9 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
  let key = (field_id, hyper_normalized_value.as_ref());
  let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
- conn.merge(key_bytes.as_ref(), 1u32.to_ne_bytes()).unwrap();
+ let mut key = b"nfs".to_vec();
+ key.extend_from_slice(&key_bytes);
+ conn.merge(key, 1u32.to_ne_bytes()).unwrap();
  normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
  }

View File

@ -366,7 +366,9 @@ where
  let mut obkv = KvWriterDelAdd::memory();
  obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
  let bytes = obkv.into_inner()?;
- conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
+ let mut key = b"dfn".to_vec();
+ key.extend_from_slice(key_buffer);
+ conn.merge(key, 1u32.to_ne_bytes()).unwrap();
  fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
  }
  }
@ -380,7 +382,9 @@ where
  let mut obkv = KvWriterDelAdd::memory();
  obkv.insert(DelAdd::Addition, bytes_of(&()))?;
  let bytes = obkv.into_inner()?;
- conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
+ let mut key = b"dfn".to_vec();
+ key.extend_from_slice(key_buffer);
+ conn.merge(key, 1u32.to_ne_bytes()).unwrap();
  fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
  }
  }
@ -426,7 +430,9 @@ where
  let mut obkv = KvWriterDelAdd::memory();
  obkv.insert(DelAdd::Deletion, original)?;
  let bytes = obkv.into_inner()?;
- conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
+ let mut key = b"dfs".to_vec();
+ key.extend_from_slice(key_buffer);
+ conn.merge(key, 1u32.to_ne_bytes()).unwrap();
  fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
  }
  EitherOrBoth::Right((normalized, original)) => {
@ -436,7 +442,9 @@ where
  let mut obkv = KvWriterDelAdd::memory();
  obkv.insert(DelAdd::Addition, original)?;
  let bytes = obkv.into_inner()?;
- conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
+ let mut key = b"dfs".to_vec();
+ key.extend_from_slice(key_buffer);
+ conn.merge(key, 1u32.to_ne_bytes()).unwrap();
  fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
  }
  }

View File

@ -253,7 +253,9 @@ where
  }
  // insert everything in the same writer.
- conn.merge(word.as_bytes(), 1u32.to_ne_bytes()).unwrap();
+ let mut key = b"wod".to_vec();
+ key.extend_from_slice(word.as_bytes());
+ conn.merge(key, 1u32.to_ne_bytes()).unwrap();
  writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?;
  Ok(())

View File

@ -43,11 +43,11 @@ pub static SLED_DB: once_cell::sync::Lazy<sled::Db> = once_cell::sync::Lazy::new
  ) -> Option<Vec<u8>> {
  let current_count = old_value.map_or(0, |b| b.try_into().map(u32::from_ne_bytes).unwrap());
  let new_count = merged_bytes.try_into().map(u32::from_ne_bytes).unwrap();
- let count = current_count.saturating_add(new_count).to_be_bytes();
+ let count = current_count.saturating_add(new_count).to_ne_bytes();
  Some(count.to_vec())
  }
- let db = sled::open("write-stats").unwrap();
+ let db = sled::open("write-stats.sled").unwrap();
  db.set_merge_operator(increment_u32);
  db
  });