diff --git a/Cargo.lock b/Cargo.lock index aa412dacf..da4ac157e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1879,16 +1879,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "fst" version = "0.4.7" @@ -3404,7 +3394,7 @@ dependencies = [ "obkv", "once_cell", "ordered-float", - "parking_lot 0.12.3", + "parking_lot", "permissive-json-pointer", "pin-project-lite", "platform-dirs", @@ -3577,7 +3567,6 @@ dependencies = [ "rstar", "serde", "serde_json", - "sled", "slice-group-by", "smallstr", "smallvec", @@ -3891,17 +3880,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.3" @@ -3909,21 +3887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", - "parking_lot_core 0.9.8", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -4265,7 +4229,7 @@ dependencies = [ "lazy_static", "libc", "memchr", - "parking_lot 0.12.3", + "parking_lot", "procfs", "protobuf", "thiserror", @@ -5023,22 +4987,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "sled" -version = "0.34.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" -dependencies = [ - "crc32fast", - "crossbeam-epoch", - "crossbeam-utils", - "fs2", - "fxhash", - "libc", - "log", - "parking_lot 0.11.2", -] - [[package]] name = "slice-group-by" version = "0.3.1" @@ -5300,7 +5248,7 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" dependencies = [ - "parking_lot 0.12.3", + "parking_lot", ] [[package]] @@ -5374,7 +5322,7 @@ dependencies = [ "bstr", "fancy-regex 0.12.0", "lazy_static", - "parking_lot 0.12.3", + "parking_lot", "rustc-hash", ] @@ -5486,7 +5434,7 @@ dependencies = [ "libc", "mio", "num_cpus", - "parking_lot 0.12.3", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2 0.5.5", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 9a9da9556..0944ba765 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -67,8 +67,6 @@ filter-parser = { path = "../filter-parser" } # documents words self-join itertools = "0.13.0" -sled = "0.34.7" - csv = "1.3.0" candle-core = { version = "0.6.0" } candle-transformers = { version = "0.6.0" } diff --git a/milli/src/update/index_documents/cache.rs b/milli/src/update/index_documents/cache.rs index 81616744f..49a64185e 100644 --- a/milli/src/update/index_documents/cache.rs +++ b/milli/src/update/index_documents/cache.rs @@ -1,10 +1,8 @@ -use std::borrow::{Borrow, Cow}; -use std::hash::Hash; -use std::iter::Chain; +use std::borrow::Cow; use std::mem; use std::num::NonZeroUsize; -use lru::{IntoIter, LruCache}; +use lru::LruCache; use roaring::RoaringBitmap; use smallvec::SmallVec; @@ -15,27 +13,18 @@ const ENABLED: bool = true; pub struct SorterCacheDelAddCboRoaringBitmap { cache: LruCache, DelAddRoaringBitmap>, - prefix: &'static [u8; 3], sorter: grenad::Sorter, deladd_buffer: Vec, cbo_buffer: Vec, - conn: sled::Db, } impl SorterCacheDelAddCboRoaringBitmap { - pub fn new( - cap: NonZeroUsize, - sorter: grenad::Sorter, - prefix: &'static [u8; 3], - conn: sled::Db, - ) -> Self { + pub fn new(cap: NonZeroUsize, sorter: grenad::Sorter) -> Self { SorterCacheDelAddCboRoaringBitmap { cache: LruCache::new(cap), - prefix, sorter, deladd_buffer: Vec::new(), cbo_buffer: Vec::new(), - conn, } } } @@ -56,7 +45,7 @@ where } None => { let value = DelAddRoaringBitmap::new_del_u32(n); - for (key, deladd) in self.cache.push(key.into(), value) { + if let Some((key, deladd)) = self.cache.push(key.into(), value) { self.write_entry_to_sorter(key, deladd)?; } } @@ -81,7 +70,7 @@ where } None => { let value = DelAddRoaringBitmap::new_del(bitmap); - for (key, deladd) in self.cache.push(key.into(), value) { + if let Some((key, deladd)) = self.cache.push(key.into(), value) { self.write_entry_to_sorter(key, deladd)?; } } @@ -102,7 +91,7 @@ where } None => { let value = DelAddRoaringBitmap::new_add_u32(n); - for (key, deladd) in self.cache.push(key.into(), value) { + if let Some((key, deladd)) = self.cache.push(key.into(), value) { self.write_entry_to_sorter(key, deladd)?; } } @@ -127,7 +116,7 @@ where } None => { let value = DelAddRoaringBitmap::new_add(bitmap); - for (key, deladd) in self.cache.push(key.into(), value) { + if let Some((key, deladd)) = self.cache.push(key.into(), value) { self.write_entry_to_sorter(key, deladd)?; } } @@ -149,7 +138,7 @@ where } None => { let value = DelAddRoaringBitmap::new_del_add_u32(n); - for (key, deladd) in self.cache.push(key.into(), value) { + if let Some((key, deladd)) = self.cache.push(key.into(), value) { self.write_entry_to_sorter(key, deladd)?; } } @@ -187,18 +176,10 @@ where } DelAddRoaringBitmap { del: None, add: None } => return Ok(()), } - self.cbo_buffer.clear(); - self.cbo_buffer.extend_from_slice(self.prefix); - self.cbo_buffer.extend_from_slice(key.as_ref()); - self.conn.merge(&self.cbo_buffer, 1u32.to_ne_bytes()).unwrap(); self.sorter.insert(key, value_writer.into_inner().unwrap()) } pub fn direct_insert(&mut self, key: &[u8], val: &[u8]) -> Result<(), grenad::Error> { - self.cbo_buffer.clear(); - self.cbo_buffer.extend_from_slice(self.prefix); - self.cbo_buffer.extend_from_slice(key); - self.conn.merge(&self.cbo_buffer, 1u32.to_ne_bytes()).unwrap(); self.sorter.insert(key, val) } @@ -240,167 +221,3 @@ impl DelAddRoaringBitmap { DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) } } } - -// TODO support custom State (3rd param S of LruCache) -pub struct ArcCache { - recent_set: LruCache, - recent_evicted: LruCache, - frequent_set: LruCache, - frequent_evicted: LruCache, - capacity: NonZeroUsize, - p: usize, -} - -impl ArcCache { - pub fn new(cap: NonZeroUsize) -> Self { - ArcCache { - recent_set: LruCache::new(cap), - recent_evicted: LruCache::new(cap), - frequent_set: LruCache::new(cap), - frequent_evicted: LruCache::new(cap), - capacity: cap, - p: 0, - } - } -} - -impl ArcCache { - fn get_mut(&mut self, k: &Q) -> (Option<&mut V>, Option<(K, V)>) - where - K: Borrow, - Q: Hash + Eq + ?Sized, - { - if let Some((key, value)) = self.recent_set.pop_entry(k) { - let evicted = self.frequent_set.push(key, value); - (self.frequent_set.get_mut(k), evicted) - } else { - (self.frequent_set.get_mut(k), None) - } - } - - fn push(&mut self, key: K, value: V) -> Vec<(K, V)> { - let mut evicted = Vec::new(); - - if self.recent_set.contains(&key) { - if let Some(evicted_entry) = self.recent_set.pop_entry(&key) { - evicted.push(evicted_entry); - } - if let Some(evicted_entry) = self.frequent_set.push(key, value) { - evicted.push(evicted_entry); - } - return evicted; - } - - if self.frequent_set.contains(&key) { - if let Some(evicted_entry) = self.frequent_set.push(key, value) { - evicted.push(evicted_entry); - } - return evicted; - } - - if self.recent_set.len() + self.frequent_set.len() == self.capacity.get() { - if self.recent_set.len() < self.capacity.get() { - if self.recent_set.len() + self.recent_evicted.len() == self.capacity.get() { - self.recent_evicted.pop_lru(); - } - if let Some((lru_key, lru_value)) = self.frequent_set.pop_lru() { - self.frequent_evicted.put(lru_key.clone(), ()); - evicted.push((lru_key, lru_value)); - } - } else if let Some((lru_key, lru_value)) = self.recent_set.pop_lru() { - self.recent_evicted.put(lru_key.clone(), ()); - evicted.push((lru_key, lru_value)); - } - } - - if self.recent_evicted.contains(&key) { - let delta = if self.recent_evicted.len() >= self.frequent_evicted.len() { - 1 - } else { - self.frequent_evicted.len() / self.recent_evicted.len() - }; - - self.p = (self.p + delta).min(self.capacity.get()); - if let Some(evicted_entry) = self.replace(&key) { - evicted.push(evicted_entry); - } - self.recent_evicted.pop(&key); - if let Some(evicted_entry) = self.frequent_set.push(key, value) { - evicted.push(evicted_entry); - } - } else if self.frequent_evicted.contains(&key) { - let delta = if self.frequent_evicted.len() >= self.recent_evicted.len() { - 1 - } else { - self.recent_evicted.len() / self.frequent_evicted.len() - }; - self.p = self.p.saturating_sub(delta); - if let Some(evicted_entry) = self.replace(&key) { - evicted.push(evicted_entry); - } - self.frequent_evicted.pop(&key); - if let Some(evicted_entry) = self.frequent_set.push(key, value) { - evicted.push(evicted_entry); - } - } else { - if self.recent_set.len() + self.recent_evicted.len() == self.capacity.get() { - if self.recent_set.len() < self.capacity.get() { - self.recent_evicted.pop_lru(); - if let Some(evicted_entry) = self.replace(&key) { - evicted.push(evicted_entry); - } - } else if let Some(evicted_entry) = self.recent_set.pop_lru() { - evicted.push(evicted_entry); - } - } else if self.recent_set.len() - + self.frequent_set.len() - + self.recent_evicted.len() - + self.frequent_evicted.len() - >= self.capacity.get() - { - if self.recent_set.len() - + self.frequent_set.len() - + self.recent_evicted.len() - + self.frequent_evicted.len() - == 2 * self.capacity.get() - { - self.frequent_evicted.pop_lru(); - } - if let Some(evicted_entry) = self.replace(&key) { - evicted.push(evicted_entry); - } - } - if let Some(evicted_entry) = self.recent_set.push(key, value) { - evicted.push(evicted_entry); - } - } - - evicted - } - - fn replace(&mut self, key: &K) -> Option<(K, V)> { - if !self.recent_set.is_empty() - && (self.recent_set.len() > self.p - || (self.frequent_evicted.contains(key) && self.recent_set.len() == self.p)) - { - if let Some((lru_key, lru_value)) = self.recent_set.pop_lru() { - self.recent_evicted.put(lru_key.clone(), ()); - return Some((lru_key, lru_value)); - } - } else if let Some((lru_key, lru_value)) = self.frequent_set.pop_lru() { - self.frequent_evicted.put(lru_key.clone(), ()); - return Some((lru_key, lru_value)); - } - - None - } -} - -impl IntoIterator for ArcCache { - type Item = (K, V); - type IntoIter = Chain, IntoIter>; - - fn into_iter(self) -> Self::IntoIter { - self.recent_set.into_iter().chain(self.frequent_set) - } -} diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 51b901f87..813760acb 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -29,8 +29,6 @@ pub fn extract_docid_word_positions( settings_diff: &InnerIndexSettingsDiff, max_positions_per_attributes: Option, ) -> Result<(grenad::Reader>, ScriptLanguageDocidsMap)> { - let conn = super::SLED_DB.clone(); - let max_positions_per_attributes = max_positions_per_attributes .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE)); let max_memory = indexer.max_memory_by_thread(); @@ -152,7 +150,7 @@ pub fn extract_docid_word_positions( key_buffer.extend_from_slice(&field_id.to_be_bytes()); let mut key = b"dwp".to_vec(); key.extend_from_slice(&key_buffer); - conn.merge(key, 1u32.to_ne_bytes()).unwrap(); + // conn.merge(key, 1u32.to_ne_bytes()).unwrap(); docid_word_positions_sorter.insert(&key_buffer, value)?; } diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs index 98bb48a18..20dff1b38 100644 --- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs @@ -40,8 +40,6 @@ pub fn extract_facet_number_docids( SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), facet_number_docids_sorter, - b"fnd", - super::SLED_DB.clone(), ); let mut cursor = fid_docid_facet_number.into_cursor()?; diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 3bc43dc9b..d36d62b58 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -10,7 +10,6 @@ use heed::types::SerdeJson; use heed::BytesEncode; use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; -use super::SLED_DB; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; use crate::heed_codec::{BEU16StrCodec, StrRefCodec}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; @@ -32,7 +31,6 @@ pub fn extract_facet_string_docids( indexer: GrenadParameters, _settings_diff: &InnerIndexSettingsDiff, ) -> Result<(grenad::Reader>, grenad::Reader>)> { - let conn = SLED_DB.clone(); let max_memory = indexer.max_memory_by_thread(); let options = NormalizerOption { lossy: true, ..Default::default() }; @@ -48,8 +46,6 @@ pub fn extract_facet_string_docids( SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), facet_string_docids_sorter, - b"fsd", - SLED_DB.clone(), ); let mut normalized_facet_string_docids_sorter = create_sorter( @@ -108,7 +104,7 @@ pub fn extract_facet_string_docids( let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; let mut key = b"nfs".to_vec(); key.extend_from_slice(&key_bytes); - conn.merge(key, 1u32.to_ne_bytes()).unwrap(); + // conn.merge(key, 1u32.to_ne_bytes()).unwrap(); normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?; } diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index e99cb537f..810fa26a9 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -46,7 +46,6 @@ pub fn extract_fid_docid_facet_values( indexer: GrenadParameters, settings_diff: &InnerIndexSettingsDiff, ) -> Result { - let mut conn = super::SLED_DB.clone(); let max_memory = indexer.max_memory_by_thread(); let mut fid_docid_facet_numbers_sorter = create_sorter( @@ -170,22 +169,20 @@ pub fn extract_fid_docid_facet_values( add_value.map(|value| extract_facet_values(&value, add_geo_support)); // Those closures are just here to simplify things a bit. - let mut insert_numbers_diff = |del_numbers, add_numbers, conn| { + let mut insert_numbers_diff = |del_numbers, add_numbers| { insert_numbers_diff( &mut fid_docid_facet_numbers_sorter, &mut numbers_key_buffer, del_numbers, add_numbers, - conn, ) }; - let mut insert_strings_diff = |del_strings, add_strings, conn| { + let mut insert_strings_diff = |del_strings, add_strings| { insert_strings_diff( &mut fid_docid_facet_strings_sorter, &mut strings_key_buffer, del_strings, add_strings, - conn, ) }; @@ -199,8 +196,8 @@ pub fn extract_fid_docid_facet_values( del_is_empty.insert(document); } Values { numbers, strings } => { - insert_numbers_diff(numbers, vec![], &mut conn)?; - insert_strings_diff(strings, vec![], &mut conn)?; + insert_numbers_diff(numbers, vec![])?; + insert_strings_diff(strings, vec![])?; } }, (None, Some(add_filterable_values)) => match add_filterable_values { @@ -211,8 +208,8 @@ pub fn extract_fid_docid_facet_values( add_is_empty.insert(document); } Values { numbers, strings } => { - insert_numbers_diff(vec![], numbers, &mut conn)?; - insert_strings_diff(vec![], strings, &mut conn)?; + insert_numbers_diff(vec![], numbers)?; + insert_strings_diff(vec![], strings)?; } }, (Some(del_filterable_values), Some(add_filterable_values)) => { @@ -227,31 +224,31 @@ pub fn extract_fid_docid_facet_values( add_is_null.insert(document); } (Null, Values { numbers, strings }) => { - insert_numbers_diff(vec![], numbers, &mut conn)?; - insert_strings_diff(vec![], strings, &mut conn)?; + insert_numbers_diff(vec![], numbers)?; + insert_strings_diff(vec![], strings)?; del_is_null.insert(document); } (Empty, Values { numbers, strings }) => { - insert_numbers_diff(vec![], numbers, &mut conn)?; - insert_strings_diff(vec![], strings, &mut conn)?; + insert_numbers_diff(vec![], numbers)?; + insert_strings_diff(vec![], strings)?; del_is_empty.insert(document); } (Values { numbers, strings }, Null) => { add_is_null.insert(document); - insert_numbers_diff(numbers, vec![], &mut conn)?; - insert_strings_diff(strings, vec![], &mut conn)?; + insert_numbers_diff(numbers, vec![])?; + insert_strings_diff(strings, vec![])?; } (Values { numbers, strings }, Empty) => { add_is_empty.insert(document); - insert_numbers_diff(numbers, vec![], &mut conn)?; - insert_strings_diff(strings, vec![], &mut conn)?; + insert_numbers_diff(numbers, vec![])?; + insert_strings_diff(strings, vec![])?; } ( Values { numbers: del_numbers, strings: del_strings }, Values { numbers: add_numbers, strings: add_strings }, ) => { - insert_numbers_diff(del_numbers, add_numbers, &mut conn)?; - insert_strings_diff(del_strings, add_strings, &mut conn)?; + insert_numbers_diff(del_numbers, add_numbers)?; + insert_strings_diff(del_strings, add_strings)?; } } } @@ -334,7 +331,6 @@ fn insert_numbers_diff( key_buffer: &mut Vec, mut del_numbers: Vec, mut add_numbers: Vec, - conn: &mut sled::Db, ) -> Result<()> where MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult, Error>, @@ -366,9 +362,6 @@ where let mut obkv = KvWriterDelAdd::memory(); obkv.insert(DelAdd::Deletion, bytes_of(&()))?; let bytes = obkv.into_inner()?; - let mut key = b"dfn".to_vec(); - key.extend_from_slice(key_buffer); - conn.merge(key, 1u32.to_ne_bytes()).unwrap(); fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; } } @@ -382,9 +375,6 @@ where let mut obkv = KvWriterDelAdd::memory(); obkv.insert(DelAdd::Addition, bytes_of(&()))?; let bytes = obkv.into_inner()?; - let mut key = b"dfn".to_vec(); - key.extend_from_slice(key_buffer); - conn.merge(key, 1u32.to_ne_bytes()).unwrap(); fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; } } @@ -401,7 +391,6 @@ fn insert_strings_diff( key_buffer: &mut Vec, mut del_strings: Vec<(String, String)>, mut add_strings: Vec<(String, String)>, - conn: &mut sled::Db, ) -> Result<()> where MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult, Error>, @@ -430,9 +419,6 @@ where let mut obkv = KvWriterDelAdd::memory(); obkv.insert(DelAdd::Deletion, original)?; let bytes = obkv.into_inner()?; - let mut key = b"dfs".to_vec(); - key.extend_from_slice(key_buffer); - conn.merge(key, 1u32.to_ne_bytes()).unwrap(); fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?; } EitherOrBoth::Right((normalized, original)) => { @@ -442,9 +428,6 @@ where let mut obkv = KvWriterDelAdd::memory(); obkv.insert(DelAdd::Addition, original)?; let bytes = obkv.into_inner()?; - let mut key = b"dfs".to_vec(); - key.extend_from_slice(key_buffer); - conn.merge(key, 1u32.to_ne_bytes()).unwrap(); fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?; } } diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index a8b330655..ac68d4312 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -43,8 +43,6 @@ pub fn extract_fid_word_count_docids( SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), fid_word_count_docids_sorter, - b"fwc", - super::SLED_DB.clone(), ); let mut key_buffer = Vec::new(); diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index 63dd7a2f7..ef1f1d832 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -11,7 +11,6 @@ use super::helpers::{ create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at, writer_into_reader, GrenadParameters, }; -use super::SLED_DB; use crate::error::SerializationError; use crate::heed_codec::StrBEU16Codec; use crate::index::db_name::DOCID_WORD_POSITIONS; @@ -52,8 +51,6 @@ pub fn extract_word_docids( let mut cached_word_fid_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, _>::new( NonZeroUsize::new(1000).unwrap(), word_fid_docids_sorter, - b"wfd", - SLED_DB.clone(), ); let mut key_buffer = Vec::new(); @@ -113,8 +110,6 @@ pub fn extract_word_docids( let mut cached_word_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), word_docids_sorter, - b"wdi", - SLED_DB.clone(), ); let exact_word_docids_sorter = create_sorter( @@ -128,8 +123,6 @@ pub fn extract_word_docids( let mut cached_exact_word_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), exact_word_docids_sorter, - b"ewd", - SLED_DB.clone(), ); let mut iter = cached_word_fid_docids_sorter.into_sorter()?.into_stream_merger_iter()?; @@ -221,7 +214,6 @@ fn docids_into_writers( deletions: &RoaringBitmap, additions: &RoaringBitmap, writer: &mut grenad::Writer, - conn: &mut sled::Db, ) -> Result<()> where W: std::io::Write, @@ -253,9 +245,6 @@ where } // insert everything in the same writer. - let mut key = b"wod".to_vec(); - key.extend_from_slice(word.as_bytes()); - conn.merge(key, 1u32.to_ne_bytes()).unwrap(); writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?; Ok(()) diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index e0678404a..b62872b6d 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -55,8 +55,6 @@ pub fn extract_word_pair_proximity_docids( SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), sorter, - b"wpp", - super::SLED_DB.clone(), ) }) .collect(); diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs index dc12cefb5..7d3ed6b71 100644 --- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs @@ -41,8 +41,6 @@ pub fn extract_word_position_docids( SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), word_position_docids_sorter, - b"wpd", - super::SLED_DB.clone(), ); let mut del_word_positions: BTreeSet<(u16, Vec)> = BTreeSet::new(); diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 73f9981be..57d9d5e42 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -35,23 +35,6 @@ use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::PossibleEmbeddingMistakes; use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; -pub static SLED_DB: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { - fn increment_u32( - _key: &[u8], - old_value: Option<&[u8]>, - merged_bytes: &[u8], - ) -> Option> { - let current_count = old_value.map_or(0, |b| b.try_into().map(u32::from_ne_bytes).unwrap()); - let new_count = merged_bytes.try_into().map(u32::from_ne_bytes).unwrap(); - let count = current_count.saturating_add(new_count).to_ne_bytes(); - Some(count.to_vec()) - } - - let db = sled::open("write-stats.sled").unwrap(); - db.set_merge_operator(increment_u32); - db -}); - /// Extract data for each databases from obkv documents in parallel. /// Send data in grenad file over provided Sender. #[allow(clippy::too_many_arguments)] diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 5b4503f51..053e66c96 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -14,7 +14,6 @@ use std::result::Result as StdResult; use std::sync::Arc; use crossbeam_channel::{Receiver, Sender}; -pub use extract::SLED_DB; use grenad::{Merger, MergerBuilder}; use heed::types::Str; use heed::Database; diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index d5ca711c2..5e579651c 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -6,7 +6,6 @@ use heed::types::Str; use heed::Database; use super::index_documents::cache::SorterCacheDelAddCboRoaringBitmap; -use super::index_documents::SLED_DB; use crate::update::del_add::deladd_serialize_add_side; use crate::update::index_documents::{ create_sorter, merge_deladd_cbo_roaring_bitmaps, @@ -68,8 +67,6 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> { let mut cached_prefix_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), prefix_docids_sorter, - b"pdi", - SLED_DB.clone(), ); if !common_prefix_fst_words.is_empty() { diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index 92e64625f..acfb7c836 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -15,7 +15,7 @@ use crate::update::index_documents::cache::SorterCacheDelAddCboRoaringBitmap; use crate::update::index_documents::{ create_sorter, merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key, - write_sorter_into_database, CursorClonableMmap, MergeFn, SLED_DB, + write_sorter_into_database, CursorClonableMmap, MergeFn, }; use crate::{CboRoaringBitmapCodec, Result}; @@ -73,8 +73,6 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new( NonZeroUsize::new(1000).unwrap(), prefix_integer_docids_sorter, - b"pid", - SLED_DB.clone(), ); if !common_prefix_fst_words.is_empty() {