Disable sled logging

Clément Renault 2024-07-22 14:06:31 +02:00
parent ca332883cc
commit 4d92df1b95
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
15 changed files with 33 additions and 335 deletions
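Context: the diffs below remove the sled-based write-statistics instrumentation. A global sled database counted, under a three-byte prefix per extractor, how many entries were written to each grenad sorter, using a merge operator to accumulate u32 counters. A minimal sketch of that counting pattern, assuming the sled 0.34 API that this commit drops (the path, prefix, and key are illustrative):

fn main() -> sled::Result<()> {
    // Merge operator: decode the existing u32 counter (0 if absent),
    // add the incoming value, and store the new count.
    fn increment_u32(
        _key: &[u8],
        old_value: Option<&[u8]>,
        merged_bytes: &[u8],
    ) -> Option<Vec<u8>> {
        let current = old_value.map_or(0, |b| u32::from_ne_bytes(b.try_into().unwrap()));
        let incoming = u32::from_ne_bytes(merged_bytes.try_into().unwrap());
        Some(current.saturating_add(incoming).to_ne_bytes().to_vec())
    }

    let db = sled::open("write-stats.sled")?;
    db.set_merge_operator(increment_u32);

    // Each sorter write was mirrored by a merge call that bumps the counter
    // stored under a three-byte prefix followed by the entry key.
    db.merge([&b"wdi"[..], &b"example-key"[..]].concat(), 1u32.to_ne_bytes())?;
    Ok(())
}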

Cargo.lock (generated), 64 lines changed
View File

@@ -1879,16 +1879,6 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "fst"
version = "0.4.7"
@@ -3404,7 +3394,7 @@ dependencies = [
"obkv",
"once_cell",
"ordered-float",
"parking_lot 0.12.3",
"parking_lot",
"permissive-json-pointer",
"pin-project-lite",
"platform-dirs",
@@ -3577,7 +3567,6 @@ dependencies = [
"rstar",
"serde",
"serde_json",
"sled",
"slice-group-by",
"smallstr",
"smallvec",
@@ -3891,17 +3880,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core 0.8.6",
]
[[package]]
name = "parking_lot"
version = "0.12.3"
@@ -3909,21 +3887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core 0.9.8",
]
[[package]]
name = "parking_lot_core"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall 0.2.16",
"smallvec",
"winapi",
"parking_lot_core",
]
[[package]]
@@ -4265,7 +4229,7 @@ dependencies = [
"lazy_static",
"libc",
"memchr",
"parking_lot 0.12.3",
"parking_lot",
"procfs",
"protobuf",
"thiserror",
@@ -5023,22 +4987,6 @@ dependencies = [
"autocfg",
]
[[package]]
name = "sled"
version = "0.34.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
dependencies = [
"crc32fast",
"crossbeam-epoch",
"crossbeam-utils",
"fs2",
"fxhash",
"libc",
"log",
"parking_lot 0.11.2",
]
[[package]]
name = "slice-group-by"
version = "0.3.1"
@@ -5300,7 +5248,7 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050"
dependencies = [
"parking_lot 0.12.3",
"parking_lot",
]
[[package]]
@@ -5374,7 +5322,7 @@ dependencies = [
"bstr",
"fancy-regex 0.12.0",
"lazy_static",
"parking_lot 0.12.3",
"parking_lot",
"rustc-hash",
]
@@ -5486,7 +5434,7 @@ dependencies = [
"libc",
"mio",
"num_cpus",
"parking_lot 0.12.3",
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2 0.5.5",

View File

@@ -67,8 +67,6 @@ filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.13.0"
sled = "0.34.7"
csv = "1.3.0"
candle-core = { version = "0.6.0" }
candle-transformers = { version = "0.6.0" }

View File

@@ -1,10 +1,8 @@
use std::borrow::{Borrow, Cow};
use std::hash::Hash;
use std::iter::Chain;
use std::borrow::Cow;
use std::mem;
use std::num::NonZeroUsize;
use lru::{IntoIter, LruCache};
use lru::LruCache;
use roaring::RoaringBitmap;
use smallvec::SmallVec;
@@ -15,27 +13,18 @@ const ENABLED: bool = true;
pub struct SorterCacheDelAddCboRoaringBitmap<const N: usize, MF> {
cache: LruCache<SmallVec<[u8; N]>, DelAddRoaringBitmap>,
prefix: &'static [u8; 3],
sorter: grenad::Sorter<MF>,
deladd_buffer: Vec<u8>,
cbo_buffer: Vec<u8>,
conn: sled::Db,
}
impl<const N: usize, MF> SorterCacheDelAddCboRoaringBitmap<N, MF> {
pub fn new(
cap: NonZeroUsize,
sorter: grenad::Sorter<MF>,
prefix: &'static [u8; 3],
conn: sled::Db,
) -> Self {
pub fn new(cap: NonZeroUsize, sorter: grenad::Sorter<MF>) -> Self {
SorterCacheDelAddCboRoaringBitmap {
cache: LruCache::new(cap),
prefix,
sorter,
deladd_buffer: Vec::new(),
cbo_buffer: Vec::new(),
conn,
}
}
}
@@ -56,7 +45,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_del_u32(n);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}
@@ -81,7 +70,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_del(bitmap);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}
@@ -102,7 +91,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_add_u32(n);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}
@@ -127,7 +116,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_add(bitmap);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}
@@ -149,7 +138,7 @@ where
}
None => {
let value = DelAddRoaringBitmap::new_del_add_u32(n);
for (key, deladd) in self.cache.push(key.into(), value) {
if let Some((key, deladd)) = self.cache.push(key.into(), value) {
self.write_entry_to_sorter(key, deladd)?;
}
}
@@ -187,18 +176,10 @@ where
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
}
self.cbo_buffer.clear();
self.cbo_buffer.extend_from_slice(self.prefix);
self.cbo_buffer.extend_from_slice(key.as_ref());
self.conn.merge(&self.cbo_buffer, 1u32.to_ne_bytes()).unwrap();
self.sorter.insert(key, value_writer.into_inner().unwrap())
}
pub fn direct_insert(&mut self, key: &[u8], val: &[u8]) -> Result<(), grenad::Error<U>> {
self.cbo_buffer.clear();
self.cbo_buffer.extend_from_slice(self.prefix);
self.cbo_buffer.extend_from_slice(key);
self.conn.merge(&self.cbo_buffer, 1u32.to_ne_bytes()).unwrap();
self.sorter.insert(key, val)
}
@@ -240,167 +221,3 @@ impl DelAddRoaringBitmap {
DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) }
}
}
// TODO support custom State (3rd param S of LruCache)
pub struct ArcCache<K, V> {
recent_set: LruCache<K, V>,
recent_evicted: LruCache<K, ()>,
frequent_set: LruCache<K, V>,
frequent_evicted: LruCache<K, ()>,
capacity: NonZeroUsize,
p: usize,
}
impl<K: Eq + Hash, V> ArcCache<K, V> {
pub fn new(cap: NonZeroUsize) -> Self {
ArcCache {
recent_set: LruCache::new(cap),
recent_evicted: LruCache::new(cap),
frequent_set: LruCache::new(cap),
frequent_evicted: LruCache::new(cap),
capacity: cap,
p: 0,
}
}
}
impl<K: Eq + Hash + Clone, V> ArcCache<K, V> {
fn get_mut<Q>(&mut self, k: &Q) -> (Option<&mut V>, Option<(K, V)>)
where
K: Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
if let Some((key, value)) = self.recent_set.pop_entry(k) {
let evicted = self.frequent_set.push(key, value);
(self.frequent_set.get_mut(k), evicted)
} else {
(self.frequent_set.get_mut(k), None)
}
}
fn push(&mut self, key: K, value: V) -> Vec<(K, V)> {
let mut evicted = Vec::new();
if self.recent_set.contains(&key) {
if let Some(evicted_entry) = self.recent_set.pop_entry(&key) {
evicted.push(evicted_entry);
}
if let Some(evicted_entry) = self.frequent_set.push(key, value) {
evicted.push(evicted_entry);
}
return evicted;
}
if self.frequent_set.contains(&key) {
if let Some(evicted_entry) = self.frequent_set.push(key, value) {
evicted.push(evicted_entry);
}
return evicted;
}
if self.recent_set.len() + self.frequent_set.len() == self.capacity.get() {
if self.recent_set.len() < self.capacity.get() {
if self.recent_set.len() + self.recent_evicted.len() == self.capacity.get() {
self.recent_evicted.pop_lru();
}
if let Some((lru_key, lru_value)) = self.frequent_set.pop_lru() {
self.frequent_evicted.put(lru_key.clone(), ());
evicted.push((lru_key, lru_value));
}
} else if let Some((lru_key, lru_value)) = self.recent_set.pop_lru() {
self.recent_evicted.put(lru_key.clone(), ());
evicted.push((lru_key, lru_value));
}
}
if self.recent_evicted.contains(&key) {
let delta = if self.recent_evicted.len() >= self.frequent_evicted.len() {
1
} else {
self.frequent_evicted.len() / self.recent_evicted.len()
};
self.p = (self.p + delta).min(self.capacity.get());
if let Some(evicted_entry) = self.replace(&key) {
evicted.push(evicted_entry);
}
self.recent_evicted.pop(&key);
if let Some(evicted_entry) = self.frequent_set.push(key, value) {
evicted.push(evicted_entry);
}
} else if self.frequent_evicted.contains(&key) {
let delta = if self.frequent_evicted.len() >= self.recent_evicted.len() {
1
} else {
self.recent_evicted.len() / self.frequent_evicted.len()
};
self.p = self.p.saturating_sub(delta);
if let Some(evicted_entry) = self.replace(&key) {
evicted.push(evicted_entry);
}
self.frequent_evicted.pop(&key);
if let Some(evicted_entry) = self.frequent_set.push(key, value) {
evicted.push(evicted_entry);
}
} else {
if self.recent_set.len() + self.recent_evicted.len() == self.capacity.get() {
if self.recent_set.len() < self.capacity.get() {
self.recent_evicted.pop_lru();
if let Some(evicted_entry) = self.replace(&key) {
evicted.push(evicted_entry);
}
} else if let Some(evicted_entry) = self.recent_set.pop_lru() {
evicted.push(evicted_entry);
}
} else if self.recent_set.len()
+ self.frequent_set.len()
+ self.recent_evicted.len()
+ self.frequent_evicted.len()
>= self.capacity.get()
{
if self.recent_set.len()
+ self.frequent_set.len()
+ self.recent_evicted.len()
+ self.frequent_evicted.len()
== 2 * self.capacity.get()
{
self.frequent_evicted.pop_lru();
}
if let Some(evicted_entry) = self.replace(&key) {
evicted.push(evicted_entry);
}
}
if let Some(evicted_entry) = self.recent_set.push(key, value) {
evicted.push(evicted_entry);
}
}
evicted
}
fn replace(&mut self, key: &K) -> Option<(K, V)> {
if !self.recent_set.is_empty()
&& (self.recent_set.len() > self.p
|| (self.frequent_evicted.contains(key) && self.recent_set.len() == self.p))
{
if let Some((lru_key, lru_value)) = self.recent_set.pop_lru() {
self.recent_evicted.put(lru_key.clone(), ());
return Some((lru_key, lru_value));
}
} else if let Some((lru_key, lru_value)) = self.frequent_set.pop_lru() {
self.frequent_evicted.put(lru_key.clone(), ());
return Some((lru_key, lru_value));
}
None
}
}
impl<K: Hash + Eq, V> IntoIterator for ArcCache<K, V> {
type Item = (K, V);
type IntoIter = Chain<IntoIter<K, V>, IntoIter<K, V>>;
fn into_iter(self) -> Self::IntoIter {
self.recent_set.into_iter().chain(self.frequent_set)
}
}
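The repeated change above from `for (key, deladd) in self.cache.push(..)` to `if let Some((key, deladd)) = self.cache.push(..)` follows from dropping the ArcCache (whose push could evict several entries and returned a Vec) in favour of lru::LruCache, whose push returns at most one evicted entry. A small illustration of that lru behaviour, with hypothetical keys and values:

use std::num::NonZeroUsize;
use lru::LruCache;

fn main() {
    let mut cache: LruCache<u32, &str> = LruCache::new(NonZeroUsize::new(2).unwrap());
    assert_eq!(cache.push(1, "a"), None);
    assert_eq!(cache.push(2, "b"), None);
    // The cache is full, so pushing a third key evicts the least recently used
    // entry, which the caller then writes out to the grenad sorter.
    assert_eq!(cache.push(3, "c"), Some((1, "a")));
}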

View File

@@ -29,8 +29,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
settings_diff: &InnerIndexSettingsDiff,
max_positions_per_attributes: Option<u32>,
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
let conn = super::SLED_DB.clone();
let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();
@@ -152,7 +150,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
key_buffer.extend_from_slice(&field_id.to_be_bytes());
let mut key = b"dwp".to_vec();
key.extend_from_slice(&key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
// conn.merge(key, 1u32.to_ne_bytes()).unwrap();
docid_word_positions_sorter.insert(&key_buffer, value)?;
}

View File

@@ -40,8 +40,6 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
facet_number_docids_sorter,
b"fnd",
super::SLED_DB.clone(),
);
let mut cursor = fid_docid_facet_number.into_cursor()?;

View File

@@ -10,7 +10,6 @@ use heed::types::SerdeJson;
use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use super::SLED_DB;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
@@ -32,7 +31,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let conn = SLED_DB.clone();
let max_memory = indexer.max_memory_by_thread();
let options = NormalizerOption { lossy: true, ..Default::default() };
@@ -48,8 +46,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
facet_string_docids_sorter,
b"fsd",
SLED_DB.clone(),
);
let mut normalized_facet_string_docids_sorter = create_sorter(
@@ -108,7 +104,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
let mut key = b"nfs".to_vec();
key.extend_from_slice(&key_bytes);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
// conn.merge(key, 1u32.to_ne_bytes()).unwrap();
normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
}

View File

@@ -46,7 +46,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<ExtractedFacetValues> {
let mut conn = super::SLED_DB.clone();
let max_memory = indexer.max_memory_by_thread();
let mut fid_docid_facet_numbers_sorter = create_sorter(
@@ -170,22 +169,20 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_value.map(|value| extract_facet_values(&value, add_geo_support));
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers, conn| {
let mut insert_numbers_diff = |del_numbers, add_numbers| {
insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
conn,
)
};
let mut insert_strings_diff = |del_strings, add_strings, conn| {
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
conn,
)
};
@@ -199,8 +196,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
del_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(numbers, vec![], &mut conn)?;
insert_strings_diff(strings, vec![], &mut conn)?;
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
},
(None, Some(add_filterable_values)) => match add_filterable_values {
@@ -211,8 +208,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(vec![], numbers, &mut conn)?;
insert_strings_diff(vec![], strings, &mut conn)?;
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
}
},
(Some(del_filterable_values), Some(add_filterable_values)) => {
@@ -227,31 +224,31 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers, &mut conn)?;
insert_strings_diff(vec![], strings, &mut conn)?;
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_null.insert(document);
}
(Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers, &mut conn)?;
insert_strings_diff(vec![], strings, &mut conn)?;
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_empty.insert(document);
}
(Values { numbers, strings }, Null) => {
add_is_null.insert(document);
insert_numbers_diff(numbers, vec![], &mut conn)?;
insert_strings_diff(strings, vec![], &mut conn)?;
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(Values { numbers, strings }, Empty) => {
add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![], &mut conn)?;
insert_strings_diff(strings, vec![], &mut conn)?;
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(
Values { numbers: del_numbers, strings: del_strings },
Values { numbers: add_numbers, strings: add_strings },
) => {
insert_numbers_diff(del_numbers, add_numbers, &mut conn)?;
insert_strings_diff(del_strings, add_strings, &mut conn)?;
insert_numbers_diff(del_numbers, add_numbers)?;
insert_strings_diff(del_strings, add_strings)?;
}
}
}
@@ -334,7 +331,6 @@ fn insert_numbers_diff<MF>(
key_buffer: &mut Vec<u8>,
mut del_numbers: Vec<f64>,
mut add_numbers: Vec<f64>,
conn: &mut sled::Db,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
@@ -366,9 +362,6 @@ where
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
let mut key = b"dfn".to_vec();
key.extend_from_slice(key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
}
@@ -382,9 +375,6 @@ where
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
let mut key = b"dfn".to_vec();
key.extend_from_slice(key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
}
@@ -401,7 +391,6 @@ fn insert_strings_diff<MF>(
key_buffer: &mut Vec<u8>,
mut del_strings: Vec<(String, String)>,
mut add_strings: Vec<(String, String)>,
conn: &mut sled::Db,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
@@ -430,9 +419,6 @@ where
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, original)?;
let bytes = obkv.into_inner()?;
let mut key = b"dfs".to_vec();
key.extend_from_slice(key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
}
EitherOrBoth::Right((normalized, original)) => {
@@ -442,9 +428,6 @@ where
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, original)?;
let bytes = obkv.into_inner()?;
let mut key = b"dfs".to_vec();
key.extend_from_slice(key_buffer);
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
}
}

View File

@@ -43,8 +43,6 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
fid_word_count_docids_sorter,
b"fwc",
super::SLED_DB.clone(),
);
let mut key_buffer = Vec::new();

View File

@@ -11,7 +11,6 @@ use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters,
};
use super::SLED_DB;
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
use crate::index::db_name::DOCID_WORD_POSITIONS;
@@ -52,8 +51,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut cached_word_fid_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, _>::new(
NonZeroUsize::new(1000).unwrap(),
word_fid_docids_sorter,
b"wfd",
SLED_DB.clone(),
);
let mut key_buffer = Vec::new();
@@ -113,8 +110,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut cached_word_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
word_docids_sorter,
b"wdi",
SLED_DB.clone(),
);
let exact_word_docids_sorter = create_sorter(
@@ -128,8 +123,6 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let mut cached_exact_word_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
exact_word_docids_sorter,
b"ewd",
SLED_DB.clone(),
);
let mut iter = cached_word_fid_docids_sorter.into_sorter()?.into_stream_merger_iter()?;
@@ -221,7 +214,6 @@ fn docids_into_writers<W>(
deletions: &RoaringBitmap,
additions: &RoaringBitmap,
writer: &mut grenad::Writer<W>,
conn: &mut sled::Db,
) -> Result<()>
where
W: std::io::Write,
@@ -253,9 +245,6 @@ where
}
// insert everything in the same writer.
let mut key = b"wod".to_vec();
key.extend_from_slice(word.as_bytes());
conn.merge(key, 1u32.to_ne_bytes()).unwrap();
writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?;
Ok(())

View File

@@ -55,8 +55,6 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
sorter,
b"wpp",
super::SLED_DB.clone(),
)
})
.collect();

View File

@@ -41,8 +41,6 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
word_position_docids_sorter,
b"wpd",
super::SLED_DB.clone(),
);
let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();

View File

@@ -35,23 +35,6 @@ use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::PossibleEmbeddingMistakes;
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub static SLED_DB: once_cell::sync::Lazy<sled::Db> = once_cell::sync::Lazy::new(|| {
fn increment_u32(
_key: &[u8],
old_value: Option<&[u8]>,
merged_bytes: &[u8],
) -> Option<Vec<u8>> {
let current_count = old_value.map_or(0, |b| b.try_into().map(u32::from_ne_bytes).unwrap());
let new_count = merged_bytes.try_into().map(u32::from_ne_bytes).unwrap();
let count = current_count.saturating_add(new_count).to_ne_bytes();
Some(count.to_vec())
}
let db = sled::open("write-stats.sled").unwrap();
db.set_merge_operator(increment_u32);
db
});
/// Extract data for each databases from obkv documents in parallel.
/// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)]

View File

@@ -14,7 +14,6 @@ use std::result::Result as StdResult;
use std::sync::Arc;
use crossbeam_channel::{Receiver, Sender};
pub use extract::SLED_DB;
use grenad::{Merger, MergerBuilder};
use heed::types::Str;
use heed::Database;

View File

@@ -6,7 +6,6 @@ use heed::types::Str;
use heed::Database;
use super::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
use super::index_documents::SLED_DB;
use crate::update::del_add::deladd_serialize_add_side;
use crate::update::index_documents::{
create_sorter, merge_deladd_cbo_roaring_bitmaps,
@@ -68,8 +67,6 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
let mut cached_prefix_docids_sorter = SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
prefix_docids_sorter,
b"pdi",
SLED_DB.clone(),
);
if !common_prefix_fst_words.is_empty() {

View File

@@ -15,7 +15,7 @@ use crate::update::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
use crate::update::index_documents::{
create_sorter, merge_deladd_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
write_sorter_into_database, CursorClonableMmap, MergeFn, SLED_DB,
write_sorter_into_database, CursorClonableMmap, MergeFn,
};
use crate::{CboRoaringBitmapCodec, Result};
@@ -73,8 +73,6 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(1000).unwrap(),
prefix_integer_docids_sorter,
b"pid",
SLED_DB.clone(),
);
if !common_prefix_fst_words.is_empty() {