Use sled to count the write insertions

This commit is contained in:
Clément Renault 2024-07-20 11:16:57 +02:00
parent 2603d8d0d0
commit f355cf6985
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
15 changed files with 106 additions and 72 deletions

96
Cargo.lock generated
View File

@ -1047,16 +1047,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "combine"
version = "4.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
dependencies = [
"bytes",
"memchr",
]
[[package]] [[package]]
name = "concat-arrays" name = "concat-arrays"
version = "0.1.2" version = "0.1.2"
@ -1889,6 +1879,16 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi",
]
[[package]] [[package]]
name = "fst" name = "fst"
version = "0.4.7" version = "0.4.7"
@ -3404,7 +3404,7 @@ dependencies = [
"obkv", "obkv",
"once_cell", "once_cell",
"ordered-float", "ordered-float",
"parking_lot", "parking_lot 0.12.3",
"permissive-json-pointer", "permissive-json-pointer",
"pin-project-lite", "pin-project-lite",
"platform-dirs", "platform-dirs",
@ -3572,12 +3572,12 @@ dependencies = [
"rand", "rand",
"rayon", "rayon",
"rayon-par-bridge", "rayon-par-bridge",
"redis",
"rhai", "rhai",
"roaring", "roaring",
"rstar", "rstar",
"serde", "serde",
"serde_json", "serde_json",
"sled",
"slice-group-by", "slice-group-by",
"smallstr", "smallstr",
"smallvec", "smallvec",
@ -3891,6 +3891,17 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core 0.8.6",
]
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
version = "0.12.3" version = "0.12.3"
@ -3898,7 +3909,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [ dependencies = [
"lock_api", "lock_api",
"parking_lot_core", "parking_lot_core 0.9.8",
]
[[package]]
name = "parking_lot_core"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall 0.2.16",
"smallvec",
"winapi",
] ]
[[package]] [[package]]
@ -4240,7 +4265,7 @@ dependencies = [
"lazy_static", "lazy_static",
"libc", "libc",
"memchr", "memchr",
"parking_lot", "parking_lot 0.12.3",
"procfs", "procfs",
"protobuf", "protobuf",
"thiserror", "thiserror",
@ -4441,21 +4466,6 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430"
[[package]]
name = "redis"
version = "0.25.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0d7a6955c7511f60f3ba9e86c6d02b3c3f144f8c24b288d1f4e18074ab8bbec"
dependencies = [
"combine",
"itoa",
"percent-encoding",
"ryu",
"sha1_smol",
"socket2 0.5.5",
"url",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.2.16" version = "0.2.16"
@ -4927,12 +4937,6 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "sha1_smol"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012"
[[package]] [[package]]
name = "sha2" name = "sha2"
version = "0.10.8" version = "0.10.8"
@ -5019,6 +5023,22 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "sled"
version = "0.34.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
dependencies = [
"crc32fast",
"crossbeam-epoch",
"crossbeam-utils",
"fs2",
"fxhash",
"libc",
"log",
"parking_lot 0.11.2",
]
[[package]] [[package]]
name = "slice-group-by" name = "slice-group-by"
version = "0.3.1" version = "0.3.1"
@ -5280,7 +5300,7 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050"
dependencies = [ dependencies = [
"parking_lot", "parking_lot 0.12.3",
] ]
[[package]] [[package]]
@ -5354,7 +5374,7 @@ dependencies = [
"bstr", "bstr",
"fancy-regex 0.12.0", "fancy-regex 0.12.0",
"lazy_static", "lazy_static",
"parking_lot", "parking_lot 0.12.3",
"rustc-hash", "rustc-hash",
] ]
@ -5466,7 +5486,7 @@ dependencies = [
"libc", "libc",
"mio", "mio",
"num_cpus", "num_cpus",
"parking_lot", "parking_lot 0.12.3",
"pin-project-lite", "pin-project-lite",
"signal-hook-registry", "signal-hook-registry",
"socket2 0.5.5", "socket2 0.5.5",

View File

@ -67,7 +67,7 @@ filter-parser = { path = "../filter-parser" }
# documents words self-join # documents words self-join
itertools = "0.13.0" itertools = "0.13.0"
redis = "0.25.4" sled = "0.34.7"
csv = "1.3.0" csv = "1.3.0"
candle-core = { version = "0.6.0" } candle-core = { version = "0.6.0" }

View File

@ -19,7 +19,7 @@ pub struct SorterCacheDelAddCboRoaringBitmap<const N: usize, MF> {
sorter: grenad::Sorter<MF>, sorter: grenad::Sorter<MF>,
deladd_buffer: Vec<u8>, deladd_buffer: Vec<u8>,
cbo_buffer: Vec<u8>, cbo_buffer: Vec<u8>,
conn: redis::Connection, conn: sled::Db,
} }
impl<const N: usize, MF> SorterCacheDelAddCboRoaringBitmap<N, MF> { impl<const N: usize, MF> SorterCacheDelAddCboRoaringBitmap<N, MF> {
@ -27,7 +27,7 @@ impl<const N: usize, MF> SorterCacheDelAddCboRoaringBitmap<N, MF> {
cap: NonZeroUsize, cap: NonZeroUsize,
sorter: grenad::Sorter<MF>, sorter: grenad::Sorter<MF>,
prefix: &'static [u8; 3], prefix: &'static [u8; 3],
conn: redis::Connection, conn: sled::Db,
) -> Self { ) -> Self {
SorterCacheDelAddCboRoaringBitmap { SorterCacheDelAddCboRoaringBitmap {
cache: ArcCache::new(cap), cache: ArcCache::new(cap),
@ -205,7 +205,7 @@ where
self.cbo_buffer.clear(); self.cbo_buffer.clear();
self.cbo_buffer.extend_from_slice(self.prefix); self.cbo_buffer.extend_from_slice(self.prefix);
self.cbo_buffer.extend_from_slice(key.as_ref()); self.cbo_buffer.extend_from_slice(key.as_ref());
redis::cmd("INCR").arg(&self.cbo_buffer).query::<usize>(&mut self.conn).unwrap(); self.conn.merge(&self.cbo_buffer, 1u32.to_ne_bytes()).unwrap();
self.sorter.insert(key, value_writer.into_inner().unwrap()) self.sorter.insert(key, value_writer.into_inner().unwrap())
} }

View File

@ -29,7 +29,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
settings_diff: &InnerIndexSettingsDiff, settings_diff: &InnerIndexSettingsDiff,
max_positions_per_attributes: Option<u32>, max_positions_per_attributes: Option<u32>,
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> { ) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
let mut conn = super::REDIS_CLIENT.get_connection().unwrap(); let conn = super::SLED_DB.clone();
let max_positions_per_attributes = max_positions_per_attributes let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE)); .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
@ -150,7 +150,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
for (field_id, value) in obkv.iter() { for (field_id, value) in obkv.iter() {
key_buffer.truncate(mem::size_of::<u32>()); key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(&field_id.to_be_bytes()); key_buffer.extend_from_slice(&field_id.to_be_bytes());
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(&mut conn).unwrap(); conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
docid_word_positions_sorter.insert(&key_buffer, value)?; docid_word_positions_sorter.insert(&key_buffer, value)?;
} }

View File

@ -41,7 +41,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
NonZeroUsize::new(20).unwrap(), NonZeroUsize::new(20).unwrap(),
facet_number_docids_sorter, facet_number_docids_sorter,
b"fnd", b"fnd",
super::REDIS_CLIENT.get_connection().unwrap(), super::SLED_DB.clone(),
); );
let mut cursor = fid_docid_facet_number.into_cursor()?; let mut cursor = fid_docid_facet_number.into_cursor()?;

View File

@ -10,7 +10,7 @@ use heed::types::SerdeJson;
use heed::BytesEncode; use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use super::REDIS_CLIENT; use super::SLED_DB;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::{BEU16StrCodec, StrRefCodec}; use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
@ -32,7 +32,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters, indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff, _settings_diff: &InnerIndexSettingsDiff,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> { ) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let mut conn = REDIS_CLIENT.get_connection().unwrap(); let conn = SLED_DB.clone();
let max_memory = indexer.max_memory_by_thread(); let max_memory = indexer.max_memory_by_thread();
let options = NormalizerOption { lossy: true, ..Default::default() }; let options = NormalizerOption { lossy: true, ..Default::default() };
@ -49,7 +49,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
NonZeroUsize::new(200).unwrap(), NonZeroUsize::new(200).unwrap(),
facet_string_docids_sorter, facet_string_docids_sorter,
b"fsd", b"fsd",
REDIS_CLIENT.get_connection().unwrap(), SLED_DB.clone(),
); );
let mut normalized_facet_string_docids_sorter = create_sorter( let mut normalized_facet_string_docids_sorter = create_sorter(
@ -106,7 +106,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let key = (field_id, hyper_normalized_value.as_ref()); let key = (field_id, hyper_normalized_value.as_ref());
let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
redis::cmd("INCR").arg(key_bytes.as_ref()).query::<usize>(&mut conn).unwrap(); conn.merge(key_bytes.as_ref(), 1u32.to_ne_bytes()).unwrap();
normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?; normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
} }

View File

@ -46,7 +46,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer: GrenadParameters, indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff, settings_diff: &InnerIndexSettingsDiff,
) -> Result<ExtractedFacetValues> { ) -> Result<ExtractedFacetValues> {
let mut conn = super::REDIS_CLIENT.get_connection().unwrap(); let mut conn = super::SLED_DB.clone();
let max_memory = indexer.max_memory_by_thread(); let max_memory = indexer.max_memory_by_thread();
let mut fid_docid_facet_numbers_sorter = create_sorter( let mut fid_docid_facet_numbers_sorter = create_sorter(
@ -334,7 +334,7 @@ fn insert_numbers_diff<MF>(
key_buffer: &mut Vec<u8>, key_buffer: &mut Vec<u8>,
mut del_numbers: Vec<f64>, mut del_numbers: Vec<f64>,
mut add_numbers: Vec<f64>, mut add_numbers: Vec<f64>,
conn: &mut redis::Connection, conn: &mut sled::Db,
) -> Result<()> ) -> Result<()>
where where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>, MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
@ -366,7 +366,7 @@ where
let mut obkv = KvWriterDelAdd::memory(); let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, bytes_of(&()))?; obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
let bytes = obkv.into_inner()?; let bytes = obkv.into_inner()?;
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(conn).unwrap(); conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
} }
} }
@ -380,7 +380,7 @@ where
let mut obkv = KvWriterDelAdd::memory(); let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, bytes_of(&()))?; obkv.insert(DelAdd::Addition, bytes_of(&()))?;
let bytes = obkv.into_inner()?; let bytes = obkv.into_inner()?;
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(conn).unwrap(); conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
} }
} }
@ -397,7 +397,7 @@ fn insert_strings_diff<MF>(
key_buffer: &mut Vec<u8>, key_buffer: &mut Vec<u8>,
mut del_strings: Vec<(String, String)>, mut del_strings: Vec<(String, String)>,
mut add_strings: Vec<(String, String)>, mut add_strings: Vec<(String, String)>,
conn: &mut redis::Connection, conn: &mut sled::Db,
) -> Result<()> ) -> Result<()>
where where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>, MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
@ -426,7 +426,7 @@ where
let mut obkv = KvWriterDelAdd::memory(); let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, original)?; obkv.insert(DelAdd::Deletion, original)?;
let bytes = obkv.into_inner()?; let bytes = obkv.into_inner()?;
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(conn).unwrap(); conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?; fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
} }
EitherOrBoth::Right((normalized, original)) => { EitherOrBoth::Right((normalized, original)) => {
@ -436,7 +436,7 @@ where
let mut obkv = KvWriterDelAdd::memory(); let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, original)?; obkv.insert(DelAdd::Addition, original)?;
let bytes = obkv.into_inner()?; let bytes = obkv.into_inner()?;
redis::cmd("INCR").arg(key_buffer.as_slice()).query::<usize>(conn).unwrap(); conn.merge(key_buffer.as_slice(), 1u32.to_ne_bytes()).unwrap();
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?; fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
} }
} }

View File

@ -44,7 +44,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
NonZeroUsize::new(300).unwrap(), NonZeroUsize::new(300).unwrap(),
fid_word_count_docids_sorter, fid_word_count_docids_sorter,
b"fwc", b"fwc",
super::REDIS_CLIENT.get_connection().unwrap(), super::SLED_DB.clone(),
); );
let mut key_buffer = Vec::new(); let mut key_buffer = Vec::new();

View File

@ -11,7 +11,7 @@ use super::helpers::{
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at, create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters, writer_into_reader, GrenadParameters,
}; };
use super::REDIS_CLIENT; use super::SLED_DB;
use crate::error::SerializationError; use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec; use crate::heed_codec::StrBEU16Codec;
use crate::index::db_name::DOCID_WORD_POSITIONS; use crate::index::db_name::DOCID_WORD_POSITIONS;
@ -53,7 +53,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
NonZeroUsize::new(300).unwrap(), NonZeroUsize::new(300).unwrap(),
word_fid_docids_sorter, word_fid_docids_sorter,
b"wfd", b"wfd",
REDIS_CLIENT.get_connection().unwrap(), SLED_DB.clone(),
); );
let mut key_buffer = Vec::new(); let mut key_buffer = Vec::new();
@ -114,7 +114,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
NonZeroUsize::new(100).unwrap(), NonZeroUsize::new(100).unwrap(),
word_docids_sorter, word_docids_sorter,
b"wdi", b"wdi",
REDIS_CLIENT.get_connection().unwrap(), SLED_DB.clone(),
); );
let exact_word_docids_sorter = create_sorter( let exact_word_docids_sorter = create_sorter(
@ -129,7 +129,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
NonZeroUsize::new(100).unwrap(), NonZeroUsize::new(100).unwrap(),
exact_word_docids_sorter, exact_word_docids_sorter,
b"ewd", b"ewd",
REDIS_CLIENT.get_connection().unwrap(), SLED_DB.clone(),
); );
let mut iter = cached_word_fid_docids_sorter.into_sorter()?.into_stream_merger_iter()?; let mut iter = cached_word_fid_docids_sorter.into_sorter()?.into_stream_merger_iter()?;
@ -221,7 +221,7 @@ fn docids_into_writers<W>(
deletions: &RoaringBitmap, deletions: &RoaringBitmap,
additions: &RoaringBitmap, additions: &RoaringBitmap,
writer: &mut grenad::Writer<W>, writer: &mut grenad::Writer<W>,
conn: &mut redis::Connection, conn: &mut sled::Db,
) -> Result<()> ) -> Result<()>
where where
W: std::io::Write, W: std::io::Write,
@ -253,7 +253,7 @@ where
} }
// insert everything in the same writer. // insert everything in the same writer.
redis::cmd("INCR").arg(word.as_bytes()).query::<usize>(conn).unwrap(); conn.merge(word.as_bytes(), 1u32.to_ne_bytes()).unwrap();
writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?; writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?;
Ok(()) Ok(())

View File

@ -56,7 +56,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
NonZeroUsize::new(100).unwrap(), NonZeroUsize::new(100).unwrap(),
sorter, sorter,
b"wpp", b"wpp",
super::REDIS_CLIENT.get_connection().unwrap(), super::SLED_DB.clone(),
) )
}) })
.collect(); .collect();

View File

@ -42,7 +42,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
NonZeroUsize::new(300).unwrap(), NonZeroUsize::new(300).unwrap(),
word_position_docids_sorter, word_position_docids_sorter,
b"wpd", b"wpd",
super::REDIS_CLIENT.get_connection().unwrap(), super::SLED_DB.clone(),
); );
let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new(); let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();

View File

@ -35,8 +35,22 @@ use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::PossibleEmbeddingMistakes; use crate::vector::error::PossibleEmbeddingMistakes;
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub static REDIS_CLIENT: once_cell::sync::Lazy<redis::Client> = pub static SLED_DB: once_cell::sync::Lazy<sled::Db> = once_cell::sync::Lazy::new(|| {
/// Sled merge operator that treats every value as a native-endian `u32` counter.
///
/// `old_value` is the counter currently stored under the key (`None` when the key is
/// absent, which counts as zero) and `merged_bytes` is the increment handed to `merge`.
/// Returns the saturating sum of both, encoded with the same native-endian layout that
/// is used to decode the inputs so the next merge reads back the value written here.
///
/// # Panics
///
/// Panics if `old_value` or `merged_bytes` is not exactly four bytes long.
fn increment_u32(
    _key: &[u8],
    old_value: Option<&[u8]>,
    merged_bytes: &[u8],
) -> Option<Vec<u8>> {
    let current_count = old_value.map_or(0, |bytes| {
        u32::from_ne_bytes(bytes.try_into().expect("stored counter must be exactly 4 bytes"))
    });
    let increment =
        u32::from_ne_bytes(merged_bytes.try_into().expect("merge operand must be exactly 4 bytes"));

    // Encode with the endianness used for decoding above. Writing big-endian bytes here
    // made each following merge misinterpret the stored counter on little-endian hosts.
    let count = current_count.saturating_add(increment).to_ne_bytes();
    Some(count.to_vec())
}
let db = sled::open("write-stats").unwrap();
db.set_merge_operator(increment_u32);
db
});
/// Extract data for each databases from obkv documents in parallel. /// Extract data for each databases from obkv documents in parallel.
/// Send data in grenad file over provided Sender. /// Send data in grenad file over provided Sender.

View File

@ -14,7 +14,7 @@ use std::result::Result as StdResult;
use std::sync::Arc; use std::sync::Arc;
use crossbeam_channel::{Receiver, Sender}; use crossbeam_channel::{Receiver, Sender};
pub use extract::REDIS_CLIENT; pub use extract::SLED_DB;
use grenad::{Merger, MergerBuilder}; use grenad::{Merger, MergerBuilder};
use heed::types::Str; use heed::types::Str;
use heed::Database; use heed::Database;

View File

@ -6,7 +6,7 @@ use heed::types::Str;
use heed::Database; use heed::Database;
use super::index_documents::cache::SorterCacheDelAddCboRoaringBitmap; use super::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
use super::index_documents::REDIS_CLIENT; use super::index_documents::SLED_DB;
use crate::update::del_add::deladd_serialize_add_side; use crate::update::del_add::deladd_serialize_add_side;
use crate::update::index_documents::{ use crate::update::index_documents::{
create_sorter, merge_deladd_cbo_roaring_bitmaps, create_sorter, merge_deladd_cbo_roaring_bitmaps,
@ -69,7 +69,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
NonZeroUsize::new(200).unwrap(), NonZeroUsize::new(200).unwrap(),
prefix_docids_sorter, prefix_docids_sorter,
b"pdi", b"pdi",
REDIS_CLIENT.get_connection().unwrap(), SLED_DB.clone(),
); );
if !common_prefix_fst_words.is_empty() { if !common_prefix_fst_words.is_empty() {

View File

@ -15,7 +15,7 @@ use crate::update::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
use crate::update::index_documents::{ use crate::update::index_documents::{
create_sorter, merge_deladd_cbo_roaring_bitmaps, create_sorter, merge_deladd_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
write_sorter_into_database, CursorClonableMmap, MergeFn, REDIS_CLIENT, write_sorter_into_database, CursorClonableMmap, MergeFn, SLED_DB,
}; };
use crate::{CboRoaringBitmapCodec, Result}; use crate::{CboRoaringBitmapCodec, Result};
@ -74,7 +74,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
NonZeroUsize::new(200).unwrap(), NonZeroUsize::new(200).unwrap(),
prefix_integer_docids_sorter, prefix_integer_docids_sorter,
b"pid", b"pid",
REDIS_CLIENT.get_connection().unwrap(), SLED_DB.clone(),
); );
if !common_prefix_fst_words.is_empty() { if !common_prefix_fst_words.is_empty() {