mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-02-20 01:27:52 +08:00
Introduce the compressed obkv readers and writers
This commit is contained in:
parent
2099b4f0dd
commit
2f0567fad1
20
Cargo.lock
generated
20
Cargo.lock
generated
@ -3268,6 +3268,15 @@ version = "0.4.22"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
|
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lz4_flex"
|
||||||
|
version = "0.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5"
|
||||||
|
dependencies = [
|
||||||
|
"twox-hash",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lzma-rs"
|
name = "lzma-rs"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
@ -3526,6 +3535,7 @@ dependencies = [
|
|||||||
"json-depth-checker",
|
"json-depth-checker",
|
||||||
"levenshtein_automata",
|
"levenshtein_automata",
|
||||||
"liquid",
|
"liquid",
|
||||||
|
"lz4_flex",
|
||||||
"maplit",
|
"maplit",
|
||||||
"md5",
|
"md5",
|
||||||
"meili-snap",
|
"meili-snap",
|
||||||
@ -5618,6 +5628,16 @@ version = "0.2.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "twox-hash"
|
||||||
|
version = "1.6.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"static_assertions",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typenum"
|
name = "typenum"
|
||||||
version = "1.17.0"
|
version = "1.17.0"
|
||||||
|
@ -38,6 +38,7 @@ heed = { version = "0.20.3", default-features = false, features = [
|
|||||||
indexmap = { version = "2.2.6", features = ["serde"] }
|
indexmap = { version = "2.2.6", features = ["serde"] }
|
||||||
json-depth-checker = { path = "../json-depth-checker" }
|
json-depth-checker = { path = "../json-depth-checker" }
|
||||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||||
|
lz4_flex = "0.11.3"
|
||||||
memmap2 = "0.9.4"
|
memmap2 = "0.9.4"
|
||||||
obkv = "0.2.2"
|
obkv = "0.2.2"
|
||||||
once_cell = "1.19.0"
|
once_cell = "1.19.0"
|
||||||
|
50
milli/src/heed_codec/compressed_obkv_codec.rs
Normal file
50
milli/src/heed_codec/compressed_obkv_codec.rs
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use heed::BoxedError;
|
||||||
|
use obkv::KvReaderU16;
|
||||||
|
|
||||||
|
pub struct ObkvCompressedCodec;
|
||||||
|
|
||||||
|
impl<'a> heed::BytesDecode<'a> for ObkvCompressedCodec {
|
||||||
|
type DItem = CompressedKvReaderU16<'a>;
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||||
|
Ok(CompressedKvReaderU16(bytes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl heed::BytesEncode<'_> for ObkvCompressedCodec {
|
||||||
|
type EItem = CompressedKvWriterU16;
|
||||||
|
|
||||||
|
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
|
||||||
|
Ok(Cow::Borrowed(&item.0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct CompressedKvReaderU16<'a>(&'a [u8]);
|
||||||
|
|
||||||
|
impl<'a> CompressedKvReaderU16<'a> {
|
||||||
|
pub fn decompress_with<'b>(
|
||||||
|
&self,
|
||||||
|
buffer: &'b mut Vec<u8>,
|
||||||
|
dictionnary: &[u8],
|
||||||
|
) -> Result<KvReaderU16<'b>, lz4_flex::block::DecompressError> {
|
||||||
|
let max_size = lz4_flex::block::get_maximum_output_size(self.0.len());
|
||||||
|
buffer.resize(max_size, 0);
|
||||||
|
let size = lz4_flex::block::decompress_into_with_dict(
|
||||||
|
self.0,
|
||||||
|
&mut buffer[..max_size],
|
||||||
|
dictionnary,
|
||||||
|
)?;
|
||||||
|
Ok(KvReaderU16::new(&buffer[..size]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct CompressedKvWriterU16(Vec<u8>);
|
||||||
|
|
||||||
|
impl CompressedKvWriterU16 {
|
||||||
|
// TODO ask for a KvReaderU16 here
|
||||||
|
pub fn new_with_dictionnary(writer: &[u8], dictionnary: &[u8]) -> Self {
|
||||||
|
CompressedKvWriterU16(lz4_flex::block::compress_with_dict(writer, dictionnary))
|
||||||
|
}
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
mod beu16_str_codec;
|
mod beu16_str_codec;
|
||||||
mod beu32_str_codec;
|
mod beu32_str_codec;
|
||||||
mod byte_slice_ref;
|
mod byte_slice_ref;
|
||||||
|
mod compressed_obkv_codec;
|
||||||
pub mod facet;
|
pub mod facet;
|
||||||
mod field_id_word_count_codec;
|
mod field_id_word_count_codec;
|
||||||
mod fst_set_codec;
|
mod fst_set_codec;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user