mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-29 16:45:30 +08:00
More efficiently merge MTBLs, more than two at a time
This commit is contained in:
parent
1df1f88fe1
commit
3a23dc242e
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -996,7 +996,7 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "oxidized-mtbl"
|
name = "oxidized-mtbl"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = "git+https://github.com/Kerollmops/oxidized-mtbl.git?rev=8918476#8918476f61f4430890d067db7b4a6cfb2d549c43"
|
source = "git+https://github.com/Kerollmops/oxidized-mtbl.git?rev=6acef3d#6acef3d0fc7fec6a3701038860e51f8bbcee1ee6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"byteorder 1.3.4",
|
"byteorder 1.3.4",
|
||||||
"crc32c",
|
"crc32c",
|
||||||
|
@ -18,7 +18,7 @@ jemallocator = "0.3.2"
|
|||||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||||
memmap = "0.7.0"
|
memmap = "0.7.0"
|
||||||
once_cell = "1.4.0"
|
once_cell = "1.4.0"
|
||||||
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "8918476" }
|
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "6acef3d" }
|
||||||
rayon = "1.3.0"
|
rayon = "1.3.0"
|
||||||
roaring = "0.5.2"
|
roaring = "0.5.2"
|
||||||
slice-group-by = "0.2.6"
|
slice-group-by = "0.2.6"
|
||||||
|
@ -100,36 +100,38 @@ impl MtblKvStore {
|
|||||||
Ok(MtblKvStore(Some(out)))
|
Ok(MtblKvStore(Some(out)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge(key: &[u8], left: &[u8], right: &[u8]) -> Option<Vec<u8>> {
|
fn merge(key: &[u8], values: &[Vec<u8>]) -> Option<Vec<u8>> {
|
||||||
if key == b"\0words-fst" {
|
if key == b"\0words-fst" {
|
||||||
let left_fst = fst::Set::new(left).unwrap();
|
let fsts: Vec<_> = values.iter().map(|v| fst::Set::new(v).unwrap()).collect();
|
||||||
let right_fst = fst::Set::new(right).unwrap();
|
|
||||||
|
|
||||||
// Union of the two FSTs
|
// Union of all the FSTs (there may be more than two)
|
||||||
let op = fst::set::OpBuilder::new()
|
let mut op = fst::set::OpBuilder::new();
|
||||||
.add(left_fst.into_stream())
|
fsts.iter().for_each(|fst| op.push(fst.into_stream()));
|
||||||
.add(right_fst.into_stream())
|
let op = op.r#union();
|
||||||
.r#union();
|
|
||||||
|
|
||||||
let mut build = fst::SetBuilder::memory();
|
let mut build = fst::SetBuilder::memory();
|
||||||
build.extend_stream(op.into_stream()).unwrap();
|
build.extend_stream(op.into_stream()).unwrap();
|
||||||
Some(build.into_inner().unwrap())
|
Some(build.into_inner().unwrap())
|
||||||
}
|
}
|
||||||
else if key == b"\0headers" {
|
else if key == b"\0headers" {
|
||||||
assert_eq!(left, right);
|
assert!(values.windows(2).all(|vs| vs[0] == vs[1]));
|
||||||
Some(left.to_vec())
|
Some(values[0].to_vec())
|
||||||
}
|
}
|
||||||
else if key.starts_with(&[1]) || key.starts_with(&[2]) {
|
else if key.starts_with(&[1]) || key.starts_with(&[2]) {
|
||||||
let mut left = RoaringBitmap::deserialize_from(left).unwrap();
|
let mut first = RoaringBitmap::deserialize_from(values[0].as_slice()).unwrap();
|
||||||
let right = RoaringBitmap::deserialize_from(right).unwrap();
|
|
||||||
left.union_with(&right);
|
for value in &values[1..] {
|
||||||
|
let bitmap = RoaringBitmap::deserialize_from(value.as_slice()).unwrap();
|
||||||
|
first.union_with(&bitmap);
|
||||||
|
}
|
||||||
|
|
||||||
let mut vec = Vec::new();
|
let mut vec = Vec::new();
|
||||||
left.serialize_into(&mut vec).unwrap();
|
first.serialize_into(&mut vec).unwrap();
|
||||||
Some(vec)
|
Some(vec)
|
||||||
}
|
}
|
||||||
else if key.starts_with(&[3]) {
|
else if key.starts_with(&[3]) {
|
||||||
assert_eq!(left, right);
|
assert!(values.windows(2).all(|vs| vs[0] == vs[1]));
|
||||||
Some(left.to_vec())
|
Some(values[0].to_vec())
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
panic!("wut? {:?}", key)
|
panic!("wut? {:?}", key)
|
||||||
|
Loading…
Reference in New Issue
Block a user