mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Introduce the roaring bitmap heed codec
This commit is contained in:
parent
8148210860
commit
2f0e1afd16
3
src/heed_codec/mod.rs
Normal file
3
src/heed_codec/mod.rs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
mod roaring_bitmap;
|
||||||
|
|
||||||
|
pub use self::roaring_bitmap::RoaringBitmapCodec;
|
22
src/heed_codec/roaring_bitmap.rs
Normal file
22
src/heed_codec/roaring_bitmap.rs
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
/// Unit (field-less) marker type used as a heed codec for [`RoaringBitmap`] values.
///
/// It carries no data of its own; the `heed::BytesDecode` and `heed::BytesEncode`
/// implementations in this file convert between raw bytes and a `RoaringBitmap`
/// through `RoaringBitmap::deserialize_from` and `RoaringBitmap::serialize_into`.
pub struct RoaringBitmapCodec;
|
||||||
|
|
||||||
|
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
|
||||||
|
type DItem = RoaringBitmap;
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||||
|
RoaringBitmap::deserialize_from(bytes).ok()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
|
||||||
|
type EItem = RoaringBitmap;
|
||||||
|
|
||||||
|
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
|
let mut bytes = Vec::new();
|
||||||
|
item.serialize_into(&mut bytes).ok()?;
|
||||||
|
Some(Cow::Owned(bytes))
|
||||||
|
}
|
||||||
|
}
|
18
src/lib.rs
18
src/lib.rs
@ -1,4 +1,5 @@
|
|||||||
mod best_proximity;
|
mod best_proximity;
|
||||||
|
mod heed_codec;
|
||||||
mod iter_shortest_paths;
|
mod iter_shortest_paths;
|
||||||
mod query_tokens;
|
mod query_tokens;
|
||||||
|
|
||||||
@ -16,8 +17,9 @@ use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use self::query_tokens::{QueryTokens, QueryToken};
|
|
||||||
use self::best_proximity::BestProximity;
|
use self::best_proximity::BestProximity;
|
||||||
|
use self::heed_codec::RoaringBitmapCodec;
|
||||||
|
use self::query_tokens::{QueryTokens, QueryToken};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||||
@ -35,10 +37,10 @@ pub type AttributeId = u32;
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
pub main: PolyDatabase,
|
pub main: PolyDatabase,
|
||||||
pub postings_attrs: Database<Str, ByteSlice>,
|
pub postings_attrs: Database<Str, RoaringBitmapCodec>,
|
||||||
pub prefix_postings_attrs: Database<ByteSlice, ByteSlice>,
|
pub prefix_postings_attrs: Database<ByteSlice, RoaringBitmapCodec>,
|
||||||
pub postings_ids: Database<ByteSlice, ByteSlice>,
|
pub postings_ids: Database<ByteSlice, RoaringBitmapCodec>,
|
||||||
pub prefix_postings_ids: Database<ByteSlice, ByteSlice>,
|
pub prefix_postings_ids: Database<ByteSlice, RoaringBitmapCodec>,
|
||||||
pub documents: Database<OwnedType<BEU32>, ByteSlice>,
|
pub documents: Database<OwnedType<BEU32>, ByteSlice>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -105,8 +107,7 @@ impl Index {
|
|||||||
let mut stream = fst.search(&dfa).into_stream();
|
let mut stream = fst.search(&dfa).into_stream();
|
||||||
while let Some(word) = stream.next() {
|
while let Some(word) = stream.next() {
|
||||||
let word = std::str::from_utf8(word)?;
|
let word = std::str::from_utf8(word)?;
|
||||||
if let Some(attrs) = self.postings_attrs.get(rtxn, word)? {
|
if let Some(right) = self.postings_attrs.get(rtxn, word)? {
|
||||||
let right = RoaringBitmap::deserialize_from_slice(attrs)?;
|
|
||||||
union_positions.union_with(&right);
|
union_positions.union_with(&right);
|
||||||
derived_words.push((word.as_bytes().to_vec(), right));
|
derived_words.push((word.as_bytes().to_vec(), right));
|
||||||
count += 1;
|
count += 1;
|
||||||
@ -130,8 +131,7 @@ impl Index {
|
|||||||
if attrs.contains(pos) {
|
if attrs.contains(pos) {
|
||||||
let mut key = word.clone();
|
let mut key = word.clone();
|
||||||
key.extend_from_slice(&pos.to_be_bytes());
|
key.extend_from_slice(&pos.to_be_bytes());
|
||||||
if let Some(attrs) = self.postings_ids.get(rtxn, &key).unwrap() {
|
if let Some(right) = self.postings_ids.get(rtxn, &key).unwrap() {
|
||||||
let right = RoaringBitmap::deserialize_from_slice(attrs).unwrap();
|
|
||||||
union_docids.union_with(&right);
|
union_docids.union_with(&right);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user