mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
Compute word integer prefix cache
This commit is contained in:
parent
70ce40828c
commit
5a9c96e1db
@ -9,9 +9,11 @@ use log::debug;
|
|||||||
use crate::error::SerializationError;
|
use crate::error::SerializationError;
|
||||||
use crate::heed_codec::StrBEU16Codec;
|
use crate::heed_codec::StrBEU16Codec;
|
||||||
use crate::index::main_key::WORDS_PREFIXES_FST_KEY;
|
use crate::index::main_key::WORDS_PREFIXES_FST_KEY;
|
||||||
|
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvWriterDelAdd};
|
||||||
use crate::update::index_documents::{
|
use crate::update::index_documents::{
|
||||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, valid_lmdb_key,
|
create_sorter, merge_deladd_cbo_roaring_bitmaps,
|
||||||
CursorClonableMmap, MergeFn,
|
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, valid_lmdb_key,
|
||||||
|
write_sorter_into_database, CursorClonableMmap, MergeFn,
|
||||||
};
|
};
|
||||||
use crate::{CboRoaringBitmapCodec, Result};
|
use crate::{CboRoaringBitmapCodec, Result};
|
||||||
|
|
||||||
@ -55,7 +57,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
|||||||
|
|
||||||
let mut prefix_integer_docids_sorter = create_sorter(
|
let mut prefix_integer_docids_sorter = create_sorter(
|
||||||
grenad::SortAlgorithm::Unstable,
|
grenad::SortAlgorithm::Unstable,
|
||||||
merge_cbo_roaring_bitmaps,
|
merge_deladd_cbo_roaring_bitmaps,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.max_nb_chunks,
|
self.max_nb_chunks,
|
||||||
@ -108,6 +110,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
|||||||
|
|
||||||
// We fetch the docids associated to the newly added word prefix fst only.
|
// We fetch the docids associated to the newly added word prefix fst only.
|
||||||
let db = self.word_database.remap_data_type::<ByteSlice>();
|
let db = self.word_database.remap_data_type::<ByteSlice>();
|
||||||
|
let mut buffer = Vec::new();
|
||||||
for prefix_bytes in new_prefix_fst_words {
|
for prefix_bytes in new_prefix_fst_words {
|
||||||
let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
|
let prefix = str::from_utf8(prefix_bytes.as_bytes()).map_err(|_| {
|
||||||
SerializationError::Decoding { db_name: Some(WORDS_PREFIXES_FST_KEY) }
|
SerializationError::Decoding { db_name: Some(WORDS_PREFIXES_FST_KEY) }
|
||||||
@ -123,7 +126,11 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
|||||||
if word.starts_with(prefix) {
|
if word.starts_with(prefix) {
|
||||||
let key = (prefix, pos);
|
let key = (prefix, pos);
|
||||||
let bytes = StrBEU16Codec::bytes_encode(&key).unwrap();
|
let bytes = StrBEU16Codec::bytes_encode(&key).unwrap();
|
||||||
prefix_integer_docids_sorter.insert(bytes, data)?;
|
|
||||||
|
buffer.clear();
|
||||||
|
let mut writer = KvWriterDelAdd::new(&mut buffer);
|
||||||
|
writer.insert(DelAdd::Addition, data)?;
|
||||||
|
prefix_integer_docids_sorter.insert(bytes, writer.into_inner()?)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -143,12 +150,16 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
|||||||
drop(iter);
|
drop(iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let database_is_empty = self.prefix_database.is_empty(self.wtxn)?;
|
||||||
|
|
||||||
// We finally write all the word prefix integer docids into the LMDB database.
|
// We finally write all the word prefix integer docids into the LMDB database.
|
||||||
sorter_into_lmdb_database(
|
write_sorter_into_database(
|
||||||
self.wtxn,
|
|
||||||
*self.prefix_database.as_polymorph(),
|
|
||||||
prefix_integer_docids_sorter,
|
prefix_integer_docids_sorter,
|
||||||
merge_cbo_roaring_bitmaps,
|
&self.prefix_database,
|
||||||
|
self.wtxn,
|
||||||
|
database_is_empty,
|
||||||
|
deladd_serialize_add_side,
|
||||||
|
merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -159,6 +170,7 @@ fn write_prefixes_in_sorter(
|
|||||||
prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
|
prefixes: &mut HashMap<Vec<u8>, Vec<Vec<u8>>>,
|
||||||
sorter: &mut grenad::Sorter<MergeFn>,
|
sorter: &mut grenad::Sorter<MergeFn>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
// TODO: Merge before insertion.
|
||||||
for (key, data_slices) in prefixes.drain() {
|
for (key, data_slices) in prefixes.drain() {
|
||||||
for data in data_slices {
|
for data in data_slices {
|
||||||
if valid_lmdb_key(&key) {
|
if valid_lmdb_key(&key) {
|
||||||
|
Loading…
Reference in New Issue
Block a user