Add TODOs to the prefix functions

This commit is contained in:
Clément Renault 2023-10-19 17:58:52 +02:00
parent 066221fd2b
commit 02a40645e2
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F

View File

@ -40,6 +40,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
#[logging_timer::time("WordPrefixDocids::{}")] #[logging_timer::time("WordPrefixDocids::{}")]
pub fn execute( pub fn execute(
self, self,
// TODO grenad::Reader<obkv::Reader<Word, obkv::Reader<DelAdd, CboRoaringBitmap>>>
mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>, mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,
new_prefix_fst_words: &[String], new_prefix_fst_words: &[String],
common_prefix_fst_words: &[&[String]], common_prefix_fst_words: &[&[String]],
@ -51,6 +52,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
// and write into it at the same time, therefore we write into another file. // and write into it at the same time, therefore we write into another file.
let mut prefix_docids_sorter = create_sorter( let mut prefix_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable, grenad::SortAlgorithm::Unstable,
// TODO change to merge_deladd_cbo_roaring_bitmaps
merge_cbo_roaring_bitmaps, merge_cbo_roaring_bitmaps,
self.chunk_compression_type, self.chunk_compression_type,
self.chunk_compression_level, self.chunk_compression_level,
@ -96,6 +98,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
let prefix = std::str::from_utf8(prefix.as_bytes())?; let prefix = std::str::from_utf8(prefix.as_bytes())?;
for result in db.prefix_iter(self.wtxn, prefix)? { for result in db.prefix_iter(self.wtxn, prefix)? {
let (_word, data) = result?; let (_word, data) = result?;
// TODO fake a DelAdd -> Add(`data`)
prefix_docids_sorter.insert(prefix, data)?; prefix_docids_sorter.insert(prefix, data)?;
} }
} }
@ -111,10 +114,13 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
drop(iter); drop(iter);
// We finally write the word prefix docids into the LMDB database. // We finally write the word prefix docids into the LMDB database.
// TODO introduce a new function that is similar to `append_entries_into_database`
// and accepts the `merge_deladd_cbo_roaring_bitmaps` function
sorter_into_lmdb_database( sorter_into_lmdb_database(
self.wtxn, self.wtxn,
*self.word_prefix_docids.as_polymorph(), *self.word_prefix_docids.as_polymorph(),
prefix_docids_sorter, prefix_docids_sorter,
// TODO change to `merge_deladd_cbo_roaring_bitmaps`
merge_cbo_roaring_bitmaps, merge_cbo_roaring_bitmaps,
)?; )?;
@ -127,6 +133,7 @@ fn write_prefixes_in_sorter(
sorter: &mut grenad::Sorter<MergeFn>, sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> { ) -> Result<()> {
for (key, data_slices) in prefixes.drain() { for (key, data_slices) in prefixes.drain() {
// TODO merge keys before inserting them in the sorter
for data in data_slices { for data in data_slices {
if valid_lmdb_key(&key) { if valid_lmdb_key(&key) {
sorter.insert(&key, data)?; sorter.insert(&key, data)?;