Use a BufWriter to build word FSTs

This commit is contained in:
ManyTheFish 2024-09-12 11:48:00 +02:00
parent eb9a20ff0b
commit aa69308e45

View File

@@ -6,6 +6,7 @@ use heed::types::Bytes;
use heed::{Database, RoTxn}; use heed::{Database, RoTxn};
use memmap2::Mmap; use memmap2::Mmap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use std::io::BufWriter;
use tempfile::tempfile; use tempfile::tempfile;
use super::channel::*; use super::channel::*;
@@ -60,8 +61,8 @@ pub fn merge_grenad_entries(
let span = let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter(); let _entered = span.enter();
let mut add_words_fst = SetBuilder::new(tempfile()?)?; let mut add_words_fst = SetBuilder::new(BufWriter::new(tempfile()?))?;
let mut del_words_fst = SetBuilder::new(tempfile()?)?; let mut del_words_fst = SetBuilder::new(BufWriter::new(tempfile()?))?;
merge_and_send_docids( merge_and_send_docids(
merger, merger,
@@ -153,16 +154,16 @@ pub fn merge_grenad_entries(
} }
fn compute_new_words_fst( fn compute_new_words_fst(
add_words_fst: SetBuilder<File>, add_words_fst: SetBuilder<BufWriter<File>>,
del_words_fst: SetBuilder<File>, del_words_fst: SetBuilder<BufWriter<File>>,
words_fst: Set<std::borrow::Cow<'_, [u8]>>, words_fst: Set<std::borrow::Cow<'_, [u8]>>,
) -> Result<Mmap> { ) -> Result<Mmap> {
let add_words_fst_file = add_words_fst.into_inner()?; let add_words_fst_file = add_words_fst.into_inner()?;
let add_words_fst_mmap = unsafe { Mmap::map(&add_words_fst_file)? }; let add_words_fst_mmap = unsafe { Mmap::map(&add_words_fst_file.into_inner().unwrap())? };
let add_words_fst = Set::new(&add_words_fst_mmap)?; let add_words_fst = Set::new(&add_words_fst_mmap)?;
let del_words_fst_file = del_words_fst.into_inner()?; let del_words_fst_file = del_words_fst.into_inner()?;
let del_words_fst_mmap = unsafe { Mmap::map(&del_words_fst_file)? }; let del_words_fst_mmap = unsafe { Mmap::map(&del_words_fst_file.into_inner().unwrap())? };
let del_words_fst = Set::new(&del_words_fst_mmap)?; let del_words_fst = Set::new(&del_words_fst_mmap)?;
let diff = words_fst.op().add(&del_words_fst).difference(); let diff = words_fst.op().add(&del_words_fst).difference();