Use the sorter cache when extracting the facet number docids

This commit is contained in:
Clément Renault 2024-07-17 16:48:21 +02:00
parent 02fff51902
commit a3beaa90c5
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F

View File

@ -1,5 +1,6 @@
use std::fs::File; use std::fs::File;
use std::io::{self, BufReader}; use std::io::{self, BufReader};
use std::num::NonZeroUsize;
use heed::{BytesDecode, BytesEncode}; use heed::{BytesDecode, BytesEncode};
@ -9,8 +10,10 @@ use super::helpers::{
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec, FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
}; };
use crate::update::del_add::{KvReaderDelAdd, KvWriterDelAdd}; use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::cache::SorterCacheDelAddCboRoaringBitmap;
use crate::update::settings::InnerIndexSettingsDiff; use crate::update::settings::InnerIndexSettingsDiff;
use crate::update::MergeFn;
use crate::Result; use crate::Result;
/// Extracts the facet number and the documents ids where this facet number appear. /// Extracts the facet number and the documents ids where this facet number appear.
@ -23,10 +26,9 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters, indexer: GrenadParameters,
_settings_diff: &InnerIndexSettingsDiff, _settings_diff: &InnerIndexSettingsDiff,
) -> Result<grenad::Reader<BufReader<File>>> { ) -> Result<grenad::Reader<BufReader<File>>> {
let mut conn = super::REDIS_CLIENT.get_connection().unwrap();
let max_memory = indexer.max_memory_by_thread(); let max_memory = indexer.max_memory_by_thread();
let mut facet_number_docids_sorter = create_sorter( let facet_number_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable, grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type, indexer.chunk_compression_type,
@ -34,8 +36,13 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
indexer.max_nb_chunks, indexer.max_nb_chunks,
max_memory, max_memory,
); );
let mut cached_facet_number_docids_sorter =
SorterCacheDelAddCboRoaringBitmap::<20, MergeFn>::new(
NonZeroUsize::new(20).unwrap(),
facet_number_docids_sorter,
super::REDIS_CLIENT.get_connection().unwrap(),
);
let mut buffer = Vec::new();
let mut cursor = fid_docid_facet_number.into_cursor()?; let mut cursor = fid_docid_facet_number.into_cursor()?;
while let Some((key_bytes, deladd_obkv_bytes)) = cursor.move_on_next()? { while let Some((key_bytes, deladd_obkv_bytes)) = cursor.move_on_next()? {
let (field_id, document_id, number) = let (field_id, document_id, number) =
@ -43,17 +50,17 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let key = FacetGroupKey { field_id, level: 0, left_bound: number }; let key = FacetGroupKey { field_id, level: 0, left_bound: number };
let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap(); let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() { for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?; match deladd_key {
DelAdd::Deletion => {
cached_facet_number_docids_sorter.insert_del_u32(&key_bytes, document_id)?
}
DelAdd::Addition => {
cached_facet_number_docids_sorter.insert_add_u32(&key_bytes, document_id)?
}
}
} }
obkv.finish()?;
redis::cmd("INCR").arg(key_bytes.as_ref()).query::<usize>(&mut conn).unwrap();
facet_number_docids_sorter.insert(key_bytes, &buffer)?;
} }
sorter_into_reader(facet_number_docids_sorter, indexer) sorter_into_reader(cached_facet_number_docids_sorter.into_sorter()?, indexer)
} }