Support cancellation in merge and send

This commit is contained in:
Louis Dureuil 2024-11-07 11:23:49 +01:00
parent c9f478bc45
commit 1477b81d38
No known key found for this signature in database
2 changed files with 37 additions and 9 deletions

View File

@ -57,6 +57,7 @@ mod steps {
"extracting word proximity", "extracting word proximity",
"extracting embeddings", "extracting embeddings",
"writing to database", "writing to database",
"writing embeddings to database",
"post-processing facets", "post-processing facets",
"post-processing words", "post-processing words",
"finalizing", "finalizing",
@ -94,15 +95,19 @@ mod steps {
step(5) step(5)
} }
pub const fn post_processing_facets() -> (u16, &'static str) { pub const fn write_embedding_db() -> (u16, &'static str) {
step(6) step(6)
} }
pub const fn post_processing_words() -> (u16, &'static str) {
pub const fn post_processing_facets() -> (u16, &'static str) {
step(7) step(7)
} }
pub const fn post_processing_words() -> (u16, &'static str) {
step(8)
}
pub const fn finalizing() -> (u16, &'static str) { pub const fn finalizing() -> (u16, &'static str) {
step(8) step(9)
} }
} }
@ -239,6 +244,7 @@ where
index.word_docids.remap_types(), index.word_docids.remap_types(),
index, index,
extractor_sender.docids::<WordDocids>(), extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
)?; )?;
} }
@ -251,7 +257,8 @@ where
word_fid_docids, word_fid_docids,
index.word_fid_docids.remap_types(), index.word_fid_docids.remap_types(),
index, index,
extractor_sender.docids::<WordFidDocids>() extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?; )?;
} }
@ -265,6 +272,7 @@ where
index.exact_word_docids.remap_types(), index.exact_word_docids.remap_types(),
index, index,
extractor_sender.docids::<ExactWordDocids>(), extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?; )?;
} }
@ -278,6 +286,7 @@ where
index.word_position_docids.remap_types(), index.word_position_docids.remap_types(),
index, index,
extractor_sender.docids::<WordPositionDocids>(), extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?; )?;
} }
@ -291,6 +300,7 @@ where
index.field_id_word_count_docids.remap_types(), index.field_id_word_count_docids.remap_types(),
index, index,
extractor_sender.docids::<FidWordCountDocids>(), extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?; )?;
} }
} }
@ -310,6 +320,7 @@ where
index.word_pair_proximity_docids.remap_types(), index.word_pair_proximity_docids.remap_types(),
index, index,
extractor_sender.docids::<WordPairProximityDocids>(), extractor_sender.docids::<WordPairProximityDocids>(),
&indexing_context.must_stop_processing,
)?; )?;
} }
@ -376,8 +387,6 @@ where
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
let indexer_span = tracing::Span::current();
let vector_arroy = index.vector_arroy; let vector_arroy = index.vector_arroy;
let mut rng = rand::rngs::StdRng::seed_from_u64(42); let mut rng = rand::rngs::StdRng::seed_from_u64(42);
let indexer_span = tracing::Span::current(); let indexer_span = tracing::Span::current();
@ -450,6 +459,15 @@ where
ArroyOperation::Finish { mut user_provided } => { ArroyOperation::Finish { mut user_provided } => {
let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
let _entered = span.enter(); let _entered = span.enter();
let (finished_steps, step_name) = steps::write_embedding_db();
(indexing_context.send_progress)(Progress {
finished_steps,
total_steps,
step_name,
finished_total_documents: None,
});
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
&mut arroy_writers &mut arroy_writers
{ {

View File

@ -12,7 +12,10 @@ use super::extract::{
merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind, merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind,
}; };
use super::DocumentChange; use super::DocumentChange;
use crate::{CboRoaringBitmapCodec, Error, FieldId, GeoPoint, GlobalFieldsIdsMap, Index, Result}; use crate::{
CboRoaringBitmapCodec, Error, FieldId, GeoPoint, GlobalFieldsIdsMap, Index, InternalError,
Result,
};
pub struct GeoExtractor { pub struct GeoExtractor {
rtree: Option<rstar::RTree<GeoPoint>>, rtree: Option<rstar::RTree<GeoPoint>>,
@ -63,15 +66,22 @@ impl GeoExtractor {
} }
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_docids<'extractor>( pub fn merge_and_send_docids<'extractor, MSP>(
mut caches: Vec<BalancedCaches<'extractor>>, mut caches: Vec<BalancedCaches<'extractor>>,
database: Database<Bytes, Bytes>, database: Database<Bytes, Bytes>,
index: &Index, index: &Index,
docids_sender: impl DocidsSender + Sync, docids_sender: impl DocidsSender + Sync,
) -> Result<()> { must_stop_processing: &MSP,
) -> Result<()>
where
MSP: Fn() -> bool + Sync,
{
transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| { transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let mut buffer = Vec::new(); let mut buffer = Vec::new();
if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into());
}
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| { merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?; let current = database.get(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? { match merge_cbo_bitmaps(current, del, add)? {