initial implementation of the progress

This commit is contained in:
Tamo 2024-12-10 16:30:48 +01:00
parent 8c19cb0a0b
commit df9b68f8ed
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
29 changed files with 585 additions and 414 deletions

View File

@@ -8,6 +8,7 @@ use bumpalo::Bump;
use criterion::{criterion_group, criterion_main, Criterion};
use milli::documents::PrimaryKey;
use milli::heed::{EnvOpenOptions, RwTxn};
+ use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
@@ -151,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -166,7 +167,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -218,7 +219,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -233,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -263,7 +264,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -278,7 +279,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -332,7 +333,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -347,7 +348,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -409,7 +410,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -424,7 +425,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -454,7 +455,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -469,7 +470,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -495,7 +496,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -510,7 +511,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -563,7 +564,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -578,7 +579,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -630,7 +631,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -645,7 +646,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -697,7 +698,7 @@ fn indexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -712,7 +713,7 @@ fn indexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -763,7 +764,7 @@ fn reindexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -778,7 +779,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -808,7 +809,7 @@ fn reindexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -823,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -876,7 +877,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -891,7 +892,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -953,7 +954,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -968,7 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -999,7 +1000,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1014,7 +1015,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1041,7 +1042,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1056,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1108,7 +1109,7 @@ fn indexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1123,7 +1124,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1174,7 +1175,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1189,7 +1190,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1219,7 +1220,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1234,7 +1235,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1287,7 +1288,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1302,7 +1303,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1350,7 +1351,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1400,7 +1401,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1415,7 +1416,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1445,7 +1446,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1460,7 +1461,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1486,7 +1487,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1501,7 +1502,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1576,7 +1577,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1591,7 +1592,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1667,7 +1668,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1682,7 +1683,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1750,7 +1751,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1765,7 +1766,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1817,7 +1818,7 @@ fn indexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1832,7 +1833,7 @@ fn indexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1883,7 +1884,7 @@ fn reindexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1898,7 +1899,7 @@ fn reindexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1928,7 +1929,7 @@ fn reindexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -1943,7 +1944,7 @@ fn reindexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
@@ -1996,7 +1997,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2011,7 +2012,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
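Every hunk in this file makes the same substitution: the progress-callback argument `&|_progress| ()` becomes a `Progress::default()` handle (and `&|_| ()` becomes `&Progress::default()`). A minimal std-only sketch of the difference, with a hypothetical stand-in type rather than milli's real `Progress`:

use std::sync::{Arc, Mutex};

// Hypothetical stand-in, not milli's real type: just enough to contrast
// the two call-site shapes seen in the hunks above.
#[derive(Clone, Default)]
struct Progress(Arc<Mutex<Vec<String>>>);

impl Progress {
    fn update_progress(&self, step: &str) {
        self.0.lock().unwrap().push(step.to_string());
    }
}

// Old shape: the indexer reported progress through a caller-supplied closure.
fn index_with_callback(on_progress: &(dyn Fn(&str) + Sync)) {
    on_progress("extracting documents");
}

// New shape: the indexer receives a cloneable handle and pushes updates into
// it; the caller (or another thread) can read the shared state at any time.
fn index_with_handle(progress: &Progress) {
    progress.update_progress("extracting documents");
}

fn main() {
    index_with_callback(&|_progress| ()); // the old `&|_progress| ()` argument
    let progress = Progress::default();
    index_with_handle(&progress); // the new `Progress::default()` argument
    assert_eq!(progress.0.lock().unwrap().len(), 1);
}

The handle style lets the scheduler keep a clone on its side and read the current state on demand, which a fire-and-forget callback cannot offer.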

View File

@@ -10,6 +10,7 @@ use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::heed::EnvOpenOptions;
+ use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
@@ -110,7 +111,7 @@ pub fn base_setup(conf: &Conf) -> Index {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -125,7 +126,7 @@ pub fn base_setup(conf: &Conf) -> Index {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();

View File

@@ -10,6 +10,7 @@ use either::Either;
use fuzzers::Operation;
use milli::documents::mmap_from_objects;
use milli::heed::EnvOpenOptions;
+ use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig};
use milli::vector::EmbeddingConfigs;
@@ -128,7 +129,7 @@ fn main() {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -143,7 +144,7 @@ fn main() {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();

View File

@@ -22,8 +22,6 @@ use std::ffi::OsStr;
use std::fmt;
use std::fs::{self, File};
use std::io::BufWriter;
- use std::sync::atomic::{self, AtomicU64};
- use std::time::Duration;

use bumpalo::collections::CollectIn;
use bumpalo::Bump;
@@ -32,6 +30,7 @@ use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
use meilisearch_types::milli::heed::CompactionOption;
+ use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::{
DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings,
@@ -41,9 +40,7 @@ use meilisearch_types::milli::vector::parsed_vectors::{
};
use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
- use meilisearch_types::tasks::{
-     Details, IndexSwap, Kind, KindWithContent, Status, Task, TaskProgress,
- };
+ use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
use roaring::RoaringBitmap;
use time::macros::format_description;
@@ -561,11 +558,12 @@ impl IndexScheduler {
/// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
- #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
+ #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(
&self,
batch: Batch,
current_batch: &mut ProcessingBatch,
+ progress: Progress,
) -> Result<Vec<Task>> {
#[cfg(test)]
{
@@ -953,7 +951,7 @@ impl IndexScheduler {
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));

let mut index_wtxn = index.write_txn()?;
- let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
+ let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;

{
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
@@ -996,6 +994,7 @@ impl IndexScheduler {
self.process_batch(
Batch::IndexUpdate { index_uid, primary_key, task },
current_batch,
+ progress,
)
}
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
@@ -1168,7 +1167,7 @@ impl IndexScheduler {
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
- skip(self, index_wtxn, index),
+ skip(self, index_wtxn, index, progress),
target = "indexing::scheduler"
)]
fn apply_index_operation<'i>(
@@ -1176,44 +1175,12 @@ impl IndexScheduler {
index_wtxn: &mut RwTxn<'i>,
index: &'i Index,
operation: IndexOperation,
+ progress: Progress,
) -> Result<Vec<Task>> {
let indexer_alloc = Bump::new();

let started_processing_at = std::time::Instant::now();
- let secs_since_started_processing_at = AtomicU64::new(0);
- const PRINT_SECS_DELTA: u64 = 5;
-
- let processing_tasks = self.processing_tasks.clone();
let must_stop_processing = self.must_stop_processing.clone();
- let send_progress = |progress| {
-     let now = std::time::Instant::now();
-     let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed);
-     let previous = started_processing_at + Duration::from_secs(elapsed);
-     let elapsed = now - previous;
-
-     if elapsed.as_secs() < PRINT_SECS_DELTA {
-         return;
-     }
-
-     secs_since_started_processing_at
-         .store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed);
-
-     let TaskProgress {
-         current_step,
-         finished_steps,
-         total_steps,
-         finished_substeps,
-         total_substeps,
-     } = processing_tasks.write().unwrap().update_progress(progress);
-
-     tracing::info!(
-         current_step,
-         finished_steps,
-         total_steps,
-         finished_substeps,
-         total_substeps
-     );
- };

match operation {
IndexOperation::DocumentClear { index_uid, mut tasks } => {
@@ -1308,7 +1275,7 @@ impl IndexScheduler {
primary_key.as_deref(),
&mut new_fields_ids_map,
&|| must_stop_processing.get(),
- &send_progress,
+ progress.clone(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
@@ -1356,7 +1323,7 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
- &send_progress,
+ &progress,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
@@ -1470,7 +1437,7 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
- &send_progress,
+ &progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
@@ -1621,7 +1588,7 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
- &send_progress,
+ &progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
@@ -1673,12 +1640,14 @@ impl IndexScheduler {
index_uid: index_uid.clone(),
tasks: cleared_tasks,
},
+ progress.clone(),
)?;

let settings_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
+ progress,
)?;

let mut tasks = settings_tasks;
@@ -1702,8 +1671,8 @@ impl IndexScheduler {
let all_task_ids = self.all_task_ids(wtxn)?;
let mut to_delete_tasks = all_task_ids & matched_tasks;
- to_delete_tasks -= processing_tasks;
- to_delete_tasks -= enqueued_tasks;
+ to_delete_tasks -= &**processing_tasks;
+ to_delete_tasks -= &enqueued_tasks;

// 2. We now have a list of tasks to delete, delete them
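The `DocumentClearAndSetting` hunk above shows the convention used throughout this file: every nested call to `apply_index_operation` receives `progress.clone()`, and the final call moves the handle. A toy sketch of why cloning is cheap and correct here, using a stand-in type rather than the scheduler's real code:

use std::sync::{Arc, Mutex};

// Stand-in handle: clones share the same underlying state, so every
// sub-operation reports into one place.
#[derive(Clone, Default)]
struct Progress(Arc<Mutex<Vec<&'static str>>>);

fn apply_sub_operation(progress: Progress, name: &'static str) {
    progress.0.lock().unwrap().push(name);
}

fn process_batch(progress: Progress) {
    apply_sub_operation(progress.clone(), "document clear"); // clone for the first step
    apply_sub_operation(progress, "settings"); // move for the last step
}

fn main() {
    let progress = Progress::default();
    process_batch(progress.clone());
    assert_eq!(*progress.0.lock().unwrap(), vec!["document clear", "settings"]);
}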

View File

@@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
pub fn snapshot_batch(batch: &Batch) -> String {
let mut snap = String::new();
- let Batch { uid, details, stats, started_at, finished_at } = batch;
+ let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
if let Some(finished_at) = finished_at {
assert!(finished_at > started_at);
}

View File

@@ -26,6 +26,7 @@ mod index_mapper;
#[cfg(test)]
mod insta_snapshot;
mod lru;
+ mod processing;
mod utils;
pub mod uuid_codec;
@@ -56,12 +57,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
- use meilisearch_types::milli::update::new::indexer::document_changes::Progress;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
- use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress};
+ use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
+ use processing::ProcessingTasks;
use rayon::current_num_threads;
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
@@ -72,7 +73,8 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map
use uuid::Uuid;

use crate::index_mapper::IndexMapper;
- use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch};
+ use crate::processing::{AtomicTaskStep, BatchProgress};
+ use crate::utils::{check_index_swap_validity, clamp_to_page_size};

pub(crate) type BEI128 = I128<BE>;
@@ -163,48 +165,6 @@ impl Query {
}
}

- #[derive(Debug, Clone)]
- pub struct ProcessingTasks {
-     batch: Option<ProcessingBatch>,
-     /// The list of tasks ids that are currently running.
-     processing: RoaringBitmap,
-     /// The progress on processing tasks
-     progress: Option<TaskProgress>,
- }
-
- impl ProcessingTasks {
-     /// Creates an empty `ProcessingAt` struct.
-     fn new() -> ProcessingTasks {
-         ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None }
-     }
-
-     /// Stores the currently processing tasks, and the date time at which it started.
-     fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) {
-         self.batch = Some(processing_batch);
-         self.processing = processing;
-     }
-
-     fn update_progress(&mut self, progress: Progress) -> TaskProgress {
-         self.progress.get_or_insert_with(TaskProgress::default).update(progress)
-     }
-
-     /// Set the processing tasks to an empty list
-     fn stop_processing(&mut self) -> Self {
-         self.progress = None;
-
-         Self {
-             batch: std::mem::take(&mut self.batch),
-             processing: std::mem::take(&mut self.processing),
-             progress: None,
-         }
-     }
-
-     /// Returns `true` if there, at least, is one task that is currently processing that we must stop.
-     fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
-         !self.processing.is_disjoint(canceled_tasks)
-     }
- }

#[derive(Default, Clone, Debug)]
struct MustStopProcessing(Arc<AtomicBool>);
@@ -813,7 +773,7 @@ impl IndexScheduler {
let mut batch_tasks = RoaringBitmap::new();
for batch_uid in batch_uids {
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
- batch_tasks |= &processing_tasks;
+ batch_tasks |= &*processing_tasks;
} else {
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
}
@@ -827,13 +787,13 @@ impl IndexScheduler {
match status {
// special case for Processing tasks
Status::Processing => {
- status_tasks |= &processing_tasks;
+ status_tasks |= &*processing_tasks;
}
status => status_tasks |= &self.get_status(rtxn, *status)?,
};
}
if !status.contains(&Status::Processing) {
- tasks -= &processing_tasks;
+ tasks -= &*processing_tasks;
}
tasks &= status_tasks;
}
@@ -882,7 +842,7 @@ impl IndexScheduler {
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
tasks = {
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
- (&tasks - &processing_tasks, &tasks & &processing_tasks);
+ (&tasks - &*processing_tasks, &tasks & &*processing_tasks);

// special case for Processing tasks
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
@@ -1090,7 +1050,7 @@ impl IndexScheduler {
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
batches = {
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
- (&batches - &processing.processing, &batches & &processing.processing);
+ (&batches - &*processing.processing, &batches & &*processing.processing);

// special case for Processing batches
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
@@ -1606,7 +1566,8 @@ impl IndexScheduler {
// We reset the must_stop flag to be sure that we don't stop processing tasks
self.must_stop_processing.reset();
- self.processing_tasks
+ let progress = self
+     .processing_tasks
.write()
.unwrap()
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
@@ -1619,11 +1580,12 @@ impl IndexScheduler {
let res = {
let cloned_index_scheduler = self.private_clone();
let processing_batch = &mut processing_batch;
+ let progress = progress.clone();
std::thread::scope(|s| {
let handle = std::thread::Builder::new()
.name(String::from("batch-operation"))
.spawn_scoped(s, move || {
- cloned_index_scheduler.process_batch(batch, processing_batch)
+ cloned_index_scheduler.process_batch(batch, processing_batch, progress)
})
.unwrap();
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
@@ -1636,6 +1598,7 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

+ progress.update_progress(BatchProgress::WritingTasksToDisk);
processing_batch.finished();
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new();
@@ -1645,12 +1608,15 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::ProcessBatchSucceeded);

+ let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
+ progress.update_progress(task_progress_obj);
let mut success = 0;
let mut failure = 0;
let mut canceled_by = None;

#[allow(unused_variables)]
for (i, mut task) in tasks.into_iter().enumerate() {
+ task_progress.fetch_add(1, Ordering::Relaxed);
processing_batch.update(&mut task);
if task.status == Status::Canceled {
canceled.insert(task.uid);
@@ -1718,8 +1684,12 @@ impl IndexScheduler {
Err(err) => {
#[cfg(test)]
self.breakpoint(Breakpoint::ProcessBatchFailed);
+ let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
+ progress.update_progress(task_progress_obj);

let error: ResponseError = err.into();
for id in ids.iter() {
+ task_progress.fetch_add(1, Ordering::Relaxed);
let mut task = self
.get_task(&wtxn, id)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?
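The scheduler clones the handle before `spawn_scoped` because the closure is `move`; the original stays on the scheduler side for the progress view and for the `AtomicTaskStep` bookkeeping above. A std-only sketch of that ownership pattern, with an `Arc<AtomicU32>` standing in for the cloneable `Progress` handle:

use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use std::thread;

fn main() {
    // Stand-in for the cloneable Progress handle.
    let progress = Arc::new(AtomicU32::new(0));
    let worker_progress = progress.clone(); // cloned into the worker thread
    thread::scope(|s| {
        let handle = thread::Builder::new()
            .name(String::from("batch-operation"))
            .spawn_scoped(s, move || {
                // stand-in for process_batch(batch, processing_batch, progress)
                worker_progress.fetch_add(1, Ordering::Relaxed);
            })
            .unwrap();
        handle.join().unwrap();
    });
    // The scheduler-side handle observes the worker's updates.
    assert_eq!(progress.load(Ordering::Relaxed), 1);
}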

View File

@@ -0,0 +1,205 @@
use crate::utils::ProcessingBatch;
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
use roaring::RoaringBitmap;
use std::{borrow::Cow, sync::Arc};
#[derive(Clone)]
pub struct ProcessingTasks {
pub batch: Option<Arc<ProcessingBatch>>,
/// The list of tasks ids that are currently running.
pub processing: Arc<RoaringBitmap>,
/// The progress on processing tasks
pub progress: Option<Progress>,
}
impl ProcessingTasks {
/// Creates an empty `ProcessingTasks` struct.
pub fn new() -> ProcessingTasks {
ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
}
pub fn get_progress_view(&self) -> Option<ProgressView> {
Some(self.progress.as_ref()?.as_progress_view())
}
/// Stores the currently processing tasks, and the date time at which it started.
pub fn start_processing(
&mut self,
processing_batch: ProcessingBatch,
processing: RoaringBitmap,
) -> Progress {
self.batch = Some(Arc::new(processing_batch));
self.processing = Arc::new(processing);
let progress = Progress::default();
progress.update_progress(BatchProgress::ProcessingTasks);
self.progress = Some(progress.clone());
progress
}
/// Set the processing tasks to an empty list
pub fn stop_processing(&mut self) -> Self {
self.progress = None;
Self {
batch: std::mem::take(&mut self.batch),
processing: std::mem::take(&mut self.processing),
progress: None,
}
}
/// Returns `true` if at least one of the currently processing tasks must be stopped.
pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
!self.processing.is_disjoint(canceled_tasks)
}
}
#[repr(u8)]
#[derive(Copy, Clone)]
pub enum BatchProgress {
ProcessingTasks,
WritingTasksToDisk,
}
impl Step for BatchProgress {
fn name(&self) -> Cow<'static, str> {
match self {
BatchProgress::ProcessingTasks => Cow::Borrowed("processing tasks"),
BatchProgress::WritingTasksToDisk => Cow::Borrowed("writing tasks to disk"),
}
}
fn current(&self) -> u32 {
*self as u8 as u32
}
fn total(&self) -> u32 {
2
}
}
#[derive(Default)]
pub struct Task {}
impl NamedStep for Task {
fn name(&self) -> &'static str {
"task"
}
}
pub type AtomicTaskStep = AtomicSubStep<Task>;
#[cfg(test)]
mod test {
use std::sync::atomic::Ordering;
use meili_snap::{json_string, snapshot};
use super::*;
#[test]
fn one_level() {
let mut processing = ProcessingTasks::new();
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"name": "processing tasks",
"finished": 0,
"total": 2
}
],
"percentage": 0.0
}
"#);
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"name": "writing tasks to disk",
"finished": 1,
"total": 2
}
],
"percentage": 50.0
}
"#);
}
#[test]
fn task_progress() {
let mut processing = ProcessingTasks::new();
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
let (atomic, tasks) = AtomicTaskStep::new(10);
processing.progress.as_ref().unwrap().update_progress(tasks);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"name": "processing tasks",
"finished": 0,
"total": 2
},
{
"name": "task",
"finished": 0,
"total": 10
}
],
"percentage": 0.0
}
"#);
atomic.fetch_add(6, Ordering::Relaxed);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"name": "processing tasks",
"finished": 0,
"total": 2
},
{
"name": "task",
"finished": 6,
"total": 10
}
],
"percentage": 30.000002
}
"#);
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"name": "writing tasks to disk",
"finished": 1,
"total": 2
}
],
"percentage": 50.0
}
"#);
let (atomic, tasks) = AtomicTaskStep::new(5);
processing.progress.as_ref().unwrap().update_progress(tasks);
atomic.fetch_add(4, Ordering::Relaxed);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"name": "writing tasks to disk",
"finished": 1,
"total": 2
},
{
"name": "task",
"finished": 4,
"total": 5
}
],
"percentage": 90.0
}
"#);
}
}
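The percentages in these snapshots follow the nested-step formula from `Progress::as_progress_view` (added later in this commit): each registered step contributes `current / (product of all totals up to and including its own)`. A check of the final 90% snapshot, assuming exactly that formula:

fn main() {
    // "writing tasks to disk" is step 1 of 2 finished...
    let batch = 1.0_f32 / 2.0;
    // ...and 4 of 5 tasks are finished inside that second step.
    let tasks = 4.0_f32 / (2.0 * 5.0);
    // f32 rounding makes some snapshots show values like 30.000002;
    // here the product rounds back to exactly 90.0.
    assert_eq!((batch + tasks) * 100.0, 90.0);
}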

View File

@@ -134,6 +134,7 @@ impl ProcessingBatch {
pub fn to_batch(&self) -> Batch {
Batch {
uid: self.uid,
+ progress: None,
details: self.details.clone(),
stats: self.stats.clone(),
started_at: self.started_at,
@@ -187,6 +188,7 @@ impl IndexScheduler {
&batch.uid,
&Batch {
uid: batch.uid,
+ progress: None,
details: batch.details,
stats: batch.stats,
started_at: batch.started_at,
@@ -273,7 +275,10 @@ impl IndexScheduler {
.into_iter()
.map(|batch_id| {
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
- Ok(processing.batch.as_ref().unwrap().to_batch())
+ let mut batch = processing.batch.as_ref().unwrap().to_batch();
+ println!("here with progress: {}", processing.progress.is_some());
+ batch.progress = processing.get_progress_view();
+ Ok(batch)
} else {
self.get_batch(rtxn, batch_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))

View File

@@ -1,3 +1,4 @@
+ use milli::progress::ProgressView;
use serde::Serialize;
use time::{Duration, OffsetDateTime};
@@ -11,6 +12,7 @@ use crate::{
#[serde(rename_all = "camelCase")]
pub struct BatchView {
pub uid: BatchId,
+ pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,
#[serde(serialize_with = "serialize_duration", default)]
@@ -25,6 +27,7 @@ impl BatchView {
pub fn from_batch(batch: &Batch) -> Self {
Self {
uid: batch.uid,
+ progress: batch.progress.clone(),
details: batch.details.clone(),
stats: batch.stats.clone(),
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),

View File

@@ -1,5 +1,6 @@
use std::collections::BTreeMap;

+ use milli::progress::ProgressView;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
@@ -15,6 +16,8 @@ pub type BatchId = u32;
pub struct Batch {
pub uid: BatchId,

+ #[serde(skip_deserializing)]
+ pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,

View File

@@ -4,7 +4,6 @@ use std::fmt::{Display, Write};
use std::str::FromStr;

use enum_iterator::Sequence;
- use milli::update::new::indexer::document_changes::Progress;
use milli::update::IndexDocumentsMethod;
use milli::Object;
use roaring::RoaringBitmap;
@@ -41,62 +40,6 @@ pub struct Task {
pub kind: KindWithContent,
}

- #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
- #[serde(rename_all = "camelCase")]
- pub struct TaskProgress {
-     pub current_step: &'static str,
-     pub finished_steps: u16,
-     pub total_steps: u16,
-     pub finished_substeps: Option<u32>,
-     pub total_substeps: Option<u32>,
- }
-
- impl Default for TaskProgress {
-     fn default() -> Self {
-         Self::new()
-     }
- }
-
- impl TaskProgress {
-     pub fn new() -> Self {
-         Self {
-             current_step: "start",
-             finished_steps: 0,
-             total_steps: 1,
-             finished_substeps: None,
-             total_substeps: None,
-         }
-     }
-
-     pub fn update(&mut self, progress: Progress) -> TaskProgress {
-         if self.finished_steps > progress.finished_steps {
-             return *self;
-         }
-
-         if self.current_step != progress.step_name {
-             self.current_step = progress.step_name
-         }
-
-         self.total_steps = progress.total_steps;
-
-         if self.finished_steps < progress.finished_steps {
-             self.finished_substeps = None;
-             self.total_substeps = None;
-         }
-         self.finished_steps = progress.finished_steps;
-         if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep {
-             if let Some(task_finished_substeps) = self.finished_substeps {
-                 if task_finished_substeps > finished_substeps {
-                     return *self;
-                 }
-             }
-             self.finished_substeps = Some(finished_substeps);
-             self.total_substeps = Some(total_substeps);
-         }
-         *self
-     }
- }

impl Task {
pub fn index_uid(&self) -> Option<&str> {
use KindWithContent::*;

View File

@@ -1734,6 +1734,7 @@ pub(crate) mod tests {
use crate::error::{Error, InternalError};
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
+ use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::settings::InnerIndexSettings;
use crate::update::{
@@ -1810,7 +1811,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)?;

if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -1829,7 +1830,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
})
.unwrap()?;
@@ -1901,7 +1902,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)?;

if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -1920,7 +1921,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
})
.unwrap()?;
@@ -1982,7 +1983,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2001,7 +2002,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| should_abort.load(Relaxed),
- &|_| (),
+ &Progress::default(),
)
})
.unwrap()

View File

@@ -31,6 +31,7 @@ pub mod vector;
#[macro_use]
pub mod snapshot_tests;
mod fieldids_weights_map;
+ pub mod progress;

use std::collections::{BTreeMap, HashMap};
use std::convert::{TryFrom, TryInto};

View File

@@ -0,0 +1,116 @@
use std::{
any::TypeId,
borrow::Cow,
sync::{
atomic::{AtomicU32, Ordering},
Arc, RwLock,
},
};
use serde::Serialize;
pub trait Step: 'static + Send + Sync {
fn name(&self) -> Cow<'static, str>;
fn current(&self) -> u32;
fn total(&self) -> u32;
}
#[derive(Clone, Default)]
pub struct Progress {
steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>,
}
impl Progress {
pub fn update_progress<P: Step>(&self, sub_progress: P) {
let mut steps = self.steps.write().unwrap();
let step_type = TypeId::of::<P>();
if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) {
steps.truncate(idx);
}
steps.push((step_type, Box::new(sub_progress)));
}
// TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
pub fn as_progress_view(&self) -> ProgressView {
let steps = self.steps.read().unwrap();
let mut percentage = 0.0;
let mut prev_factors = 1.0;
let mut step_view = Vec::new();
for (_, step) in steps.iter() {
prev_factors *= step.total() as f32;
percentage += step.current() as f32 / prev_factors;
step_view.push(ProgressStepView {
name: step.name(),
finished: step.current(),
total: step.total(),
});
}
ProgressView { steps: step_view, percentage: percentage * 100.0 }
}
}
/// This trait lets you use the AtomicSubStep defined right below.
/// The name must be a const that never changes, but that can't be enforced by the type system because it would make the trait non object-safe.
/// By forcing the Default trait + the &'static str, we make it harder to misuse the trait.
pub trait NamedStep: 'static + Send + Sync + Default {
fn name(&self) -> &'static str;
}
/// Structure for steps that need fast, lockless updates of their current value.
/// You can use this struct if:
/// - The name of the step doesn't change
/// - The total number of steps doesn't change
pub struct AtomicSubStep<Name: NamedStep> {
name: Name,
current: Arc<AtomicU32>,
total: u32,
}
impl<Name: NamedStep> AtomicSubStep<Name> {
pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
let current = Arc::new(AtomicU32::new(0));
(current.clone(), Self { current, total, name: Name::default() })
}
}
impl<Name: NamedStep> Step for AtomicSubStep<Name> {
fn name(&self) -> Cow<'static, str> {
self.name.name().into()
}
fn current(&self) -> u32 {
self.current.load(Ordering::Relaxed)
}
fn total(&self) -> u32 {
self.total
}
}
#[derive(Default)]
pub struct Document {}
impl NamedStep for Document {
fn name(&self) -> &'static str {
"document"
}
}
pub type AtomicDocumentStep = AtomicSubStep<Document>;
#[derive(Debug, Serialize, Clone)]
pub struct ProgressView {
steps: Vec<ProgressStepView>,
percentage: f32,
}
#[derive(Debug, Serialize, Clone)]
pub struct ProgressStepView {
name: Cow<'static, str>,
finished: u32,
total: u32,
}
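A usage sketch of this API, assuming the module above is reachable as `milli::progress` (as the other hunks in this commit import it) and using `serde_json` only to render the view:

use std::sync::atomic::Ordering;

use milli::progress::{AtomicDocumentStep, Progress};

fn main() {
    let progress = Progress::default();

    // Register the predefined "document" sub-step with a total of 100.
    let (counter, step) = AtomicDocumentStep::new(100);
    progress.update_progress(step);
    counter.fetch_add(25, Ordering::Relaxed);

    // Registering a step of the same type truncates the stack back to that
    // level, so a fresh counter replaces the stale one.
    let (counter, step) = AtomicDocumentStep::new(40);
    progress.update_progress(step);
    counter.fetch_add(10, Ordering::Relaxed);

    // percentage = 10 / 40 = 25%
    println!("{}", serde_json::to_string(&progress.as_progress_view()).unwrap());
}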

View File

@@ -5,6 +5,7 @@ use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};

+ use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
@@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -91,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();

View File

@@ -766,6 +766,7 @@ mod tests {
use crate::documents::mmap_from_objects;
use crate::index::tests::TempIndex;
use crate::index::IndexEmbeddingConfig;
+ use crate::progress::Progress;
use crate::search::TermsMatchingStrategy;
use crate::update::new::indexer;
use crate::update::Setting;
@@ -1964,7 +1965,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2148,7 +2149,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2163,7 +2164,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2210,7 +2211,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2225,7 +2226,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2263,7 +2264,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2278,7 +2279,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2315,7 +2316,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2330,7 +2331,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2369,7 +2370,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2384,7 +2385,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2428,7 +2429,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2443,7 +2444,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2480,7 +2481,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2495,7 +2496,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2532,7 +2533,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2547,7 +2548,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2726,7 +2727,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2741,7 +2742,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2785,7 +2786,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2800,7 +2801,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@@ -2841,7 +2842,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
- &|_progress| (),
+ Progress::default(),
)
.unwrap();
@@ -2856,7 +2857,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
- &|_| (),
+ &Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();

View File

@@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
use crate::update::new::extract::perm_json_p;
use crate::update::new::indexer::document_changes::{
-     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
+     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
- use crate::update::new::steps::Step;
+ use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str {
impl FacetedDocidsExtractor {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
- pub fn run_extraction<
-     'pl,
-     'fid,
-     'indexer,
-     'index,
-     'extractor,
-     DC: DocumentChanges<'pl>,
-     MSP,
-     SP,
- >(
+ pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
- indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+ indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
sender: &FieldIdDocidFacetSender,
- step: Step,
+ step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
- SP: Fn(Progress) + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;

@@ -15,23 +15,22 @@ pub use geo::*;
 pub use searchable::*;
 pub use vectors::EmbeddingExtractor;
-use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress};
+use super::indexer::document_changes::{DocumentChanges, IndexingContext};
-use super::steps::Step;
+use super::steps::IndexingStep;
 use super::thread_local::{FullySend, ThreadLocal};
 use crate::update::GrenadParameters;
 use crate::Result;
 pub trait DocidsExtractor {
-fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
+fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
 grenad_parameters: GrenadParameters,
 document_changes: &DC,
-indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-step: Step,
+step: IndexingStep,
 ) -> Result<Vec<BalancedCaches<'extractor>>>
 where
-MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync;
+MSP: Fn() -> bool + Sync;
 }
 /// TODO move in permissive json pointer


@@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::extract::perm_json_p::contained_in;
 use crate::update::new::indexer::document_changes::{
-extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
+extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
 };
 use crate::update::new::ref_cell_ext::RefCellExt as _;
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;
@@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
 pub struct WordDocidsExtractors;
 impl WordDocidsExtractors {
-pub fn run_extraction<
-'pl,
-'fid,
-'indexer,
-'index,
-'extractor,
-DC: DocumentChanges<'pl>,
-MSP,
-SP,
->(
+pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
 grenad_parameters: GrenadParameters,
 document_changes: &DC,
-indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-step: Step,
+step: IndexingStep,
 ) -> Result<WordDocidsCaches<'extractor>>
 where
 MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync,
 {
 let index = indexing_context.index;
 let rtxn = index.read_txn()?;


@@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use super::cache::BalancedCaches;
 use super::DocidsExtractor;
 use crate::update::new::indexer::document_changes::{
-extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
+extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
 };
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;
@@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
 }
 pub trait SearchableExtractor: Sized + Sync {
-fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
+fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
 grenad_parameters: GrenadParameters,
 document_changes: &DC,
-indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-step: Step,
+step: IndexingStep,
 ) -> Result<Vec<BalancedCaches<'extractor>>>
 where
 MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync,
 {
 let rtxn = indexing_context.index.read_txn()?;
 let stop_words = indexing_context.index.stop_words(&rtxn)?;
@@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync {
 }
 impl<T: SearchableExtractor> DocidsExtractor for T {
-fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
+fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
 grenad_parameters: GrenadParameters,
 document_changes: &DC,
-indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-step: Step,
+step: IndexingStep,
 ) -> Result<Vec<BalancedCaches<'extractor>>>
 where
 MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync,
 {
 Self::run_extraction(
 grenad_parameters,


@@ -1,4 +1,5 @@
 use std::cell::{Cell, RefCell};
+use std::sync::atomic::Ordering;
 use std::sync::{Arc, RwLock};

 use bumpalo::Bump;
@@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator;
 use super::super::document_change::DocumentChange;
 use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
+use crate::progress::{AtomicDocumentStep, Progress};
 use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
 use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result};
@@ -133,10 +135,8 @@ pub struct IndexingContext<
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
-SP,
 > where
 MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync,
 {
 pub index: &'index Index,
 pub db_fields_ids_map: &'indexer FieldsIdsMap,
@@ -144,7 +144,8 @@ pub struct IndexingContext<
 pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
 pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
 pub must_stop_processing: &'indexer MSP,
-pub send_progress: &'indexer SP,
+// TODO: TAMO: Rename field to progress
+pub send_progress: &'indexer Progress,
 }
 impl<
@@ -152,18 +153,15 @@ impl<
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
-SP,
 > Copy
 for IndexingContext<
 'fid, // invariant lifetime of fields ids map
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
-SP,
 >
 where
 MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync,
 {
 }
@@ -172,18 +170,15 @@ impl<
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
-SP,
 > Clone
 for IndexingContext<
 'fid, // invariant lifetime of fields ids map
 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
 'index, // covariant lifetime of the index
 MSP,
-SP,
 >
 where
 MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync,
 {
 fn clone(&self) -> Self {
 *self
@@ -202,7 +197,6 @@ pub fn extract<
 EX,
 DC: DocumentChanges<'pl>,
 MSP,
-SP,
 >(
 document_changes: &DC,
 extractor: &EX,
@@ -214,17 +208,17 @@ pub fn extract<
 fields_ids_map_store,
 must_stop_processing,
 send_progress,
-}: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+}: IndexingContext<'fid, 'indexer, 'index, MSP>,
 extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
 datastore: &'data ThreadLocal<EX::Data>,
-step: Step,
+step: IndexingStep,
 ) -> Result<()>
 where
 EX: Extractor<'extractor>,
 MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync,
 {
 tracing::trace!("We are resetting the extractor allocators");
+send_progress.update_progress(step);
 // Clean up and reuse the extractor allocs
 for extractor_alloc in extractor_allocs.iter_mut() {
 tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
@@ -232,6 +226,8 @@ where
 }

 let total_documents = document_changes.len() as u32;
+let (step, progress_step) = AtomicDocumentStep::new(total_documents);
+send_progress.update_progress(progress_step);

 let pi = document_changes.iter(CHUNK_SIZE);
 pi.enumerate().try_arc_for_each_try_init(
@@ -253,7 +249,7 @@ where
 }

 let finished_documents = (finished_documents * CHUNK_SIZE) as u32;
-(send_progress)(Progress::from_step_substep(step, finished_documents, total_documents));
+step.store(finished_documents, Ordering::Relaxed);

 // Clean up and reuse the document-specific allocator
 context.doc_alloc.reset();
@@ -271,32 +267,7 @@ where
 res
 },
 )?;
-(send_progress)(Progress::from_step_substep(step, total_documents, total_documents));
+step.store(total_documents, Ordering::Relaxed);
 Ok(())
 }
-
-pub struct Progress {
-pub finished_steps: u16,
-pub total_steps: u16,
-pub step_name: &'static str,
-pub finished_total_substep: Option<(u32, u32)>,
-}
-
-impl Progress {
-pub fn from_step(step: Step) -> Self {
-Self {
-finished_steps: step.finished_steps(),
-total_steps: Step::total_steps(),
-step_name: step.name(),
-finished_total_substep: None,
-}
-}
-pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self {
-Self {
-finished_total_substep: Some((finished_substep, total_substep)),
-..Progress::from_step(step)
-}
-}
-}
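The crate::progress module that now supplies Progress, Step, and AtomicDocumentStep is not part of the hunks shown here. Judging only from the call sites above (AtomicDocumentStep::new(total) returning a (counter, step) pair, counter.store(n, Ordering::Relaxed), and update_progress taking any step through &self), the module plausibly looks like the sketch below. The trait bounds, the steps field and its Vec layout, and the "document" unit name are assumptions, not the committed implementation:

use std::borrow::Cow;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};

// The trait this diff shows IndexingStep implementing.
pub trait Step: 'static + Send + Sync {
    fn name(&self) -> Cow<'static, str>;
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

// Progress is Default and updated through &self, so interior mutability:
// a lock-protected stack of the steps currently in flight (assumed layout).
#[derive(Default, Clone)]
pub struct Progress {
    steps: Arc<RwLock<Vec<Box<dyn Step>>>>,
}

impl Progress {
    pub fn update_progress<S: Step>(&self, sub_progress: S) {
        self.steps.write().unwrap().push(Box::new(sub_progress));
    }
}

// new(total) hands back (counter, step): the counter stays in the hot
// loop, the step object goes to Progress::update_progress.
pub struct AtomicDocumentStep {
    current: Arc<AtomicU32>,
    total: u32,
}

impl AtomicDocumentStep {
    pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
        let current = Arc::new(AtomicU32::new(0));
        (current.clone(), Self { current, total })
    }
}

impl Step for AtomicDocumentStep {
    fn name(&self) -> Cow<'static, str> {
        "document".into()
    }
    fn current(&self) -> u32 {
        self.current.load(Ordering::Relaxed)
    }
    fn total(&self) -> u32 {
        self.total
    }
}

The relaxed atomic keeps bookkeeping out of the hot path: extraction threads only issue a store, and whatever renders progress reads the counter at its own pace.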


@@ -92,11 +92,12 @@ mod test {
 use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
 use crate::index::tests::TempIndex;
+use crate::progress::Progress;
 use crate::update::new::indexer::document_changes::{
 extract, DocumentChangeContext, Extractor, IndexingContext,
 };
 use crate::update::new::indexer::DocumentDeletion;
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{MostlySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::DocumentId;
@@ -164,7 +165,7 @@ mod test {
 doc_allocs: &doc_allocs,
 fields_ids_map_store: &fields_ids_map_store,
 must_stop_processing: &(|| false),
-send_progress: &(|_progress| {}),
+send_progress: &Progress::default(),
 };

 for _ in 0..3 {
@@ -176,7 +177,7 @@ mod test {
 context,
 &mut extractor_allocs,
 &datastore,
-Step::ExtractingDocuments,
+IndexingStep::ExtractingDocuments,
 )
 .unwrap();


@@ -1,3 +1,5 @@
+use std::sync::atomic::Ordering;
+
 use bumpalo::collections::CollectIn;
 use bumpalo::Bump;
 use bumparaw_collections::RawMap;
@@ -10,11 +12,12 @@ use serde_json::value::RawValue;
 use serde_json::Deserializer;

 use super::super::document_change::DocumentChange;
-use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress};
+use super::document_changes::{DocumentChangeContext, DocumentChanges};
 use super::retrieve_or_guess_primary_key;
 use crate::documents::PrimaryKey;
+use crate::progress::{AtomicSubStep, Progress};
 use crate::update::new::document::Versions;
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::MostlySend;
 use crate::update::new::{Deletion, Insertion, Update};
 use crate::update::{AvailableIds, IndexDocumentsMethod};
@@ -45,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> {
 #[allow(clippy::too_many_arguments)]
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")]
-pub fn into_changes<MSP, SP>(
+pub fn into_changes<MSP>(
 self,
 indexer: &'pl Bump,
 index: &Index,
@@ -53,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> {
 primary_key_from_op: Option<&'pl str>,
 new_fields_ids_map: &mut FieldsIdsMap,
 must_stop_processing: &MSP,
-send_progress: &SP,
+progress: Progress,
 ) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
 where
 MSP: Fn() -> bool,
-SP: Fn(Progress),
 {
+progress.update_progress(IndexingStep::PreparingPayloads);
 let Self { operations, method } = self;

 let documents_ids = index.documents_ids(rtxn)?;
@@ -68,16 +71,15 @@ impl<'pl> DocumentOperation<'pl> {
 let mut primary_key = None;

 let payload_count = operations.len();
+let (step, progress_step) =
+AtomicSubStep::<crate::progress::Document>::new(payload_count as u32);
+progress.update_progress(progress_step);

 for (payload_index, operation) in operations.into_iter().enumerate() {
 if must_stop_processing() {
 return Err(InternalError::AbortedIndexation.into());
 }
-send_progress(Progress::from_step_substep(
-Step::PreparingPayloads,
-payload_index as u32,
-payload_count as u32,
-));
+step.store(payload_index as u32, Ordering::Relaxed);

 let mut bytes = 0;
 let result = match operation {
@@ -118,12 +120,7 @@ impl<'pl> DocumentOperation<'pl> {
 };

 operations_stats.push(PayloadStats { document_count, bytes, error });
 }
-send_progress(Progress::from_step_substep(
-Step::PreparingPayloads,
-payload_count as u32,
-payload_count as u32,
-));
+step.store(payload_count as u32, Ordering::Relaxed);

 // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
 let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =
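AtomicSubStep::<crate::progress::Document>::new suggests the substep is generic over a marker type that names the counted unit, with the AtomicDocumentStep used in document_changes presumably a plain alias. A hedged sketch of that shape; the NamedStep trait, the Document marker, and the alias are guesses from this single call site:

use std::marker::PhantomData;
use std::sync::atomic::AtomicU32;
use std::sync::Arc;

// Assumed: a marker trait supplying the display name of the counted unit.
pub trait NamedStep: 'static + Send + Sync {
    fn name(&self) -> &'static str;
}

pub struct Document;

impl NamedStep for Document {
    fn name(&self) -> &'static str {
        "document"
    }
}

// Assumed: the generic substep; the shared counter goes back to the caller.
pub struct AtomicSubStep<Name: NamedStep> {
    current: Arc<AtomicU32>,
    total: u32,
    phantom: PhantomData<Name>,
}

impl<Name: NamedStep> AtomicSubStep<Name> {
    pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
        let current = Arc::new(AtomicU32::new(0));
        (current.clone(), Self { current, total, phantom: PhantomData })
    }
}

// Under these assumptions, the AtomicDocumentStep from the earlier sketch
// would simply be:
pub type AtomicDocumentStep = AtomicSubStep<Document>;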


@@ -5,7 +5,7 @@ use std::thread::{self, Builder};
 use big_s::S;
 use bumparaw_collections::RawMap;
-use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
+use document_changes::{extract, DocumentChanges, IndexingContext};
 pub use document_deletion::DocumentDeletion;
 pub use document_operation::{DocumentOperation, PayloadStats};
 use hashbrown::HashMap;
@@ -22,7 +22,7 @@ use super::channel::*;
 use super::extract::*;
 use super::facet_search_builder::FacetSearchBuilder;
 use super::merger::FacetFieldIdsDelta;
-use super::steps::Step;
+use super::steps::IndexingStep;
 use super::thread_local::ThreadLocal;
 use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
 use super::words_prefix_docids::{
@@ -33,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
 use crate::facet::FacetType;
 use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
 use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
+use crate::progress::Progress;
 use crate::proximity::ProximityPrecision;
 use crate::update::del_add::DelAdd;
 use crate::update::new::extract::EmbeddingExtractor;
@@ -60,7 +61,7 @@ mod update_by_function;
 ///
 /// TODO return stats
 #[allow(clippy::too_many_arguments)] // clippy: 😝
-pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
+pub fn index<'pl, 'indexer, 'index, DC, MSP>(
 wtxn: &mut RwTxn,
 index: &'index Index,
 pool: &ThreadPoolNoAbort,
@@ -71,12 +72,11 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
 document_changes: &DC,
 embedders: EmbeddingConfigs,
 must_stop_processing: &'indexer MSP,
-send_progress: &'indexer SP,
+send_progress: &'indexer Progress,
 ) -> Result<()>
 where
 DC: DocumentChanges<'pl>,
 MSP: Fn() -> bool + Sync,
-SP: Fn(Progress) + Sync,
 {
 let mut bbbuffers = Vec::new();
 let finished_extraction = AtomicBool::new(false);
@@ -159,7 +159,7 @@ where
 indexing_context,
 &mut extractor_allocs,
 &datastore,
-Step::ExtractingDocuments,
+IndexingStep::ExtractingDocuments,
 )?;
 }
 {
@@ -191,7 +191,7 @@ where
 indexing_context,
 &mut extractor_allocs,
 &extractor_sender.field_id_docid_facet_sender(),
-Step::ExtractingFacets
+IndexingStep::ExtractingFacets
 )?
 };
@@ -224,7 +224,7 @@ where
 document_changes,
 indexing_context,
 &mut extractor_allocs,
-Step::ExtractingWords
+IndexingStep::ExtractingWords
 )?
 };
@@ -302,7 +302,7 @@ where
 document_changes,
 indexing_context,
 &mut extractor_allocs,
-Step::ExtractingWordProximity,
+IndexingStep::ExtractingWordProximity,
 )?
 };
@@ -338,7 +338,7 @@ where
 indexing_context,
 &mut extractor_allocs,
 &datastore,
-Step::ExtractingEmbeddings,
+IndexingStep::ExtractingEmbeddings,
 )?;
 }
 {
@@ -371,7 +371,7 @@ where
 indexing_context,
 &mut extractor_allocs,
 &datastore,
-Step::WritingGeoPoints
+IndexingStep::WritingGeoPoints
 )?;
 }
@@ -383,9 +383,7 @@ where
 &indexing_context.must_stop_processing,
 )?;
 }
-
-(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
-
+indexing_context.send_progress.update_progress(IndexingStep::WritingToDatabase);
 finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);

 Result::Ok((facet_field_ids_delta, index_embeddings))
@@ -485,7 +483,7 @@ where
 )?;
 }

-(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
+indexing_context.send_progress.update_progress(IndexingStep::WaitingForExtractors);

 let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
@@ -498,10 +496,9 @@ where
 break 'vectors;
 }

-(indexing_context.send_progress)(Progress::from_step(
-Step::WritingEmbeddingsToDatabase,
-));
+indexing_context
+.send_progress
+.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
 let mut rng = rand::rngs::StdRng::seed_from_u64(42);
 for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
 let dimensions = *dimensions;
@@ -517,21 +514,19 @@ where
 index.put_embedding_configs(wtxn, index_embeddings)?;
 }

-(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));
+indexing_context.send_progress.update_progress(IndexingStep::PostProcessingFacets);
 if index.facet_search(wtxn)? {
 compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
 }
 compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;

-(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords));
+indexing_context.send_progress.update_progress(IndexingStep::PostProcessingWords);
 if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
 compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?;
 }

-(indexing_context.send_progress)(Progress::from_step(Step::Finalizing));
+indexing_context.send_progress.update_progress(IndexingStep::Finalizing);

 Ok(()) as Result<_>
 })?;
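Because index now takes send_progress: &'indexer Progress and every update goes through &self, the handle can be observed from another thread while indexing runs. A standalone sketch of that property with a toy handle (not milli's scheduler):

use std::sync::atomic::{AtomicU32, Ordering};

#[derive(Default)]
struct Progress {
    current: AtomicU32, // interior mutability: &Progress is enough to update
}

fn main() {
    let progress = Progress::default();
    std::thread::scope(|s| {
        // stand-in for the indexing thread updating the shared handle
        s.spawn(|| {
            for done in 0..1_000u32 {
                progress.current.store(done, Ordering::Relaxed);
            }
        });
        // stand-in for a monitor polling the same handle concurrently
        s.spawn(|| {
            let seen = progress.current.load(Ordering::Relaxed);
            assert!(seen < 1_000);
        });
    });
}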


@@ -1,8 +1,12 @@
+use std::borrow::Cow;
+
 use enum_iterator::Sequence;

+use crate::progress::Step;
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
-#[repr(u16)]
-pub enum Step {
+#[repr(u8)]
+pub enum IndexingStep {
 PreparingPayloads,
 ExtractingDocuments,
 ExtractingFacets,
@@ -18,30 +22,31 @@ pub enum Step {
 Finalizing,
 }

-impl Step {
-pub fn name(&self) -> &'static str {
+impl Step for IndexingStep {
+fn name(&self) -> Cow<'static, str> {
 match self {
-Step::PreparingPayloads => "preparing update file",
-Step::ExtractingDocuments => "extracting documents",
-Step::ExtractingFacets => "extracting facets",
-Step::ExtractingWords => "extracting words",
-Step::ExtractingWordProximity => "extracting word proximity",
-Step::ExtractingEmbeddings => "extracting embeddings",
-Step::WritingGeoPoints => "writing geo points",
-Step::WritingToDatabase => "writing to database",
-Step::WaitingForExtractors => "waiting for extractors",
-Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
-Step::PostProcessingFacets => "post-processing facets",
-Step::PostProcessingWords => "post-processing words",
-Step::Finalizing => "finalizing",
+IndexingStep::PreparingPayloads => "preparing update file",
+IndexingStep::ExtractingDocuments => "extracting documents",
+IndexingStep::ExtractingFacets => "extracting facets",
+IndexingStep::ExtractingWords => "extracting words",
+IndexingStep::ExtractingWordProximity => "extracting word proximity",
+IndexingStep::ExtractingEmbeddings => "extracting embeddings",
+IndexingStep::WritingGeoPoints => "writing geo points",
+IndexingStep::WritingToDatabase => "writing to database",
+IndexingStep::WaitingForExtractors => "waiting for extractors",
+IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
+IndexingStep::PostProcessingFacets => "post-processing facets",
+IndexingStep::PostProcessingWords => "post-processing words",
+IndexingStep::Finalizing => "finalizing",
 }
+.into()
 }

-pub fn finished_steps(self) -> u16 {
-self as u16
+fn current(&self) -> u32 {
+*self as u32
 }

-pub const fn total_steps() -> u16 {
-Self::CARDINALITY as u16
+fn total(&self) -> u32 {
+Self::CARDINALITY as u32
 }
 }
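With Sequence still supplying the cardinality, the trait impl keeps the old numbering: current() is the variant's discriminant and total() the variant count. A quick unit-test-style check, assuming the items above are in scope (the enum has 13 variants and ExtractingFacets is the third):

use crate::progress::Step; // trait must be in scope for name/current/total
use crate::update::new::steps::IndexingStep;

#[test]
fn indexing_step_positions() {
    let step = IndexingStep::ExtractingFacets; // third variant of thirteen
    assert_eq!(step.current(), 2); // discriminant, counted from zero
    assert_eq!(step.total(), 13); // Self::CARDINALITY as u32
    assert_eq!(step.name(), "extracting facets");
}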


@@ -3,6 +3,7 @@ use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use maplit::hashset;
 use milli::documents::mmap_from_objects;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -57,7 +58,7 @@ fn test_facet_distribution_with_no_facet_values() {
 None,
 &mut new_fields_ids_map,
 &|| false,
-&|_progress| (),
+Progress::default(),
 )
 .unwrap();
@@ -72,7 +73,7 @@ fn test_facet_distribution_with_no_facet_values() {
 &document_changes,
 embedders,
 &|| false,
-&|_| (),
+&Progress::default(),
 )
 .unwrap();


@@ -7,6 +7,7 @@ use bumpalo::Bump;
 use either::{Either, Left, Right};
 use heed::EnvOpenOptions;
 use maplit::{btreemap, hashset};
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -90,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
 None,
 &mut new_fields_ids_map,
 &|| false,
-&|_progress| (),
+Progress::default(),
 )
 .unwrap();
@@ -109,7 +110,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
 &document_changes,
 embedders,
 &|| false,
-&|_| (),
+&Progress::default(),
 )
 .unwrap();


@@ -5,6 +5,7 @@ use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use itertools::Itertools;
 use maplit::hashset;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -326,7 +327,7 @@ fn criteria_ascdesc() {
 None,
 &mut new_fields_ids_map,
 &|| false,
-&|_progress| (),
+Progress::default(),
 )
 .unwrap();
@@ -341,7 +342,7 @@ fn criteria_ascdesc() {
 &document_changes,
 embedders,
 &|| false,
-&|_| (),
+&Progress::default(),
 )
 .unwrap();


@@ -3,6 +3,7 @@ use std::collections::BTreeSet;
 use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use milli::documents::mmap_from_objects;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -135,7 +136,7 @@ fn test_typo_disabled_on_word() {
 None,
 &mut new_fields_ids_map,
 &|| false,
-&|_progress| (),
+Progress::default(),
 )
 .unwrap();
@@ -150,7 +151,7 @@ fn test_typo_disabled_on_word() {
 &document_changes,
 embedders,
 &|| false,
-&|_| (),
+&Progress::default(),
 )
 .unwrap();