diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 7c1783a1a..4bd5315ff 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -10,7 +10,7 @@ use milli::documents::PrimaryKey; use milli::heed::{EnvOpenOptions, RwTxn}; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::Index; use rand::seq::SliceRandom; @@ -138,10 +138,9 @@ fn indexing_songs_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -205,10 +204,9 @@ fn reindexing_songs_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -250,10 +248,9 @@ fn reindexing_songs_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -319,10 +316,9 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -396,10 +392,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -441,10 +436,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -482,10 +476,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -549,11 +542,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -617,10 +609,9 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -684,10 +675,9 @@ fn indexing_wiki(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -750,10 +740,9 @@ fn reindexing_wiki(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -795,10 +784,9 @@ fn reindexing_wiki(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -863,10 +851,9 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -939,11 +926,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -985,11 +971,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1027,11 +1012,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1095,10 +1079,9 @@ fn indexing_movies_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1161,10 +1144,9 @@ fn reindexing_movies_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1206,10 +1188,9 @@ fn reindexing_movies_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1274,10 +1255,9 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1387,10 +1367,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1432,10 +1411,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1473,10 +1451,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1563,10 +1540,9 @@ fn indexing_nested_movies_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1654,10 +1630,9 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1737,10 +1712,9 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1804,10 +1778,9 @@ fn indexing_geo(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1870,10 +1843,9 @@ fn reindexing_geo(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1915,10 +1887,9 @@ fn reindexing_geo(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1983,10 +1954,9 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer diff --git a/crates/benchmarks/benches/utils.rs b/crates/benchmarks/benches/utils.rs index b472b4f6b..5baeca869 100644 --- a/crates/benchmarks/benches/utils.rs +++ b/crates/benchmarks/benches/utils.rs @@ -12,7 +12,7 @@ use memmap2::Mmap; use milli::heed::EnvOpenOptions; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy}; use serde_json::Value; @@ -99,8 +99,8 @@ pub fn base_setup(conf: &Conf) -> Index { let mut new_fields_ids_map = db_fields_ids_map.clone(); let documents = documents_from(conf.dataset, conf.dataset_format); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer diff --git a/crates/fuzzers/src/bin/fuzz-indexing.rs b/crates/fuzzers/src/bin/fuzz-indexing.rs index 1216083ca..ef4e4f8b0 100644 --- a/crates/fuzzers/src/bin/fuzz-indexing.rs +++ b/crates/fuzzers/src/bin/fuzz-indexing.rs @@ -89,9 +89,7 @@ fn main() { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new( - IndexDocumentsMethod::ReplaceDocuments, - ); + let mut indexer = indexer::DocumentOperation::new(); let mut operations = Vec::new(); for op in batch.0 { @@ -115,7 +113,7 @@ fn main() { for op in &operations { match op { Either::Left(documents) => { - indexer.add_documents(documents).unwrap() + indexer.replace_documents(documents).unwrap() } Either::Right(ids) => indexer.delete_documents(ids), }