From 60470bb647494cce1be2448cfb3b2452be45fc27 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 28 Jan 2025 14:41:36 +0100 Subject: [PATCH] Fix the tests to use the new replace/update documents --- crates/fuzzers/src/bin/fuzz-indexing.rs | 2 +- crates/milli/src/index.rs | 19 ++++--- .../milli/src/search/new/tests/integration.rs | 6 +- .../milli/src/update/index_documents/mod.rs | 56 +++++++++---------- .../milli/tests/search/facet_distribution.rs | 6 +- crates/milli/tests/search/mod.rs | 6 +- crates/milli/tests/search/query_criteria.rs | 6 +- crates/milli/tests/search/typo_tolerance.rs | 6 +- 8 files changed, 56 insertions(+), 51 deletions(-) diff --git a/crates/fuzzers/src/bin/fuzz-indexing.rs b/crates/fuzzers/src/bin/fuzz-indexing.rs index ef4e4f8b0..e26303010 100644 --- a/crates/fuzzers/src/bin/fuzz-indexing.rs +++ b/crates/fuzzers/src/bin/fuzz-indexing.rs @@ -12,7 +12,7 @@ use milli::documents::mmap_from_objects; use milli::heed::EnvOpenOptions; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig}; +use milli::update::IndexerConfig; use milli::vector::EmbeddingConfigs; use milli::Index; use serde_json::Value; diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 944fb6cd4..6c7534553 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1839,9 +1839,15 @@ pub(crate) mod tests { let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs; - let mut indexer = - indexer::DocumentOperation::new(self.index_documents_config.update_method); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + match self.index_documents_config.update_method { + IndexDocumentsMethod::ReplaceDocuments => { + indexer.replace_documents(&documents).unwrap() + } + IndexDocumentsMethod::UpdateDocuments => { + indexer.update_documents(&documents).unwrap() + } + } let indexer_alloc = Bump::new(); let (document_changes, operation_stats, primary_key) = indexer.into_changes( @@ -1928,8 +1934,7 @@ pub(crate) mod tests { let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs; - let mut indexer = - indexer::DocumentOperation::new(self.index_documents_config.update_method); + let mut indexer = indexer::DocumentOperation::new(); let external_document_ids: Vec<_> = external_document_ids.iter().map(AsRef::as_ref).collect(); indexer.delete_documents(external_document_ids.as_slice()); @@ -2006,13 +2011,13 @@ pub(crate) mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let payload = documents!([ { "id": 1, "name": "kevin" }, { "id": 2, "name": "bob", "age": 20 }, { "id": 2, "name": "bob", "age": 20 }, ]); - indexer.add_documents(&payload).unwrap(); + indexer.replace_documents(&payload).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index 99d5dc033..e60a09ec5 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -7,7 +7,7 @@ use maplit::{btreemap, hashset}; use crate::progress::Progress; use crate::update::new::indexer; -use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use crate::update::{IndexerConfig, Settings}; use crate::vector::EmbeddingConfigs; use crate::{db_snap, Criterion, Index}; pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson"); @@ -55,7 +55,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); file.write_all(CONTENT.as_bytes()).unwrap(); @@ -63,7 +63,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let payload = unsafe { memmap2::Mmap::map(&file).unwrap() }; // index documents - indexer.add_documents(&payload).unwrap(); + indexer.replace_documents(&payload).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, operation_stats, primary_key) = indexer diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 154db7875..4615a0202 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -1951,11 +1951,11 @@ mod tests { let db_fields_ids_map = index.inner.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); - indexer.add_documents(&doc1).unwrap(); - indexer.add_documents(&doc2).unwrap(); - indexer.add_documents(&doc3).unwrap(); - indexer.add_documents(&doc4).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.replace_documents(&doc1).unwrap(); + indexer.replace_documents(&doc2).unwrap(); + indexer.replace_documents(&doc3).unwrap(); + indexer.replace_documents(&doc4).unwrap(); let indexer_alloc = Bump::new(); let (_document_changes, operation_stats, _primary_key) = indexer @@ -2112,8 +2112,8 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.replace_documents(&documents).unwrap(); indexer.delete_documents(&["2"]); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2165,14 +2165,14 @@ mod tests { { "id": 2, "doggo": { "name": "bob", "age": 20 } }, { "id": 3, "name": "jean", "age": 25 }, ]); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.update_documents(&documents).unwrap(); let documents = documents!([ { "id": 2, "catto": "jorts" }, { "id": 3, "legs": 4 }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.update_documents(&documents).unwrap(); indexer.delete_documents(&["1", "2"]); let indexer_alloc = Bump::new(); @@ -2227,8 +2227,8 @@ mod tests { ]); let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.update_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2278,8 +2278,8 @@ mod tests { ]); let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.update_documents(&documents).unwrap(); indexer.delete_documents(&["1", "2"]); let (document_changes, _operation_stats, primary_key) = indexer @@ -2327,14 +2327,14 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); + let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1", "2"]); let documents = documents!([ { "id": 2, "doggo": { "name": "jean", "age": 20 } }, { "id": 3, "name": "bob", "age": 25 }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.update_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2382,7 +2382,7 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); + let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1", "2", "1", "2"]); @@ -2391,7 +2391,7 @@ mod tests { { "id": 2, "doggo": { "name": "jean", "age": 20 } }, { "id": 3, "name": "bob", "age": 25 }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.update_documents(&documents).unwrap(); indexer.delete_documents(&["1", "2", "1", "2"]); @@ -2440,12 +2440,12 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = documents!([ { "id": 1, "doggo": "kevin" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.update_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2489,7 +2489,7 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1"]); @@ -2497,7 +2497,7 @@ mod tests { { "id": 1, "catto": "jorts" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2683,14 +2683,14 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); // OP let documents = documents!([ { "id": 1, "doggo": "bernese" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); // FINISHING let (document_changes, _operation_stats, primary_key) = indexer @@ -2743,14 +2743,14 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1"]); let documents = documents!([ { "id": 0, "catto": "jorts" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2801,12 +2801,12 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = documents!([ { "id": 1, "catto": "jorts" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( diff --git a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index db9f86357..4d8bf324c 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -5,7 +5,7 @@ use maplit::hashset; use milli::documents::mmap_from_objects; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{FacetDistribution, Index, Object, OrderBy}; use serde_json::{from_value, json}; @@ -36,7 +36,7 @@ fn test_facet_distribution_with_no_facet_values() { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let doc1: Object = from_value( json!({ "id": 123, "title": "What a week, hu...", "genres": [], "tags": ["blue"] }), @@ -47,7 +47,7 @@ fn test_facet_distribution_with_no_facet_values() { let documents = mmap_from_objects(vec![doc1, doc2]); // index documents - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index 662715638..337a4c88c 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -9,7 +9,7 @@ use heed::EnvOpenOptions; use maplit::{btreemap, hashset}; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy}; use serde::{Deserialize, Deserializer}; @@ -72,7 +72,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); file.write_all(CONTENT.as_bytes()).unwrap(); @@ -80,7 +80,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let payload = unsafe { memmap2::Mmap::map(&file).unwrap() }; // index documents - indexer.add_documents(&payload).unwrap(); + indexer.replace_documents(&payload).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, operation_stats, primary_key) = indexer diff --git a/crates/milli/tests/search/query_criteria.rs b/crates/milli/tests/search/query_criteria.rs index d47c9539d..3cc747f06 100644 --- a/crates/milli/tests/search/query_criteria.rs +++ b/crates/milli/tests/search/query_criteria.rs @@ -7,7 +7,7 @@ use itertools::Itertools; use maplit::hashset; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy}; use rand::Rng; @@ -288,7 +288,7 @@ fn criteria_ascdesc() { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); (0..ASC_DESC_CANDIDATES_THRESHOLD + 1).for_each(|_| { @@ -318,7 +318,7 @@ fn criteria_ascdesc() { file.sync_all().unwrap(); let payload = unsafe { memmap2::Mmap::map(&file).unwrap() }; - indexer.add_documents(&payload).unwrap(); + indexer.replace_documents(&payload).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( &indexer_alloc, diff --git a/crates/milli/tests/search/typo_tolerance.rs b/crates/milli/tests/search/typo_tolerance.rs index b640fa910..837b5e6b2 100644 --- a/crates/milli/tests/search/typo_tolerance.rs +++ b/crates/milli/tests/search/typo_tolerance.rs @@ -5,7 +5,7 @@ use heed::EnvOpenOptions; use milli::documents::mmap_from_objects; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{Criterion, Index, Object, Search, TermsMatchingStrategy}; use serde_json::from_value; @@ -123,9 +123,9 @@ fn test_typo_disabled_on_word() { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer