From 1693332cab0c209d5729233ddceae4e9bc483ea5 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 20 Jun 2024 15:59:32 +0200 Subject: [PATCH 1/3] Update arroy and always build the tree that need to be built --- Cargo.lock | 24 ++++---- index-scheduler/Cargo.toml | 2 +- index-scheduler/src/lib.rs | 2 +- meilisearch/tests/common/mod.rs | 2 +- meilisearch/tests/vector/mod.rs | 82 +++++++++++++++++++++++++ milli/Cargo.toml | 2 +- milli/src/error.rs | 5 +- milli/src/index.rs | 4 +- milli/src/update/index_documents/mod.rs | 5 +- 9 files changed, 106 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a5960502..3c728f348 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -381,9 +381,9 @@ dependencies = [ [[package]] name = "arroy" -version = "0.3.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9" +checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a" dependencies = [ "bytemuck", "byteorder", @@ -679,9 +679,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" [[package]] name = "bytemuck" -version = "1.15.0" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" +checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" dependencies = [ "bytemuck_derive", ] @@ -2273,9 +2273,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" -version = "0.20.1" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd" +checksum = "f60d7cff16094be9627830b399c087a25017e93fb3768b87cd656a68ccb1ebe8" dependencies = [ "bitflags 2.5.0", "byteorder", @@ -3172,9 +3172,9 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da" [[package]] name = "lmdb-master-sys" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a" +checksum = "a5142795c220effa4c8f4813537bd4c88113a07e45e93100ccb2adc5cec6c7f3" dependencies = [ "cc", "doxygen-rs", @@ -5053,18 +5053,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 8959bb070..aff3b379f 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -40,7 +40,7 @@ ureq = "2.9.7" uuid = { version = "1.6.1", features = ["serde", "v4"] } [dev-dependencies] -arroy = "0.3.1" +arroy = "0.4.0" big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.34.0", features = ["json", "redactions"] } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 88997b715..213ec3230 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -5396,7 +5396,7 @@ mod tests { let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy) .map(Some) .or_else(|e| match e { - arroy::Error::MissingMetadata => Ok(None), + arroy::Error::MissingMetadata(_) => Ok(None), e => Err(e), }) .transpose(); diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index 317e5e171..4476e0d1f 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -65,7 +65,7 @@ impl Display for Value { write!( f, "{}", - json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }) + json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" }) ) } } diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 8d619a15a..53b2cca76 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -225,3 +225,85 @@ async fn clear_documents() { } "###); } + +#[actix_rt::test] +async fn add_remove_one_vector_4588() { + // https://github.com/meilisearch/meilisearch/issues/4588 + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, name: "settings-processed"); + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, name: "document-added"); + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": null }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, name: "document-deleted"); + + let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await; + snapshot!(json_string!(documents), @r###" + { + "hits": [ + { + "id": 0, + "name": "kefir" + } + ], + "query": "", + "processingTimeMs": 1, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1, + "semanticHitCount": 1 + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": {} + } + ], + "offset": 0, + "limit": 20, + "total": 1 + } + "###); +} diff --git a/milli/Cargo.toml b/milli/Cargo.toml index a4aa4ef95..fd7bde99b 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", ] } tiktoken-rs = "0.5.8" liquid = "0.26.4" -arroy = "0.3.1" +arroy = "0.4.0" rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.9.7", features = ["json"] } diff --git a/milli/src/error.rs b/milli/src/error.rs index 7420ce667..8210d92e0 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -281,8 +281,9 @@ impl From for Error { arroy::Error::DatabaseFull | arroy::Error::InvalidItemAppend | arroy::Error::UnmatchingDistance { .. } - | arroy::Error::MissingNode - | arroy::Error::MissingMetadata => { + | arroy::Error::NeedBuild(_) + | arroy::Error::MissingKey { .. } + | arroy::Error::MissingMetadata(_) => { Error::InternalError(InternalError::ArroyError(value)) } } diff --git a/milli/src/index.rs b/milli/src/index.rs index d325d6fa4..0a7a20ce0 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1610,7 +1610,7 @@ impl Index { arroy::Reader::open(rtxn, k, self.vector_arroy) .map(Some) .or_else(|e| match e { - arroy::Error::MissingMetadata => Ok(None), + arroy::Error::MissingMetadata(_) => Ok(None), e => Err(e.into()), }) .transpose() @@ -1643,7 +1643,7 @@ impl Index { let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) .map(Some) .or_else(|e| match e { - arroy::Error::MissingMetadata => Ok(None), + arroy::Error::MissingMetadata(_) => Ok(None), e => Err(e), }) .transpose(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 3586c9c6d..089b56025 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -547,10 +547,11 @@ where pool.install(|| { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { let writer = arroy::Writer::new(vector_arroy, k, dimension); - if writer.is_empty(wtxn)? { + if writer.need_build(wtxn)? { + writer.build(wtxn, &mut rng, None)?; + } else if writer.is_empty(wtxn)? { break; } - writer.build(wtxn, &mut rng, None)?; } Result::Ok(()) }) From 7be17b7e4c37b6fee5062062d3fab815fa21aa01 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 24 Jun 2024 10:52:57 +0200 Subject: [PATCH 2/3] add the missing snapshots --- .../document-added.snap | 19 +++++++++++++++ .../document-deleted.snap | 19 +++++++++++++++ .../settings-processed.snap | 23 +++++++++++++++++++ 3 files changed, 61 insertions(+) create mode 100644 meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap create mode 100644 meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap create mode 100644 meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap new file mode 100644 index 000000000..52d9ad38d --- /dev/null +++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap @@ -0,0 +1,19 @@ +--- +source: meilisearch/tests/vector/mod.rs +--- +{ + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" +} diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap new file mode 100644 index 000000000..de02d0b1d --- /dev/null +++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap @@ -0,0 +1,19 @@ +--- +source: meilisearch/tests/vector/mod.rs +--- +{ + "uid": 2, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" +} diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap new file mode 100644 index 000000000..316305fa8 --- /dev/null +++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap @@ -0,0 +1,23 @@ +--- +source: meilisearch/tests/vector/mod.rs +--- +{ + "uid": 0, + "indexUid": "doggo", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" +} From 606e108420d9474fe7295d662c4bb4e2c7882f81 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 24 Jun 2024 11:13:45 +0200 Subject: [PATCH 3/3] fix all the flaky snapshots --- meilisearch/tests/vector/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 53b2cca76..4172ef444 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -213,11 +213,11 @@ async fn clear_documents() { // Make sure the arroy DB has been cleared let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; - snapshot!(json_string!(documents), @r###" + snapshot!(documents, @r###" { "hits": [], "query": "", - "processingTimeMs": 0, + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 0, @@ -272,7 +272,7 @@ async fn add_remove_one_vector_4588() { snapshot!(task, name: "document-deleted"); let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await; - snapshot!(json_string!(documents), @r###" + snapshot!(documents, @r###" { "hits": [ { @@ -281,7 +281,7 @@ async fn add_remove_one_vector_4588() { } ], "query": "", - "processingTimeMs": 1, + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 1,