From 09d9b63e1c0c1369e2c92b66e329d21e837f49d3 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Thu, 13 Jun 2024 17:16:41 +0200
Subject: [PATCH] - test case where all vectors were generated - update tests following changes in behavior from previous commit

---
Review notes (free text placed between the "---" marker and the diffstat; it
is commentary only and none of the hunks below are altered by it):

* hybrid.rs: adds the `retrieve_vectors` test. It builds an index with
  `index_with_documents_hf`, runs a hybrid search for "Captain"
  (`semanticRatio` 0.2, `retrieveVectors` true) and snapshots three hits
  (ids "2", "3", "1") whose `_vectors.default` entry has `regenerate: true`.
  The embeddings themselves are redacted to "[vectors]" in the snapshot, so
  only their presence is pinned, not their values.
  The test then sets `displayedAttributes` to id/title/desc, waits for the
  settings task, repeats the same search and snapshots the same three hits
  without any `_vectors` field.
* settings.rs (`reset_embedder_documents`): the first document listing now
  passes `retrieve_vectors: false` explicitly and gets its own snapshot with
  only `id` and `name`. The "still able to retrieve their vectors" listing
  (`retrieve_vectors: true`) moves in front of the snapshot that was already
  present at that position, and the later copy that expected
  `"_vectors": {}` is deleted.
* NOTE(review): this copy of the patch had lost its line breaks and
  indentation. Both were restored assuming rustfmt's 4-space layout and the
  usual insta inline-snapshot indentation; the author line carries no e-mail
  address in this copy. Confirm with `git apply --check` before applying.

 meilisearch/tests/search/hybrid.rs   | 82 ++++++++++++++++++++++++++++
 meilisearch/tests/vector/settings.rs | 82 ++++++++++++++--------------
 2 files changed, 124 insertions(+), 40 deletions(-)

diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs
index be6e0b1c8..31b2940d8 100644
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@@ -482,3 +482,85 @@ async fn query_combination() {
     snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###);
     snapshot!(response["semanticHitCount"], @"0");
 }
+
+#[actix_rt::test]
+async fn retrieve_vectors() {
+    let server = Server::new().await;
+    let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
+
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
+        )
+        .await;
+    snapshot!(code, @"200 OK");
+    insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
+    [
+      {
+        "title": "Captain Planet",
+        "desc": "He's not part of the Marvel Cinematic Universe",
+        "id": "2",
+        "_vectors": {
+          "default": {
+            "embeddings": "[vectors]",
+            "regenerate": true
+          }
+        }
+      },
+      {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "3",
+        "_vectors": {
+          "default": {
+            "embeddings": "[vectors]",
+            "regenerate": true
+          }
+        }
+      },
+      {
+        "title": "Shazam!",
+        "desc": "a Captain Marvel ersatz",
+        "id": "1",
+        "_vectors": {
+          "default": {
+            "embeddings": "[vectors]",
+            "regenerate": true
+          }
+        }
+      }
+    ]
+    "###);
+
+    // remove `_vectors` from displayed attributes
+    let (response, code) =
+        index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await;
+    assert_eq!(202, code, "{:?}", response);
+    index.wait_task(response.uid()).await;
+
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}),
+        )
+        .await;
+    snapshot!(code, @"200 OK");
+    insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
+    [
+      {
+        "title": "Captain Planet",
+        "desc": "He's not part of the Marvel Cinematic Universe",
+        "id": "2"
+      },
+      {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "3"
+      },
+      {
+        "title": "Shazam!",
+        "desc": "a Captain Marvel ersatz",
+        "id": "1"
+      }
+    ]
+    "###);
+}
diff --git a/meilisearch/tests/vector/settings.rs b/meilisearch/tests/vector/settings.rs
index 3fe161f9b..e53ceb383 100644
--- a/meilisearch/tests/vector/settings.rs
+++ b/meilisearch/tests/vector/settings.rs
@@ -73,7 +73,48 @@ async fn reset_embedder_documents() {
     server.wait_task(response.uid()).await;
 
     // Make sure the documents are still present
-    let (documents, _code) = index.get_all_documents(Default::default()).await;
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions {
+            limit: None,
+            offset: None,
+            retrieve_vectors: false,
+            fields: None,
+        })
+        .await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "name": "kefir"
+        },
+        {
+          "id": 1,
+          "name": "echo"
+        },
+        {
+          "id": 2,
+          "name": "billou"
+        },
+        {
+          "id": 3,
+          "name": "intel"
+        },
+        {
+          "id": 4,
+          "name": "max"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 5
+    }
+    "###);
+
+    // Make sure we are still able to retrieve their vectors
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+        .await;
     snapshot!(json_string!(documents), @r###"
     {
       "results": [
@@ -174,45 +215,6 @@ async fn reset_embedder_documents() {
     }
     "###);
 
-    // Make sure we are still able to retrieve their vectors
-    let (documents, _code) = index
-        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
-        .await;
-    snapshot!(json_string!(documents), @r###"
-    {
-      "results": [
-        {
-          "id": 0,
-          "name": "kefir",
-          "_vectors": {}
-        },
-        {
-          "id": 1,
-          "name": "echo",
-          "_vectors": {}
-        },
-        {
-          "id": 2,
-          "name": "billou",
-          "_vectors": {}
-        },
-        {
-          "id": 3,
-          "name": "intel",
-          "_vectors": {}
-        },
-        {
-          "id": 4,
-          "name": "max",
-          "_vectors": {}
-        }
-      ],
-      "offset": 0,
-      "limit": 20,
-      "total": 5
-    }
-    "###);
-
     // Make sure the arroy DB has been cleared
     let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
     snapshot!(json_string!(documents), @r###"