diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 67f7909b9..028b341cb 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -5,7 +5,10 @@ use crate::common::index::Index; use crate::common::{Server, Value}; use crate::json; -async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> { +async fn index_with_documents_user_provided<'a>( + server: &'a Server, + documents: &Value, +) -> Index<'a> { let index = server.index("test"); let (response, code) = server.set_features(json!({"vectorStore": true})).await; @@ -34,7 +37,39 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde index } -static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| { +async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> Index<'a> { + let index = server.index("test"); + + let (response, code) = server.set_features(json!({"vectorStore": true})).await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "exportPuffinReports": false + } + "###); + + let (response, code) = index + .update_settings(json!({ "embedders": {"default": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "documentTemplate": "{{doc.title}}, {{doc.desc}}" + }}} )) + .await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(response.uid()).await; + + let (response, code) = index.add_documents(documents.clone(), None).await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(response.uid()).await; + index +} + +static SIMPLE_SEARCH_DOCUMENTS_VEC: Lazy<Value> = Lazy::new(|| { json!([ { "title": "Shazam!", @@ -56,7 +91,7 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| { }]) }); -static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| { +static 
SINGLE_DOCUMENT_VEC: Lazy<Value> = Lazy::new(|| { json!([{ "title": "Shazam!", "desc": "a Captain Marvel ersatz", @@ -65,10 +100,29 @@ static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| { }]) }); +static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| { + json!([ + { + "title": "Shazam!", + "desc": "a Captain Marvel ersatz", + "id": "1", + }, + { + "title": "Captain Planet", + "desc": "He's not part of the Marvel Cinematic Universe", + "id": "2", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "3", + }]) +}); + #[actix_rt::test] async fn simple_search() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let (response, code) = index .search_post( @@ -98,10 +152,59 @@ async fn simple_search() { snapshot!(response["semanticHitCount"], @"3"); } +#[actix_rt::test] +async fn simple_search_hf() { + let server = Server::new().await; + let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + + let (response, code) = + index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); + snapshot!(response["semanticHitCount"], @"0"); + + let (response, code) = index + .search_post( + // disable ranking score as the vectors between architectures are not equal + json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam 
ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); + snapshot!(response["semanticHitCount"], @"1"); + + let (response, code) = index + .search_post( + json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###); + snapshot!(response["semanticHitCount"], @"3"); + + let (response, code) = index + .search_post( + json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}]"###); + snapshot!(response["semanticHitCount"], @"3"); + + let (response, code) = index + .search_post( + json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###); + snapshot!(response["semanticHitCount"], @"3"); +} + #[actix_rt::test] async fn distribution_shift() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}}); 
let (response, code) = index.search_post(search.clone()).await; @@ -133,7 +236,7 @@ async fn distribution_shift() { #[actix_rt::test] async fn highlighter() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], @@ -184,7 +287,7 @@ async fn highlighter() { #[actix_rt::test] async fn invalid_semantic_ratio() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let (response, code) = index .search_post( @@ -256,7 +359,7 @@ async fn invalid_semantic_ratio() { #[actix_rt::test] async fn single_document() { let server = Server::new().await; - let index = index_with_documents(&server, &SINGLE_DOCUMENT).await; + let index = index_with_documents_user_provided(&server, &SINGLE_DOCUMENT_VEC).await; let (response, code) = index .search_post( @@ -272,7 +375,7 @@ async fn single_document() { #[actix_rt::test] async fn query_combination() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; // search without query and vector, but with hybrid => still placeholder let (response, code) = index