From 014eaea428f9968b3bb92aa6e674a03d89f3dc05 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 14:47:28 +0100 Subject: [PATCH 1/2] Use MatchingWords from keyword search instead of the one from vector search --- milli/src/search/hybrid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 67365cf52..b4c79f7f5 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -102,7 +102,7 @@ impl ScoreWithRatioResult { } SearchResult { - matching_words: left.matching_words, + matching_words: right.matching_words, candidates: left.candidates | right.candidates, documents_ids, document_scores, From da7c796be188e4b53690dfa8d53ece286095a7a4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 15:09:49 +0100 Subject: [PATCH 2/2] Add test --- meilisearch/tests/search/hybrid.rs | 46 ++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 79819cab2..6ea9920f6 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -87,6 +87,52 @@ async fn simple_search() { snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###); } +#[actix_rt::test] +async fn highlighter() { + let server = Server::new().await; + let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + + let (response, code) = index + .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], + "hybrid": {"semanticRatio": 0.2}, + "attributesToHighlight": [ + "desc" + ], + "highlightPreTag": "**BEGIN**", + "highlightPostTag": "**END**" + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###); + + let (response, code) = index + .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], + "hybrid": {"semanticRatio": 0.8}, + "attributesToHighlight": [ + "desc" + ], + "highlightPreTag": "**BEGIN**", + "highlightPostTag": "**END**" + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_semanticScore":0.9472136}]"###); + + // no highlighting on full semantic + let (response, code) = index + .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], + "hybrid": {"semanticRatio": 1.0}, + "attributesToHighlight": [ + "desc" + ], + "highlightPreTag": "**BEGIN**", + "highlightPostTag": "**END**" + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}}]"###); +} + #[actix_rt::test] async fn invalid_semantic_ratio() { let server = Server::new().await;