mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 09:35:51 +08:00
Add semanticHitCount
This commit is contained in:
parent
3c6e9851a4
commit
1ff2a2d6fb
@ -760,6 +760,7 @@ impl SearchAggregator {
|
|||||||
query: _,
|
query: _,
|
||||||
processing_time_ms,
|
processing_time_ms,
|
||||||
hits_info: _,
|
hits_info: _,
|
||||||
|
semantic_hit_count: _,
|
||||||
facet_distribution: _,
|
facet_distribution: _,
|
||||||
facet_stats: _,
|
facet_stats: _,
|
||||||
degraded,
|
degraded,
|
||||||
|
@ -385,6 +385,9 @@ pub struct SearchResult {
|
|||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
|
||||||
|
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub semantic_hit_count: Option<u32>,
|
||||||
|
|
||||||
// These fields are only used for analytics purposes
|
// These fields are only used for analytics purposes
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
pub degraded: bool,
|
pub degraded: bool,
|
||||||
@ -553,16 +556,23 @@ pub fn perform_search(
|
|||||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||||
prepare_search(index, &rtxn, &query, &search_kind, time_budget)?;
|
prepare_search(index, &rtxn, &query, &search_kind, time_budget)?;
|
||||||
|
|
||||||
let milli::SearchResult {
|
let (
|
||||||
documents_ids,
|
milli::SearchResult {
|
||||||
matching_words,
|
documents_ids,
|
||||||
candidates,
|
matching_words,
|
||||||
document_scores,
|
candidates,
|
||||||
degraded,
|
document_scores,
|
||||||
used_negative_operator,
|
degraded,
|
||||||
..
|
used_negative_operator,
|
||||||
} = match &search_kind {
|
},
|
||||||
SearchKind::KeywordOnly | SearchKind::SemanticOnly { .. } => search.execute()?,
|
semantic_hit_count,
|
||||||
|
) = match &search_kind {
|
||||||
|
SearchKind::KeywordOnly => (search.execute()?, None),
|
||||||
|
SearchKind::SemanticOnly { .. } => {
|
||||||
|
let results = search.execute()?;
|
||||||
|
let semantic_hit_count = results.document_scores.len() as u32;
|
||||||
|
(results, Some(semantic_hit_count))
|
||||||
|
}
|
||||||
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
|
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -760,6 +770,7 @@ pub fn perform_search(
|
|||||||
facet_stats,
|
facet_stats,
|
||||||
degraded,
|
degraded,
|
||||||
used_negative_operator,
|
used_negative_operator,
|
||||||
|
semantic_hit_count,
|
||||||
};
|
};
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
@ -77,6 +77,16 @@ async fn simple_search() {
|
|||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"0");
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.search_post(
|
||||||
|
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}}),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"1");
|
||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(
|
.search_post(
|
||||||
@ -85,6 +95,7 @@ async fn simple_search() {
|
|||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"3");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
@ -136,6 +147,7 @@ async fn highlighter() {
|
|||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"0");
|
||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||||
@ -149,6 +161,7 @@ async fn highlighter() {
|
|||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_semanticScore":0.9472136}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_semanticScore":0.9472136}]"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"3");
|
||||||
|
|
||||||
// no highlighting on full semantic
|
// no highlighting on full semantic
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
@ -163,6 +176,7 @@ async fn highlighter() {
|
|||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}}]"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"3");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
@ -250,4 +264,20 @@ async fn single_document() {
|
|||||||
|
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
|
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"1");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn query_combination() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
|
||||||
|
|
||||||
|
// search without query and vector, but with hybrid => still placeholder
|
||||||
|
let (response, code) = index
|
||||||
|
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
||||||
|
.await;
|
||||||
|
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
|
||||||
|
snapshot!(response["semanticHitCount"], @"1");
|
||||||
}
|
}
|
||||||
|
@ -84,45 +84,73 @@ impl ScoreWithRatioResult {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge(left: Self, right: Self, from: usize, length: usize) -> SearchResult {
|
fn merge(
|
||||||
let mut documents_ids =
|
vector_results: Self,
|
||||||
Vec::with_capacity(left.document_scores.len() + right.document_scores.len());
|
keyword_results: Self,
|
||||||
let mut document_scores =
|
from: usize,
|
||||||
Vec::with_capacity(left.document_scores.len() + right.document_scores.len());
|
length: usize,
|
||||||
|
) -> (SearchResult, u32) {
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
enum ResultSource {
|
||||||
|
Semantic,
|
||||||
|
Keyword,
|
||||||
|
}
|
||||||
|
let mut semantic_hit_count = 0;
|
||||||
|
|
||||||
|
let mut documents_ids = Vec::with_capacity(
|
||||||
|
vector_results.document_scores.len() + keyword_results.document_scores.len(),
|
||||||
|
);
|
||||||
|
let mut document_scores = Vec::with_capacity(
|
||||||
|
vector_results.document_scores.len() + keyword_results.document_scores.len(),
|
||||||
|
);
|
||||||
|
|
||||||
let mut documents_seen = RoaringBitmap::new();
|
let mut documents_seen = RoaringBitmap::new();
|
||||||
for (docid, (main_score, _sub_score)) in left
|
for ((docid, (main_score, _sub_score)), source) in vector_results
|
||||||
.document_scores
|
.document_scores
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.merge_by(right.document_scores.into_iter(), |(_, left), (_, right)| {
|
.zip(std::iter::repeat(ResultSource::Semantic))
|
||||||
// the first value is the one with the greatest score
|
.merge_by(
|
||||||
compare_scores(left, right).is_ge()
|
keyword_results
|
||||||
})
|
.document_scores
|
||||||
|
.into_iter()
|
||||||
|
.zip(std::iter::repeat(ResultSource::Keyword)),
|
||||||
|
|((_, left), _), ((_, right), _)| {
|
||||||
|
// the first value is the one with the greatest score
|
||||||
|
compare_scores(left, right).is_ge()
|
||||||
|
},
|
||||||
|
)
|
||||||
// remove documents we already saw
|
// remove documents we already saw
|
||||||
.filter(|(docid, _)| documents_seen.insert(*docid))
|
.filter(|((docid, _), _)| documents_seen.insert(*docid))
|
||||||
// start skipping **after** the filter
|
// start skipping **after** the filter
|
||||||
.skip(from)
|
.skip(from)
|
||||||
// take **after** skipping
|
// take **after** skipping
|
||||||
.take(length)
|
.take(length)
|
||||||
{
|
{
|
||||||
|
if let ResultSource::Semantic = source {
|
||||||
|
semantic_hit_count += 1;
|
||||||
|
}
|
||||||
documents_ids.push(docid);
|
documents_ids.push(docid);
|
||||||
// TODO: pass both scores to documents_score in some way?
|
// TODO: pass both scores to documents_score in some way?
|
||||||
document_scores.push(main_score);
|
document_scores.push(main_score);
|
||||||
}
|
}
|
||||||
|
|
||||||
SearchResult {
|
(
|
||||||
matching_words: right.matching_words,
|
SearchResult {
|
||||||
candidates: left.candidates | right.candidates,
|
matching_words: keyword_results.matching_words,
|
||||||
documents_ids,
|
candidates: vector_results.candidates | keyword_results.candidates,
|
||||||
document_scores,
|
documents_ids,
|
||||||
degraded: left.degraded | right.degraded,
|
document_scores,
|
||||||
used_negative_operator: left.used_negative_operator | right.used_negative_operator,
|
degraded: vector_results.degraded | keyword_results.degraded,
|
||||||
}
|
used_negative_operator: vector_results.used_negative_operator
|
||||||
|
| keyword_results.used_negative_operator,
|
||||||
|
},
|
||||||
|
semantic_hit_count,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Search<'a> {
|
impl<'a> Search<'a> {
|
||||||
pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<SearchResult> {
|
pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<(SearchResult, Option<u32>)> {
|
||||||
// TODO: find classier way to achieve that than to reset vector and query params
|
// TODO: find classier way to achieve that than to reset vector and query params
|
||||||
// create separate keyword and semantic searches
|
// create separate keyword and semantic searches
|
||||||
let mut search = Search {
|
let mut search = Search {
|
||||||
@ -148,14 +176,16 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
// completely skip semantic search if the results of the keyword search are good enough
|
// completely skip semantic search if the results of the keyword search are good enough
|
||||||
if self.results_good_enough(&keyword_results, semantic_ratio) {
|
if self.results_good_enough(&keyword_results, semantic_ratio) {
|
||||||
return Ok(keyword_results);
|
return Ok((keyword_results, Some(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// no vector search against placeholder search
|
// no vector search against placeholder search
|
||||||
let Some(query) = search.query.take() else { return Ok(keyword_results) };
|
let Some(query) = search.query.take() else {
|
||||||
|
return Ok((keyword_results, Some(0)));
|
||||||
|
};
|
||||||
// no embedder, no semantic search
|
// no embedder, no semantic search
|
||||||
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
|
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
|
||||||
return Ok(keyword_results);
|
return Ok((keyword_results, Some(0)));
|
||||||
};
|
};
|
||||||
|
|
||||||
let vector_query = match vector {
|
let vector_query = match vector {
|
||||||
@ -166,7 +196,7 @@ impl<'a> Search<'a> {
|
|||||||
Ok(embedding) => embedding,
|
Ok(embedding) => embedding,
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
tracing::error!(error=%error, "Embedding failed");
|
tracing::error!(error=%error, "Embedding failed");
|
||||||
return Ok(keyword_results);
|
return Ok((keyword_results, Some(0)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -181,10 +211,10 @@ impl<'a> Search<'a> {
|
|||||||
let keyword_results = ScoreWithRatioResult::new(keyword_results, 1.0 - semantic_ratio);
|
let keyword_results = ScoreWithRatioResult::new(keyword_results, 1.0 - semantic_ratio);
|
||||||
let vector_results = ScoreWithRatioResult::new(vector_results, semantic_ratio);
|
let vector_results = ScoreWithRatioResult::new(vector_results, semantic_ratio);
|
||||||
|
|
||||||
let merge_results =
|
let (merge_results, semantic_hit_count) =
|
||||||
ScoreWithRatioResult::merge(vector_results, keyword_results, self.offset, self.limit);
|
ScoreWithRatioResult::merge(vector_results, keyword_results, self.offset, self.limit);
|
||||||
assert!(merge_results.documents_ids.len() <= self.limit);
|
assert!(merge_results.documents_ids.len() <= self.limit);
|
||||||
Ok(merge_results)
|
Ok((merge_results, Some(semantic_hit_count)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn results_good_enough(&self, keyword_results: &SearchResult, semantic_ratio: f32) -> bool {
|
fn results_good_enough(&self, keyword_results: &SearchResult, semantic_ratio: f32) -> bool {
|
||||||
|
Loading…
Reference in New Issue
Block a user