Fix behavior of limit and offset for hybrid search when keyword results are returned early

The test is fixed
This commit is contained in:
Louis Dureuil 2024-06-27 14:21:48 +02:00
parent 8c4921b9dd
commit e53de15b8e
No known key found for this signature in database
2 changed files with 45 additions and 4 deletions

View File

@ -161,7 +161,7 @@ async fn limit_offset() {
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"0"); snapshot!(response["semanticHitCount"], @"0");
assert_eq!(response["hits"].as_array().unwrap().len(), 1); assert_eq!(response["hits"].as_array().unwrap().len(), 1);

View File

@ -178,16 +178,16 @@ impl<'a> Search<'a> {
// completely skip semantic search if the results of the keyword search are good enough // completely skip semantic search if the results of the keyword search are good enough
if self.results_good_enough(&keyword_results, semantic_ratio) { if self.results_good_enough(&keyword_results, semantic_ratio) {
return Ok((keyword_results, Some(0))); return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
} }
// no vector search against placeholder search // no vector search against placeholder search
let Some(query) = search.query.take() else { let Some(query) = search.query.take() else {
return Ok((keyword_results, Some(0))); return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
}; };
// no embedder, no semantic search // no embedder, no semantic search
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
return Ok((keyword_results, Some(0))); return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
}; };
let vector_query = match vector { let vector_query = match vector {
@ -239,3 +239,44 @@ impl<'a> Search<'a> {
true true
} }
} }
/// Applies `offset` and `limit` to keyword-only search results.
///
/// The first `offset` entries of `documents_ids` and `document_scores` are
/// dropped, then at most `limit` entries are kept. Every other field of the
/// [`SearchResult`] is passed through untouched.
///
/// If `offset` is greater than or equal to the number of documents (or
/// scores), both lists come back empty.
///
/// Returns the paginated result together with `Some(0)`; the early-return
/// call sites use this value when no semantic search was performed
/// (presumably it is the semantic hit count, confirm against the caller).
fn return_keyword_results(
    limit: usize,
    offset: usize,
    SearchResult {
        matching_words,
        candidates,
        mut documents_ids,
        mut document_scores,
        degraded,
        used_negative_operator,
    }: SearchResult,
) -> (SearchResult, Option<u32>) {
    // The second check is technically redundant because
    // `documents_ids.len() == document_scores.len()` is expected to hold;
    // it is kept as defensive programming.
    let offset_out_of_range = offset >= documents_ids.len() || offset >= document_scores.len();

    let (documents_ids, document_scores) = if offset_out_of_range {
        (Vec::new(), Vec::new())
    } else {
        // Remove the skipped prefix instead of rotating it to the back:
        // with a rotation, any `limit` larger than `len - offset` would let
        // the skipped entries reappear at the end of the page.
        // PANICS: cannot panic, `offset < len` was checked above.
        documents_ids.drain(..offset);
        documents_ids.truncate(limit);

        // PANICS: cannot panic, `offset < len` was checked above.
        document_scores.drain(..offset);
        document_scores.truncate(limit);

        (documents_ids, document_scores)
    };

    (
        SearchResult {
            matching_words,
            candidates,
            documents_ids,
            document_scores,
            degraded,
            used_negative_operator,
        },
        Some(0),
    )
}