diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 31b2940d8..02768bf60 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -150,6 +150,35 @@ async fn simple_search() { snapshot!(response["semanticHitCount"], @"3"); } +#[actix_rt::test] +async fn limit_offset() { + let server = Server::new().await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; + + let (response, code) = index + .search_post( + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###); + snapshot!(response["semanticHitCount"], @"0"); + assert_eq!(response["hits"].as_array().unwrap().len(), 1); + + let server = Server::new().await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; + + let (response, code) = index + .search_post( + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###); + snapshot!(response["semanticHitCount"], @"1"); + assert_eq!(response["hits"].as_array().unwrap().len(), 1); +} + #[actix_rt::test] async fn simple_search_hf() { let server = Server::new().await; diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 1c784097d..f7e1aa492 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -178,16 +178,16 @@ impl<'a> Search<'a> { // completely skip semantic search if the results of the keyword search are good enough if self.results_good_enough(&keyword_results, semantic_ratio) { - return Ok((keyword_results, Some(0))); + return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); } // no vector search against placeholder search let Some(query) = search.query.take() else { - return Ok((keyword_results, Some(0))); + return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; // no embedder, no semantic search let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { - return Ok((keyword_results, Some(0))); + return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; let vector_query = match vector { @@ -239,3 +239,44 @@ impl<'a> Search<'a> { true } } + +fn return_keyword_results( + limit: usize, + offset: usize, + SearchResult { + matching_words, + candidates, + mut documents_ids, + mut document_scores, + degraded, + used_negative_operator, + }: SearchResult, +) -> (SearchResult, Option) { + let (documents_ids, document_scores) = if offset >= documents_ids.len() || + // technically redudant because documents_ids.len() == document_scores.len(), + // defensive programming + offset >= document_scores.len() + { + (vec![], vec![]) + } else { + // PANICS: offset < len + documents_ids.rotate_left(offset); + documents_ids.truncate(limit); + + // PANICS: offset < len + document_scores.rotate_left(offset); + document_scores.truncate(limit); + (documents_ids, document_scores) + }; + ( + SearchResult { + matching_words, + candidates, + documents_ids, + document_scores, + degraded, + used_negative_operator, + }, + Some(0), + ) +}