4108: Fix bug where search with distinct attribute and no ranking returns offset+limit hits r=curquiza a=vivek-26

# Pull Request

## Related issue
Fixes #4078 

## What does this PR do?
This PR:
- Fixes a bug where a search with a distinct attribute and no ranking returns offset + limit hits instead of limit hits (a concrete example follows below).
- Adds unit and integration tests.
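Concretely, using the data set from the new integration test below (ten documents spread over five distinct `shopId` values, with `shopId` as the distinct attribute): before this change a request with `offset: 2, limit: 2` came back with offset + limit = 4 distinct hits, because the offset was never applied to the deduplicated results; with the fix the same request returns exactly the 2 requested hits, the third and fourth distinct shops.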

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
commit f343ef5f2f by meili-bors[bot], 2023-10-12 07:51:29 +00:00, committed by GitHub
4 changed files with 132 additions and 2 deletions


@@ -0,0 +1,63 @@
use meili_snap::snapshot;
use once_cell::sync::Lazy;
use serde_json::{json, Value};

use crate::common::Server;

pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {"productId": 1, "shopId": 1},
        {"productId": 2, "shopId": 1},
        {"productId": 3, "shopId": 2},
        {"productId": 4, "shopId": 2},
        {"productId": 5, "shopId": 3},
        {"productId": 6, "shopId": 3},
        {"productId": 7, "shopId": 4},
        {"productId": 8, "shopId": 4},
        {"productId": 9, "shopId": 5},
        {"productId": 10, "shopId": 5}
    ])
});

pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";

/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
async fn distinct_search_with_offset_no_ranking() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
    index.wait_task(1).await;

    fn get_hits(response: Value) -> Vec<i64> {
        let hits_array = response["hits"].as_array().unwrap();
        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
    }

    let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @"[1, 2]");

    let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"2");
    snapshot!(format!("{:?}", hits), @"[3, 4]");

    let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"1");
    snapshot!(format!("{:?}", hits), @"[5]");

    let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
    let hits = get_hits(response);
    snapshot!(code, @"200 OK");
    snapshot!(hits.len(), @"0");
}
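These snapshot values follow directly from the data set: each of the five shops owns two products, so with `shopId` as the distinct attribute a page of `limit: 2, offset: 0` covers shops 1 and 2, `offset: 2` covers shops 3 and 4, `offset: 4` leaves only shop 5, and `offset: 5` is past the last distinct value and returns no hits.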


@@ -1,6 +1,7 @@
 // This modules contains all the test concerning search. Each particular feature of the search
 // should be tested in its own module to isolate tests and keep the tests readable.
+mod distinct;
 mod errors;
 mod facet_search;
 mod formatted;


@@ -46,18 +46,27 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     if let Some(distinct_fid) = distinct_fid {
         let mut excluded = RoaringBitmap::new();
         let mut results = vec![];
+        let mut skip = 0;
         for docid in universe.iter() {
-            if results.len() >= from + length {
+            if results.len() >= length {
                 break;
             }
             if excluded.contains(docid) {
                 continue;
             }
             distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
+            skip += 1;
+            if skip <= from {
+                continue;
+            }
             results.push(docid);
         }
         let mut all_candidates = universe - excluded;
         all_candidates.extend(results.iter().copied());
         return Ok(BucketSortOutput {
             scores: vec![Default::default(); results.len()],
             docids: results,
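To make the intent of the change easier to follow, here is a minimal, self-contained sketch of the same idea using only standard-library types; `distinct_page`, the `(docid, distinct key)` pairs, and the `HashSet` standing in for the `excluded` bitmap are all illustrative, not the actual milli API:

```rust
use std::collections::HashSet;

/// Keep at most `length` documents, one per distinct key, after skipping the
/// first `from` distinct documents. The skip counter only advances for
/// documents that survive deduplication, so the offset applies to *distinct*
/// hits rather than to raw candidates (which was the source of the bug).
fn distinct_page(docs: &[(u32, u64)], from: usize, length: usize) -> Vec<u32> {
    let mut seen_keys = HashSet::new(); // stands in for the `excluded` RoaringBitmap
    let mut results = Vec::new();
    let mut skip = 0;

    for &(docid, distinct_key) in docs {
        if results.len() >= length {
            break;
        }
        // Later documents sharing an already-seen distinct value are dropped.
        if !seen_keys.insert(distinct_key) {
            continue;
        }
        // Count this distinct hit toward the offset before keeping it.
        skip += 1;
        if skip <= from {
            continue;
        }
        results.push(docid);
    }
    results
}

fn main() {
    // (productId, shopId) pairs mirroring the integration-test data set above.
    let docs: Vec<(u32, u64)> = (1u32..=10).map(|i| (i, u64::from((i + 1) / 2))).collect();

    // offset 2, limit 2 -> the third and fourth distinct shops only.
    assert_eq!(distinct_page(&docs, 2, 2), vec![5, 7]);
    // offset 4, limit 10 -> a single remaining distinct shop.
    assert_eq!(distinct_page(&docs, 4, 10), vec![9]);
    println!("distinct paging behaves as expected");
}
```

With the pre-fix condition `results.len() >= from + length` and no skip counter, the same two calls would have returned `[1, 3, 5, 7]` (offset + limit hits) and `[1, 3, 5, 7, 9]` (every distinct value), never applying the offset.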


@@ -8,7 +8,7 @@ use Criterion::*;
 use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
 macro_rules! test_distinct {
-    ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
+    ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => {
         #[test]
         fn $func() {
             let criteria = $criteria;
@@ -27,6 +27,7 @@ macro_rules! test_distinct {
             let mut search = Search::new(&rtxn, &index);
             search.query(search::TEST_QUERY);
             search.limit($limit);
+            search.offset($offset);
             search.exhaustive_number_hits($exhaustive);
             search.terms_matching_strategy(TermsMatchingStrategy::default());
@@ -47,6 +48,7 @@ macro_rules! test_distinct {
                         Some(d.id)
                     }
                 })
+                .skip($offset)
                 .take($limit)
                 .collect();
@@ -61,6 +63,7 @@
     tag,
     true,
     1,
+    0,
     vec![Words, Typo, Proximity, Attribute, Exactness],
     3
 );
@@ -69,6 +72,7 @@
     asc_desc_rank,
     true,
     1,
+    0,
     vec![Words, Typo, Proximity, Attribute, Exactness],
     7
 );
@@ -77,6 +81,7 @@
     asc_desc_rank,
     true,
     0,
+    0,
     vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
     7
 );
@@ -86,6 +91,7 @@
     tag,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Typo, Proximity, Attribute, Exactness],
     3
 );
@@ -94,6 +100,7 @@
     asc_desc_rank,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Typo, Proximity, Attribute, Exactness],
     7
 );
@@ -102,6 +109,7 @@
     tag,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words],
     3
 );
@@ -110,6 +118,7 @@
     asc_desc_rank,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words],
     7
 );
@@ -118,6 +127,7 @@
     tag,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Typo],
     3
 );
@@ -126,6 +136,7 @@
     asc_desc_rank,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Typo],
     7
 );
@@ -134,6 +145,7 @@
     tag,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Proximity],
     3
 );
@@ -142,6 +154,7 @@
     asc_desc_rank,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Proximity],
     7
 );
@@ -150,6 +163,7 @@
     tag,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Attribute],
     3
 );
@@ -158,6 +172,7 @@
     asc_desc_rank,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Attribute],
     7
 );
@@ -166,6 +181,7 @@
     tag,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Exactness],
     3
 );
@@ -174,6 +190,47 @@
     asc_desc_rank,
     false,
     EXTERNAL_DOCUMENTS_IDS.len(),
+    0,
     vec![Words, Exactness],
     7
 );
+test_distinct!(
+    // testing: https://github.com/meilisearch/meilisearch/issues/4078
+    distinct_string_limit_and_offset,
+    tag,
+    false,
+    EXTERNAL_DOCUMENTS_IDS.len(),
+    1,
+    vec![],
+    2
+);
+test_distinct!(
+    // testing: https://github.com/meilisearch/meilisearch/issues/4078
+    exhaustive_distinct_string_limit_and_offset,
+    tag,
+    true,
+    1,
+    2,
+    vec![],
+    1
+);
+test_distinct!(
+    // testing: https://github.com/meilisearch/meilisearch/issues/4078
+    distinct_number_limit_and_offset,
+    asc_desc_rank,
+    false,
+    EXTERNAL_DOCUMENTS_IDS.len(),
+    2,
+    vec![],
+    5
+);
+test_distinct!(
+    // testing: https://github.com/meilisearch/meilisearch/issues/4078
+    exhaustive_distinct_number_limit_and_offset,
+    asc_desc_rank,
+    true,
+    2,
+    4,
+    vec![],
+    3
+);
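Reading the new cases against the existing expectations above (3 distinct `tag` values and 7 distinct `asc_desc_rank` values for the test query), the expected counts are simply the total distinct values minus the offset: `distinct_string_limit_and_offset` skips 1 of 3 tags and expects 2 hits, `distinct_number_limit_and_offset` skips 2 of 7 ranks and expects 5, and the exhaustive variants follow the same arithmetic (3 - 2 = 1 and 7 - 4 = 3), assuming the count checked there is the exhaustive number of hits after the offset rather than the truncated page.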