From d4da06ff472e8f333570679baa49e111c80d7b89 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Mon, 9 Oct 2023 16:16:50 +0530 Subject: [PATCH 1/3] fix bug where distinct search with no ranking returns offset+limit hits --- milli/src/search/new/bucket_sort.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index 03e613b37..cf2f08cce 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -46,18 +46,27 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( if let Some(distinct_fid) = distinct_fid { let mut excluded = RoaringBitmap::new(); let mut results = vec![]; + let mut skip = 0; for docid in universe.iter() { - if results.len() >= from + length { + if results.len() >= length { break; } if excluded.contains(docid) { continue; } + distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?; + skip += 1; + if skip <= from { + continue; + } + results.push(docid); } + let mut all_candidates = universe - excluded; all_candidates.extend(results.iter().copied()); + return Ok(BucketSortOutput { scores: vec![Default::default(); results.len()], docids: results, From 19ba129165ecd6f895d6767e94ae3e7dddb045a6 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Mon, 9 Oct 2023 16:18:05 +0530 Subject: [PATCH 2/3] add unit test for distinct search with no ranking --- milli/tests/search/distinct.rs | 59 +++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/milli/tests/search/distinct.rs b/milli/tests/search/distinct.rs index d8291ee30..e1876286c 100644 --- a/milli/tests/search/distinct.rs +++ b/milli/tests/search/distinct.rs @@ -8,7 +8,7 @@ use Criterion::*; use crate::search::{self, EXTERNAL_DOCUMENTS_IDS}; macro_rules! test_distinct { - ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => { + ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => { #[test] fn $func() { let criteria = $criteria; @@ -27,6 +27,7 @@ macro_rules! test_distinct { let mut search = Search::new(&rtxn, &index); search.query(search::TEST_QUERY); search.limit($limit); + search.offset($offset); search.exhaustive_number_hits($exhaustive); search.terms_matching_strategy(TermsMatchingStrategy::default()); @@ -47,6 +48,7 @@ macro_rules! test_distinct { Some(d.id) } }) + .skip($offset) .take($limit) .collect(); @@ -61,6 +63,7 @@ test_distinct!( tag, true, 1, + 0, vec![Words, Typo, Proximity, Attribute, Exactness], 3 ); @@ -69,6 +72,7 @@ test_distinct!( asc_desc_rank, true, 1, + 0, vec![Words, Typo, Proximity, Attribute, Exactness], 7 ); @@ -77,6 +81,7 @@ test_distinct!( asc_desc_rank, true, 0, + 0, vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo], 7 ); @@ -86,6 +91,7 @@ test_distinct!( tag, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Typo, Proximity, Attribute, Exactness], 3 ); @@ -94,6 +100,7 @@ test_distinct!( asc_desc_rank, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Typo, Proximity, Attribute, Exactness], 7 ); @@ -102,6 +109,7 @@ test_distinct!( tag, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words], 3 ); @@ -110,6 +118,7 @@ test_distinct!( asc_desc_rank, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words], 7 ); @@ -118,6 +127,7 @@ test_distinct!( tag, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Typo], 3 ); @@ -126,6 +136,7 @@ test_distinct!( asc_desc_rank, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Typo], 7 ); @@ -134,6 +145,7 @@ test_distinct!( tag, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Proximity], 3 ); @@ -142,6 +154,7 @@ test_distinct!( asc_desc_rank, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Proximity], 7 ); @@ -150,6 +163,7 @@ test_distinct!( tag, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Attribute], 3 ); @@ -158,6 +172,7 @@ test_distinct!( asc_desc_rank, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Attribute], 7 ); @@ -166,6 +181,7 @@ test_distinct!( tag, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Exactness], 3 ); @@ -174,6 +190,47 @@ test_distinct!( asc_desc_rank, false, EXTERNAL_DOCUMENTS_IDS.len(), + 0, vec![Words, Exactness], 7 ); +test_distinct!( + // testing: https://github.com/meilisearch/meilisearch/issues/4078 + distinct_string_limit_and_offset, + tag, + false, + EXTERNAL_DOCUMENTS_IDS.len(), + 1, + vec![], + 2 +); +test_distinct!( + // testing: https://github.com/meilisearch/meilisearch/issues/4078 + exhaustive_distinct_string_limit_and_offset, + tag, + true, + 1, + 2, + vec![], + 1 +); +test_distinct!( + // testing: https://github.com/meilisearch/meilisearch/issues/4078 + distinct_number_limit_and_offset, + asc_desc_rank, + false, + EXTERNAL_DOCUMENTS_IDS.len(), + 2, + vec![], + 5 +); +test_distinct!( + // testing: https://github.com/meilisearch/meilisearch/issues/4078 + exhaustive_distinct_number_limit_and_offset, + asc_desc_rank, + true, + 2, + 4, + vec![], + 3 +); From d1331d8abf8f1a49e442b24eefa5c760be96f10e Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Mon, 9 Oct 2023 16:19:36 +0530 Subject: [PATCH 3/3] add integration test for distinct search with no ranking --- meilisearch/tests/search/distinct.rs | 63 ++++++++++++++++++++++++++++ meilisearch/tests/search/mod.rs | 1 + 2 files changed, 64 insertions(+) create mode 100644 meilisearch/tests/search/distinct.rs diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs new file mode 100644 index 000000000..068bd12b1 --- /dev/null +++ b/meilisearch/tests/search/distinct.rs @@ -0,0 +1,63 @@ +use meili_snap::snapshot; +use once_cell::sync::Lazy; +use serde_json::{json, Value}; + +use crate::common::Server; + +pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + {"productId": 1, "shopId": 1}, + {"productId": 2, "shopId": 1}, + {"productId": 3, "shopId": 2}, + {"productId": 4, "shopId": 2}, + {"productId": 5, "shopId": 3}, + {"productId": 6, "shopId": 3}, + {"productId": 7, "shopId": 4}, + {"productId": 8, "shopId": 4}, + {"productId": 9, "shopId": 5}, + {"productId": 10, "shopId": 5} + ]) +}); + +pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId"; +pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId"; + +/// testing: https://github.com/meilisearch/meilisearch/issues/4078 +#[actix_rt::test] +async fn distinct_search_with_offset_no_ranking() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; + index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; + index.wait_task(1).await; + + fn get_hits(response: Value) -> Vec { + let hits_array = response["hits"].as_array().unwrap(); + hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::>() + } + + let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await; + let hits = get_hits(response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @"[1, 2]"); + + let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await; + let hits = get_hits(response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @"[3, 4]"); + + let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await; + let hits = get_hits(response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"1"); + snapshot!(format!("{:?}", hits), @"[5]"); + + let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await; + let hits = get_hits(response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); +} diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 3aefe7e83..61db0cd10 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -1,6 +1,7 @@ // This modules contains all the test concerning search. Each particular feature of the search // should be tested in its own module to isolate tests and keep the tests readable. +mod distinct; mod errors; mod facet_search; mod formatted;