diff --git a/Cargo.lock b/Cargo.lock index b3991d130..aa7df19ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "bytes", @@ -1206,7 +1206,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "big_s", @@ -1417,7 +1417,7 @@ dependencies = [ [[package]] name = "file-store" -version = "1.4.1" +version = "1.4.2" dependencies = [ "faux", "tempfile", @@ -1439,7 +1439,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.4.1" +version = "1.4.2" dependencies = [ "insta", "nom", @@ -1459,7 +1459,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.4.1" +version = "1.4.2" dependencies = [ "criterion", "serde_json", @@ -1577,7 +1577,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.4.1" +version = "1.4.2" dependencies = [ "arbitrary", "clap", @@ -1891,7 +1891,7 @@ dependencies = [ [[package]] name = "index-scheduler" -version = "1.4.1" +version = "1.4.2" dependencies = [ "anyhow", "big_s", @@ -2088,7 +2088,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.4.1" +version = "1.4.2" dependencies = [ "criterion", "serde_json", @@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.4.1" +version = "1.4.2" dependencies = [ "insta", "md5", @@ -2509,7 +2509,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.4.1" +version = "1.4.2" dependencies = [ "actix-cors", "actix-http", @@ -2599,7 +2599,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.4.1" +version = "1.4.2" dependencies = [ "base64 0.21.2", "enum-iterator", @@ -2618,7 +2618,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.4.1" +version = "1.4.2" dependencies = [ "actix-web", "anyhow", @@ -2672,7 +2672,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.4.1" +version = "1.4.2" dependencies = [ "big_s", "bimap", @@ -2994,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "permissive-json-pointer" -version = "1.4.1" +version = "1.4.2" dependencies = [ "big_s", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 05c7b1012..a40af10f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ members = [ ] [workspace.package] -version = "1.4.1" +version = "1.4.2" authors = ["Quentin de Quelen ", "Clément Renault "] description = "Meilisearch HTTP server" homepage = "https://meilisearch.com" diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs index 93c5197a6..14ce88da2 100644 --- a/meilisearch/tests/search/distinct.rs +++ b/meilisearch/tests/search/distinct.rs @@ -6,21 +6,109 @@ use crate::json; pub(self) static DOCUMENTS: Lazy = Lazy::new(|| { json!([ - {"productId": 1, "shopId": 1}, - {"productId": 2, "shopId": 1}, - {"productId": 3, "shopId": 2}, - {"productId": 4, "shopId": 2}, - {"productId": 5, "shopId": 3}, - {"productId": 6, "shopId": 3}, - {"productId": 7, "shopId": 4}, - {"productId": 8, "shopId": 4}, - {"productId": 9, "shopId": 5}, - {"productId": 10, "shopId": 5} + { + "id": 1, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Brown" + }, + { + "id": 2, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Black" + }, + { + "id": 3, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": "Blue" + }, + { + "id": 4, + "description": "T-Shirt", + "brand": "Nike", + "product_id": "789012", + "color": "Red" + }, + { + "id": 5, + "description": "T-Shirt", + "brand": "Nike", + "product_id": "789012", + "color": "Blue" + }, + { + "id": 6, + "description": "Running Shoes", + "brand": "Adidas", + "product_id": "456789", + "color": "Black" + }, + { + "id": 7, + "description": "Running Shoes", + "brand": "Adidas", + "product_id": "456789", + "color": "White" + }, + { + "id": 8, + "description": "Hoodie", + "brand": "Puma", + "product_id": "987654", + "color": "Gray" + }, + { + "id": 9, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Green" + }, + { + "id": 10, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Red" + }, + { + "id": 11, + "description": "Sweater", + "brand": "Gap", + "product_id": "234567", + "color": "Blue" + }, + { + "id": 12, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Indigo" + }, + { + "id": 13, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Black" + }, + { + "id": 14, + "description": "Jeans", + "brand": "Levi's", + "product_id": "345678", + "color": "Stone Wash" + } ]) }); -pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId"; -pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId"; +pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id"; +pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id"; /// testing: https://github.com/meilisearch/meilisearch/issues/4078 #[actix_rt::test] @@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() { index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; index.wait_task(1).await; - fn get_hits(Value(response): Value) -> Vec { + fn get_hits(response: &Value) -> Vec<&str> { let hits_array = response["hits"].as_array().unwrap(); - hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::>() + hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::>() } - let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"2"); - snapshot!(format!("{:?}", hits), @"[1, 2]"); + snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#); + snapshot!(response["estimatedTotalHits"] , @"11"); - let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"2"); - snapshot!(format!("{:?}", hits), @"[3, 4]"); + snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#); + snapshot!(response["estimatedTotalHits"], @"10"); - let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#); + snapshot!(response["estimatedTotalHits"], @"6"); + + let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"1"); - snapshot!(format!("{:?}", hits), @"[5]"); + snapshot!(format!("{:?}", hits), @r#"["345678"]"#); + snapshot!(response["estimatedTotalHits"], @"6"); - let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await; - let hits = get_hits(response); + let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await; + let hits = get_hits(&response); snapshot!(code, @"200 OK"); snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["estimatedTotalHits"], @"6"); + + let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["estimatedTotalHits"], @"6"); +} + +/// testing: https://github.com/meilisearch/meilisearch/issues/4130 +#[actix_rt::test] +async fn distinct_search_with_pagination_no_ranking() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; + index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; + index.wait_task(1).await; + + fn get_hits(response: &Value) -> Vec<&str> { + let hits_array = response["hits"].as_array().unwrap(); + hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::>() + } + + let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["page"], @"0"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#); + snapshot!(response["page"], @"1"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#); + snapshot!(response["page"], @"2"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"2"); + snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#); + snapshot!(response["page"], @"3"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["page"], @"4"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); + + let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"3"); + snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#); + snapshot!(response["page"], @"2"); + snapshot!(response["totalPages"], @"2"); + snapshot!(response["totalHits"], @"6"); } diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index cf2f08cce..b46f6124f 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( if let Some(distinct_fid) = distinct_fid { let mut excluded = RoaringBitmap::new(); let mut results = vec![]; - let mut skip = 0; for docid in universe.iter() { - if results.len() >= length { + if results.len() >= from + length { break; } if excluded.contains(docid) { @@ -56,16 +55,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( } distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?; - skip += 1; - if skip <= from { - continue; - } - results.push(docid); } let mut all_candidates = universe - excluded; all_candidates.extend(results.iter().copied()); + // drain the results of the skipped elements + // this **must** be done **after** writing the entire results in `all_candidates` to ensure + // e.g. estimatedTotalHits is correct. + if results.len() >= from { + results.drain(..from); + } else { + results.clear(); + } return Ok(BucketSortOutput { scores: vec![Default::default(); results.len()], diff --git a/milli/tests/search/distinct.rs b/milli/tests/search/distinct.rs index e1876286c..fc890dfe8 100644 --- a/milli/tests/search/distinct.rs +++ b/milli/tests/search/distinct.rs @@ -202,7 +202,7 @@ test_distinct!( EXTERNAL_DOCUMENTS_IDS.len(), 1, vec![], - 2 + 3 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -212,7 +212,7 @@ test_distinct!( 1, 2, vec![], - 1 + 3 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -222,7 +222,7 @@ test_distinct!( EXTERNAL_DOCUMENTS_IDS.len(), 2, vec![], - 5 + 7 ); test_distinct!( // testing: https://github.com/meilisearch/meilisearch/issues/4078 @@ -232,5 +232,5 @@ test_distinct!( 2, 4, vec![], - 3 + 7 );