4151: Bring back changes from v1.4.2 into `release-v1.5.0` r=dureuill a=curquiza

This will bring the fixes in v1.4.2 for v1.5.0 release

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
This commit is contained in:
meili-bors[bot] 2023-10-23 10:11:11 +00:00 committed by GitHub
commit e4c24ca6a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 231 additions and 51 deletions

28
Cargo.lock generated
View File

@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]] [[package]]
name = "benchmarks" name = "benchmarks"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes", "bytes",
@ -1206,7 +1206,7 @@ dependencies = [
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@ -1417,7 +1417,7 @@ dependencies = [
[[package]] [[package]]
name = "file-store" name = "file-store"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"faux", "faux",
"tempfile", "tempfile",
@ -1439,7 +1439,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"insta", "insta",
"nom", "nom",
@ -1459,7 +1459,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@ -1577,7 +1577,7 @@ dependencies = [
[[package]] [[package]]
name = "fuzzers" name = "fuzzers"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"clap", "clap",
@ -1891,7 +1891,7 @@ dependencies = [
[[package]] [[package]]
name = "index-scheduler" name = "index-scheduler"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@ -2088,7 +2088,7 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@ -2500,7 +2500,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]] [[package]]
name = "meili-snap" name = "meili-snap"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"insta", "insta",
"md5", "md5",
@ -2509,7 +2509,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch" name = "meilisearch"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"actix-cors", "actix-cors",
"actix-http", "actix-http",
@ -2599,7 +2599,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-auth" name = "meilisearch-auth"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"base64 0.21.2", "base64 0.21.2",
"enum-iterator", "enum-iterator",
@ -2618,7 +2618,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-types" name = "meilisearch-types"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
@ -2672,7 +2672,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"big_s", "big_s",
"bimap", "bimap",
@ -2994,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]] [[package]]
name = "permissive-json-pointer" name = "permissive-json-pointer"
version = "1.4.1" version = "1.4.2"
dependencies = [ dependencies = [
"big_s", "big_s",
"serde_json", "serde_json",

View File

@ -18,7 +18,7 @@ members = [
] ]
[workspace.package] [workspace.package]
version = "1.4.1" version = "1.4.2"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"] authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server" description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com" homepage = "https://meilisearch.com"

View File

@ -6,21 +6,109 @@ use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| { pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([ json!([
{"productId": 1, "shopId": 1}, {
{"productId": 2, "shopId": 1}, "id": 1,
{"productId": 3, "shopId": 2}, "description": "Leather Jacket",
{"productId": 4, "shopId": 2}, "brand": "Lee Jeans",
{"productId": 5, "shopId": 3}, "product_id": "123456",
{"productId": 6, "shopId": 3}, "color": "Brown"
{"productId": 7, "shopId": 4}, },
{"productId": 8, "shopId": 4}, {
{"productId": 9, "shopId": 5}, "id": 2,
{"productId": 10, "shopId": 5} "description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Black"
},
{
"id": 3,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Blue"
},
{
"id": 4,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Red"
},
{
"id": 5,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Blue"
},
{
"id": 6,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "Black"
},
{
"id": 7,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "White"
},
{
"id": 8,
"description": "Hoodie",
"brand": "Puma",
"product_id": "987654",
"color": "Gray"
},
{
"id": 9,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Green"
},
{
"id": 10,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Red"
},
{
"id": 11,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Blue"
},
{
"id": 12,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Indigo"
},
{
"id": 13,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Black"
},
{
"id": 14,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Stone Wash"
}
]) ])
}); });
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId"; pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId"; pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078 /// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test] #[actix_rt::test]
@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await; index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await; index.wait_task(1).await;
fn get_hits(Value(response): Value) -> Vec<i64> { fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap(); let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>() hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
} }
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await; let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
let hits = get_hits(response); let hits = get_hits(&response);
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2"); snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[1, 2]"); snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["estimatedTotalHits"] , @"11");
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await; let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
let hits = get_hits(response); let hits = get_hits(&response);
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2"); snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[3, 4]"); snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["estimatedTotalHits"], @"10");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await; let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
let hits = get_hits(response); let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"1"); snapshot!(hits.len(), @"1");
snapshot!(format!("{:?}", hits), @"[5]"); snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await; let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
let hits = get_hits(response); let hits = get_hits(&response);
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0"); snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
}
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
#[actix_rt::test]
async fn distinct_search_with_pagination_no_ranking() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"0");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["page"], @"1");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["page"], @"3");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"4");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"3");
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"2");
snapshot!(response["totalHits"], @"6");
} }

View File

@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
if let Some(distinct_fid) = distinct_fid { if let Some(distinct_fid) = distinct_fid {
let mut excluded = RoaringBitmap::new(); let mut excluded = RoaringBitmap::new();
let mut results = vec![]; let mut results = vec![];
let mut skip = 0;
for docid in universe.iter() { for docid in universe.iter() {
if results.len() >= length { if results.len() >= from + length {
break; break;
} }
if excluded.contains(docid) { if excluded.contains(docid) {
@ -56,16 +55,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
} }
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?; distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
skip += 1;
if skip <= from {
continue;
}
results.push(docid); results.push(docid);
} }
let mut all_candidates = universe - excluded; let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied()); all_candidates.extend(results.iter().copied());
// drain the results of the skipped elements
// this **must** be done **after** writing the entire results in `all_candidates` to ensure
// e.g. estimatedTotalHits is correct.
if results.len() >= from {
results.drain(..from);
} else {
results.clear();
}
return Ok(BucketSortOutput { return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()], scores: vec![Default::default(); results.len()],

View File

@ -202,7 +202,7 @@ test_distinct!(
EXTERNAL_DOCUMENTS_IDS.len(), EXTERNAL_DOCUMENTS_IDS.len(),
1, 1,
vec![], vec![],
2 3
); );
test_distinct!( test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078 // testing: https://github.com/meilisearch/meilisearch/issues/4078
@ -212,7 +212,7 @@ test_distinct!(
1, 1,
2, 2,
vec![], vec![],
1 3
); );
test_distinct!( test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078 // testing: https://github.com/meilisearch/meilisearch/issues/4078
@ -222,7 +222,7 @@ test_distinct!(
EXTERNAL_DOCUMENTS_IDS.len(), EXTERNAL_DOCUMENTS_IDS.len(),
2, 2,
vec![], vec![],
5 7
); );
test_distinct!( test_distinct!(
// testing: https://github.com/meilisearch/meilisearch/issues/4078 // testing: https://github.com/meilisearch/meilisearch/issues/4078
@ -232,5 +232,5 @@ test_distinct!(
2, 2,
4, 4,
vec![], vec![],
3 7
); );