mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Merge #849
849: Update nbHits count with filtered documents r=MarinPostma a=balajisivaraman Closes #764 close #1039 After discussing with @MarinPostma on Slack, this is my first attempt at implementing this for the basic flow that will go through `bucket_sort_with_distinct`. A few thoughts here: - For getting the count of filtered documents alone, I originally thought of using `filter_map.values().filter(|&&v| !v).count()`. In a few cases, this was the same as what I have now implemented. But I realised I couldn't do something similar for `distinct`. So for being consistent, I have implemented both in a similar fashion. - I also needed the `contains_key` check to ensure we're not counting the same document ID twice. @MarinPostma also mentioned that this will be an approximation since the sort is lazy. In the test example that I've updated, the actual filtered count will be just 19 (for `male` records), but due to the `limit` in play, it returns 32 (filtering out 11 records overall). Please let me know if this is the kind of fix we are looking for, and I can implement it in the placeholder search also. Co-authored-by: Balaji Sivaraman <balaji@balajisivaraman.com>
This commit is contained in:
commit
f564a9ce51
@ -212,6 +212,7 @@ where
|
||||
FD: Fn(DocumentId) -> Option<u64>,
|
||||
{
|
||||
let mut result = SortResult::default();
|
||||
let mut filtered_count = 0;
|
||||
|
||||
let words_set = index.main.words_fst(reader)?;
|
||||
let stop_words = index.main.stop_words_fst(reader)?;
|
||||
@ -322,19 +323,36 @@ where
|
||||
let filter_accepted = match &filter {
|
||||
Some(filter) => {
|
||||
let entry = filter_map.entry(document.id);
|
||||
*entry.or_insert_with(|| (filter)(document.id))
|
||||
*entry.or_insert_with(|| {
|
||||
let accepted = (filter)(document.id);
|
||||
// we only want to count it out the first time we see it
|
||||
if !accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
accepted
|
||||
})
|
||||
}
|
||||
None => true,
|
||||
};
|
||||
|
||||
if filter_accepted {
|
||||
let entry = key_cache.entry(document.id);
|
||||
let key = entry.or_insert_with(|| (distinct)(document.id).map(Rc::new));
|
||||
let mut seen = true;
|
||||
let key = entry.or_insert_with(|| {
|
||||
seen = false;
|
||||
(distinct)(document.id).map(Rc::new)
|
||||
});
|
||||
|
||||
match key.clone() {
|
||||
let distinct = match key.clone() {
|
||||
Some(key) => buf_distinct.register(key),
|
||||
None => buf_distinct.register_without_key(),
|
||||
};
|
||||
|
||||
// we only want to count the document if it is the first time we see it and
|
||||
// if it wasn't accepted by distinct
|
||||
if !seen && !distinct {
|
||||
filtered_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// the requested range end is reached: stop computing distinct
|
||||
@ -396,7 +414,7 @@ where
|
||||
}
|
||||
}
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len();
|
||||
result.nb_hits = docids.len() - filtered_count;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
@ -225,10 +225,17 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> {
|
||||
|
||||
fn sort_result_from_docids(&self, docids: &[DocumentId], range: Range<usize>) -> SortResult {
|
||||
let mut sort_result = SortResult::default();
|
||||
let mut filtered_count = 0;
|
||||
let mut result = match self.filter {
|
||||
Some(ref filter) => docids
|
||||
.iter()
|
||||
.filter(|item| (filter)(**item))
|
||||
.filter(|item| {
|
||||
let accepted = (filter)(**item);
|
||||
if !accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
accepted
|
||||
})
|
||||
.skip(range.start)
|
||||
.take(range.end - range.start)
|
||||
.map(|&id| Document::from_highlights(id, &[]))
|
||||
@ -248,15 +255,19 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> {
|
||||
result.retain(|doc| {
|
||||
let id = doc.id;
|
||||
let key = (distinct)(id);
|
||||
match key {
|
||||
let distinct_accepted = match key {
|
||||
Some(key) => distinct_map.register(key),
|
||||
None => distinct_map.register_without_key(),
|
||||
};
|
||||
if !distinct_accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
distinct_accepted
|
||||
});
|
||||
}
|
||||
|
||||
sort_result.documents = result;
|
||||
sort_result.nb_hits = docids.len();
|
||||
sort_result.nb_hits = docids.len() - filtered_count;
|
||||
sort_result
|
||||
}
|
||||
|
||||
|
@ -588,3 +588,48 @@ async fn placeholder_search_with_empty_query() {
|
||||
assert_eq!(response["hits"].as_array().unwrap().len(), 3);
|
||||
});
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_filter_nb_hits_search_placeholder() {
|
||||
let mut server = common::Server::with_uid("test");
|
||||
|
||||
let body = json!({
|
||||
"uid": "test",
|
||||
"primaryKey": "id",
|
||||
});
|
||||
|
||||
server.create_index(body).await;
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "a",
|
||||
"color": "green",
|
||||
"size": 1,
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "a",
|
||||
"color": "green",
|
||||
"size": 2,
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "a",
|
||||
"color": "blue",
|
||||
"size": 3,
|
||||
},
|
||||
]);
|
||||
|
||||
server.add_or_update_multiple_documents(documents).await;
|
||||
let (response, _) = server.search_post(json!({})).await;
|
||||
assert_eq!(response["nbHits"], 3);
|
||||
|
||||
server.update_distinct_attribute(json!("color")).await;
|
||||
|
||||
let (response, _) = server.search_post(json!({})).await;
|
||||
assert_eq!(response["nbHits"], 2);
|
||||
|
||||
let (response, _) = server.search_post(json!({"filters": "size < 3"})).await;
|
||||
println!("result: {}", response);
|
||||
assert_eq!(response["nbHits"], 1);
|
||||
}
|
||||
|
@ -1829,3 +1829,51 @@ async fn update_documents_with_facet_distribution() {
|
||||
let (response2, _) = server.search_post(search).await;
|
||||
assert_json_eq!(expected_facet_distribution, response2["facetsDistribution"].clone());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_filter_nb_hits_search_normal() {
|
||||
let mut server = common::Server::with_uid("test");
|
||||
|
||||
let body = json!({
|
||||
"uid": "test",
|
||||
"primaryKey": "id",
|
||||
});
|
||||
|
||||
server.create_index(body).await;
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "a",
|
||||
"color": "green",
|
||||
"size": 1,
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "a",
|
||||
"color": "green",
|
||||
"size": 2,
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "a",
|
||||
"color": "blue",
|
||||
"size": 3,
|
||||
},
|
||||
]);
|
||||
|
||||
server.add_or_update_multiple_documents(documents).await;
|
||||
let (response, _) = server.search_post(json!({"q": "a"})).await;
|
||||
assert_eq!(response["nbHits"], 3);
|
||||
|
||||
let (response, _) = server.search_post(json!({"q": "a", "filters": "size = 1"})).await;
|
||||
assert_eq!(response["nbHits"], 1);
|
||||
|
||||
server.update_distinct_attribute(json!("color")).await;
|
||||
|
||||
let (response, _) = server.search_post(json!({"q": "a"})).await;
|
||||
assert_eq!(response["nbHits"], 2);
|
||||
|
||||
let (response, _) = server.search_post(json!({"q": "a", "filters": "size < 3"})).await;
|
||||
println!("result: {}", response);
|
||||
assert_eq!(response["nbHits"], 1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user