From 54dacb362d9f60b8b9c60b962cceae8b3a3c477e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 14 Jan 2020 17:10:35 +0100 Subject: [PATCH] Use different algorithms for different documents ratios --- meilisearch-core/src/bucket_sort.rs | 68 ++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 1ff05086b..bebfa5a5f 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -94,37 +94,65 @@ where let before = Instant::now(); + let docidslen = docids.len() as f32; let mut bare_matches = Vec::new(); mk_arena!(arena); for ((query, input, distance), matches) in queries { let postings_list_view = PostingsListView::original(Rc::from(input), Rc::new(matches)); - let mut offset = 0; - for id in docids.as_slice() { - let di = DocIndex { document_id: *id, ..DocIndex::default() }; - let pos = exponential_search(&postings_list_view[offset..], &di).unwrap_or_else(|x| x); + let pllen = postings_list_view.len() as f32; - let group = postings_list_view[offset + pos..] - .linear_group_by_key(|m| m.document_id) - .next() - .filter(|matches| matches[0].document_id == *id); + if docidslen / pllen >= 0.8 { + let mut offset = 0; + for matches in postings_list_view.linear_group_by_key(|m| m.document_id) { + let document_id = matches[0].document_id; + if docids.contains(&document_id) { + let range = postings_list_view.range(offset, matches.len()); + let posting_list_index = arena.add(range); - offset += pos; + let bare_match = BareMatch { + document_id, + query_index: query.id, + distance, + is_exact: true, // TODO where can I find this info? + postings_list: posting_list_index, + }; - if let Some(matches) = group { - let range = postings_list_view.range(pos, matches.len()); - let posting_list_index = arena.add(range); + bare_matches.push(bare_match); + } - let bare_match = BareMatch { - document_id: *id, - query_index: query.id, - distance: distance, - is_exact: true, // TODO where can I find this info? - postings_list: posting_list_index, - }; + offset += matches.len(); + } - bare_matches.push(bare_match); + } else { + + let mut offset = 0; + for id in docids.as_slice() { + let di = DocIndex { document_id: *id, ..DocIndex::default() }; + let pos = exponential_search(&postings_list_view[offset..], &di).unwrap_or_else(|x| x); + + offset += pos; + + let group = postings_list_view[offset..] + .linear_group_by_key(|m| m.document_id) + .next() + .filter(|matches| matches[0].document_id == *id); + + if let Some(matches) = group { + let range = postings_list_view.range(offset, matches.len()); + let posting_list_index = arena.add(range); + + let bare_match = BareMatch { + document_id: *id, + query_index: query.id, + distance, + is_exact: true, // TODO where can I find this info? + postings_list: posting_list_index, + }; + + bare_matches.push(bare_match); + } } } }