From 998d46ac1033e36496cfe7349a6312bc5ea03531 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?=
Date: Mon, 27 Feb 2023 16:14:53 +0100
Subject: [PATCH] Add support for search offset and limit

---
 milli/src/search/new/logger/detailed.rs | 27 ++++++++-
 milli/src/search/new/logger/mod.rs      | 17 +++++-
 milli/src/search/new/ranking_rules.rs   | 77 +++++++++++++++++++++----
 3 files changed, 105 insertions(+), 16 deletions(-)

diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs
index b59b30e6e..dc79a8d29 100644
--- a/milli/src/search/new/logger/detailed.rs
+++ b/milli/src/search/new/logger/detailed.rs
@@ -29,7 +29,7 @@ pub enum SearchEvents {
         universe: RoaringBitmap,
     },
     ExtendResults {
-        new: RoaringBitmap,
+        new: Vec<u32>,
     },
     WordsState {
         query_graph: QueryGraph,
@@ -39,6 +39,7 @@
         paths: PathsMap,
         empty_paths_cache: EmptyPathsCache,
     },
+    RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap },
 }

 pub struct DetailedSearchLogger {
@@ -97,6 +98,17 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
             universe: universe.clone(),
         })
     }
+    fn skip_bucket_ranking_rule<'transaction>(
+        &mut self,
+        ranking_rule_idx: usize,
+        ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
+        candidates: &RoaringBitmap,
+    ) {
+        self.events.push(SearchEvents::RankingRuleSkipBucket {
+            ranking_rule_idx,
+            candidates: candidates.clone(),
+        })
+    }

     fn end_iteration_ranking_rule<'transaction>(
         &mut self,
@@ -109,8 +121,8 @@
             universe: universe.clone(),
         })
     }
-    fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>) {
-        self.events.push(SearchEvents::ExtendResults { new: docids.collect() });
+    fn add_to_results(&mut self, docids: &[u32]) {
+        self.events.push(SearchEvents::ExtendResults { new: docids.to_vec() });
     }

     fn log_words_state(&mut self, query_graph: &QueryGraph) {
@@ -175,6 +187,15 @@
                 "{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket",)
                 .unwrap();
             }
+            SearchEvents::RankingRuleSkipBucket { ranking_rule_idx, candidates } => {
+                let old_activated_id = activated_id(&timestamp);
+                *timestamp.last_mut().unwrap() += 1;
+                let next_activated_id = activated_id(&timestamp);
+                let len = candidates.len();
+                writeln!(&mut file,
+                "{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",)
+                .unwrap();
+            }
             SearchEvents::RankingRuleEndIteration { universe, ranking_rule_idx } => {
                 let cur_activated_id = activated_id(&timestamp);
                 timestamp.pop();
diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs
index d1a94f7e5..fd39819ed 100644
--- a/milli/src/search/new/logger/mod.rs
+++ b/milli/src/search/new/logger/mod.rs
@@ -35,6 +35,13 @@ impl<Q> SearchLogger<Q> for DefaultSearchLogger {
         universe: &RoaringBitmap,
     ) {
     }
+    fn skip_bucket_ranking_rule<'transaction>(
+        &mut self,
+        ranking_rule_idx: usize,
+        ranking_rule: &dyn RankingRule<'transaction, Q>,
+        candidates: &RoaringBitmap,
+    ) {
+    }

     fn end_iteration_ranking_rule<'transaction>(
         &mut self,
@@ -44,7 +51,7 @@
     ) {
     }

-    fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>) {}
+    fn add_to_results(&mut self, docids: &[u32]) {}

     fn log_words_state(&mut self, query_graph: &Q) {}

@@ -76,13 +83,19 @@ pub trait SearchLogger<Q> {
         ranking_rule: &dyn RankingRule<'transaction, Q>,
         universe: &RoaringBitmap,
     );
+    fn skip_bucket_ranking_rule<'transaction>(
+        &mut self,
+        ranking_rule_idx: usize,
+        ranking_rule: &dyn RankingRule<'transaction, Q>,
+        candidates: &RoaringBitmap,
+    );

     fn end_iteration_ranking_rule<'transaction>(
         &mut self,
         ranking_rule_idx: usize,
         ranking_rule: &dyn RankingRule<'transaction, Q>,
         universe: &RoaringBitmap,
     );
-    fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>);
+    fn add_to_results(&mut self, docids: &[u32]);

     fn log_words_state(&mut self, query_graph: &Q);
diff --git a/milli/src/search/new/ranking_rules.rs b/milli/src/search/new/ranking_rules.rs
index 70682a561..ed51d3345 100644
--- a/milli/src/search/new/ranking_rules.rs
+++ b/milli/src/search/new/ranking_rules.rs
@@ -119,8 +119,8 @@ pub fn execute_search<'transaction>(
     universe: &RoaringBitmap,
     query_graph: &QueryGraph,
     logger: &mut dyn SearchLogger<QueryGraph>,
-    // _from: usize,
-    // _length: usize,
+    from: usize,
+    length: usize,
 ) -> Result<Vec<u32>> {
     let words = Words::new(TermsMatchingStrategy::Last);
     // let sort = Sort::new(index, txn, "sort1".to_owned(), true)?;
@@ -158,20 +158,58 @@
     }

     let mut results = vec![];
+    let mut cur_offset = 0usize;
+
     macro_rules! add_to_results {
         ($candidates:expr) => {
-            logger.add_to_results(&mut $candidates.iter().take(20 - results.len()));
-            let iter = $candidates.iter().take(20 - results.len());
-            results.extend(iter);
+            let candidates = $candidates;
+            let len = candidates.len();
+            if !candidates.is_empty() {
+                println!("cur_offset: {}, candidates_len: {}", cur_offset, candidates.len());
+                if cur_offset < from {
+                    println!("  cur_offset < from");
+                    if cur_offset + (candidates.len() as usize) < from {
+                        println!("    cur_offset + candidates_len < from");
+                        logger.skip_bucket_ranking_rule(
+                            cur_ranking_rule_index,
+                            ranking_rules[cur_ranking_rule_index].as_ref(),
+                            &candidates,
+                        );
+                    } else {
+                        println!("    cur_offset + candidates_len >= from");
+                        let all_candidates = candidates.iter().collect::<Vec<_>>();
+                        let (skipped_candidates, candidates) =
+                            all_candidates.split_at(from - cur_offset);
+                        logger.skip_bucket_ranking_rule(
+                            cur_ranking_rule_index,
+                            ranking_rules[cur_ranking_rule_index].as_ref(),
+                            &skipped_candidates.into_iter().collect(),
+                        );
+                        let candidates = candidates
+                            .iter()
+                            .take(length - results.len())
+                            .copied()
+                            .collect::<Vec<_>>();
+                        logger.add_to_results(&candidates);
+                        results.extend(&candidates);
+                    }
+                } else {
+                    let candidates =
+                        candidates.iter().take(length - results.len()).collect::<Vec<_>>();
+                    logger.add_to_results(&candidates);
+                    results.extend(&candidates);
+                }
+            }
+            cur_offset += len as usize;
         };
     }

     // TODO: skip buckets when we want to start from an offset
-    while results.len() < 20 {
+    while results.len() < length {
         // The universe for this bucket is zero or one element, so we don't need to sort
         // anything, just extend the results and go back to the parent ranking rule.
         if candidates[cur_ranking_rule_index].len() <= 1 {
-            add_to_results!(candidates[cur_ranking_rule_index]);
+            add_to_results!(&candidates[cur_ranking_rule_index]);
             back!();
             continue;
         }
@@ -197,6 +235,14 @@
         // many candidates, give to next ranking rule, if any
         if cur_ranking_rule_index == ranking_rules_len - 1 {
             add_to_results!(next_bucket.candidates);
+        } else if cur_offset + (next_bucket.candidates.len() as usize) < from {
+            cur_offset += next_bucket.candidates.len() as usize;
+            logger.skip_bucket_ranking_rule(
+                cur_ranking_rule_index,
+                ranking_rules[cur_ranking_rule_index].as_ref(),
+                &next_bucket.candidates,
+            );
+            continue;
         } else {
             cur_ranking_rule_index += 1;
             candidates[cur_ranking_rule_index] = next_bucket.candidates.clone();
@@ -296,7 +342,9 @@
             &mut db_cache,
             &universe,
             &query_graph,
-            &mut logger, /* 0, 20 */
+            &mut logger,
+            0,
+            20,
         )
         .unwrap();
         println!("{results:?}")
@@ -317,8 +365,13 @@

         let mut db_cache = DatabaseCache::default();

-        let query_graph =
-            make_query_graph(&index, &txn, &mut db_cache, "a a a a a a a a a a").unwrap();
+        let query_graph = make_query_graph(
+            &index,
+            &txn,
+            &mut db_cache,
+            "and he was released from prison by the government",
+        )
+        .unwrap();

         // TODO: filters + maybe distinct attributes?
         let universe = get_start_universe(
@@ -338,7 +391,9 @@
             &mut db_cache,
             &universe,
             &query_graph,
-            &mut logger, //&mut DefaultSearchLogger, /* 0, 20 */
+            &mut logger, //&mut DefaultSearchLogger,
+            500,
+            100,
         )
         .unwrap();
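For reference, the pagination logic that the patch threads through the `add_to_results!` macro and the bucket-skip branch can be sketched in isolation. The sketch below is illustrative only and not part of the patch: the `paginate_buckets` name is hypothetical, plain `Vec<u32>` buckets stand in for the `RoaringBitmap` candidates produced by the ranking rules, and the `SearchLogger` calls and debug `println!`s are omitted.

// Standalone sketch (not part of the patch) of the offset/limit accounting.
// `buckets` are yielded best-first, exactly as ranking-rule buckets would be.
fn paginate_buckets(buckets: &[Vec<u32>], from: usize, length: usize) -> Vec<u32> {
    let mut results = Vec::new();
    // Documents already seen across all buckets, i.e. the patch's `cur_offset`.
    let mut cur_offset = 0usize;

    for bucket in buckets {
        if results.len() >= length {
            break;
        }
        let len = bucket.len();
        if cur_offset + len <= from {
            // The whole bucket lies before the requested offset: skip it entirely,
            // which is what `logger.skip_bucket_ranking_rule(..)` records in the patch.
            cur_offset += len;
            continue;
        }
        // Drop the documents that fall before `from`, then take at most
        // `length - results.len()` of the remaining ones.
        let to_skip = from.saturating_sub(cur_offset);
        results.extend(bucket.iter().skip(to_skip).take(length - results.len()).copied());
        cur_offset += len;
    }
    results
}

fn main() {
    let buckets = vec![vec![10, 11], vec![20, 21, 22], vec![30, 31]];
    // from = 3, length = 2: skip the first bucket and one document of the second.
    assert_eq!(paginate_buckets(&buckets, 3, 2), vec![21, 22]);
}

In the patch itself the same accounting is shared between final buckets (inside the macro) and intermediate ranking rules (the new `else if` branch), so a bucket that ends before `from` can be discarded without ever descending into child ranking rules.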