From ef381e17bbd4647ae9894f6d56f5db87fce2f861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 19 Feb 2021 11:20:42 +0100 Subject: [PATCH] Compute the candidates for each sub query tree --- milli/src/search/criteria/words.rs | 77 +++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/milli/src/search/criteria/words.rs b/milli/src/search/criteria/words.rs index bf3aa8b12..93298b64e 100644 --- a/milli/src/search/criteria/words.rs +++ b/milli/src/search/criteria/words.rs @@ -1,10 +1,11 @@ use std::collections::HashMap; use std::mem::take; +use anyhow::bail; use roaring::RoaringBitmap; use crate::search::query_tree::Operation; -use super::{Candidates, Criterion, CriterionResult, Context}; +use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids}; pub struct Words<'t> { ctx: &'t dyn Context, @@ -64,9 +65,13 @@ impl<'t> Criterion for Words<'t> { None => candidates.clone(), }; + let mut found_candidates = resolve_candidates(self.ctx, &qt, &mut self.candidates_cache)?; + found_candidates.intersect_with(&candidates); + candidates.difference_with(&found_candidates); + return Ok(Some(CriterionResult { query_tree: Some(qt), - candidates: candidates.clone(), + candidates: found_candidates, bucket_candidates, })); }, @@ -107,3 +112,71 @@ fn explode_query_tree(query_tree: Operation) -> Vec { otherwise => vec![otherwise], } } + +fn resolve_candidates<'t>( + ctx: &'t dyn Context, + query_tree: &Operation, + cache: &mut HashMap<(Operation, u8), RoaringBitmap>, +) -> anyhow::Result +{ + fn resolve_operation<'t>( + ctx: &'t dyn Context, + query_tree: &Operation, + cache: &mut HashMap<(Operation, u8), RoaringBitmap>, + ) -> anyhow::Result + { + use Operation::{And, Consecutive, Or, Query}; + + match query_tree { + And(ops) => { + let mut candidates = RoaringBitmap::new(); + let mut first_loop = true; + for op in ops { + let docids = resolve_operation(ctx, op, cache)?; + if first_loop { + candidates = docids; + first_loop = false; + } else { + candidates.intersect_with(&docids); + } + } + Ok(candidates) + }, + Consecutive(ops) => { + let mut candidates = RoaringBitmap::new(); + let mut first_loop = true; + for slice in ops.windows(2) { + match (&slice[0], &slice[1]) { + (Operation::Query(left), Operation::Query(right)) => { + match query_pair_proximity_docids(ctx, left, right, 1)? { + pair_docids if pair_docids.is_empty() => { + return Ok(RoaringBitmap::new()) + }, + pair_docids if first_loop => { + candidates = pair_docids; + first_loop = false; + }, + pair_docids => { + candidates.intersect_with(&pair_docids); + }, + } + }, + _ => bail!("invalid consecutive query type"), + } + } + Ok(candidates) + }, + Or(_, ops) => { + let mut candidates = RoaringBitmap::new(); + for op in ops { + let docids = resolve_operation(ctx, op, cache)?; + candidates.union_with(&docids); + } + Ok(candidates) + }, + Query(q) => Ok(query_docids(ctx, q)?), + } + } + + resolve_operation(ctx, query_tree, cache) +}