From d4715e0c4d1a2b7517eb03ad10c3c586ce86a12d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 12 Dec 2023 12:08:23 +0100 Subject: [PATCH] Fix same vector sort bug --- milli/src/search/new/vector_sort.rs | 42 +++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index 2d7cdbe39..38fcfde48 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -35,7 +35,11 @@ impl VectorSort { }) } - fn fill_buffer(&mut self, ctx: &mut SearchContext<'_>) -> Result<()> { + fn fill_buffer( + &mut self, + ctx: &mut SearchContext<'_>, + vector_candidates: &RoaringBitmap, + ) -> Result<()> { let readers: std::result::Result, _> = (0..=u8::MAX) .map_while(|k| { arroy::Reader::open(ctx.txn, k.into(), ctx.index.vector_arroy) @@ -54,13 +58,8 @@ impl VectorSort { let mut results = Vec::new(); for reader in readers.iter() { - let nns_by_vector = reader.nns_by_vector( - ctx.txn, - target, - self.limit, - None, - Some(&self.vector_candidates), - )?; + let nns_by_vector = + reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?; let vectors: std::result::Result, _> = nns_by_vector .iter() .map(|(docid, _)| reader.item_vector(ctx.txn, *docid).transpose().unwrap()) @@ -90,8 +89,8 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort { assert!(self.query.is_none()); self.query = Some(query.clone()); - self.vector_candidates &= universe; - self.fill_buffer(ctx)?; + let vector_candidates = &self.vector_candidates & universe; + self.fill_buffer(ctx, &vector_candidates)?; Ok(()) } @@ -103,9 +102,9 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort { universe: &RoaringBitmap, ) -> Result>> { let query = self.query.as_ref().unwrap().clone(); - self.vector_candidates &= universe; + let vector_candidates = &self.vector_candidates & universe; - if self.vector_candidates.is_empty() { + if vector_candidates.is_empty() { return Ok(Some(RankingRuleOutput { query, candidates: universe.clone(), @@ -117,7 +116,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort { } for (docid, distance, vector) in self.cached_sorted_docids.by_ref() { - if self.vector_candidates.contains(docid) { + if vector_candidates.contains(docid) { let score = 1.0 - distance; let score = self .distribution_shift @@ -136,7 +135,22 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort { // if we got out of this loop it means we've exhausted our cache. // we need to refill it and run the function again. - self.fill_buffer(ctx)?; + self.fill_buffer(ctx, &vector_candidates)?; + + // we tried filling the buffer, but it remained empty 😢 + // it means we don't actually have any document remaining in the universe with a vector. + // => exit + if self.cached_sorted_docids.len() == 0 { + return Ok(Some(RankingRuleOutput { + query, + candidates: universe.clone(), + score: ScoreDetails::Vector(score_details::Vector { + target_vector: self.target.clone(), + value_similarity: None, + }), + })); + } + self.next_bucket(ctx, _logger, universe) }