From 026f54dcf7d8ea4760f6bf4fcb60fa5836211999 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 2 Dec 2020 21:08:48 +0100 Subject: [PATCH] Use the field id docid facet value database when sorting documents --- src/search/mod.rs | 75 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/src/search/mod.rs b/src/search/mod.rs index 652995f54..d1850871d 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -9,10 +9,12 @@ use levenshtein_automata::DFA; use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder; use log::debug; use once_cell::sync::Lazy; +use ordered_float::OrderedFloat; use roaring::bitmap::RoaringBitmap; use crate::facet::FacetType; use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetLevelValueI64Codec}; +use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; use crate::mdfs::Mdfs; use crate::query_tokens::{QueryTokens, QueryToken}; use crate::{Index, FieldId, DocumentId, Criterion}; @@ -162,34 +164,69 @@ impl<'a> Search<'a> { let mut output = Vec::new(); match facet_type { FacetType::Float => { - let facet_fn = if ascending { - FacetIter::::new + if documents_ids.len() <= 1000 { + let db = self.index.field_id_docid_facet_values.remap_key_type::(); + let mut docids_values = Vec::with_capacity(documents_ids.len() as usize); + for docid in documents_ids { + let left = (field_id, docid, f64::MIN); + let right = (field_id, docid, f64::MAX); + let mut iter = db.range(self.rtxn, &(left..=right))?; + let entry = if ascending { iter.next() } else { iter.last() }; + if let Some(((_, _, value), ())) = entry.transpose()? { + docids_values.push((docid, OrderedFloat(value))); + } + } + docids_values.sort_unstable_by_key(|(_, value)| *value); + let iter = docids_values.into_iter().map(|(id, _)| id).take(limit); + if ascending { Ok(iter.collect()) } else { Ok(iter.rev().collect()) } } else { - FacetIter::::new_reverse - }; - for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? { - let (_val, docids) = result?; - limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); - output.push(docids); - if limit_tmp == 0 { break } + let facet_fn = if ascending { + FacetIter::::new + } else { + FacetIter::::new_reverse + }; + for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? { + let (_val, docids) = result?; + limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); + output.push(docids); + if limit_tmp == 0 { break } + } + Ok(output.into_iter().flatten().take(limit).collect()) } }, FacetType::Integer => { - let facet_fn = if ascending { - FacetIter::::new + if documents_ids.len() <= 1000 { + let db = self.index.field_id_docid_facet_values.remap_key_type::(); + let mut docids_values = Vec::with_capacity(documents_ids.len() as usize); + for docid in documents_ids { + let left = (field_id, docid, i64::MIN); + let right = (field_id, docid, i64::MAX); + let mut iter = db.range(self.rtxn, &(left..=right))?; + let entry = if ascending { iter.next() } else { iter.last() }; + if let Some(((_, _, value), ())) = entry.transpose()? { + docids_values.push((docid, value)); + } + } + docids_values.sort_unstable_by_key(|(_, value)| *value); + let iter = docids_values.into_iter().map(|(id, _)| id).take(limit); + if ascending { Ok(iter.collect()) } else { Ok(iter.rev().collect()) } } else { - FacetIter::::new_reverse - }; - for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? { - let (_val, docids) = result?; - limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); - output.push(docids); - if limit_tmp == 0 { break } + let facet_fn = if ascending { + FacetIter::::new + } else { + FacetIter::::new_reverse + }; + for result in facet_fn(self.rtxn, self.index, field_id, documents_ids)? { + let (_val, docids) = result?; + limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); + output.push(docids); + if limit_tmp == 0 { break } + } + Ok(output.into_iter().flatten().take(limit).collect()) } }, FacetType::String => bail!("criteria facet type must be a number"), } - Ok(output.into_iter().flatten().take(limit).collect()) } pub fn execute(&self) -> anyhow::Result {