mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 20:15:07 +08:00
Merge #742
742: Add a "Criterion implementation strategy" parameter to Search r=irevoire a=loiclec Add a parameter to search requests which determines the implementation strategy of the criteria. This can be either `set-based`, `iterative`, or `dynamic` (i.e. choosing between set-based or iterative at search time). See https://github.com/meilisearch/milli/issues/755 for more context about this change. Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
commit
a8defb585b
@ -1,4 +1,5 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
use std::fmt::Display;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{stdin, BufRead, BufReader, Cursor, Read, Write};
|
use std::io::{stdin, BufRead, BufReader, Cursor, Read, Write};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
@ -13,7 +14,7 @@ use milli::update::UpdateIndexingStep::{
|
|||||||
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
|
ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
|
||||||
};
|
};
|
||||||
use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
|
use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
|
||||||
use milli::{heed, Index, Object};
|
use milli::{heed, CriterionImplementationStrategy, Index, Object};
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
@ -349,6 +350,29 @@ fn documents_from_csv(reader: impl Read) -> Result<Vec<u8>> {
|
|||||||
documents.into_inner().map_err(Into::into)
|
documents.into_inner().map_err(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
struct SearchStrategyOption(CriterionImplementationStrategy);
|
||||||
|
impl FromStr for SearchStrategyOption {
|
||||||
|
type Err = String;
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s.to_lowercase().as_str() {
|
||||||
|
"dynamic" => Ok(SearchStrategyOption(CriterionImplementationStrategy::Dynamic)),
|
||||||
|
"set" => Ok(SearchStrategyOption(CriterionImplementationStrategy::OnlySetBased)),
|
||||||
|
"iterative" => Ok(SearchStrategyOption(CriterionImplementationStrategy::OnlyIterative)),
|
||||||
|
_ => Err("could not parse {s} as a criterion implementation strategy, available options are `dynamic`, `set`, and `iterative`".to_owned()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Display for SearchStrategyOption {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self.0 {
|
||||||
|
CriterionImplementationStrategy::OnlyIterative => Display::fmt("iterative", f),
|
||||||
|
CriterionImplementationStrategy::OnlySetBased => Display::fmt("set", f),
|
||||||
|
CriterionImplementationStrategy::Dynamic => Display::fmt("dynamic", f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
struct Search {
|
struct Search {
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
@ -360,6 +384,8 @@ struct Search {
|
|||||||
limit: Option<usize>,
|
limit: Option<usize>,
|
||||||
#[structopt(short, long, conflicts_with = "query")]
|
#[structopt(short, long, conflicts_with = "query")]
|
||||||
interactive: bool,
|
interactive: bool,
|
||||||
|
#[structopt(short, long)]
|
||||||
|
strategy: Option<SearchStrategyOption>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Performer for Search {
|
impl Performer for Search {
|
||||||
@ -379,6 +405,7 @@ impl Performer for Search {
|
|||||||
&self.filter,
|
&self.filter,
|
||||||
&self.offset,
|
&self.offset,
|
||||||
&self.limit,
|
&self.limit,
|
||||||
|
&self.strategy,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let time = now.elapsed();
|
let time = now.elapsed();
|
||||||
@ -386,6 +413,7 @@ impl Performer for Search {
|
|||||||
let hits = serde_json::to_string_pretty(&jsons)?;
|
let hits = serde_json::to_string_pretty(&jsons)?;
|
||||||
|
|
||||||
println!("{}", hits);
|
println!("{}", hits);
|
||||||
|
|
||||||
eprintln!("found {} results in {:.02?}", jsons.len(), time);
|
eprintln!("found {} results in {:.02?}", jsons.len(), time);
|
||||||
}
|
}
|
||||||
_ => break,
|
_ => break,
|
||||||
@ -399,6 +427,7 @@ impl Performer for Search {
|
|||||||
&self.filter,
|
&self.filter,
|
||||||
&self.offset,
|
&self.offset,
|
||||||
&self.limit,
|
&self.limit,
|
||||||
|
&self.strategy,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let time = now.elapsed();
|
let time = now.elapsed();
|
||||||
@ -420,6 +449,7 @@ impl Search {
|
|||||||
filter: &Option<String>,
|
filter: &Option<String>,
|
||||||
offset: &Option<usize>,
|
offset: &Option<usize>,
|
||||||
limit: &Option<usize>,
|
limit: &Option<usize>,
|
||||||
|
strategy: &Option<SearchStrategyOption>,
|
||||||
) -> Result<Vec<Object>> {
|
) -> Result<Vec<Object>> {
|
||||||
let txn = index.read_txn()?;
|
let txn = index.read_txn()?;
|
||||||
let mut search = index.search(&txn);
|
let mut search = index.search(&txn);
|
||||||
@ -441,6 +471,9 @@ impl Search {
|
|||||||
if let Some(limit) = limit {
|
if let Some(limit) = limit {
|
||||||
search.limit(*limit);
|
search.limit(*limit);
|
||||||
}
|
}
|
||||||
|
if let Some(strategy) = strategy {
|
||||||
|
search.criterion_implementation_strategy(strategy.0);
|
||||||
|
}
|
||||||
|
|
||||||
let result = search.execute()?;
|
let result = search.execute()?;
|
||||||
|
|
||||||
|
@ -42,8 +42,9 @@ pub use self::heed_codec::{
|
|||||||
};
|
};
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{
|
pub use self::search::{
|
||||||
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
|
CriterionImplementationStrategy, FacetDistribution, Filter, FormatOptions, MatchBounds,
|
||||||
MatchingWords, Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
MatcherBuilder, MatchingWord, MatchingWords, Search, SearchResult, TermsMatchingStrategy,
|
||||||
|
DEFAULT_VALUES_PER_FACET,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||||
|
@ -12,6 +12,7 @@ use crate::heed_codec::ByteSliceRefCodec;
|
|||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
|
||||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
|
use crate::search::CriterionImplementationStrategy;
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
/// Threshold on the number of candidates that will make
|
/// Threshold on the number of candidates that will make
|
||||||
@ -29,6 +30,7 @@ pub struct AscDesc<'t> {
|
|||||||
allowed_candidates: RoaringBitmap,
|
allowed_candidates: RoaringBitmap,
|
||||||
initial_candidates: InitialCandidates,
|
initial_candidates: InitialCandidates,
|
||||||
faceted_candidates: RoaringBitmap,
|
faceted_candidates: RoaringBitmap,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -38,8 +40,9 @@ impl<'t> AscDesc<'t> {
|
|||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
Self::new(index, rtxn, parent, field_name, true)
|
Self::new(index, rtxn, parent, field_name, true, implementation_strategy)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn desc(
|
pub fn desc(
|
||||||
@ -47,8 +50,9 @@ impl<'t> AscDesc<'t> {
|
|||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
Self::new(index, rtxn, parent, field_name, false)
|
Self::new(index, rtxn, parent, field_name, false, implementation_strategy)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new(
|
fn new(
|
||||||
@ -57,6 +61,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
is_ascending: bool,
|
is_ascending: bool,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let field_id = fields_ids_map.id(&field_name);
|
let field_id = fields_ids_map.id(&field_name);
|
||||||
@ -82,6 +87,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
allowed_candidates: RoaringBitmap::new(),
|
allowed_candidates: RoaringBitmap::new(),
|
||||||
faceted_candidates,
|
faceted_candidates,
|
||||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||||
|
implementation_strategy,
|
||||||
parent,
|
parent,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -149,6 +155,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
field_id,
|
field_id,
|
||||||
self.is_ascending,
|
self.is_ascending,
|
||||||
candidates & &self.faceted_candidates,
|
candidates & &self.faceted_candidates,
|
||||||
|
self.implementation_strategy,
|
||||||
)?,
|
)?,
|
||||||
None => Box::new(std::iter::empty()),
|
None => Box::new(std::iter::empty()),
|
||||||
};
|
};
|
||||||
@ -170,6 +177,51 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn facet_ordered_iterative<'t>(
|
||||||
|
index: &'t Index,
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
is_ascending: bool,
|
||||||
|
candidates: RoaringBitmap,
|
||||||
|
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||||
|
let number_iter = iterative_facet_number_ordered_iter(
|
||||||
|
index,
|
||||||
|
rtxn,
|
||||||
|
field_id,
|
||||||
|
is_ascending,
|
||||||
|
candidates.clone(),
|
||||||
|
)?;
|
||||||
|
let string_iter =
|
||||||
|
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
|
||||||
|
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn facet_ordered_set_based<'t>(
|
||||||
|
index: &'t Index,
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
is_ascending: bool,
|
||||||
|
candidates: RoaringBitmap,
|
||||||
|
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||||
|
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
|
||||||
|
|
||||||
|
let number_iter = make_iter(
|
||||||
|
rtxn,
|
||||||
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
|
field_id,
|
||||||
|
candidates.clone(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let string_iter = make_iter(
|
||||||
|
rtxn,
|
||||||
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
|
field_id,
|
||||||
|
candidates,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(Box::new(number_iter.chain(string_iter)))
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns an iterator over groups of the given candidates in ascending or descending order.
|
/// Returns an iterator over groups of the given candidates in ascending or descending order.
|
||||||
///
|
///
|
||||||
/// It will either use an iterative or a recursive method on the whole facet database depending
|
/// It will either use an iterative or a recursive method on the whole facet database depending
|
||||||
@ -180,36 +232,22 @@ fn facet_ordered<'t>(
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
is_ascending: bool,
|
is_ascending: bool,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||||
if candidates.len() <= CANDIDATES_THRESHOLD {
|
match implementation_strategy {
|
||||||
let number_iter = iterative_facet_number_ordered_iter(
|
CriterionImplementationStrategy::OnlyIterative => {
|
||||||
index,
|
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
|
||||||
rtxn,
|
}
|
||||||
field_id,
|
CriterionImplementationStrategy::OnlySetBased => {
|
||||||
is_ascending,
|
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
|
||||||
candidates.clone(),
|
}
|
||||||
)?;
|
CriterionImplementationStrategy::Dynamic => {
|
||||||
let string_iter =
|
if candidates.len() <= CANDIDATES_THRESHOLD {
|
||||||
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
|
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
|
||||||
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
|
} else {
|
||||||
} else {
|
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
|
||||||
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
|
}
|
||||||
|
}
|
||||||
let number_iter = make_iter(
|
|
||||||
rtxn,
|
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
|
||||||
field_id,
|
|
||||||
candidates.clone(),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let string_iter = make_iter(
|
|
||||||
rtxn,
|
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
|
||||||
field_id,
|
|
||||||
candidates,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
Ok(Box::new(number_iter.chain(string_iter)))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,7 +9,9 @@ use roaring::RoaringBitmap;
|
|||||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||||
use crate::search::criteria::{InitialCandidates, Query};
|
use crate::search::criteria::{InitialCandidates, Query};
|
||||||
use crate::search::query_tree::{Operation, QueryKind};
|
use crate::search::query_tree::{Operation, QueryKind};
|
||||||
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
|
use crate::search::{
|
||||||
|
build_dfa, word_derivations, CriterionImplementationStrategy, WordDerivationsCache,
|
||||||
|
};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// To be able to divide integers by the number of words in the query
|
/// To be able to divide integers by the number of words in the query
|
||||||
@ -30,10 +32,15 @@ pub struct Attribute<'t> {
|
|||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
||||||
set_buckets: Option<BinaryHeap<Branch<'t>>>,
|
set_buckets: Option<BinaryHeap<Branch<'t>>>,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> Attribute<'t> {
|
impl<'t> Attribute<'t> {
|
||||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
|
pub fn new(
|
||||||
|
ctx: &'t dyn Context<'t>,
|
||||||
|
parent: Box<dyn Criterion + 't>,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
|
) -> Self {
|
||||||
Attribute {
|
Attribute {
|
||||||
ctx,
|
ctx,
|
||||||
state: None,
|
state: None,
|
||||||
@ -41,6 +48,7 @@ impl<'t> Attribute<'t> {
|
|||||||
parent,
|
parent,
|
||||||
linear_buckets: None,
|
linear_buckets: None,
|
||||||
set_buckets: None,
|
set_buckets: None,
|
||||||
|
implementation_strategy,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -64,7 +72,15 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
||||||
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
|
let found_candidates = if matches!(
|
||||||
|
self.implementation_strategy,
|
||||||
|
CriterionImplementationStrategy::OnlyIterative
|
||||||
|
) || (matches!(
|
||||||
|
self.implementation_strategy,
|
||||||
|
CriterionImplementationStrategy::Dynamic
|
||||||
|
) && allowed_candidates.len()
|
||||||
|
< CANDIDATES_THRESHOLD)
|
||||||
|
{
|
||||||
let linear_buckets = match self.linear_buckets.as_mut() {
|
let linear_buckets = match self.linear_buckets.as_mut() {
|
||||||
Some(linear_buckets) => linear_buckets,
|
Some(linear_buckets) => linear_buckets,
|
||||||
None => {
|
None => {
|
||||||
|
@ -14,6 +14,7 @@ use self::r#final::Final;
|
|||||||
use self::typo::Typo;
|
use self::typo::Typo;
|
||||||
use self::words::Words;
|
use self::words::Words;
|
||||||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||||
|
use super::CriterionImplementationStrategy;
|
||||||
use crate::search::criteria::geo::Geo;
|
use crate::search::criteria::geo::Geo;
|
||||||
use crate::search::{word_derivations, Distinct, WordDerivationsCache};
|
use crate::search::{word_derivations, Distinct, WordDerivationsCache};
|
||||||
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
|
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
|
||||||
@ -369,6 +370,7 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
|
Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn build<D: 't + Distinct>(
|
pub fn build<D: 't + Distinct>(
|
||||||
&'t self,
|
&'t self,
|
||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
@ -377,6 +379,7 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
sort_criteria: Option<Vec<AscDescName>>,
|
sort_criteria: Option<Vec<AscDescName>>,
|
||||||
exhaustive_number_hits: bool,
|
exhaustive_number_hits: bool,
|
||||||
distinct: Option<D>,
|
distinct: Option<D>,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
) -> Result<Final<'t>> {
|
) -> Result<Final<'t>> {
|
||||||
use crate::criterion::Criterion as Name;
|
use crate::criterion::Criterion as Name;
|
||||||
|
|
||||||
@ -402,12 +405,14 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
self.rtxn,
|
self.rtxn,
|
||||||
criterion,
|
criterion,
|
||||||
field.to_string(),
|
field.to_string(),
|
||||||
|
implementation_strategy,
|
||||||
)?),
|
)?),
|
||||||
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
|
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
|
||||||
self.index,
|
self.index,
|
||||||
self.rtxn,
|
self.rtxn,
|
||||||
criterion,
|
criterion,
|
||||||
field.to_string(),
|
field.to_string(),
|
||||||
|
implementation_strategy,
|
||||||
)?),
|
)?),
|
||||||
AscDescName::Asc(Member::Geo(point)) => {
|
AscDescName::Asc(Member::Geo(point)) => {
|
||||||
Box::new(Geo::asc(self.index, self.rtxn, criterion, *point)?)
|
Box::new(Geo::asc(self.index, self.rtxn, criterion, *point)?)
|
||||||
@ -421,15 +426,27 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
}
|
}
|
||||||
None => criterion,
|
None => criterion,
|
||||||
},
|
},
|
||||||
Name::Proximity => Box::new(Proximity::new(self, criterion)),
|
Name::Proximity => {
|
||||||
Name::Attribute => Box::new(Attribute::new(self, criterion)),
|
Box::new(Proximity::new(self, criterion, implementation_strategy))
|
||||||
|
}
|
||||||
|
Name::Attribute => {
|
||||||
|
Box::new(Attribute::new(self, criterion, implementation_strategy))
|
||||||
|
}
|
||||||
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
|
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
|
||||||
Name::Asc(field) => {
|
Name::Asc(field) => Box::new(AscDesc::asc(
|
||||||
Box::new(AscDesc::asc(self.index, self.rtxn, criterion, field)?)
|
self.index,
|
||||||
}
|
self.rtxn,
|
||||||
Name::Desc(field) => {
|
criterion,
|
||||||
Box::new(AscDesc::desc(self.index, self.rtxn, criterion, field)?)
|
field,
|
||||||
}
|
implementation_strategy,
|
||||||
|
)?),
|
||||||
|
Name::Desc(field) => Box::new(AscDesc::desc(
|
||||||
|
self.index,
|
||||||
|
self.rtxn,
|
||||||
|
criterion,
|
||||||
|
field,
|
||||||
|
implementation_strategy,
|
||||||
|
)?),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ use super::{
|
|||||||
};
|
};
|
||||||
use crate::search::criteria::InitialCandidates;
|
use crate::search::criteria::InitialCandidates;
|
||||||
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
||||||
use crate::search::{build_dfa, WordDerivationsCache};
|
use crate::search::{build_dfa, CriterionImplementationStrategy, WordDerivationsCache};
|
||||||
use crate::{Position, Result};
|
use crate::{Position, Result};
|
||||||
|
|
||||||
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
|
type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;
|
||||||
@ -33,10 +33,15 @@ pub struct Proximity<'t> {
|
|||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
candidates_cache: Cache,
|
candidates_cache: Cache,
|
||||||
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
|
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> Proximity<'t> {
|
impl<'t> Proximity<'t> {
|
||||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
|
pub fn new(
|
||||||
|
ctx: &'t dyn Context<'t>,
|
||||||
|
parent: Box<dyn Criterion + 't>,
|
||||||
|
implementation_strategy: CriterionImplementationStrategy,
|
||||||
|
) -> Self {
|
||||||
Proximity {
|
Proximity {
|
||||||
ctx,
|
ctx,
|
||||||
state: None,
|
state: None,
|
||||||
@ -45,6 +50,7 @@ impl<'t> Proximity<'t> {
|
|||||||
parent,
|
parent,
|
||||||
candidates_cache: Cache::new(),
|
candidates_cache: Cache::new(),
|
||||||
plane_sweep_cache: None,
|
plane_sweep_cache: None,
|
||||||
|
implementation_strategy,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -72,8 +78,15 @@ impl<'t> Criterion for Proximity<'t> {
|
|||||||
self.state = None; // reset state
|
self.state = None; // reset state
|
||||||
}
|
}
|
||||||
Some((_, query_tree, allowed_candidates)) => {
|
Some((_, query_tree, allowed_candidates)) => {
|
||||||
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD
|
let mut new_candidates = if matches!(
|
||||||
&& self.proximity > PROXIMITY_THRESHOLD
|
self.implementation_strategy,
|
||||||
|
CriterionImplementationStrategy::OnlyIterative
|
||||||
|
) || (matches!(
|
||||||
|
self.implementation_strategy,
|
||||||
|
CriterionImplementationStrategy::Dynamic
|
||||||
|
) && allowed_candidates.len()
|
||||||
|
<= CANDIDATES_THRESHOLD
|
||||||
|
&& self.proximity > PROXIMITY_THRESHOLD)
|
||||||
{
|
{
|
||||||
if let Some(cache) = self.plane_sweep_cache.as_mut() {
|
if let Some(cache) = self.plane_sweep_cache.as_mut() {
|
||||||
match cache.next() {
|
match cache.next() {
|
||||||
|
@ -49,6 +49,7 @@ pub struct Search<'a> {
|
|||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
words_limit: usize,
|
words_limit: usize,
|
||||||
exhaustive_number_hits: bool,
|
exhaustive_number_hits: bool,
|
||||||
|
criterion_implementation_strategy: CriterionImplementationStrategy,
|
||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
}
|
}
|
||||||
@ -65,6 +66,7 @@ impl<'a> Search<'a> {
|
|||||||
authorize_typos: true,
|
authorize_typos: true,
|
||||||
exhaustive_number_hits: false,
|
exhaustive_number_hits: false,
|
||||||
words_limit: 10,
|
words_limit: 10,
|
||||||
|
criterion_implementation_strategy: CriterionImplementationStrategy::default(),
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
}
|
}
|
||||||
@ -117,6 +119,14 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn criterion_implementation_strategy(
|
||||||
|
&mut self,
|
||||||
|
strategy: CriterionImplementationStrategy,
|
||||||
|
) -> &mut Search<'a> {
|
||||||
|
self.criterion_implementation_strategy = strategy;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
fn is_typo_authorized(&self) -> Result<bool> {
|
fn is_typo_authorized(&self) -> Result<bool> {
|
||||||
let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?;
|
let index_authorizes_typos = self.index.authorize_typos(self.rtxn)?;
|
||||||
// only authorize typos if both the index and the query allow it.
|
// only authorize typos if both the index and the query allow it.
|
||||||
@ -204,6 +214,7 @@ impl<'a> Search<'a> {
|
|||||||
self.sort_criteria.clone(),
|
self.sort_criteria.clone(),
|
||||||
self.exhaustive_number_hits,
|
self.exhaustive_number_hits,
|
||||||
None,
|
None,
|
||||||
|
self.criterion_implementation_strategy,
|
||||||
)?;
|
)?;
|
||||||
self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria)
|
self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria)
|
||||||
}
|
}
|
||||||
@ -220,6 +231,7 @@ impl<'a> Search<'a> {
|
|||||||
self.sort_criteria.clone(),
|
self.sort_criteria.clone(),
|
||||||
self.exhaustive_number_hits,
|
self.exhaustive_number_hits,
|
||||||
Some(distinct.clone()),
|
Some(distinct.clone()),
|
||||||
|
self.criterion_implementation_strategy,
|
||||||
)?;
|
)?;
|
||||||
self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria)
|
self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria)
|
||||||
}
|
}
|
||||||
@ -288,6 +300,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
authorize_typos,
|
authorize_typos,
|
||||||
words_limit,
|
words_limit,
|
||||||
exhaustive_number_hits,
|
exhaustive_number_hits,
|
||||||
|
criterion_implementation_strategy,
|
||||||
rtxn: _,
|
rtxn: _,
|
||||||
index: _,
|
index: _,
|
||||||
} = self;
|
} = self;
|
||||||
@ -300,6 +313,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
.field("terms_matching_strategy", terms_matching_strategy)
|
.field("terms_matching_strategy", terms_matching_strategy)
|
||||||
.field("authorize_typos", authorize_typos)
|
.field("authorize_typos", authorize_typos)
|
||||||
.field("exhaustive_number_hits", exhaustive_number_hits)
|
.field("exhaustive_number_hits", exhaustive_number_hits)
|
||||||
|
.field("criterion_implementation_strategy", criterion_implementation_strategy)
|
||||||
.field("words_limit", words_limit)
|
.field("words_limit", words_limit)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
@ -313,6 +327,14 @@ pub struct SearchResult {
|
|||||||
pub documents_ids: Vec<DocumentId>,
|
pub documents_ids: Vec<DocumentId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Selects which implementation the ranking criteria use during a search.
///
/// Criteria that have both an iterative and a set-based code path consult
/// this value to decide which one to run.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum CriterionImplementationStrategy {
    /// Always take the iterative code path.
    OnlyIterative,
    /// Always take the set-based code path.
    OnlySetBased,
    /// Let each criterion choose at search time (for example from the number
    /// of remaining candidates). This is the default.
    #[default]
    Dynamic,
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum TermsMatchingStrategy {
|
pub enum TermsMatchingStrategy {
|
||||||
// remove last word first
|
// remove last word first
|
||||||
|
Loading…
Reference in New Issue
Block a user