2023-04-04 18:02:46 +02:00
|
|
|
mod bucket_sort;
|
2023-03-05 20:07:37 +01:00
|
|
|
mod db_cache;
|
2023-03-09 15:20:29 +01:00
|
|
|
mod distinct;
|
2023-04-13 13:45:34 +02:00
|
|
|
mod geo_sort;
|
2023-03-05 20:07:37 +01:00
|
|
|
mod graph_based_ranking_rule;
|
2023-03-06 19:21:55 +01:00
|
|
|
mod interner;
|
2023-03-31 09:19:18 +02:00
|
|
|
mod limits;
|
2023-03-05 20:07:37 +01:00
|
|
|
mod logger;
|
2023-04-06 13:58:56 +02:00
|
|
|
pub mod matches;
|
2023-03-05 20:07:37 +01:00
|
|
|
mod query_graph;
|
|
|
|
mod query_term;
|
|
|
|
mod ranking_rule_graph;
|
|
|
|
mod ranking_rules;
|
|
|
|
mod resolve_query_graph;
|
2023-03-07 14:42:58 +01:00
|
|
|
mod small_bitmap;
|
2023-04-04 17:12:07 +02:00
|
|
|
|
|
|
|
mod exact_attribute;
|
2023-03-05 20:07:37 +01:00
|
|
|
mod sort;
|
2023-11-15 15:46:37 +01:00
|
|
|
mod vector_sort;
|
2023-02-21 09:41:58 +01:00
|
|
|
|
2023-04-04 15:38:30 +02:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests;
|
|
|
|
|
2023-07-24 14:50:07 +05:30
|
|
|
use std::collections::HashSet;
|
2023-03-08 09:55:53 +01:00
|
|
|
|
2023-04-13 13:45:34 +02:00
|
|
|
use bucket_sort::{bucket_sort, BucketSortOutput};
|
2023-03-23 09:39:16 +01:00
|
|
|
use charabia::TokenizerBuilder;
|
2023-03-06 08:35:01 +01:00
|
|
|
use db_cache::DatabaseCache;
|
2023-04-13 13:45:34 +02:00
|
|
|
use exact_attribute::ExactAttribute;
|
|
|
|
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
|
2023-03-06 08:35:01 +01:00
|
|
|
use heed::RoTxn;
|
2023-04-13 13:45:34 +02:00
|
|
|
use interner::{DedupInterner, Interner};
|
2023-04-11 11:56:31 +02:00
|
|
|
pub use logger::visual::VisualSearchLogger;
|
2023-03-19 15:15:58 +01:00
|
|
|
pub use logger::{DefaultSearchLogger, SearchLogger};
|
2023-03-30 13:12:51 +02:00
|
|
|
use query_graph::{QueryGraph, QueryNode};
|
2024-03-28 15:51:43 +01:00
|
|
|
use query_term::{
|
|
|
|
located_query_terms_from_tokens, ExtractedTokens, LocatedQueryTerm, Phrase, QueryTerm,
|
|
|
|
};
|
2023-04-13 13:45:34 +02:00
|
|
|
use ranking_rules::{
|
|
|
|
BoxRankingRule, PlaceholderQuery, RankingRule, RankingRuleOutput, RankingRuleQueryTrait,
|
|
|
|
};
|
|
|
|
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
|
2023-03-07 14:42:58 +01:00
|
|
|
use roaring::RoaringBitmap;
|
2023-04-13 13:45:34 +02:00
|
|
|
use sort::Sort;
|
2023-03-08 09:55:53 +01:00
|
|
|
|
2023-07-17 18:24:24 +02:00
|
|
|
use self::distinct::facet_string_values;
|
2023-04-13 13:45:34 +02:00
|
|
|
use self::geo_sort::GeoSort;
|
|
|
|
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
2023-05-08 11:52:43 +02:00
|
|
|
use self::graph_based_ranking_rule::Words;
|
2023-04-13 13:45:34 +02:00
|
|
|
use self::interner::Interned;
|
2023-11-15 15:46:37 +01:00
|
|
|
use self::vector_sort::VectorSort;
|
2023-06-20 17:04:59 +02:00
|
|
|
use crate::error::FieldIdMapMissingEntry;
|
2023-06-15 17:37:16 +02:00
|
|
|
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
2023-04-03 10:09:27 +02:00
|
|
|
use crate::search::new::distinct::apply_distinct_rule;
|
2024-03-28 11:50:53 +01:00
|
|
|
use crate::vector::Embedder;
|
2023-06-14 14:20:05 +02:00
|
|
|
use crate::{
|
2024-03-05 11:21:46 +01:00
|
|
|
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
|
|
|
|
UserError,
|
2023-06-14 14:20:05 +02:00
|
|
|
};
|
2023-04-11 18:27:41 +02:00
|
|
|
|
2023-03-16 11:52:51 +01:00
|
|
|
/// A structure used throughout the execution of a search query.
///
/// It groups the index and read transaction being searched with the caches
/// and interners that the search functions of this module read and fill.
pub struct SearchContext<'ctx> {
    /// The index being searched.
    pub index: &'ctx Index,
    /// The read transaction handed to the `index` accessors.
    pub txn: &'ctx RoTxn<'ctx>,
    /// Database cache; default-initialised by [`SearchContext::new`].
    pub db_cache: DatabaseCache<'ctx>,
    /// Deduplicating interner for words.
    pub word_interner: DedupInterner<String>,
    /// Deduplicating interner for phrases.
    pub phrase_interner: DedupInterner<Phrase>,
    /// Interner for query terms.
    pub term_interner: Interner<QueryTerm>,
    /// Cache of phrase document ids.
    // NOTE(review): presumably keyed by interned phrase — confirm in `resolve_query_graph`.
    pub phrase_docids: PhraseDocIdsCache,
    /// The field ids the search is restricted to, or `None` when no restriction
    /// applies. Written by [`SearchContext::searchable_attributes`].
    pub restricted_fids: Option<RestrictedFids>,
}
|
2023-04-06 15:02:23 +02:00
|
|
|
|
2023-03-13 14:03:48 +01:00
|
|
|
impl<'ctx> SearchContext<'ctx> {
|
|
|
|
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
|
2023-03-06 19:21:55 +01:00
|
|
|
Self {
|
|
|
|
index,
|
|
|
|
txn,
|
|
|
|
db_cache: <_>::default(),
|
|
|
|
word_interner: <_>::default(),
|
|
|
|
phrase_interner: <_>::default(),
|
2023-03-14 10:54:55 +01:00
|
|
|
term_interner: <_>::default(),
|
2023-03-30 11:10:38 +02:00
|
|
|
phrase_docids: <_>::default(),
|
2023-11-28 18:37:57 +01:00
|
|
|
restricted_fids: None,
|
2023-03-06 19:21:55 +01:00
|
|
|
}
|
|
|
|
}
|
2023-06-13 14:42:38 +02:00
|
|
|
|
|
|
|
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
|
2023-06-13 18:52:02 +02:00
|
|
|
let fids_map = self.index.fields_ids_map(self.txn)?;
|
2023-06-20 17:04:59 +02:00
|
|
|
let searchable_names = self.index.searchable_fields(self.txn)?;
|
2023-11-28 14:55:29 +01:00
|
|
|
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
|
2023-06-20 17:04:59 +02:00
|
|
|
|
2023-11-28 18:37:57 +01:00
|
|
|
let mut restricted_fids = RestrictedFids::default();
|
2023-07-12 10:08:29 +02:00
|
|
|
let mut contains_wildcard = false;
|
2023-06-20 17:04:59 +02:00
|
|
|
for field_name in searchable_attributes {
|
2023-07-12 10:08:29 +02:00
|
|
|
if field_name == "*" {
|
|
|
|
contains_wildcard = true;
|
|
|
|
continue;
|
|
|
|
}
|
2023-06-20 17:04:59 +02:00
|
|
|
let searchable_contains_name =
|
|
|
|
searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name));
|
|
|
|
let fid = match (fids_map.id(field_name), searchable_contains_name) {
|
|
|
|
// The Field id exist and the field is searchable
|
|
|
|
(Some(fid), Some(true)) | (Some(fid), None) => fid,
|
|
|
|
// The field is searchable but the Field id doesn't exist => Internal Error
|
|
|
|
(None, Some(true)) => {
|
|
|
|
return Err(FieldIdMapMissingEntry::FieldName {
|
|
|
|
field_name: field_name.to_string(),
|
|
|
|
process: "search",
|
|
|
|
}
|
|
|
|
.into())
|
|
|
|
}
|
2023-07-12 10:08:56 +02:00
|
|
|
// The field is not searchable, but the searchableAttributes are set to * => ignore field
|
|
|
|
(None, None) => continue,
|
2023-06-20 17:04:59 +02:00
|
|
|
// The field is not searchable => User error
|
2023-07-12 10:08:56 +02:00
|
|
|
(_fid, Some(false)) => {
|
2023-07-24 14:50:07 +05:30
|
|
|
let (valid_fields, hidden_fields) = match searchable_names {
|
|
|
|
Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
|
|
|
|
None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
|
|
|
|
};
|
2023-06-20 17:04:59 +02:00
|
|
|
|
|
|
|
let field = field_name.to_string();
|
|
|
|
return Err(UserError::InvalidSearchableAttribute {
|
|
|
|
field,
|
|
|
|
valid_fields,
|
|
|
|
hidden_fields,
|
|
|
|
}
|
|
|
|
.into());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2023-11-28 14:55:29 +01:00
|
|
|
if exact_attributes_ids.contains(&fid) {
|
2023-11-28 18:37:57 +01:00
|
|
|
restricted_fids.exact.push(fid);
|
2023-11-28 14:55:29 +01:00
|
|
|
} else {
|
2023-11-28 18:37:57 +01:00
|
|
|
restricted_fids.tolerant.push(fid);
|
2023-11-28 14:55:29 +01:00
|
|
|
};
|
2023-06-20 17:04:59 +02:00
|
|
|
}
|
|
|
|
|
2023-11-28 18:37:57 +01:00
|
|
|
self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
|
2023-06-13 14:42:38 +02:00
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
2023-03-06 19:21:55 +01:00
|
|
|
}
|
|
|
|
|
2023-04-11 18:26:44 +02:00
|
|
|
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
|
|
|
|
pub enum Word {
|
|
|
|
Original(Interned<String>),
|
|
|
|
Derived(Interned<String>),
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Word {
|
|
|
|
pub fn interned(&self) -> Interned<String> {
|
|
|
|
match self {
|
|
|
|
Word::Original(word) => *word,
|
|
|
|
Word::Derived(word) => *word,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-11-28 18:37:57 +01:00
|
|
|
#[derive(Debug, Clone, Default)]
|
|
|
|
pub struct RestrictedFids {
|
|
|
|
pub tolerant: Vec<FieldId>,
|
|
|
|
pub exact: Vec<FieldId>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl RestrictedFids {
|
|
|
|
pub fn contains(&self, fid: &FieldId) -> bool {
|
|
|
|
self.tolerant.contains(fid) || self.exact.contains(fid)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-16 11:52:51 +01:00
|
|
|
/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
|
2023-03-23 09:15:57 +01:00
|
|
|
fn resolve_maximally_reduced_query_graph(
|
|
|
|
ctx: &mut SearchContext,
|
2023-03-06 08:35:01 +01:00
|
|
|
universe: &RoaringBitmap,
|
|
|
|
query_graph: &QueryGraph,
|
|
|
|
matching_strategy: TermsMatchingStrategy,
|
|
|
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
|
|
|
) -> Result<RoaringBitmap> {
|
|
|
|
let mut graph = query_graph.clone();
|
2023-03-30 13:12:51 +02:00
|
|
|
|
|
|
|
let nodes_to_remove = match matching_strategy {
|
|
|
|
TermsMatchingStrategy::Last => query_graph
|
2023-03-30 14:54:08 +02:00
|
|
|
.removal_order_for_terms_matching_strategy_last(ctx)
|
2023-03-30 13:12:51 +02:00
|
|
|
.iter()
|
|
|
|
.flat_map(|x| x.iter())
|
|
|
|
.collect(),
|
2023-03-06 08:35:01 +01:00
|
|
|
TermsMatchingStrategy::All => vec![],
|
|
|
|
};
|
2023-03-30 14:49:25 +02:00
|
|
|
graph.remove_nodes_keep_edges(&nodes_to_remove);
|
2023-03-30 13:12:51 +02:00
|
|
|
|
2023-04-06 16:24:44 +02:00
|
|
|
logger.query_for_initial_universe(&graph);
|
2023-03-30 11:10:38 +02:00
|
|
|
let docids = compute_query_graph_docids(ctx, &graph, universe)?;
|
2023-03-06 08:35:01 +01:00
|
|
|
|
|
|
|
Ok(docids)
|
|
|
|
}
|
2023-03-16 11:52:51 +01:00
|
|
|
|
2024-03-05 11:05:20 +01:00
|
|
|
/// Computes the set of documents the ranking rules will work on.
///
/// This forwards all its arguments, unchanged, to
/// [`resolve_maximally_reduced_query_graph`]; the only addition is the
/// `search` tracing span.
#[tracing::instrument(level = "trace", skip_all, target = "search")]
fn resolve_universe(
    ctx: &mut SearchContext,
    initial_universe: &RoaringBitmap,
    query_graph: &QueryGraph,
    matching_strategy: TermsMatchingStrategy,
    logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<RoaringBitmap> {
    resolve_maximally_reduced_query_graph(
        ctx,
        initial_universe,
        query_graph,
        matching_strategy,
        logger,
    )
}
|
|
|
|
|
2024-03-26 17:31:56 +01:00
|
|
|
#[tracing::instrument(level = "trace", skip_all, target = "search")]
|
|
|
|
fn resolve_negative_words(
|
|
|
|
ctx: &mut SearchContext,
|
|
|
|
negative_words: &[Word],
|
|
|
|
) -> Result<RoaringBitmap> {
|
|
|
|
let mut negative_bitmap = RoaringBitmap::new();
|
|
|
|
for &word in negative_words {
|
|
|
|
if let Some(bitmap) = ctx.word_docids(word)? {
|
|
|
|
negative_bitmap |= bitmap;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(negative_bitmap)
|
|
|
|
}
|
|
|
|
|
2024-03-28 15:51:43 +01:00
|
|
|
#[tracing::instrument(level = "trace", skip_all, target = "search")]
|
|
|
|
fn resolve_negative_phrases(
|
|
|
|
ctx: &mut SearchContext,
|
|
|
|
negative_phrases: &[LocatedQueryTerm],
|
|
|
|
) -> Result<RoaringBitmap> {
|
|
|
|
let mut negative_bitmap = RoaringBitmap::new();
|
|
|
|
for term in negative_phrases {
|
|
|
|
let query_term = ctx.term_interner.get(term.value);
|
|
|
|
if let Some(phrase) = query_term.original_phrase() {
|
|
|
|
negative_bitmap |= ctx.get_phrase_docids(phrase)?;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(negative_bitmap)
|
|
|
|
}
|
|
|
|
|
2023-03-16 11:52:51 +01:00
|
|
|
/// Return the list of initialised ranking rules to be used for a placeholder search.
|
2023-03-13 14:03:48 +01:00
|
|
|
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
|
|
|
ctx: &SearchContext<'ctx>,
|
2023-03-28 12:40:52 +02:00
|
|
|
sort_criteria: &Option<Vec<AscDesc>>,
|
2023-04-13 13:45:34 +02:00
|
|
|
geo_strategy: geo_sort::Strategy,
|
2023-03-28 12:39:42 +02:00
|
|
|
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
2023-03-28 16:41:03 +02:00
|
|
|
let mut sort = false;
|
2023-04-13 13:45:34 +02:00
|
|
|
let mut sorted_fields = HashSet::new();
|
|
|
|
let mut geo_sorted = false;
|
2023-03-28 16:41:03 +02:00
|
|
|
let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
|
2023-03-13 09:52:17 +01:00
|
|
|
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
|
|
|
for rr in settings_ranking_rules {
|
|
|
|
match rr {
|
2023-03-29 09:11:06 +02:00
|
|
|
// These rules need a query to have an effect; ignore them in placeholder search
|
2023-03-13 09:52:17 +01:00
|
|
|
crate::Criterion::Words
|
|
|
|
| crate::Criterion::Typo
|
|
|
|
| crate::Criterion::Attribute
|
|
|
|
| crate::Criterion::Proximity
|
|
|
|
| crate::Criterion::Exactness => continue,
|
2023-03-28 16:41:03 +02:00
|
|
|
crate::Criterion::Sort => {
|
|
|
|
if sort {
|
|
|
|
continue;
|
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
resolve_sort_criteria(
|
|
|
|
sort_criteria,
|
|
|
|
ctx,
|
|
|
|
&mut ranking_rules,
|
|
|
|
&mut sorted_fields,
|
|
|
|
&mut geo_sorted,
|
|
|
|
geo_strategy,
|
|
|
|
)?;
|
2023-03-28 16:41:03 +02:00
|
|
|
sort = true;
|
|
|
|
}
|
2023-03-28 16:41:25 +02:00
|
|
|
crate::Criterion::Asc(field_name) => {
|
2023-04-13 13:45:34 +02:00
|
|
|
if sorted_fields.contains(&field_name) {
|
2023-03-28 16:41:25 +02:00
|
|
|
continue;
|
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
sorted_fields.insert(field_name.clone());
|
2023-03-28 16:41:25 +02:00
|
|
|
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
|
|
|
|
}
|
|
|
|
crate::Criterion::Desc(field_name) => {
|
2023-04-13 13:45:34 +02:00
|
|
|
if sorted_fields.contains(&field_name) {
|
2023-03-28 16:41:25 +02:00
|
|
|
continue;
|
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
sorted_fields.insert(field_name.clone());
|
2023-03-28 16:41:25 +02:00
|
|
|
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?));
|
|
|
|
}
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(ranking_rules)
|
|
|
|
}
|
2023-03-16 11:52:51 +01:00
|
|
|
|
2023-11-15 15:46:37 +01:00
|
|
|
fn get_ranking_rules_for_vector<'ctx>(
|
|
|
|
ctx: &SearchContext<'ctx>,
|
|
|
|
sort_criteria: &Option<Vec<AscDesc>>,
|
|
|
|
geo_strategy: geo_sort::Strategy,
|
2023-12-07 17:03:10 +01:00
|
|
|
limit_plus_offset: usize,
|
2023-11-15 15:46:37 +01:00
|
|
|
target: &[f32],
|
2023-12-13 15:38:44 +01:00
|
|
|
embedder_name: &str,
|
2024-03-28 11:50:53 +01:00
|
|
|
embedder: &Embedder,
|
2023-11-15 15:46:37 +01:00
|
|
|
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
|
|
|
// query graph search
|
|
|
|
|
|
|
|
let mut sort = false;
|
|
|
|
let mut sorted_fields = HashSet::new();
|
|
|
|
let mut geo_sorted = false;
|
|
|
|
|
|
|
|
let mut vector = false;
|
|
|
|
let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
|
|
|
|
|
|
|
|
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
|
|
|
for rr in settings_ranking_rules {
|
|
|
|
match rr {
|
|
|
|
crate::Criterion::Words
|
|
|
|
| crate::Criterion::Typo
|
|
|
|
| crate::Criterion::Proximity
|
|
|
|
| crate::Criterion::Attribute
|
|
|
|
| crate::Criterion::Exactness => {
|
|
|
|
if !vector {
|
|
|
|
let vector_candidates = ctx.index.documents_ids(ctx.txn)?;
|
2023-12-07 17:03:10 +01:00
|
|
|
let vector_sort = VectorSort::new(
|
|
|
|
ctx,
|
|
|
|
target.to_vec(),
|
|
|
|
vector_candidates,
|
|
|
|
limit_plus_offset,
|
2023-12-13 15:38:44 +01:00
|
|
|
embedder_name,
|
2024-03-28 11:50:53 +01:00
|
|
|
embedder,
|
2023-12-07 17:03:10 +01:00
|
|
|
)?;
|
2023-11-15 15:46:37 +01:00
|
|
|
ranking_rules.push(Box::new(vector_sort));
|
|
|
|
vector = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
crate::Criterion::Sort => {
|
|
|
|
if sort {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
resolve_sort_criteria(
|
|
|
|
sort_criteria,
|
|
|
|
ctx,
|
|
|
|
&mut ranking_rules,
|
|
|
|
&mut sorted_fields,
|
|
|
|
&mut geo_sorted,
|
|
|
|
geo_strategy,
|
|
|
|
)?;
|
|
|
|
sort = true;
|
|
|
|
}
|
|
|
|
crate::Criterion::Asc(field_name) => {
|
|
|
|
if sorted_fields.contains(&field_name) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
sorted_fields.insert(field_name.clone());
|
|
|
|
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
|
|
|
|
}
|
|
|
|
crate::Criterion::Desc(field_name) => {
|
|
|
|
if sorted_fields.contains(&field_name) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
sorted_fields.insert(field_name.clone());
|
|
|
|
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(ranking_rules)
|
|
|
|
}
|
|
|
|
|
2023-03-16 11:52:51 +01:00
|
|
|
/// Return the list of initialised ranking rules to be used for a query graph search.
|
2023-03-13 14:03:48 +01:00
|
|
|
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|
|
|
ctx: &SearchContext<'ctx>,
|
2023-03-28 12:40:52 +02:00
|
|
|
sort_criteria: &Option<Vec<AscDesc>>,
|
2023-04-13 13:45:34 +02:00
|
|
|
geo_strategy: geo_sort::Strategy,
|
2023-03-13 09:52:17 +01:00
|
|
|
terms_matching_strategy: TermsMatchingStrategy,
|
2023-04-25 17:52:42 +02:00
|
|
|
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
|
2023-03-13 09:52:17 +01:00
|
|
|
// query graph search
|
|
|
|
let mut words = false;
|
|
|
|
let mut typo = false;
|
|
|
|
let mut proximity = false;
|
2023-03-28 12:40:52 +02:00
|
|
|
let mut sort = false;
|
2023-04-12 12:01:50 +02:00
|
|
|
let mut attribute = false;
|
2023-04-04 17:12:07 +02:00
|
|
|
let mut exactness = false;
|
2023-04-13 13:45:34 +02:00
|
|
|
let mut sorted_fields = HashSet::new();
|
|
|
|
let mut geo_sorted = false;
|
2023-03-13 09:52:17 +01:00
|
|
|
|
2023-05-08 11:52:43 +02:00
|
|
|
// Don't add the `words` ranking rule if the term matching strategy is `All`
|
|
|
|
if matches!(terms_matching_strategy, TermsMatchingStrategy::All) {
|
|
|
|
words = true;
|
|
|
|
}
|
|
|
|
|
2023-03-28 12:39:42 +02:00
|
|
|
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
|
2023-03-13 09:52:17 +01:00
|
|
|
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
|
|
|
for rr in settings_ranking_rules {
|
2023-04-24 16:57:12 +02:00
|
|
|
// Add Words before any of: typo, proximity, attribute
|
2023-03-13 09:52:17 +01:00
|
|
|
match rr {
|
2023-04-25 17:52:42 +02:00
|
|
|
crate::Criterion::Typo
|
|
|
|
| crate::Criterion::Attribute
|
|
|
|
| crate::Criterion::Proximity
|
|
|
|
| crate::Criterion::Exactness => {
|
2023-03-13 09:52:17 +01:00
|
|
|
if !words {
|
|
|
|
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
|
|
|
|
words = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => {}
|
|
|
|
}
|
|
|
|
match rr {
|
|
|
|
crate::Criterion::Words => {
|
|
|
|
if words {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
|
|
|
|
words = true;
|
|
|
|
}
|
|
|
|
crate::Criterion::Typo => {
|
|
|
|
if typo {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
typo = true;
|
2023-03-30 12:12:41 +02:00
|
|
|
ranking_rules.push(Box::new(Typo::new(None)));
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
|
|
|
crate::Criterion::Proximity => {
|
|
|
|
if proximity {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
proximity = true;
|
2023-03-30 12:12:41 +02:00
|
|
|
ranking_rules.push(Box::new(Proximity::new(None)));
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
|
|
|
crate::Criterion::Attribute => {
|
|
|
|
if attribute {
|
|
|
|
continue;
|
|
|
|
}
|
2023-04-12 12:01:50 +02:00
|
|
|
attribute = true;
|
2023-04-13 10:46:09 +02:00
|
|
|
ranking_rules.push(Box::new(Fid::new(None)));
|
|
|
|
ranking_rules.push(Box::new(Position::new(None)));
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
|
|
|
crate::Criterion::Sort => {
|
|
|
|
if sort {
|
|
|
|
continue;
|
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
resolve_sort_criteria(
|
|
|
|
sort_criteria,
|
|
|
|
ctx,
|
|
|
|
&mut ranking_rules,
|
|
|
|
&mut sorted_fields,
|
|
|
|
&mut geo_sorted,
|
|
|
|
geo_strategy,
|
|
|
|
)?;
|
2023-03-28 12:40:52 +02:00
|
|
|
sort = true;
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
|
|
|
crate::Criterion::Exactness => {
|
|
|
|
if exactness {
|
|
|
|
continue;
|
|
|
|
}
|
2023-04-04 17:12:07 +02:00
|
|
|
ranking_rules.push(Box::new(ExactAttribute::new()));
|
|
|
|
ranking_rules.push(Box::new(Exactness::new()));
|
|
|
|
exactness = true;
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
2023-03-28 16:41:25 +02:00
|
|
|
crate::Criterion::Asc(field_name) => {
|
2023-04-13 13:45:34 +02:00
|
|
|
if sorted_fields.contains(&field_name) {
|
2023-03-13 09:52:17 +01:00
|
|
|
continue;
|
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
sorted_fields.insert(field_name.clone());
|
2023-03-28 16:41:25 +02:00
|
|
|
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
2023-03-28 16:41:25 +02:00
|
|
|
crate::Criterion::Desc(field_name) => {
|
2023-04-13 13:45:34 +02:00
|
|
|
if sorted_fields.contains(&field_name) {
|
2023-03-13 09:52:17 +01:00
|
|
|
continue;
|
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
sorted_fields.insert(field_name.clone());
|
2023-03-28 16:41:25 +02:00
|
|
|
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?));
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-04-25 17:52:42 +02:00
|
|
|
Ok(ranking_rules)
|
2023-03-13 09:52:17 +01:00
|
|
|
}
|
2023-03-06 08:35:01 +01:00
|
|
|
|
2023-03-28 16:41:03 +02:00
|
|
|
fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
|
|
|
sort_criteria: &Option<Vec<AscDesc>>,
|
|
|
|
ctx: &SearchContext<'ctx>,
|
|
|
|
ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
|
2023-04-13 13:45:34 +02:00
|
|
|
sorted_fields: &mut HashSet<String>,
|
|
|
|
geo_sorted: &mut bool,
|
|
|
|
geo_strategy: geo_sort::Strategy,
|
2023-03-28 16:41:03 +02:00
|
|
|
) -> Result<()> {
|
|
|
|
let sort_criteria = sort_criteria.clone().unwrap_or_default();
|
|
|
|
ranking_rules.reserve(sort_criteria.len());
|
|
|
|
for criterion in sort_criteria {
|
2023-04-13 13:45:34 +02:00
|
|
|
match criterion {
|
2023-03-28 16:41:03 +02:00
|
|
|
AscDesc::Asc(Member::Field(field_name)) => {
|
2023-04-13 13:45:34 +02:00
|
|
|
if sorted_fields.contains(&field_name) {
|
2023-03-28 16:41:03 +02:00
|
|
|
continue;
|
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
sorted_fields.insert(field_name.clone());
|
|
|
|
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
|
2023-03-28 16:41:03 +02:00
|
|
|
}
|
|
|
|
AscDesc::Desc(Member::Field(field_name)) => {
|
2023-04-13 13:45:34 +02:00
|
|
|
if sorted_fields.contains(&field_name) {
|
2023-03-28 16:41:03 +02:00
|
|
|
continue;
|
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
sorted_fields.insert(field_name.clone());
|
|
|
|
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?));
|
2023-03-28 16:41:03 +02:00
|
|
|
}
|
2023-04-13 13:45:34 +02:00
|
|
|
AscDesc::Asc(Member::Geo(point)) => {
|
|
|
|
if *geo_sorted {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
|
|
|
ranking_rules.push(Box::new(GeoSort::new(
|
|
|
|
geo_strategy,
|
|
|
|
geo_faceted_docids,
|
|
|
|
point,
|
|
|
|
true,
|
|
|
|
)?));
|
|
|
|
}
|
|
|
|
AscDesc::Desc(Member::Geo(point)) => {
|
|
|
|
if *geo_sorted {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
|
|
|
ranking_rules.push(Box::new(GeoSort::new(
|
|
|
|
geo_strategy,
|
|
|
|
geo_faceted_docids,
|
|
|
|
point,
|
|
|
|
false,
|
|
|
|
)?));
|
2023-03-28 16:41:03 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2023-11-15 15:46:37 +01:00
|
|
|
/// Returns the documents a search starts from: the result of evaluating
/// `filters` when there is one, every document id of the index otherwise.
pub fn filtered_universe(ctx: &SearchContext, filters: &Option<Filter>) -> Result<RoaringBitmap> {
    let universe = match filters {
        Some(filter) => filter.evaluate(ctx.txn, ctx.index)?,
        None => ctx.index.documents_ids(ctx.txn)?,
    };
    Ok(universe)
}
|
|
|
|
|
|
|
|
#[allow(clippy::too_many_arguments)]
|
|
|
|
pub fn execute_vector_search(
|
|
|
|
ctx: &mut SearchContext,
|
|
|
|
vector: &[f32],
|
|
|
|
scoring_strategy: ScoringStrategy,
|
|
|
|
universe: RoaringBitmap,
|
|
|
|
sort_criteria: &Option<Vec<AscDesc>>,
|
|
|
|
geo_strategy: geo_sort::Strategy,
|
|
|
|
from: usize,
|
|
|
|
length: usize,
|
2023-12-13 15:38:44 +01:00
|
|
|
embedder_name: &str,
|
2024-03-28 11:50:53 +01:00
|
|
|
embedder: &Embedder,
|
2024-03-05 11:21:46 +01:00
|
|
|
time_budget: TimeBudget,
|
2023-11-15 15:46:37 +01:00
|
|
|
) -> Result<PartialSearchResult> {
|
|
|
|
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
|
|
|
|
2023-12-12 21:19:48 +01:00
|
|
|
// FIXME: input universe = universe & documents_with_vectors
|
2023-11-15 15:46:37 +01:00
|
|
|
// for now if we're computing embeddings for ALL documents, we can assume that this is just universe
|
2023-12-12 10:05:06 +01:00
|
|
|
let ranking_rules = get_ranking_rules_for_vector(
|
|
|
|
ctx,
|
|
|
|
sort_criteria,
|
|
|
|
geo_strategy,
|
|
|
|
from + length,
|
|
|
|
vector,
|
2023-12-13 15:38:44 +01:00
|
|
|
embedder_name,
|
2024-03-28 11:50:53 +01:00
|
|
|
embedder,
|
2023-12-12 10:05:06 +01:00
|
|
|
)?;
|
2023-11-15 15:46:37 +01:00
|
|
|
|
|
|
|
let mut placeholder_search_logger = logger::DefaultSearchLogger;
|
|
|
|
let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
|
|
|
|
&mut placeholder_search_logger;
|
|
|
|
|
2024-03-05 11:21:46 +01:00
|
|
|
let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort(
|
2023-11-15 15:46:37 +01:00
|
|
|
ctx,
|
|
|
|
ranking_rules,
|
|
|
|
&PlaceholderQuery,
|
|
|
|
&universe,
|
|
|
|
from,
|
|
|
|
length,
|
|
|
|
scoring_strategy,
|
|
|
|
placeholder_search_logger,
|
2024-03-05 11:21:46 +01:00
|
|
|
time_budget,
|
2023-11-15 15:46:37 +01:00
|
|
|
)?;
|
|
|
|
|
|
|
|
Ok(PartialSearchResult {
|
|
|
|
candidates: all_candidates,
|
|
|
|
document_scores: scores,
|
|
|
|
documents_ids: docids,
|
|
|
|
located_query_terms: None,
|
2024-03-05 11:21:46 +01:00
|
|
|
degraded,
|
2024-03-26 18:01:27 +01:00
|
|
|
used_negative_operator: false,
|
2023-11-15 15:46:37 +01:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-03-06 08:35:01 +01:00
|
|
|
/// Runs a keyword search, or a placeholder search when there is nothing to
/// search for.
///
/// Steps, in order:
/// 1. the sort criteria are validated with [`check_sort_criteria`];
/// 2. when `query` is `Some`, it is tokenized with a tokenizer configured from
///    the index settings (stop words, separators, dictionary, script/language
///    allow-list) and turned into query terms, negative words and negative
///    phrases; the documents matching a negative word or phrase are removed
///    from `universe`;
/// 3. if query terms were extracted, a query graph search is run; otherwise
///    (no `query`, or no term extracted) a placeholder search is run;
/// 4. when `exhaustive_number_hits` is set and the index has a distinct field
///    with a known field id, the distinct rule is applied to the candidates.
///
/// `from`, `length`, `scoring_strategy` and `time_budget` are handed to
/// [`bucket_sort`] as is; `words_limit` is handed to
/// `located_query_terms_from_tokens` as is.
///
/// # Errors
///
/// Forwards any error raised by the steps above.
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn execute_search(
    ctx: &mut SearchContext,
    query: Option<&str>,
    terms_matching_strategy: TermsMatchingStrategy,
    scoring_strategy: ScoringStrategy,
    exhaustive_number_hits: bool,
    mut universe: RoaringBitmap,
    sort_criteria: &Option<Vec<AscDesc>>,
    geo_strategy: geo_sort::Strategy,
    from: usize,
    length: usize,
    words_limit: Option<usize>,
    placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
    query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
    time_budget: TimeBudget,
) -> Result<PartialSearchResult> {
    check_sort_criteria(ctx, sort_criteria.as_ref())?;

    let mut used_negative_operator = false;
    let mut located_query_terms = None;
    // `Some(terms)` selects the query graph search below, `None` the placeholder search.
    let query_terms = if let Some(query) = query {
        let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
        let entered = span.enter();

        // We make sure that the analyzer is aware of the stop words
        // this ensures that the query builder is able to properly remove them.
        let mut tokbuilder = TokenizerBuilder::new();
        let stop_words = ctx.index.stop_words(ctx.txn)?;
        if let Some(ref stop_words) = stop_words {
            tokbuilder.stop_words(stop_words);
        }

        // The builder is given `&str` views of the separators and of the
        // dictionary stored in the index.
        let separators = ctx.index.allowed_separators(ctx.txn)?;
        let separators: Option<Vec<_>> =
            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
        if let Some(ref separators) = separators {
            tokbuilder.separators(separators);
        }

        let dictionary = ctx.index.dictionary(ctx.txn)?;
        let dictionary: Option<Vec<_>> =
            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        if let Some(ref dictionary) = dictionary {
            tokbuilder.words_dict(dictionary);
        }

        let script_lang_map = ctx.index.script_language(ctx.txn)?;
        if !script_lang_map.is_empty() {
            tokbuilder.allow_list(&script_lang_map);
        }

        let tokenizer = tokbuilder.build();
        // Close the "tokenizer_builder" span before opening the "tokenize" one.
        drop(entered);

        let span = tracing::trace_span!(target: "search::tokens", "tokenize");
        let entered = span.enter();
        let tokens = tokenizer.tokenize(query);
        drop(entered);

        let ExtractedTokens { query_terms, negative_words, negative_phrases } =
            located_query_terms_from_tokens(ctx, tokens, words_limit)?;
        used_negative_operator = !negative_words.is_empty() || !negative_phrases.is_empty();

        let ignored_documents = resolve_negative_words(ctx, &negative_words)?;
        let ignored_phrases = resolve_negative_phrases(ctx, &negative_phrases)?;

        // Documents matching a negative word or phrase can never be returned.
        universe -= ignored_documents;
        universe -= ignored_phrases;

        if query_terms.is_empty() {
            // Do a placeholder search instead
            None
        } else {
            Some(query_terms)
        }
    } else {
        None
    };

    let bucket_sort_output = if let Some(query_terms) = query_terms {
        let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
        located_query_terms = Some(new_located_query_terms);

        let ranking_rules = get_ranking_rules_for_query_graph_search(
            ctx,
            sort_criteria,
            geo_strategy,
            terms_matching_strategy,
        )?;

        // Narrow the universe to the documents resolved from the query graph.
        universe &=
            resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;

        bucket_sort(
            ctx,
            ranking_rules,
            &graph,
            &universe,
            from,
            length,
            scoring_strategy,
            query_graph_logger,
            time_budget,
        )?
    } else {
        let ranking_rules =
            get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
        bucket_sort(
            ctx,
            ranking_rules,
            &PlaceholderQuery,
            &universe,
            from,
            length,
            scoring_strategy,
            placeholder_search_logger,
            time_budget,
        )?
    };

    let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output;
    let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;

    // The candidates are the ones returned by the bucket sort, unless the
    // exhaustive number of hits is requested and a distinct attribute is set:
    // in that case the distinct rule is applied to them.
    if exhaustive_number_hits {
        if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
            if let Some(distinct_fid) = fields_ids_map.id(f) {
                all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
            }
        }
    }

    Ok(PartialSearchResult {
        candidates: all_candidates,
        document_scores: scores,
        documents_ids: docids,
        located_query_terms,
        degraded,
        used_negative_operator,
    })
}
|
2023-03-29 15:21:54 +02:00
|
|
|
|
|
|
|
fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>) -> Result<()> {
|
|
|
|
let sort_criteria = if let Some(sort_criteria) = sort_criteria {
|
|
|
|
sort_criteria
|
|
|
|
} else {
|
|
|
|
return Ok(());
|
|
|
|
};
|
|
|
|
|
|
|
|
if sort_criteria.is_empty() {
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
|
|
|
|
// We check that the sort ranking rule exists and throw an
|
|
|
|
// error if we try to use it and that it doesn't.
|
|
|
|
let sort_ranking_rule_missing = !ctx.index.criteria(ctx.txn)?.contains(&crate::Criterion::Sort);
|
|
|
|
if sort_ranking_rule_missing {
|
|
|
|
return Err(UserError::SortRankingRuleMissing.into());
|
|
|
|
}
|
|
|
|
|
|
|
|
// We check that we are allowed to use the sort criteria, we check
|
|
|
|
// that they are declared in the sortable fields.
|
|
|
|
let sortable_fields = ctx.index.sortable_fields(ctx.txn)?;
|
|
|
|
for asc_desc in sort_criteria {
|
|
|
|
match asc_desc.member() {
|
|
|
|
Member::Field(ref field) if !crate::is_faceted(field, &sortable_fields) => {
|
2023-07-24 14:50:07 +05:30
|
|
|
let (valid_fields, hidden_fields) =
|
|
|
|
ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
|
|
|
|
|
2023-03-29 15:21:54 +02:00
|
|
|
return Err(UserError::InvalidSortableAttribute {
|
|
|
|
field: field.to_string(),
|
2023-07-24 14:50:07 +05:30
|
|
|
valid_fields,
|
|
|
|
hidden_fields,
|
2023-11-23 12:20:44 +01:00
|
|
|
}
|
|
|
|
.into());
|
2023-03-29 15:21:54 +02:00
|
|
|
}
|
|
|
|
Member::Geo(_) if !sortable_fields.contains("_geo") => {
|
2023-07-24 14:50:07 +05:30
|
|
|
let (valid_fields, hidden_fields) =
|
|
|
|
ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
|
|
|
|
|
2023-03-29 15:21:54 +02:00
|
|
|
return Err(UserError::InvalidSortableAttribute {
|
|
|
|
field: "_geo".to_string(),
|
2023-07-24 14:50:07 +05:30
|
|
|
valid_fields,
|
|
|
|
hidden_fields,
|
2023-11-23 12:20:44 +01:00
|
|
|
}
|
|
|
|
.into());
|
2023-03-29 15:21:54 +02:00
|
|
|
}
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
2023-04-06 15:02:23 +02:00
|
|
|
|
|
|
|
/// The outcome of [`execute_search`] or [`execute_vector_search`].
pub struct PartialSearchResult {
    /// The located query terms produced while building the query graph;
    /// `None` for placeholder and vector searches.
    pub located_query_terms: Option<Vec<LocatedQueryTerm>>,
    /// All the candidates reported by the bucket sort, after the distinct rule
    /// when `execute_search` applied it.
    pub candidates: RoaringBitmap,
    /// The document ids returned by the bucket sort.
    pub documents_ids: Vec<DocumentId>,
    /// The scores returned by the bucket sort.
    // NOTE(review): presumably one entry per id in `documents_ids`, in the same order — confirm in `bucket_sort`.
    pub document_scores: Vec<Vec<ScoreDetails>>,

    /// The `degraded` flag reported by the bucket sort.
    // NOTE(review): presumably set when the time budget ran out — confirm in `bucket_sort`.
    pub degraded: bool,
    /// Whether the query contained at least one negative word or phrase;
    /// always `false` for vector searches.
    pub used_negative_operator: bool,
}
|