mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 00:55:00 +08:00
Prevent the exactness
ranking rule from removing random words
Make it strictly follow the term matching strategy
This commit is contained in:
parent
d3a94e8b25
commit
3421125a55
@ -51,7 +51,6 @@ use resolve_query_graph::compute_query_graph_docids;
|
|||||||
use sort::Sort;
|
use sort::Sort;
|
||||||
|
|
||||||
use self::interner::Interned;
|
use self::interner::Interned;
|
||||||
use self::query_term::ExactTerm;
|
|
||||||
|
|
||||||
/// A structure used throughout the execution of a search query.
|
/// A structure used throughout the execution of a search query.
|
||||||
pub struct SearchContext<'ctx> {
|
pub struct SearchContext<'ctx> {
|
||||||
@ -120,73 +119,20 @@ fn resolve_maximally_reduced_query_graph(
|
|||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_docids_containing_any_exact_word(
|
|
||||||
ctx: &mut SearchContext,
|
|
||||||
universe: &RoaringBitmap,
|
|
||||||
query_graph: &QueryGraph,
|
|
||||||
) -> Result<RoaringBitmap> {
|
|
||||||
let mut docids = RoaringBitmap::new();
|
|
||||||
for (_, node) in query_graph.nodes.iter() {
|
|
||||||
let term = match &node.data {
|
|
||||||
query_graph::QueryNodeData::Term(term) => term,
|
|
||||||
query_graph::QueryNodeData::Deleted
|
|
||||||
| query_graph::QueryNodeData::Start
|
|
||||||
| query_graph::QueryNodeData::End => {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if term.term_ids.len() != 1 {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let Some(exact_term) = term.term_subset.exact_term(ctx) else {
|
|
||||||
continue
|
|
||||||
};
|
|
||||||
let exact_term_docids = match exact_term {
|
|
||||||
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)? & universe,
|
|
||||||
ExactTerm::Word(word) => {
|
|
||||||
if let Some(word_docids) = ctx.word_docids(Word::Original(word))? {
|
|
||||||
word_docids & universe
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
docids |= exact_term_docids;
|
|
||||||
}
|
|
||||||
Ok(docids)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn resolve_universe(
|
fn resolve_universe(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
initial_universe: &RoaringBitmap,
|
initial_universe: &RoaringBitmap,
|
||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
method: UniverseResolutionMethod,
|
|
||||||
matching_strategy: TermsMatchingStrategy,
|
matching_strategy: TermsMatchingStrategy,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
match method {
|
resolve_maximally_reduced_query_graph(
|
||||||
UniverseResolutionMethod::TermMatchingStrategyOnly => {
|
ctx,
|
||||||
resolve_maximally_reduced_query_graph(
|
initial_universe,
|
||||||
ctx,
|
query_graph,
|
||||||
initial_universe,
|
matching_strategy,
|
||||||
query_graph,
|
logger,
|
||||||
matching_strategy,
|
)
|
||||||
logger,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
UniverseResolutionMethod::TermMatchingStrategyAndExactness => {
|
|
||||||
let mut resolved_universe = resolve_maximally_reduced_query_graph(
|
|
||||||
ctx,
|
|
||||||
initial_universe,
|
|
||||||
query_graph,
|
|
||||||
matching_strategy,
|
|
||||||
logger,
|
|
||||||
)?;
|
|
||||||
resolved_universe |=
|
|
||||||
resolve_docids_containing_any_exact_word(ctx, initial_universe, query_graph)?;
|
|
||||||
Ok(resolved_universe)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the list of initialised ranking rules to be used for a placeholder search.
|
/// Return the list of initialised ranking rules to be used for a placeholder search.
|
||||||
@ -233,17 +179,12 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
|||||||
Ok(ranking_rules)
|
Ok(ranking_rules)
|
||||||
}
|
}
|
||||||
|
|
||||||
enum UniverseResolutionMethod {
|
|
||||||
TermMatchingStrategyOnly,
|
|
||||||
TermMatchingStrategyAndExactness,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the list of initialised ranking rules to be used for a query graph search.
|
/// Return the list of initialised ranking rules to be used for a query graph search.
|
||||||
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
) -> Result<(Vec<BoxRankingRule<'ctx, QueryGraph>>, UniverseResolutionMethod)> {
|
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
|
||||||
// query graph search
|
// query graph search
|
||||||
let mut words = false;
|
let mut words = false;
|
||||||
let mut typo = false;
|
let mut typo = false;
|
||||||
@ -254,14 +195,15 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|||||||
let mut asc = HashSet::new();
|
let mut asc = HashSet::new();
|
||||||
let mut desc = HashSet::new();
|
let mut desc = HashSet::new();
|
||||||
|
|
||||||
let mut universe_resolution_method = UniverseResolutionMethod::TermMatchingStrategyOnly;
|
|
||||||
|
|
||||||
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
|
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
|
||||||
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
|
||||||
for rr in settings_ranking_rules {
|
for rr in settings_ranking_rules {
|
||||||
// Add Words before any of: typo, proximity, attribute
|
// Add Words before any of: typo, proximity, attribute
|
||||||
match rr {
|
match rr {
|
||||||
crate::Criterion::Typo | crate::Criterion::Attribute | crate::Criterion::Proximity => {
|
crate::Criterion::Typo
|
||||||
|
| crate::Criterion::Attribute
|
||||||
|
| crate::Criterion::Proximity
|
||||||
|
| crate::Criterion::Exactness => {
|
||||||
if !words {
|
if !words {
|
||||||
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
|
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
|
||||||
words = true;
|
words = true;
|
||||||
@ -313,11 +255,6 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|||||||
ranking_rules.push(Box::new(ExactAttribute::new()));
|
ranking_rules.push(Box::new(ExactAttribute::new()));
|
||||||
ranking_rules.push(Box::new(Exactness::new()));
|
ranking_rules.push(Box::new(Exactness::new()));
|
||||||
exactness = true;
|
exactness = true;
|
||||||
|
|
||||||
if !words {
|
|
||||||
universe_resolution_method =
|
|
||||||
UniverseResolutionMethod::TermMatchingStrategyAndExactness;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
crate::Criterion::Asc(field_name) => {
|
crate::Criterion::Asc(field_name) => {
|
||||||
if asc.contains(&field_name) {
|
if asc.contains(&field_name) {
|
||||||
@ -335,7 +272,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok((ranking_rules, universe_resolution_method))
|
Ok(ranking_rules)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||||
@ -417,17 +354,11 @@ pub fn execute_search(
|
|||||||
|
|
||||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||||
|
|
||||||
let (ranking_rules, universe_resolution_method) =
|
let ranking_rules =
|
||||||
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
|
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
|
||||||
|
|
||||||
universe = resolve_universe(
|
universe =
|
||||||
ctx,
|
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
|
||||||
&universe,
|
|
||||||
&graph,
|
|
||||||
universe_resolution_method,
|
|
||||||
terms_matching_strategy,
|
|
||||||
query_graph_logger,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
|
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
|
||||||
} else {
|
} else {
|
||||||
|
@ -3,13 +3,14 @@ use roaring::RoaringBitmap;
|
|||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
|
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
|
||||||
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||||
use crate::search::new::Word;
|
use crate::search::new::Word;
|
||||||
use crate::{Result, SearchContext};
|
use crate::{Result, SearchContext};
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub enum ExactnessCondition {
|
pub enum ExactnessCondition {
|
||||||
ExactInAttribute(LocatedQueryTermSubset),
|
ExactInAttribute(LocatedQueryTermSubset),
|
||||||
Skip(LocatedQueryTermSubset),
|
Any(LocatedQueryTermSubset),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum ExactnessGraph {}
|
pub enum ExactnessGraph {}
|
||||||
@ -54,7 +55,11 @@ impl RankingRuleGraphTrait for ExactnessGraph {
|
|||||||
end_term_subset.term_subset.make_mandatory();
|
end_term_subset.term_subset.make_mandatory();
|
||||||
(compute_docids(ctx, dest_node, universe)?, end_term_subset)
|
(compute_docids(ctx, dest_node, universe)?, end_term_subset)
|
||||||
}
|
}
|
||||||
ExactnessCondition::Skip(dest_node) => (universe.clone(), dest_node.clone()),
|
ExactnessCondition::Any(dest_node) => {
|
||||||
|
let docids =
|
||||||
|
universe & compute_query_term_subset_docids(ctx, &dest_node.term_subset)?;
|
||||||
|
(docids, dest_node.clone())
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(ComputedCondition {
|
Ok(ComputedCondition {
|
||||||
@ -74,7 +79,7 @@ impl RankingRuleGraphTrait for ExactnessGraph {
|
|||||||
let exact_condition = ExactnessCondition::ExactInAttribute(dest_node.clone());
|
let exact_condition = ExactnessCondition::ExactInAttribute(dest_node.clone());
|
||||||
let exact_condition = conditions_interner.insert(exact_condition);
|
let exact_condition = conditions_interner.insert(exact_condition);
|
||||||
|
|
||||||
let skip_condition = ExactnessCondition::Skip(dest_node.clone());
|
let skip_condition = ExactnessCondition::Any(dest_node.clone());
|
||||||
let skip_condition = conditions_interner.insert(skip_condition);
|
let skip_condition = conditions_interner.insert(skip_condition);
|
||||||
|
|
||||||
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
|
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
|
||||||
|
@ -6,19 +6,17 @@ This module tests the following properties about the exactness ranking rule:
|
|||||||
2. documents which have an attribute which start with the whole query
|
2. documents which have an attribute which start with the whole query
|
||||||
3. documents which contain the most exact words from the query
|
3. documents which contain the most exact words from the query
|
||||||
|
|
||||||
- the set of all candidates when `exactness` precedes `word` is the union of:
|
- the `exactness` ranking rule must be preceded by the `words` ranking rule
|
||||||
1. the same set of candidates that would be returned normally
|
|
||||||
2. the set of documents that contain at least one exact word from the query
|
|
||||||
|
|
||||||
- if it is placed after `word`, then it will only sort documents by:
|
- if `words` has already removed terms from the query, then exactness will sort documents as follows:
|
||||||
1. those that have an attribute which is equal to the whole remaining query, if this query does not have any "gap"
|
1. those that have an attribute which is equal to the whole remaining query, if this query does not have any "gap"
|
||||||
2. those that have an attribute which start with the whole remaining query, if this query does not have any "gap"
|
2. those that have an attribute which start with the whole remaining query, if this query does not have any "gap"
|
||||||
3. those that contain the most exact words from the remaining query
|
3. those that contain the most exact words from the remaining query
|
||||||
|
|
||||||
- if it is followed by other ranking rules, then:
|
- if it is followed by other graph-based ranking rules (`typo`, `proximity`, `attribute`).
|
||||||
1. `word` will not remove the exact terms matched by `exactness`
|
Then these rules will only work with:
|
||||||
2. graph-based ranking rules (`typo`, `proximity`, `attribute`) will only work with
|
1. the exact terms selected by `exactness`
|
||||||
(1) the exact terms selected by `exactness` or (2) the full query term otherwise
|
2. the full query term otherwise
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -440,14 +438,14 @@ fn test_exactness_simple_ordered() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("the quick brown fox jumps over the lazy dog");
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 6, 7, 5, 4, 3, 2, 1]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 7, 6, 5, 4, 3, 2, 1]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"the quick brown fox jumps over the lazy dog\"",
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
"\"the quick brown fox jumps over the lazy\"",
|
"\"the quick brown fox jumps over the lazy\"",
|
||||||
"\"the quick brown fox jumps over\"",
|
|
||||||
"\"the quick brown fox jumps over the\"",
|
"\"the quick brown fox jumps over the\"",
|
||||||
|
"\"the quick brown fox jumps over\"",
|
||||||
"\"the quick brown fox jumps\"",
|
"\"the quick brown fox jumps\"",
|
||||||
"\"the quick brown fox\"",
|
"\"the quick brown fox\"",
|
||||||
"\"the quick brown\"",
|
"\"the quick brown\"",
|
||||||
@ -467,19 +465,17 @@ fn test_exactness_simple_reversed() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("the quick brown fox jumps over the lazy dog");
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 7, 6, 5, 4, 3, 2, 1]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 3, 4, 5, 6, 7]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"the quick brown fox jumps over the lazy dog\"",
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
"\"quick brown fox jumps over the lazy dog\"",
|
"\"quick brown fox jumps over the lazy dog\"",
|
||||||
"\"brown fox jumps over the lazy dog\"",
|
|
||||||
"\"fox jumps over the lazy dog\"",
|
|
||||||
"\"jumps over the lazy dog\"",
|
|
||||||
"\"over the lazy dog\"",
|
|
||||||
"\"the lazy dog\"",
|
"\"the lazy dog\"",
|
||||||
"\"lazy dog\"",
|
"\"over the lazy dog\"",
|
||||||
"\"dog\"",
|
"\"jumps over the lazy dog\"",
|
||||||
|
"\"fox jumps over the lazy dog\"",
|
||||||
|
"\"brown fox jumps over the lazy dog\"",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -487,19 +483,17 @@ fn test_exactness_simple_reversed() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("the quick brown fox jumps over the lazy dog");
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 7, 6, 5, 4, 3, 2, 1]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 8, 3, 4, 5, 6, 7]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"the quick brown fox jumps over the lazy dog\"",
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
"\"quick brown fox jumps over the lazy dog\"",
|
"\"quick brown fox jumps over the lazy dog\"",
|
||||||
"\"brown fox jumps over the lazy dog\"",
|
|
||||||
"\"fox jumps over the lazy dog\"",
|
|
||||||
"\"jumps over the lazy dog\"",
|
|
||||||
"\"over the lazy dog\"",
|
|
||||||
"\"the lazy dog\"",
|
"\"the lazy dog\"",
|
||||||
"\"lazy dog\"",
|
"\"over the lazy dog\"",
|
||||||
"\"dog\"",
|
"\"jumps over the lazy dog\"",
|
||||||
|
"\"fox jumps over the lazy dog\"",
|
||||||
|
"\"brown fox jumps over the lazy dog\"",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -514,18 +508,16 @@ fn test_exactness_simple_random() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("the quick brown fox jumps over the lazy dog");
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[8, 7, 5, 6, 3, 4, 1, 2]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[8, 7, 4, 6, 3, 5]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"the jumps dog quick over brown lazy fox\"",
|
"\"the jumps dog quick over brown lazy fox\"",
|
||||||
"\"the dog brown over jumps quick lazy\"",
|
"\"the dog brown over jumps quick lazy\"",
|
||||||
"\"fox the lazy dog brown\"",
|
"\"jump dog quick the\"",
|
||||||
"\"jump fox quick lazy the dog\"",
|
"\"jump fox quick lazy the dog\"",
|
||||||
"\"brown the lazy\"",
|
"\"brown the lazy\"",
|
||||||
"\"jump dog quick the\"",
|
"\"fox the lazy dog brown\"",
|
||||||
"\"over\"",
|
|
||||||
"\"jump dog\"",
|
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -540,17 +532,13 @@ fn test_exactness_attribute_starts_with_simple() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("this balcony");
|
s.query("this balcony");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 3, 4, 5, 6]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"this balcony\"",
|
"\"this balcony\"",
|
||||||
"\"this balcony is overlooking the sea\"",
|
"\"this balcony is overlooking the sea\"",
|
||||||
"\"what a lovely view from this balcony, I love it\"",
|
"\"what a lovely view from this balcony, I love it\"",
|
||||||
"\"over looking the sea is a beautiful balcony\"",
|
|
||||||
"\"a beautiful balcony is overlooking the sea\"",
|
|
||||||
"\"overlooking the sea is a beautiful balcony, I love it\"",
|
|
||||||
"\"overlooking the sea is a beautiful balcony\"",
|
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -565,17 +553,14 @@ fn test_exactness_attribute_starts_with_phrase() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("\"overlooking the sea\" is a beautiful balcony");
|
s.query("\"overlooking the sea\" is a beautiful balcony");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 0, 2]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 1]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"overlooking the sea is a beautiful balcony\"",
|
"\"overlooking the sea is a beautiful balcony\"",
|
||||||
"\"overlooking the sea is a beautiful balcony, I love it\"",
|
"\"overlooking the sea is a beautiful balcony, I love it\"",
|
||||||
"\"a beautiful balcony is overlooking the sea\"",
|
"\"a beautiful balcony is overlooking the sea\"",
|
||||||
"\"over looking the sea is a beautiful balcony\"",
|
|
||||||
"\"this balcony is overlooking the sea\"",
|
"\"this balcony is overlooking the sea\"",
|
||||||
"\"what a lovely view from this balcony, I love it\"",
|
|
||||||
"\"this balcony\"",
|
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -583,7 +568,7 @@ fn test_exactness_attribute_starts_with_phrase() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("overlooking the sea is a beautiful balcony");
|
s.query("overlooking the sea is a beautiful balcony");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 0, 2, 7]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 7]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
@ -592,8 +577,6 @@ fn test_exactness_attribute_starts_with_phrase() {
|
|||||||
"\"a beautiful balcony is overlooking the sea\"",
|
"\"a beautiful balcony is overlooking the sea\"",
|
||||||
"\"over looking the sea is a beautiful balcony\"",
|
"\"over looking the sea is a beautiful balcony\"",
|
||||||
"\"this balcony is overlooking the sea\"",
|
"\"this balcony is overlooking the sea\"",
|
||||||
"\"what a lovely view from this balcony, I love it\"",
|
|
||||||
"\"this balcony\"",
|
|
||||||
"\"overlooking\"",
|
"\"overlooking\"",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
@ -609,19 +592,16 @@ fn test_exactness_all_candidates_with_typo() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("overlocking the sea is a beautiful balcony");
|
s.query("overlocking the sea is a beautiful balcony");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 4, 5, 6, 1, 0, 2, 7]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 6, 1, 7]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
// "overlooking" is returned here because the term matching strategy allows it
|
// "overlooking" is returned here because the term matching strategy allows it
|
||||||
// but it has the worst exactness score (0 exact words)
|
// but it has the worst exactness score (0 exact words)
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"over looking the sea is a beautiful balcony\"",
|
|
||||||
"\"a beautiful balcony is overlooking the sea\"",
|
"\"a beautiful balcony is overlooking the sea\"",
|
||||||
"\"overlooking the sea is a beautiful balcony, I love it\"",
|
"\"overlooking the sea is a beautiful balcony, I love it\"",
|
||||||
"\"overlooking the sea is a beautiful balcony\"",
|
"\"overlooking the sea is a beautiful balcony\"",
|
||||||
"\"this balcony is overlooking the sea\"",
|
"\"this balcony is overlooking the sea\"",
|
||||||
"\"what a lovely view from this balcony, I love it\"",
|
|
||||||
"\"this balcony\"",
|
|
||||||
"\"overlooking\"",
|
"\"overlooking\"",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
@ -686,26 +666,26 @@ fn test_words_after_exactness() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("the quick brown fox jumps over the lazy dog");
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 16, 17, 9, 15, 8, 14, 6, 7, 13, 5, 4, 12, 3, 2, 1, 11]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"the quick brown fox jumps over the lazy dog\"",
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
"\"the quick brown fox jumps over the lazy\"",
|
|
||||||
"\"the quick brown fox jumps over\"",
|
|
||||||
"\"the quick brown fox jumps over the\"",
|
|
||||||
"\"the quack briwn fox jlmps over the lazy dog\"",
|
"\"the quack briwn fox jlmps over the lazy dog\"",
|
||||||
"\"the quick brown fox jumps\"",
|
"\"the quick brown fox jumps over the lazy\"",
|
||||||
"\"the quack briwn fox jlmps over the lazy\"",
|
"\"the quack briwn fox jlmps over the lazy\"",
|
||||||
"\"the quick brown fox\"",
|
"\"the quick brown fox jumps over the\"",
|
||||||
|
"\"the quick brown fox jumps over\"",
|
||||||
"\"the quack briwn fox jlmps over\"",
|
"\"the quack briwn fox jlmps over\"",
|
||||||
"\"the quack briwn fox jlmps over the\"",
|
"\"the quack briwn fox jlmps over the\"",
|
||||||
"\"the quick brown\"",
|
"\"the quick brown fox jumps\"",
|
||||||
"\"the quack briwn fox jlmps\"",
|
"\"the quack briwn fox jlmps\"",
|
||||||
|
"\"the quick brown fox\"",
|
||||||
"\"the quack briwn fox\"",
|
"\"the quack briwn fox\"",
|
||||||
"\"the quick\"",
|
"\"the quick brown\"",
|
||||||
"\"the quack briwn\"",
|
"\"the quack briwn\"",
|
||||||
|
"\"the quick\"",
|
||||||
"\"the quack\"",
|
"\"the quack\"",
|
||||||
"\"the\"",
|
"\"the\"",
|
||||||
"\"the\"",
|
"\"the\"",
|
||||||
@ -729,7 +709,7 @@ fn test_proximity_after_exactness() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("the quick brown fox jumps over the lazy dog");
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 5, 4, 3, 8, 6, 7]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 4, 5, 8, 7, 3, 6]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
@ -737,12 +717,12 @@ fn test_proximity_after_exactness() {
|
|||||||
"\"the quick brown fox jumps over the lazy dog\"",
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
"\"the quick brown fox jumps over the very lazy dog\"",
|
"\"the quick brown fox jumps over the very lazy dog\"",
|
||||||
"\"lazy jumps dog brown quick the over fox the\"",
|
"\"lazy jumps dog brown quick the over fox the\"",
|
||||||
"\"the quick brown fox over the lazy dog\"",
|
|
||||||
"\"the quick brown fox over the very lazy dog\"",
|
"\"the quick brown fox over the very lazy dog\"",
|
||||||
"\"dog brown quick the over fox the lazy\"",
|
"\"the quick brown fox over the lazy dog\"",
|
||||||
"\"the quick brown fox over\"",
|
"\"the quick brown fox over\"",
|
||||||
"\"brown quick the over fox\"",
|
|
||||||
"\"the very quick brown fox over\"",
|
"\"the very quick brown fox over\"",
|
||||||
|
"\"dog brown quick the over fox the lazy\"",
|
||||||
|
"\"brown quick the over fox\"",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user