mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 09:35:51 +08:00
Fix bugs and add tests to exactness ranking rule
This commit is contained in:
parent
8f2e971879
commit
d3a94e8b25
@ -91,6 +91,12 @@ impl State {
|
|||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
|
// An ordered list of the (remaining) query terms, with data extracted from them:
|
||||||
|
// 0. exact subterm. If it doesn't exist, the term is skipped.
|
||||||
|
// 1. start position of the term
|
||||||
|
// 2. id of the term
|
||||||
|
let mut count_all_positions = 0;
|
||||||
|
|
||||||
let mut exact_term_position_ids: Vec<(ExactTerm, u16, u8)> =
|
let mut exact_term_position_ids: Vec<(ExactTerm, u16, u8)> =
|
||||||
Vec::with_capacity(query_graph.nodes.len() as usize);
|
Vec::with_capacity(query_graph.nodes.len() as usize);
|
||||||
for (_, node) in query_graph.nodes.iter() {
|
for (_, node) in query_graph.nodes.iter() {
|
||||||
@ -101,6 +107,7 @@ impl State {
|
|||||||
} else {
|
} else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
count_all_positions += term.positions.len();
|
||||||
exact_term_position_ids.push((
|
exact_term_position_ids.push((
|
||||||
exact_term,
|
exact_term,
|
||||||
*term.positions.start(),
|
*term.positions.start(),
|
||||||
@ -195,13 +202,15 @@ impl State {
|
|||||||
if !intersection.is_empty() {
|
if !intersection.is_empty() {
|
||||||
// TODO: although not really worth it in terms of performance,
|
// TODO: although not really worth it in terms of performance,
|
||||||
// if would be good to put this in cache for the sake of consistency
|
// if would be good to put this in cache for the sake of consistency
|
||||||
let candidates_with_exact_word_count = ctx
|
let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize {
|
||||||
.index
|
ctx.index
|
||||||
.field_id_word_count_docids
|
.field_id_word_count_docids
|
||||||
.get(ctx.txn, &(fid, exact_term_position_ids.len() as u8))?
|
.get(ctx.txn, &(fid, count_all_positions as u8))?
|
||||||
.unwrap_or_default();
|
.unwrap_or_default()
|
||||||
// TODO: consider if we must store the candidates as arrays, or if there is a way to perform the union
|
} else {
|
||||||
// here.
|
RoaringBitmap::default()
|
||||||
|
};
|
||||||
|
|
||||||
candidates_per_attribute.push(FieldCandidates {
|
candidates_per_attribute.push(FieldCandidates {
|
||||||
start_with_exact: intersection,
|
start_with_exact: intersection,
|
||||||
exact_word_count: candidates_with_exact_word_count,
|
exact_word_count: candidates_with_exact_word_count,
|
||||||
|
@ -310,11 +310,11 @@ impl QueryGraph {
|
|||||||
rank as u16
|
rank as u16
|
||||||
};
|
};
|
||||||
let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
|
let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
|
||||||
let mut at_least_one_phrase = false;
|
let mut at_least_one_mandatory_term = false;
|
||||||
for (node_id, node) in self.nodes.iter() {
|
for (node_id, node) in self.nodes.iter() {
|
||||||
let QueryNodeData::Term(t) = &node.data else { continue };
|
let QueryNodeData::Term(t) = &node.data else { continue };
|
||||||
if t.term_subset.original_phrase(ctx).is_some() {
|
if t.term_subset.original_phrase(ctx).is_some() || t.term_subset.is_mandatory() {
|
||||||
at_least_one_phrase = true;
|
at_least_one_mandatory_term = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let mut cost = 0;
|
let mut cost = 0;
|
||||||
@ -327,7 +327,7 @@ impl QueryGraph {
|
|||||||
.insert(node_id);
|
.insert(node_id);
|
||||||
}
|
}
|
||||||
let mut res: Vec<_> = nodes_to_remove.into_values().collect();
|
let mut res: Vec<_> = nodes_to_remove.into_values().collect();
|
||||||
if !at_least_one_phrase {
|
if !at_least_one_mandatory_term {
|
||||||
res.pop();
|
res.pop();
|
||||||
}
|
}
|
||||||
res
|
res
|
||||||
|
@ -4,6 +4,7 @@ mod parse_query;
|
|||||||
mod phrase;
|
mod phrase;
|
||||||
|
|
||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
|
use std::iter::FromIterator;
|
||||||
use std::ops::RangeInclusive;
|
use std::ops::RangeInclusive;
|
||||||
|
|
||||||
use compute_derivations::partially_initialized_term_from_word;
|
use compute_derivations::partially_initialized_term_from_word;
|
||||||
@ -30,6 +31,12 @@ pub struct QueryTermSubset {
|
|||||||
zero_typo_subset: NTypoTermSubset,
|
zero_typo_subset: NTypoTermSubset,
|
||||||
one_typo_subset: NTypoTermSubset,
|
one_typo_subset: NTypoTermSubset,
|
||||||
two_typo_subset: NTypoTermSubset,
|
two_typo_subset: NTypoTermSubset,
|
||||||
|
/// `true` if the term cannot be deleted through the term matching strategy
|
||||||
|
///
|
||||||
|
/// Note that there are other reasons for which a term cannot be deleted, such as
|
||||||
|
/// being a phrase. In that case, this field could be set to `false`, but it
|
||||||
|
/// still wouldn't be deleteable by the term matching strategy.
|
||||||
|
mandatory: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
@ -114,6 +121,12 @@ impl ExactTerm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl QueryTermSubset {
|
impl QueryTermSubset {
|
||||||
|
pub fn is_mandatory(&self) -> bool {
|
||||||
|
self.mandatory
|
||||||
|
}
|
||||||
|
pub fn make_mandatory(&mut self) {
|
||||||
|
self.mandatory = true;
|
||||||
|
}
|
||||||
pub fn exact_term(&self, ctx: &SearchContext) -> Option<ExactTerm> {
|
pub fn exact_term(&self, ctx: &SearchContext) -> Option<ExactTerm> {
|
||||||
let full_query_term = ctx.term_interner.get(self.original);
|
let full_query_term = ctx.term_interner.get(self.original);
|
||||||
if full_query_term.ngram_words.is_some() {
|
if full_query_term.ngram_words.is_some() {
|
||||||
@ -135,6 +148,7 @@ impl QueryTermSubset {
|
|||||||
zero_typo_subset: NTypoTermSubset::Nothing,
|
zero_typo_subset: NTypoTermSubset::Nothing,
|
||||||
one_typo_subset: NTypoTermSubset::Nothing,
|
one_typo_subset: NTypoTermSubset::Nothing,
|
||||||
two_typo_subset: NTypoTermSubset::Nothing,
|
two_typo_subset: NTypoTermSubset::Nothing,
|
||||||
|
mandatory: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn full(for_term: Interned<QueryTerm>) -> Self {
|
pub fn full(for_term: Interned<QueryTerm>) -> Self {
|
||||||
@ -143,6 +157,7 @@ impl QueryTermSubset {
|
|||||||
zero_typo_subset: NTypoTermSubset::All,
|
zero_typo_subset: NTypoTermSubset::All,
|
||||||
one_typo_subset: NTypoTermSubset::All,
|
one_typo_subset: NTypoTermSubset::All,
|
||||||
two_typo_subset: NTypoTermSubset::All,
|
two_typo_subset: NTypoTermSubset::All,
|
||||||
|
mandatory: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -352,6 +367,28 @@ impl QueryTermSubset {
|
|||||||
_ => panic!(),
|
_ => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pub fn keep_only_exact_term(&mut self, ctx: &SearchContext) {
|
||||||
|
if let Some(term) = self.exact_term(ctx) {
|
||||||
|
match term {
|
||||||
|
ExactTerm::Phrase(p) => {
|
||||||
|
self.zero_typo_subset = NTypoTermSubset::Subset {
|
||||||
|
words: BTreeSet::new(),
|
||||||
|
phrases: BTreeSet::from_iter([p]),
|
||||||
|
};
|
||||||
|
self.clear_one_typo_subset();
|
||||||
|
self.clear_two_typo_subset();
|
||||||
|
}
|
||||||
|
ExactTerm::Word(w) => {
|
||||||
|
self.zero_typo_subset = NTypoTermSubset::Subset {
|
||||||
|
words: BTreeSet::from_iter([w]),
|
||||||
|
phrases: BTreeSet::new(),
|
||||||
|
};
|
||||||
|
self.clear_one_typo_subset();
|
||||||
|
self.clear_two_typo_subset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
pub fn clear_zero_typo_subset(&mut self) {
|
pub fn clear_zero_typo_subset(&mut self) {
|
||||||
self.zero_typo_subset = NTypoTermSubset::Nothing;
|
self.zero_typo_subset = NTypoTermSubset::Nothing;
|
||||||
}
|
}
|
||||||
|
@ -34,7 +34,7 @@ fn compute_docids(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// TODO: synonyms?
|
|
||||||
candidates &= universe;
|
candidates &= universe;
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
}
|
}
|
||||||
@ -47,18 +47,21 @@ impl RankingRuleGraphTrait for ExactnessGraph {
|
|||||||
condition: &Self::Condition,
|
condition: &Self::Condition,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<ComputedCondition> {
|
) -> Result<ComputedCondition> {
|
||||||
let (docids, dest_node) = match condition {
|
let (docids, end_term_subset) = match condition {
|
||||||
ExactnessCondition::ExactInAttribute(dest_node) => {
|
ExactnessCondition::ExactInAttribute(dest_node) => {
|
||||||
(compute_docids(ctx, dest_node, universe)?, dest_node)
|
let mut end_term_subset = dest_node.clone();
|
||||||
|
end_term_subset.term_subset.keep_only_exact_term(ctx);
|
||||||
|
end_term_subset.term_subset.make_mandatory();
|
||||||
|
(compute_docids(ctx, dest_node, universe)?, end_term_subset)
|
||||||
}
|
}
|
||||||
ExactnessCondition::Skip(dest_node) => (universe.clone(), dest_node),
|
ExactnessCondition::Skip(dest_node) => (universe.clone(), dest_node.clone()),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(ComputedCondition {
|
Ok(ComputedCondition {
|
||||||
docids,
|
docids,
|
||||||
universe_len: universe.len(),
|
universe_len: universe.len(),
|
||||||
start_term_subset: None,
|
start_term_subset: None,
|
||||||
// TODO/FIXME: modify `end_term_subset` to signal to the next ranking rules that the term cannot be removed
|
end_term_subset,
|
||||||
end_term_subset: dest_node.clone(),
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,8 +165,8 @@ pub fn compute_query_graph_docids(
|
|||||||
positions: _,
|
positions: _,
|
||||||
term_ids: _,
|
term_ids: _,
|
||||||
}) => {
|
}) => {
|
||||||
let phrase_docids = compute_query_term_subset_docids(ctx, term_subset)?;
|
let node_docids = compute_query_term_subset_docids(ctx, term_subset)?;
|
||||||
predecessors_docids & phrase_docids
|
predecessors_docids & node_docids
|
||||||
}
|
}
|
||||||
QueryNodeData::Deleted => {
|
QueryNodeData::Deleted => {
|
||||||
panic!()
|
panic!()
|
||||||
|
@ -14,6 +14,11 @@ This module tests the following properties about the exactness ranking rule:
|
|||||||
1. those that have an attribute which is equal to the whole remaining query, if this query does not have any "gap"
|
1. those that have an attribute which is equal to the whole remaining query, if this query does not have any "gap"
|
||||||
2. those that have an attribute which start with the whole remaining query, if this query does not have any "gap"
|
2. those that have an attribute which start with the whole remaining query, if this query does not have any "gap"
|
||||||
3. those that contain the most exact words from the remaining query
|
3. those that contain the most exact words from the remaining query
|
||||||
|
|
||||||
|
- if it is followed by other ranking rules, then:
|
||||||
|
1. `word` will not remove the exact terms matched by `exactness`
|
||||||
|
2. graph-based ranking rules (`typo`, `proximity`, `attribute`) will only work with
|
||||||
|
(1) the exact terms selected by `exactness` or (2) the full query term otherwise
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -21,7 +26,7 @@ use crate::{
|
|||||||
SearchResult, TermsMatchingStrategy,
|
SearchResult, TermsMatchingStrategy,
|
||||||
};
|
};
|
||||||
|
|
||||||
fn create_index_exact_words_simple_ordered() -> TempIndex {
|
fn create_index_simple_ordered() -> TempIndex {
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
|
|
||||||
index
|
index
|
||||||
@ -80,7 +85,7 @@ fn create_index_exact_words_simple_ordered() -> TempIndex {
|
|||||||
index
|
index
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_index_exact_words_simple_reversed() -> TempIndex {
|
fn create_index_simple_reversed() -> TempIndex {
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
|
|
||||||
index
|
index
|
||||||
@ -138,7 +143,7 @@ fn create_index_exact_words_simple_reversed() -> TempIndex {
|
|||||||
index
|
index
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_index_exact_words_simple_random() -> TempIndex {
|
fn create_index_simple_random() -> TempIndex {
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
|
|
||||||
index
|
index
|
||||||
@ -242,9 +247,192 @@ fn create_index_attribute_starts_with() -> TempIndex {
|
|||||||
index
|
index
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn create_index_simple_ordered_with_typos() -> TempIndex {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_primary_key("id".to_owned());
|
||||||
|
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
s.set_criteria(vec![Criterion::Exactness]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"text": "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"text": "the",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"text": "the quack",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"text": "the quack briwn",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"text": "the quack briwn fox",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"text": "the quack briwn fox jlmps",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 6,
|
||||||
|
"text": "the quack briwn fox jlmps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 7,
|
||||||
|
"text": "the quack briwn fox jlmps over the",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 8,
|
||||||
|
"text": "the quack briwn fox jlmps over the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 9,
|
||||||
|
"text": "the quack briwn fox jlmps over the lazy dog",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 10,
|
||||||
|
"text": "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 11,
|
||||||
|
"text": "the",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 12,
|
||||||
|
"text": "the quick",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 13,
|
||||||
|
"text": "the quick brown",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 14,
|
||||||
|
"text": "the quick brown fox",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 15,
|
||||||
|
"text": "the quick brown fox jumps",
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"id": 16,
|
||||||
|
"text": "the quick brown fox jumps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 17,
|
||||||
|
"text": "the quick brown fox jumps over the",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 18,
|
||||||
|
"text": "the quick brown fox jumps over the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 19,
|
||||||
|
"text": "the quick brown fox jumps over the lazy dog",
|
||||||
|
},
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_index_with_varying_proximities() -> TempIndex {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_primary_key("id".to_owned());
|
||||||
|
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"text": "lazy jumps dog brown quick the over fox the",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"text": "the quick brown fox jumps over the very lazy dog"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"text": "the quick brown fox jumps over the lazy dog",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"text": "dog brown quick the over fox the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"text": "the quick brown fox over the very lazy dog"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"text": "the quick brown fox over the lazy dog",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 6,
|
||||||
|
"text": "brown quick the over fox",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 7,
|
||||||
|
"text": "the very quick brown fox over"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 8,
|
||||||
|
"text": "the quick brown fox over",
|
||||||
|
},
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_index_all_equal_except_proximity_between_ignored_terms() -> TempIndex {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_primary_key("id".to_owned());
|
||||||
|
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"text": "lazy jumps dog brown quick the over fox the"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"text": "lazy jumps dog brown quick the over fox the. quack briwn jlmps",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"text": "lazy jumps dog brown quick the over fox the. quack briwn jlmps overt",
|
||||||
|
},
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_exactness_simple_ordered() {
|
fn test_exactness_simple_ordered() {
|
||||||
let index = create_index_exact_words_simple_ordered();
|
let index = create_index_simple_ordered();
|
||||||
|
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
@ -271,7 +459,7 @@ fn test_exactness_simple_ordered() {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_exactness_simple_reversed() {
|
fn test_exactness_simple_reversed() {
|
||||||
let index = create_index_exact_words_simple_reversed();
|
let index = create_index_simple_reversed();
|
||||||
|
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
@ -318,7 +506,7 @@ fn test_exactness_simple_reversed() {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_exactness_simple_random() {
|
fn test_exactness_simple_random() {
|
||||||
let index = create_index_exact_words_simple_random();
|
let index = create_index_simple_random();
|
||||||
|
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
@ -377,13 +565,12 @@ fn test_exactness_attribute_starts_with_phrase() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.query("\"overlooking the sea\" is a beautiful balcony");
|
s.query("\"overlooking the sea\" is a beautiful balcony");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 6, 4, 3, 1, 0, 2]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 0, 2]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
// TODO: this is incorrect, the first document returned here should actually be the second one
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"overlooking the sea is a beautiful balcony, I love it\"",
|
|
||||||
"\"overlooking the sea is a beautiful balcony\"",
|
"\"overlooking the sea is a beautiful balcony\"",
|
||||||
|
"\"overlooking the sea is a beautiful balcony, I love it\"",
|
||||||
"\"a beautiful balcony is overlooking the sea\"",
|
"\"a beautiful balcony is overlooking the sea\"",
|
||||||
"\"over looking the sea is a beautiful balcony\"",
|
"\"over looking the sea is a beautiful balcony\"",
|
||||||
"\"this balcony is overlooking the sea\"",
|
"\"this balcony is overlooking the sea\"",
|
||||||
@ -398,7 +585,6 @@ fn test_exactness_attribute_starts_with_phrase() {
|
|||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 0, 2, 7]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 0, 2, 7]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
// TODO: this is correct, so the exactness ranking rule probably has a bug in the handling of phrases
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"overlooking the sea is a beautiful balcony\"",
|
"\"overlooking the sea is a beautiful balcony\"",
|
||||||
@ -440,3 +626,148 @@ fn test_exactness_all_candidates_with_typo() {
|
|||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_exactness_after_words() {
|
||||||
|
let index = create_index_simple_ordered_with_typos();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_criteria(vec![Criterion::Words, Criterion::Exactness]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
|
"\"the quack briwn fox jlmps over the lazy dog\"",
|
||||||
|
"\"the quick brown fox jumps over the lazy\"",
|
||||||
|
"\"the quack briwn fox jlmps over the lazy\"",
|
||||||
|
"\"the quick brown fox jumps over the\"",
|
||||||
|
"\"the quick brown fox jumps over\"",
|
||||||
|
"\"the quack briwn fox jlmps over\"",
|
||||||
|
"\"the quack briwn fox jlmps over the\"",
|
||||||
|
"\"the quick brown fox jumps\"",
|
||||||
|
"\"the quack briwn fox jlmps\"",
|
||||||
|
"\"the quick brown fox\"",
|
||||||
|
"\"the quack briwn fox\"",
|
||||||
|
"\"the quick brown\"",
|
||||||
|
"\"the quack briwn\"",
|
||||||
|
"\"the quick\"",
|
||||||
|
"\"the quack\"",
|
||||||
|
"\"the\"",
|
||||||
|
"\"the\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_words_after_exactness() {
|
||||||
|
let index = create_index_simple_ordered_with_typos();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_criteria(vec![Criterion::Exactness, Criterion::Words]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 16, 17, 9, 15, 8, 14, 6, 7, 13, 5, 4, 12, 3, 2, 1, 11]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
|
"\"the quick brown fox jumps over the lazy\"",
|
||||||
|
"\"the quick brown fox jumps over\"",
|
||||||
|
"\"the quick brown fox jumps over the\"",
|
||||||
|
"\"the quack briwn fox jlmps over the lazy dog\"",
|
||||||
|
"\"the quick brown fox jumps\"",
|
||||||
|
"\"the quack briwn fox jlmps over the lazy\"",
|
||||||
|
"\"the quick brown fox\"",
|
||||||
|
"\"the quack briwn fox jlmps over\"",
|
||||||
|
"\"the quack briwn fox jlmps over the\"",
|
||||||
|
"\"the quick brown\"",
|
||||||
|
"\"the quack briwn fox jlmps\"",
|
||||||
|
"\"the quack briwn fox\"",
|
||||||
|
"\"the quick\"",
|
||||||
|
"\"the quack briwn\"",
|
||||||
|
"\"the quack\"",
|
||||||
|
"\"the\"",
|
||||||
|
"\"the\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_proximity_after_exactness() {
|
||||||
|
let index = create_index_with_varying_proximities();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 5, 4, 3, 8, 6, 7]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
|
"\"the quick brown fox jumps over the very lazy dog\"",
|
||||||
|
"\"lazy jumps dog brown quick the over fox the\"",
|
||||||
|
"\"the quick brown fox over the lazy dog\"",
|
||||||
|
"\"the quick brown fox over the very lazy dog\"",
|
||||||
|
"\"dog brown quick the over fox the lazy\"",
|
||||||
|
"\"the quick brown fox over\"",
|
||||||
|
"\"brown quick the over fox\"",
|
||||||
|
"\"the very quick brown fox over\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let index = create_index_all_equal_except_proximity_between_ignored_terms();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"lazy jumps dog brown quick the over fox the\"",
|
||||||
|
"\"lazy jumps dog brown quick the over fox the. quack briwn jlmps\"",
|
||||||
|
"\"lazy jumps dog brown quick the over fox the. quack briwn jlmps overt\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
@ -33,7 +33,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
|
|||||||
&mut self,
|
&mut self,
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
_logger: &mut dyn SearchLogger<QueryGraph>,
|
_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
_parent_candidates: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
parent_query_graph: &QueryGraph,
|
parent_query_graph: &QueryGraph,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
self.exhausted = false;
|
self.exhausted = false;
|
||||||
@ -77,7 +77,6 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
|
|||||||
let nodes_to_remove = self.nodes_to_remove.pop().unwrap();
|
let nodes_to_remove = self.nodes_to_remove.pop().unwrap();
|
||||||
query_graph.remove_nodes_keep_edges(&nodes_to_remove.iter().collect::<Vec<_>>());
|
query_graph.remove_nodes_keep_edges(&nodes_to_remove.iter().collect::<Vec<_>>());
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
|
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user