mirror of https://github.com/meilisearch/meilisearch.git, synced 2024-11-23 10:37:41 +08:00

Add new tests and fix construction of query graph from paths

This commit is contained in:
parent 6e50f23896
commit b5691802a3
@@ -176,6 +176,9 @@ impl<T> Interner<T> {
    pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<T>, &mut T)> {
        self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
    }
    pub fn freeze(self) -> FixedSizeInterner<T> {
        FixedSizeInterner { stable_store: self.stable_store }
    }
}

/// A store of values of type `T`, each linked to a value of type `From`
@@ -4,7 +4,7 @@ use super::query_term::{
};
use super::small_bitmap::SmallBitmap;
use super::SearchContext;
use crate::search::new::interner::DedupInterner;
use crate::search::new::interner::Interner;
use crate::Result;
use std::cmp::Ordering;
use std::collections::BTreeMap;
@@ -345,29 +345,13 @@ impl QueryGraph {
    Build a query graph from a list of paths

    The paths are composed of source and dest terms.
    If the source term is `None`, then the last dest term is used
    as the predecessor of the dest term. If the source is Some(_),
    then an edge is built between the last dest term and the source,
    and between the source and new dest term.

    Note that the resulting graph will not correspond to a perfect
    representation of the set of paths.
    For example, consider the following paths:
    ```txt
    PATH 1 : a -> b1 -> c1 -> d -> e1
    PATH 2 : a -> b2 -> c2 -> d -> e2
    ```
    Then the resulting graph will be:
    ```txt
              ┌────┐  ┌────┐             ┌────┐
           ┌──│ b1 │──│ c1 │─┐        ┌──│ e1 │
    ┌────┐ │  └────┘  └────┘ │ ┌────┐ │  └────┘
    │ a  │─┤                 ├─│ d  │─┤
    └────┘ │  ┌────┐  ┌────┐ │ └────┘ │  ┌────┐
           └──│ b2 │──│ c2 │─┘        └──│ e2 │
              └────┘  └────┘             └────┘
    ```
    which is different from the fully correct representation:
    ```txt
              ┌────┐  ┌────┐   ┌────┐   ┌────┐
           ┌──│ b1 │──│ c1 │───│ d  │───│ e1 │
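To make the "imperfect representation" point above concrete, here is a minimal, self-contained sketch. It is not the Meilisearch code: terms are plain strings rather than `LocatedQueryTermSubset`, and `graph_from_paths` is a hypothetical helper. It shows how merging the shared `d` node also admits paths that were never in the input, such as `a -> b1 -> c1 -> d -> e2`.

```rust
use std::collections::{BTreeMap, BTreeSet};

// Hypothetical, simplified helper: terms are plain strings and edges are
// collected in a successor map, so terms with the same name collapse into a
// single node (which is what merges the `d` node of both example paths).
fn graph_from_paths(paths: &[Vec<&str>]) -> BTreeMap<String, BTreeSet<String>> {
    let mut successors: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    for path in paths {
        for pair in path.windows(2) {
            successors.entry(pair[0].to_string()).or_default().insert(pair[1].to_string());
        }
    }
    successors
}

fn main() {
    let paths = vec![
        vec!["a", "b1", "c1", "d", "e1"],
        vec!["a", "b2", "c2", "d", "e2"],
    ];
    // `d` ends up with both `e1` and `e2` as successors, so the graph also
    // contains `a -> b1 -> c1 -> d -> e2`, which was not one of the inputs.
    for (node, succ) in graph_from_paths(&paths) {
        println!("{node} -> {succ:?}");
    }
}
```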
@@ -383,21 +367,51 @@ impl QueryGraph {
    pub fn build_from_paths(
        paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>,
    ) -> Self {
        let mut node_data = DedupInterner::default();
        let root_node = node_data.insert(QueryNodeData::Start);
        let end_node = node_data.insert(QueryNodeData::End);
        let mut node_data = Interner::default();
        let root_node = node_data.push(QueryNodeData::Start);
        let end_node = node_data.push(QueryNodeData::End);

        let mut paths_with_single_terms = vec![];

        for path in paths {
            let mut processed_path = vec![];
            let mut prev_dest_term: Option<LocatedQueryTermSubset> = None;
            for (start_term, dest_term) in path {
                if let Some(prev_dest_term) = prev_dest_term.take() {
                    if let Some(mut start_term) = start_term {
                        if start_term.term_ids == prev_dest_term.term_ids {
                            start_term.term_subset.intersect(&prev_dest_term.term_subset);
                            processed_path.push(start_term);
                        } else {
                            processed_path.push(prev_dest_term);
                            processed_path.push(start_term);
                        }
                    } else {
                        processed_path.push(prev_dest_term);
                    }
                } else if let Some(start_term) = start_term {
                    processed_path.push(start_term);
                }
                prev_dest_term = Some(dest_term);
            }
            if let Some(prev_dest_term) = prev_dest_term {
                processed_path.push(prev_dest_term);
            }
            paths_with_single_terms.push(processed_path);
        }

        // TODO: make a prefix tree of the processed paths to avoid uselessly duplicating nodes

        let mut paths_with_ids = vec![];
        for path in paths {
        for path in paths_with_single_terms {
            let mut path_with_ids = vec![];
            for node in path {
                let (start_term, end_term) = node;
                let src_node_id = start_term.map(|x| node_data.insert(QueryNodeData::Term(x)));
                let dest_node_id = node_data.insert(QueryNodeData::Term(end_term));
                path_with_ids.push((src_node_id, dest_node_id));
            for term in path {
                let id = node_data.push(QueryNodeData::Term(term));
                path_with_ids.push(Interned::from_raw(id.into_raw()));
            }
            paths_with_ids.push(path_with_ids);
        }

        let nodes_data = node_data.freeze();
        let nodes_data_len = nodes_data.len();
        let mut nodes = nodes_data.map_move(|n| QueryNode {
@@ -406,31 +420,22 @@ impl QueryGraph {
            successors: SmallBitmap::new(nodes_data_len),
        });

        let root_node = Interned::from_raw(root_node.into_raw());
        let end_node = Interned::from_raw(end_node.into_raw());
        let root_node = Interned::<QueryNode>::from_raw(root_node.into_raw());
        let end_node = Interned::<QueryNode>::from_raw(end_node.into_raw());

        for path in paths_with_ids {
            let mut prev_node = root_node;
            for node in path {
                let (start_term, dest_term) = node;
                let end_term = Interned::from_raw(dest_term.into_raw());
                let src = if let Some(start_term) = start_term {
                    // TODO: this is incorrect! should take the intersection
                    // between the prev node and the start term if they refer to the same
                    // original query term!
                    let start_term = Interned::from_raw(start_term.into_raw());
                    nodes.get_mut(prev_node).successors.insert(start_term);
                    nodes.get_mut(start_term).predecessors.insert(prev_node);
                    start_term
                } else {
                    prev_node
                };
                nodes.get_mut(src).successors.insert(end_term);
                nodes.get_mut(end_term).predecessors.insert(src);
                prev_node = end_term;
            let mut prev_node_id = root_node;
            for node_id in path {
                let prev_node = nodes.get_mut(prev_node_id);
                prev_node.successors.insert(node_id);
                let node = nodes.get_mut(node_id);
                node.predecessors.insert(prev_node_id);
                prev_node_id = node_id;
            }
            nodes.get_mut(prev_node).successors.insert(end_node);
            nodes.get_mut(end_node).predecessors.insert(prev_node);
            let prev_node = nodes.get_mut(prev_node_id);
            prev_node.successors.insert(end_node);
            let node = nodes.get_mut(end_node);
            node.predecessors.insert(prev_node_id);
        }

        QueryGraph { root_node, end_node, nodes }
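The first loop of `build_from_paths` (flattening each path of `(source, dest)` pairs into single terms) can also be read in isolation. The following is a rough standalone sketch under simplified assumptions, not the actual Meilisearch code: `Term` stands in for `LocatedQueryTermSubset`, `ids`/`words` for `term_ids`/`term_subset`, and `collapse_path` is a hypothetical name.

```rust
use std::collections::BTreeSet;

// Simplified stand-in for `LocatedQueryTermSubset`: `ids` identifies the
// original query term(s), `words` is the subset of derivations kept for it.
#[derive(Clone, Debug)]
struct Term {
    ids: BTreeSet<u8>,
    words: BTreeSet<String>,
}

// Hypothetical standalone version of the flattening loop: when a pair's
// source refers to the same original term ids as the previous pair's dest,
// the two are merged into one node whose word subset is their intersection.
fn collapse_path(path: Vec<(Option<Term>, Term)>) -> Vec<Term> {
    let mut processed: Vec<Term> = Vec::new();
    let mut prev_dest: Option<Term> = None;
    for (source, dest) in path {
        if let Some(prev) = prev_dest.take() {
            match source {
                Some(mut src) if src.ids == prev.ids => {
                    // Same original term: keep a single node with the
                    // intersection of both word subsets.
                    src.words = src.words.intersection(&prev.words).cloned().collect();
                    processed.push(src);
                }
                Some(src) => {
                    processed.push(prev);
                    processed.push(src);
                }
                None => processed.push(prev),
            }
        } else if let Some(src) = source {
            processed.push(src);
        }
        prev_dest = Some(dest);
    }
    if let Some(prev) = prev_dest {
        processed.push(prev);
    }
    processed
}

fn main() {
    let ids = |xs: &[u8]| xs.iter().copied().collect::<BTreeSet<u8>>();
    let words = |xs: &[&str]| xs.iter().map(|w| w.to_string()).collect::<BTreeSet<String>>();
    // The dest of the first pair and the source of the second pair both refer
    // to term id 1, so they are merged into one node with words {"quick"}.
    let path = vec![
        (
            Some(Term { ids: ids(&[0]), words: words(&["the"]) }),
            Term { ids: ids(&[1]), words: words(&["quick", "quich"]) },
        ),
        (
            Some(Term { ids: ids(&[1]), words: words(&["quick"]) }),
            Term { ids: ids(&[2]), words: words(&["fox"]) },
        ),
    ];
    for term in collapse_path(path) {
        println!("{:?} -> {:?}", term.ids, term.words);
    }
}
```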
@@ -3,6 +3,8 @@ This module tests the interactions between the proximity and typo ranking rules.

The proximity ranking rule should transform the query graph such that it
only contains the word pairs that it used to compute its bucket.

TODO: This is not currently implemented.
*/

use crate::{
@@ -61,8 +63,13 @@ fn test_trap_basic() {
    s.terms_matching_strategy(TermsMatchingStrategy::All);
    s.query("summer holiday");
    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 3, 2]");
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]");
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
    // TODO: this is incorrect, 1 should come before 0
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"summer. holiday. sommer holidty\"",
        "\"summer. holiday. sommer holiday\"",
    ]
    "###);
}
@@ -271,13 +271,13 @@ fn test_sort() {
        "false",
        "true",
        "true",
        "__does_not_exist___",
        "__does_not_exist__",
        "null",
        "[null,null,\"\"]",
        "\"\"",
        "{\"sub\":0}",
        "__does_not_exist___",
        "__does_not_exist___",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
@@ -304,13 +304,13 @@ fn test_sort() {
        "false",
        "\"1\"",
        "\"0\"",
        "__does_not_exist___",
        "__does_not_exist__",
        "null",
        "[null,null,\"\"]",
        "\"\"",
        "{\"sub\":0}",
        "__does_not_exist___",
        "__does_not_exist___",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
}
@@ -59,8 +59,7 @@ fn create_index() -> TempIndex {
            "id": 3,
            "text": "beautiful sommer"
        },
        // The next two documents lay out an even more complex trap, which the current implementation
        // fails to handle properly.
        // The next two documents lay out an even more complex trap.
        // With the user query `delicious sweet dessert`, the typo ranking rule will return one bucket of:
        // - id 4: delicitous + sweet + dessert
        // - id 5: beautiful + sweet + desgert
@@ -114,13 +113,12 @@ fn test_trap_complex2() {
    s.terms_matching_strategy(TermsMatchingStrategy::All);
    s.query("delicious sweet dessert");
    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5]");
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 4]");
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
    // TODO: this is incorrect. 5 should appear before 4
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"delicitous. sweet. dessert. delicitous sweet desgert\"",
        "\"delicious. sweet desgert. delicious sweet desgert\"",
        "\"delicitous. sweet. dessert. delicitous sweet desgert\"",
    ]
    "###);
}