mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Add new tests and fix construction of query graph from paths
This commit is contained in:
parent
6e50f23896
commit
b5691802a3
@ -176,6 +176,9 @@ impl<T> Interner<T> {
|
|||||||
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<T>, &mut T)> {
|
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<T>, &mut T)> {
|
||||||
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
|
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
|
||||||
}
|
}
|
||||||
|
pub fn freeze(self) -> FixedSizeInterner<T> {
|
||||||
|
FixedSizeInterner { stable_store: self.stable_store }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A store of values of type `T`, each linked to a value of type `From`
|
/// A store of values of type `T`, each linked to a value of type `From`
|
||||||
|
@ -4,7 +4,7 @@ use super::query_term::{
|
|||||||
};
|
};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::SearchContext;
|
use super::SearchContext;
|
||||||
use crate::search::new::interner::DedupInterner;
|
use crate::search::new::interner::Interner;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
@ -345,29 +345,13 @@ impl QueryGraph {
|
|||||||
Build a query graph from a list of paths
|
Build a query graph from a list of paths
|
||||||
|
|
||||||
The paths are composed of source and dest terms.
|
The paths are composed of source and dest terms.
|
||||||
If the source term is `None`, then the last dest term is used
|
|
||||||
as the predecessor of the dest term. If the source is Some(_),
|
|
||||||
then an edge is built between the last dest term and the source,
|
|
||||||
and between the source and new dest term.
|
|
||||||
|
|
||||||
Note that the resulting graph will not correspond to a perfect
|
|
||||||
representation of the set of paths.
|
|
||||||
For example, consider the following paths:
|
For example, consider the following paths:
|
||||||
```txt
|
```txt
|
||||||
PATH 1 : a -> b1 -> c1 -> d -> e1
|
PATH 1 : a -> b1 -> c1 -> d -> e1
|
||||||
PATH 2 : a -> b2 -> c2 -> d -> e2
|
PATH 2 : a -> b2 -> c2 -> d -> e2
|
||||||
```
|
```
|
||||||
Then the resulting graph will be:
|
Then the resulting graph will be:
|
||||||
```txt
|
|
||||||
┌────┐ ┌────┐ ┌────┐
|
|
||||||
┌──│ b1 │──│ c1 │─┐ ┌──│ e1 │
|
|
||||||
┌────┐ │ └────┘ └────┘ │ ┌────┐ │ └────┘
|
|
||||||
│ a │─┤ ├─│ d │─┤
|
|
||||||
└────┘ │ ┌────┐ ┌────┐ │ └────┘ │ ┌────┐
|
|
||||||
└──│ b2 │──│ c2 │─┘ └──│ e2 │
|
|
||||||
└────┘ └────┘ └────┘
|
|
||||||
```
|
|
||||||
which is different from the fully correct representation:
|
|
||||||
```txt
|
```txt
|
||||||
┌────┐ ┌────┐ ┌────┐ ┌────┐
|
┌────┐ ┌────┐ ┌────┐ ┌────┐
|
||||||
┌──│ b1 │──│ c1 │───│ d │───│ e1 │
|
┌──│ b1 │──│ c1 │───│ d │───│ e1 │
|
||||||
@ -383,21 +367,51 @@ impl QueryGraph {
|
|||||||
pub fn build_from_paths(
|
pub fn build_from_paths(
|
||||||
paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>,
|
paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let mut node_data = DedupInterner::default();
|
let mut node_data = Interner::default();
|
||||||
let root_node = node_data.insert(QueryNodeData::Start);
|
let root_node = node_data.push(QueryNodeData::Start);
|
||||||
let end_node = node_data.insert(QueryNodeData::End);
|
let end_node = node_data.push(QueryNodeData::End);
|
||||||
|
|
||||||
|
let mut paths_with_single_terms = vec![];
|
||||||
|
|
||||||
|
for path in paths {
|
||||||
|
let mut processed_path = vec![];
|
||||||
|
let mut prev_dest_term: Option<LocatedQueryTermSubset> = None;
|
||||||
|
for (start_term, dest_term) in path {
|
||||||
|
if let Some(prev_dest_term) = prev_dest_term.take() {
|
||||||
|
if let Some(mut start_term) = start_term {
|
||||||
|
if start_term.term_ids == prev_dest_term.term_ids {
|
||||||
|
start_term.term_subset.intersect(&prev_dest_term.term_subset);
|
||||||
|
processed_path.push(start_term);
|
||||||
|
} else {
|
||||||
|
processed_path.push(prev_dest_term);
|
||||||
|
processed_path.push(start_term);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
processed_path.push(prev_dest_term);
|
||||||
|
}
|
||||||
|
} else if let Some(start_term) = start_term {
|
||||||
|
processed_path.push(start_term);
|
||||||
|
}
|
||||||
|
prev_dest_term = Some(dest_term);
|
||||||
|
}
|
||||||
|
if let Some(prev_dest_term) = prev_dest_term {
|
||||||
|
processed_path.push(prev_dest_term);
|
||||||
|
}
|
||||||
|
paths_with_single_terms.push(processed_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: make a prefix tree of the processed paths to avoid uselessly duplicating nodes
|
||||||
|
|
||||||
let mut paths_with_ids = vec![];
|
let mut paths_with_ids = vec![];
|
||||||
for path in paths {
|
for path in paths_with_single_terms {
|
||||||
let mut path_with_ids = vec![];
|
let mut path_with_ids = vec![];
|
||||||
for node in path {
|
for term in path {
|
||||||
let (start_term, end_term) = node;
|
let id = node_data.push(QueryNodeData::Term(term));
|
||||||
let src_node_id = start_term.map(|x| node_data.insert(QueryNodeData::Term(x)));
|
path_with_ids.push(Interned::from_raw(id.into_raw()));
|
||||||
let dest_node_id = node_data.insert(QueryNodeData::Term(end_term));
|
|
||||||
path_with_ids.push((src_node_id, dest_node_id));
|
|
||||||
}
|
}
|
||||||
paths_with_ids.push(path_with_ids);
|
paths_with_ids.push(path_with_ids);
|
||||||
}
|
}
|
||||||
|
|
||||||
let nodes_data = node_data.freeze();
|
let nodes_data = node_data.freeze();
|
||||||
let nodes_data_len = nodes_data.len();
|
let nodes_data_len = nodes_data.len();
|
||||||
let mut nodes = nodes_data.map_move(|n| QueryNode {
|
let mut nodes = nodes_data.map_move(|n| QueryNode {
|
||||||
@ -406,31 +420,22 @@ impl QueryGraph {
|
|||||||
successors: SmallBitmap::new(nodes_data_len),
|
successors: SmallBitmap::new(nodes_data_len),
|
||||||
});
|
});
|
||||||
|
|
||||||
let root_node = Interned::from_raw(root_node.into_raw());
|
let root_node = Interned::<QueryNode>::from_raw(root_node.into_raw());
|
||||||
let end_node = Interned::from_raw(end_node.into_raw());
|
let end_node = Interned::<QueryNode>::from_raw(end_node.into_raw());
|
||||||
|
|
||||||
for path in paths_with_ids {
|
for path in paths_with_ids {
|
||||||
let mut prev_node = root_node;
|
let mut prev_node_id = root_node;
|
||||||
for node in path {
|
for node_id in path {
|
||||||
let (start_term, dest_term) = node;
|
let prev_node = nodes.get_mut(prev_node_id);
|
||||||
let end_term = Interned::from_raw(dest_term.into_raw());
|
prev_node.successors.insert(node_id);
|
||||||
let src = if let Some(start_term) = start_term {
|
let node = nodes.get_mut(node_id);
|
||||||
// TODO: this is incorrect! should take the intersection
|
node.predecessors.insert(prev_node_id);
|
||||||
// between the prev node and the start term if they refer to the same
|
prev_node_id = node_id;
|
||||||
// original query term!
|
|
||||||
let start_term = Interned::from_raw(start_term.into_raw());
|
|
||||||
nodes.get_mut(prev_node).successors.insert(start_term);
|
|
||||||
nodes.get_mut(start_term).predecessors.insert(prev_node);
|
|
||||||
start_term
|
|
||||||
} else {
|
|
||||||
prev_node
|
|
||||||
};
|
|
||||||
nodes.get_mut(src).successors.insert(end_term);
|
|
||||||
nodes.get_mut(end_term).predecessors.insert(src);
|
|
||||||
prev_node = end_term;
|
|
||||||
}
|
}
|
||||||
nodes.get_mut(prev_node).successors.insert(end_node);
|
let prev_node = nodes.get_mut(prev_node_id);
|
||||||
nodes.get_mut(end_node).predecessors.insert(prev_node);
|
prev_node.successors.insert(end_node);
|
||||||
|
let node = nodes.get_mut(end_node);
|
||||||
|
node.predecessors.insert(prev_node_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
QueryGraph { root_node, end_node, nodes }
|
QueryGraph { root_node, end_node, nodes }
|
||||||
|
@ -3,6 +3,8 @@ This module tests the interactions between the proximity and typo ranking rules.
|
|||||||
|
|
||||||
The proximity ranking rule should transform the query graph such that it
|
The proximity ranking rule should transform the query graph such that it
|
||||||
only contains the word pairs that it used to compute its bucket.
|
only contains the word pairs that it used to compute its bucket.
|
||||||
|
|
||||||
|
TODO: This is not currently implemented.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -61,8 +63,13 @@ fn test_trap_basic() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
s.query("summer holiday");
|
s.query("summer holiday");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 3, 2]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
// TODO: this is incorrect, 1 should come before 0
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"summer. holiday. sommer holidty\"",
|
||||||
|
"\"summer. holiday. sommer holiday\"",
|
||||||
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
@ -271,13 +271,13 @@ fn test_sort() {
|
|||||||
"false",
|
"false",
|
||||||
"true",
|
"true",
|
||||||
"true",
|
"true",
|
||||||
"__does_not_exist___",
|
"__does_not_exist__",
|
||||||
"null",
|
"null",
|
||||||
"[null,null,\"\"]",
|
"[null,null,\"\"]",
|
||||||
"\"\"",
|
"\"\"",
|
||||||
"{\"sub\":0}",
|
"{\"sub\":0}",
|
||||||
"__does_not_exist___",
|
"__does_not_exist__",
|
||||||
"__does_not_exist___",
|
"__does_not_exist__",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -304,13 +304,13 @@ fn test_sort() {
|
|||||||
"false",
|
"false",
|
||||||
"\"1\"",
|
"\"1\"",
|
||||||
"\"0\"",
|
"\"0\"",
|
||||||
"__does_not_exist___",
|
"__does_not_exist__",
|
||||||
"null",
|
"null",
|
||||||
"[null,null,\"\"]",
|
"[null,null,\"\"]",
|
||||||
"\"\"",
|
"\"\"",
|
||||||
"{\"sub\":0}",
|
"{\"sub\":0}",
|
||||||
"__does_not_exist___",
|
"__does_not_exist__",
|
||||||
"__does_not_exist___",
|
"__does_not_exist__",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
@ -59,8 +59,7 @@ fn create_index() -> TempIndex {
|
|||||||
"id": 3,
|
"id": 3,
|
||||||
"text": "beautiful sommer"
|
"text": "beautiful sommer"
|
||||||
},
|
},
|
||||||
// The next two documents lay out an even more complex trap, which the current implementation
|
// The next two documents lay out an even more complex trap.
|
||||||
// fails to handle properly.
|
|
||||||
// With the user query `delicious sweet dessert`, the typo ranking rule will return one bucket of:
|
// With the user query `delicious sweet dessert`, the typo ranking rule will return one bucket of:
|
||||||
// - id 4: delicitous + sweet + dessert
|
// - id 4: delicitous + sweet + dessert
|
||||||
// - id 5: beautiful + sweet + desgert
|
// - id 5: beautiful + sweet + desgert
|
||||||
@ -114,13 +113,12 @@ fn test_trap_complex2() {
|
|||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
s.query("delicious sweet dessert");
|
s.query("delicious sweet dessert");
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5]");
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 4]");
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
// TODO: this is incorrect. 5 should appear before 4
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
[
|
[
|
||||||
"\"delicitous. sweet. dessert. delicitous sweet desgert\"",
|
|
||||||
"\"delicious. sweet desgert. delicious sweet desgert\"",
|
"\"delicious. sweet desgert. delicious sweet desgert\"",
|
||||||
|
"\"delicitous. sweet. dessert. delicitous sweet desgert\"",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user