From 3dcbc737f358c6fb87b1ac79218ef0f8c9aa62d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 19 Jun 2019 14:10:21 +0200 Subject: [PATCH] feat: Make synonyms be not considered like exact matches --- meilidb-core/src/query_builder.rs | 106 ++++++++++++++++++------------ 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs index decffc3b6..3036283f2 100644 --- a/meilidb-core/src/query_builder.rs +++ b/meilidb-core/src/query_builder.rs @@ -20,7 +20,24 @@ use crate::{Match, DocumentId, Store, RawDocument, Document}; const NGRAMS: usize = 3; -fn generate_automatons(query: &str, store: &S) -> Result, S::Error> { +struct Automaton { + index: usize, + is_synonym: bool, + number_words: usize, + dfa: DfaExt, +} + +impl Automaton { + fn synonym(index: usize, number_words: usize, dfa: DfaExt) -> Automaton { + Automaton { index, is_synonym: true, number_words, dfa } + } + + fn original(index: usize, number_words: usize, dfa: DfaExt) -> Automaton { + Automaton { index, is_synonym: false, number_words, dfa } + } +} + +fn generate_automatons(query: &str, store: &S) -> Result, S::Error> { let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace); let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect(); let mut automatons = Vec::new(); @@ -54,25 +71,28 @@ fn generate_automatons(query: &str, store: &S) -> Result { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway + assert_matches!(iter.next(), None); + }); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { let mut iter = matches.into_iter(); assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY @@ -724,18 +750,18 @@ mod tests { assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway assert_matches!(iter.next(), None); // position rewritten ^ }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway - assert_matches!(iter.next(), None); - }); assert_matches!(iter.next(), None); let builder = QueryBuilder::new(&store); let results = builder.query("NYC subway", 0..20).unwrap(); let mut iter = results.into_iter(); + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway + assert_matches!(iter.next(), None); + }); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { let mut iter = matches.into_iter(); assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY @@ -744,12 +770,6 @@ mod tests { assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway assert_matches!(iter.next(), None); // position rewritten ^ }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 1, .. })); // subway - assert_matches!(iter.next(), None); - }); assert_matches!(iter.next(), None); } @@ -776,6 +796,12 @@ mod tests { let results = builder.query("NY subway", 0..20).unwrap(); let mut iter = results.into_iter(); + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), None); + }); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { let mut iter = matches.into_iter(); assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY @@ -784,18 +810,18 @@ mod tests { assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway assert_matches!(iter.next(), None); // position rewritten ^ }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway - assert_matches!(iter.next(), None); - }); assert_matches!(iter.next(), None); let builder = QueryBuilder::new(&store); let results = builder.query("NYC subway", 0..20).unwrap(); let mut iter = results.into_iter(); + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), None); + }); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { let mut iter = matches.into_iter(); assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY @@ -804,12 +830,6 @@ mod tests { assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway assert_matches!(iter.next(), None); // position rewritten ^ }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway - assert_matches!(iter.next(), None); - }); assert_matches!(iter.next(), None); } @@ -860,6 +880,12 @@ mod tests { let results = builder.query("NYC subway", 0..20).unwrap(); let mut iter = results.into_iter(); + assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { + let mut iter = matches.into_iter(); + assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY + assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway + assert_matches!(iter.next(), None); + }); assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches }) => { let mut iter = matches.into_iter(); assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // new = NY @@ -869,12 +895,6 @@ mod tests { assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // train = subway assert_matches!(iter.next(), None); // position rewritten ^ }); - assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches }) => { - let mut iter = matches.into_iter(); - assert_matches!(iter.next(), Some(Match { query_index: 0, word_index: 0, .. })); // NY - assert_matches!(iter.next(), Some(Match { query_index: 1, word_index: 2, .. })); // subway - assert_matches!(iter.next(), None); - }); assert_matches!(iter.next(), None); }