220: Make hard separators split phrase query r=Kerollmops a=ManyTheFish

Hard separators now split a phrase query into two sequential phrases (double-quoted strings):

The query `"Radioactive (Imagine Dragons)"` would be considered equivalent to `"Radioactive" "Imagine Dragons"`, which has the small disadvantage of not keeping the order of the two (or more) separate phrases.

Fix #208

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
This commit is contained in:
bors[bot] 2021-06-09 08:22:58 +00:00 committed by GitHub
commit 6faa87302c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,7 +1,9 @@
use std::{fmt, cmp, mem}; use std::{fmt, cmp, mem};
use fst::Set; use fst::Set;
use meilisearch_tokenizer::{TokenKind, tokenizer::TokenStream}; use meilisearch_tokenizer::token::SeparatorKind;
use meilisearch_tokenizer::tokenizer::TokenStream;
use meilisearch_tokenizer::TokenKind;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
@ -467,13 +469,14 @@ fn create_primitive_query(query: TokenStream, stop_words: Option<Set<&[u8]>>, wo
primitive_query.push(PrimitiveQueryPart::Word(token.word.to_string(), true)); primitive_query.push(PrimitiveQueryPart::Word(token.word.to_string(), true));
} }
}, },
TokenKind::Separator(_) => { TokenKind::Separator(separator_kind) => {
let quote_count = token.word.chars().filter(|&s| s == '"').count(); let quote_count = token.word.chars().filter(|&s| s == '"').count();
// swap quoted state if we encounter a double quote // swap quoted state if we encounter a double quote
if quote_count % 2 != 0 { if quote_count % 2 != 0 {
quoted = !quoted; quoted = !quoted;
} }
if !phrase.is_empty() && quote_count > 0 { // if there is a quote or a hard separator we close the phrase.
if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard) {
primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase))); primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase)));
} }
}, },
@ -798,6 +801,29 @@ mod test {
assert_eq!(expected, query_tree); assert_eq!(expected, query_tree);
} }
// Checks that a double-quoted phrase containing a hard separator is split
// into two independent phrases instead of one long consecutive phrase.
#[test]
fn phrase_with_hard_separator() {
// A single quoted phrase; the ". " between `friends` and `wooop` is the
// separator this test expects to close the first phrase.
let query = "\"hey friends. wooop wooop\"";
// Tokenize the query with the default analyzer configuration.
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
let result = analyzer.analyze(query);
let tokens = result.tokens();
// Expected tree: both phrases are required (`And`), each one being a
// `Consecutive` run of exact, non-prefix word queries.
let expected = Operation::And(vec![
// Words before the separator: "hey friends".
Operation::Consecutive(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("friends".to_string()) }),
]),
// Words after the separator: "wooop wooop".
Operation::Consecutive(vec![
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
]),
]);
// NOTE(review): the meaning of the `false, true, None` arguments is defined by
// `TestContext::build`, which is not visible here — confirm against its signature.
// The double `unwrap` extracts the tree from two nested wrappers returned by `build`.
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
assert_eq!(expected, query_tree);
}
#[test] #[test]
fn optional_word() { fn optional_word() {
let query = "hey my friend "; let query = "hey my friend ";