From b489d699ce647edfa6ccd0ad59aca8b84eb31efb Mon Sep 17 00:00:00 2001 From: many Date: Tue, 8 Jun 2021 17:29:38 +0200 Subject: [PATCH] Make hard separators split phrase query hard separators will now split a phrase query as double double-quotes Fix #208 --- milli/src/search/query_tree.rs | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 03305943b..33708fe76 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -1,7 +1,9 @@ use std::{fmt, cmp, mem}; use fst::Set; -use meilisearch_tokenizer::{TokenKind, tokenizer::TokenStream}; +use meilisearch_tokenizer::token::SeparatorKind; +use meilisearch_tokenizer::tokenizer::TokenStream; +use meilisearch_tokenizer::TokenKind; use roaring::RoaringBitmap; use slice_group_by::GroupBy; @@ -467,13 +469,14 @@ fn create_primitive_query(query: TokenStream, stop_words: Option<Set<&[u8]>>, wo primitive_query.push(PrimitiveQueryPart::Word(token.word.to_string(), true)); } }, - TokenKind::Separator(_) => { + TokenKind::Separator(separator_kind) => { let quote_count = token.word.chars().filter(|&s| s == '"').count(); // swap quoted state if we encounter a double quote if quote_count % 2 != 0 { quoted = !quoted; } - if !phrase.is_empty() && quote_count > 0 { + // if there are any quote or any hard separator we close the phrase. + if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard) { primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase))); } }, @@ -798,6 +801,29 @@ mod test { assert_eq!(expected, query_tree); } + #[test] + fn phrase_with_hard_separator() { + let query = "\"hey friends. 
wooop wooop\""; + let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); + let result = analyzer.analyze(query); + let tokens = result.tokens(); + + let expected = Operation::And(vec![ + Operation::Consecutive(vec![ + Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }), + Operation::Query(Query { prefix: false, kind: QueryKind::exact("friends".to_string()) }), + ]), + Operation::Consecutive(vec![ + Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), + Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), + ]), + ]); + + let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); + + assert_eq!(expected, query_tree); + } + #[test] fn optional_word() { let query = "hey my friend ";