mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Merge #220
220: Make hard separators split phrase query r=Kerollmops a=ManyTheFish. Hard separators will now split a phrase query into two sequential phrases (double-quoted strings): the query `"Radioactive (Imagine Dragons)"` is considered equivalent to `"Radioactive" "Imagine Dragons"`, which has the minor disadvantage of not keeping the order of the two (or more) separate phrases. Fix #208. Co-authored-by: many <maxime@meilisearch.com> Co-authored-by: Many <legendre.maxime.isn@gmail.com>
This commit is contained in:
commit
6faa87302c
@ -1,7 +1,9 @@
|
|||||||
use std::{fmt, cmp, mem};
|
use std::{fmt, cmp, mem};
|
||||||
|
|
||||||
use fst::Set;
|
use fst::Set;
|
||||||
use meilisearch_tokenizer::{TokenKind, tokenizer::TokenStream};
|
use meilisearch_tokenizer::token::SeparatorKind;
|
||||||
|
use meilisearch_tokenizer::tokenizer::TokenStream;
|
||||||
|
use meilisearch_tokenizer::TokenKind;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
@ -467,13 +469,14 @@ fn create_primitive_query(query: TokenStream, stop_words: Option<Set<&[u8]>>, wo
|
|||||||
primitive_query.push(PrimitiveQueryPart::Word(token.word.to_string(), true));
|
primitive_query.push(PrimitiveQueryPart::Word(token.word.to_string(), true));
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
TokenKind::Separator(_) => {
|
TokenKind::Separator(separator_kind) => {
|
||||||
let quote_count = token.word.chars().filter(|&s| s == '"').count();
|
let quote_count = token.word.chars().filter(|&s| s == '"').count();
|
||||||
// swap quoted state if we encounter a double quote
|
// swap quoted state if we encounter a double quote
|
||||||
if quote_count % 2 != 0 {
|
if quote_count % 2 != 0 {
|
||||||
quoted = !quoted;
|
quoted = !quoted;
|
||||||
}
|
}
|
||||||
if !phrase.is_empty() && quote_count > 0 {
|
// if there is a quote or a hard separator we close the phrase.
|
||||||
|
if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard) {
|
||||||
primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase)));
|
primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase)));
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -798,6 +801,29 @@ mod test {
|
|||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn phrase_with_hard_separator() {
|
||||||
|
let query = "\"hey friends. wooop wooop\"";
|
||||||
|
let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
|
||||||
|
let result = analyzer.analyze(query);
|
||||||
|
let tokens = result.tokens();
|
||||||
|
|
||||||
|
let expected = Operation::And(vec![
|
||||||
|
Operation::Consecutive(vec![
|
||||||
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }),
|
||||||
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("friends".to_string()) }),
|
||||||
|
]),
|
||||||
|
Operation::Consecutive(vec![
|
||||||
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
|
||||||
|
Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
|
||||||
|
]),
|
||||||
|
]);
|
||||||
|
|
||||||
|
let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
||||||
|
|
||||||
|
assert_eq!(expected, query_tree);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn optional_word() {
|
fn optional_word() {
|
||||||
let query = "hey my friend ";
|
let query = "hey my friend ";
|
||||||
|
Loading…
Reference in New Issue
Block a user