mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Check that the unidecoded words are not empty
This commit is contained in:
parent
4571b80a49
commit
3b1cbed238
@ -139,11 +139,12 @@ fn index_token(
|
|||||||
|
|
||||||
if !lower.contains(is_cjk) {
|
if !lower.contains(is_cjk) {
|
||||||
let unidecoded = deunicode_with_tofu(&lower, "");
|
let unidecoded = deunicode_with_tofu(&lower, "");
|
||||||
if unidecoded != lower {
|
if unidecoded != lower && !unidecoded.is_empty() {
|
||||||
let token = Token {
|
let token = Token {
|
||||||
word: &unidecoded,
|
word: &unidecoded,
|
||||||
..token
|
..token
|
||||||
};
|
};
|
||||||
|
|
||||||
match token_to_docindex(id, attr, token) {
|
match token_to_docindex(id, attr, token) {
|
||||||
Some(docindex) => {
|
Some(docindex) => {
|
||||||
let word = Vec::from(token.word);
|
let word = Vec::from(token.word);
|
||||||
@ -252,4 +253,22 @@ mod tests {
|
|||||||
.get(&"éteindre".to_owned().into_bytes())
|
.get(&"éteindre".to_owned().into_bytes())
|
||||||
.is_some());
|
.is_some());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn no_empty_unidecode() {
|
||||||
|
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||||
|
|
||||||
|
let docid = DocumentId(0);
|
||||||
|
let attr = SchemaAttr(0);
|
||||||
|
let text = "🇯🇵";
|
||||||
|
indexer.index_text(docid, attr, text);
|
||||||
|
|
||||||
|
let Indexed {
|
||||||
|
words_doc_indexes, ..
|
||||||
|
} = indexer.build();
|
||||||
|
|
||||||
|
assert!(words_doc_indexes
|
||||||
|
.get(&"🇯🇵".to_owned().into_bytes())
|
||||||
|
.is_some());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user