mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 12:05:05 +08:00
Ignore tokens with empty normalized value during indexing process
This commit is contained in:
parent
8ac5b765bc
commit
66aa6d5871
@ -226,9 +226,9 @@ fn process_tokens<'a>(
|
|||||||
) -> impl Iterator<Item = (usize, Token<'a>)> {
|
) -> impl Iterator<Item = (usize, Token<'a>)> {
|
||||||
tokens
|
tokens
|
||||||
.skip_while(|token| token.is_separator())
|
.skip_while(|token| token.is_separator())
|
||||||
.scan((0, None), |(offset, prev_kind), token| {
|
.scan((0, None), |(offset, prev_kind), mut token| {
|
||||||
match token.kind {
|
match token.kind {
|
||||||
TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
|
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
|
||||||
*offset += match *prev_kind {
|
*offset += match *prev_kind {
|
||||||
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
|
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
|
||||||
Some(_) => 1,
|
Some(_) => 1,
|
||||||
@ -244,7 +244,7 @@ fn process_tokens<'a>(
|
|||||||
{
|
{
|
||||||
*prev_kind = Some(token.kind);
|
*prev_kind = Some(token.kind);
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => token.kind = TokenKind::Unknown,
|
||||||
}
|
}
|
||||||
Some((*offset, token))
|
Some((*offset, token))
|
||||||
})
|
})
|
||||||
|
Loading…
Reference in New Issue
Block a user