mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 00:55:00 +08:00
fix: Always lowercase indexed tokens
This commit is contained in:
parent
7dbf5d6319
commit
f0268d49fe
@ -33,6 +33,10 @@ impl Indexer {
|
|||||||
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
|
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
|
||||||
for token in Tokenizer::new(text) {
|
for token in Tokenizer::new(text) {
|
||||||
if token.word_index >= self.word_limit { break }
|
if token.word_index >= self.word_limit { break }
|
||||||
|
|
||||||
|
let lower = token.word.to_lowercase();
|
||||||
|
let token = Token { word: &lower, ..token };
|
||||||
|
|
||||||
let docindex = match token_to_docindex(id, attr, token) {
|
let docindex = match token_to_docindex(id, attr, token) {
|
||||||
Some(docindex) => docindex,
|
Some(docindex) => docindex,
|
||||||
None => break,
|
None => break,
|
||||||
@ -49,6 +53,10 @@ impl Indexer {
|
|||||||
let iter = iter.into_iter();
|
let iter = iter.into_iter();
|
||||||
for token in SeqTokenizer::new(iter) {
|
for token in SeqTokenizer::new(iter) {
|
||||||
if token.word_index >= self.word_limit { break }
|
if token.word_index >= self.word_limit { break }
|
||||||
|
|
||||||
|
let lower = token.word.to_lowercase();
|
||||||
|
let token = Token { word: &lower, ..token };
|
||||||
|
|
||||||
let docindex = match token_to_docindex(id, attr, token) {
|
let docindex = match token_to_docindex(id, attr, token) {
|
||||||
Some(docindex) => docindex,
|
Some(docindex) => docindex,
|
||||||
None => break,
|
None => break,
|
||||||
|
Loading…
Reference in New Issue
Block a user