fix: Always lowercase indexed tokens

2025-02-20 17:45:54 +08:00 · 2019-04-22 18:43:00 +02:00 · 2019-04-22 18:43:00 +02:00 · f0268d49fe
commit f0268d49fe
parent 7dbf5d6319
1 changed file with 8 additions and 0 deletions
--- a/meilidb-data/src/indexer.rs
+++ b/meilidb-data/src/indexer.rs
@@ -33,6 +33,10 @@ impl Indexer {
    pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
        for token in Tokenizer::new(text) {
            if token.word_index >= self.word_limit { break }
+
+            let lower = token.word.to_lowercase();
+            let token = Token { word: &lower, ..token };
+
            let docindex = match token_to_docindex(id, attr, token) {
                Some(docindex) => docindex,
                None => break,
@@ -49,6 +53,10 @@ impl Indexer {
        let iter = iter.into_iter();
        for token in SeqTokenizer::new(iter) {
            if token.word_index >= self.word_limit { break }
+
+            let lower = token.word.to_lowercase();
+            let token = Token { word: &lower, ..token };
+
            let docindex = match token_to_docindex(id, attr, token) {
                Some(docindex) => docindex,
                None => break,