diff --git a/Cargo.lock b/Cargo.lock
index effdfe9a7..e03efb009 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -152,7 +152,7 @@ dependencies = [
  "pin-project-lite",
  "tokio-rustls 0.23.4",
  "tokio-util",
- "webpki-roots",
+ "webpki-roots 0.22.6",
 ]
 
 [[package]]
@@ -705,24 +705,27 @@ dependencies = [
 
 [[package]]
 name = "charabia"
-version = "0.7.2"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "413155d93157bff9130895c3bd83970ac7f35659ca57226a96aa35cf1e8e102c"
+checksum = "bb49850f555eb71aa6fc6d4d79420e81f4d89fa56e0e9c0f6d19aace2f56c554"
 dependencies = [
+ "aho-corasick",
  "cow-utils",
  "csv",
  "deunicode",
+ "either",
  "finl_unicode",
  "fst",
  "irg-kvariants",
  "jieba-rs",
- "lindera",
+ "lindera-core",
+ "lindera-dictionary",
+ "lindera-tokenizer",
  "once_cell",
  "pinyin",
  "serde",
  "slice-group-by",
  "unicode-normalization",
- "unicode-segmentation",
  "wana_kana",
  "whatlang",
 ]
@@ -2135,15 +2138,6 @@ dependencies = [
  "simple_asn1",
 ]
 
-[[package]]
-name = "kanaria"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0f9d9652540055ac4fded998a73aca97d965899077ab1212587437da44196ff"
-dependencies = [
- "bitflags",
-]
-
 [[package]]
 name = "language-tags"
 version = "0.3.2"
@@ -2211,38 +2205,11 @@ dependencies = [
  "vcpkg",
 ]
 
-[[package]]
-name = "lindera"
-version = "0.23.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72be283281bec2768687b1784be03a678609b51f2f90f6f9d9b4f07953e6dd25"
-dependencies = [
- "anyhow",
- "bincode",
- "byteorder",
- "encoding",
- "kanaria",
- "lindera-cc-cedict-builder",
- "lindera-core",
- "lindera-dictionary",
- "lindera-filter",
- "lindera-ipadic-builder",
- "lindera-ko-dic-builder",
- "lindera-unidic-builder",
- "regex",
- "serde",
- "serde_json",
- "thiserror",
- "unicode-blocks",
- "unicode-normalization",
- "yada",
-]
-
 [[package]]
 name = "lindera-cc-cedict-builder"
-version = "0.23.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10fbafd37adab44ccc2668a40fba2dbc4e665cb3c36018c15dfe2e2b830e28ce"
+checksum = "4c6bf79b29a90bcd22036e494d6cc9ac3abe9ab604b21f3258ba6dc1ce501801"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2259,9 +2226,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-compress"
-version = "0.23.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed9196bf5995503f6878a090dfee6114ba86430c72f67ef3624246b564869937"
+checksum = "8f2e99e67736352bbb6ed1c273643975822505067ca32194b0981040bc50527a"
 dependencies = [
  "anyhow",
  "flate2",
@@ -2270,9 +2237,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-core"
-version = "0.23.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5f0baa9932f682e9c5b388897330f155d3c40de80016e60125897fde5e0e246"
+checksum = "7c3935e966409156f22cb4b334b21b0dce84b7aa1cad62214b466489d249c8e5"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2287,9 +2254,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-decompress"
-version = "0.23.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6e63fa6ef0bc3ce2c26d372aa6185b7a316194494a84f81678f5da2893bf4a2"
+checksum = "7476406abb63c49d7f59c88b9b868ee8d2981495ea7e2c3ad129902f9916b3c6"
 dependencies = [
  "anyhow",
  "flate2",
@@ -2298,63 +2265,50 @@ dependencies = [
 
 [[package]]
 name = "lindera-dictionary"
-version = "0.23.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd765c36166016de87a1f447ea971573e4c63e334836c46ad0020f0408c88bfc"
+checksum = "808b7d2b3cabc25a4022526d484a4cfd1d5924dc76a26e0379707698841acef2"
 dependencies = [
  "anyhow",
  "bincode",
  "byteorder",
+ "lindera-cc-cedict-builder",
  "lindera-core",
- "lindera-ipadic",
- "lindera-ko-dic",
- "serde",
-]
-
-[[package]]
-name = "lindera-filter"
-version = "0.23.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5345e37fb9521ab3cee19283bed135d46b3521dc1fd13a49fa0992379056203"
-dependencies = [
- "anyhow",
- "bincode",
- "byteorder",
- "kanaria",
- "lindera-core",
- "lindera-dictionary",
- "once_cell",
- "regex",
- "serde",
- "serde_json",
- "unicode-blocks",
- "unicode-normalization",
- "unicode-segmentation",
- "yada",
-]
-
-[[package]]
-name = "lindera-ipadic"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60eeb356295f784e7db4cfd2c6772f2bd059e565a7744e246642a07bc333a88a"
-dependencies = [
- "bincode",
- "byteorder",
- "encoding",
- "flate2",
- "lindera-core",
- "lindera-decompress",
  "lindera-ipadic-builder",
- "once_cell",
- "tar",
+ "lindera-ipadic-neologd-builder",
+ "lindera-ko-dic",
+ "lindera-ko-dic-builder",
+ "lindera-unidic",
+ "lindera-unidic-builder",
+ "serde",
 ]
 
 [[package]]
 name = "lindera-ipadic-builder"
-version = "0.23.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a16a2a88db9d956f5086bc976deb9951ca2dbbfef41a002df0a7bfb2c845aab"
+checksum = "31f373a280958c930e5ee4a1e4db3a0ee0542afaf02d3b5cacb8cab4e298648e"
+dependencies = [
+ "anyhow",
+ "bincode",
+ "byteorder",
+ "csv",
+ "encoding_rs",
+ "encoding_rs_io",
+ "env_logger",
+ "glob",
+ "lindera-core",
+ "lindera-decompress",
+ "log",
+ "serde",
+ "yada",
+]
+
+[[package]]
+name = "lindera-ipadic-neologd-builder"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92eff98e9ed1a7a412b91709c2343457a04ef02fa0c27c27e3a5892f5591eae9"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2364,7 +2318,6 @@ dependencies = [
  "encoding_rs_io",
  "env_logger",
  "glob",
- "lindera-compress",
  "lindera-core",
  "lindera-decompress",
  "log",
@@ -2374,9 +2327,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-ko-dic"
-version = "0.23.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abb479b170a841b8cfbe602d772e30849ffe0562b219190a378368968b8c8f66"
+checksum = "74c6d5bf7d8092bd6d10de7a5d74b70ea7cf234586235b0d6cdb903b05a6c9e2"
 dependencies = [
  "bincode",
  "byteorder",
@@ -2391,9 +2344,9 @@ dependencies = [
 
 [[package]]
 name = "lindera-ko-dic-builder"
-version = "0.23.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b9b58213552560717c48e7833444a20d2d7fe26a6e565f7ce0cbbf85784c7cf"
+checksum = "f0a4add6d3c1e41ec9e2690d33e287d0223fb59a30ccee4980c23f31368cae1e"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2410,10 +2363,42 @@ dependencies = [
 ]
 
 [[package]]
-name = "lindera-unidic-builder"
-version = "0.23.0"
+name = "lindera-tokenizer"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6858147cdaf4a7b564c08a247449d3aca38e9b4812499651af08afbf85324596"
+checksum = "cb6a8acbd068019d1cdac7316f0dcb87f8e33ede2b13aa237f45114f9750afb8"
+dependencies = [
+ "bincode",
+ "byteorder",
+ "lindera-core",
+ "lindera-dictionary",
+ "once_cell",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "lindera-unidic"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14abf0613d350b30d3b0406a33b1de8fa8d829f26516909421702174785991c8"
+dependencies = [
+ "bincode",
+ "byteorder",
+ "encoding",
+ "lindera-core",
+ "lindera-decompress",
+ "lindera-unidic-builder",
+ "once_cell",
+ "ureq",
+ "zip",
+]
+
+[[package]]
+name = "lindera-unidic-builder"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e204ed53d9bd63227d1e6a6c1f122ca039e00a8634ac32e7fb0281eeec8615c4"
 dependencies = [
  "anyhow",
  "bincode",
@@ -2422,6 +2407,7 @@ dependencies = [
  "encoding",
  "env_logger",
  "glob",
+ "lindera-compress",
  "lindera-core",
  "lindera-decompress",
  "log",
@@ -3428,7 +3414,7 @@ dependencies = [
  "wasm-bindgen",
  "wasm-bindgen-futures",
  "web-sys",
- "webpki-roots",
+ "webpki-roots 0.22.6",
  "winreg",
 ]
 
@@ -4211,12 +4197,6 @@ version = "0.3.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
 
-[[package]]
-name = "unicode-blocks"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "943e3f1f50cc455d072e0801ccb71ff893b0c88060b1169f92e35fb5bb881cc6"
-
 [[package]]
 name = "unicode-ident"
 version = "1.0.9"
@@ -4250,6 +4230,21 @@ version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
 
+[[package]]
+name = "ureq"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
+dependencies = [
+ "base64 0.21.2",
+ "log",
+ "once_cell",
+ "rustls 0.21.1",
+ "rustls-webpki",
+ "url",
+ "webpki-roots 0.23.1",
+]
+
 [[package]]
 name = "url"
 version = "2.3.1"
@@ -4458,6 +4453,15 @@ dependencies = [
  "webpki",
 ]
 
+[[package]]
+name = "webpki-roots"
+version = "0.23.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
+dependencies = [
+ "rustls-webpki",
+]
+
 [[package]]
 name = "whatlang"
 version = "0.16.2"
diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index 85fef13b3..5f055703c 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -814,10 +814,10 @@ fn extract_field(
     }
 }
 
-fn format_fields<A: AsRef<[u8]>>(
+fn format_fields<'a>(
     document: &Document,
     field_ids_map: &FieldsIdsMap,
-    builder: &MatcherBuilder<'_, A>,
+    builder: &'a MatcherBuilder<'a>,
     formatted_options: &BTreeMap<FieldId, FormatOptions>,
     compute_matches: bool,
     displayable_ids: &BTreeSet<FieldId>,
@@ -862,9 +862,9 @@ fn format_fields<A: AsRef<[u8]>>(
     Ok((matches_position, document))
 }
 
-fn format_value<A: AsRef<[u8]>>(
+fn format_value<'a>(
     value: Value,
-    builder: &MatcherBuilder<'_, A>,
+    builder: &'a MatcherBuilder<'a>,
     format_options: Option<FormatOptions>,
     infos: &mut Vec<MatchBounds>,
     compute_matches: bool,
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index aa4b98ec2..ce3070e5d 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.7.2", default-features = false }
+charabia = { version = "0.8.1", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
 deserr = "0.5.0"
diff --git a/milli/src/search/new/matches/matching_words.rs b/milli/src/search/new/matches/matching_words.rs
index 0ba8b198e..2bac05934 100644
--- a/milli/src/search/new/matches/matching_words.rs
+++ b/milli/src/search/new/matches/matching_words.rs
@@ -256,7 +256,8 @@ pub(crate) mod tests {
         let temp_index = temp_index_with_documents();
         let rtxn = temp_index.read_txn().unwrap();
         let mut ctx = SearchContext::new(&temp_index, &rtxn);
-        let tokenizer = TokenizerBuilder::new().build();
+        let mut builder = TokenizerBuilder::default();
+        let tokenizer = builder.build();
         let tokens = tokenizer.tokenize("split this world");
         let query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
         let matching_words = MatchingWords::new(ctx, query_terms);
diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs
index ce28e16c1..72e155b3e 100644
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -12,16 +12,16 @@ const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
 const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
 
 /// Structure used to build a Matcher allowing to customize formating tags.
-pub struct MatcherBuilder<'a, A> {
+pub struct MatcherBuilder<'m> {
     matching_words: MatchingWords,
-    tokenizer: Tokenizer<'a, 'a, A>,
+    tokenizer: Tokenizer<'m>,
     crop_marker: Option<String>,
     highlight_prefix: Option<String>,
     highlight_suffix: Option<String>,
 }
 
-impl<'a, A> MatcherBuilder<'a, A> {
-    pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'a, 'a, A>) -> Self {
+impl<'m> MatcherBuilder<'m> {
+    pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'m>) -> Self {
         Self {
             matching_words,
             tokenizer,
@@ -46,7 +46,7 @@ impl<'a, A> MatcherBuilder<'a, A> {
         self
     }
 
-    pub fn build<'t, 'm>(&'m self, text: &'t str) -> Matcher<'t, 'm, A> {
+    pub fn build<'t>(&'m self, text: &'t str) -> Matcher<'t, 'm> {
         let crop_marker = match &self.crop_marker {
             Some(marker) => marker.as_str(),
             None => DEFAULT_CROP_MARKER,
@@ -103,17 +103,17 @@ pub struct MatchBounds {
 
 /// Structure used to analize a string, compute words that match,
 /// and format the source string, returning a highlighted and cropped sub-string.
-pub struct Matcher<'t, 'm, A> {
+pub struct Matcher<'t, 'm> {
     text: &'t str,
     matching_words: &'m MatchingWords,
-    tokenizer: &'m Tokenizer<'m, 'm, A>,
+    tokenizer: &'m Tokenizer<'m>,
     crop_marker: &'m str,
     highlight_prefix: &'m str,
     highlight_suffix: &'m str,
     matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
 }
 
-impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
+impl<'t> Matcher<'t, '_> {
     /// Iterates over tokens and save any of them that matches the query.
     fn compute_matches(&mut self) -> &mut Self {
         /// some words are counted as matches only if they are close together and in the good order,
@@ -503,7 +503,7 @@ mod tests {
     use crate::index::tests::TempIndex;
     use crate::{execute_search, SearchContext};
 
-    impl<'a> MatcherBuilder<'a, &[u8]> {
+    impl<'a> MatcherBuilder<'a> {
         fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
             let mut ctx = SearchContext::new(index, rtxn);
             let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
@@ -530,7 +530,7 @@ mod tests {
                 None => MatchingWords::default(),
             };
 
-            MatcherBuilder::new(matching_words, TokenizerBuilder::new().build())
+            MatcherBuilder::new(matching_words, TokenizerBuilder::default().into_tokenizer())
         }
     }
 
@@ -690,7 +690,7 @@ mod tests {
         // should crop the phrase instead of croping around the match.
         insta::assert_snapshot!(
             matcher.format(format_options),
-            @"… Split The World is a book written by Emily Henry…"
+            @"…Split The World is a book written by Emily Henry…"
         );
 
         // Text containing some matches.
diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs
index 5e97d6578..64fe07a31 100644
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -7,7 +7,7 @@ use crate::{Result, SearchContext, MAX_WORD_LENGTH};
 /// Convert the tokenised search query into a list of located query terms.
 pub fn located_query_terms_from_tokens(
     ctx: &mut SearchContext,
-    query: NormalizedTokenIter<&[u8]>,
+    query: NormalizedTokenIter,
     words_limit: Option<usize>,
 ) -> Result<Vec<LocatedQueryTerm>> {
     let nbr_typos = number_of_typos_allowed(ctx)?;
@@ -303,7 +303,8 @@ mod tests {
 
     #[test]
     fn start_with_hard_separator() -> Result<()> {
-        let tokenizer = TokenizerBuilder::new().build();
+        let mut builder = TokenizerBuilder::default();
+        let tokenizer = builder.build();
         let tokens = tokenizer.tokenize(".");
         let index = temp_index_with_documents();
         let rtxn = index.read_txn()?;
diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index 0cce91938..8985534db 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -128,10 +128,10 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
         .map(|reader| (documents_ids, reader, script_language_docids))
 }
 
-fn extract_tokens_from_document<T: AsRef<[u8]>>(
+fn extract_tokens_from_document(
     obkv: &KvReader<FieldId>,
     searchable_fields: &Option<HashSet<FieldId>>,
-    tokenizer: &Tokenizer<T>,
+    tokenizer: &Tokenizer,
     max_positions_per_attributes: u32,
     buffers: &mut Buffers,
     script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
diff --git a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap
index 5b424356a..6e7876726 100644
--- a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap
@@ -1,18 +1,21 @@
 ---
 source: milli/src/update/index_documents/mod.rs
 ---
+0                [1, ]
 1                [2, ]
-10.0             [1, ]
+10               [1, ]
 12               [0, ]
 1344             [3, ]
 2                [0, ]
 23               [5, ]
-25.99            [2, ]
-3.5              [0, ]
+25               [2, ]
+3                [0, ]
 35               [5, ]
 4                [4, ]
 42               [0, 5, ]
 456              [1, ]
+5                [0, ]
+99               [2, ]
 adams            [5, ]
 adventure        [1, ]
 alice            [2, ]
@@ -29,7 +32,7 @@ galaxy           [5, ]
 guide            [5, ]
 half             [4, ]
 harry            [4, ]
-hitchhiker'      [5, ]
+hitchhiker       [5, ]
 hobbit           [3, ]
 in               [2, ]
 j                [3, 4, ]
diff --git a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap
index 4f4a9e33a..b0ef38b93 100644
--- a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap
@@ -1,19 +1,22 @@
 ---
 source: milli/src/update/index_documents/mod.rs
 ---
+0                [1, 7, ]
 1                [2, ]
-10.0             [1, 7, ]
+10               [1, 7, ]
 12               [0, 8, ]
 1344             [3, ]
 1813             [8, ]
 2                [0, 8, ]
 23               [5, ]
-25.99            [2, ]
-3.5              [0, 8, ]
+25               [2, ]
+3                [0, 8, ]
 35               [5, ]
 4                [4, 6, ]
 42               [0, 5, 8, ]
 456              [1, 7, ]
+5                [0, 8, ]
+99               [2, ]
 adams            [5, ]
 adventure        [1, 7, ]
 alice            [2, ]
@@ -31,7 +34,7 @@ galaxy           [5, ]
 guide            [5, ]
 half             [4, 6, ]
 harry            [4, 6, ]
-hitchhiker'      [5, ]
+hitchhiker       [5, ]
 hobbit           [3, ]
 in               [2, ]
 j                [3, 4, 6, 8, ]
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index aa69abca1..5b8e5a21c 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -1,7 +1,7 @@
 use std::collections::{BTreeSet, HashMap, HashSet};
 use std::result::Result as StdResult;
 
-use charabia::{Tokenizer, TokenizerBuilder};
+use charabia::{Normalize, Tokenizer, TokenizerBuilder};
 use deserr::{DeserializeError, Deserr};
 use itertools::Itertools;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
@@ -423,6 +423,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         match self.stop_words {
             Setting::Set(ref stop_words) => {
                 let current = self.index.stop_words(self.wtxn)?;
+
+                // Apply a lossless normalization to the stop words
+                let stop_words = stop_words
+                    .iter()
+                    .map(|w| w.as_str().normalize(&Default::default()).into_owned());
+
                 // since we can't compare a BTreeSet with an FST we are going to convert the
                 // BTreeSet to an FST and then compare bytes per bytes the two FSTs.
                 let fst = fst::Set::from_iter(stop_words)?;
@@ -446,7 +452,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     fn update_synonyms(&mut self) -> Result<bool> {
         match self.synonyms {
             Setting::Set(ref synonyms) => {
-                fn normalize(tokenizer: &Tokenizer<&[u8]>, text: &str) -> Vec<String> {
+                fn normalize(tokenizer: &Tokenizer, text: &str) -> Vec<String> {
                     tokenizer
                         .tokenize(text)
                         .filter_map(|token| {
@@ -647,7 +653,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
     fn update_exact_words(&mut self) -> Result<()> {
         match self.exact_words {
             Setting::Set(ref mut words) => {
-                fn normalize(tokenizer: &Tokenizer<&[u8]>, text: &str) -> String {
+                fn normalize(tokenizer: &Tokenizer, text: &str) -> String {
                     tokenizer.tokenize(text).map(|token| token.lemma().to_string()).collect()
                 }
 
diff --git a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap
index d8125dfcf..f8d64e001 100644
--- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap
@@ -1,27 +1,28 @@
 ---
 source: milli/src/update/delete_documents.rs
 ---
-1_36             [3, ]
-1_37             [4, ]
-1_38             [5, ]
-1_39             [6, ]
-1_40             [7, ]
-1_41             [8, ]
-1_42             [9, ]
-1_43             [10, ]
-1_44             [11, ]
-1_45             [12, ]
-1_46             [13, ]
-1_47             [14, ]
-1_5              [1, ]
-1_52             [15, ]
-1_57             [16, ]
-1_58             [17, ]
-1_68             [18, ]
-1_69             [19, ]
-1_7              [2, ]
-1_71             [21, ]
-2.2              [21, ]
+1                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ]
+2                [21, ]
+36               [3, ]
+37               [4, ]
+38               [5, ]
+39               [6, ]
+40               [7, ]
+41               [8, ]
+42               [9, ]
+43               [10, ]
+44               [11, ]
+45               [12, ]
+46               [13, ]
+47               [14, ]
+5                [1, ]
+52               [15, ]
+57               [16, ]
+58               [17, ]
+68               [18, ]
+69               [19, ]
+7                [2, ]
+71               [21, ]
 abstract         [2, 6, 10, 13, 14, 15, 16, 17, ]
 aquarium         [5, ]
 art              [4, 5, 8, 9, 10, 12, 17, ]
diff --git a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap
index 88031d24a..36add107b 100644
--- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap
@@ -1,4 +1,25 @@
 ---
 source: milli/src/update/delete_documents.rs
 ---
+1  1                36               [3, ]
+1  1                37               [4, ]
+1  1                38               [5, ]
+1  1                39               [6, ]
+1  1                40               [7, ]
+1  1                41               [8, ]
+1  1                42               [9, ]
+1  1                43               [10, ]
+1  1                44               [11, ]
+1  1                45               [12, ]
+1  1                46               [13, ]
+1  1                47               [14, ]
+1  1                5                [1, ]
+1  1                52               [15, ]
+1  1                57               [16, ]
+1  1                58               [17, ]
+1  1                68               [18, ]
+1  1                69               [19, ]
+1  1                7                [2, ]
+1  1                71               [21, ]
+1  2                2                [21, ]
 
diff --git a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_docids.snap b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_docids.snap
index 7909d9b06..972a733e2 100644
--- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_docids.snap
@@ -1,31 +1,31 @@
 ---
 source: milli/src/update/delete_documents.rs
 ---
-1.2              [20, 22, ]
-1_36             [3, ]
-1_37             [4, ]
-1_38             [5, ]
-1_39             [6, ]
-1_4              [0, ]
-1_40             [7, ]
-1_41             [8, ]
-1_42             [9, ]
-1_43             [10, ]
-1_44             [11, ]
-1_45             [12, ]
-1_46             [13, ]
-1_47             [14, ]
-1_5              [1, ]
-1_52             [15, ]
-1_57             [16, ]
-1_58             [17, ]
-1_68             [18, ]
-1_69             [19, ]
-1_7              [2, ]
-1_70             [20, ]
-1_71             [21, ]
-1_72             [22, ]
-2.2              [21, ]
+1                [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ]
+2                [20, 21, 22, ]
+36               [3, ]
+37               [4, ]
+38               [5, ]
+39               [6, ]
+4                [0, ]
+40               [7, ]
+41               [8, ]
+42               [9, ]
+43               [10, ]
+44               [11, ]
+45               [12, ]
+46               [13, ]
+47               [14, ]
+5                [1, ]
+52               [15, ]
+57               [16, ]
+58               [17, ]
+68               [18, ]
+69               [19, ]
+7                [2, ]
+70               [20, ]
+71               [21, ]
+72               [22, ]
 abstract         [2, 6, 10, 13, 14, 15, 16, 17, ]
 aquarium         [5, ]
 art              [4, 5, 8, 9, 10, 12, 17, ]
diff --git a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_pair_proximity_docids.snap b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_pair_proximity_docids.snap
index 88031d24a..941838e34 100644
--- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_pair_proximity_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_pair_proximity_docids.snap
@@ -1,4 +1,29 @@
 ---
 source: milli/src/update/delete_documents.rs
 ---
+1  1                2                [20, 22, ]
+1  1                36               [3, ]
+1  1                37               [4, ]
+1  1                38               [5, ]
+1  1                39               [6, ]
+1  1                4                [0, ]
+1  1                40               [7, ]
+1  1                41               [8, ]
+1  1                42               [9, ]
+1  1                43               [10, ]
+1  1                44               [11, ]
+1  1                45               [12, ]
+1  1                46               [13, ]
+1  1                47               [14, ]
+1  1                5                [1, ]
+1  1                52               [15, ]
+1  1                57               [16, ]
+1  1                58               [17, ]
+1  1                68               [18, ]
+1  1                69               [19, ]
+1  1                7                [2, ]
+1  1                70               [20, ]
+1  1                71               [21, ]
+1  1                72               [22, ]
+1  2                2                [21, ]