diff --git a/Cargo.lock b/Cargo.lock index effdfe9a7..e03efb009 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -152,7 +152,7 @@ dependencies = [ "pin-project-lite", "tokio-rustls 0.23.4", "tokio-util", - "webpki-roots", + "webpki-roots 0.22.6", ] [[package]] @@ -705,24 +705,27 @@ dependencies = [ [[package]] name = "charabia" -version = "0.7.2" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413155d93157bff9130895c3bd83970ac7f35659ca57226a96aa35cf1e8e102c" +checksum = "bb49850f555eb71aa6fc6d4d79420e81f4d89fa56e0e9c0f6d19aace2f56c554" dependencies = [ + "aho-corasick", "cow-utils", "csv", "deunicode", + "either", "finl_unicode", "fst", "irg-kvariants", "jieba-rs", - "lindera", + "lindera-core", + "lindera-dictionary", + "lindera-tokenizer", "once_cell", "pinyin", "serde", "slice-group-by", "unicode-normalization", - "unicode-segmentation", "wana_kana", "whatlang", ] @@ -2135,15 +2138,6 @@ dependencies = [ "simple_asn1", ] -[[package]] -name = "kanaria" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0f9d9652540055ac4fded998a73aca97d965899077ab1212587437da44196ff" -dependencies = [ - "bitflags", -] - [[package]] name = "language-tags" version = "0.3.2" @@ -2211,38 +2205,11 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "lindera" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72be283281bec2768687b1784be03a678609b51f2f90f6f9d9b4f07953e6dd25" -dependencies = [ - "anyhow", - "bincode", - "byteorder", - "encoding", - "kanaria", - "lindera-cc-cedict-builder", - "lindera-core", - "lindera-dictionary", - "lindera-filter", - "lindera-ipadic-builder", - "lindera-ko-dic-builder", - "lindera-unidic-builder", - "regex", - "serde", - "serde_json", - "thiserror", - "unicode-blocks", - "unicode-normalization", - "yada", -] - [[package]] name = "lindera-cc-cedict-builder" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10fbafd37adab44ccc2668a40fba2dbc4e665cb3c36018c15dfe2e2b830e28ce" +checksum = "4c6bf79b29a90bcd22036e494d6cc9ac3abe9ab604b21f3258ba6dc1ce501801" dependencies = [ "anyhow", "bincode", @@ -2259,9 +2226,9 @@ dependencies = [ [[package]] name = "lindera-compress" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed9196bf5995503f6878a090dfee6114ba86430c72f67ef3624246b564869937" +checksum = "8f2e99e67736352bbb6ed1c273643975822505067ca32194b0981040bc50527a" dependencies = [ "anyhow", "flate2", @@ -2270,9 +2237,9 @@ dependencies = [ [[package]] name = "lindera-core" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5f0baa9932f682e9c5b388897330f155d3c40de80016e60125897fde5e0e246" +checksum = "7c3935e966409156f22cb4b334b21b0dce84b7aa1cad62214b466489d249c8e5" dependencies = [ "anyhow", "bincode", @@ -2287,9 +2254,9 @@ dependencies = [ [[package]] name = "lindera-decompress" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6e63fa6ef0bc3ce2c26d372aa6185b7a316194494a84f81678f5da2893bf4a2" +checksum = "7476406abb63c49d7f59c88b9b868ee8d2981495ea7e2c3ad129902f9916b3c6" dependencies = [ "anyhow", "flate2", @@ -2298,63 +2265,50 @@ dependencies = [ [[package]] name = "lindera-dictionary" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd765c36166016de87a1f447ea971573e4c63e334836c46ad0020f0408c88bfc" +checksum = "808b7d2b3cabc25a4022526d484a4cfd1d5924dc76a26e0379707698841acef2" dependencies = [ "anyhow", "bincode", "byteorder", + "lindera-cc-cedict-builder", "lindera-core", - "lindera-ipadic", - "lindera-ko-dic", - "serde", -] - -[[package]] -name = "lindera-filter" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5345e37fb9521ab3cee19283bed135d46b3521dc1fd13a49fa0992379056203" -dependencies = [ - "anyhow", - "bincode", - "byteorder", - "kanaria", - "lindera-core", - "lindera-dictionary", - "once_cell", - "regex", - "serde", - "serde_json", - "unicode-blocks", - "unicode-normalization", - "unicode-segmentation", - "yada", -] - -[[package]] -name = "lindera-ipadic" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60eeb356295f784e7db4cfd2c6772f2bd059e565a7744e246642a07bc333a88a" -dependencies = [ - "bincode", - "byteorder", - "encoding", - "flate2", - "lindera-core", - "lindera-decompress", "lindera-ipadic-builder", - "once_cell", - "tar", + "lindera-ipadic-neologd-builder", + "lindera-ko-dic", + "lindera-ko-dic-builder", + "lindera-unidic", + "lindera-unidic-builder", + "serde", ] [[package]] name = "lindera-ipadic-builder" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a16a2a88db9d956f5086bc976deb9951ca2dbbfef41a002df0a7bfb2c845aab" +checksum = "31f373a280958c930e5ee4a1e4db3a0ee0542afaf02d3b5cacb8cab4e298648e" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "csv", + "encoding_rs", + "encoding_rs_io", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "serde", + "yada", +] + +[[package]] +name = "lindera-ipadic-neologd-builder" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92eff98e9ed1a7a412b91709c2343457a04ef02fa0c27c27e3a5892f5591eae9" dependencies = [ "anyhow", "bincode", @@ -2364,7 +2318,6 @@ dependencies = [ "encoding_rs_io", "env_logger", "glob", - "lindera-compress", "lindera-core", "lindera-decompress", "log", @@ -2374,9 +2327,9 @@ dependencies = [ [[package]] name = "lindera-ko-dic" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb479b170a841b8cfbe602d772e30849ffe0562b219190a378368968b8c8f66" +checksum = "74c6d5bf7d8092bd6d10de7a5d74b70ea7cf234586235b0d6cdb903b05a6c9e2" dependencies = [ "bincode", "byteorder", @@ -2391,9 +2344,9 @@ dependencies = [ [[package]] name = "lindera-ko-dic-builder" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b9b58213552560717c48e7833444a20d2d7fe26a6e565f7ce0cbbf85784c7cf" +checksum = "f0a4add6d3c1e41ec9e2690d33e287d0223fb59a30ccee4980c23f31368cae1e" dependencies = [ "anyhow", "bincode", @@ -2410,10 +2363,42 @@ dependencies = [ ] [[package]] -name = "lindera-unidic-builder" -version = "0.23.0" +name = "lindera-tokenizer" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6858147cdaf4a7b564c08a247449d3aca38e9b4812499651af08afbf85324596" +checksum = "cb6a8acbd068019d1cdac7316f0dcb87f8e33ede2b13aa237f45114f9750afb8" +dependencies = [ + "bincode", + "byteorder", + "lindera-core", + "lindera-dictionary", + "once_cell", + "serde", + "serde_json", +] + +[[package]] +name = "lindera-unidic" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14abf0613d350b30d3b0406a33b1de8fa8d829f26516909421702174785991c8" +dependencies = [ + "bincode", + "byteorder", + "encoding", + "lindera-core", + "lindera-decompress", + "lindera-unidic-builder", + "once_cell", + "ureq", + "zip", +] + +[[package]] +name = "lindera-unidic-builder" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e204ed53d9bd63227d1e6a6c1f122ca039e00a8634ac32e7fb0281eeec8615c4" dependencies = [ "anyhow", "bincode", @@ -2422,6 +2407,7 @@ dependencies = [ "encoding", "env_logger", "glob", + "lindera-compress", "lindera-core", "lindera-decompress", "log", @@ -3428,7 +3414,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 0.22.6", "winreg", ] @@ -4211,12 +4197,6 @@ version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" -[[package]] -name = "unicode-blocks" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943e3f1f50cc455d072e0801ccb71ff893b0c88060b1169f92e35fb5bb881cc6" - [[package]] name = "unicode-ident" version = "1.0.9" @@ -4250,6 +4230,21 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" +[[package]] +name = "ureq" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9" +dependencies = [ + "base64 0.21.2", + "log", + "once_cell", + "rustls 0.21.1", + "rustls-webpki", + "url", + "webpki-roots 0.23.1", +] + [[package]] name = "url" version = "2.3.1" @@ -4458,6 +4453,15 @@ dependencies = [ "webpki", ] +[[package]] +name = "webpki-roots" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338" +dependencies = [ + "rustls-webpki", +] + [[package]] name = "whatlang" version = "0.16.2" diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 85fef13b3..5f055703c 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -814,10 +814,10 @@ fn extract_field( } } -fn format_fields>( +fn format_fields<'a>( document: &Document, field_ids_map: &FieldsIdsMap, - builder: &MatcherBuilder<'_, A>, + builder: &'a MatcherBuilder<'a>, formatted_options: &BTreeMap, compute_matches: bool, displayable_ids: &BTreeSet, @@ -862,9 +862,9 @@ fn format_fields>( Ok((matches_position, document)) } -fn format_value>( +fn format_value<'a>( value: Value, - builder: &MatcherBuilder<'_, A>, + builder: &'a MatcherBuilder<'a>, format_options: Option, infos: &mut Vec, compute_matches: bool, diff --git a/milli/Cargo.toml b/milli/Cargo.toml index aa4b98ec2..ce3070e5d 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -17,7 +17,7 @@ bincode = "1.3.3" bstr = "1.4.0" bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] } byteorder = "1.4.3" -charabia = { version = "0.7.2", default-features = false } +charabia = { version = "0.8.1", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.8" deserr = "0.5.0" diff --git a/milli/src/search/new/matches/matching_words.rs b/milli/src/search/new/matches/matching_words.rs index 0ba8b198e..2bac05934 100644 --- a/milli/src/search/new/matches/matching_words.rs +++ b/milli/src/search/new/matches/matching_words.rs @@ -256,7 +256,8 @@ pub(crate) mod tests { let temp_index = temp_index_with_documents(); let rtxn = temp_index.read_txn().unwrap(); let mut ctx = SearchContext::new(&temp_index, &rtxn); - let tokenizer = TokenizerBuilder::new().build(); + let mut builder = TokenizerBuilder::default(); + let tokenizer = builder.build(); let tokens = tokenizer.tokenize("split this world"); let query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap(); let matching_words = MatchingWords::new(ctx, query_terms); diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index ce28e16c1..72e155b3e 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -12,16 +12,16 @@ const DEFAULT_HIGHLIGHT_PREFIX: &str = ""; const DEFAULT_HIGHLIGHT_SUFFIX: &str = ""; /// Structure used to build a Matcher allowing to customize formating tags. -pub struct MatcherBuilder<'a, A> { +pub struct MatcherBuilder<'m> { matching_words: MatchingWords, - tokenizer: Tokenizer<'a, 'a, A>, + tokenizer: Tokenizer<'m>, crop_marker: Option, highlight_prefix: Option, highlight_suffix: Option, } -impl<'a, A> MatcherBuilder<'a, A> { - pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'a, 'a, A>) -> Self { +impl<'m> MatcherBuilder<'m> { + pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'m>) -> Self { Self { matching_words, tokenizer, @@ -46,7 +46,7 @@ impl<'a, A> MatcherBuilder<'a, A> { self } - pub fn build<'t, 'm>(&'m self, text: &'t str) -> Matcher<'t, 'm, A> { + pub fn build<'t>(&'m self, text: &'t str) -> Matcher<'t, 'm> { let crop_marker = match &self.crop_marker { Some(marker) => marker.as_str(), None => DEFAULT_CROP_MARKER, @@ -103,17 +103,17 @@ pub struct MatchBounds { /// Structure used to analize a string, compute words that match, /// and format the source string, returning a highlighted and cropped sub-string. -pub struct Matcher<'t, 'm, A> { +pub struct Matcher<'t, 'm> { text: &'t str, matching_words: &'m MatchingWords, - tokenizer: &'m Tokenizer<'m, 'm, A>, + tokenizer: &'m Tokenizer<'m>, crop_marker: &'m str, highlight_prefix: &'m str, highlight_suffix: &'m str, matches: Option<(Vec>, Vec)>, } -impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> { +impl<'t> Matcher<'t, '_> { /// Iterates over tokens and save any of them that matches the query. fn compute_matches(&mut self) -> &mut Self { /// some words are counted as matches only if they are close together and in the good order, @@ -503,7 +503,7 @@ mod tests { use crate::index::tests::TempIndex; use crate::{execute_search, SearchContext}; - impl<'a> MatcherBuilder<'a, &[u8]> { + impl<'a> MatcherBuilder<'a> { fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self { let mut ctx = SearchContext::new(index, rtxn); let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search( @@ -530,7 +530,7 @@ mod tests { None => MatchingWords::default(), }; - MatcherBuilder::new(matching_words, TokenizerBuilder::new().build()) + MatcherBuilder::new(matching_words, TokenizerBuilder::default().into_tokenizer()) } } @@ -690,7 +690,7 @@ mod tests { // should crop the phrase instead of croping around the match. insta::assert_snapshot!( matcher.format(format_options), - @"… Split The World is a book written by Emily Henry…" + @"…Split The World is a book written by Emily Henry…" ); // Text containing some matches. diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 5e97d6578..64fe07a31 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -7,7 +7,7 @@ use crate::{Result, SearchContext, MAX_WORD_LENGTH}; /// Convert the tokenised search query into a list of located query terms. pub fn located_query_terms_from_tokens( ctx: &mut SearchContext, - query: NormalizedTokenIter<&[u8]>, + query: NormalizedTokenIter, words_limit: Option, ) -> Result> { let nbr_typos = number_of_typos_allowed(ctx)?; @@ -303,7 +303,8 @@ mod tests { #[test] fn start_with_hard_separator() -> Result<()> { - let tokenizer = TokenizerBuilder::new().build(); + let mut builder = TokenizerBuilder::default(); + let tokenizer = builder.build(); let tokens = tokenizer.tokenize("."); let index = temp_index_with_documents(); let rtxn = index.read_txn()?; diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 0cce91938..8985534db 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -128,10 +128,10 @@ pub fn extract_docid_word_positions( .map(|reader| (documents_ids, reader, script_language_docids)) } -fn extract_tokens_from_document>( +fn extract_tokens_from_document( obkv: &KvReader, searchable_fields: &Option>, - tokenizer: &Tokenizer, + tokenizer: &Tokenizer, max_positions_per_attributes: u32, buffers: &mut Buffers, script_language_word_count: &mut HashMap>, diff --git a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap index 5b424356a..6e7876726 100644 --- a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap +++ b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap @@ -1,18 +1,21 @@ --- source: milli/src/update/index_documents/mod.rs --- +0 [1, ] 1 [2, ] -10.0 [1, ] +10 [1, ] 12 [0, ] 1344 [3, ] 2 [0, ] 23 [5, ] -25.99 [2, ] -3.5 [0, ] +25 [2, ] +3 [0, ] 35 [5, ] 4 [4, ] 42 [0, 5, ] 456 [1, ] +5 [0, ] +99 [2, ] adams [5, ] adventure [1, ] alice [2, ] @@ -29,7 +32,7 @@ galaxy [5, ] guide [5, ] half [4, ] harry [4, ] -hitchhiker' [5, ] +hitchhiker [5, ] hobbit [3, ] in [2, ] j [3, 4, ] diff --git a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap index 4f4a9e33a..b0ef38b93 100644 --- a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap +++ b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap @@ -1,19 +1,22 @@ --- source: milli/src/update/index_documents/mod.rs --- +0 [1, 7, ] 1 [2, ] -10.0 [1, 7, ] +10 [1, 7, ] 12 [0, 8, ] 1344 [3, ] 1813 [8, ] 2 [0, 8, ] 23 [5, ] -25.99 [2, ] -3.5 [0, 8, ] +25 [2, ] +3 [0, 8, ] 35 [5, ] 4 [4, 6, ] 42 [0, 5, 8, ] 456 [1, 7, ] +5 [0, 8, ] +99 [2, ] adams [5, ] adventure [1, 7, ] alice [2, ] @@ -31,7 +34,7 @@ galaxy [5, ] guide [5, ] half [4, 6, ] harry [4, 6, ] -hitchhiker' [5, ] +hitchhiker [5, ] hobbit [3, ] in [2, ] j [3, 4, 6, 8, ] diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index aa69abca1..5b8e5a21c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use std::result::Result as StdResult; -use charabia::{Tokenizer, TokenizerBuilder}; +use charabia::{Normalize, Tokenizer, TokenizerBuilder}; use deserr::{DeserializeError, Deserr}; use itertools::Itertools; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -423,6 +423,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { match self.stop_words { Setting::Set(ref stop_words) => { let current = self.index.stop_words(self.wtxn)?; + + // Apply an unlossy normalization on stop_words + let stop_words = stop_words + .iter() + .map(|w| w.as_str().normalize(&Default::default()).into_owned()); + // since we can't compare a BTreeSet with an FST we are going to convert the // BTreeSet to an FST and then compare bytes per bytes the two FSTs. let fst = fst::Set::from_iter(stop_words)?; @@ -446,7 +452,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_synonyms(&mut self) -> Result { match self.synonyms { Setting::Set(ref synonyms) => { - fn normalize(tokenizer: &Tokenizer<&[u8]>, text: &str) -> Vec { + fn normalize(tokenizer: &Tokenizer, text: &str) -> Vec { tokenizer .tokenize(text) .filter_map(|token| { @@ -647,7 +653,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_exact_words(&mut self) -> Result<()> { match self.exact_words { Setting::Set(ref mut words) => { - fn normalize(tokenizer: &Tokenizer<&[u8]>, text: &str) -> String { + fn normalize(tokenizer: &Tokenizer, text: &str) -> String { tokenizer.tokenize(text).map(|token| token.lemma().to_string()).collect() } diff --git a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap index d8125dfcf..f8d64e001 100644 --- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap +++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap @@ -1,27 +1,28 @@ --- source: milli/src/update/delete_documents.rs --- -1_36 [3, ] -1_37 [4, ] -1_38 [5, ] -1_39 [6, ] -1_40 [7, ] -1_41 [8, ] -1_42 [9, ] -1_43 [10, ] -1_44 [11, ] -1_45 [12, ] -1_46 [13, ] -1_47 [14, ] -1_5 [1, ] -1_52 [15, ] -1_57 [16, ] -1_58 [17, ] -1_68 [18, ] -1_69 [19, ] -1_7 [2, ] -1_71 [21, ] -2.2 [21, ] +1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ] +2 [21, ] +36 [3, ] +37 [4, ] +38 [5, ] +39 [6, ] +40 [7, ] +41 [8, ] +42 [9, ] +43 [10, ] +44 [11, ] +45 [12, ] +46 [13, ] +47 [14, ] +5 [1, ] +52 [15, ] +57 [16, ] +58 [17, ] +68 [18, ] +69 [19, ] +7 [2, ] +71 [21, ] abstract [2, 6, 10, 13, 14, 15, 16, 17, ] aquarium [5, ] art [4, 5, 8, 9, 10, 12, 17, ] diff --git a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap index 88031d24a..36add107b 100644 --- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap +++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap @@ -1,4 +1,25 @@ --- source: milli/src/update/delete_documents.rs --- +1 1 36 [3, ] +1 1 37 [4, ] +1 1 38 [5, ] +1 1 39 [6, ] +1 1 40 [7, ] +1 1 41 [8, ] +1 1 42 [9, ] +1 1 43 [10, ] +1 1 44 [11, ] +1 1 45 [12, ] +1 1 46 [13, ] +1 1 47 [14, ] +1 1 5 [1, ] +1 1 52 [15, ] +1 1 57 [16, ] +1 1 58 [17, ] +1 1 68 [18, ] +1 1 69 [19, ] +1 1 7 [2, ] +1 1 71 [21, ] +1 2 2 [21, ] diff --git a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_docids.snap b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_docids.snap index 7909d9b06..972a733e2 100644 --- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_docids.snap +++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_docids.snap @@ -1,31 +1,31 @@ --- source: milli/src/update/delete_documents.rs --- -1.2 [20, 22, ] -1_36 [3, ] -1_37 [4, ] -1_38 [5, ] -1_39 [6, ] -1_4 [0, ] -1_40 [7, ] -1_41 [8, ] -1_42 [9, ] -1_43 [10, ] -1_44 [11, ] -1_45 [12, ] -1_46 [13, ] -1_47 [14, ] -1_5 [1, ] -1_52 [15, ] -1_57 [16, ] -1_58 [17, ] -1_68 [18, ] -1_69 [19, ] -1_7 [2, ] -1_70 [20, ] -1_71 [21, ] -1_72 [22, ] -2.2 [21, ] +1 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ] +2 [20, 21, 22, ] +36 [3, ] +37 [4, ] +38 [5, ] +39 [6, ] +4 [0, ] +40 [7, ] +41 [8, ] +42 [9, ] +43 [10, ] +44 [11, ] +45 [12, ] +46 [13, ] +47 [14, ] +5 [1, ] +52 [15, ] +57 [16, ] +58 [17, ] +68 [18, ] +69 [19, ] +7 [2, ] +70 [20, ] +71 [21, ] +72 [22, ] abstract [2, 6, 10, 13, 14, 15, 16, 17, ] aquarium [5, ] art [4, 5, 8, 9, 10, 12, 17, ] diff --git a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_pair_proximity_docids.snap b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_pair_proximity_docids.snap index 88031d24a..941838e34 100644 --- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_pair_proximity_docids.snap +++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_soft/word_pair_proximity_docids.snap @@ -1,4 +1,29 @@ --- source: milli/src/update/delete_documents.rs --- +1 1 2 [20, 22, ] +1 1 36 [3, ] +1 1 37 [4, ] +1 1 38 [5, ] +1 1 39 [6, ] +1 1 4 [0, ] +1 1 40 [7, ] +1 1 41 [8, ] +1 1 42 [9, ] +1 1 43 [10, ] +1 1 44 [11, ] +1 1 45 [12, ] +1 1 46 [13, ] +1 1 47 [14, ] +1 1 5 [1, ] +1 1 52 [15, ] +1 1 57 [16, ] +1 1 58 [17, ] +1 1 68 [18, ] +1 1 69 [19, ] +1 1 7 [2, ] +1 1 70 [20, ] +1 1 71 [21, ] +1 1 72 [22, ] +1 2 2 [21, ]