diff --git a/milli/src/search/new/tests/language.rs b/milli/src/search/new/tests/language.rs index 6adad748c..e16544fdb 100644 --- a/milli/src/search/new/tests/language.rs +++ b/milli/src/search/new/tests/language.rs @@ -18,5 +18,5 @@ fn test_kanji_language_detection() { search.query("東京"); let SearchResult { documents_ids, .. } = search.execute().unwrap(); - assert_eq!(documents_ids, vec![1]); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1]"); } diff --git a/milli/src/search/new/tests/ngram_split_words.rs b/milli/src/search/new/tests/ngram_split_words.rs index 06c49274c..b78bbe763 100644 --- a/milli/src/search/new/tests/ngram_split_words.rs +++ b/milli/src/search/new/tests/ngram_split_words.rs @@ -16,7 +16,10 @@ This module tests the following properties: 13. Ngrams cannot be formed by combining a phrase and a word or two phrases */ -use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy}; +use crate::{ + index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, + SearchResult, TermsMatchingStrategy, +}; fn create_index() -> TempIndex { let index = TempIndex::new(); @@ -46,6 +49,14 @@ fn create_index() -> TempIndex { { "id": 3, "text": "the sunflower is tall" + }, + { + "id": 4, + "text": "the sunflawer is tall" + }, + { + "id": 5, + "text": "sunflowering is not a verb" } ])) .unwrap(); @@ -67,8 +78,18 @@ fn test_2gram_simple() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("sun flower"); let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); - // will also match documents with "sun flower" - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]"); + // will also match documents with "sunflower" + prefix tolerance + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flowers are pretty\"", + "\"the sun flower is tall\"", + "\"the sunflowers are pretty\"", + "\"the sunflower is tall\"", + "\"sunflowering is not a verb\"", + ] + "###); } #[test] fn test_3gram_simple() { @@ -87,6 +108,13 @@ fn test_3gram_simple() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flowers are pretty\"", + "\"the sunflowers are pretty\"", + ] + "###); } #[test] @@ -99,7 +127,18 @@ fn test_2gram_typo() { s.query("sun flawer"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 4, 5]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flowers are pretty\"", + "\"the sun flower is tall\"", + "\"the sunflowers are pretty\"", + "\"the sunflower is tall\"", + "\"the sunflawer is tall\"", + "\"sunflowering is not a verb\"", + ] + "###); } #[test] @@ -119,6 +158,13 @@ fn test_no_disable_ngrams() { let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); // documents containing `sunflower` insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 3]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flower is tall\"", + "\"the sunflower is tall\"", + ] + "###); } #[test] @@ -137,7 +183,17 @@ fn test_2gram_prefix() { s.query("sun flow"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); // documents containing words beginning with `sunflow` - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flowers are pretty\"", + "\"the sun flower is tall\"", + "\"the sunflowers are pretty\"", + "\"the sunflower is tall\"", + "\"sunflowering is not a verb\"", + ] + "###); } #[test] @@ -157,7 +213,16 @@ fn test_3gram_prefix() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); // documents containing a word beginning with sunfl - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 5]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sunflowers are pretty\"", + "\"the sunflower is tall\"", + "\"the sunflawer is tall\"", + "\"sunflowering is not a verb\"", + ] + "###); } #[test] @@ -170,8 +235,17 @@ fn test_split_words() { s.query("sunflower "); let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); - // all the documents with either `sunflower` or `sun flower` - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3]"); + // all the documents with either `sunflower` or `sun flower` + possible typo + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 4]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flower is tall\"", + "\"the sunflowers are pretty\"", + "\"the sunflower is tall\"", + "\"the sunflawer is tall\"", + ] + "###); } #[test] @@ -191,6 +265,12 @@ fn test_disable_split_words() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); // no document containing `sun flower` insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sunflower is tall\"", + ] + "###); } #[test] @@ -203,8 +283,18 @@ fn test_2gram_split_words() { s.query("sunf lower"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - // all the documents with "sunflower", "sun flower", or (sunflower + 1 typo) - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3]"); + // all the documents with "sunflower", "sun flower", (sunflower + 1 typo), or (sunflower as prefix) + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 4, 5]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flower is tall\"", + "\"the sunflowers are pretty\"", + "\"the sunflower is tall\"", + "\"the sunflawer is tall\"", + "\"sunflowering is not a verb\"", + ] + "###); } #[test] @@ -218,7 +308,15 @@ fn test_3gram_no_split_words() { let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); // no document with `sun flower` - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 5]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sunflowers are pretty\"", + "\"the sunflower is tall\"", + "\"sunflowering is not a verb\"", + ] + "###); } #[test] @@ -231,7 +329,13 @@ fn test_3gram_no_typos() { s.query("sunf la wer"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sunflawer is tall\"", + ] + "###); } #[test] @@ -245,6 +349,13 @@ fn test_no_ngram_phrases() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flowers are pretty\"", + "\"the sun flower is tall\"", + ] + "###); let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); @@ -252,4 +363,10 @@ fn test_no_ngram_phrases() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the sun flower is tall\"", + ] + "###); } diff --git a/milli/src/search/new/tests/proximity.rs b/milli/src/search/new/tests/proximity.rs index e69de29bb..f6e071572 100644 --- a/milli/src/search/new/tests/proximity.rs +++ b/milli/src/search/new/tests/proximity.rs @@ -0,0 +1,317 @@ +/*! 
+This module tests the Proximity ranking rule: + +1. A proximity of >7 always has the same cost. + +2. Phrase terms can be in proximity to other terms via their start and end words, +but we need to make sure that the phrase exists in the document that meets this +proximity condition. This is especially relevant with split words and synonyms. + +3. An ngram has the same proximity cost as its component words being consecutive. +e.g. `sunflower` equivalent to `sun flower`. + +4. The prefix databases can be used to find the proximity between two words, but +they store fewer proximities than the regular word proximity DB. + +*/ + +use std::collections::HashMap; + +use crate::{ + index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, + SearchResult, TermsMatchingStrategy, +}; + +fn create_simple_index() -> TempIndex { + let index = TempIndex::new(); + + index + .update_settings(|s| { + s.set_primary_key("id".to_owned()); + s.set_searchable_fields(vec!["text".to_owned()]); + s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); + }) + .unwrap(); + + index + .add_documents(documents!([ + { + "id": 0, + "text": "the very quick dark brown and smart fox did jump over the terribly lazy and small dog" + }, + { + "id": 1, + "text": "the. quick brown fox jumps over the lazy. dog" + }, + { + "id": 2, + "text": "the quick brown fox jumps over the lazy. 
dog" + }, + { + "id": 3, + "text": "dog the quick brown fox jumps over the lazy" + }, + { + "id": 4, + "text": "the quickbrown fox jumps over the lazy dog" + }, + { + "id": 5, + "text": "brown quick fox jumps over the lazy dog" + }, + { + "id": 6, + "text": "the really quick brown fox jumps over the very lazy dog" + }, + { + "id": 7, + "text": "the really quick brown fox jumps over the lazy dog" + }, + { + "id": 8, + "text": "the quick brown fox jumps over the lazy" + }, + { + "id": 9, + "text": "the quack brown fox jumps over the lazy" + }, + { + "id": 9, + "text": "the quack brown fox jumps over the lazy dog" + }, + { + "id": 10, + "text": "the quick brown fox jumps over the lazy dog" + } + ])) + .unwrap(); + index +} + +fn create_edge_cases_index() -> TempIndex { + let index = TempIndex::new(); + + index + .update_settings(|s| { + s.set_primary_key("id".to_owned()); + s.set_searchable_fields(vec!["text".to_owned()]); + s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); + }) + .unwrap(); + + index.add_documents(documents!([ + { + // This document will insert "s" in the prefix database + "id": 0, + "text": " + saa sab sac sae saf sag sah sai saj sak sal sam san sao sap saq sar sasa sat sau sav saw sax say saz + sba sbb sbc sbe sbf sbg sbh sbi sbj sbk sbl sbm sbn sbo sbp sbq sbr sbsb sbt sbu sbv sbw sbx sby sbz + sca scb scc sce scf scg sch sci scj sck scl scm scn sco scp scq scr scsc sct scu scv scw scx scy scz + sda sdb sdc sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sdsd sdt sdu sdv sdw sdx sdy sdz + sea seb sec see sef seg seh sei sej sek sel sem sen seo sep seq ser sese set seu sev sew sex sey sez + sfa sfb sfc sfe sff sfg sfh sfi sfj sfk sfl sfm sfn sfo sfp sfq sfr sfsf sft sfu sfv sfw sfx sfy sfz + sga sgb sgc sge sgf sgg sgh sgi sgj sgk sgl sgm sgn sgo sgp sgq sgr sgsg sgt sgu sgv sgw sgx sgy sgz + ska skb skc ske skf skg skh ski skj skk skl skm skn sko skp skq skr sksk skt sku skv skw skx sky skz + sla slb slc sle slf slg slh sli slj 
slk sll slm sln slo slp slq slr slsl slt slu slv slw slx sly slz + sma smb smc sme smf smg smh smi smj smk sml smm smn smo smp smq smr smsm smt smu smv smw smx smy smz + sna snb snc sne snf sng snh sni snj snk snl snm snn sno snp snq snr snsn snt snu snv snw snx sny snz + soa sob soc soe sof sog soh soi soj sok sol som son soo sop soq sor soso sot sou sov sow sox soy soz + spa spb spc spe spf spg sph spi spj spk spl spm spn spo spp spq spr spsp spt spu spv spw spx spy spz + sqa sqb sqc sqe sqf sqg sqh sqi sqj sqk sql sqm sqn sqo sqp sqq sqr sqsq sqt squ sqv sqw sqx sqy sqz + sra srb src sre srf srg srh sri srj srk srl srm srn sro srp srq srr srsr srt sru srv srw srx sry srz + ssa ssb ssc sse ssf ssg ssh ssi ssj ssk ssl ssm ssn sso ssp ssq ssr ssss sst ssu ssv ssw ssx ssy ssz + sta stb stc ste stf stg sth sti stj stk stl stm stn sto stp stq str stst stt stu stv stw stx sty stz + " + }, + // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. + // If the search query is "sunflower", the split word "Sun Flower" will match some documents. + // If the query is `sunflower wilting`, then we should make sure that + // the proximity condition `flower wilting: prox N` also comes with the condition + // `sun wilting: prox N+1`. TODO: this is not the exact condition we use for now. + // We only check that the phrase `sun flower` exists and `flower wilting: prox N`, which + // is better than nothing but not the best. + { + "id": 1, + "text": "Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat." + }, + { + "id": 2, + "text": "Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat." + }, + { + "id": 3, + // This document matches the query `sunflower wilting`, but the proximity condition + // between `sunflower` and `wilting` cannot be through the split-word `Sun Flower` + // which would reduce to only `flower` and `wilting` being in proximity. 
+ "text": "A flower wilting under the sun, unlike a sunflower" + }, + { + // This should be the best document for `sunflower wilting` + "id": 4, + "text": "sun flower wilting under the heat" + }, + { + // This is also the best document for `sunflower wilting` + "id": 5, + "text": "sunflower wilting under the heat" + }, + { + // Prox MAX between `best` and `s` prefix + "id": 6, + "text": "this is the best meal I have ever had in such a beautiful summer day" + }, + { + // Prox 5 between `best` and `s` prefix + "id": 7, + "text": "this is the best cooked meal of the summer" + }, + { + // Prox 4 between `best` and `s` prefix + "id": 8, + "text": "this is the best meal of the summer" + }, + { + // Prox 3 between `best` and `s` prefix + "id": 9, + "text": "this is the best meal of summer" + }, + { + // Prox 1 between `best` and `s` prefix + "id": 10, + "text": "this is the best summer meal" + }, + { + // Reverse Prox 3 between `best` and `s` prefix + "id": 11, + "text": "summer x y best" + }, + { + // Reverse Prox 2 between `best` and `s` prefix + "id": 12, + "text": "summer x best" + }, + { + // Reverse Prox 1 between `best` and `s` prefix + "id": 13, + "text": "summer best" + }, + ])).unwrap(); + index +} + +#[test] +fn test_proximity_simple() { + let index = create_simple_index(); + let txn = index.read_txn().unwrap(); + + let mut s = Search::new(&txn, &index); + s.terms_matching_strategy(TermsMatchingStrategy::All); + s.query("the quick brown fox jumps over the lazy dog"); + let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 9, 10, 7, 6, 5, 2, 3, 0, 1]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quickbrown fox jumps over the lazy dog\"", + "\"the quack brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the lazy dog\"", + "\"the really quick brown fox jumps over the lazy dog\"", + "\"the really quick brown fox jumps over the very lazy dog\"", + "\"brown quick fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the lazy. dog\"", + "\"dog the quick brown fox jumps over the lazy\"", + "\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"", + "\"the. quick brown fox jumps over the lazy. dog\"", + ] + "###); +} + +#[test] +fn test_proximity_split_word() { + let index = create_edge_cases_index(); + let txn = index.read_txn().unwrap(); + + let mut s = Search::new(&txn, &index); + s.terms_matching_strategy(TermsMatchingStrategy::All); + s.query("sunflower wilting"); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + // TODO: "2" and "4" should be swapped ideally + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", + "\"sun flower wilting under the heat\"", + "\"sunflower wilting under the heat\"", + "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"", + "\"A flower wilting under the sun, unlike a sunflower\"", + ] + "###); + + let mut s = Search::new(&txn, &index); + s.terms_matching_strategy(TermsMatchingStrategy::All); + s.query("\"sun flower\" wilting"); + let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + // TODO: "2" and "4" should be swapped ideally + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", + "\"sun flower wilting under the heat\"", + "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"", + ] + "###); + drop(txn); + + index + .update_settings(|s| { + let mut syns = HashMap::new(); + syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]); + s.set_synonyms(syns); + }) + .unwrap(); + let txn = index.read_txn().unwrap(); + + let mut s = Search::new(&txn, &index); + s.terms_matching_strategy(TermsMatchingStrategy::All); + s.query("xyz wilting"); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + // TODO: "2" and "4" should be swapped ideally + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", + "\"sun flower wilting under the heat\"", + "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"", + ] + "###); +} + +#[test] +fn test_proximity_prefix_db() { + let index = create_edge_cases_index(); + let txn = index.read_txn().unwrap(); + + let mut s = Search::new(&txn, &index); + s.terms_matching_strategy(TermsMatchingStrategy::All); + s.query("best s"); + let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + + // This test illustrates the loss of precision from using the prefix DB + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"this is the best summer meal\"", + "\"summer best\"", + "\"this is the best meal of summer\"", + "\"summer x best\"", + "\"this is the best meal of the summer\"", + "\"this is the best meal I have ever had in such a beautiful summer day\"", + "\"this is the best cooked meal of the summer\"", + "\"summer x y best\"", + ] + "###); +} diff --git a/milli/src/search/new/tests/typo.rs b/milli/src/search/new/tests/typo.rs index 6ac8f5516..4df340e9b 100644 --- a/milli/src/search/new/tests/typo.rs +++ b/milli/src/search/new/tests/typo.rs @@ -21,8 +21,8 @@ if `words` doesn't exist before it. use std::collections::HashMap; use crate::{ - index::tests::TempIndex, Criterion, - Search, SearchResult, TermsMatchingStrategy, + index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, + SearchResult, TermsMatchingStrategy, }; fn create_index() -> TempIndex { @@ -130,6 +130,10 @@ fn create_index() -> TempIndex { "id": 22, "text": "the quick brown fox jumps over the lackadaisical dog" }, + { + "id": 23, + "text": "the quivk brown fox jumps over the lazy dog" + }, ])) .unwrap(); index @@ -151,6 +155,12 @@ fn test_no_typo() { s.query("the quick brown fox jumps over the lazy dog"); let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + ] + "###); } #[test] @@ -168,7 +178,14 @@ fn test_default_typo() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quick brown fox jumps over the lazy dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 23]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quivk brown fox jumps over the lazy dog\"", + ] + "###); // 1 typo on one word, replaced letter let mut s = Search::new(&txn, &index); @@ -176,6 +193,12 @@ fn test_default_typo() { s.query("the quack brown fox jumps over the lazy dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + ] + "###); // 1 typo on one word, missing letter, extra letter let mut s = Search::new(&txn, &index); @@ -183,6 +206,12 @@ fn test_default_typo() { s.query("the quicest brownest fox jummps over the laziest dog"); let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quickest brownest fox jumps over the laziest dog\"", + ] + "###); // 1 typo on one word, swapped letters let mut s = Search::new(&txn, &index); @@ -190,6 +219,12 @@ fn test_default_typo() { s.query("the quikc borwn fox jupms over the lazy dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + ] + "###); // 1 first letter typo on a word <5 bytes, replaced letter let mut s = Search::new(&txn, &index); @@ -211,6 +246,12 @@ fn test_default_typo() { s.query("the quack brawn fox junps over the lazy dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + ] + "###); // 2 typos on words < 9 bytes let mut s = Search::new(&txn, &index); @@ -225,6 +266,12 @@ fn test_default_typo() { s.query("the extravant fox kyrocketed over the lamguorout dog"); let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the extravagant fox skyrocketed over the languorous dog\"", + ] + "###); // 2 typos on words >= 9 bytes: 2 extra letters in a single word, swapped letters + extra letter, replaced letters let mut s = Search::new(&txn, &index); @@ -232,6 +279,12 @@ fn test_default_typo() { s.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the extravagant fox skyrocketed over the languorous dog\"", + ] + "###); } #[test] @@ -244,6 +297,8 @@ fn test_phrase_no_typo_allowed() { s.query("the \"quick brewn\" fox jumps over the lazy dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @"[]"); } #[test] @@ -256,12 +311,20 @@ fn test_ngram_typos() { s.query("the extra lagant fox skyrocketed over the languorous dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the extravagant fox skyrocketed over the languorous dog\"", + ] + "###); let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the ex tra lagant fox skyrocketed over the languorous dog"); let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @"[]"); } #[test] fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() { @@ -278,7 +341,29 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() { s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); let SearchResult { documents_ids: ids_1, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); + insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); + let texts = collect_field_values(&index, &txn, "text", &ids_1); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quivk brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the lazy\"", + "\"the quick brown fox jumps over the\"", + "\"the quick brown fox jumps over\"", + "\"the quick brown fox jumps over the lackadaisical dog\"", + "\"the quick brown fox jumps\"", + "\"the quick brown fox\"", + "\"the quick brown foxes jump over the lazy dog\"", + "\"the quick brown fax sends a letter to the dog\"", + "\"the quick brown\"", + "\"the quick\"", + "\"a fox doesn't quack, that crown goes to the duck.\"", + "\"the quickest brownest fox jumps over the laziest dog\"", + "\"the quicker browner fox jumped over the lazier dog\"", + "\"the extravagant fox skyrocketed over the languorous dog\"", + "\"the fast brownish fox jumps over the lackadaisical dog\"", + ] + "###); index .update_settings(|s| { @@ -290,7 +375,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() { s.terms_matching_strategy(TermsMatchingStrategy::Last); s.query("the quick brown fox jumps over the lazy dog"); let SearchResult { documents_ids: ids_2, .. 
} = s.execute().unwrap(); - insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); + insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]"); assert_eq!(ids_1, ids_2); } @@ -307,6 +392,17 @@ fn test_typo_bucketing() { s.query("network interconnection sunflower"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 15, 16, 17, 18, 20]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"netwolk interconections sunflawar\"", + "\"network interconnections sunflawer\"", + "\"network interconnection sunflower\"", + "\"network interconnection sun flower\"", + "\"network interconnection sunflowering\"", + "\"network interconnection sunflowar\"", + ] + "###); // Then with the typo ranking rule drop(txn); @@ -322,12 +418,34 @@ fn test_typo_bucketing() { s.query("network interconnection sunflower"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18, 17, 20, 15, 14]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"network interconnection sunflower\"", + "\"network interconnection sunflowering\"", + "\"network interconnection sun flower\"", + "\"network interconnection sunflowar\"", + "\"network interconnections sunflawer\"", + "\"netwolk interconections sunflawar\"", + ] + "###); let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("network interconnection sun flower"); let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[17, 19, 16, 18, 20, 15]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"network interconnection sun flower\"", + "\"network interconnection sun flowering\"", + "\"network interconnection sunflower\"", + "\"network interconnection sunflowering\"", + "\"network interconnection sunflowar\"", + "\"network interconnections sunflawer\"", + ] + "###); } #[test] @@ -350,7 +468,15 @@ fn test_typo_synonyms() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the quick brown fox jumps over the lackadaisical dog"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 22, 23]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the lackadaisical dog\"", + "\"the quivk brown fox jumps over the lazy dog\"", + ] + "###); let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::All); @@ -359,5 +485,13 @@ fn test_typo_synonyms() { // TODO: is this correct? interaction of ngrams + synonyms means that the // multi-word synonyms end up having a typo cost. This is probably not what we want. let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the fast brownish fox jumps over the lackadaisical dog\"", + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the lackadaisical dog\"", + ] + "###); } diff --git a/milli/src/search/new/tests/words_tms.rs b/milli/src/search/new/tests/words_tms.rs index 8b5c0153f..74748ea5a 100644 --- a/milli/src/search/new/tests/words_tms.rs +++ b/milli/src/search/new/tests/words_tms.rs @@ -12,9 +12,12 @@ account by the proximity ranking rule. 7. The search is capable of returning no results if no documents match the query */ -use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy}; +use crate::{ + index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, + SearchResult, TermsMatchingStrategy, +}; -fn create_quick_brown_fox_trivial_index() -> TempIndex { +fn create_index() -> TempIndex { let index = TempIndex::new(); index @@ -126,7 +129,7 @@ fn create_quick_brown_fox_trivial_index() -> TempIndex { #[test] fn test_words_tms_last_simple() { - let index = create_quick_brown_fox_trivial_index(); + let index = create_index(); let txn = index.read_txn().unwrap(); let mut s = Search::new(&txn, &index); @@ -136,6 +139,31 @@ fn test_words_tms_last_simple() { // 6 and 7 have the same score because "the" appears twice insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 8, 6, 7, 5, 4, 11, 12, 3]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the brown quick fox jumps over the lazy dog\"", + "\"the mighty and quick 
brown fox jumps over the lazy dog\"", + "\"the great quick brown fox jumps over the lazy dog\"", + "\"this quick brown and very scary fox jumps over the lazy dog\"", + "\"this quick brown and scary fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the really lazy dog\"", + "\"the brown quick fox jumps over the really lazy dog\"", + "\"the brown quick fox immediately jumps over the really lazy dog\"", + "\"the brown quick fox immediately jumps over the really lazy blue dog\"", + "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"", + "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"", + "\"the quick brown fox jumps over the lazy\"", + "\"the quick brown fox jumps over\"", + "\"the quick brown fox jumps over the\"", + "\"the quick brown fox jumps\"", + "\"the quick brown fox\"", + "\"the quick brown fox talks to the lazy and slow dog\"", + "\"the quick brown fox talks to the lazy dog\"", + "\"the quick brown\"", + ] + "###); let mut s = Search::new(&txn, &index); s.query("extravagant the quick brown fox jumps over the lazy dog"); @@ -146,7 +174,7 @@ fn test_words_tms_last_simple() { #[test] fn test_words_tms_last_phrase() { - let index = create_quick_brown_fox_trivial_index(); + let index = create_index(); let txn = index.read_txn().unwrap(); let mut s = Search::new(&txn, &index); @@ -156,6 +184,21 @@ fn test_words_tms_last_phrase() { // "The quick brown fox" is a phrase, not deleted by this term matching strategy insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 6, 7, 5, 4, 11, 12]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the really lazy dog\"", + "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. 
the lazy dog.\"", + "\"the quick brown fox jumps over the lazy\"", + "\"the quick brown fox jumps over\"", + "\"the quick brown fox jumps over the\"", + "\"the quick brown fox jumps\"", + "\"the quick brown fox\"", + "\"the quick brown fox talks to the lazy and slow dog\"", + "\"the quick brown fox talks to the lazy dog\"", + ] + "###); let mut s = Search::new(&txn, &index); s.query("\"the quick brown fox\" jumps over the \"lazy\" dog"); @@ -165,6 +208,17 @@ fn test_words_tms_last_phrase() { // "lazy" is a phrase, not deleted by this term matching strategy // but words before it can be deleted insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 11, 12]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the really lazy dog\"", + "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. 
the lazy dog.\"", + "\"the quick brown fox jumps over the lazy\"", + "\"the quick brown fox talks to the lazy and slow dog\"", + "\"the quick brown fox talks to the lazy dog\"", + ] + "###); let mut s = Search::new(&txn, &index); s.query("\"the quick brown fox jumps over the lazy dog\""); @@ -173,6 +227,12 @@ fn test_words_tms_last_phrase() { // The whole query is a phrase, no terms are removed insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + ] + "###); let mut s = Search::new(&txn, &index); s.query("\"the quick brown fox jumps over the lazy dog"); @@ -181,11 +241,17 @@ fn test_words_tms_last_phrase() { // The whole query is still a phrase, even without closing quotes, so no terms are removed insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + ] + "###); } #[test] fn test_words_proximity_tms_last_simple() { - let index = create_quick_brown_fox_trivial_index(); + let index = create_index(); index .update_settings(|s| { s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); @@ -200,6 +266,31 @@ fn test_words_proximity_tms_last_simple() { // 7 is better than 6 because of the proximity between "the" and its surrounding terms insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. 
the lazy dog.\"", + "\"the great quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the really lazy dog\"", + "\"the mighty and quick brown fox jumps over the lazy dog\"", + "\"the brown quick fox jumps over the lazy dog\"", + "\"the brown quick fox jumps over the really lazy dog\"", + "\"the brown quick fox immediately jumps over the really lazy dog\"", + "\"the brown quick fox immediately jumps over the really lazy blue dog\"", + "\"this quick brown and scary fox jumps over the lazy dog\"", + "\"this quick brown and very scary fox jumps over the lazy dog\"", + "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"", + "\"the quick brown fox jumps over the lazy\"", + "\"the quick brown fox jumps over the\"", + "\"the quick brown fox jumps over\"", + "\"the quick brown fox jumps\"", + "\"the quick brown fox\"", + "\"the quick brown fox talks to the lazy and slow dog\"", + "\"the quick brown fox talks to the lazy dog\"", + "\"the quick brown\"", + ] + "###); let mut s = Search::new(&txn, &index); s.query("the brown quick fox jumps over the lazy dog"); @@ -208,11 +299,36 @@ fn test_words_proximity_tms_last_simple() { // 10 is better than 9 because of the proximity between "quick" and "brown" insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the brown quick fox jumps over the lazy dog\"", + "\"the brown quick fox jumps over the really lazy dog\"", + "\"the brown quick fox immediately jumps over the really lazy dog\"", + "\"the quick brown fox jumps over the lazy dog\"", + "\"the brown quick fox immediately jumps over the really lazy blue dog\"", + "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. 
the lazy dog.\"", + "\"the great quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the really lazy dog\"", + "\"the mighty and quick brown fox jumps over the lazy dog\"", + "\"this quick brown and scary fox jumps over the lazy dog\"", + "\"this quick brown and very scary fox jumps over the lazy dog\"", + "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"", + "\"the quick brown fox jumps over the lazy\"", + "\"the quick brown fox jumps over the\"", + "\"the quick brown fox jumps over\"", + "\"the quick brown fox jumps\"", + "\"the quick brown fox\"", + "\"the quick brown fox talks to the lazy and slow dog\"", + "\"the quick brown fox talks to the lazy dog\"", + "\"the quick brown\"", + ] + "###); } #[test] fn test_words_proximity_tms_last_phrase() { - let index = create_quick_brown_fox_trivial_index(); + let index = create_index(); index .update_settings(|s| { s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); @@ -228,6 +344,26 @@ fn test_words_proximity_tms_last_phrase() { // "quick brown" is a phrase. The proximity of its first and last words // to their adjacent query words should be taken into account insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5, 4, 11, 12, 3]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. 
the lazy dog.\"", + "\"the great quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the really lazy dog\"", + "\"the mighty and quick brown fox jumps over the lazy dog\"", + "\"this quick brown and scary fox jumps over the lazy dog\"", + "\"this quick brown and very scary fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the lazy\"", + "\"the quick brown fox jumps over the\"", + "\"the quick brown fox jumps over\"", + "\"the quick brown fox jumps\"", + "\"the quick brown fox\"", + "\"the quick brown fox talks to the lazy and slow dog\"", + "\"the quick brown fox talks to the lazy dog\"", + "\"the quick brown\"", + ] + "###); let mut s = Search::new(&txn, &index); s.query("the \"quick brown\" \"fox jumps\" over the lazy dog"); @@ -238,11 +374,27 @@ fn test_words_proximity_tms_last_phrase() { // to their adjacent query words should be taken into account. // The same applies to `fox jumps`. insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. 
the lazy dog.\"", + "\"the great quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the really lazy dog\"", + "\"the mighty and quick brown fox jumps over the lazy dog\"", + "\"this quick brown and scary fox jumps over the lazy dog\"", + "\"this quick brown and very scary fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the lazy\"", + "\"the quick brown fox jumps over the\"", + "\"the quick brown fox jumps over\"", + "\"the quick brown fox jumps\"", + ] + "###); } #[test] fn test_words_tms_all() { - let index = create_quick_brown_fox_trivial_index(); + let index = create_index(); index .update_settings(|s| { s.set_criteria(vec![Criterion::Words, Criterion::Proximity]); @@ -256,6 +408,23 @@ fn test_words_tms_all() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @r###" + [ + "\"the quick brown fox jumps over the lazy dog\"", + "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. 
the lazy dog.\"", + "\"the great quick brown fox jumps over the lazy dog\"", + "\"the quick brown fox jumps over the really lazy dog\"", + "\"the mighty and quick brown fox jumps over the lazy dog\"", + "\"the brown quick fox jumps over the lazy dog\"", + "\"the brown quick fox jumps over the really lazy dog\"", + "\"the brown quick fox immediately jumps over the really lazy dog\"", + "\"the brown quick fox immediately jumps over the really lazy blue dog\"", + "\"this quick brown and scary fox jumps over the lazy dog\"", + "\"this quick brown and very scary fox jumps over the lazy dog\"", + "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"", + ] + "###); let mut s = Search::new(&txn, &index); s.query("extravagant"); @@ -263,4 +432,6 @@ fn test_words_tms_all() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]"); + let texts = collect_field_values(&index, &txn, "text", &documents_ids); + insta::assert_debug_snapshot!(texts, @"[]"); }