From c69cbec64a516629e22029737b35cf3dae10c8c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= <loic.lecrenier@me.com>
Date: Wed, 5 Apr 2023 11:20:04 +0200
Subject: [PATCH] Add more search tests

---
 milli/src/search/new/tests/language.rs        |   2 +-
 .../src/search/new/tests/ngram_split_words.rs | 141 +++++++-
 milli/src/search/new/tests/proximity.rs       | 317 ++++++++++++++++++
 milli/src/search/new/tests/typo.rs            | 148 +++++++-
 milli/src/search/new/tests/words_tms.rs       | 185 +++++++++-
 5 files changed, 766 insertions(+), 27 deletions(-)

diff --git a/milli/src/search/new/tests/language.rs b/milli/src/search/new/tests/language.rs
index 6adad748c..e16544fdb 100644
--- a/milli/src/search/new/tests/language.rs
+++ b/milli/src/search/new/tests/language.rs
@@ -18,5 +18,5 @@ fn test_kanji_language_detection() {
     search.query("東京");
     let SearchResult { documents_ids, .. } = search.execute().unwrap();
 
-    assert_eq!(documents_ids, vec![1]);
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1]");
 }
diff --git a/milli/src/search/new/tests/ngram_split_words.rs b/milli/src/search/new/tests/ngram_split_words.rs
index 06c49274c..b78bbe763 100644
--- a/milli/src/search/new/tests/ngram_split_words.rs
+++ b/milli/src/search/new/tests/ngram_split_words.rs
@@ -16,7 +16,10 @@ This module tests the following properties:
 13. Ngrams cannot be formed by combining a phrase and a word or two phrases
 */
 
-use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
+use crate::{
+    index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
+    SearchResult, TermsMatchingStrategy,
+};
 
 fn create_index() -> TempIndex {
     let index = TempIndex::new();
@@ -46,6 +49,14 @@ fn create_index() -> TempIndex {
             {
                 "id": 3,
                 "text": "the sunflower is tall"
+            },
+            {
+                "id": 4,
+                "text": "the sunflawer is tall"
+            },
+            {
+                "id": 5,
+                "text": "sunflowering is not a verb"
             }
         ]))
         .unwrap();
@@ -67,8 +78,18 @@ fn test_2gram_simple() {
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("sun flower");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
-    // will also match documents with "sun flower"
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]");
+    // will also match documents with "sunflower" + prefix tolerance
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flowers are pretty\"",
+        "\"the sun flower is tall\"",
+        "\"the sunflowers are pretty\"",
+        "\"the sunflower is tall\"",
+        "\"sunflowering is not a verb\"",
+    ]
+    "###);
 }
 #[test]
 fn test_3gram_simple() {
@@ -87,6 +108,13 @@ fn test_3gram_simple() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flowers are pretty\"",
+        "\"the sunflowers are pretty\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -99,7 +127,18 @@ fn test_2gram_typo() {
     s.query("sun flawer");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 4, 5]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flowers are pretty\"",
+        "\"the sun flower is tall\"",
+        "\"the sunflowers are pretty\"",
+        "\"the sunflower is tall\"",
+        "\"the sunflawer is tall\"",
+        "\"sunflowering is not a verb\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -119,6 +158,13 @@ fn test_no_disable_ngrams() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     // documents containing `sunflower`
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 3]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flower is tall\"",
+        "\"the sunflower is tall\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -137,7 +183,17 @@ fn test_2gram_prefix() {
     s.query("sun flow");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     // documents containing words beginning with `sunflow`
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flowers are pretty\"",
+        "\"the sun flower is tall\"",
+        "\"the sunflowers are pretty\"",
+        "\"the sunflower is tall\"",
+        "\"sunflowering is not a verb\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -157,7 +213,16 @@ fn test_3gram_prefix() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
     // documents containing a word beginning with sunfl
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 5]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sunflowers are pretty\"",
+        "\"the sunflower is tall\"",
+        "\"the sunflawer is tall\"",
+        "\"sunflowering is not a verb\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -170,8 +235,17 @@ fn test_split_words() {
     s.query("sunflower ");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
-    // all the documents with either `sunflower` or `sun flower`
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3]");
+    // all the documents with either `sunflower` or `sun flower` + possible typo
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 4]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flower is tall\"",
+        "\"the sunflowers are pretty\"",
+        "\"the sunflower is tall\"",
+        "\"the sunflawer is tall\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -191,6 +265,12 @@ fn test_disable_split_words() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     // no document containing `sun flower`
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sunflower is tall\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -203,8 +283,18 @@ fn test_2gram_split_words() {
     s.query("sunf lower");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
-    // all the documents with "sunflower", "sun flower", or (sunflower + 1 typo)
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3]");
+    // all the documents with "sunflower", "sun flower", (sunflower + 1 typo), or (sunflower as prefix)
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 4, 5]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flower is tall\"",
+        "\"the sunflowers are pretty\"",
+        "\"the sunflower is tall\"",
+        "\"the sunflawer is tall\"",
+        "\"sunflowering is not a verb\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -218,7 +308,15 @@ fn test_3gram_no_split_words() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
     // no document with `sun flower`
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 5]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sunflowers are pretty\"",
+        "\"the sunflower is tall\"",
+        "\"sunflowering is not a verb\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -231,7 +329,13 @@ fn test_3gram_no_typos() {
     s.query("sunf la wer");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sunflawer is tall\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -245,6 +349,13 @@ fn test_no_ngram_phrases() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flowers are pretty\"",
+        "\"the sun flower is tall\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
@@ -252,4 +363,10 @@ fn test_no_ngram_phrases() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the sun flower is tall\"",
+    ]
+    "###);
 }
diff --git a/milli/src/search/new/tests/proximity.rs b/milli/src/search/new/tests/proximity.rs
index e69de29bb..f6e071572 100644
--- a/milli/src/search/new/tests/proximity.rs
+++ b/milli/src/search/new/tests/proximity.rs
@@ -0,0 +1,317 @@
+/*!
+This module tests the Proximity ranking rule:
+
+1. A proximity of >7 always has the same cost.
+
+2. Phrase terms can be in proximity to other terms via their start and end words,
+but we need to make sure that the phrase exists in the document that meets this
+proximity condition. This is especially relevant with split words and synonyms.
+
+3. An ngram has the same proximity cost as its component words being consecutive.
+e.g. `sunflower` is equivalent to `sun flower`.
+
+4. The prefix databases can be used to find the proximity between two words, but
+they store fewer proximities than the regular word proximity DB.
+
+*/
+
+use std::collections::HashMap;
+
+use crate::{
+    index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
+    SearchResult, TermsMatchingStrategy,
+};
+
+fn create_simple_index() -> TempIndex {
+    let index = TempIndex::new();
+
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    index
+        .add_documents(documents!([
+            {
+                "id": 0,
+                "text": "the very quick dark brown and smart fox did jump over the terribly lazy and small dog"
+            },
+            {
+                "id": 1,
+                "text": "the. quick brown fox jumps over the lazy. dog"
+            },
+            {
+                "id": 2,
+                "text": "the quick brown fox jumps over the lazy. dog"
+            },
+            {
+                "id": 3,
+                "text": "dog the quick brown fox jumps over the lazy"
+            },
+            {
+                "id": 4,
+                "text": "the quickbrown fox jumps over the lazy dog"
+            },
+            {
+                "id": 5,
+                "text": "brown quick fox jumps over the lazy dog"
+            },
+            {
+                "id": 6,
+                "text": "the really quick brown fox jumps over the very lazy dog"
+            },
+            {
+                "id": 7,
+                "text": "the really quick brown fox jumps over the lazy dog"
+            },
+            {
+                "id": 8,
+                "text": "the quick brown fox jumps over the lazy"
+            },
+            {
+                "id": 9,
+                "text": "the quack brown fox jumps over the lazy"
+            },
+            {
+                "id": 9, // NOTE(review): duplicate of the "id" above — confirm intended
+                "text": "the quack brown fox jumps over the lazy dog"
+            },
+            {
+                "id": 10,
+                "text": "the quick brown fox jumps over the lazy dog"
+            }
+        ]))
+        .unwrap();
+    index
+}
+
+fn create_edge_cases_index() -> TempIndex {
+    let index = TempIndex::new();
+
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    index.add_documents(documents!([
+        {
+            // This document will insert "s" in the prefix database
+            "id": 0,
+            "text": "
+            saa sab sac sae saf sag sah sai saj sak sal sam san sao sap saq sar sasa sat sau sav saw sax say saz
+            sba sbb sbc sbe sbf sbg sbh sbi sbj sbk sbl sbm sbn sbo sbp sbq sbr sbsb sbt sbu sbv sbw sbx sby sbz
+            sca scb scc sce scf scg sch sci scj sck scl scm scn sco scp scq scr scsc sct scu scv scw scx scy scz
+            sda sdb sdc sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sdsd sdt sdu sdv sdw sdx sdy sdz
+            sea seb sec see sef seg seh sei sej sek sel sem sen seo sep seq ser sese set seu sev sew sex sey sez
+            sfa sfb sfc sfe sff sfg sfh sfi sfj sfk sfl sfm sfn sfo sfp sfq sfr sfsf sft sfu sfv sfw sfx sfy sfz
+            sga sgb sgc sge sgf sgg sgh sgi sgj sgk sgl sgm sgn sgo sgp sgq sgr sgsg sgt sgu sgv sgw sgx sgy sgz
+            ska skb skc ske skf skg skh ski skj skk skl skm skn sko skp skq skr sksk skt sku skv skw skx sky skz
+            sla slb slc sle slf slg slh sli slj slk sll slm sln slo slp slq slr slsl slt slu slv slw slx sly slz
+            sma smb smc sme smf smg smh smi smj smk sml smm smn smo smp smq smr smsm smt smu smv smw smx smy smz
+            sna snb snc sne snf sng snh sni snj snk snl snm snn sno snp snq snr snsn snt snu snv snw snx sny snz
+            soa sob soc soe sof sog soh soi soj sok sol som son soo sop soq sor soso sot sou sov sow sox soy soz
+            spa spb spc spe spf spg sph spi spj spk spl spm spn spo spp spq spr spsp spt spu spv spw spx spy spz
+            sqa sqb sqc sqe sqf sqg sqh sqi sqj sqk sql sqm sqn sqo sqp sqq sqr sqsq sqt squ sqv sqw sqx sqy sqz
+            sra srb src sre srf srg srh sri srj srk srl srm srn sro srp srq srr srsr srt sru srv srw srx sry srz
+            ssa ssb ssc sse ssf ssg ssh ssi ssj ssk ssl ssm ssn sso ssp ssq ssr ssss sst ssu ssv ssw ssx ssy ssz
+            sta stb stc ste stf stg sth sti stj stk stl stm stn sto stp stq str stst stt stu stv stw stx sty stz
+            "
+        },
+        // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`.
+        // If the search query is "sunflower", the split word "Sun Flower" will match some documents.
+        // If the query is `sunflower wilting`, then we should make sure that
+        // the proximity condition `flower wilting: prox N` also comes with the condition
+        // `sun wilting: prox N+1`. TODO: this is not the exact condition we use for now.
+        // We only check that the phrase `sun flower` exists and `flower wilting: prox N`, which
+        // is better than nothing but not the best.
+        {
+            "id": 1,
+            "text": "Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat."
+        },
+        {
+            "id": 2,
+            "text": "Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat."
+        },
+        {
+            "id": 3,
+            // This document matches the query `sunflower wilting`, but the proximity condition
+            // between `sunflower` and `wilting` cannot be through the split-word `Sun Flower`,
+            // which would reduce to only `flower` and `wilting` being in proximity.
+            "text": "A flower wilting under the sun, unlike a sunflower"
+        },
+        {
+            // This should be the best document for `sunflower wilting`
+            "id": 4,
+            "text": "sun flower wilting under the heat"
+        },
+        {
+            // This is also the best document for `sunflower wilting`
+            "id": 5,
+            "text": "sunflower wilting under the heat"
+        },
+        {
+            // Prox MAX between `best` and `s` prefix
+            "id": 6,
+            "text": "this is the best meal I have ever had in such a beautiful summer day"
+        },
+        {
+            // Prox 5 between `best` and `s` prefix
+            "id": 7,
+            "text": "this is the best cooked meal of the summer"
+        },
+        {
+            // Prox 4 between `best` and `s` prefix
+            "id": 8,
+            "text": "this is the best meal of the summer"
+        },
+        {
+            // Prox 3 between `best` and `s` prefix
+            "id": 9,
+            "text": "this is the best meal of summer"
+        },
+        {
+            // Prox 1 between `best` and `s` prefix
+            "id": 10,
+            "text": "this is the best summer meal"
+        },
+        {
+            // Reverse Prox 3 between `best` and `s` prefix
+            "id": 11,
+            "text": "summer x y best"
+        },
+        {
+            // Reverse Prox 2 between `best` and `s` prefix
+            "id": 12,
+            "text": "summer x best"
+        },
+        {
+            // Reverse Prox 1 between `best` and `s` prefix
+            "id": 13,
+            "text": "summer best"
+        },
+    ])).unwrap();
+    index
+}
+
+#[test]
+fn test_proximity_simple() {
+    let index = create_simple_index();
+    let txn = index.read_txn().unwrap();
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::All);
+    s.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 9, 10, 7, 6, 5, 2, 3, 0, 1]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quickbrown fox jumps over the lazy dog\"",
+        "\"the quack brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the really quick brown fox jumps over the lazy dog\"",
+        "\"the really quick brown fox jumps over the very lazy dog\"",
+        "\"brown quick fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy. dog\"",
+        "\"dog the quick brown fox jumps over the lazy\"",
+        "\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
+        "\"the. quick brown fox jumps over the lazy. dog\"",
+    ]
+    "###);
+}
+
+#[test]
+fn test_proximity_split_word() {
+    let index = create_edge_cases_index();
+    let txn = index.read_txn().unwrap();
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::All);
+    s.query("sunflower wilting");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    // TODO: "2" and "4" should be swapped ideally
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"",
+        "\"sun flower wilting under the heat\"",
+        "\"sunflower wilting under the heat\"",
+        "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"",
+        "\"A flower wilting under the sun, unlike a sunflower\"",
+    ]
+    "###);
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::All);
+    s.query("\"sun flower\" wilting");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    // TODO: "2" and "4" should be swapped ideally
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"",
+        "\"sun flower wilting under the heat\"",
+        "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"",
+    ]
+    "###);
+    drop(txn);
+
+    index
+        .update_settings(|s| {
+            let mut syns = HashMap::new();
+            syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]);
+            s.set_synonyms(syns);
+        })
+        .unwrap();
+    let txn = index.read_txn().unwrap();
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::All);
+    s.query("xyz wilting");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    // TODO: "2" and "4" should be swapped ideally
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"",
+        "\"sun flower wilting under the heat\"",
+        "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"",
+    ]
+    "###);
+}
+
+#[test]
+fn test_proximity_prefix_db() {
+    let index = create_edge_cases_index();
+    let txn = index.read_txn().unwrap();
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::All);
+    s.query("best s");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+
+    // This test illustrates the loss of precision from using the prefix DB
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"this is the best summer meal\"",
+        "\"summer best\"",
+        "\"this is the best meal of summer\"",
+        "\"summer x best\"",
+        "\"this is the best meal of the summer\"",
+        "\"this is the best meal I have ever had in such a beautiful summer day\"",
+        "\"this is the best cooked meal of the summer\"",
+        "\"summer x y best\"",
+    ]
+    "###);
+}
diff --git a/milli/src/search/new/tests/typo.rs b/milli/src/search/new/tests/typo.rs
index 6ac8f5516..4df340e9b 100644
--- a/milli/src/search/new/tests/typo.rs
+++ b/milli/src/search/new/tests/typo.rs
@@ -21,8 +21,8 @@ if `words` doesn't exist before it.
 use std::collections::HashMap;
 
 use crate::{
-    index::tests::TempIndex, Criterion, 
-    Search, SearchResult, TermsMatchingStrategy,
+    index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
+    SearchResult, TermsMatchingStrategy,
 };
 
 fn create_index() -> TempIndex {
@@ -130,6 +130,10 @@ fn create_index() -> TempIndex {
                 "id": 22,
                 "text": "the quick brown fox jumps over the lackadaisical dog"
             },
+            {
+                "id": 23,
+                "text": "the quivk brown fox jumps over the lazy dog"
+            },
         ]))
         .unwrap();
     index
@@ -151,6 +155,12 @@ fn test_no_typo() {
     s.query("the quick brown fox jumps over the lazy dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -168,7 +178,14 @@ fn test_default_typo() {
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("the quick brown fox jumps over the lazy dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 23]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quivk brown fox jumps over the lazy dog\"",
+    ]
+    "###);
 
     // 1 typo on one word, replaced letter
     let mut s = Search::new(&txn, &index);
@@ -176,6 +193,12 @@ fn test_default_typo() {
     s.query("the quack brown fox jumps over the lazy dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+    ]
+    "###);
 
     // 1 typo on one word, missing letter, extra letter
     let mut s = Search::new(&txn, &index);
@@ -183,6 +206,12 @@ fn test_default_typo() {
     s.query("the quicest brownest fox jummps over the laziest dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quickest brownest fox jumps over the laziest dog\"",
+    ]
+    "###);
 
     // 1 typo on one word, swapped letters
     let mut s = Search::new(&txn, &index);
@@ -190,6 +219,12 @@ fn test_default_typo() {
     s.query("the quikc borwn fox jupms over the lazy dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+    ]
+    "###);
 
     // 1 first letter typo on a word <5 bytes, replaced letter
     let mut s = Search::new(&txn, &index);
@@ -211,6 +246,12 @@ fn test_default_typo() {
     s.query("the quack brawn fox junps over the lazy dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+    ]
+    "###);
 
     // 2 typos on words < 9 bytes
     let mut s = Search::new(&txn, &index);
@@ -225,6 +266,12 @@ fn test_default_typo() {
     s.query("the extravant fox kyrocketed over the lamguorout dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the extravagant fox skyrocketed over the languorous dog\"",
+    ]
+    "###);
 
     // 2 typos on words >= 9 bytes: 2 extra letters in a single word, swapped letters + extra letter, replaced letters
     let mut s = Search::new(&txn, &index);
@@ -232,6 +279,12 @@ fn test_default_typo() {
     s.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the extravagant fox skyrocketed over the languorous dog\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -244,6 +297,8 @@ fn test_phrase_no_typo_allowed() {
     s.query("the \"quick brewn\" fox jumps over the lazy dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @"[]");
 }
 
 #[test]
@@ -256,12 +311,20 @@ fn test_ngram_typos() {
     s.query("the extra lagant fox skyrocketed over the languorous dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the extravagant fox skyrocketed over the languorous dog\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("the ex tra lagant fox skyrocketed over the languorous dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @"[]");
 }
 #[test]
 fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
@@ -278,7 +341,29 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
     s.terms_matching_strategy(TermsMatchingStrategy::Last);
     s.query("the quick brown fox jumps over the lazy dog");
     let SearchResult { documents_ids: ids_1, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
+    insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
+    let texts = collect_field_values(&index, &txn, "text", &ids_1);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quivk brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quick brown fox jumps over the lackadaisical dog\"",
+        "\"the quick brown fox jumps\"",
+        "\"the quick brown fox\"",
+        "\"the quick brown foxes jump over the lazy dog\"",
+        "\"the quick brown fax sends a letter to the dog\"",
+        "\"the quick brown\"",
+        "\"the quick\"",
+        "\"a fox doesn't quack, that crown goes to the duck.\"",
+        "\"the quickest brownest fox jumps over the laziest dog\"",
+        "\"the quicker browner fox jumped over the lazier dog\"",
+        "\"the extravagant fox skyrocketed over the languorous dog\"",
+        "\"the fast brownish fox jumps over the lackadaisical dog\"",
+    ]
+    "###);
 
     index
         .update_settings(|s| {
@@ -290,7 +375,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
     s.terms_matching_strategy(TermsMatchingStrategy::Last);
     s.query("the quick brown fox jumps over the lazy dog");
     let SearchResult { documents_ids: ids_2, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
+    insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
 
     assert_eq!(ids_1, ids_2);
 }
@@ -307,6 +392,17 @@ fn test_typo_bucketing() {
     s.query("network interconnection sunflower");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 15, 16, 17, 18, 20]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"netwolk interconections sunflawar\"",
+        "\"network interconnections sunflawer\"",
+        "\"network interconnection sunflower\"",
+        "\"network interconnection sun flower\"",
+        "\"network interconnection sunflowering\"",
+        "\"network interconnection sunflowar\"",
+    ]
+    "###);
 
     // Then with the typo ranking rule
     drop(txn);
@@ -322,12 +418,34 @@ fn test_typo_bucketing() {
     s.query("network interconnection sunflower");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18, 17, 20, 15, 14]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"network interconnection sunflower\"",
+        "\"network interconnection sunflowering\"",
+        "\"network interconnection sun flower\"",
+        "\"network interconnection sunflowar\"",
+        "\"network interconnections sunflawer\"",
+        "\"netwolk interconections sunflawar\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("network interconnection sun flower");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[17, 19, 16, 18, 20, 15]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"network interconnection sun flower\"",
+        "\"network interconnection sun flowering\"",
+        "\"network interconnection sunflower\"",
+        "\"network interconnection sunflowering\"",
+        "\"network interconnection sunflowar\"",
+        "\"network interconnections sunflawer\"",
+    ]
+    "###);
 }
 
 #[test]
@@ -350,7 +468,15 @@ fn test_typo_synonyms() {
     s.terms_matching_strategy(TermsMatchingStrategy::All);
     s.query("the quick brown fox jumps over the lackadaisical dog");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 22, 23]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lackadaisical dog\"",
+        "\"the quivk brown fox jumps over the lazy dog\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.terms_matching_strategy(TermsMatchingStrategy::All);
@@ -359,5 +485,13 @@ fn test_typo_synonyms() {
     // TODO: is this correct? interaction of ngrams + synonyms means that the
     // multi-word synonyms end up having a typo cost. This is probably not what we want.
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the fast brownish fox jumps over the lackadaisical dog\"",
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lackadaisical dog\"",
+    ]
+    "###);
 }
diff --git a/milli/src/search/new/tests/words_tms.rs b/milli/src/search/new/tests/words_tms.rs
index 8b5c0153f..74748ea5a 100644
--- a/milli/src/search/new/tests/words_tms.rs
+++ b/milli/src/search/new/tests/words_tms.rs
@@ -12,9 +12,12 @@ account by the proximity ranking rule.
 7. The search is capable of returning no results if no documents match the query
 */
 
-use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
+use crate::{
+    index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
+    SearchResult, TermsMatchingStrategy,
+};
 
-fn create_quick_brown_fox_trivial_index() -> TempIndex {
+fn create_index() -> TempIndex {
     let index = TempIndex::new();
 
     index
@@ -126,7 +129,7 @@ fn create_quick_brown_fox_trivial_index() -> TempIndex {
 
 #[test]
 fn test_words_tms_last_simple() {
-    let index = create_quick_brown_fox_trivial_index();
+    let index = create_index();
 
     let txn = index.read_txn().unwrap();
     let mut s = Search::new(&txn, &index);
@@ -136,6 +139,31 @@ fn test_words_tms_last_simple() {
 
     // 6 and 7 have the same score because "the" appears twice
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 8, 6, 7, 5, 4, 11, 12, 3]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the brown quick fox jumps over the lazy dog\"",
+        "\"the mighty and quick brown fox jumps over the lazy dog\"",
+        "\"the great quick brown fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the really lazy dog\"",
+        "\"the brown quick fox jumps over the really lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
+        "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
+        "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quick brown fox jumps\"",
+        "\"the quick brown fox\"",
+        "\"the quick brown fox talks to the lazy and slow dog\"",
+        "\"the quick brown fox talks to the lazy dog\"",
+        "\"the quick brown\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.query("extravagant the quick brown fox jumps over the lazy dog");
@@ -146,7 +174,7 @@ fn test_words_tms_last_simple() {
 
 #[test]
 fn test_words_tms_last_phrase() {
-    let index = create_quick_brown_fox_trivial_index();
+    let index = create_index();
 
     let txn = index.read_txn().unwrap();
     let mut s = Search::new(&txn, &index);
@@ -156,6 +184,21 @@ fn test_words_tms_last_phrase() {
 
     // "The quick brown fox" is a phrase, not deleted by this term matching strategy
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 6, 7, 5, 4, 11, 12]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the really lazy dog\"",
+        "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quick brown fox jumps\"",
+        "\"the quick brown fox\"",
+        "\"the quick brown fox talks to the lazy and slow dog\"",
+        "\"the quick brown fox talks to the lazy dog\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.query("\"the quick brown fox\" jumps over the \"lazy\" dog");
@@ -165,6 +208,17 @@ fn test_words_tms_last_phrase() {
     // "lazy" is a phrase, not deleted by this term matching strategy
     // but words before it can be deleted
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 11, 12]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the really lazy dog\"",
+        "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox talks to the lazy and slow dog\"",
+        "\"the quick brown fox talks to the lazy dog\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.query("\"the quick brown fox jumps over the lazy dog\"");
@@ -173,6 +227,12 @@ fn test_words_tms_last_phrase() {
 
     // The whole query is a phrase, no terms are removed
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.query("\"the quick brown fox jumps over the lazy dog");
@@ -181,11 +241,17 @@ fn test_words_tms_last_phrase() {
 
     // The whole query is still a phrase, even without closing quotes, so no terms are removed
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+    ]
+    "###);
 }
 
 #[test]
 fn test_words_proximity_tms_last_simple() {
-    let index = create_quick_brown_fox_trivial_index();
+    let index = create_index();
     index
         .update_settings(|s| {
             s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
@@ -200,6 +266,31 @@ fn test_words_proximity_tms_last_simple() {
 
     // 7 is better than 6 because of the proximity between "the" and its surrounding terms
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
+        "\"the great quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the really lazy dog\"",
+        "\"the mighty and quick brown fox jumps over the lazy dog\"",
+        "\"the brown quick fox jumps over the lazy dog\"",
+        "\"the brown quick fox jumps over the really lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quick brown fox jumps\"",
+        "\"the quick brown fox\"",
+        "\"the quick brown fox talks to the lazy and slow dog\"",
+        "\"the quick brown fox talks to the lazy dog\"",
+        "\"the quick brown\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.query("the brown quick fox jumps over the lazy dog");
@@ -208,11 +299,36 @@ fn test_words_proximity_tms_last_simple() {
 
     // 10 is better than 9 because of the proximity between "quick" and "brown"
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the brown quick fox jumps over the lazy dog\"",
+        "\"the brown quick fox jumps over the really lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy dog\"",
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
+        "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
+        "\"the great quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the really lazy dog\"",
+        "\"the mighty and quick brown fox jumps over the lazy dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quick brown fox jumps\"",
+        "\"the quick brown fox\"",
+        "\"the quick brown fox talks to the lazy and slow dog\"",
+        "\"the quick brown fox talks to the lazy dog\"",
+        "\"the quick brown\"",
+    ]
+    "###);
 }
 
 #[test]
 fn test_words_proximity_tms_last_phrase() {
-    let index = create_quick_brown_fox_trivial_index();
+    let index = create_index();
     index
         .update_settings(|s| {
             s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
@@ -228,6 +344,26 @@ fn test_words_proximity_tms_last_phrase() {
     // "quick brown" is a phrase. The proximity of its first and last words
     // to their adjacent query words should be taken into account
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5, 4, 11, 12, 3]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
+        "\"the great quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the really lazy dog\"",
+        "\"the mighty and quick brown fox jumps over the lazy dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quick brown fox jumps\"",
+        "\"the quick brown fox\"",
+        "\"the quick brown fox talks to the lazy and slow dog\"",
+        "\"the quick brown fox talks to the lazy dog\"",
+        "\"the quick brown\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.query("the \"quick brown\" \"fox jumps\" over the lazy dog");
@@ -238,11 +374,27 @@ fn test_words_proximity_tms_last_phrase() {
     // to their adjacent query words should be taken into account.
     // The same applies to `fox jumps`.
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
+        "\"the great quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the really lazy dog\"",
+        "\"the mighty and quick brown fox jumps over the lazy dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quick brown fox jumps\"",
+    ]
+    "###);
 }
 
 #[test]
 fn test_words_tms_all() {
-    let index = create_quick_brown_fox_trivial_index();
+    let index = create_index();
     index
         .update_settings(|s| {
             s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
@@ -256,6 +408,23 @@ fn test_words_tms_all() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
+        "\"the great quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the really lazy dog\"",
+        "\"the mighty and quick brown fox jumps over the lazy dog\"",
+        "\"the brown quick fox jumps over the lazy dog\"",
+        "\"the brown quick fox jumps over the really lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy dog\"",
+        "\"the brown quick fox immediately jumps over the really lazy blue dog\"",
+        "\"this quick brown and scary fox jumps over the lazy dog\"",
+        "\"this quick brown and very scary fox jumps over the lazy dog\"",
+        "\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
+    ]
+    "###);
 
     let mut s = Search::new(&txn, &index);
     s.query("extravagant");
@@ -263,4 +432,6 @@ fn test_words_tms_all() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
 
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(texts, @"[]");
 }