Replace the test-only `batch_reader_from_documents` helper with
`documents_batch_reader_from_objects`.

The helper is deleted from `milli/src/documents/mod.rs`, so the facets tests
must not import it any more: they import and call
`documents_batch_reader_from_objects` (which takes the documents by value).
NOTE(review): any caller of `batch_reader_from_documents` outside the hunks
below must be migrated the same way - confirm before merging.

diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs
index 5c83991c2..c1580309a 100644
--- a/milli/src/documents/mod.rs
+++ b/milli/src/documents/mod.rs
@@ -190,17 +190,6 @@ pub fn documents_batch_reader_from_objects(
     DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
 }
 
-#[cfg(test)]
-pub fn batch_reader_from_documents(
-    documents: &[Object],
-) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
-    let mut builder = DocumentsBatchBuilder::new(Vec::new());
-    for object in documents {
-        builder.append_json_object(&object).unwrap();
-    }
-    DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
-}
-
 #[cfg(test)]
 mod test {
     use std::io::Cursor;
diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs
index 981fa819c..904f165b1 100644
--- a/milli/src/update/facets.rs
+++ b/milli/src/update/facets.rs
@@ -347,7 +347,7 @@ fn write_string_entry(
 mod tests {
     use std::num::NonZeroUsize;
 
-    use crate::{db_snap, documents::batch_reader_from_documents, index::tests::TempIndex};
+    use crate::{db_snap, documents::documents_batch_reader_from_objects, index::tests::TempIndex};
 
     #[test]
     fn test_facets_number() {
@@ -419,7 +419,7 @@ mod tests {
                 serde_json::json!({ "facet2": format!("s{i:X}") }).as_object().unwrap().clone(),
             );
         }
-        let documents = batch_reader_from_documents(&documents);
+        let documents = documents_batch_reader_from_objects(documents);
 
         index.add_documents(documents).unwrap();
 
diff --git a/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap
new file mode 100644
index 000000000..0a61cf4e8
--- /dev/null
+++ b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap
@@ -0,0 +1,46 @@
+---
+source: milli/src/update/word_prefix_pair_proximity_docids.rs
+---
+5 a 1 [101, ]
+5 a 2 [101, ]
+5 b 4 [101, ]
+5 be 4 [101, ]
+am a 3 [101, ]
+amazing a 1 [100, ]
+amazing a 2 [100, ]
+amazing a 3 [100, ]
+amazing b 2 [100, ]
+amazing be 2 [100, ]
+an a 1 [100, ]
+an a 2 [100, ]
+an b 3 [100, ]
+an be 3 [100, ]
+and a 2 [100, ]
+and a 3 [100, ]
+and a 4 [100, ]
+and b 1 [100, ]
+and be 1 [100, ]
+at a 1 [100, ]
+at a 2 [100, 101, ]
+at a 3 [100, ]
+at b 3 [101, ]
+at b 4 [100, ]
+at be 3 [101, ]
+at be 4 [100, ]
+beautiful a 2 [100, ]
+beautiful a 3 [100, ]
+beautiful a 4 [100, ]
+bell a 2 [101, ]
+bell a 4 [101, ]
+house a 3 [100, ]
+house a 4 [100, ]
+house b 2 [100, ]
+house be 2 [100, ]
+rings a 1 [101, ]
+rings a 3 [101, ]
+rings b 2 [101, ]
+rings be 2 [101, ]
+the a 3 [101, ]
+the b 1 [101, ]
+the be 1 [101, ]
+
diff --git a/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap
new file mode 100644
index 000000000..aabd9ddec
--- /dev/null
+++ b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap
@@ -0,0 +1,56 @@
+---
+source: milli/src/update/word_prefix_pair_proximity_docids.rs
+---
+5 a 1 [101, ]
+5 a 2 [101, ]
+5 am 1 [101, ]
+5 b 4 [101, ]
+5 be 4 [101, ]
+am a 3 [101, ]
+amazing a 1 [100, ]
+amazing a 2 [100, ]
+amazing a 3 [100, ]
+amazing b 2 [100, ]
+amazing be 2 [100, ]
+an a 1 [100, ]
+an a 2 [100, 202, ]
+an am 1 [100, ]
+an b 3 [100, ]
+an be 3 [100, ]
+and a 2 [100, ]
+and a 3 [100, ]
+and a 4 [100, ]
+and am 2 [100, ]
+and b 1 [100, ]
+and be 1 [100, ]
+at a 1 [100, 202, ]
+at a 2 [100, 101, ]
+at a 3 [100, ]
+at am 2 [100, 101, ]
+at b 3 [101, ]
+at b 4 [100, ]
+at be 3 [101, ]
+at be 4 [100, ]
+beautiful a 2 [100, ]
+beautiful a 3 [100, ]
+beautiful a 4 [100, ]
+beautiful am 3 [100, ]
+bell a 2 [101, ]
+bell a 4 [101, ]
+bell am 4 [101, ]
+extraordinary a 2 [202, ]
+extraordinary a 3 [202, ]
+house a 3 [100, 202, ]
+house a 4 [100, 202, ]
+house am 4 [100, ]
+house b 2 [100, ]
+house be 2 [100, ]
+rings a 1 [101, ]
+rings a 3 [101, ]
+rings am 3 [101, ]
+rings b 2 [101, ]
+rings be 2 [101, ]
+the a 3 [101, ]
+the b 1 [101, ]
+the be 1 [101, ]
+
diff --git a/milli/src/update/word_prefix_pair_proximity_docids.rs b/milli/src/update/word_prefix_pair_proximity_docids.rs
index 72b41c472..7e5d5c090 100644
--- a/milli/src/update/word_prefix_pair_proximity_docids.rs
+++ b/milli/src/update/word_prefix_pair_proximity_docids.rs
@@ -244,3 +244,59 @@ fn insert_current_prefix_data_in_sorter<'a>(
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::{db_snap, documents::documents_batch_reader_from_objects, index::tests::TempIndex};
+
+    /// Builds a JSON document whose only field, `text`, holds the given string.
+    fn text_document(text: &str) -> serde_json::Map<String, serde_json::Value> {
+        serde_json::json!({ "text": text }).as_object().unwrap().clone()
+    }
+
+    /// Returns, for every prefix, 50 documents whose `text` is the prefix followed by a
+    /// distinct lowercase-hex number (`0` to `31`), i.e. 50 different words per prefix,
+    /// matching the `words_prefix_threshold` of 50 configured in `test_update`.
+    fn documents_with_enough_different_words_for_prefixes(
+        prefixes: &[&str],
+    ) -> Vec<serde_json::Map<String, serde_json::Value>> {
+        prefixes
+            .iter()
+            .flat_map(|prefix| (0..50).map(move |i| text_document(&format!("{prefix}{i:x}"))))
+            .collect()
+    }
+
+    /// Snapshots `word_prefix_pair_proximity_docids` after a first indexing ("initial") and
+    /// again after a second batch of documents is added ("update").
+    #[test]
+    fn test_update() {
+        let mut index = TempIndex::new();
+        index.index_documents_config.words_prefix_threshold = Some(50);
+        index.index_documents_config.autogenerate_docids = true;
+
+        index
+            .update_settings(|settings| {
+                settings.set_searchable_fields(vec!["text".to_owned()]);
+            })
+            .unwrap();
+
+        let mut documents = documents_with_enough_different_words_for_prefixes(&["a", "be"]);
+        // now we add some documents where the text should populate the
+        // word_prefix_pair_proximity_docids database
+        documents.push(text_document("At an amazing and beautiful house"));
+        documents.push(text_document("The bell rings at 5 am"));
+
+        let documents = documents_batch_reader_from_objects(documents);
+        index.add_documents(documents).unwrap();
+
+        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
+
+        // second batch: words for the new prefixes `am` and `an`, plus one more sentence
+        let mut documents = documents_with_enough_different_words_for_prefixes(&["am", "an"]);
+        documents.push(text_document("At an extraordinary house"));
+        let documents = documents_batch_reader_from_objects(documents);
+        index.add_documents(documents).unwrap();
+
+        db_snap!(index, word_prefix_pair_proximity_docids, "update");
+    }
+}