From 334098a7e04f31a20a37149ce869fe240381a8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 3 Aug 2022 08:45:26 +0200 Subject: [PATCH 01/16] Add index snapshot test helper function --- milli/Cargo.toml | 3 + milli/src/lib.rs | 4 + milli/src/snapshot_tests.rs | 320 ++++++++++++++++++++++++++++++++++++ 3 files changed, 327 insertions(+) create mode 100644 milli/src/snapshot_tests.rs diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 37c7b7c84..318a2604a 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -51,8 +51,11 @@ csv = "1.1.6" [dev-dependencies] big_s = "1.0.2" +insta = "1.17.1" maplit = "1.0.2" +md5 = "0.7.0" rand = "0.8.5" +regex = "1.6.0" [features] default = [] diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 09cecb228..85b25cad1 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -13,6 +13,10 @@ pub mod proximity; mod search; pub mod update; +#[cfg(test)] +#[macro_use] +pub mod snapshot_tests; + use std::collections::{BTreeMap, HashMap}; use std::convert::{TryFrom, TryInto}; use std::hash::BuildHasherDefault; diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs new file mode 100644 index 000000000..aa1d5cf27 --- /dev/null +++ b/milli/src/snapshot_tests.rs @@ -0,0 +1,320 @@ +use heed::BytesDecode; +use roaring::RoaringBitmap; +use std::path::Path; + +use crate::{ + heed_codec::facet::{ + FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, + FacetStringZeroBoundsValueCodec, + }, + CboRoaringBitmapCodec, ExternalDocumentsIds, Index, +}; + +macro_rules! 
snapshot_index { + ($index:expr, $name:expr) => { + $crate::index::tests::snapshot_index($index, $name, None, None) + }; + ($index:expr, $name:expr, include: $regex:literal) => { + $crate::index::tests::snapshot_index( + $index, + $name, + Some(regex::Regex::new($regex).unwrap()), + None, + ) + }; + ($index:expr, $name:expr, exclude: $regex:literal) => { + $crate::index::tests::snapshot_index( + $index, + $name, + None, + Some(regex::Regex::new($regex).unwrap()), + ) + }; +} + +#[track_caller] +pub fn snapshot_index( + index: &Index, + name: &str, + include: Option, + exclude: Option, +) { + use std::fmt::Write; + + let should_snapshot = |name: &str| -> bool { + include.as_ref().map(|f| f.is_match(name)).unwrap_or(true) + && !exclude.as_ref().map(|f| f.is_match(name)).unwrap_or(false) + }; + + let mut settings = insta::Settings::clone_current(); + settings.set_prepend_module_to_snapshot(false); + let path = Path::new(std::panic::Location::caller().file()); + let path = path.strip_prefix("milli/src").unwrap(); + settings.set_omit_expression(true); + settings.set_snapshot_path(Path::new("snapshots").join(path).join(name)); + let rtxn = index.read_txn().unwrap(); + + let store_whole_snapshot = std::env::var("MILLI_TEST_FULL_SNAPS").unwrap_or("false".to_owned()); + let store_whole_snapshot: bool = store_whole_snapshot.parse().unwrap(); + + macro_rules! 
snapshot_db { + ($name:ident, |$vars:pat| $push:block) => { + let name_str = stringify!($name); + if should_snapshot(name_str) { + let iter = index.$name.iter(&rtxn).unwrap(); + let mut snap = String::new(); + for x in iter { + let $vars = x.unwrap(); + snap.push_str($push); + snap.push('\n'); + } + if snap.len() < 512 { + insta::assert_snapshot!(name_str, snap); + } else { + if store_whole_snapshot { + insta::assert_snapshot!(format!("{name_str}.full"), snap); + } + let hash = md5::compute(snap.as_bytes()); + let hash_str = format!("{hash:x}"); + insta::assert_snapshot!(format!("{name_str}.hash"), hash_str); + } + } + }; + } + + fn display_bitmap(b: &RoaringBitmap) -> String { + let mut s = String::new(); + s.push('['); + for x in b.into_iter() { + write!(&mut s, "{x}, ").unwrap(); + } + s.push(']'); + s + } + + settings.bind(|| { + snapshot_db!(word_docids, |(s, b)| { &format!("{s:<16} {}", display_bitmap(&b)) }); + snapshot_db!(exact_word_docids, |(s, b)| { &format!("{s:<16} {}", display_bitmap(&b)) }); + snapshot_db!(word_prefix_docids, |(s, b)| { &format!("{s:<16} {}", display_bitmap(&b)) }); + snapshot_db!(exact_word_prefix_docids, |(s, b)| { + &format!("{s:<16} {}", display_bitmap(&b)) + }); + + snapshot_db!(docid_word_positions, |((idx, s), b)| { + &format!("{idx:<6} {s:<16} {}", display_bitmap(&b)) + }); + + snapshot_db!(word_pair_proximity_docids, |((word1, word2, proximity), b)| { + &format!("{word1:<16} {word2:<16} {proximity:<2} {}", display_bitmap(&b)) + }); + + snapshot_db!(word_prefix_pair_proximity_docids, |((word1, prefix, proximity), b)| { + &format!("{word1:<16} {prefix:<4} {proximity:<2} {}", display_bitmap(&b)) + }); + + snapshot_db!(word_position_docids, |((word, position), b)| { + &format!("{word:<16} {position:<6} {}", display_bitmap(&b)) + }); + + snapshot_db!(field_id_word_count_docids, |((field_id, word_count), b)| { + &format!("{field_id:<3} {word_count:<6} {}", display_bitmap(&b)) + }); + + snapshot_db!(word_prefix_position_docids, 
|((word_prefix, position), b)| { + &format!("{word_prefix:<4} {position:<6} {}", display_bitmap(&b)) + }); + + snapshot_db!(facet_id_f64_docids, |((facet_id, level, left, right), b)| { + &format!("{facet_id:<3} {level:<2} {left:<6} {right:<6} {}", display_bitmap(&b)) + }); + { + let name_str = stringify!(facet_id_string_docids); + if should_snapshot(name_str) { + let bytes_db = index.facet_id_string_docids.remap_types::(); + let iter = bytes_db.iter(&rtxn).unwrap(); + let mut snap = String::new(); + + for x in iter { + let (key, value) = x.unwrap(); + if let Some((field_id, normalized_str)) = + FacetStringLevelZeroCodec::bytes_decode(key) + { + let (orig_string, docids) = + FacetStringLevelZeroValueCodec::bytes_decode(value).unwrap(); + snap.push_str(&format!( + "{field_id:<3} {normalized_str:<8} {orig_string:<8} {}\n", + display_bitmap(&docids) + )); + } else if let Some((field_id, level, left, right)) = + FacetLevelValueU32Codec::bytes_decode(key) + { + snap.push_str(&format!("{field_id:<3} {level:<2} {left:<6} {right:<6} ")); + let (bounds, docids) = FacetStringZeroBoundsValueCodec::< + CboRoaringBitmapCodec, + >::bytes_decode(value) + .unwrap(); + if let Some((left, right)) = bounds { + snap.push_str(&format!("{left:<8} {right:<8} ")); + } + snap.push_str(&display_bitmap(&docids)); + snap.push('\n'); + } else { + panic!(); + } + } + insta::assert_snapshot!(name_str, snap); + } + } + + // Main - computed settings + { + let mut snap = String::new(); + + macro_rules! 
write_setting_to_snap { + ($name:ident) => { + if should_snapshot(&format!("settings.{}", stringify!($name))) { + let $name = index.$name(&rtxn).unwrap(); + writeln!(&mut snap, "{}: {:?}", stringify!($name), $name).unwrap(); + } + }; + } + write_setting_to_snap!(primary_key); + write_setting_to_snap!(criteria); + write_setting_to_snap!(displayed_fields); + write_setting_to_snap!(distinct_field); + write_setting_to_snap!(filterable_fields); + write_setting_to_snap!(sortable_fields); + write_setting_to_snap!(synonyms); + write_setting_to_snap!(authorize_typos); + write_setting_to_snap!(min_word_len_one_typo); + write_setting_to_snap!(min_word_len_two_typos); + write_setting_to_snap!(exact_words); + write_setting_to_snap!(exact_attributes); + write_setting_to_snap!(max_values_per_facet); + write_setting_to_snap!(pagination_max_total_hits); + write_setting_to_snap!(searchable_fields); + write_setting_to_snap!(user_defined_searchable_fields); + + if !snap.is_empty() { + insta::assert_snapshot!("settings", snap); + } + } + // Main - others + { + macro_rules! 
snapshot_string { + ($name:ident) => { + if should_snapshot(&format!("{}", stringify!($name))) { + insta::assert_snapshot!(stringify!($name), $name); + } + }; + } + { + let documents_ids = index.documents_ids(&rtxn).unwrap(); + let documents_ids = display_bitmap(&documents_ids); + snapshot_string!(documents_ids); + } + { + let stop_words = index.stop_words(&rtxn).unwrap(); + let stop_words = format!("{stop_words:?}"); + snapshot_string!(stop_words); + } + { + let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap(); + let soft_deleted_documents_ids = display_bitmap(&soft_deleted_documents_ids); + snapshot_string!(soft_deleted_documents_ids); + } + + { + let mut field_distribution = String::new(); + for (field, count) in index.field_distribution(&rtxn).unwrap() { + writeln!(&mut field_distribution, "{field:<16} {count:<6}").unwrap(); + } + snapshot_string!(field_distribution); + } + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + { + let mut snap = String::new(); + for field_id in fields_ids_map.ids() { + let name = fields_ids_map.name(field_id).unwrap(); + writeln!(&mut snap, "{field_id:<3} {name:<16}").unwrap(); + } + let fields_ids_map = snap; + snapshot_string!(fields_ids_map); + } + + { + let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap(); + let geo_faceted_documents_ids = display_bitmap(&geo_faceted_documents_ids); + snapshot_string!(geo_faceted_documents_ids); + } + // let geo_rtree = index.geo_rtree(&rtxn).unwrap(); + { + let ExternalDocumentsIds { soft, hard, .. 
} = + index.external_documents_ids(&rtxn).unwrap(); + let mut external_documents_ids = String::new(); + let soft_bytes = soft.into_fst().as_bytes().to_owned(); + let mut hex_soft = String::new(); + for byte in soft_bytes { + write!(&mut hex_soft, "{:x}", byte).unwrap(); + } + writeln!(&mut external_documents_ids, "soft: {hex_soft}").unwrap(); + let hard_bytes = hard.into_fst().as_bytes().to_owned(); + let mut hex_hard = String::new(); + for byte in hard_bytes { + write!(&mut hex_hard, "{:x}", byte).unwrap(); + } + writeln!(&mut external_documents_ids, "hard: {hex_hard}").unwrap(); + + snapshot_string!(external_documents_ids); + } + { + let mut snap = String::new(); + for field_id in fields_ids_map.ids() { + let number_faceted_documents_ids = + index.number_faceted_documents_ids(&rtxn, field_id).unwrap(); + writeln!( + &mut snap, + "{field_id:<3} {}", + display_bitmap(&number_faceted_documents_ids) + ) + .unwrap(); + } + let number_faceted_documents_ids = snap; + snapshot_string!(number_faceted_documents_ids); + } + { + let mut snap = String::new(); + for field_id in fields_ids_map.ids() { + let string_faceted_documents_ids = + index.string_faceted_documents_ids(&rtxn, field_id).unwrap(); + writeln!( + &mut snap, + "{field_id:<3} {}", + display_bitmap(&string_faceted_documents_ids) + ) + .unwrap(); + } + let string_faceted_documents_ids = snap; + snapshot_string!(string_faceted_documents_ids); + } + { + let words_fst = index.words_fst(&rtxn).unwrap(); + let bytes = words_fst.into_fst().as_bytes().to_owned(); + let mut words_fst = String::new(); + for byte in bytes { + write!(&mut words_fst, "{:x}", byte).unwrap(); + } + snapshot_string!(words_fst); + } + { + let words_prefixes_fst = index.words_prefixes_fst(&rtxn).unwrap(); + let bytes = words_prefixes_fst.into_fst().as_bytes().to_owned(); + let mut words_prefixes_fst = String::new(); + for byte in bytes { + write!(&mut words_prefixes_fst, "{:x}", byte).unwrap(); + } + snapshot_string!(words_prefixes_fst); + } + } + 
}); +} From ef889ade5df9dd9f1d433c326d93c4feff91624e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 3 Aug 2022 16:24:28 +0200 Subject: [PATCH 02/16] Refactor snapshot tests --- milli/Cargo.toml | 1 - milli/src/index.rs | 66 +- milli/src/snapshot_tests.rs | 1075 ++++++++++++----- .../1/field_distribution.snap | 7 + .../field_distribution.snap | 7 + 5 files changed, 827 insertions(+), 329 deletions(-) create mode 100644 milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap create mode 100644 milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 318a2604a..b745d970a 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -55,7 +55,6 @@ insta = "1.17.1" maplit = "1.0.2" md5 = "0.7.0" rand = "0.8.5" -regex = "1.6.0" [features] default = [] diff --git a/milli/src/index.rs b/milli/src/index.rs index 43888a177..36e15c181 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1183,13 +1183,12 @@ pub(crate) mod tests { use big_s::S; use heed::{EnvOpenOptions, RwTxn}; - use maplit::btreemap; use tempfile::TempDir; use crate::documents::DocumentsBatchReader; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::update::{self, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; - use crate::Index; + use crate::{db_snap, Index}; pub(crate) struct TempIndex { pub inner: Index, @@ -1288,17 +1287,30 @@ pub(crate) mod tests { ])) .unwrap(); - let rtxn = index.read_txn().unwrap(); - let field_distribution = index.field_distribution(&rtxn).unwrap(); - assert_eq!( - field_distribution, - btreemap! 
{ - "id".to_string() => 2, - "name".to_string() => 2, - "age".to_string() => 1, - } + db_snap!(index, field_distribution, 1); + + db_snap!(index, word_docids, + @r###" + 1 [0, ] + 2 [1, ] + 20 [1, ] + bob [1, ] + kevin [0, ] + "### ); + db_snap!(index, field_distribution); + + db_snap!(index, field_distribution, + @" + age 1 + id 2 + name 2 + " + ); + + // snapshot_index!(&index, "1", include: "^field_distribution$"); + // we add all the documents a second time. we are supposed to get the same // field_distribution in the end index @@ -1309,16 +1321,12 @@ pub(crate) mod tests { ])) .unwrap(); - let rtxn = index.read_txn().unwrap(); - - let field_distribution = index.field_distribution(&rtxn).unwrap(); - assert_eq!( - field_distribution, - btreemap! { - "id".to_string() => 2, - "name".to_string() => 2, - "age".to_string() => 1, - } + db_snap!(index, field_distribution, + @r###" + age 1 + id 2 + name 2 + "### ); // then we update a document by removing one field and another by adding one field @@ -1329,16 +1337,12 @@ pub(crate) mod tests { ])) .unwrap(); - let rtxn = index.read_txn().unwrap(); - - let field_distribution = index.field_distribution(&rtxn).unwrap(); - assert_eq!( - field_distribution, - btreemap! { - "id".to_string() => 2, - "name".to_string() => 2, - "has_dog".to_string() => 1, - } + db_snap!(index, field_distribution, + @r###" + has_dog 1 + id 2 + name 2 + "### ); } diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index aa1d5cf27..6f41ddd5b 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -1,320 +1,801 @@ -use heed::BytesDecode; -use roaring::RoaringBitmap; -use std::path::Path; - use crate::{ heed_codec::facet::{ FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, FacetStringZeroBoundsValueCodec, }, - CboRoaringBitmapCodec, ExternalDocumentsIds, Index, + make_db_snap_from_iter, CboRoaringBitmapCodec, ExternalDocumentsIds, Index, }; - -macro_rules! 
snapshot_index { - ($index:expr, $name:expr) => { - $crate::index::tests::snapshot_index($index, $name, None, None) - }; - ($index:expr, $name:expr, include: $regex:literal) => { - $crate::index::tests::snapshot_index( - $index, - $name, - Some(regex::Regex::new($regex).unwrap()), - None, - ) - }; - ($index:expr, $name:expr, exclude: $regex:literal) => { - $crate::index::tests::snapshot_index( - $index, - $name, - None, - Some(regex::Regex::new($regex).unwrap()), - ) - }; -} +use heed::{types::ByteSlice, BytesDecode}; +use roaring::RoaringBitmap; +use std::path::Path; +use std::{borrow::Cow, fmt::Write}; #[track_caller] -pub fn snapshot_index( - index: &Index, - name: &str, - include: Option, - exclude: Option, -) { - use std::fmt::Write; - - let should_snapshot = |name: &str| -> bool { - include.as_ref().map(|f| f.is_match(name)).unwrap_or(true) - && !exclude.as_ref().map(|f| f.is_match(name)).unwrap_or(false) - }; - +pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> insta::Settings { let mut settings = insta::Settings::clone_current(); settings.set_prepend_module_to_snapshot(false); let path = Path::new(std::panic::Location::caller().file()); let path = path.strip_prefix("milli/src").unwrap(); settings.set_omit_expression(true); - settings.set_snapshot_path(Path::new("snapshots").join(path).join(name)); - let rtxn = index.read_txn().unwrap(); + let test_name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_owned(); - let store_whole_snapshot = std::env::var("MILLI_TEST_FULL_SNAPS").unwrap_or("false".to_owned()); - let store_whole_snapshot: bool = store_whole_snapshot.parse().unwrap(); + if let Some(name) = name { + settings.set_snapshot_path(Path::new("snapshots").join(path).join(test_name).join(name)); + } else { + settings.set_snapshot_path(Path::new("snapshots").join(path).join(test_name)); + } - macro_rules! 
snapshot_db { - ($name:ident, |$vars:pat| $push:block) => { - let name_str = stringify!($name); - if should_snapshot(name_str) { - let iter = index.$name.iter(&rtxn).unwrap(); - let mut snap = String::new(); - for x in iter { - let $vars = x.unwrap(); - snap.push_str($push); - snap.push('\n'); - } - if snap.len() < 512 { - insta::assert_snapshot!(name_str, snap); + settings +} + +#[macro_export] +macro_rules! db_snap { + ($index:ident, $db_name:ident, $name:literal) => { + let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(Some( + &format!("{}", $name), + )); + settings.bind(|| { + let snap = $crate::full_snap_of_db!($index, $db_name); + let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, false); + for (name, snap) in snaps { + insta::assert_snapshot!(name, snap); + } + }); + }; + ($index:ident, $db_name:ident) => { + let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(None); + settings.bind(|| { + let snap = $crate::full_snap_of_db!($index, $db_name); + let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, false); + for (name, snap) in snaps { + insta::assert_snapshot!(name, snap); + } + }); + }; + ($index:ident, $db_name:ident, @$inline:literal) => { + let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(None); + settings.bind(|| { + let snap = $crate::full_snap_of_db!($index, $db_name); + let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, true); + for (name, snap) in snaps { + if !name.ends_with(".full") { + insta::assert_snapshot!(snap, @$inline); } else { - if store_whole_snapshot { - insta::assert_snapshot!(format!("{name_str}.full"), snap); - } - let hash = md5::compute(snap.as_bytes()); - let hash_str = format!("{hash:x}"); - insta::assert_snapshot!(format!("{name_str}.hash"), hash_str); + insta::assert_snapshot!(name, snap); } } + }); + }; + 
($index:ident, $db_name:ident, $name:literal, @$inline:literal) => { + let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(Some(format!("", $name))); + settings.bind(|| { + let snap = $crate::full_snap_of_db!($index, $db_name); + let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, true); + for (name, snap) in snaps { + if !name.ends_with(".full") { + insta::assert_snapshot!(snap, @$inline); + } else { + insta::assert_snapshot!(name, snap); + } + } + }); + }; +} + +pub fn snap_word_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, word_docids, |(s, b)| { + &format!("{s:<16} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_exact_word_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, exact_word_docids, |(s, b)| { + &format!("{s:<16} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_word_prefix_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, word_prefix_docids, |(s, b)| { + &format!("{s:<16} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_exact_word_prefix_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, exact_word_prefix_docids, |(s, b)| { + &format!("{s:<16} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_docid_word_positions(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, docid_word_positions, |((idx, s), b)| { + &format!("{idx:<6} {s:<16} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_word_pair_proximity_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, word_pair_proximity_docids, |( + (word1, word2, proximity), + b, + )| { + &format!("{word1:<16} {word2:<16} {proximity:<2} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |( + (word1, prefix, proximity), + b, + )| 
{ + &format!("{word1:<16} {prefix:<4} {proximity:<2} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_word_position_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, word_position_docids, |((word, position), b)| { + &format!("{word:<16} {position:<6} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_field_id_word_count_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, field_id_word_count_docids, |( + (field_id, word_count), + b, + )| { + &format!("{field_id:<3} {word_count:<6} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_word_prefix_position_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, word_prefix_position_docids, |( + (word_prefix, position), + b, + )| { + &format!("{word_prefix:<4} {position:<6} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_facet_id_f64_docids(index: &Index) -> String { + let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |( + (facet_id, level, left, right), + b, + )| { + &format!("{facet_id:<3} {level:<2} {left:<6} {right:<6} {}", display_bitmap(&b)) + }); + snap +} +pub fn snap_facet_id_string_docids(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let bytes_db = index.facet_id_string_docids.remap_types::(); + let iter = bytes_db.iter(&rtxn).unwrap(); + let mut snap = String::new(); + + for x in iter { + let (key, value) = x.unwrap(); + if let Some((field_id, normalized_str)) = FacetStringLevelZeroCodec::bytes_decode(key) { + let (orig_string, docids) = + FacetStringLevelZeroValueCodec::bytes_decode(value).unwrap(); + snap.push_str(&format!( + "{field_id:<3} {normalized_str:<8} {orig_string:<8} {}\n", + display_bitmap(&docids) + )); + } else if let Some((field_id, level, left, right)) = + FacetLevelValueU32Codec::bytes_decode(key) + { + snap.push_str(&format!("{field_id:<3} {level:<2} {left:<6} {right:<6} ")); + let (bounds, docids) = + FacetStringZeroBoundsValueCodec::::bytes_decode(value) + 
.unwrap(); + if let Some((left, right)) = bounds { + snap.push_str(&format!("{left:<8} {right:<8} ")); + } + snap.push_str(&display_bitmap(&docids)); + snap.push('\n'); + } else { + panic!(); + } + } + snap +} +pub fn snap_documents_ids(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let documents_ids = index.documents_ids(&rtxn).unwrap(); + let snap = display_bitmap(&documents_ids); + snap +} +pub fn snap_stop_words(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let stop_words = index.stop_words(&rtxn).unwrap(); + let snap = format!("{stop_words:?}"); + snap +} +pub fn snap_soft_deleted_documents_ids(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap(); + let soft_deleted_documents_ids = display_bitmap(&soft_deleted_documents_ids); + soft_deleted_documents_ids +} +pub fn snap_field_distributions(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let mut snap = String::new(); + for (field, count) in index.field_distribution(&rtxn).unwrap() { + writeln!(&mut snap, "{field:<16} {count:<6}").unwrap(); + } + snap +} +pub fn snap_fields_ids_map(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let mut snap = String::new(); + for field_id in fields_ids_map.ids() { + let name = fields_ids_map.name(field_id).unwrap(); + writeln!(&mut snap, "{field_id:<3} {name:<16}").unwrap(); + } + snap +} +pub fn snap_geo_faceted_documents_ids(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap(); + let snap = display_bitmap(&geo_faceted_documents_ids); + snap +} +pub fn snap_external_documents_ids(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let ExternalDocumentsIds { soft, hard, .. 
} = index.external_documents_ids(&rtxn).unwrap(); + let mut snap = String::new(); + let soft_bytes = soft.into_fst().as_bytes().to_owned(); + let mut hex_soft = String::new(); + for byte in soft_bytes { + write!(&mut hex_soft, "{:x}", byte).unwrap(); + } + writeln!(&mut snap, "soft: {hex_soft}").unwrap(); + let hard_bytes = hard.into_fst().as_bytes().to_owned(); + let mut hex_hard = String::new(); + for byte in hard_bytes { + write!(&mut hex_hard, "{:x}", byte).unwrap(); + } + writeln!(&mut snap, "hard: {hex_hard}").unwrap(); + snap +} +pub fn snap_number_faceted_documents_ids(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let mut snap = String::new(); + for field_id in fields_ids_map.ids() { + let number_faceted_documents_ids = + index.number_faceted_documents_ids(&rtxn, field_id).unwrap(); + writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&number_faceted_documents_ids)) + .unwrap(); + } + snap +} +pub fn snap_string_faceted_documents_ids(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + + let mut snap = String::new(); + for field_id in fields_ids_map.ids() { + let string_faceted_documents_ids = + index.string_faceted_documents_ids(&rtxn, field_id).unwrap(); + writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&string_faceted_documents_ids)) + .unwrap(); + } + snap +} +pub fn snap_words_fst(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let words_fst = index.words_fst(&rtxn).unwrap(); + let bytes = words_fst.into_fst().as_bytes().to_owned(); + let mut snap = String::new(); + for byte in bytes { + write!(&mut snap, "{:x}", byte).unwrap(); + } + snap +} +pub fn snap_words_prefixes_fst(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let words_prefixes_fst = index.words_prefixes_fst(&rtxn).unwrap(); + let bytes = 
words_prefixes_fst.into_fst().as_bytes().to_owned(); + let mut snap = String::new(); + for byte in bytes { + write!(&mut snap, "{:x}", byte).unwrap(); + } + snap +} + +pub fn snap_settings(index: &Index) -> String { + let mut snap = String::new(); + let rtxn = index.read_txn().unwrap(); + + macro_rules! write_setting_to_snap { + ($name:ident) => { + let $name = index.$name(&rtxn).unwrap(); + writeln!(&mut snap, "{}: {:?}", stringify!($name), $name).unwrap(); }; } - fn display_bitmap(b: &RoaringBitmap) -> String { - let mut s = String::new(); - s.push('['); - for x in b.into_iter() { - write!(&mut s, "{x}, ").unwrap(); - } - s.push(']'); - s - } + write_setting_to_snap!(primary_key); + write_setting_to_snap!(criteria); + write_setting_to_snap!(displayed_fields); + write_setting_to_snap!(distinct_field); + write_setting_to_snap!(filterable_fields); + write_setting_to_snap!(sortable_fields); + write_setting_to_snap!(synonyms); + write_setting_to_snap!(authorize_typos); + write_setting_to_snap!(min_word_len_one_typo); + write_setting_to_snap!(min_word_len_two_typos); + write_setting_to_snap!(exact_words); + write_setting_to_snap!(exact_attributes); + write_setting_to_snap!(max_values_per_facet); + write_setting_to_snap!(pagination_max_total_hits); + write_setting_to_snap!(searchable_fields); + write_setting_to_snap!(user_defined_searchable_fields); - settings.bind(|| { - snapshot_db!(word_docids, |(s, b)| { &format!("{s:<16} {}", display_bitmap(&b)) }); - snapshot_db!(exact_word_docids, |(s, b)| { &format!("{s:<16} {}", display_bitmap(&b)) }); - snapshot_db!(word_prefix_docids, |(s, b)| { &format!("{s:<16} {}", display_bitmap(&b)) }); - snapshot_db!(exact_word_prefix_docids, |(s, b)| { - &format!("{s:<16} {}", display_bitmap(&b)) - }); - - snapshot_db!(docid_word_positions, |((idx, s), b)| { - &format!("{idx:<6} {s:<16} {}", display_bitmap(&b)) - }); - - snapshot_db!(word_pair_proximity_docids, |((word1, word2, proximity), b)| { - &format!("{word1:<16} {word2:<16} 
{proximity:<2} {}", display_bitmap(&b)) - }); - - snapshot_db!(word_prefix_pair_proximity_docids, |((word1, prefix, proximity), b)| { - &format!("{word1:<16} {prefix:<4} {proximity:<2} {}", display_bitmap(&b)) - }); - - snapshot_db!(word_position_docids, |((word, position), b)| { - &format!("{word:<16} {position:<6} {}", display_bitmap(&b)) - }); - - snapshot_db!(field_id_word_count_docids, |((field_id, word_count), b)| { - &format!("{field_id:<3} {word_count:<6} {}", display_bitmap(&b)) - }); - - snapshot_db!(word_prefix_position_docids, |((word_prefix, position), b)| { - &format!("{word_prefix:<4} {position:<6} {}", display_bitmap(&b)) - }); - - snapshot_db!(facet_id_f64_docids, |((facet_id, level, left, right), b)| { - &format!("{facet_id:<3} {level:<2} {left:<6} {right:<6} {}", display_bitmap(&b)) - }); - { - let name_str = stringify!(facet_id_string_docids); - if should_snapshot(name_str) { - let bytes_db = index.facet_id_string_docids.remap_types::(); - let iter = bytes_db.iter(&rtxn).unwrap(); - let mut snap = String::new(); - - for x in iter { - let (key, value) = x.unwrap(); - if let Some((field_id, normalized_str)) = - FacetStringLevelZeroCodec::bytes_decode(key) - { - let (orig_string, docids) = - FacetStringLevelZeroValueCodec::bytes_decode(value).unwrap(); - snap.push_str(&format!( - "{field_id:<3} {normalized_str:<8} {orig_string:<8} {}\n", - display_bitmap(&docids) - )); - } else if let Some((field_id, level, left, right)) = - FacetLevelValueU32Codec::bytes_decode(key) - { - snap.push_str(&format!("{field_id:<3} {level:<2} {left:<6} {right:<6} ")); - let (bounds, docids) = FacetStringZeroBoundsValueCodec::< - CboRoaringBitmapCodec, - >::bytes_decode(value) - .unwrap(); - if let Some((left, right)) = bounds { - snap.push_str(&format!("{left:<8} {right:<8} ")); - } - snap.push_str(&display_bitmap(&docids)); - snap.push('\n'); - } else { - panic!(); - } - } - insta::assert_snapshot!(name_str, snap); - } - } - - // Main - computed settings - { - let mut 
snap = String::new(); - - macro_rules! write_setting_to_snap { - ($name:ident) => { - if should_snapshot(&format!("settings.{}", stringify!($name))) { - let $name = index.$name(&rtxn).unwrap(); - writeln!(&mut snap, "{}: {:?}", stringify!($name), $name).unwrap(); - } - }; - } - write_setting_to_snap!(primary_key); - write_setting_to_snap!(criteria); - write_setting_to_snap!(displayed_fields); - write_setting_to_snap!(distinct_field); - write_setting_to_snap!(filterable_fields); - write_setting_to_snap!(sortable_fields); - write_setting_to_snap!(synonyms); - write_setting_to_snap!(authorize_typos); - write_setting_to_snap!(min_word_len_one_typo); - write_setting_to_snap!(min_word_len_two_typos); - write_setting_to_snap!(exact_words); - write_setting_to_snap!(exact_attributes); - write_setting_to_snap!(max_values_per_facet); - write_setting_to_snap!(pagination_max_total_hits); - write_setting_to_snap!(searchable_fields); - write_setting_to_snap!(user_defined_searchable_fields); - - if !snap.is_empty() { - insta::assert_snapshot!("settings", snap); - } - } - // Main - others - { - macro_rules! 
snapshot_string { - ($name:ident) => { - if should_snapshot(&format!("{}", stringify!($name))) { - insta::assert_snapshot!(stringify!($name), $name); - } - }; - } - { - let documents_ids = index.documents_ids(&rtxn).unwrap(); - let documents_ids = display_bitmap(&documents_ids); - snapshot_string!(documents_ids); - } - { - let stop_words = index.stop_words(&rtxn).unwrap(); - let stop_words = format!("{stop_words:?}"); - snapshot_string!(stop_words); - } - { - let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap(); - let soft_deleted_documents_ids = display_bitmap(&soft_deleted_documents_ids); - snapshot_string!(soft_deleted_documents_ids); - } - - { - let mut field_distribution = String::new(); - for (field, count) in index.field_distribution(&rtxn).unwrap() { - writeln!(&mut field_distribution, "{field:<16} {count:<6}").unwrap(); - } - snapshot_string!(field_distribution); - } - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); - { - let mut snap = String::new(); - for field_id in fields_ids_map.ids() { - let name = fields_ids_map.name(field_id).unwrap(); - writeln!(&mut snap, "{field_id:<3} {name:<16}").unwrap(); - } - let fields_ids_map = snap; - snapshot_string!(fields_ids_map); - } - - { - let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap(); - let geo_faceted_documents_ids = display_bitmap(&geo_faceted_documents_ids); - snapshot_string!(geo_faceted_documents_ids); - } - // let geo_rtree = index.geo_rtree(&rtxn).unwrap(); - { - let ExternalDocumentsIds { soft, hard, .. 
} = - index.external_documents_ids(&rtxn).unwrap(); - let mut external_documents_ids = String::new(); - let soft_bytes = soft.into_fst().as_bytes().to_owned(); - let mut hex_soft = String::new(); - for byte in soft_bytes { - write!(&mut hex_soft, "{:x}", byte).unwrap(); - } - writeln!(&mut external_documents_ids, "soft: {hex_soft}").unwrap(); - let hard_bytes = hard.into_fst().as_bytes().to_owned(); - let mut hex_hard = String::new(); - for byte in hard_bytes { - write!(&mut hex_hard, "{:x}", byte).unwrap(); - } - writeln!(&mut external_documents_ids, "hard: {hex_hard}").unwrap(); - - snapshot_string!(external_documents_ids); - } - { - let mut snap = String::new(); - for field_id in fields_ids_map.ids() { - let number_faceted_documents_ids = - index.number_faceted_documents_ids(&rtxn, field_id).unwrap(); - writeln!( - &mut snap, - "{field_id:<3} {}", - display_bitmap(&number_faceted_documents_ids) - ) - .unwrap(); - } - let number_faceted_documents_ids = snap; - snapshot_string!(number_faceted_documents_ids); - } - { - let mut snap = String::new(); - for field_id in fields_ids_map.ids() { - let string_faceted_documents_ids = - index.string_faceted_documents_ids(&rtxn, field_id).unwrap(); - writeln!( - &mut snap, - "{field_id:<3} {}", - display_bitmap(&string_faceted_documents_ids) - ) - .unwrap(); - } - let string_faceted_documents_ids = snap; - snapshot_string!(string_faceted_documents_ids); - } - { - let words_fst = index.words_fst(&rtxn).unwrap(); - let bytes = words_fst.into_fst().as_bytes().to_owned(); - let mut words_fst = String::new(); - for byte in bytes { - write!(&mut words_fst, "{:x}", byte).unwrap(); - } - snapshot_string!(words_fst); - } - { - let words_prefixes_fst = index.words_prefixes_fst(&rtxn).unwrap(); - let bytes = words_prefixes_fst.into_fst().as_bytes().to_owned(); - let mut words_prefixes_fst = String::new(); - for byte in bytes { - write!(&mut words_prefixes_fst, "{:x}", byte).unwrap(); - } - snapshot_string!(words_prefixes_fst); - } - } - 
}); + snap } + +#[macro_export] +macro_rules! full_snap_of_db { + ($index:ident, settings) => {{ + $crate::snapshot_tests::snap_settings(&$index) + }}; + ($index:ident, word_docids) => {{ + $crate::snapshot_tests::snap_word_docids(&$index) + }}; + ($index:ident, exact_word_docids) => {{ + $crate::snapshot_tests::snap_exact_word_docids(&$index) + }}; + ($index:ident, word_prefix_docids) => {{ + $crate::snapshot_tests::snap_word_prefix_docids(&$index) + }}; + ($index:ident, exact_word_prefix_docids) => {{ + $crate::snapshot_tests::snap_exact_word_prefix_docids(&$index) + }}; + ($index:ident, docid_word_positions) => {{ + $crate::snapshot_tests::snap_docid_word_positions(&$index) + }}; + ($index:ident, word_pair_proximity_docids) => {{ + $crate::snapshot_tests::snap_word_pair_proximity_docids(&$index) + }}; + ($index:ident, word_prefix_pair_proximity_docids) => {{ + $crate::snapshot_tests::snap_word_prefix_pair_proximity_docids(&$index) + }}; + ($index:ident, word_position_docids) => {{ + $crate::snapshot_tests::snap_word_position_docids(&$index) + }}; + ($index:ident, field_id_word_count_docids) => {{ + $crate::snapshot_tests::snap_field_id_word_count_docids(&$index) + }}; + ($index:ident, word_prefix_position_docids) => {{ + $crate::snapshot_tests::snap_word_prefix_position_docids(&$index) + }}; + ($index:ident, facet_id_f64_docids) => {{ + $crate::snapshot_tests::snap_facet_id_f64_docids(&$index) + }}; + ($index:ident, facet_id_string_docids) => {{ + $crate::snapshot_tests::snap_facet_id_string_docids(&$index) + }}; + ($index:ident, documents_ids) => {{ + $crate::snapshot_tests::snap_documents_ids(&$index) + }}; + ($index:ident, stop_words) => {{ + $crate::snapshot_tests::snap_stop_words(&$index) + }}; + ($index:ident, soft_deleted_documents_ids) => {{ + $crate::snapshot_tests::snap_soft_deleted_documents_ids(&$index) + }}; + ($index:ident, field_distribution) => {{ + $crate::snapshot_tests::snap_field_distributions(&$index) + }}; + ($index:ident, fields_ids_map) 
=> {{ + $crate::snapshot_tests::snap_fields_ids_map(&$index) + }}; + ($index:ident, geo_faceted_documents_ids) => {{ + $crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index) + }}; + ($index:ident, external_documents_ids) => {{ + $crate::snapshot_tests::snap_external_documents_ids(&$index) + }}; + ($index:ident, number_faceted_documents_ids) => {{ + $crate::snapshot_tests::snap_number_faceted_documents_ids(&$index) + }}; + ($index:ident, string_faceted_documents_ids) => {{ + $crate::snapshot_tests::snap_string_faceted_documents_ids(&$index) + }}; + ($index:ident, words_fst) => {{ + $crate::snapshot_tests::snap_words_fst(&$index) + }}; + ($index:ident, words_prefixes_fst) => {{ + $crate::snapshot_tests::snap_words_prefixes_fst(&$index) + }}; +} + +pub fn convert_snap_to_hash_if_needed<'snap>( + name: &str, + snap: &'snap str, + inline: bool, +) -> Vec<(String, Cow<'snap, str>)> { + let store_whole_snapshot = std::env::var("MILLI_TEST_FULL_SNAPS").unwrap_or("false".to_owned()); + let store_whole_snapshot: bool = store_whole_snapshot.parse().unwrap(); + + let max_len = if inline { 256 } else { 2048 }; + + if snap.len() < max_len { + vec![(name.to_owned(), Cow::Borrowed(snap))] + } else { + let mut r = vec![]; + if store_whole_snapshot { + r.push((format!("{name}.full"), Cow::Borrowed(snap))); + } + let hash = md5::compute(snap.as_bytes()); + let hash_str = format!("{hash:x}"); + r.push((format!("{name}.hash"), Cow::Owned(hash_str))); + r + } +} + +#[macro_export] +macro_rules! 
make_db_snap_from_iter { + ($index:ident, $name:ident, |$vars:pat| $push:block) => {{ + let rtxn = $index.read_txn().unwrap(); + let iter = $index.$name.iter(&rtxn).unwrap(); + let mut snap = String::new(); + for x in iter { + let $vars = x.unwrap(); + snap.push_str($push); + snap.push('\n'); + } + snap + }}; +} + +pub fn display_bitmap(b: &RoaringBitmap) -> String { + let mut s = String::new(); + s.push('['); + for x in b.into_iter() { + write!(&mut s, "{x}, ").unwrap(); + } + s.push(']'); + s +} + +// #[macro_export] +// macro_rules! snapshot_index { +// ($index:expr, $name:expr) => { +// $crate::snapshot_tests::snapshot_index($index, $name, None, None) +// }; +// ($index:expr, $name:expr, include: $regex:literal) => { +// $crate::snapshot_tests::snapshot_index( +// $index, +// $name, +// Some(regex::Regex::new($regex).unwrap()), +// None, +// ) +// }; +// ($index:expr, $name:expr, exclude: $regex:literal) => { +// $crate::snapshot_tests::snapshot_index( +// $index, +// $name, +// None, +// Some(regex::Regex::new($regex).unwrap()), +// ) +// }; +// } + +// pub fn snap_of_db_settings(index: &Index, include: Option) -> String { +// let should_snapshot = +// |name: &str| -> bool { include.as_ref().map(|f| f.is_match(name)).unwrap_or(true) }; + +// let rtxn = index.read_txn().unwrap(); + +// let mut snap = String::new(); + +// macro_rules! 
write_setting_to_snap { +// ($name:ident) => { +// if should_snapshot(&format!("settings.{}", stringify!($name))) { +// let $name = index.$name(&rtxn).unwrap(); +// writeln!(&mut snap, "{}: {:?}", stringify!($name), $name).unwrap(); +// } +// }; +// } +// write_setting_to_snap!(primary_key); +// write_setting_to_snap!(criteria); +// write_setting_to_snap!(displayed_fields); +// write_setting_to_snap!(distinct_field); +// write_setting_to_snap!(filterable_fields); +// write_setting_to_snap!(sortable_fields); +// write_setting_to_snap!(synonyms); +// write_setting_to_snap!(authorize_typos); +// write_setting_to_snap!(min_word_len_one_typo); +// write_setting_to_snap!(min_word_len_two_typos); +// write_setting_to_snap!(exact_words); +// write_setting_to_snap!(exact_attributes); +// write_setting_to_snap!(max_values_per_facet); +// write_setting_to_snap!(pagination_max_total_hits); +// write_setting_to_snap!(searchable_fields); +// write_setting_to_snap!(user_defined_searchable_fields); + +// snap +// } + +// #[track_caller] +// pub fn snapshot_index( +// index: &Index, +// name: &str, +// include: Option, +// exclude: Option, +// ) { +// let should_snapshot = |name: &str| -> bool { +// include.as_ref().map(|f| f.is_match(name)).unwrap_or(true) +// && !exclude.as_ref().map(|f| f.is_match(name)).unwrap_or(false) +// }; +// let settings = default_db_snapshot_settings_for_test(Some(name)); +// let rtxn = index.read_txn().unwrap(); + +// let snapshot_hash = |name: &str, snap: &str| { +// let store_whole_snapshot = +// std::env::var("MILLI_TEST_FULL_SNAPS").unwrap_or("false".to_owned()); +// let store_whole_snapshot: bool = store_whole_snapshot.parse().unwrap(); +// if snap.len() < 512 { +// insta::assert_snapshot!(name, snap); +// } else { +// if store_whole_snapshot { +// insta::assert_snapshot!(format!("{name}.full"), snap); +// } +// let hash = md5::compute(snap.as_bytes()); +// let hash_str = format!("{hash:x}"); +// insta::assert_snapshot!(format!("{name}.hash"), 
hash_str); +// } +// }; + +// macro_rules! snapshot_db { +// ($name:ident, |$vars:pat| $push:block) => { +// let name_str = stringify!($name); +// if should_snapshot(name_str) { +// let iter = index.$name.iter(&rtxn).unwrap(); +// let mut snap = String::new(); +// for x in iter { +// let $vars = x.unwrap(); +// snap.push_str($push); +// snap.push('\n'); +// } +// snapshot_hash(name_str, &snap); +// } +// }; +// } + +// fn display_bitmap(b: &RoaringBitmap) -> String { +// let mut s = String::new(); +// s.push('['); +// for x in b.into_iter() { +// write!(&mut s, "{x}, ").unwrap(); +// } +// s.push(']'); +// s +// } + +// settings.bind(|| { +// snapshot_db!(word_docids, |(s, b)| { &format!("{s:<16} {}", $crate::snapshot_tests::display_bitmap(&b)) }); +// snapshot_db!(exact_word_docids, |(s, b)| { &format!("{s:<16} {}", $crate::snapshot_tests::display_bitmap(&b)) }); +// snapshot_db!(word_prefix_docids, |(s, b)| { &format!("{s:<16} {}", display_bitmap(&b)) }); +// snapshot_db!(exact_word_prefix_docids, |(s, b)| { +// &format!("{s:<16} {}", display_bitmap(&b)) +// }); + +// snapshot_db!(docid_word_positions, |((idx, s), b)| { +// &format!("{idx:<6} {s:<16} {}", display_bitmap(&b)) +// }); + +// snapshot_db!(word_pair_proximity_docids, |((word1, word2, proximity), b)| { +// &format!("{word1:<16} {word2:<16} {proximity:<2} {}", display_bitmap(&b)) +// }); + +// snapshot_db!(word_prefix_pair_proximity_docids, |((word1, prefix, proximity), b)| { +// &format!("{word1:<16} {prefix:<4} {proximity:<2} {}", display_bitmap(&b)) +// }); + +// snapshot_db!(word_position_docids, |((word, position), b)| { +// &format!("{word:<16} {position:<6} {}", display_bitmap(&b)) +// }); + +// snapshot_db!(field_id_word_count_docids, |((field_id, word_count), b)| { +// &format!("{field_id:<3} {word_count:<6} {}", display_bitmap(&b)) +// }); + +// snapshot_db!(word_prefix_position_docids, |((word_prefix, position), b)| { +// &format!("{word_prefix:<4} {position:<6} {}", display_bitmap(&b)) +// 
}); + +// snapshot_db!(facet_id_f64_docids, |((facet_id, level, left, right), b)| { +// &format!("{facet_id:<3} {level:<2} {left:<6} {right:<6} {}", display_bitmap(&b)) +// }); +// { +// let name_str = stringify!(facet_id_string_docids); +// if should_snapshot(name_str) { +// let bytes_db = index.facet_id_string_docids.remap_types::(); +// let iter = bytes_db.iter(&rtxn).unwrap(); +// let mut snap = String::new(); + +// for x in iter { +// let (key, value) = x.unwrap(); +// if let Some((field_id, normalized_str)) = +// FacetStringLevelZeroCodec::bytes_decode(key) +// { +// let (orig_string, docids) = +// FacetStringLevelZeroValueCodec::bytes_decode(value).unwrap(); +// snap.push_str(&format!( +// "{field_id:<3} {normalized_str:<8} {orig_string:<8} {}\n", +// display_bitmap(&docids) +// )); +// } else if let Some((field_id, level, left, right)) = +// FacetLevelValueU32Codec::bytes_decode(key) +// { +// snap.push_str(&format!("{field_id:<3} {level:<2} {left:<6} {right:<6} ")); +// let (bounds, docids) = FacetStringZeroBoundsValueCodec::< +// CboRoaringBitmapCodec, +// >::bytes_decode(value) +// .unwrap(); +// if let Some((left, right)) = bounds { +// snap.push_str(&format!("{left:<8} {right:<8} ")); +// } +// snap.push_str(&display_bitmap(&docids)); +// snap.push('\n'); +// } else { +// panic!(); +// } +// } +// snapshot_hash(name_str, &snap); +// } +// } + +// // Main - computed settings +// { +// let mut snap = String::new(); + +// macro_rules! 
write_setting_to_snap { +// ($name:ident) => { +// if should_snapshot(&format!("settings.{}", stringify!($name))) { +// let $name = index.$name(&rtxn).unwrap(); +// writeln!(&mut snap, "{}: {:?}", stringify!($name), $name).unwrap(); +// } +// }; +// } +// write_setting_to_snap!(primary_key); +// write_setting_to_snap!(criteria); +// write_setting_to_snap!(displayed_fields); +// write_setting_to_snap!(distinct_field); +// write_setting_to_snap!(filterable_fields); +// write_setting_to_snap!(sortable_fields); +// write_setting_to_snap!(synonyms); +// write_setting_to_snap!(authorize_typos); +// write_setting_to_snap!(min_word_len_one_typo); +// write_setting_to_snap!(min_word_len_two_typos); +// write_setting_to_snap!(exact_words); +// write_setting_to_snap!(exact_attributes); +// write_setting_to_snap!(max_values_per_facet); +// write_setting_to_snap!(pagination_max_total_hits); +// write_setting_to_snap!(searchable_fields); +// write_setting_to_snap!(user_defined_searchable_fields); + +// if !snap.is_empty() { +// insta::assert_snapshot!("settings", snap); +// } +// } +// // Main - others +// { +// macro_rules! 
snapshot_string { +// ($name:ident) => { +// if should_snapshot(&format!("{}", stringify!($name))) { +// insta::assert_snapshot!(stringify!($name), $name); +// } +// }; +// } +// { +// let documents_ids = index.documents_ids(&rtxn).unwrap(); +// let documents_ids = display_bitmap(&documents_ids); +// snapshot_string!(documents_ids); +// } +// { +// let stop_words = index.stop_words(&rtxn).unwrap(); +// let stop_words = format!("{stop_words:?}"); +// snapshot_string!(stop_words); +// } +// { +// let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap(); +// let soft_deleted_documents_ids = display_bitmap(&soft_deleted_documents_ids); +// snapshot_string!(soft_deleted_documents_ids); +// } + +// { +// let mut field_distribution = String::new(); +// for (field, count) in index.field_distribution(&rtxn).unwrap() { +// writeln!(&mut field_distribution, "{field:<16} {count:<6}").unwrap(); +// } +// snapshot_string!(field_distribution); +// } +// let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); +// { +// let mut snap = String::new(); +// for field_id in fields_ids_map.ids() { +// let name = fields_ids_map.name(field_id).unwrap(); +// writeln!(&mut snap, "{field_id:<3} {name:<16}").unwrap(); +// } +// let fields_ids_map = snap; +// snapshot_string!(fields_ids_map); +// } + +// { +// let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap(); +// let geo_faceted_documents_ids = display_bitmap(&geo_faceted_documents_ids); +// snapshot_string!(geo_faceted_documents_ids); +// } +// // let geo_rtree = index.geo_rtree(&rtxn).unwrap(); +// { +// let ExternalDocumentsIds { soft, hard, .. 
} = +// index.external_documents_ids(&rtxn).unwrap(); +// let mut external_documents_ids = String::new(); +// let soft_bytes = soft.into_fst().as_bytes().to_owned(); +// let mut hex_soft = String::new(); +// for byte in soft_bytes { +// write!(&mut hex_soft, "{:x}", byte).unwrap(); +// } +// writeln!(&mut external_documents_ids, "soft: {hex_soft}").unwrap(); +// let hard_bytes = hard.into_fst().as_bytes().to_owned(); +// let mut hex_hard = String::new(); +// for byte in hard_bytes { +// write!(&mut hex_hard, "{:x}", byte).unwrap(); +// } +// writeln!(&mut external_documents_ids, "hard: {hex_hard}").unwrap(); + +// snapshot_string!(external_documents_ids); +// } +// { +// let mut snap = String::new(); +// for field_id in fields_ids_map.ids() { +// let number_faceted_documents_ids = +// index.number_faceted_documents_ids(&rtxn, field_id).unwrap(); +// writeln!( +// &mut snap, +// "{field_id:<3} {}", +// display_bitmap(&number_faceted_documents_ids) +// ) +// .unwrap(); +// } +// let number_faceted_documents_ids = snap; +// snapshot_string!(number_faceted_documents_ids); +// } +// { +// let mut snap = String::new(); +// for field_id in fields_ids_map.ids() { +// let string_faceted_documents_ids = +// index.string_faceted_documents_ids(&rtxn, field_id).unwrap(); +// writeln!( +// &mut snap, +// "{field_id:<3} {}", +// display_bitmap(&string_faceted_documents_ids) +// ) +// .unwrap(); +// } +// let string_faceted_documents_ids = snap; +// snapshot_string!(string_faceted_documents_ids); +// } +// { +// let words_fst = index.words_fst(&rtxn).unwrap(); +// let bytes = words_fst.into_fst().as_bytes().to_owned(); +// let mut words_fst = String::new(); +// for byte in bytes { +// write!(&mut words_fst, "{:x}", byte).unwrap(); +// } +// snapshot_string!(words_fst); +// } +// { +// let words_prefixes_fst = index.words_prefixes_fst(&rtxn).unwrap(); +// let bytes = words_prefixes_fst.into_fst().as_bytes().to_owned(); +// let mut words_prefixes_fst = String::new(); +// for byte in 
bytes { +// write!(&mut words_prefixes_fst, "{:x}", byte).unwrap(); +// } +// snapshot_string!(words_prefixes_fst); +// } +// } +// }); +// } diff --git a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap new file mode 100644 index 000000000..9b074fb59 --- /dev/null +++ b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap @@ -0,0 +1,7 @@ +--- +source: milli/src/index.rs +--- +age 1 +id 2 +name 2 + diff --git a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap new file mode 100644 index 000000000..9b074fb59 --- /dev/null +++ b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap @@ -0,0 +1,7 @@ +--- +source: milli/src/index.rs +--- +age 1 +id 2 +name 2 + From b9907997e4e17e3675f970bcccf3fb267f9edc8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 3 Aug 2022 16:25:33 +0200 Subject: [PATCH 03/16] Remove old snapshot tests code --- milli/src/snapshot_tests.rs | 342 ------------------------------------ 1 file changed, 342 deletions(-) diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index 6f41ddd5b..77eeeb159 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -457,345 +457,3 @@ pub fn display_bitmap(b: &RoaringBitmap) -> String { s.push(']'); s } - -// #[macro_export] -// macro_rules! 
snapshot_index { -// ($index:expr, $name:expr) => { -// $crate::snapshot_tests::snapshot_index($index, $name, None, None) -// }; -// ($index:expr, $name:expr, include: $regex:literal) => { -// $crate::snapshot_tests::snapshot_index( -// $index, -// $name, -// Some(regex::Regex::new($regex).unwrap()), -// None, -// ) -// }; -// ($index:expr, $name:expr, exclude: $regex:literal) => { -// $crate::snapshot_tests::snapshot_index( -// $index, -// $name, -// None, -// Some(regex::Regex::new($regex).unwrap()), -// ) -// }; -// } - -// pub fn snap_of_db_settings(index: &Index, include: Option) -> String { -// let should_snapshot = -// |name: &str| -> bool { include.as_ref().map(|f| f.is_match(name)).unwrap_or(true) }; - -// let rtxn = index.read_txn().unwrap(); - -// let mut snap = String::new(); - -// macro_rules! write_setting_to_snap { -// ($name:ident) => { -// if should_snapshot(&format!("settings.{}", stringify!($name))) { -// let $name = index.$name(&rtxn).unwrap(); -// writeln!(&mut snap, "{}: {:?}", stringify!($name), $name).unwrap(); -// } -// }; -// } -// write_setting_to_snap!(primary_key); -// write_setting_to_snap!(criteria); -// write_setting_to_snap!(displayed_fields); -// write_setting_to_snap!(distinct_field); -// write_setting_to_snap!(filterable_fields); -// write_setting_to_snap!(sortable_fields); -// write_setting_to_snap!(synonyms); -// write_setting_to_snap!(authorize_typos); -// write_setting_to_snap!(min_word_len_one_typo); -// write_setting_to_snap!(min_word_len_two_typos); -// write_setting_to_snap!(exact_words); -// write_setting_to_snap!(exact_attributes); -// write_setting_to_snap!(max_values_per_facet); -// write_setting_to_snap!(pagination_max_total_hits); -// write_setting_to_snap!(searchable_fields); -// write_setting_to_snap!(user_defined_searchable_fields); - -// snap -// } - -// #[track_caller] -// pub fn snapshot_index( -// index: &Index, -// name: &str, -// include: Option, -// exclude: Option, -// ) { -// let should_snapshot = |name: 
&str| -> bool { -// include.as_ref().map(|f| f.is_match(name)).unwrap_or(true) -// && !exclude.as_ref().map(|f| f.is_match(name)).unwrap_or(false) -// }; -// let settings = default_db_snapshot_settings_for_test(Some(name)); -// let rtxn = index.read_txn().unwrap(); - -// let snapshot_hash = |name: &str, snap: &str| { -// let store_whole_snapshot = -// std::env::var("MILLI_TEST_FULL_SNAPS").unwrap_or("false".to_owned()); -// let store_whole_snapshot: bool = store_whole_snapshot.parse().unwrap(); -// if snap.len() < 512 { -// insta::assert_snapshot!(name, snap); -// } else { -// if store_whole_snapshot { -// insta::assert_snapshot!(format!("{name}.full"), snap); -// } -// let hash = md5::compute(snap.as_bytes()); -// let hash_str = format!("{hash:x}"); -// insta::assert_snapshot!(format!("{name}.hash"), hash_str); -// } -// }; - -// macro_rules! snapshot_db { -// ($name:ident, |$vars:pat| $push:block) => { -// let name_str = stringify!($name); -// if should_snapshot(name_str) { -// let iter = index.$name.iter(&rtxn).unwrap(); -// let mut snap = String::new(); -// for x in iter { -// let $vars = x.unwrap(); -// snap.push_str($push); -// snap.push('\n'); -// } -// snapshot_hash(name_str, &snap); -// } -// }; -// } - -// fn display_bitmap(b: &RoaringBitmap) -> String { -// let mut s = String::new(); -// s.push('['); -// for x in b.into_iter() { -// write!(&mut s, "{x}, ").unwrap(); -// } -// s.push(']'); -// s -// } - -// settings.bind(|| { -// snapshot_db!(word_docids, |(s, b)| { &format!("{s:<16} {}", $crate::snapshot_tests::display_bitmap(&b)) }); -// snapshot_db!(exact_word_docids, |(s, b)| { &format!("{s:<16} {}", $crate::snapshot_tests::display_bitmap(&b)) }); -// snapshot_db!(word_prefix_docids, |(s, b)| { &format!("{s:<16} {}", display_bitmap(&b)) }); -// snapshot_db!(exact_word_prefix_docids, |(s, b)| { -// &format!("{s:<16} {}", display_bitmap(&b)) -// }); - -// snapshot_db!(docid_word_positions, |((idx, s), b)| { -// &format!("{idx:<6} {s:<16} {}", 
display_bitmap(&b)) -// }); - -// snapshot_db!(word_pair_proximity_docids, |((word1, word2, proximity), b)| { -// &format!("{word1:<16} {word2:<16} {proximity:<2} {}", display_bitmap(&b)) -// }); - -// snapshot_db!(word_prefix_pair_proximity_docids, |((word1, prefix, proximity), b)| { -// &format!("{word1:<16} {prefix:<4} {proximity:<2} {}", display_bitmap(&b)) -// }); - -// snapshot_db!(word_position_docids, |((word, position), b)| { -// &format!("{word:<16} {position:<6} {}", display_bitmap(&b)) -// }); - -// snapshot_db!(field_id_word_count_docids, |((field_id, word_count), b)| { -// &format!("{field_id:<3} {word_count:<6} {}", display_bitmap(&b)) -// }); - -// snapshot_db!(word_prefix_position_docids, |((word_prefix, position), b)| { -// &format!("{word_prefix:<4} {position:<6} {}", display_bitmap(&b)) -// }); - -// snapshot_db!(facet_id_f64_docids, |((facet_id, level, left, right), b)| { -// &format!("{facet_id:<3} {level:<2} {left:<6} {right:<6} {}", display_bitmap(&b)) -// }); -// { -// let name_str = stringify!(facet_id_string_docids); -// if should_snapshot(name_str) { -// let bytes_db = index.facet_id_string_docids.remap_types::(); -// let iter = bytes_db.iter(&rtxn).unwrap(); -// let mut snap = String::new(); - -// for x in iter { -// let (key, value) = x.unwrap(); -// if let Some((field_id, normalized_str)) = -// FacetStringLevelZeroCodec::bytes_decode(key) -// { -// let (orig_string, docids) = -// FacetStringLevelZeroValueCodec::bytes_decode(value).unwrap(); -// snap.push_str(&format!( -// "{field_id:<3} {normalized_str:<8} {orig_string:<8} {}\n", -// display_bitmap(&docids) -// )); -// } else if let Some((field_id, level, left, right)) = -// FacetLevelValueU32Codec::bytes_decode(key) -// { -// snap.push_str(&format!("{field_id:<3} {level:<2} {left:<6} {right:<6} ")); -// let (bounds, docids) = FacetStringZeroBoundsValueCodec::< -// CboRoaringBitmapCodec, -// >::bytes_decode(value) -// .unwrap(); -// if let Some((left, right)) = bounds { -// 
snap.push_str(&format!("{left:<8} {right:<8} ")); -// } -// snap.push_str(&display_bitmap(&docids)); -// snap.push('\n'); -// } else { -// panic!(); -// } -// } -// snapshot_hash(name_str, &snap); -// } -// } - -// // Main - computed settings -// { -// let mut snap = String::new(); - -// macro_rules! write_setting_to_snap { -// ($name:ident) => { -// if should_snapshot(&format!("settings.{}", stringify!($name))) { -// let $name = index.$name(&rtxn).unwrap(); -// writeln!(&mut snap, "{}: {:?}", stringify!($name), $name).unwrap(); -// } -// }; -// } -// write_setting_to_snap!(primary_key); -// write_setting_to_snap!(criteria); -// write_setting_to_snap!(displayed_fields); -// write_setting_to_snap!(distinct_field); -// write_setting_to_snap!(filterable_fields); -// write_setting_to_snap!(sortable_fields); -// write_setting_to_snap!(synonyms); -// write_setting_to_snap!(authorize_typos); -// write_setting_to_snap!(min_word_len_one_typo); -// write_setting_to_snap!(min_word_len_two_typos); -// write_setting_to_snap!(exact_words); -// write_setting_to_snap!(exact_attributes); -// write_setting_to_snap!(max_values_per_facet); -// write_setting_to_snap!(pagination_max_total_hits); -// write_setting_to_snap!(searchable_fields); -// write_setting_to_snap!(user_defined_searchable_fields); - -// if !snap.is_empty() { -// insta::assert_snapshot!("settings", snap); -// } -// } -// // Main - others -// { -// macro_rules! 
snapshot_string { -// ($name:ident) => { -// if should_snapshot(&format!("{}", stringify!($name))) { -// insta::assert_snapshot!(stringify!($name), $name); -// } -// }; -// } -// { -// let documents_ids = index.documents_ids(&rtxn).unwrap(); -// let documents_ids = display_bitmap(&documents_ids); -// snapshot_string!(documents_ids); -// } -// { -// let stop_words = index.stop_words(&rtxn).unwrap(); -// let stop_words = format!("{stop_words:?}"); -// snapshot_string!(stop_words); -// } -// { -// let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap(); -// let soft_deleted_documents_ids = display_bitmap(&soft_deleted_documents_ids); -// snapshot_string!(soft_deleted_documents_ids); -// } - -// { -// let mut field_distribution = String::new(); -// for (field, count) in index.field_distribution(&rtxn).unwrap() { -// writeln!(&mut field_distribution, "{field:<16} {count:<6}").unwrap(); -// } -// snapshot_string!(field_distribution); -// } -// let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); -// { -// let mut snap = String::new(); -// for field_id in fields_ids_map.ids() { -// let name = fields_ids_map.name(field_id).unwrap(); -// writeln!(&mut snap, "{field_id:<3} {name:<16}").unwrap(); -// } -// let fields_ids_map = snap; -// snapshot_string!(fields_ids_map); -// } - -// { -// let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap(); -// let geo_faceted_documents_ids = display_bitmap(&geo_faceted_documents_ids); -// snapshot_string!(geo_faceted_documents_ids); -// } -// // let geo_rtree = index.geo_rtree(&rtxn).unwrap(); -// { -// let ExternalDocumentsIds { soft, hard, .. 
} = -// index.external_documents_ids(&rtxn).unwrap(); -// let mut external_documents_ids = String::new(); -// let soft_bytes = soft.into_fst().as_bytes().to_owned(); -// let mut hex_soft = String::new(); -// for byte in soft_bytes { -// write!(&mut hex_soft, "{:x}", byte).unwrap(); -// } -// writeln!(&mut external_documents_ids, "soft: {hex_soft}").unwrap(); -// let hard_bytes = hard.into_fst().as_bytes().to_owned(); -// let mut hex_hard = String::new(); -// for byte in hard_bytes { -// write!(&mut hex_hard, "{:x}", byte).unwrap(); -// } -// writeln!(&mut external_documents_ids, "hard: {hex_hard}").unwrap(); - -// snapshot_string!(external_documents_ids); -// } -// { -// let mut snap = String::new(); -// for field_id in fields_ids_map.ids() { -// let number_faceted_documents_ids = -// index.number_faceted_documents_ids(&rtxn, field_id).unwrap(); -// writeln!( -// &mut snap, -// "{field_id:<3} {}", -// display_bitmap(&number_faceted_documents_ids) -// ) -// .unwrap(); -// } -// let number_faceted_documents_ids = snap; -// snapshot_string!(number_faceted_documents_ids); -// } -// { -// let mut snap = String::new(); -// for field_id in fields_ids_map.ids() { -// let string_faceted_documents_ids = -// index.string_faceted_documents_ids(&rtxn, field_id).unwrap(); -// writeln!( -// &mut snap, -// "{field_id:<3} {}", -// display_bitmap(&string_faceted_documents_ids) -// ) -// .unwrap(); -// } -// let string_faceted_documents_ids = snap; -// snapshot_string!(string_faceted_documents_ids); -// } -// { -// let words_fst = index.words_fst(&rtxn).unwrap(); -// let bytes = words_fst.into_fst().as_bytes().to_owned(); -// let mut words_fst = String::new(); -// for byte in bytes { -// write!(&mut words_fst, "{:x}", byte).unwrap(); -// } -// snapshot_string!(words_fst); -// } -// { -// let words_prefixes_fst = index.words_prefixes_fst(&rtxn).unwrap(); -// let bytes = words_prefixes_fst.into_fst().as_bytes().to_owned(); -// let mut words_prefixes_fst = String::new(); -// for byte in 
bytes { -// write!(&mut words_prefixes_fst, "{:x}", byte).unwrap(); -// } -// snapshot_string!(words_prefixes_fst); -// } -// } -// }); -// } From 3a734af159a3e01289ece0ff0abd253112e28ac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 3 Aug 2022 16:33:36 +0200 Subject: [PATCH 04/16] Add snapshot tests for Facets::execute --- .gitignore | 7 ++ milli/src/documents/mod.rs | 11 +++ milli/src/snapshot_tests.rs | 2 +- milli/src/update/facets.rs | 88 +++++++++++++++++++ .../default/facet_id_f64_docids.hash.snap | 4 + .../facet_id_f64_docids.hash.snap | 4 + .../facet_id_f64_docids.hash.snap | 4 + .../facet_id_f64_docids.hash.snap | 4 + .../facet_id_f64_docids.hash.snap | 4 + .../facet_id_f64_docids.hash.snap | 4 + .../default/facet_id_string_docids.hash.snap | 4 + .../facet_id_string_docids.hash.snap | 4 + 12 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap create mode 100644 milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap create mode 100644 milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap create mode 100644 milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap create mode 100644 milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap create mode 100644 milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap create mode 100644 milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap create mode 100644 milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap diff --git a/.gitignore b/.gitignore 
index 107b5bb36..02c4fcd79 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,10 @@ *.csv *.mmdb *.svg + +# Snapshots +## ... large +*.full.snap + +# ... unreviewed +*.snap.new diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index c1580309a..5c83991c2 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -190,6 +190,17 @@ pub fn documents_batch_reader_from_objects( DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap() } +#[cfg(test)] +pub fn batch_reader_from_documents( + documents: &[Object], +) -> DocumentsBatchReader>> { + let mut builder = DocumentsBatchBuilder::new(Vec::new()); + for object in documents { + builder.append_json_object(&object).unwrap(); + } + DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap() +} + #[cfg(test)] mod test { use std::io::Cursor; diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index 77eeeb159..a881a155e 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -30,7 +30,7 @@ pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> insta::Setti #[macro_export] macro_rules! 
db_snap { - ($index:ident, $db_name:ident, $name:literal) => { + ($index:ident, $db_name:ident, $name:expr) => { let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(Some( &format!("{}", $name), )); diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index 5892123eb..981fa819c 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -342,3 +342,91 @@ fn write_string_entry( writer.insert(&key, &data)?; Ok(()) } + +#[cfg(test)] +mod tests { + use std::num::NonZeroUsize; + + use crate::{db_snap, documents::batch_reader_from_documents, index::tests::TempIndex}; + + #[test] + fn test_facets_number() { + let test = + |name: &str, group_size: Option, min_level_size: Option| { + let mut index = TempIndex::new_with_map_size(4096 * 1000 * 10); // 40MB + index.index_documents_config.autogenerate_docids = true; + index.index_documents_config.facet_level_group_size = group_size; + index.index_documents_config.facet_min_level_size = min_level_size; + + index + .update_settings(|settings| { + settings.set_filterable_fields( + IntoIterator::into_iter(["facet".to_owned(), "facet2".to_owned()]) + .collect(), + ); + }) + .unwrap(); + + let mut documents = vec![]; + for i in 0..1_000 { + documents.push(serde_json::json!({ "facet": i }).as_object().unwrap().clone()); + } + for i in 0..100 { + documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone()); + } + let documents = batch_reader_from_documents(&documents); + + index.add_documents(documents).unwrap(); + + db_snap!(index, facet_id_f64_docids, name); + }; + + test("default", None, None); + test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1)); + test("small_groups_small_levels", NonZeroUsize::new(2), NonZeroUsize::new(2)); + test("small_groups_large_levels", NonZeroUsize::new(2), NonZeroUsize::new(128)); + test("large_groups_small_levels", NonZeroUsize::new(16), NonZeroUsize::new(2)); + test("large_groups_large_levels", 
NonZeroUsize::new(16), NonZeroUsize::new(256)); + } + + #[test] + fn test_facets_string() { + let test = |name: &str, + group_size: Option, + min_level_size: Option| { + let mut index = TempIndex::new_with_map_size(4096 * 1000 * 10); // 40MB + index.index_documents_config.autogenerate_docids = true; + index.index_documents_config.facet_level_group_size = group_size; + index.index_documents_config.facet_min_level_size = min_level_size; + + index + .update_settings(|settings| { + settings.set_filterable_fields( + IntoIterator::into_iter(["facet".to_owned(), "facet2".to_owned()]) + .collect(), + ); + }) + .unwrap(); + + let mut documents = vec![]; + for i in 0..100 { + documents.push( + serde_json::json!({ "facet": format!("s{i:X}") }).as_object().unwrap().clone(), + ); + } + for i in 0..10 { + documents.push( + serde_json::json!({ "facet2": format!("s{i:X}") }).as_object().unwrap().clone(), + ); + } + let documents = batch_reader_from_documents(&documents); + + index.add_documents(documents).unwrap(); + + db_snap!(index, facet_id_string_docids, name); + }; + + test("default", None, None); + test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1)); + } +} diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap new file mode 100644 index 000000000..373455db6 --- /dev/null +++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/facets.rs +--- +587899707db2848da3f18399e14ed4d0 diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap new file mode 100644 index 000000000..c3415c320 --- /dev/null +++ 
b/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/facets.rs +--- +02bbf2ca1663cccea0e4c06d5ad06a45 diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap new file mode 100644 index 000000000..78dad29f1 --- /dev/null +++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/facets.rs +--- +e68ea591e1af3e53e544dff9a1648e88 diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap new file mode 100644 index 000000000..61a5908f4 --- /dev/null +++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/facets.rs +--- +12a4bb0f5b95d7629c2b9a915150c0cf diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap new file mode 100644 index 000000000..961346de5 --- /dev/null +++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/facets.rs +--- +6438e94bc7fada13022e0efccdf294e0 diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap 
b/milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap new file mode 100644 index 000000000..2b7c1ef9c --- /dev/null +++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/facets.rs +--- +5348bbc46b5384455b6a900666d2a502 diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap new file mode 100644 index 000000000..901b86255 --- /dev/null +++ b/milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/facets.rs +--- +faddef9eae5f2efacfec51f20f2e8cd6 diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap new file mode 100644 index 000000000..aa6c85461 --- /dev/null +++ b/milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/facets.rs +--- +ddb8fc987c5dc892337682595043858e From 606625668977b7620fd5b1d47ae0b8f0562d3183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 3 Aug 2022 16:49:03 +0200 Subject: [PATCH 05/16] Add snapshot tests for indexing of word_prefix_pair_proximity_docids --- milli/src/documents/mod.rs | 11 --- milli/src/update/facets.rs | 8 +- .../word_prefix_pair_proximity_docids.snap | 46 ++++++++++ .../word_prefix_pair_proximity_docids.snap | 56 ++++++++++++ .../word_prefix_pair_proximity_docids.rs | 87 +++++++++++++++++++ 5 files changed, 195 insertions(+), 13 deletions(-) create mode 
100644 milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap create mode 100644 milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index 5c83991c2..c1580309a 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -190,17 +190,6 @@ pub fn documents_batch_reader_from_objects( DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap() } -#[cfg(test)] -pub fn batch_reader_from_documents( - documents: &[Object], -) -> DocumentsBatchReader>> { - let mut builder = DocumentsBatchBuilder::new(Vec::new()); - for object in documents { - builder.append_json_object(&object).unwrap(); - } - DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap() -} - #[cfg(test)] mod test { use std::io::Cursor; diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index 981fa819c..904f165b1 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -347,7 +347,11 @@ fn write_string_entry( mod tests { use std::num::NonZeroUsize; - use crate::{db_snap, documents::batch_reader_from_documents, index::tests::TempIndex}; + use crate::{ + db_snap, + documents::{batch_reader_from_documents, documents_batch_reader_from_objects}, + index::tests::TempIndex, + }; #[test] fn test_facets_number() { @@ -419,7 +423,7 @@ mod tests { serde_json::json!({ "facet2": format!("s{i:X}") }).as_object().unwrap().clone(), ); } - let documents = batch_reader_from_documents(&documents); + let documents = documents_batch_reader_from_objects(documents); index.add_documents(documents).unwrap(); diff --git a/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap 
b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap new file mode 100644 index 000000000..0a61cf4e8 --- /dev/null +++ b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap @@ -0,0 +1,46 @@ +--- +source: milli/src/update/word_prefix_pair_proximity_docids.rs +--- +5 a 1 [101, ] +5 a 2 [101, ] +5 b 4 [101, ] +5 be 4 [101, ] +am a 3 [101, ] +amazing a 1 [100, ] +amazing a 2 [100, ] +amazing a 3 [100, ] +amazing b 2 [100, ] +amazing be 2 [100, ] +an a 1 [100, ] +an a 2 [100, ] +an b 3 [100, ] +an be 3 [100, ] +and a 2 [100, ] +and a 3 [100, ] +and a 4 [100, ] +and b 1 [100, ] +and be 1 [100, ] +at a 1 [100, ] +at a 2 [100, 101, ] +at a 3 [100, ] +at b 3 [101, ] +at b 4 [100, ] +at be 3 [101, ] +at be 4 [100, ] +beautiful a 2 [100, ] +beautiful a 3 [100, ] +beautiful a 4 [100, ] +bell a 2 [101, ] +bell a 4 [101, ] +house a 3 [100, ] +house a 4 [100, ] +house b 2 [100, ] +house be 2 [100, ] +rings a 1 [101, ] +rings a 3 [101, ] +rings b 2 [101, ] +rings be 2 [101, ] +the a 3 [101, ] +the b 1 [101, ] +the be 1 [101, ] + diff --git a/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap new file mode 100644 index 000000000..aabd9ddec --- /dev/null +++ b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap @@ -0,0 +1,56 @@ +--- +source: milli/src/update/word_prefix_pair_proximity_docids.rs +--- +5 a 1 [101, ] +5 a 2 [101, ] +5 am 1 [101, ] +5 b 4 [101, ] +5 be 4 [101, ] +am a 3 [101, ] +amazing a 1 [100, ] +amazing a 2 [100, ] +amazing a 3 [100, ] +amazing b 2 [100, ] +amazing be 2 [100, ] +an a 1 [100, ] +an a 2 [100, 202, ] +an am 1 [100, ] 
+an b 3 [100, ] +an be 3 [100, ] +and a 2 [100, ] +and a 3 [100, ] +and a 4 [100, ] +and am 2 [100, ] +and b 1 [100, ] +and be 1 [100, ] +at a 1 [100, 202, ] +at a 2 [100, 101, ] +at a 3 [100, ] +at am 2 [100, 101, ] +at b 3 [101, ] +at b 4 [100, ] +at be 3 [101, ] +at be 4 [100, ] +beautiful a 2 [100, ] +beautiful a 3 [100, ] +beautiful a 4 [100, ] +beautiful am 3 [100, ] +bell a 2 [101, ] +bell a 4 [101, ] +bell am 4 [101, ] +extraordinary a 2 [202, ] +extraordinary a 3 [202, ] +house a 3 [100, 202, ] +house a 4 [100, 202, ] +house am 4 [100, ] +house b 2 [100, ] +house be 2 [100, ] +rings a 1 [101, ] +rings a 3 [101, ] +rings am 3 [101, ] +rings b 2 [101, ] +rings be 2 [101, ] +the a 3 [101, ] +the b 1 [101, ] +the be 1 [101, ] + diff --git a/milli/src/update/word_prefix_pair_proximity_docids.rs b/milli/src/update/word_prefix_pair_proximity_docids.rs index 72b41c472..7e5d5c090 100644 --- a/milli/src/update/word_prefix_pair_proximity_docids.rs +++ b/milli/src/update/word_prefix_pair_proximity_docids.rs @@ -244,3 +244,90 @@ fn insert_current_prefix_data_in_sorter<'a>( Ok(()) } + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use crate::{ + db_snap, + documents::{DocumentsBatchBuilder, DocumentsBatchReader}, + index::tests::TempIndex, + }; + + fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec { + let mut documents = Vec::new(); + for prefix in prefixes { + for i in 0..50 { + documents.push( + serde_json::json!({ + "text": format!("{prefix}{i:x}"), + }) + .as_object() + .unwrap() + .clone(), + ) + } + } + documents + } + + #[test] + fn test_update() { + let mut index = TempIndex::new(); + index.index_documents_config.words_prefix_threshold = Some(50); + index.index_documents_config.autogenerate_docids = true; + + index + .update_settings(|settings| { + settings.set_searchable_fields(vec!["text".to_owned()]); + }) + .unwrap(); + + let batch_reader_from_documents = |documents| { + let mut builder = 
DocumentsBatchBuilder::new(Vec::new()); + for object in documents { + builder.append_json_object(&object).unwrap(); + } + DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap() + }; + + let mut documents = documents_with_enough_different_words_for_prefixes(&["a", "be"]); + // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database + documents.push( + serde_json::json!({ + "text": "At an amazing and beautiful house" + }) + .as_object() + .unwrap() + .clone(), + ); + documents.push( + serde_json::json!({ + "text": "The bell rings at 5 am" + }) + .as_object() + .unwrap() + .clone(), + ); + + let documents = batch_reader_from_documents(documents); + index.add_documents(documents).unwrap(); + + db_snap!(index, word_prefix_pair_proximity_docids, "initial"); + + let mut documents = documents_with_enough_different_words_for_prefixes(&["am", "an"]); + documents.push( + serde_json::json!({ + "text": "At an extraordinary house" + }) + .as_object() + .unwrap() + .clone(), + ); + let documents = batch_reader_from_documents(documents); + index.add_documents(documents).unwrap(); + + db_snap!(index, word_prefix_pair_proximity_docids, "update"); + } +} From 8ac24d3114e43622ba9a194ee2d574f4fab2861f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 4 Aug 2022 10:50:38 +0200 Subject: [PATCH 06/16] Cargo fmt + fix compiler warnings/error --- milli/src/search/facet/filter.rs | 8 +++---- milli/src/snapshot_tests.rs | 22 ++++++++++--------- milli/src/update/facets.rs | 10 ++++----- .../word_prefix_pair_proximity_docids.rs | 8 +++---- 4 files changed, 22 insertions(+), 26 deletions(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 225d3ea8d..03ec03d39 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -490,14 +490,12 @@ impl<'a> From> for Filter<'a> { #[cfg(test)] mod tests { - use std::fmt::Write; - + use 
crate::index::tests::TempIndex; + use crate::Filter; use big_s::S; use either::Either; use maplit::hashset; - - use crate::index::tests::TempIndex; - use crate::Filter; + use std::fmt::Write; #[test] fn empty_db() { diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index a881a155e..f9929bb22 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -1,14 +1,16 @@ -use crate::{ - heed_codec::facet::{ - FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, - FacetStringZeroBoundsValueCodec, - }, - make_db_snap_from_iter, CboRoaringBitmapCodec, ExternalDocumentsIds, Index, -}; -use heed::{types::ByteSlice, BytesDecode}; -use roaring::RoaringBitmap; +use std::borrow::Cow; +use std::fmt::Write; use std::path::Path; -use std::{borrow::Cow, fmt::Write}; + +use heed::types::ByteSlice; +use heed::BytesDecode; +use roaring::RoaringBitmap; + +use crate::heed_codec::facet::{ + FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, + FacetStringZeroBoundsValueCodec, +}; +use crate::{make_db_snap_from_iter, CboRoaringBitmapCodec, ExternalDocumentsIds, Index}; #[track_caller] pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> insta::Settings { diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index 904f165b1..4c4963b56 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -347,11 +347,9 @@ fn write_string_entry( mod tests { use std::num::NonZeroUsize; - use crate::{ - db_snap, - documents::{batch_reader_from_documents, documents_batch_reader_from_objects}, - index::tests::TempIndex, - }; + use crate::db_snap; + use crate::documents::documents_batch_reader_from_objects; + use crate::index::tests::TempIndex; #[test] fn test_facets_number() { @@ -378,7 +376,7 @@ mod tests { for i in 0..100 { documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone()); } - let documents = 
batch_reader_from_documents(&documents); + let documents = documents_batch_reader_from_objects(documents); index.add_documents(documents).unwrap(); diff --git a/milli/src/update/word_prefix_pair_proximity_docids.rs b/milli/src/update/word_prefix_pair_proximity_docids.rs index 7e5d5c090..574b49e97 100644 --- a/milli/src/update/word_prefix_pair_proximity_docids.rs +++ b/milli/src/update/word_prefix_pair_proximity_docids.rs @@ -249,11 +249,9 @@ fn insert_current_prefix_data_in_sorter<'a>( mod tests { use std::io::Cursor; - use crate::{ - db_snap, - documents::{DocumentsBatchBuilder, DocumentsBatchReader}, - index::tests::TempIndex, - }; + use crate::db_snap; + use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; + use crate::index::tests::TempIndex; fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec { let mut documents = Vec::new(); From 4bba2f41d784cd2df2ad3fb730deae19718c6f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 4 Aug 2022 11:00:46 +0200 Subject: [PATCH 07/16] Switch to snapshot tests for query_tree.rs --- milli/src/search/query_tree.rs | 537 ++++++++------------------------- 1 file changed, 134 insertions(+), 403 deletions(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index e0fac0f43..ace1d9dfe 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -857,30 +857,16 @@ mod test { let query = "hey friends"; let tokens = query.tokenize(); - let expected = Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Query(Query { - prefix: true, - kind: QueryKind::tolerant(1, "friends".to_string()), - }), - ]), - Operation::Query(Query { - prefix: true, - kind: QueryKind::tolerant(1, "heyfriends".to_string()), - }), - ], - ); - let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); - 
assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR + AND + Exact { word: "hey" } + PrefixTolerant { word: "friends", max typo: 1 } + PrefixTolerant { word: "heyfriends", max typo: 1 } + "###); } #[test] @@ -888,30 +874,16 @@ mod test { let query = "hey friends "; let tokens = query.tokenize(); - let expected = Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "friends".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "heyfriends".to_string()), - }), - ], - ); - let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR + AND + Exact { word: "hey" } + Tolerant { word: "friends", max typo: 1 } + Tolerant { word: "heyfriends", max typo: 1 } + "###); } #[test] @@ -919,62 +891,24 @@ mod test { let query = "hello world "; let tokens = query.tokenize(); - let expected = Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Or( - false, - vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hi".to_string()), - }), - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("good".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("morning".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "hello".to_string()), - }), - ], - ), - Operation::Or( - false, - vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("earth".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("nature".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, 
"world".to_string()), - }), - ], - ), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "helloworld".to_string()), - }), - ], - ); - let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR + AND + OR + Exact { word: "hi" } + AND + Exact { word: "good" } + Exact { word: "morning" } + Tolerant { word: "hello", max typo: 1 } + OR + Exact { word: "earth" } + Exact { word: "nature" } + Tolerant { word: "world", max typo: 1 } + Tolerant { word: "helloworld", max typo: 1 } + "###); } #[test] @@ -982,97 +916,34 @@ mod test { let query = "new york city "; let tokens = query.tokenize(); - let expected = Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("new".to_string()), - }), - Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("york".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("city".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "yorkcity".to_string()), - }), - ], - ), - ]), - Operation::And(vec![ - Operation::Or( - false, - vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("nyc".to_string()), - }), - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("new".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("york".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("city".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "newyork".to_string()), - }), - ], - ), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("city".to_string()), - }), - ]), - Operation::Or( - 
false, - vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("nyc".to_string()), - }), - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("new".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("york".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "newyorkcity".to_string()), - }), - ], - ), - ], - ); - let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR + AND + Exact { word: "new" } + OR + AND + Exact { word: "york" } + Exact { word: "city" } + Tolerant { word: "yorkcity", max typo: 1 } + AND + OR + Exact { word: "nyc" } + AND + Exact { word: "new" } + Exact { word: "york" } + Exact { word: "city" } + Tolerant { word: "newyork", max typo: 1 } + Exact { word: "city" } + OR + Exact { word: "nyc" } + AND + Exact { word: "new" } + Exact { word: "york" } + Tolerant { word: "newyorkcity", max typo: 1 } + "###); } #[test] @@ -1080,30 +951,16 @@ mod test { let query = "n grams "; let tokens = query.tokenize(); - let expected = Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("n".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "grams".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "ngrams".to_string()), - }), - ], - ); - let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR + AND + Exact { word: "n" } + Tolerant { word: "grams", max typo: 1 } + Tolerant { word: "ngrams", max typo: 1 } + "###); } #[test] @@ -1111,36 +968,18 @@ mod test { let query = "wordsplit fish "; let tokens = 
query.tokenize(); - let expected = Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Or( - false, - vec![ - Operation::Phrase(vec!["word".to_string(), "split".to_string()]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(2, "wordsplit".to_string()), - }), - ], - ), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("fish".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "wordsplitfish".to_string()), - }), - ], - ); - let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR + AND + OR + PHRASE ["word", "split"] + Tolerant { word: "wordsplit", max typo: 2 } + Exact { word: "fish" } + Tolerant { word: "wordsplitfish", max typo: 1 } + "###); } #[test] @@ -1148,15 +987,14 @@ mod test { let query = "\"hey friends\" \" \" \"wooop"; let tokens = query.tokenize(); - let expected = Operation::And(vec![ - Operation::Phrase(vec!["hey".to_string(), "friends".to_string()]), - Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), - ]); - let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + AND + PHRASE ["hey", "friends"] + Exact { word: "wooop" } + "###); } #[test] @@ -1164,15 +1002,14 @@ mod test { let query = "\"hey friends. 
wooop wooop\""; let tokens = query.tokenize(); - let expected = Operation::And(vec![ - Operation::Phrase(vec!["hey".to_string(), "friends".to_string()]), - Operation::Phrase(vec!["wooop".to_string(), "wooop".to_string()]), - ]); - let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + AND + PHRASE ["hey", "friends"] + PHRASE ["wooop", "wooop"] + "###); } #[test] @@ -1180,82 +1017,30 @@ mod test { let query = "hey my friend "; let tokens = query.tokenize(); - let expected = Operation::Or( - true, - vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("my".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "heymy".to_string()), - }), - ], - ), - Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("my".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "friend".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "myfriend".to_string()), - }), - ], - ), - ]), - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "heymy".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "friend".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "heymyfriend".to_string()), - }), - ], - ), - ], - ); 
let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR(WORD) + Exact { word: "hey" } + OR + AND + Exact { word: "hey" } + Exact { word: "my" } + Tolerant { word: "heymy", max typo: 1 } + OR + AND + Exact { word: "hey" } + OR + AND + Exact { word: "my" } + Tolerant { word: "friend", max typo: 1 } + Tolerant { word: "myfriend", max typo: 1 } + AND + Tolerant { word: "heymy", max typo: 1 } + Tolerant { word: "friend", max typo: 1 } + Tolerant { word: "heymyfriend", max typo: 1 } + "###); } #[test] @@ -1263,11 +1048,12 @@ mod test { let query = "\"hey my\""; let tokens = query.tokenize(); - let expected = Operation::Phrase(vec!["hey".to_string(), "my".to_string()]); let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + PHRASE ["hey", "my"] + "###); } #[test] @@ -1275,68 +1061,27 @@ mod test { let query = r#""hey" my good "friend""#; let tokens = query.tokenize(); - let expected = Operation::Or( - true, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("friend".to_string()), - }), - ]), - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("my".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("friend".to_string()), - }), - ]), - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("my".to_string()), - }), - 
Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("good".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::tolerant(1, "mygood".to_string()), - }), - ], - ), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("friend".to_string()), - }), - ]), - ], - ); let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR(WORD) + AND + Exact { word: "hey" } + Exact { word: "friend" } + AND + Exact { word: "hey" } + Exact { word: "my" } + Exact { word: "friend" } + AND + Exact { word: "hey" } + OR + AND + Exact { word: "my" } + Exact { word: "good" } + Tolerant { word: "mygood", max typo: 1 } + Exact { word: "friend" } + "###); } #[test] @@ -1344,29 +1089,16 @@ mod test { let query = "hey friends "; let tokens = query.tokenize(); - let expected = Operation::Or( - false, - vec![ - Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("hey".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("friends".to_string()), - }), - ]), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("heyfriends".to_string()), - }), - ], - ); let (query_tree, _) = TestContext::default().build(false, false, None, tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + OR + AND + Exact { word: "hey" } + Exact { word: "friends" } + Exact { word: "heyfriends" } + "###); } #[test] @@ -1374,15 +1106,14 @@ mod test { let query = "\"hey my\" good friend"; let tokens = query.tokenize(); - let expected = Operation::And(vec![ - Operation::Phrase(vec!["hey".to_string(), "my".to_string()]), - Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }), - ]); - let (query_tree, _) = TestContext::default().build(false, false, Some(2), 
tokens).unwrap().unwrap(); - assert_eq!(expected, query_tree); + insta::assert_debug_snapshot!(query_tree, @r###" + AND + PHRASE ["hey", "my"] + Exact { word: "good" } + "###); } #[test] From a9c7d8269308e6e145437c5ff7de952fed8bbad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 4 Aug 2022 11:04:07 +0200 Subject: [PATCH 08/16] Switch to snapshot tests for search/criteria/attribute.rs --- milli/src/search/criteria/attribute.rs | 72 +++++++++++++++++++------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/milli/src/search/criteria/attribute.rs b/milli/src/search/criteria/attribute.rs index 3d67b60c0..d8feeeee9 100644 --- a/milli/src/search/criteria/attribute.rs +++ b/milli/src/search/criteria/attribute.rs @@ -632,25 +632,59 @@ mod tests { ]), ], ); - - let expected = vec![ - vec![vec![Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }]], - vec![ - vec![Query { prefix: false, kind: QueryKind::exact(S("manythe")) }], - vec![Query { prefix: false, kind: QueryKind::exact(S("fish")) }], - ], - vec![ - vec![Query { prefix: false, kind: QueryKind::exact(S("many")) }], - vec![Query { prefix: false, kind: QueryKind::exact(S("thefish")) }], - ], - vec![ - vec![Query { prefix: false, kind: QueryKind::exact(S("many")) }], - vec![Query { prefix: false, kind: QueryKind::exact(S("the")) }], - vec![Query { prefix: false, kind: QueryKind::exact(S("fish")) }], - ], - ]; - let result = flatten_query_tree(&query_tree); - assert_eq!(expected, result); + + insta::assert_debug_snapshot!(result, @r###" + [ + [ + [ + Exact { + word: "manythefish", + }, + ], + ], + [ + [ + Exact { + word: "manythe", + }, + ], + [ + Exact { + word: "fish", + }, + ], + ], + [ + [ + Exact { + word: "many", + }, + ], + [ + Exact { + word: "thefish", + }, + ], + ], + [ + [ + Exact { + word: "many", + }, + ], + [ + Exact { + word: "the", + }, + ], + [ + Exact { + word: "fish", + }, + ], + ], + ] + "###); } } From d2e01528a6e113e6090a670999fbd04452ca71f0 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 4 Aug 2022 11:18:32 +0200 Subject: [PATCH 09/16] Switch to snapshot tests for search/criteria/typo.rs --- milli/src/search/criteria/typo.rs | 206 +++++++++--------------------- milli/src/search/query_tree.rs | 5 - 2 files changed, 59 insertions(+), 152 deletions(-) diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index 97a9b4e4b..3ba158b3b 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -349,22 +349,33 @@ mod test { use super::super::test::TestContext; use super::*; + fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String { + let mut result = String::new(); + while let Some(criterion) = criteria.next(&mut parameters).unwrap() { + result.push_str(&format!("{criterion:?}\n\n")); + } + result + } + #[test] fn initial_placeholder_no_facets() { let context = TestContext::default(); let query_tree = None; let facet_candidates = None; - let mut criterion_parameters = CriterionParameters { + let criterion_parameters = CriterionParameters { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; let parent = Initial::new(query_tree, facet_candidates); - let mut criteria = Typo::new(&context, Box::new(parent)); + let criteria = Typo::new(&context, Box::new(parent)); + + let result = display_criteria(criteria, criterion_parameters); + insta::assert_snapshot!(result, @r###" + CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, bucket_candidates: None } - assert!(criteria.next(&mut criterion_parameters).unwrap().unwrap().candidates.is_none()); - assert!(criteria.next(&mut criterion_parameters).unwrap().is_none()); + "###); } #[test] @@ -390,78 +401,32 @@ mod test { let facet_candidates = None; - let mut criterion_parameters = CriterionParameters { + let criterion_parameters = CriterionParameters { wdcache: &mut 
WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; let parent = Initial::new(Some(query_tree), facet_candidates); - let mut criteria = Typo::new(&context, Box::new(parent)); + let criteria = Typo::new(&context, Box::new(parent)); - let candidates_1 = context.word_docids("split").unwrap().unwrap() - & context.word_docids("this").unwrap().unwrap() - & context.word_docids("world").unwrap().unwrap(); - let expected_1 = CriterionResult { - query_tree: Some(Operation::Or( - false, - vec![Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("split".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("this".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("world".to_string()), - }), - ])], - )), - candidates: Some(candidates_1.clone()), - bucket_candidates: Some(candidates_1), - filtered_candidates: None, - }; + let result = display_criteria(criteria, criterion_parameters); + insta::assert_snapshot!(result, @r###" + CriterionResult { query_tree: Some(OR + AND + Exact { word: "split" } + Exact { word: "this" } + Exact { word: "world" } + ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) } - assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); + CriterionResult { query_tree: Some(OR + AND + Exact { word: "split" } + Exact { word: "this" } + OR + Exact { word: "word" } + Exact { word: "world" } + ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) } - let candidates_2 = (context.word_docids("split").unwrap().unwrap() - & context.word_docids("this").unwrap().unwrap() - & context.word_docids("word").unwrap().unwrap()) - - context.word_docids("world").unwrap().unwrap(); - let expected_2 = CriterionResult { - query_tree: Some(Operation::Or( - false, - vec![Operation::And(vec![ - 
Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("split".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("this".to_string()), - }), - Operation::Or( - false, - vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact_with_typo(1, "word".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("world".to_string()), - }), - ], - ), - ])], - )), - candidates: Some(candidates_2.clone()), - bucket_candidates: Some(candidates_2), - filtered_candidates: None, - }; - - assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); + "###); } #[test] @@ -470,25 +435,18 @@ mod test { let query_tree = None; let facet_candidates = context.word_docids("earth").unwrap().unwrap(); - let mut criterion_parameters = CriterionParameters { + let criterion_parameters = CriterionParameters { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; let parent = Initial::new(query_tree, Some(facet_candidates.clone())); - let mut criteria = Typo::new(&context, Box::new(parent)); + let criteria = Typo::new(&context, Box::new(parent)); - let expected = CriterionResult { - query_tree: None, - candidates: None, - bucket_candidates: None, - filtered_candidates: Some(facet_candidates.clone()), - }; + let result = display_criteria(criteria, criterion_parameters); + insta::assert_snapshot!(result, @r###" + CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), bucket_candidates: None } - // first iteration, returns the facet candidates - assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected)); - - // second iteration, returns None because there is no more things to do - assert!(criteria.next(&mut criterion_parameters).unwrap().is_none()); + "###); } #[test] @@ -514,77 +472,31 @@ mod test { let facet_candidates = 
context.word_docids("earth").unwrap().unwrap(); - let mut criterion_parameters = CriterionParameters { + let criterion_parameters = CriterionParameters { wdcache: &mut WordDerivationsCache::new(), excluded_candidates: &RoaringBitmap::new(), }; let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone())); - let mut criteria = Typo::new(&context, Box::new(parent)); + let criteria = Typo::new(&context, Box::new(parent)); - let candidates_1 = context.word_docids("split").unwrap().unwrap() - & context.word_docids("this").unwrap().unwrap() - & context.word_docids("world").unwrap().unwrap(); - let expected_1 = CriterionResult { - query_tree: Some(Operation::Or( - false, - vec![Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("split".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("this".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("world".to_string()), - }), - ])], - )), - candidates: Some(&candidates_1 & &facet_candidates), - bucket_candidates: Some(&candidates_1 & &facet_candidates), - filtered_candidates: None, - }; + let result = display_criteria(criteria, criterion_parameters); + insta::assert_snapshot!(result, @r###" + CriterionResult { query_tree: Some(OR + AND + Exact { word: "split" } + Exact { word: "this" } + Exact { word: "world" } + ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) } - assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); + CriterionResult { query_tree: Some(OR + AND + Exact { word: "split" } + Exact { word: "this" } + OR + Exact { word: "word" } + Exact { word: "world" } + ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) } - let candidates_2 = (context.word_docids("split").unwrap().unwrap() - & context.word_docids("this").unwrap().unwrap() - & 
context.word_docids("word").unwrap().unwrap()) - - context.word_docids("world").unwrap().unwrap(); - let expected_2 = CriterionResult { - query_tree: Some(Operation::Or( - false, - vec![Operation::And(vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("split".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("this".to_string()), - }), - Operation::Or( - false, - vec![ - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact_with_typo(1, "word".to_string()), - }), - Operation::Query(Query { - prefix: false, - kind: QueryKind::exact("world".to_string()), - }), - ], - ), - ])], - )), - candidates: Some(&candidates_2 & &facet_candidates), - bucket_candidates: Some(&candidates_2 & &facet_candidates), - filtered_candidates: None, - }; - - assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); + "###); } } diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index ace1d9dfe..617d9e4d9 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -99,11 +99,6 @@ impl QueryKind { QueryKind::Exact { original_typo: 0, word } } - #[cfg(test)] - pub fn exact_with_typo(original_typo: u8, word: String) -> Self { - QueryKind::Exact { original_typo, word } - } - pub fn tolerant(typo: u8, word: String) -> Self { QueryKind::Tolerant { typo, word } } From 051f24f67467d106ff605e7f7ec43577ac6281f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 4 Aug 2022 11:26:39 +0200 Subject: [PATCH 10/16] Switch to snapshot tests for search/matches/mod.rs --- milli/src/search/matches/mod.rs | 142 ++++++++++++++++++++------------ 1 file changed, 88 insertions(+), 54 deletions(-) diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 72592c4cb..09ed24080 100644 --- a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -573,15 +573,18 @@ mod tests { let text = "Natalie risk her 
future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. - assert_eq!(&matcher.format(format_options), "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."); - + insta::assert_snapshot!( + matcher.format(format_options), + @"Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World." + ); + // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. - assert_eq!( - &matcher.format(format_options), - "Natalie risk her future to build a world with the boy she loves." + insta::assert_snapshot!( + matcher.format(format_options), + @"Natalie risk her future to build a world with the boy she loves." ); } @@ -602,19 +605,28 @@ mod tests { let text = "Ŵôřlḑôle"; let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. - assert_eq!(&matcher.format(format_options), "Ŵôřlḑôle"); + insta::assert_snapshot!( + matcher.format(format_options), + @"Ŵôřlḑôle" + ); // Text containing unicode match. let text = "Ŵôřlḑ"; let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. - assert_eq!(&matcher.format(format_options), "Ŵôřlḑ"); + insta::assert_snapshot!( + matcher.format(format_options), + @"Ŵôřlḑ" + ); // Text containing unicode match. let text = "Westfália"; let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. - assert_eq!(&matcher.format(format_options), "Westfália"); + insta::assert_snapshot!( + matcher.format(format_options), + @"Westfália" + ); } #[test] @@ -628,83 +640,89 @@ mod tests { // empty text. 
let text = ""; let mut matcher = builder.build(text); - assert_eq!(&matcher.format(format_options), ""); + insta::assert_snapshot!( + matcher.format(format_options), + @"" + ); // text containing only separators. let text = ":-)"; let mut matcher = builder.build(text); - assert_eq!(&matcher.format(format_options), ":-)"); + insta::assert_snapshot!( + matcher.format(format_options), + @":-)" + ); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; let mut matcher = builder.build(text); // no highlight should return 10 first words with a marker at the end. - assert_eq!( - &matcher.format(format_options), - "A quick brown fox can not jump 32 feet, right…" + insta::assert_snapshot!( + matcher.format(format_options), + @"A quick brown fox can not jump 32 feet, right…" ); // Text without any match starting by a separator. let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)"; let mut matcher = builder.build(text); // no highlight should return 10 first words with a marker at the end. - assert_eq!( - &matcher.format(format_options), - "(A quick brown fox can not jump 32 feet, right…" + insta::assert_snapshot!( + matcher.format(format_options), + @"(A quick brown fox can not jump 32 feet, right…" ); // Test phrase propagation let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it."; let mut matcher = builder.build(text); // should crop the phrase instead of croping around the match. - assert_eq!( - &matcher.format(format_options), - "… Split The World is a book written by Emily Henry…", + insta::assert_snapshot!( + matcher.format(format_options), + @"… Split The World is a book written by Emily Henry…" ); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; let mut matcher = builder.build(text); // no highlight should return 10 last words with a marker at the start. 
- assert_eq!( - &matcher.format(format_options), - "…future to build a world with the boy she loves…" + insta::assert_snapshot!( + matcher.format(format_options), + @"…future to build a world with the boy she loves…" ); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; let mut matcher = builder.build(text); // no highlight should return 10 last words with a marker at the start. - assert_eq!( - &matcher.format(format_options), - "…she loves. Emily Henry: The Love That Split The World." + insta::assert_snapshot!( + matcher.format(format_options), + @"…she loves. Emily Henry: The Love That Split The World." ); // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. - assert_eq!( - &matcher.format(format_options), - "…void void void void void split the world void void" + insta::assert_snapshot!( + matcher.format(format_options), + @"…void void void void void split the world void void" ); // Text containing matches with diferent density. let text = "split void the void void world void void void void void void void void void void split the world void void"; let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. - assert_eq!( - &matcher.format(format_options), - "…void void void void void split the world void void" + insta::assert_snapshot!( + matcher.format(format_options), + @"…void void void void void split the world void void" ); // Text containing matches with same word. let text = "split split split split split split void void void void void void void void void void split the world void void"; let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. 
- assert_eq!( - &matcher.format(format_options), - "…void void void void void split the world void void" + insta::assert_snapshot!( + matcher.format(format_options), + @"…void void void void void split the world void void" ); } @@ -719,44 +737,53 @@ mod tests { // empty text. let text = ""; let mut matcher = builder.build(text); - assert_eq!(&matcher.format(format_options), ""); + insta::assert_snapshot!( + matcher.format(format_options), + @"" + ); // text containing only separators. let text = ":-)"; let mut matcher = builder.build(text); - assert_eq!(&matcher.format(format_options), ":-)"); + insta::assert_snapshot!( + matcher.format(format_options), + @":-)" + ); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; let mut matcher = builder.build(text); // both should return 10 first words with a marker at the end. - assert_eq!( - &matcher.format(format_options), - "A quick brown fox can not jump 32 feet, right…" + insta::assert_snapshot!( + matcher.format(format_options), + @"A quick brown fox can not jump 32 feet, right…" ); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; let mut matcher = builder.build(text); // both should return 10 last words with a marker at the start and highlighted matches. - assert_eq!( - &matcher.format(format_options), - "…future to build a world with the boy she loves…" + insta::assert_snapshot!( + matcher.format(format_options), + @"…future to build a world with the boy she loves…" ); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; let mut matcher = builder.build(text); // both should return 10 last words with a marker at the start and highlighted matches. - assert_eq!(&matcher.format(format_options), "…she loves. 
Emily Henry: The Love That Split The World."); + insta::assert_snapshot!( + matcher.format(format_options), + @"…she loves. Emily Henry: The Love That Split The World." + ); // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. - assert_eq!( - &matcher.format(format_options), - "…void void void void void split the world void void" + insta::assert_snapshot!( + matcher.format(format_options), + @"…void void void void void split the world void void" ); } @@ -773,19 +800,28 @@ mod tests { let format_options = FormatOptions { highlight: false, crop: Some(2) }; let mut matcher = builder.build(text); // because crop size < query size, partially format matches. - assert_eq!(&matcher.format(format_options), "…split the…"); + insta::assert_snapshot!( + matcher.format(format_options), + @"…split the…" + ); // set a smaller crop size let format_options = FormatOptions { highlight: false, crop: Some(1) }; let mut matcher = builder.build(text); // because crop size < query size, partially format matches. - assert_eq!(&matcher.format(format_options), "…split…"); + insta::assert_snapshot!( + matcher.format(format_options), + @"…split…" + ); // set crop size to 0 let format_options = FormatOptions { highlight: false, crop: Some(0) }; let mut matcher = builder.build(text); // because crop size is 0, crop is ignored. - assert_eq!(&matcher.format(format_options), "void void split the world void void."); + insta::assert_snapshot!( + matcher.format(format_options), + @"void void split the world void void." 
+ ); } #[test] @@ -820,11 +856,9 @@ mod tests { let text = "the do or die can't be he do and or isn't he"; let mut matcher = builder.build(text); - assert_eq!( - &matcher.format(format_options), - "_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_", - "matches: {:?}", - &matcher.matches + insta::assert_snapshot!( + matcher.format(format_options), + @"_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_" ); } } From 748bb86b5be3ba942e6b9ed5ce60b8d4430ed754 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 4 Aug 2022 11:34:10 +0200 Subject: [PATCH 11/16] cargo fmt --- milli/src/search/criteria/typo.rs | 2 +- milli/src/search/facet/filter.rs | 8 +++++--- milli/src/search/matches/mod.rs | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index 3ba158b3b..e9e6fb2f5 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -370,7 +370,7 @@ mod test { let parent = Initial::new(query_tree, facet_candidates); let criteria = Typo::new(&context, Box::new(parent)); - + let result = display_criteria(criteria, criterion_parameters); insta::assert_snapshot!(result, @r###" CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, bucket_candidates: None } diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 03ec03d39..225d3ea8d 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -490,12 +490,14 @@ impl<'a> From> for Filter<'a> { #[cfg(test)] mod tests { - use crate::index::tests::TempIndex; - use crate::Filter; + use std::fmt::Write; + use big_s::S; use either::Either; use maplit::hashset; - use std::fmt::Write; + + use crate::index::tests::TempIndex; + use crate::Filter; #[test] fn empty_db() { diff --git a/milli/src/search/matches/mod.rs b/milli/src/search/matches/mod.rs index 09ed24080..2697405be 100644 --- 
a/milli/src/search/matches/mod.rs +++ b/milli/src/search/matches/mod.rs @@ -574,10 +574,10 @@ mod tests { let mut matcher = builder.build(text); // no crop should return complete text with highlighted matches. insta::assert_snapshot!( - matcher.format(format_options), + matcher.format(format_options), @"Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World." ); - + // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; let mut matcher = builder.build(text); From ce560fdcb5d7b3aa2150be57d5e4e80b0038fd84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 4 Aug 2022 12:25:39 +0200 Subject: [PATCH 12/16] Add documentation for `db_snap!` --- milli/src/snapshot_tests.rs | 65 +++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index f9929bb22..2b55a7e2c 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -30,6 +30,71 @@ pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> insta::Setti settings } +/** +Create a snapshot test of the given database. + +## Arguments +1. The identifier for the `Index` +2. The content of the index to snapshot. Available options are: + - `settings` + - `word_docids` + - `exact_word_docids` + - `word_prefix_docids` + - `exact_word_prefix_docids` + - `docid_word_positions` + - `word_pair_proximity_docids` + - `word_prefix_pair_proximity_docids` + - `word_position_docids` + - `field_id_word_count_docids` + - `word_prefix_position_docids` + - `facet_id_f64_docids` + - `facet_id_string_docids` + - `documents_ids` + - `stop_words` + - `soft_deleted_documents_ids` + - `field_distribution` + - `fields_ids_map` + - `geo_faceted_documents_ids` + - `external_documents_ids` + - `number_faceted_documents_ids` + - `string_faceted_documents_ids` + - `words_fst` + - `words_prefixes_fst` + +3. 
The identifier for the snapshot test (optional) +4. `@""` to write the snapshot inline (optional) + +## Behaviour +The content of the database will be printed either inline or to the file system +at `test_directory/test_file.rs/test_name/db_name.snap`. + +If the database is too large, then only the hash of the database will be saved, with +the name `db_name.hash.snap`. To *also* save the full content of the database anyway, +set the `MILLI_TEST_FULL_SNAPS` environment variable to `true`. The full snapshot will +be saved with the name `db_name.full.snap` but will not be saved to the git repository. + +Running `cargo test` will check whether the old snapshot is identical to the +current one. If they are equal, the test passes. Otherwise, the test fails. + +Use the command line `cargo insta` to approve or reject new snapshots. + +## Example +```ignore +let index = TempIndex::new(); + +// basic usages +db_snap!(index, word_docids); + +// named snapshot to avoid conflicts +db_snap!(index, word_docids, "some_identifier"); + +// write the snapshot inline +db_snap!(index, word_docids, @""); // will be autocompleted by running `cargo insta review` + +// give a name to the inline snapshot +db_snap!(index, word_docids, "some_identifier", @""); +``` +*/ #[macro_export] macro_rules! 
db_snap { ($index:ident, $db_name:ident, $name:expr) => { From 4b7fd4dfae9234492378ad51c0e7a2a0558dfb62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 10 Aug 2022 10:42:54 +0200 Subject: [PATCH 13/16] Update insta version --- milli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index b745d970a..2bb6a50a1 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -51,7 +51,7 @@ csv = "1.1.6" [dev-dependencies] big_s = "1.0.2" -insta = "1.17.1" +insta = "1.18.1" maplit = "1.0.2" md5 = "0.7.0" rand = "0.8.5" From 12920f2a4f3f3613233801c858ce9cae214fb03b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 10 Aug 2022 12:10:45 +0200 Subject: [PATCH 14/16] Fix paths of snapshot tests --- milli/src/snapshot_tests.rs | 7 ++++--- .../default/facet_id_f64_docids.hash.snap | 0 .../facet_id_f64_docids.hash.snap | 0 .../facet_id_f64_docids.hash.snap | 0 .../facet_id_f64_docids.hash.snap | 0 .../facet_id_f64_docids.hash.snap | 0 .../tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap | 0 .../default/facet_id_string_docids.hash.snap | 0 .../facet_id_string_docids.hash.snap | 0 .../initial/word_prefix_pair_proximity_docids.snap | 0 .../update/word_prefix_pair_proximity_docids.snap | 0 11 files changed, 4 insertions(+), 3 deletions(-) rename milli/src/update/snapshots/{update => }/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap (100%) rename milli/src/update/snapshots/{update => }/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap (100%) rename milli/src/update/snapshots/{update => }/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap (100%) rename milli/src/update/snapshots/{update => }/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap (100%) rename milli/src/update/snapshots/{update => 
}/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap (100%) rename milli/src/update/snapshots/{update => }/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap (100%) rename milli/src/update/snapshots/{update => }/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap (100%) rename milli/src/update/snapshots/{update => }/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap (100%) rename milli/src/update/snapshots/{update => }/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap (100%) rename milli/src/update/snapshots/{update => }/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap (100%) diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index 2b55a7e2c..c6e99a437 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -17,14 +17,15 @@ pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> insta::Setti let mut settings = insta::Settings::clone_current(); settings.set_prepend_module_to_snapshot(false); let path = Path::new(std::panic::Location::caller().file()); - let path = path.strip_prefix("milli/src").unwrap(); + let filename = path.file_name().unwrap().to_str().unwrap(); settings.set_omit_expression(true); let test_name = std::thread::current().name().unwrap().rsplit("::").next().unwrap().to_owned(); if let Some(name) = name { - settings.set_snapshot_path(Path::new("snapshots").join(path).join(test_name).join(name)); + settings + .set_snapshot_path(Path::new("snapshots").join(filename).join(test_name).join(name)); } else { - settings.set_snapshot_path(Path::new("snapshots").join(path).join(test_name)); + settings.set_snapshot_path(Path::new("snapshots").join(filename).join(test_name)); } settings diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap 
b/milli/src/update/snapshots/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap similarity index 100% rename from milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap rename to milli/src/update/snapshots/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap similarity index 100% rename from milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap rename to milli/src/update/snapshots/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap similarity index 100% rename from milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap rename to milli/src/update/snapshots/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap similarity index 100% rename from milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap rename to milli/src/update/snapshots/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap diff --git 
a/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap similarity index 100% rename from milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap rename to milli/src/update/snapshots/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap similarity index 100% rename from milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap rename to milli/src/update/snapshots/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap b/milli/src/update/snapshots/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap similarity index 100% rename from milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap rename to milli/src/update/snapshots/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap b/milli/src/update/snapshots/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap similarity index 100% rename from milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap rename to 
milli/src/update/snapshots/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap diff --git a/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap b/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap similarity index 100% rename from milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap rename to milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap diff --git a/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap b/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap similarity index 100% rename from milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap rename to milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap From 6f4912622360934a1f7a87fef888fc4415201b1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 10 Aug 2022 15:53:01 +0200 Subject: [PATCH 15/16] Fix db_snap macro with inline parameter --- milli/src/snapshot_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index c6e99a437..eac3340fd 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -135,7 +135,7 @@ macro_rules! 
db_snap { }); }; ($index:ident, $db_name:ident, $name:literal, @$inline:literal) => { - let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(Some(format!("", $name))); + let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(Some(&format!("{}", $name))); settings.bind(|| { let snap = $crate::full_snap_of_db!($index, $db_name); let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, true); From dea00311b681316caf441b3d4718c6d0e297f504 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 16 Aug 2022 09:19:17 +0200 Subject: [PATCH 16/16] Add type annotations to remove compiler error --- milli/src/error.rs | 4 ++-- milli/src/search/facet/filter.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 80c923bd9..c817f64fa 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -99,7 +99,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco )] InvalidDocumentId { document_id: Value }, #[error("Invalid facet distribution, the fields `{}` are not set as filterable.", - .invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ") + .invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ") )] InvalidFacetsDistribution { invalid_facets_name: BTreeSet<String> }, #[error(transparent)] @@ -111,7 +111,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco match .valid_fields.is_empty() { true => "This index does not have configured sortable attributes.".to_string(), false => format!("Available sortable attributes are: `{}`.", - valid_fields.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ") + valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ") ), } )] diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 225d3ea8d..90aab826a 100644 --- a/milli/src/search/facet/filter.rs +++
b/milli/src/search/facet/filter.rs @@ -45,7 +45,7 @@ impl<'a> Display for FilterError<'a> { attribute, ) } else { - let filterables_list = filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(" "); + let filterables_list = filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(" "); write!( f,