From 0597a97c8457d38cc937920a837ef001b5e82e1e Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Wed, 19 Jul 2023 18:15:48 +0200
Subject: [PATCH 01/13] Update tests

---
 meilisearch/tests/settings/get_settings.rs         |  6 +++
 meilisearch/tests/settings/mod.rs                  |  1 +
 .../tests/settings/tokenizer_customization.rs      | 38 +++++++++++++++++++
 3 files changed, 45 insertions(+)
 create mode 100644 meilisearch/tests/settings/tokenizer_customization.rs

diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs
index d39cbd96e..d5dfd1145 100644
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -16,6 +16,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
         json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
     );
     map.insert("stop_words", json!([]));
+    map.insert("non_separator_tokens", json!([]));
+    map.insert("separator_tokens", json!([]));
+    map.insert("dictionary", json!([]));
     map.insert("synonyms", json!({}));
     map.insert(
         "faceting",
@@ -62,6 +65,9 @@ async fn get_settings() {
         json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
     );
     assert_eq!(settings["stopWords"], json!([]));
+    assert_eq!(settings["non_separator_tokens"], json!([]));
+    assert_eq!(settings["separator_tokens"], json!([]));
+    assert_eq!(settings["dictionary"], json!([]));
     assert_eq!(
         settings["faceting"],
         json!({
diff --git a/meilisearch/tests/settings/mod.rs b/meilisearch/tests/settings/mod.rs
index fa4df254c..70125a360 100644
--- a/meilisearch/tests/settings/mod.rs
+++ b/meilisearch/tests/settings/mod.rs
@@ -1,3 +1,4 @@
 mod distinct;
 mod errors;
 mod get_settings;
+mod tokenizer_customization;
diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs
new file mode 100644
index 000000000..0926f66bf
--- /dev/null
+++ b/meilisearch/tests/settings/tokenizer_customization.rs
@@ -0,0 +1,38 @@
+use serde_json::json;
+
+use crate::common::Server;
+
+#[actix_rt::test]
+async fn set_and_reset() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let (_response, _code) = index
+        .update_settings(json!({
+            "non_separator_tokens": ["#", "&"],
+            "separator_tokens": ["&sep", "<br/>"],
"], + "dictionary": ["J.R.R.", "J. R. R."], + })) + .await; + index.wait_task(0).await; + + let (response, _) = index.settings().await; + assert_eq!(response["non_separator_tokens"], json!(["#", "&"])); + assert_eq!(response["separator_tokens"], json!(["&sep", "
"])); + assert_eq!(response["dictionary"], json!(["J.R.R.", "J. R. R."])); + + index + .update_settings(json!({ + "non_separator_tokens": null, + "separator_tokens": null, + "dictionary": null, + })) + .await; + + index.wait_task(1).await; + + let (response, _) = index.settings().await; + assert_eq!(response["non_separator_tokens"], json!(null)); + assert_eq!(response["separator_tokens"], json!(null)); + assert_eq!(response["dictionary"], json!(null)); +} From d8d12d5979dfef6aa4d9391a8304f1cf2aebcadc Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 24 Jul 2023 17:00:18 +0200 Subject: [PATCH 02/13] Be able to set and reset settings --- dump/src/lib.rs | 3 + dump/src/reader/compat/v5_to_v6.rs | 3 + meilisearch-types/src/error.rs | 3 + meilisearch-types/src/settings.rs | 57 ++ meilisearch/src/routes/indexes/settings.rs | 75 ++ meilisearch/tests/dumps/mod.rs | 855 +++++++++++++++--- meilisearch/tests/settings/get_settings.rs | 2 +- .../tests/settings/tokenizer_customization.rs | 36 +- milli/src/index.rs | 89 +- milli/src/update/settings.rs | 105 +++ 10 files changed, 1088 insertions(+), 140 deletions(-) diff --git a/dump/src/lib.rs b/dump/src/lib.rs index 1e21eed05..036de6010 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -261,6 +261,9 @@ pub(crate) mod test { sortable_attributes: Setting::Set(btreeset! { S("age") }), ranking_rules: Setting::NotSet, stop_words: Setting::NotSet, + non_separator_tokens: Setting::NotSet, + separator_tokens: Setting::NotSet, + dictionary: Setting::NotSet, synonyms: Setting::NotSet, distinct_attribute: Setting::NotSet, typo_tolerance: Setting::NotSet, diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index ef5588d8f..9e938d756 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -340,6 +340,9 @@ impl From> for v6::Settings { } }, stop_words: settings.stop_words.into(), + non_separator_tokens: v6::Setting::NotSet, + separator_tokens: v6::Setting::NotSet, + dictionary: v6::Setting::NotSet, synonyms: settings.synonyms.into(), distinct_attribute: settings.distinct_attribute.into(), typo_tolerance: match settings.typo_tolerance { diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 36b760cd7..4b6711601 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -259,6 +259,9 @@ InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsStopWords , InvalidRequest , BAD_REQUEST ; +InvalidSettingsNonSeparatorTokens , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ; +InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ; InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ; InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ; InvalidState , Internal , INTERNAL_SERVER_ERROR ; diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index a5fb10074..5778d3703 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -171,6 +171,15 @@ pub struct Settings { #[deserr(default, error = DeserrJsonError)] pub stop_words: Setting>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = DeserrJsonError)] + pub non_separator_tokens: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = 
+    pub separator_tokens: Setting<BTreeSet<String>>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
+    pub dictionary: Setting<BTreeSet<String>>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
     pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
@@ -201,6 +210,9 @@ impl Settings<Checked> {
             ranking_rules: Setting::Reset,
             stop_words: Setting::Reset,
             synonyms: Setting::Reset,
+            non_separator_tokens: Setting::Reset,
+            separator_tokens: Setting::Reset,
+            dictionary: Setting::Reset,
             distinct_attribute: Setting::Reset,
             typo_tolerance: Setting::Reset,
             faceting: Setting::Reset,
@@ -217,6 +229,9 @@ impl Settings<Checked> {
             sortable_attributes,
             ranking_rules,
             stop_words,
+            non_separator_tokens,
+            separator_tokens,
+            dictionary,
             synonyms,
             distinct_attribute,
             typo_tolerance,
@@ -232,6 +247,9 @@ impl Settings<Checked> {
             sortable_attributes,
             ranking_rules,
             stop_words,
+            non_separator_tokens,
+            separator_tokens,
+            dictionary,
             synonyms,
             distinct_attribute,
             typo_tolerance,
@@ -274,6 +292,9 @@ impl Settings<Unchecked> {
             ranking_rules: self.ranking_rules,
             stop_words: self.stop_words,
             synonyms: self.synonyms,
+            non_separator_tokens: self.non_separator_tokens,
+            separator_tokens: self.separator_tokens,
+            dictionary: self.dictionary,
             distinct_attribute: self.distinct_attribute,
             typo_tolerance: self.typo_tolerance,
             faceting: self.faceting,
@@ -335,6 +356,28 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }
 
+    match settings.non_separator_tokens {
+        Setting::Set(ref non_separator_tokens) => {
+            builder.set_non_separator_tokens(non_separator_tokens.clone())
+        }
+        Setting::Reset => builder.reset_non_separator_tokens(),
+        Setting::NotSet => (),
+    }
+
+    match settings.separator_tokens {
+        Setting::Set(ref separator_tokens) => {
+            builder.set_separator_tokens(separator_tokens.clone())
+        }
+        Setting::Reset => builder.reset_separator_tokens(),
+        Setting::NotSet => (),
+    }
+
+    match settings.dictionary {
+        Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
+        Setting::Reset => builder.reset_dictionary(),
+        Setting::NotSet => (),
+    }
+
     match settings.synonyms {
         Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
         Setting::Reset => builder.reset_synonyms(),
@@ -459,6 +502,11 @@ pub fn settings(
         })
         .transpose()?
         .unwrap_or_default();
+
+    let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
+    let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
+    let dictionary = index.dictionary(rtxn)?.unwrap_or_default();
+
     let distinct_field = index.distinct_field(rtxn)?.map(String::from);
 
     // in milli each word in the synonyms map were split on their separator. Since we lost
@@ -520,6 +568,9 @@ pub fn settings(
         sortable_attributes: Setting::Set(sortable_attributes),
         ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
         stop_words: Setting::Set(stop_words),
+        non_separator_tokens: Setting::Set(non_separator_tokens),
+        separator_tokens: Setting::Set(separator_tokens),
+        dictionary: Setting::Set(dictionary),
         distinct_attribute: match distinct_field {
             Some(field) => Setting::Set(field),
             None => Setting::Reset,
@@ -642,6 +693,9 @@ pub(crate) mod test {
             sortable_attributes: Setting::NotSet,
             ranking_rules: Setting::NotSet,
             stop_words: Setting::NotSet,
+            non_separator_tokens: Setting::NotSet,
+            separator_tokens: Setting::NotSet,
+            dictionary: Setting::NotSet,
             synonyms: Setting::NotSet,
             distinct_attribute: Setting::NotSet,
             typo_tolerance: Setting::NotSet,
@@ -663,6 +717,9 @@ pub(crate) mod test {
             sortable_attributes: Setting::NotSet,
             ranking_rules: Setting::NotSet,
             stop_words: Setting::NotSet,
+            non_separator_tokens: Setting::NotSet,
+            separator_tokens: Setting::NotSet,
+            dictionary: Setting::NotSet,
             synonyms: Setting::NotSet,
             distinct_attribute: Setting::NotSet,
             typo_tolerance: Setting::NotSet,
diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index fb5d8ff7a..a29adadcf 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -309,6 +309,81 @@ make_setting_route!(
     }
 );
 
+make_setting_route!(
+    "/non-separator-tokens",
+    put,
+    std::collections::BTreeSet<String>,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
+    >,
+    non_separator_tokens,
+    "nonSeparatorTokens",
+    analytics,
+    |non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "nonSeparatorTokens Updated".to_string(),
+            json!({
+                "non_separator_tokens": {
+                    "total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
+);
+
+make_setting_route!(
+    "/separator-tokens",
+    put,
+    std::collections::BTreeSet<String>,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
+    >,
+    separator_tokens,
+    "separatorTokens",
+    analytics,
+    |separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "separatorTokens Updated".to_string(),
+            json!({
+                "separator_tokens": {
+                    "total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
+);
+
+make_setting_route!(
+    "/dictionary",
+    put,
+    std::collections::BTreeSet<String>,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
+    >,
+    dictionary,
+    "dictionary",
+    analytics,
+    |dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
+        use serde_json::json;
+
+        analytics.publish(
+            "dictionary Updated".to_string(),
+            json!({
+                "dictionary": {
+                    "total": dictionary.as_ref().map(|dictionary| dictionary.len()),
+                },
+            }),
+            Some(req),
+        );
+    }
+);
+
 make_setting_route!(
     "/synonyms",
     put,
diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs
index ce225cdf7..2feec996e 100644
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@@ -17,7 +17,7 @@ async fn import_dump_v1_movie_raw() {
     let server = Server::new_with_options(options).await.unwrap();
 
     let (indexes, code) = server.list_indexes(None, None).await;
-    assert_eq!(code, 200);
+
snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); @@ -26,21 +26,63 @@ async fn import_dump_v1_movie_raw() { let index = server.index("indexUID"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks, json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31968 }, "error": null, "duration": "PT9.317060500S", "enqueuedAt": "2021-09-08T09:08:45.153219Z", "startedAt": "2021-09-08T09:08:45.3961665Z", "finishedAt": "2021-09-08T09:08:54.713227Z" }], "limit": 20, "from": 0, "next": null }) @@ -48,21 +90,21 @@ async fn import_dump_v1_movie_raw() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(100, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 100, "title": "Lock, Stock and Two Smoking Barrels", "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "genres": ["Comedy", "Crime"], "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000}) ); let (document, code) = index.get_document(500, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 500, "title": "Reservoir Dogs", "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. 
Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "genres": ["Crime", "Thriller"], "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) ); let (document, code) = index.get_document(10006, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 10006, "title": "Wild Seven", "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "genres": ["Action", "Crime", "Drama"], "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) @@ -109,7 +151,7 @@ async fn import_dump_v1_movie_with_settings() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); @@ -118,21 +160,89 @@ async fn import_dump_v1_movie_with_settings() { let index = server.index("indexUID"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); - assert_eq!( - stats, - json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(stats), + @r###" + { + "numberOfDocuments": 53, + "isIndexing": false, + "fieldDistribution": { + "genres": 53, + "id": 53, + "overview": 53, + "poster": 53, + "release_date": 53, + "title": 53 + } + } + "### ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({ "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": ["genres"], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "genres", + "id", + "overview", + "poster", + "release_date", + "title" + ], + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres" + ], + "sortableAttributes": [ + "genres" + ], + "rankingRules": [ + "typo", + "words", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [ + "of", + "the" + ], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = 
index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks, json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT7.288826907S", "enqueuedAt": "2021-09-08T09:34:40.882977Z", "startedAt": "2021-09-08T09:34:40.883073093Z", "finishedAt": "2021-09-08T09:34:48.1719Z"}, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31968 }, "error": null, "duration": "PT9.090735774S", "enqueuedAt": "2021-09-08T09:34:16.036101Z", "startedAt": "2021-09-08T09:34:16.261191226Z", "finishedAt": "2021-09-08T09:34:25.351927Z" }], "limit": 20, "from": 1, "next": null }) @@ -140,21 +250,21 @@ async fn import_dump_v1_movie_with_settings() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(100, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 100, "title": "Lock, Stock and Two Smoking Barrels", "genres": ["Comedy", "Crime"], "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000 }) ); let (document, code) = index.get_document(500, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 500, "title": "Reservoir Dogs", "genres": ["Crime", "Thriller"], "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) ); let (document, code) = index.get_document(10006, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 10006, "title": "Wild Seven", "genres": ["Action", "Crime", "Drama"], "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. 
As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) @@ -201,7 +311,7 @@ async fn import_dump_v1_rubygems_with_settings() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("rubygems")); @@ -210,21 +320,75 @@ async fn import_dump_v1_rubygems_with_settings() { let index = server.index("rubygems"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({"displayedAttributes": ["description", "id", "name", "summary", "total_downloads", "version"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": ["version"], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 }}) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "description", + "id", + "name", + "summary", + "total_downloads", + "version" + ], + "searchableAttributes": [ + "name", + "summary" + ], + "filterableAttributes": [ + "version" + ], + "sortableAttributes": [ + "version" + ], + "rankingRules": [ + "typo", + "words", + "fame:desc", + "proximity", + "attribute", + "exactness", + "total_downloads:desc" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks["results"][0], json!({"uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "error": null, "duration": "PT1.487793839S", "enqueuedAt": "2021-09-08T09:27:01.465296Z", "startedAt": "2021-09-08T09:28:44.882177161Z", "finishedAt": "2021-09-08T09:28:46.369971Z"}) @@ -232,21 +396,21 @@ async fn import_dump_v1_rubygems_with_settings() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(188040, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "meilisearch", "summary": "An 
easy-to-use ruby client for Meilisearch API", "description": "An easy-to-use ruby client for Meilisearch API. See https://github.com/meilisearch/MeiliSearch", "id": "188040", "version": "0.15.2", "total_downloads": "7465"}) ); let (document, code) = index.get_document(191940, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "doggo", "summary": "RSpec 3 formatter - documentation, with progress indication", "description": "Similar to \"rspec -f d\", but also indicates progress by showing the current test number and total test count on each line.", "id": "191940", "version": "1.1.0", "total_downloads": "9394"}) ); let (document, code) = index.get_document(159227, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "vortex-of-agony", "summary": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "description": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "id": "159227", "version": "0.1.0", "total_downloads": "1007"}) @@ -291,7 +455,7 @@ async fn import_dump_v2_movie_raw() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); @@ -300,21 +464,63 @@ async fn import_dump_v2_movie_raw() { let index = server.index("indexUID"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks, json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { 
"receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", "startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit": 20, "from": 0, "next": null }) @@ -322,21 +528,21 @@ async fn import_dump_v2_movie_raw() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(100, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 100, "title": "Lock, Stock and Two Smoking Barrels", "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "genres": ["Comedy", "Crime"], "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000}) ); let (document, code) = index.get_document(500, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 500, "title": "Reservoir Dogs", "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "genres": ["Crime", "Thriller"], "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) ); let (document, code) = index.get_document(10006, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 10006, "title": "Wild Seven", "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. 
As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "genres": ["Action", "Crime", "Drama"], "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) @@ -383,7 +589,7 @@ async fn import_dump_v2_movie_with_settings() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); @@ -392,21 +598,73 @@ async fn import_dump_v2_movie_with_settings() { let index = server.index("indexUID"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "title", + "genres", + "overview", + "poster", + "release_date" + ], + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres" + ], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [ + "of", + "the" + ], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks, json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT37.488777S", "enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", 
"startedAt": "2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) @@ -414,21 +672,21 @@ async fn import_dump_v2_movie_with_settings() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(100, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 100, "title": "Lock, Stock and Two Smoking Barrels", "genres": ["Comedy", "Crime"], "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000 }) ); let (document, code) = index.get_document(500, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 500, "title": "Reservoir Dogs", "genres": ["Crime", "Thriller"], "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) ); let (document, code) = index.get_document(10006, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 10006, "title": "Wild Seven", "genres": ["Action", "Crime", "Drama"], "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. 
As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) @@ -475,7 +733,7 @@ async fn import_dump_v2_rubygems_with_settings() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("rubygems")); @@ -484,21 +742,72 @@ async fn import_dump_v2_rubygems_with_settings() { let index = server.index("rubygems"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 }}) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "name", + "summary", + "description", + "version", + "total_downloads" + ], + "searchableAttributes": [ + "name", + "summary" + ], + "filterableAttributes": [ + "version" + ], + "sortableAttributes": [], + "rankingRules": [ + "typo", + "words", + "fame:desc", + "proximity", + "attribute", + "exactness", + "total_downloads:desc" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks["results"][0], json!({"uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "error": null, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) @@ -506,21 +815,21 @@ async fn import_dump_v2_rubygems_with_settings() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(188040, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "meilisearch", "summary": "An easy-to-use ruby client for Meilisearch API", 
"description": "An easy-to-use ruby client for Meilisearch API. See https://github.com/meilisearch/MeiliSearch", "id": "188040", "version": "0.15.2", "total_downloads": "7465"}) ); let (document, code) = index.get_document(191940, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "doggo", "summary": "RSpec 3 formatter - documentation, with progress indication", "description": "Similar to \"rspec -f d\", but also indicates progress by showing the current test number and total test count on each line.", "id": "191940", "version": "1.1.0", "total_downloads": "9394"}) ); let (document, code) = index.get_document(159227, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "vortex-of-agony", "summary": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "description": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "id": "159227", "version": "0.1.0", "total_downloads": "1007"}) @@ -565,7 +874,7 @@ async fn import_dump_v3_movie_raw() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); @@ -574,21 +883,63 @@ async fn import_dump_v3_movie_raw() { let index = server.index("indexUID"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks, json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": 
null, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", "startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit": 20, "from": 0, "next": null }) @@ -596,21 +947,21 @@ async fn import_dump_v3_movie_raw() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(100, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 100, "title": "Lock, Stock and Two Smoking Barrels", "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "genres": ["Comedy", "Crime"], "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000}) ); let (document, code) = index.get_document(500, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 500, "title": "Reservoir Dogs", "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "genres": ["Crime", "Thriller"], "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) ); let (document, code) = index.get_document(10006, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({"id": 10006, "title": "Wild Seven", "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. 
As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "genres": ["Action", "Crime", "Drama"], "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) @@ -657,7 +1008,7 @@ async fn import_dump_v3_movie_with_settings() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); @@ -666,21 +1017,73 @@ async fn import_dump_v3_movie_with_settings() { let index = server.index("indexUID"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "title", + "genres", + "overview", + "poster", + "release_date" + ], + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres" + ], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [ + "of", + "the" + ], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks, json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT37.488777S", "enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", 
"startedAt": "2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) @@ -688,21 +1091,21 @@ async fn import_dump_v3_movie_with_settings() { // finally we're just going to check that we can["results"] still get a few documents by id let (document, code) = index.get_document(100, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 100, "title": "Lock, Stock and Two Smoking Barrels", "genres": ["Comedy", "Crime"], "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000 }) ); let (document, code) = index.get_document(500, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 500, "title": "Reservoir Dogs", "genres": ["Crime", "Thriller"], "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) ); let (document, code) = index.get_document(10006, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 10006, "title": "Wild Seven", "genres": ["Action", "Crime", "Drama"], "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. 
As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) @@ -749,7 +1152,7 @@ async fn import_dump_v3_rubygems_with_settings() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("rubygems")); @@ -758,21 +1161,72 @@ async fn import_dump_v3_rubygems_with_settings() { let index = server.index("rubygems"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "name", + "summary", + "description", + "version", + "total_downloads" + ], + "searchableAttributes": [ + "name", + "summary" + ], + "filterableAttributes": [ + "version" + ], + "sortableAttributes": [], + "rankingRules": [ + "typo", + "words", + "fame:desc", + "proximity", + "attribute", + "exactness", + "total_downloads:desc" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks["results"][0], json!({"uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "error": null, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) @@ -780,21 +1234,21 @@ async fn import_dump_v3_rubygems_with_settings() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(188040, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "meilisearch", "summary": "An easy-to-use ruby client for Meilisearch 
API", "description": "An easy-to-use ruby client for Meilisearch API. See https://github.com/meilisearch/MeiliSearch", "id": "188040", "version": "0.15.2", "total_downloads": "7465"}) ); let (document, code) = index.get_document(191940, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "doggo", "summary": "RSpec 3 formatter - documentation, with progress indication", "description": "Similar to \"rspec -f d\", but also indicates progress by showing the current test number and total test count on each line.", "id": "191940", "version": "1.1.0", "total_downloads": "9394"}) ); let (document, code) = index.get_document(159227, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "vortex-of-agony", "summary": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "description": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "id": "159227", "version": "0.1.0", "total_downloads": "1007"}) @@ -839,7 +1293,7 @@ async fn import_dump_v4_movie_raw() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); @@ -848,21 +1302,63 @@ async fn import_dump_v4_movie_raw() { let index = server.index("indexUID"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks, json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, 
"error": null, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", "startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit" : 20, "from": 0, "next": null }) @@ -870,21 +1366,21 @@ async fn import_dump_v4_movie_raw() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(100, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 100, "title": "Lock, Stock and Two Smoking Barrels", "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "genres": ["Comedy", "Crime"], "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000}) ); let (document, code) = index.get_document(500, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 500, "title": "Reservoir Dogs", "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "genres": ["Crime", "Thriller"], "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) ); let (document, code) = index.get_document(10006, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 10006, "title": "Wild Seven", "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. 
As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "genres": ["Action", "Crime", "Drama"], "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) @@ -931,7 +1427,7 @@ async fn import_dump_v4_movie_with_settings() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("indexUID")); @@ -940,21 +1436,73 @@ async fn import_dump_v4_movie_with_settings() { let index = server.index("indexUID"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "title", + "genres", + "overview", + "poster", + "release_date" + ], + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres" + ], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "stopWords": [ + "of", + "the" + ], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks, json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT37.488777S", "enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", 
"startedAt": "2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) @@ -962,21 +1510,21 @@ async fn import_dump_v4_movie_with_settings() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(100, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 100, "title": "Lock, Stock and Two Smoking Barrels", "genres": ["Comedy", "Crime"], "overview": "A card shark and his unwillingly-enlisted friends need to make a lot of cash quick after losing a sketchy poker match. To do this they decide to pull a heist on a small-time gang who happen to be operating out of the flat next door.", "poster": "https://image.tmdb.org/t/p/w500/8kSerJrhrJWKLk1LViesGcnrUPE.jpg", "release_date": 889056000 }) ); let (document, code) = index.get_document(500, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 500, "title": "Reservoir Dogs", "genres": ["Crime", "Thriller"], "overview": "A botched robbery indicates a police informant, and the pressure mounts in the aftermath at a warehouse. Crime begets violence as the survivors -- veteran Mr. White, newcomer Mr. Orange, psychopathic parolee Mr. Blonde, bickering weasel Mr. Pink and Nice Guy Eddie -- unravel.", "poster": "https://image.tmdb.org/t/p/w500/AjTtJNumZyUDz33VtMlF1K8JPsE.jpg", "release_date": 715392000}) ); let (document, code) = index.get_document(10006, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "id": 10006, "title": "Wild Seven", "genres": ["Action", "Crime", "Drama"], "overview": "In this darkly karmic vision of Arizona, a man who breathes nothing but ill will begins a noxious domino effect as quickly as an uncontrollable virus kills. 
As he exits Arizona State Penn after twenty-one long years, Wilson has only one thing on the brain, leveling the score with career criminal, Mackey Willis.", "poster": "https://image.tmdb.org/t/p/w500/y114dTPoqn8k2Txps4P2tI95YCS.jpg", "release_date": 1136073600}) @@ -1023,7 +1571,7 @@ async fn import_dump_v4_rubygems_with_settings() { let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(indexes["results"].as_array().unwrap().len(), 1); assert_eq!(indexes["results"][0]["uid"], json!("rubygems")); @@ -1032,21 +1580,72 @@ async fn import_dump_v4_rubygems_with_settings() { let index = server.index("rubygems"); let (stats, code) = index.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( stats, json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }}) ); let (settings, code) = index.settings().await; - assert_eq!(code, 200); - assert_eq!( - settings, - json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100, "sortFacetValuesBy": { "*": "alpha" } }, "pagination": { "maxTotalHits": 1000 } }) + snapshot!(code, @"200 OK"); + snapshot!( + json_string!(settings), + @r###" + { + "displayedAttributes": [ + "name", + "summary", + "description", + "version", + "total_downloads" + ], + "searchableAttributes": [ + "name", + "summary" + ], + "filterableAttributes": [ + "version" + ], + "sortableAttributes": [], + "rankingRules": [ + "typo", + "words", + "fame:desc", + "proximity", + "attribute", + "exactness", + "total_downloads:desc" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + } + } + "### ); let (tasks, code) = index.list_tasks().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( tasks["results"][0], json!({ "uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "error": null, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) @@ -1054,21 +1653,21 @@ async fn import_dump_v4_rubygems_with_settings() { // finally we're just going to check that we can still get a few documents by id let (document, code) = index.get_document(188040, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "meilisearch", "summary": "An easy-to-use ruby client for 
Meilisearch API", "description": "An easy-to-use ruby client for Meilisearch API. See https://github.com/meilisearch/MeiliSearch", "id": "188040", "version": "0.15.2", "total_downloads": "7465"}) ); let (document, code) = index.get_document(191940, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "doggo", "summary": "RSpec 3 formatter - documentation, with progress indication", "description": "Similar to \"rspec -f d\", but also indicates progress by showing the current test number and total test count on each line.", "id": "191940", "version": "1.1.0", "total_downloads": "9394"}) ); let (document, code) = index.get_document(159227, None).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!( document, json!({ "name": "vortex-of-agony", "summary": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "description": "You dont need to use nodejs or go, just install this plugin. It will crash your application at random", "id": "159227", "version": "0.1.0", "total_downloads": "1007"}) @@ -1152,22 +1751,22 @@ async fn import_dump_v5() { let index2 = server.index("test2"); let (stats, code) = index1.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(stats, expected_stats); let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(docs["results"].as_array().unwrap().len(), 10); let (docs, code) = index1.get_all_documents(GetAllDocumentsOptions::default()).await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(docs["results"].as_array().unwrap().len(), 10); let (stats, code) = index2.stats().await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); assert_eq!(stats, expected_stats); let (keys, code) = server.list_api_keys("").await; - assert_eq!(code, 200); + snapshot!(code, @"200 OK"); let key = &keys["results"][0]; assert_eq!(key["name"], "my key"); diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index d5dfd1145..683350fc3 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -54,7 +54,7 @@ async fn get_settings() { let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); - assert_eq!(settings.keys().len(), 11); + assert_eq!(settings.keys().len(), 14); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["filterableAttributes"], json!([])); diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs index 0926f66bf..e622d9a3c 100644 --- a/meilisearch/tests/settings/tokenizer_customization.rs +++ b/meilisearch/tests/settings/tokenizer_customization.rs @@ -1,3 +1,4 @@ +use meili_snap::{json_string, snapshot}; use serde_json::json; use crate::common::Server; @@ -9,22 +10,37 @@ async fn set_and_reset() { let (_response, _code) = index .update_settings(json!({ - "non_separator_tokens": ["#", "&"], - "separator_tokens": ["&sep", "
"], + "nonSeparatorTokens": ["#", "&"], + "separatorTokens": ["&sep", "
"], "dictionary": ["J.R.R.", "J. R. R."], })) .await; index.wait_task(0).await; let (response, _) = index.settings().await; - assert_eq!(response["non_separator_tokens"], json!(["#", "&"])); - assert_eq!(response["separator_tokens"], json!(["&sep", "
"])); - assert_eq!(response["dictionary"], json!(["J.R.R.", "J. R. R."])); + snapshot!(json_string!(response["nonSeparatorTokens"]), @r###" + [ + "#", + "&" + ] + "###); + snapshot!(json_string!(response["separatorTokens"]), @r###" + [ + "&sep", + "
" + ] + "###); + snapshot!(json_string!(response["dictionary"]), @r###" + [ + "J. R. R.", + "J.R.R." + ] + "###); index .update_settings(json!({ - "non_separator_tokens": null, - "separator_tokens": null, + "nonSeparatorTokens": null, + "separatorTokens": null, "dictionary": null, })) .await; @@ -32,7 +48,7 @@ async fn set_and_reset() { index.wait_task(1).await; let (response, _) = index.settings().await; - assert_eq!(response["non_separator_tokens"], json!(null)); - assert_eq!(response["separator_tokens"], json!(null)); - assert_eq!(response["dictionary"], json!(null)); + snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]"); + snapshot!(json_string!(response["separatorTokens"]), @"[]"); + snapshot!(json_string!(response["dictionary"]), @"[]"); } diff --git a/milli/src/index.rs b/milli/src/index.rs index 392ed1705..ea0120769 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::fs::File; use std::mem::size_of; use std::path::Path; @@ -60,6 +60,9 @@ pub mod main_key { pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields"; pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids"; pub const STOP_WORDS_KEY: &str = "stop-words"; + pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens"; + pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens"; + pub const DICTIONARY_KEY: &str = "dictionary"; pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; pub const SYNONYMS_KEY: &str = "synonyms"; pub const WORDS_FST_KEY: &str = "words-fst"; @@ -1048,6 +1051,90 @@ impl Index { } } + /* non separator tokens */ + + pub(crate) fn put_non_separator_tokens( + &self, + wtxn: &mut RwTxn, + set: &BTreeSet, + ) -> heed::Result<()> { + self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set) + } + + pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY) + } + + pub fn non_separator_tokens<'t>(&self, rtxn: &'t RoTxn) -> Result>> { + Ok(self.main.get::<_, Str, SerdeBincode>>( + rtxn, + main_key::NON_SEPARATOR_TOKENS_KEY, + )?) + } + + /* separator tokens */ + + pub(crate) fn put_separator_tokens( + &self, + wtxn: &mut RwTxn, + set: &BTreeSet, + ) -> heed::Result<()> { + self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set) + } + + pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY) + } + + pub fn separator_tokens<'t>(&self, rtxn: &'t RoTxn) -> Result>> { + Ok(self + .main + .get::<_, Str, SerdeBincode>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?) + } + + /* separators easing method */ + + pub(crate) fn allowed_separators<'t>( + &self, + rtxn: &'t RoTxn, + ) -> Result>> { + let default_separators = + charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string()); + let mut separators: Option> = None; + if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? { + separator_tokens.extend(default_separators.clone()); + separators = Some(separator_tokens); + } + + if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? 
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index aebbbeca2..a9db0490d 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -112,6 +112,9 @@ pub struct Settings<'a, 't, 'u, 'i> {
     sortable_fields: Setting<HashSet<String>>,
     criteria: Setting<Vec<Criterion>>,
     stop_words: Setting<BTreeSet<String>>,
+    non_separator_tokens: Setting<BTreeSet<String>>,
+    separator_tokens: Setting<BTreeSet<String>>,
+    dictionary: Setting<BTreeSet<String>>,
     distinct_field: Setting<String>,
     synonyms: Setting<HashMap<String, Vec<String>>>,
     primary_key: Setting<String>,
@@ -141,6 +144,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
             sortable_fields: Setting::NotSet,
             criteria: Setting::NotSet,
             stop_words: Setting::NotSet,
+            non_separator_tokens: Setting::NotSet,
+            separator_tokens: Setting::NotSet,
+            dictionary: Setting::NotSet,
             distinct_field: Setting::NotSet,
             synonyms: Setting::NotSet,
             primary_key: Setting::NotSet,
@@ -205,6 +211,39 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         if stop_words.is_empty() { Setting::Reset } else { Setting::Set(stop_words) }
     }
 
+    pub fn reset_non_separator_tokens(&mut self) {
+        self.non_separator_tokens = Setting::Reset;
+    }
+
+    pub fn set_non_separator_tokens(&mut self, non_separator_tokens: BTreeSet<String>) {
+        self.non_separator_tokens = if non_separator_tokens.is_empty() {
+            Setting::Reset
+        } else {
+            Setting::Set(non_separator_tokens)
+        }
+    }
+
+    pub fn reset_separator_tokens(&mut self) {
+        self.separator_tokens = Setting::Reset;
+    }
+
+    pub fn set_separator_tokens(&mut self, separator_tokens: BTreeSet<String>) {
+        self.separator_tokens = if separator_tokens.is_empty() {
+            Setting::Reset
+        } else {
+            Setting::Set(separator_tokens)
+        }
+    }
+
+    pub fn reset_dictionary(&mut self) {
+        self.dictionary = Setting::Reset;
+    }
+
+    pub fn set_dictionary(&mut self, dictionary: BTreeSet<String>) {
+        self.dictionary =
+            if dictionary.is_empty() { Setting::Reset } else { Setting::Set(dictionary) }
+    }
+
     pub fn reset_distinct_field(&mut self) {
         self.distinct_field = Setting::Reset;
     }
@@ -451,6 +490,60 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         }
     }
 
+    fn update_non_separator_tokens(&mut self) -> Result<bool> {
+        match self.non_separator_tokens {
+            Setting::Set(ref non_separator_tokens) => {
+                let current = self.index.non_separator_tokens(self.wtxn)?;
+
+                // Does the new list differ from the previous one?
+                if current.map_or(true, |current| &current != non_separator_tokens) {
+                    self.index.put_non_separator_tokens(self.wtxn, &non_separator_tokens)?;
+                    Ok(true)
+                } else {
+                    Ok(false)
+                }
+            }
+            Setting::Reset => Ok(self.index.delete_non_separator_tokens(self.wtxn)?),
+            Setting::NotSet => Ok(false),
+        }
+    }
+
+    fn update_separator_tokens(&mut self) -> Result<bool> {
+        match self.separator_tokens {
+            Setting::Set(ref separator_tokens) => {
+                let current = self.index.separator_tokens(self.wtxn)?;
+
+                // Does the new list differ from the previous one?
+                if current.map_or(true, |current| &current != separator_tokens) {
+                    self.index.put_separator_tokens(self.wtxn, &separator_tokens)?;
+                    Ok(true)
+                } else {
+                    Ok(false)
+                }
+            }
+            Setting::Reset => Ok(self.index.delete_separator_tokens(self.wtxn)?),
+            Setting::NotSet => Ok(false),
+        }
+    }
+
+    fn update_dictionary(&mut self) -> Result<bool> {
+        match self.dictionary {
+            Setting::Set(ref dictionary) => {
+                let current = self.index.dictionary(self.wtxn)?;
+
+                // Does the new list differ from the previous one?
+                if current.map_or(true, |current| &current != dictionary) {
+                    self.index.put_dictionary(self.wtxn, &dictionary)?;
+                    Ok(true)
+                } else {
+                    Ok(false)
+                }
+            }
+            Setting::Reset => Ok(self.index.delete_dictionary(self.wtxn)?),
+            Setting::NotSet => Ok(false),
+        }
+    }
+
     fn update_synonyms(&mut self) -> Result<bool> {
         match self.synonyms {
             Setting::Set(ref synonyms) => {
@@ -756,11 +849,17 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
         let faceted_updated = old_faceted_fields != new_faceted_fields;
 
         let stop_words_updated = self.update_stop_words()?;
+        let non_separator_tokens_updated = self.update_non_separator_tokens()?;
+        let separator_tokens_updated = self.update_separator_tokens()?;
+        let dictionary_updated = self.update_dictionary()?;
         let synonyms_updated = self.update_synonyms()?;
         let searchable_updated = self.update_searchable()?;
         let exact_attributes_updated = self.update_exact_attributes()?;
 
         if stop_words_updated
+            || non_separator_tokens_updated
+            || separator_tokens_updated
+            || dictionary_updated
             || faceted_updated
             || synonyms_updated
             || searchable_updated
@@ -1539,6 +1638,9 @@ mod tests {
             sortable_fields,
             criteria,
             stop_words,
+            non_separator_tokens,
+            separator_tokens,
+            dictionary,
             distinct_field,
             synonyms,
             primary_key,
@@ -1557,6 +1659,9 @@ mod tests {
         assert!(matches!(sortable_fields, Setting::NotSet));
         assert!(matches!(criteria, Setting::NotSet));
         assert!(matches!(stop_words, Setting::NotSet));
+        assert!(matches!(non_separator_tokens, Setting::NotSet));
+        assert!(matches!(separator_tokens, Setting::NotSet));
+        assert!(matches!(dictionary, Setting::NotSet));
        assert!(matches!(distinct_field, Setting::NotSet));
        assert!(matches!(synonyms, Setting::NotSet));
        assert!(matches!(primary_key, Setting::NotSet));
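Inside milli, the new builder methods compose like the existing ones. A hedged sketch of a settings update at that level, assuming an open `milli::Index` and the `big_s`/`maplit` helpers the crate's own tests use:

```rust
use big_s::S;
use maplit::btreeset;
use milli::update::{IndexerConfig, Settings};

fn customize_tokenizer(index: &milli::Index) -> milli::Result<()> {
    let mut wtxn = index.write_txn()?;
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, index, &config);
    builder.set_separator_tokens(btreeset! { S("&sep"), S("<br/>") });
    builder.set_non_separator_tokens(btreeset! { S("#"), S("&") });
    builder.set_dictionary(btreeset! { S("J.R.R."), S("J. R. R.") });
    // Per the setter contract above, an empty set is treated as a reset.
    builder.execute(|_| (), || false)?;
    wtxn.commit()?;
    Ok(())
}
```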
From 9c485f8563c9df6561988cf03ea9851ccc7a50a0 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Mon, 24 Jul 2023 18:35:20 +0200
Subject: [PATCH 03/13] Make the search and the indexing work

---
 meilisearch/src/search.rs                      |  14 ++
 .../tests/settings/tokenizer_customization.rs  | 142 ++++++++++++++++++
 milli/src/index.rs                             |   5 +-
 milli/src/search/new/mod.rs                    |  14 ++
 .../extract/extract_docid_word_positions.rs    |  10 ++
 .../src/update/index_documents/extract/mod.rs  |   8 +
 milli/src/update/index_documents/mod.rs        |   8 +
 7 files changed, 197 insertions(+), 4 deletions(-)

diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index 1a1c7721e..ab88cb671 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -491,6 +491,20 @@ pub fn perform_search(
         tokenizer_builder.allow_list(&script_lang_map);
     }
 
+    let separators = index.allowed_separators(&rtxn)?;
+    let separators: Option<Vec<_>> =
+        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
+    if let Some(ref separators) = separators {
+        tokenizer_builder.separators(separators);
+    }
+
+    let dictionary = index.dictionary(&rtxn)?;
+    let dictionary: Option<Vec<_>> =
+        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
+    if let Some(ref dictionary) = dictionary {
+        tokenizer_builder.words_dict(dictionary);
+    }
+
     let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
     formatter_builder.crop_marker(query.crop_marker);
     formatter_builder.highlight_prefix(query.highlight_pre_tag);
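Search and indexing now feed the same two lists into every tokenizer they build. A minimal charabia sketch of the wiring the hunk above performs (the token lists are illustrative; `separators`, `words_dict`, and `build` are the calls the diff itself uses):

```rust
use charabia::TokenizerBuilder;

fn main() {
    let separators = vec!["<br/>", "&sep"];
    let dictionary = vec!["J.R.R.", "J. R. R."];

    let mut builder = TokenizerBuilder::new();
    builder.separators(&separators);
    builder.words_dict(&dictionary);
    let tokenizer = builder.build();

    // "&sep" now splits words, while "J.R.R." survives as a single token.
    for token in tokenizer.tokenize("J.R.R. wrote this&septhat") {
        println!("{:?} ({:?})", token.lemma(), token.kind);
    }
}
```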
diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs
index e622d9a3c..75bea560b 100644
--- a/meilisearch/tests/settings/tokenizer_customization.rs
+++ b/meilisearch/tests/settings/tokenizer_customization.rs
@@ -52,3 +52,145 @@ async fn set_and_reset() {
     snapshot!(json_string!(response["separatorTokens"]), @"[]");
     snapshot!(json_string!(response["dictionary"]), @"[]");
 }
+
+#[actix_rt::test]
+async fn set_and_search() {
+    let documents = json!([
+        {
+            "id": 1,
+            "content": "Mac & cheese",
+        },
+        {
+            "id": 2,
+            "content": "G#D#G#D#G#C#D#G#C#",
+        },
+        {
+            "id": 3,
+            "content": "Mac&sep&&sepcheese",
+        },
+    ]);
+
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    let (_response, _code) = index
+        .update_settings(json!({
+            "nonSeparatorTokens": ["#", "&"],
+            "separatorTokens": ["<br/>
", "&sep"], + "dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"], + })) + .await; + index.wait_task(1).await; + + index + .search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "content": "Mac & cheese", + "_formatted": { + "id": "1", + "content": "Mac & cheese" + } + }, + { + "id": 3, + "content": "Mac&sep&&sepcheese", + "_formatted": { + "id": "3", + "content": "Mac&sep&&sepcheese" + } + } + ] + "###); + }) + .await; + + index + .search( + json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "content": "Mac & cheese", + "_formatted": { + "id": "1", + "content": "Mac & cheese" + } + }, + { + "id": 3, + "content": "Mac&sep&&sepcheese", + "_formatted": { + "id": "3", + "content": "Mac&sep&&sepcheese" + } + } + ] + "###); + }, + ) + .await; + + index + .search( + json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "content": "Mac & cheese", + "_formatted": { + "id": "1", + "content": "Mac & cheese" + } + }, + { + "id": 3, + "content": "Mac&sep&&sepcheese", + "_formatted": { + "id": "3", + "content": "Mac&sep&&sepcheese" + } + } + ] + "###); + }, + ) + .await; + + index + .search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "content": "G#D#G#D#G#C#D#G#C#", + "_formatted": { + "id": "2", + "content": "G#D#G#D#G#C#D#G#C#" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @"[]"); + }) + .await; +} diff --git a/milli/src/index.rs b/milli/src/index.rs index ea0120769..68014cc1a 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1094,10 +1094,7 @@ impl Index { /* separators easing method */ - pub(crate) fn allowed_separators<'t>( - &self, - rtxn: &'t RoTxn, - ) -> Result>> { + pub fn allowed_separators<'t>(&self, rtxn: &'t RoTxn) -> Result>> { let default_separators = charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string()); let mut separators: Option> = None; diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 034b279ad..8868d23fd 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -479,6 +479,20 @@ pub fn execute_search( tokbuilder.stop_words(stop_words); } + let separators = ctx.index.allowed_separators(ctx.txn)?; + let separators: Option> = + separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); + if let Some(ref separators) = separators { + tokbuilder.separators(separators); + } + + let dictionary = ctx.index.dictionary(ctx.txn)?; + let dictionary: Option> = + dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); + if let Some(ref dictionary) = dictionary { + tokbuilder.words_dict(dictionary); + } + let script_lang_map = ctx.index.script_language(ctx.txn)?; if !script_lang_map.is_empty() { tokbuilder.allow_list(&script_lang_map); diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs 
b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index f726bf866..b56398385 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -28,6 +28,8 @@ pub fn extract_docid_word_positions( indexer: GrenadParameters, searchable_fields: &Option>, stop_words: Option<&fst::Set<&[u8]>>, + allowed_separators: Option<&Vec<&str>>, + dictionary: Option<&Vec<&str>>, max_positions_per_attributes: Option, ) -> Result<(RoaringBitmap, grenad::Reader, ScriptLanguageDocidsMap)> { puffin::profile_function!(); @@ -52,6 +54,14 @@ pub fn extract_docid_word_positions( if let Some(stop_words) = stop_words { tokenizer_builder.stop_words(stop_words); } + if let Some(dictionary) = dictionary { + // let dictionary: Vec<_> = dictionary.iter().map(String::as_str).collect(); + tokenizer_builder.words_dict(dictionary.as_slice()); + } + if let Some(separators) = allowed_separators { + // let separators: Vec<_> = separators.iter().map(String::as_str).collect(); + tokenizer_builder.separators(separators.as_slice()); + } let tokenizer = tokenizer_builder.build(); let mut cursor = obkv_documents.into_cursor()?; diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 1b1dc1420..cec0d5814 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -49,6 +49,8 @@ pub(crate) fn data_from_obkv_documents( geo_fields_ids: Option<(FieldId, FieldId)>, vectors_field_id: Option, stop_words: Option>, + allowed_separators: Option>, + dictionary: Option>, max_positions_per_attributes: Option, exact_attributes: HashSet, ) -> Result<()> { @@ -76,6 +78,8 @@ pub(crate) fn data_from_obkv_documents( geo_fields_ids, vectors_field_id, &stop_words, + &allowed_separators, + &dictionary, max_positions_per_attributes, ) }) @@ -289,6 +293,8 @@ fn send_and_extract_flattened_documents_data( geo_fields_ids: Option<(FieldId, FieldId)>, vectors_field_id: Option, stop_words: &Option>, + allowed_separators: &Option>, + dictionary: &Option>, max_positions_per_attributes: Option, ) -> Result<( grenad::Reader, @@ -344,6 +350,8 @@ fn send_and_extract_flattened_documents_data( indexer, searchable_fields, stop_words.as_ref(), + allowed_separators.as_ref(), + dictionary.as_ref(), max_positions_per_attributes, )?; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 1b2aab827..9a657674e 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -316,6 +316,12 @@ where let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors"); let stop_words = self.index.stop_words(self.wtxn)?; + let separators = self.index.allowed_separators(self.wtxn)?; + let separators: Option> = + separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); + let dictionary = self.index.dictionary(self.wtxn)?; + let dictionary: Option> = + dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?; let pool_params = GrenadParameters { @@ -353,6 +359,8 @@ where geo_fields_ids, vectors_field_id, stop_words, + separators, + dictionary, max_positions_per_attributes, exact_attributes, ) From d4ff59fcf50111832949fa218953011032bca227 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 24 Jul 2023 18:42:26 +0200 Subject: [PATCH 04/13] Fix clippy --- 
 milli/src/index.rs           | 8 ++++----
 milli/src/update/settings.rs | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/milli/src/index.rs b/milli/src/index.rs
index 68014cc1a..77acd4cb8 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -1065,7 +1065,7 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
     }
 
-    pub fn non_separator_tokens<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> {
+    pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
         Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
             rtxn,
             main_key::NON_SEPARATOR_TOKENS_KEY,
@@ -1086,7 +1086,7 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
     }
 
-    pub fn separator_tokens<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> {
+    pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
         Ok(self
             .main
             .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
@@ -1094,7 +1094,7 @@ impl Index {
 
     /* separators easing method */
 
-    pub fn allowed_separators<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> {
+    pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
         let default_separators =
             charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
         let mut separators: Option<BTreeSet<String>> = None;
@@ -1126,7 +1126,7 @@ impl Index {
         self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
     }
 
-    pub fn dictionary<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> {
+    pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
         Ok(self
             .main
             .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index a9db0490d..bdae5d7b4 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -497,7 +497,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
 
                 // Does the new list differ from the previous one?
                 if current.map_or(true, |current| &current != non_separator_tokens) {
-                    self.index.put_non_separator_tokens(self.wtxn, &non_separator_tokens)?;
+                    self.index.put_non_separator_tokens(self.wtxn, non_separator_tokens)?;
                     Ok(true)
                 } else {
                     Ok(false)
@@ -515,7 +515,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
 
                 // Does the new list differ from the previous one?
                 if current.map_or(true, |current| &current != separator_tokens) {
-                    self.index.put_separator_tokens(self.wtxn, &separator_tokens)?;
+                    self.index.put_separator_tokens(self.wtxn, separator_tokens)?;
                     Ok(true)
                 } else {
                     Ok(false)
@@ -533,7 +533,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
 
                 // Does the new list differ from the previous one?
                 if current.map_or(true, |current| &current != dictionary) {
-                    self.index.put_dictionary(self.wtxn, &dictionary)?;
+                    self.index.put_dictionary(self.wtxn, dictionary)?;
                     Ok(true)
                 } else {
                     Ok(false)
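The clippy pass above addresses two lints: `needless_lifetimes` on the read-side getters (the explicit `<'t>` duplicates what lifetime elision already provides) and `needless_borrow` at the `put_*` call sites (the matched `ref` binding is already a reference). A minimal, self-contained illustration (my example, not code from the series):

```rust
use std::collections::BTreeSet;

fn store(_set: &BTreeSet<String>) {}

fn update(tokens: &BTreeSet<String>) {
    // `tokens` is already a `&BTreeSet<String>`, so `store(&tokens)` would
    // build a `&&BTreeSet<String>` that only auto-deref rescues; that is
    // exactly what clippy::needless_borrow flags.
    store(tokens);
}

fn main() {
    update(&BTreeSet::new());
}
```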
From 41c9e8856a3a8027af880f1602ec24a43fe87407 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Tue, 25 Jul 2023 10:55:37 +0200
Subject: [PATCH 05/13] Fix test

---
 meilisearch/tests/settings/get_settings.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs
index 683350fc3..8034337d4 100644
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -65,8 +65,8 @@ async fn get_settings() {
         json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
     );
     assert_eq!(settings["stopWords"], json!([]));
-    assert_eq!(settings["non_separator_tokens"], json!([]));
-    assert_eq!(settings["separator_tokens"], json!([]));
+    assert_eq!(settings["nonSeparatorTokens"], json!([]));
+    assert_eq!(settings["separatorTokens"], json!([]));
     assert_eq!(settings["dictionary"], json!([]));
     assert_eq!(
         settings["faceting"],

From d57026cd969e5b41621d6f4a592bd98b994eb0fd Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Tue, 25 Jul 2023 15:01:42 +0200
Subject: [PATCH 06/13] Support synonyms synergies

---
 .../tests/settings/tokenizer_customization.rs | 263 ++++++++++++++++++
 milli/src/update/settings.rs                  |  66 ++++++++++++++-----
 2 files changed, 314 insertions(+), 15 deletions(-)

diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs
index 75bea560b..62a1440b2 100644
--- a/meilisearch/tests/settings/tokenizer_customization.rs
+++ b/meilisearch/tests/settings/tokenizer_customization.rs
@@ -194,3 +194,266 @@ async fn set_and_search() {
         })
         .await;
 }
+
+#[actix_rt::test]
+async fn advanced_synergies() {
+    let documents = json!([
+        {
+            "id": 1,
+            "content": "J.R.R. Tolkien",
+        },
+        {
+            "id": 2,
+            "content": "J. R. R. Tolkien",
+        },
+        {
+            "id": 3,
+            "content": "jrr Tolkien",
+        },
+        {
+            "id": 4,
+            "content": "J.K. Rowlings",
+        },
+        {
+            "id": 5,
+            "content": "J. K. Rowlings",
+        },
+        {
+            "id": 6,
+            "content": "jk Rowlings",
+        },
+    ]);
+
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    let (_response, _code) = index
+        .update_settings(json!({
+            "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
+            "synonyms": {
+                "J.R.R.": ["jrr", "J. R. R."],
+                "J. R. R.": ["jrr", "J.R.R."],
+                "jrr": ["J.R.R.", "J. R. R."],
+                "J.K.": ["jk", "J. K."],
+                "J. K.": ["jk", "J.K."],
+                "jk": ["J.K.", "J. K."],
+            }
+        }))
+        .await;
+    index.wait_task(1).await;
+
+    index
+        .search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 1,
+        "content": "J.R.R. Tolkien",
+        "_formatted": {
+          "id": "1",
+          "content": "J.R.R. Tolkien"
+        }
+      },
+      {
+        "id": 2,
+        "content": "J. R. R. Tolkien",
+        "_formatted": {
+          "id": "2",
+          "content": "J. R. R. 
Tolkien" + } + }, + { + "id": 3, + "content": "jrr Tolkien", + "_formatted": { + "id": "3", + "content": "jrr Tolkien" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "content": "jrr Tolkien", + "_formatted": { + "id": "3", + "content": "jrr Tolkien" + } + }, + { + "id": 1, + "content": "J.R.R. Tolkien", + "_formatted": { + "id": "1", + "content": "J.R.R. Tolkien" + } + }, + { + "id": 2, + "content": "J. R. R. Tolkien", + "_formatted": { + "id": "2", + "content": "J. R. R. Tolkien" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "content": "J. R. R. Tolkien", + "_formatted": { + "id": "2", + "content": "J. R. R. Tolkien" + } + }, + { + "id": 1, + "content": "J.R.R. Tolkien", + "_formatted": { + "id": "1", + "content": "J.R.R. Tolkien" + } + }, + { + "id": 3, + "content": "jrr Tolkien", + "_formatted": { + "id": "3", + "content": "jrr Tolkien" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 6, + "content": "jk Rowlings", + "_formatted": { + "id": "6", + "content": "jk Rowlings" + } + }, + { + "id": 4, + "content": "J.K. Rowlings", + "_formatted": { + "id": "4", + "content": "J.K. Rowlings" + } + }, + { + "id": 5, + "content": "J. K. Rowlings", + "_formatted": { + "id": "5", + "content": "J. K. Rowlings" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 4, + "content": "J.K. Rowlings", + "_formatted": { + "id": "4", + "content": "J.K. Rowlings" + } + }, + { + "id": 5, + "content": "J. K. Rowlings", + "_formatted": { + "id": "5", + "content": "J. K. Rowlings" + } + }, + { + "id": 6, + "content": "jk Rowlings", + "_formatted": { + "id": "6", + "content": "jk Rowlings" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 5, + "content": "J. K. Rowlings", + "_formatted": { + "id": "5", + "content": "J. K. Rowlings" + } + }, + { + "id": 4, + "content": "J.K. Rowlings", + "_formatted": { + "id": "4", + "content": "J.K. Rowlings" + } + }, + { + "id": 6, + "content": "jk Rowlings", + "_formatted": { + "id": "6", + "content": "jk Rowlings" + } + }, + { + "id": 2, + "content": "J. R. R. Tolkien", + "_formatted": { + "id": "2", + "content": "J. R. R. 
Tolkien" + } + } + ] + "###); + }) + .await; +} diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index bdae5d7b4..8f5a71f1d 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -491,57 +491,78 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } fn update_non_separator_tokens(&mut self) -> Result { - match self.non_separator_tokens { + let changes = match self.non_separator_tokens { Setting::Set(ref non_separator_tokens) => { let current = self.index.non_separator_tokens(self.wtxn)?; // Does the new list differ from the previous one? if current.map_or(true, |current| ¤t != non_separator_tokens) { self.index.put_non_separator_tokens(self.wtxn, non_separator_tokens)?; - Ok(true) + true } else { - Ok(false) + false } } - Setting::Reset => Ok(self.index.delete_non_separator_tokens(self.wtxn)?), - Setting::NotSet => Ok(false), + Setting::Reset => self.index.delete_non_separator_tokens(self.wtxn)?, + Setting::NotSet => false, + }; + + // the synonyms must be updated if non separator tokens have been updated. + if changes { + self.update_synonyms()?; } + + Ok(changes) } fn update_separator_tokens(&mut self) -> Result { - match self.separator_tokens { + let changes = match self.separator_tokens { Setting::Set(ref separator_tokens) => { let current = self.index.separator_tokens(self.wtxn)?; // Does the new list differ from the previous one? if current.map_or(true, |current| ¤t != separator_tokens) { self.index.put_separator_tokens(self.wtxn, separator_tokens)?; - Ok(true) + true } else { - Ok(false) + false } } - Setting::Reset => Ok(self.index.delete_separator_tokens(self.wtxn)?), - Setting::NotSet => Ok(false), + Setting::Reset => self.index.delete_separator_tokens(self.wtxn)?, + Setting::NotSet => false, + }; + + // the synonyms must be updated if separator tokens have been updated. + if changes { + self.update_synonyms()?; } + + Ok(changes) } fn update_dictionary(&mut self) -> Result { - match self.dictionary { + let changes = match self.dictionary { Setting::Set(ref dictionary) => { let current = self.index.dictionary(self.wtxn)?; // Does the new list differ from the previous one? if current.map_or(true, |current| ¤t != dictionary) { self.index.put_dictionary(self.wtxn, dictionary)?; - Ok(true) + true } else { - Ok(false) + false } } - Setting::Reset => Ok(self.index.delete_dictionary(self.wtxn)?), - Setting::NotSet => Ok(false), + Setting::Reset => self.index.delete_dictionary(self.wtxn)?, + Setting::NotSet => false, + }; + + // the synonyms must be updated if dictionary has been updated. 
+ if changes { + self.update_synonyms()?; } + + Ok(changes) } fn update_synonyms(&mut self) -> Result { @@ -565,6 +586,21 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { if let Some(ref stop_words) = stop_words { builder.stop_words(stop_words); } + + let separators = self.index.allowed_separators(self.wtxn)?; + let separators: Option> = + separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); + if let Some(ref separators) = separators { + builder.separators(separators); + } + + let dictionary = self.index.dictionary(self.wtxn)?; + let dictionary: Option> = + dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); + if let Some(ref dictionary) = dictionary { + builder.words_dict(dictionary); + } + let tokenizer = builder.build(); let mut new_synonyms = HashMap::new(); From b0c1a9504ab0d827a159132a8096932b2ba7891b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 26 Jul 2023 09:33:42 +0200 Subject: [PATCH 07/13] ensure the synonyms are updated when the tokenizer settings are changed --- .../tests/settings/tokenizer_customization.rs | 10 ++++++- milli/src/index.rs | 24 ++++++++++++++-- milli/src/search/new/tests/integration.rs | 4 +-- milli/src/search/new/tests/proximity.rs | 4 +-- milli/src/search/new/tests/typo.rs | 4 +-- milli/src/update/settings.rs | 28 +++++++++---------- milli/tests/search/mod.rs | 4 +-- 7 files changed, 52 insertions(+), 26 deletions(-) diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs index 62a1440b2..fc5d8a880 100644 --- a/meilisearch/tests/settings/tokenizer_customization.rs +++ b/meilisearch/tests/settings/tokenizer_customization.rs @@ -232,7 +232,7 @@ async fn advanced_synergies() { let (_response, _code) = index .update_settings(json!({ - "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."], + "dictionary": ["J.R.R.", "J. R. R."], "synonyms": { "J.R.R.": ["jrr", "J. R. R."], "J. R. R.": ["jrr", "J.R.R."], @@ -347,6 +347,14 @@ async fn advanced_synergies() { }) .await; + // Only update dictionary, the synonyms should be recomputed. + let (_response, _code) = index + .update_settings(json!({ + "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. 
K."], + })) + .await; + index.wait_task(2).await; + index .search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| { snapshot!(code, @"200 OK"); diff --git a/milli/src/index.rs b/milli/src/index.rs index 77acd4cb8..e1314896b 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1,5 +1,5 @@ use std::borrow::Cow; -use std::collections::{BTreeSet, HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fs::File; use std::mem::size_of; use std::path::Path; @@ -65,6 +65,7 @@ pub mod main_key { pub const DICTIONARY_KEY: &str = "dictionary"; pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids"; pub const SYNONYMS_KEY: &str = "synonyms"; + pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms"; pub const WORDS_FST_KEY: &str = "words-fst"; pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst"; pub const CREATED_AT_KEY: &str = "created-at"; @@ -1138,12 +1139,29 @@ impl Index { &self, wtxn: &mut RwTxn, synonyms: &HashMap, Vec>>, + user_defined_synonyms: &BTreeMap>, ) -> heed::Result<()> { - self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms) + self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?; + self.main.put::<_, Str, SerdeBincode<_>>( + wtxn, + main_key::USER_DEFINED_SYNONYMS_KEY, + user_defined_synonyms, + ) } pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result { - self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY) + self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?; + self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY) + } + + pub fn user_defined_synonyms( + &self, + rtxn: &RoTxn, + ) -> heed::Result>> { + Ok(self + .main + .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)? + .unwrap_or_default()) } pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result, Vec>>> { diff --git a/milli/src/search/new/tests/integration.rs b/milli/src/search/new/tests/integration.rs index 3abb1878f..e2ea4580e 100644 --- a/milli/src/search/new/tests/integration.rs +++ b/milli/src/search/new/tests/integration.rs @@ -2,7 +2,7 @@ use std::io::Cursor; use big_s::S; use heed::EnvOpenOptions; -use maplit::{hashmap, hashset}; +use maplit::{btreemap, hashset}; use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; @@ -33,7 +33,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("tag"), S("asc_desc_rank"), }); - builder.set_synonyms(hashmap! { + builder.set_synonyms(btreemap! { S("hello") => vec![S("good morning")], S("world") => vec![S("earth")], S("america") => vec![S("the united states")], diff --git a/milli/src/search/new/tests/proximity.rs b/milli/src/search/new/tests/proximity.rs index b54007c6e..f9ad2b57e 100644 --- a/milli/src/search/new/tests/proximity.rs +++ b/milli/src/search/new/tests/proximity.rs @@ -15,7 +15,7 @@ they store fewer sprximities than the regular word sprximity DB. 
*/ -use std::collections::HashMap; +use std::collections::BTreeMap; use crate::index::tests::TempIndex; use crate::search::new::tests::collect_field_values; @@ -336,7 +336,7 @@ fn test_proximity_split_word() { index .update_settings(|s| { - let mut syns = HashMap::new(); + let mut syns = BTreeMap::new(); syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]); s.set_synonyms(syns); }) diff --git a/milli/src/search/new/tests/typo.rs b/milli/src/search/new/tests/typo.rs index 4f5e851f5..61d4c4387 100644 --- a/milli/src/search/new/tests/typo.rs +++ b/milli/src/search/new/tests/typo.rs @@ -18,7 +18,7 @@ if `words` doesn't exist before it. 14. Synonyms cost nothing according to the typo ranking rule */ -use std::collections::HashMap; +use std::collections::BTreeMap; use crate::index::tests::TempIndex; use crate::search::new::tests::collect_field_values; @@ -591,7 +591,7 @@ fn test_typo_synonyms() { .update_settings(|s| { s.set_criteria(vec![Criterion::Typo]); - let mut synonyms = HashMap::new(); + let mut synonyms = BTreeMap::new(); synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]); synonyms.insert("fast brownish".to_owned(), vec!["quick brown".to_owned()]); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 8f5a71f1d..360fdb474 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeSet, HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::result::Result as StdResult; use charabia::{Normalize, Tokenizer, TokenizerBuilder}; @@ -116,7 +116,7 @@ pub struct Settings<'a, 't, 'u, 'i> { separator_tokens: Setting>, dictionary: Setting>, distinct_field: Setting, - synonyms: Setting>>, + synonyms: Setting>>, primary_key: Setting, authorize_typos: Setting, min_word_len_two_typos: Setting, @@ -256,7 +256,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.synonyms = Setting::Reset; } - pub fn set_synonyms(&mut self, synonyms: HashMap>) { + pub fn set_synonyms(&mut self, synonyms: BTreeMap>) { self.synonyms = if synonyms.is_empty() { Setting::Reset } else { Setting::Set(synonyms) } } @@ -508,8 +508,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { }; // the synonyms must be updated if non separator tokens have been updated. - if changes { - self.update_synonyms()?; + if changes && self.synonyms == Setting::NotSet { + self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?); } Ok(changes) @@ -533,8 +533,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { }; // the synonyms must be updated if separator tokens have been updated. - if changes { - self.update_synonyms()?; + if changes && self.synonyms == Setting::NotSet { + self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?); } Ok(changes) @@ -558,8 +558,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { }; // the synonyms must be updated if dictionary has been updated. 
- if changes { - self.update_synonyms()?; + if changes && self.synonyms == Setting::NotSet { + self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?); } Ok(changes) @@ -567,7 +567,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_synonyms(&mut self) -> Result { match self.synonyms { - Setting::Set(ref synonyms) => { + Setting::Set(ref user_synonyms) => { fn normalize(tokenizer: &Tokenizer, text: &str) -> Vec { tokenizer .tokenize(text) @@ -604,7 +604,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { let tokenizer = builder.build(); let mut new_synonyms = HashMap::new(); - for (word, synonyms) in synonyms { + for (word, synonyms) in user_synonyms { // Normalize both the word and associated synonyms. let normalized_word = normalize(&tokenizer, word); let normalized_synonyms = @@ -625,7 +625,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { let old_synonyms = self.index.synonyms(self.wtxn)?; if new_synonyms != old_synonyms { - self.index.put_synonyms(self.wtxn, &new_synonyms)?; + self.index.put_synonyms(self.wtxn, &new_synonyms, &user_synonyms)?; Ok(true) } else { Ok(false) @@ -912,7 +912,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { mod tests { use big_s::S; use heed::types::ByteSlice; - use maplit::{btreeset, hashmap, hashset}; + use maplit::{btreemap, btreeset, hashset}; use super::*; use crate::error::Error; @@ -1378,7 +1378,7 @@ mod tests { // In the same transaction provide some synonyms index .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_synonyms(hashmap! { + settings.set_synonyms(btreemap! { "blini".to_string() => vec!["crepes".to_string()], "super like".to_string() => vec!["love".to_string()], "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()] diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 7b2d9ad6d..1c68cfff2 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -5,7 +5,7 @@ use std::io::Cursor; use big_s::S; use either::{Either, Left, Right}; use heed::EnvOpenOptions; -use maplit::{hashmap, hashset}; +use maplit::{btreemap, hashset}; use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy}; @@ -51,7 +51,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("tag"), S("asc_desc_rank"), }); - builder.set_synonyms(hashmap! { + builder.set_synonyms(btreemap! { S("hello") => vec![S("good morning")], S("world") => vec![S("earth")], S("america") => vec![S("the united states")], From 04694071fe500cbff6672dc662da515d68aebedc Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 27 Jul 2023 14:12:23 +0200 Subject: [PATCH 08/13] Fix the synonyms settings display --- meilisearch-types/src/settings.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index 5778d3703..7bef64d4b 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -509,13 +509,7 @@ pub fn settings( let distinct_field = index.distinct_field(rtxn)?.map(String::from); - // in milli each word in the synonyms map were split on their separator. Since we lost - // this information we are going to put space between words. - let synonyms = index - .synonyms(rtxn)? 
- .iter() - .map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect())) - .collect(); + let synonyms = index.user_defined_synonyms(rtxn)?; let min_typo_word_len = MinWordSizeTyposSetting { one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?), From 9d5e3457e5309bd4056bb4fb9c2f4c859d5d17ce Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 27 Jul 2023 14:21:19 +0200 Subject: [PATCH 09/13] Fix clippy --- milli/src/update/settings.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 360fdb474..2ae5077f8 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -625,7 +625,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { let old_synonyms = self.index.synonyms(self.wtxn)?; if new_synonyms != old_synonyms { - self.index.put_synonyms(self.wtxn, &new_synonyms, &user_synonyms)?; + self.index.put_synonyms(self.wtxn, &new_synonyms, user_synonyms)?; Ok(true) } else { Ok(false) From ae8e69c030272ae704a3b20b11ecf1e09085962b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 8 Aug 2023 16:03:16 +0200 Subject: [PATCH 10/13] Add API route for the new settings --- meilisearch/src/routes/indexes/settings.rs | 3 +++ meilisearch/tests/settings/get_settings.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index a29adadcf..eab08b895 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -540,6 +540,9 @@ generate_configure!( searchable_attributes, distinct_attribute, stop_words, + separator_tokens, + non_separator_tokens, + dictionary, synonyms, ranking_rules, typo_tolerance, diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 8034337d4..f62957a32 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -278,6 +278,9 @@ test_setting_routes!( searchable_attributes put, distinct_attribute put, stop_words put, + separator_tokens put, + non_separator_tokens put, + dictionary put, ranking_rules put, synonyms put, pagination patch, From 43c13faeda14929ef511507afb94f5c46a566f06 Mon Sep 17 00:00:00 2001 From: Many the fish Date: Thu, 10 Aug 2023 10:05:03 +0200 Subject: [PATCH 11/13] Update milli/src/update/index_documents/extract/extract_docid_word_positions.rs Co-authored-by: Tamo --- .../index_documents/extract/extract_docid_word_positions.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index b56398385..67270bc52 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -55,11 +55,9 @@ pub fn extract_docid_word_positions( tokenizer_builder.stop_words(stop_words); } if let Some(dictionary) = dictionary { - // let dictionary: Vec<_> = dictionary.iter().map(String::as_str).collect(); tokenizer_builder.words_dict(dictionary.as_slice()); } if let Some(separators) = allowed_separators { - // let separators: Vec<_> = separators.iter().map(String::as_str).collect(); tokenizer_builder.separators(separators.as_slice()); } let tokenizer = tokenizer_builder.build(); From 6b2d671be7f9e26b0a9a63dd9198269445f4c71b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 10 Aug 
From 9d5e3457e5309bd4056bb4fb9c2f4c859d5d17ce Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Thu, 27 Jul 2023 14:21:19 +0200
Subject: [PATCH 09/13] Fix clippy

---
 milli/src/update/settings.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index 360fdb474..2ae5077f8 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -625,7 +625,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                 let old_synonyms = self.index.synonyms(self.wtxn)?;
 
                 if new_synonyms != old_synonyms {
-                    self.index.put_synonyms(self.wtxn, &new_synonyms, &user_synonyms)?;
+                    self.index.put_synonyms(self.wtxn, &new_synonyms, user_synonyms)?;
                     Ok(true)
                 } else {
                     Ok(false)

From ae8e69c030272ae704a3b20b11ecf1e09085962b Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Tue, 8 Aug 2023 16:03:16 +0200
Subject: [PATCH 10/13] Add API route for the new settings

---
 meilisearch/src/routes/indexes/settings.rs | 3 +++
 meilisearch/tests/settings/get_settings.rs | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index a29adadcf..eab08b895 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -540,6 +540,9 @@ generate_configure!(
     searchable_attributes,
     distinct_attribute,
     stop_words,
+    separator_tokens,
+    non_separator_tokens,
+    dictionary,
     synonyms,
     ranking_rules,
     typo_tolerance,
diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs
index 8034337d4..f62957a32 100644
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -278,6 +278,9 @@ test_setting_routes!(
     searchable_attributes put,
     distinct_attribute put,
     stop_words put,
+    separator_tokens put,
+    non_separator_tokens put,
+    dictionary put,
     ranking_rules put,
     synonyms put,
     pagination patch,

From 43c13faeda14929ef511507afb94f5c46a566f06 Mon Sep 17 00:00:00 2001
From: Many the fish
Date: Thu, 10 Aug 2023 10:05:03 +0200
Subject: [PATCH 11/13] Update
 milli/src/update/index_documents/extract/extract_docid_word_positions.rs

Co-authored-by: Tamo
---
 .../index_documents/extract/extract_docid_word_positions.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index b56398385..67270bc52 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -55,11 +55,9 @@ pub fn extract_docid_word_positions(
         tokenizer_builder.stop_words(stop_words);
     }
     if let Some(dictionary) = dictionary {
-        // let dictionary: Vec<_> = dictionary.iter().map(String::as_str).collect();
         tokenizer_builder.words_dict(dictionary.as_slice());
     }
     if let Some(separators) = allowed_separators {
-        // let separators: Vec<_> = separators.iter().map(String::as_str).collect();
         tokenizer_builder.separators(separators.as_slice());
     }
     let tokenizer = tokenizer_builder.build();

From 6b2d671be7f9e26b0a9a63dd9198269445f4c71b Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Thu, 10 Aug 2023 10:44:07 +0200
Subject: [PATCH 12/13] Fix PR comments

---
 .../extract/extract_docid_word_positions.rs     |  8 ++++----
 milli/src/update/index_documents/extract/mod.rs | 12 ++++++------
 milli/src/update/index_documents/mod.rs         |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index 67270bc52..ac041a8b0 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -28,8 +28,8 @@ pub fn extract_docid_word_positions(
     indexer: GrenadParameters,
     searchable_fields: &Option<HashSet<FieldId>>,
     stop_words: Option<&fst::Set<&[u8]>>,
-    allowed_separators: Option<&Vec<&str>>,
-    dictionary: Option<&Vec<&str>>,
+    allowed_separators: Option<&[&str]>,
+    dictionary: Option<&[&str]>,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
     puffin::profile_function!();
@@ -55,10 +55,10 @@ pub fn extract_docid_word_positions(
         tokenizer_builder.stop_words(stop_words);
     }
     if let Some(dictionary) = dictionary {
-        tokenizer_builder.words_dict(dictionary.as_slice());
+        tokenizer_builder.words_dict(dictionary);
     }
     if let Some(separators) = allowed_separators {
-        tokenizer_builder.separators(separators.as_slice());
+        tokenizer_builder.separators(separators);
     }
     let tokenizer = tokenizer_builder.build();
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index cec0d5814..4e174631c 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -49,8 +49,8 @@ pub(crate) fn data_from_obkv_documents(
     geo_fields_ids: Option<(FieldId, FieldId)>,
     vectors_field_id: Option<FieldId>,
     stop_words: Option<fst::Set<Vec<u8>>>,
-    allowed_separators: Option<Vec<&str>>,
-    dictionary: Option<Vec<&str>>,
+    allowed_separators: Option<&[&str]>,
+    dictionary: Option<&[&str]>,
     max_positions_per_attributes: Option<u32>,
     exact_attributes: HashSet<FieldId>,
 ) -> Result<()> {
@@ -293,8 +293,8 @@ fn send_and_extract_flattened_documents_data(
     geo_fields_ids: Option<(FieldId, FieldId)>,
     vectors_field_id: Option<FieldId>,
     stop_words: &Option<fst::Set<Vec<u8>>>,
-    allowed_separators: &Option<Vec<&str>>,
-    dictionary: &Option<Vec<&str>>,
+    allowed_separators: &Option<&[&str]>,
+    dictionary: &Option<&[&str]>,
     max_positions_per_attributes: Option<u32>,
 ) -> Result<(
     grenad::Reader<File>,
@@ -350,8 +350,8 @@ fn send_and_extract_flattened_documents_data(
                     indexer,
                     searchable_fields,
                     stop_words.as_ref(),
-                    allowed_separators.as_ref(),
-                    dictionary.as_ref(),
+                    *allowed_separators,
+                    *dictionary,
                     max_positions_per_attributes,
                 )?;
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 5426e26db..0dae611c9 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -359,8 +359,8 @@ where
                     geo_fields_ids,
                     vectors_field_id,
                     stop_words,
-                    separators,
-                    dictionary,
+                    separators.as_ref().map(Vec::as_slice),
+                    dictionary.as_ref().map(Vec::as_slice),
                     max_positions_per_attributes,
                     exact_attributes,
                 )
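Taking `Option<&[&str]>` instead of `Option<&Vec<&str>>` above is the idiomatic fix: `&Vec<T>` adds an extra indirection and forces callers to own a `Vec`, while `&[T]` accepts vectors, arrays, and slices alike. A minimal sketch of the coercions involved (function and variable names here are illustrative, not milli's):

```rust
// Accepting a slice instead of &Vec widens what callers can pass.
fn configure_separators(separators: Option<&[&str]>) -> usize {
    separators.map_or(0, |s| s.len())
}

fn main() {
    // A caller that owns a Vec can lend it as a slice with `as_deref()`,
    // the one-call conversion the next patch settles on.
    let owned: Option<Vec<&str>> = Some(vec!["&sep", "-"]);
    assert_eq!(configure_separators(owned.as_deref()), 2);

    // A fixed-size array works too, with no Vec allocation at the call site.
    let fixed = ["#", "&", "|"];
    assert_eq!(configure_separators(Some(&fixed[..])), 3);

    assert_eq!(configure_separators(None), 0);
}
```
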
From 5a7c1bde8463f1f49b399c0caf949715791f054a Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Thu, 10 Aug 2023 11:27:56 +0200
Subject: [PATCH 13/13] Fix clippy

---
 meilisearch/src/analytics/mock_analytics.rs | 6 +++---
 milli/src/update/index_documents/mod.rs     | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs
index 4bd190f87..8fa1916a5 100644
--- a/meilisearch/src/analytics/mock_analytics.rs
+++ b/meilisearch/src/analytics/mock_analytics.rs
@@ -20,7 +20,7 @@ pub struct SearchAggregator;
 #[allow(dead_code)]
 impl SearchAggregator {
     pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
-        Self::default()
+        Self
     }
 
     pub fn succeed(&mut self, _: &dyn Any) {}
@@ -32,7 +32,7 @@ pub struct MultiSearchAggregator;
 #[allow(dead_code)]
 impl MultiSearchAggregator {
     pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
-        Self::default()
+        Self
    }
 
     pub fn succeed(&mut self) {}
@@ -44,7 +44,7 @@ pub struct FacetSearchAggregator;
 #[allow(dead_code)]
 impl FacetSearchAggregator {
     pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
-        Self::default()
+        Self
     }
 
     pub fn succeed(&mut self, _: &dyn Any) {}
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 0dae611c9..f50b81d2d 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -359,8 +359,8 @@ where
                     geo_fields_ids,
                     vectors_field_id,
                     stop_words,
-                    separators.as_ref().map(Vec::as_slice),
-                    dictionary.as_ref().map(Vec::as_slice),
+                    separators.as_deref(),
+                    dictionary.as_deref(),
                     max_positions_per_attributes,
                     exact_attributes,
                 )
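The `Self::default()`-to-`Self` change above is what clippy suggests for unit structs (most likely the `default_constructed_unit_structs` lint, which landed around this time): a unit struct has no fields and exactly one value, so routing its construction through the `Default` trait is pure noise. A standalone illustration, reusing one of the struct names from the patch:

```rust
#[derive(Default)]
pub struct SearchAggregator; // unit struct: no fields, a single possible value

impl SearchAggregator {
    pub fn from_query() -> Self {
        // `Self::default()` would build the identical value through the
        // Default trait; the bare `Self` literal says the same with less noise.
        Self
    }
}

fn main() {
    let _aggregator = SearchAggregator::from_query();
}
```

The second hunk finishes the slice conversion from the previous patch, replacing the hand-written `as_ref().map(Vec::as_slice)` with the equivalent standard-library shorthand `as_deref()`.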