From f073a8638757b56751aefa238619f9724b9b1c35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 17 Jan 2023 11:28:19 +0100 Subject: [PATCH 01/19] Update deserr to latest version --- milli/Cargo.toml | 2 +- milli/src/update/settings.rs | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 5bbd7a8ff..981468f85 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -12,7 +12,7 @@ byteorder = "1.4.3" charabia = { version = "0.7.0", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.6" -deserr = "0.1.4" +deserr = "0.1.5" either = "1.8.0" flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index f10bfe4e9..4242654cb 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -37,9 +37,6 @@ where _ => T::deserialize_from_value(value, location).map(Setting::Set), } } - fn default() -> Option { - Some(Self::NotSet) - } } impl Default for Setting { From e3d30e28efe147432699abbee348a5c88948e0a5 Mon Sep 17 00:00:00 2001 From: curquiza Date: Tue, 17 Jan 2023 10:50:29 +0000 Subject: [PATCH 02/19] Update version for the next release (v0.39.1) in Cargo.toml files --- benchmarks/Cargo.toml | 2 +- cli/Cargo.toml | 2 +- filter-parser/Cargo.toml | 2 +- flatten-serde-json/Cargo.toml | 2 +- json-depth-checker/Cargo.toml | 2 +- milli/Cargo.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 1cb63db4a..9f5b6190e 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "benchmarks" -version = "0.39.0" +version = "0.39.1" edition = "2018" publish = false diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 7ecc3fa33..42fa4353a 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cli" -version = "0.39.0" +version = "0.39.1" edition = "2018" description = "A CLI to interact with a milli index" publish = false diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index 9202c3875..38427ccdf 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "filter-parser" -version = "0.39.0" +version = "0.39.1" edition = "2021" description = "The parser for the Meilisearch filter syntax" publish = false diff --git a/flatten-serde-json/Cargo.toml b/flatten-serde-json/Cargo.toml index 2fb668f86..904baa0b4 100644 --- a/flatten-serde-json/Cargo.toml +++ b/flatten-serde-json/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flatten-serde-json" -version = "0.39.0" +version = "0.39.1" edition = "2021" description = "Flatten serde-json objects like elastic search" readme = "README.md" diff --git a/json-depth-checker/Cargo.toml b/json-depth-checker/Cargo.toml index feb245e5e..5210b8bf4 100644 --- a/json-depth-checker/Cargo.toml +++ b/json-depth-checker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json-depth-checker" -version = "0.39.0" +version = "0.39.1" edition = "2021" description = "A library that indicates if a JSON must be flattened" publish = false diff --git a/milli/Cargo.toml b/milli/Cargo.toml index e23051b69..b591fb466 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "milli" -version = "0.39.0" +version = "0.39.1" authors = ["Kerollmops "] edition = "2018" From 497187083b918b6cee3832fe7ddca5467e4bd69a Mon Sep 17 00:00:00 2001 From: Philipp Ahlner Date: Wed, 18 Jan 2023 13:24:26 +0100 Subject: [PATCH 03/19] Add test for bug #3007: Wrong error message Adds a test for #3007: Wrong error message when lat and lng are unparseable --- milli/src/index.rs | 36 +++++++++++++++++++ .../bug_3007/geo_faceted_documents_ids.snap | 4 +++ 2 files changed, 40 insertions(+) create mode 100644 milli/src/snapshots/index.rs/bug_3007/geo_faceted_documents_ids.snap diff --git a/milli/src/index.rs b/milli/src/index.rs index 46f8eb6a3..7ed9af424 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2292,4 +2292,40 @@ pub(crate) mod tests { assert!(all_ids.insert(id)); } } + + #[test] + fn bug_3007() { + // https://github.com/meilisearch/meilisearch/issues/3007 + + use crate::error::{GeoError, UserError}; + let index = TempIndex::new(); + + // Given is an index with a geo field NOT contained in the sortable_fields of the settings + index + .update_settings(|settings| { + settings.set_primary_key("id".to_string()); + settings.set_filterable_fields(HashSet::from(["_geo".to_string()])); + }) + .unwrap(); + + // happy path + index.add_documents(documents!({ "id" : 5, "_geo": {"lat": 12.0, "lng": 11.0}})).unwrap(); + + db_snap!(index, geo_faceted_documents_ids); + + // both are unparseable, we expect GeoError::BadLatitudeAndLongitude + let err1 = index + .add_documents( + documents!({ "id" : 6, "_geo": {"lat": "unparseable", "lng": "unparseable"}}), + ) + .unwrap_err(); + assert!(matches!( + err1, + Error::UserError(UserError::InvalidGeoField( + GeoError::BadLatitudeAndLongitude { .. } + )) + )); + + db_snap!(index, geo_faceted_documents_ids); // ensure that no more document was inserted + } } diff --git a/milli/src/snapshots/index.rs/bug_3007/geo_faceted_documents_ids.snap b/milli/src/snapshots/index.rs/bug_3007/geo_faceted_documents_ids.snap new file mode 100644 index 000000000..f9ebc0c20 --- /dev/null +++ b/milli/src/snapshots/index.rs/bug_3007/geo_faceted_documents_ids.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/index.rs +--- +[0, ] From a2cd7214f0fc31cd983a46fa19d2f9fed5ca3d32 Mon Sep 17 00:00:00 2001 From: Philipp Ahlner Date: Wed, 18 Jan 2023 13:24:46 +0100 Subject: [PATCH 04/19] Fixes error message when lat/lng are unparseable --- milli/src/index.rs | 4 +--- milli/src/update/index_documents/enrich.rs | 11 +++++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 7ed9af424..0ab596fa9 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2321,9 +2321,7 @@ pub(crate) mod tests { .unwrap_err(); assert!(matches!( err1, - Error::UserError(UserError::InvalidGeoField( - GeoError::BadLatitudeAndLongitude { .. } - )) + Error::UserError(UserError::InvalidGeoField(GeoError::BadLatitudeAndLongitude { .. })) )); db_snap!(index, geo_faceted_documents_ids); // ensure that no more document was inserted diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 3331497c9..4c735856d 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -98,7 +98,12 @@ pub fn enrich_documents_batch( // If the settings specifies that a _geo field must be used therefore we must check the // validity of it in all the documents of this batch and this is when we return `Some`. let geo_field_id = match documents_batch_index.id("_geo") { - Some(geo_field_id) if index.sortable_fields(rtxn)?.contains("_geo") => Some(geo_field_id), + Some(geo_field_id) + if index.sortable_fields(rtxn)?.contains("_geo") + || index.filterable_fields(rtxn)?.contains("_geo") => + { + Some(geo_field_id) + } _otherwise => None, }; @@ -367,7 +372,9 @@ pub fn extract_finite_float_from_value(value: Value) -> StdResult { pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result> { use GeoError::*; - let debug_id = || Value::from(id.debug()); + let debug_id = || { + serde_json::from_slice(id.value().as_bytes()).unwrap_or_else(|_| Value::from(id.debug())) + }; match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson)? { Value::Object(mut object) => match (object.remove("lat"), object.remove("lng")) { (Some(lat), Some(lng)) => { From 4fd6fd9bef9e097dbb1ea70b5ca4ab78dedf1a3e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 19 Jan 2023 12:25:18 +0100 Subject: [PATCH 05/19] Indicate filterable attributes when the user set a non filterable attribute in facet distributions --- milli/src/error.rs | 54 ++++++++++++++++++-- milli/src/search/facet/facet_distribution.rs | 1 + 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 8734cb540..87cb3f360 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -1,5 +1,6 @@ use std::collections::BTreeSet; use std::convert::Infallible; +use std::fmt::Write; use std::{io, str}; use heed::{Error as HeedError, MdbError}; @@ -100,10 +101,11 @@ A document identifier can be of type integer or string, \ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", .document_id.to_string() )] InvalidDocumentId { document_id: Value }, - #[error("Invalid facet distribution, the fields `{}` are not set as filterable.", - .invalid_facets_name.iter().map(AsRef::as_ref).collect::>().join(", ") - )] - InvalidFacetsDistribution { invalid_facets_name: BTreeSet }, + #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))] + InvalidFacetsDistribution { + invalid_facets_name: BTreeSet, + valid_facets_name: BTreeSet, + }, #[error(transparent)] InvalidGeoField(#[from] GeoError), #[error("{0}")] @@ -166,6 +168,50 @@ pub enum GeoError { BadLongitude { document_id: Value, value: Value }, } +fn format_invalid_filter_distribution( + invalid_facets_name: &BTreeSet, + valid_facets_name: &BTreeSet, +) -> String { + if valid_facets_name.is_empty() { + return "this index does not have configured filterable attributes.".into(); + } + + let mut result = String::new(); + + match invalid_facets_name.len() { + 0 => (), + 1 => write!( + result, + "attribute `{}` is not filterable.", + invalid_facets_name.first().unwrap() + ) + .unwrap(), + _ => write!( + result, + "attributes `{}` are not filterable.", + invalid_facets_name.iter().map(AsRef::as_ref).collect::>().join(", ") + ) + .unwrap(), + }; + + match valid_facets_name.len() { + 1 => write!( + result, + " The available filterable attribute is `{}`.", + valid_facets_name.first().unwrap() + ) + .unwrap(), + _ => write!( + result, + " The available filterable attributes are `{}`.", + valid_facets_name.iter().map(AsRef::as_ref).collect::>().join(", ") + ) + .unwrap(), + } + + result +} + /// A little macro helper to autogenerate From implementation that needs two `Into`. /// Given the following parameters: `error_from_sub_error!(FieldIdMapMissingEntry => InternalError)` /// the macro will create the following code: diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 43367abbb..4d5028ce0 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -291,6 +291,7 @@ impl<'a> FacetDistribution<'a> { if !invalid_fields.is_empty() { return Err(UserError::InvalidFacetsDistribution { invalid_facets_name: invalid_fields.into_iter().cloned().collect(), + valid_facets_name: filterable_fields.into_iter().collect(), } .into()); } else { From 3f048927a0336858d8a6888f466a55baa51584b4 Mon Sep 17 00:00:00 2001 From: curquiza Date: Thu, 19 Jan 2023 14:29:09 +0000 Subject: [PATCH 06/19] Update version for the next release (v0.39.2) in Cargo.toml files --- benchmarks/Cargo.toml | 2 +- cli/Cargo.toml | 2 +- filter-parser/Cargo.toml | 2 +- flatten-serde-json/Cargo.toml | 2 +- json-depth-checker/Cargo.toml | 2 +- milli/Cargo.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 9f5b6190e..5fec31ee0 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "benchmarks" -version = "0.39.1" +version = "0.39.2" edition = "2018" publish = false diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 42fa4353a..3a40384bc 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cli" -version = "0.39.1" +version = "0.39.2" edition = "2018" description = "A CLI to interact with a milli index" publish = false diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index 38427ccdf..73754df26 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "filter-parser" -version = "0.39.1" +version = "0.39.2" edition = "2021" description = "The parser for the Meilisearch filter syntax" publish = false diff --git a/flatten-serde-json/Cargo.toml b/flatten-serde-json/Cargo.toml index 904baa0b4..e84a21798 100644 --- a/flatten-serde-json/Cargo.toml +++ b/flatten-serde-json/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flatten-serde-json" -version = "0.39.1" +version = "0.39.2" edition = "2021" description = "Flatten serde-json objects like elastic search" readme = "README.md" diff --git a/json-depth-checker/Cargo.toml b/json-depth-checker/Cargo.toml index 5210b8bf4..95ee2aa88 100644 --- a/json-depth-checker/Cargo.toml +++ b/json-depth-checker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json-depth-checker" -version = "0.39.1" +version = "0.39.2" edition = "2021" description = "A library that indicates if a JSON must be flattened" publish = false diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 743cd23a9..dd0287331 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "milli" -version = "0.39.1" +version = "0.39.2" authors = ["Kerollmops "] edition = "2018" From f5ca421227405ab312f57b0f13da48e6586ccb90 Mon Sep 17 00:00:00 2001 From: Philipp Ahlner Date: Thu, 19 Jan 2023 15:39:21 +0100 Subject: [PATCH 07/19] Superfluous test removed --- milli/src/update/index_documents/mod.rs | 28 ------------------------- 1 file changed, 28 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index f912a756a..7e13afb1b 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -965,34 +965,6 @@ mod tests { .unwrap(); } - #[test] - fn index_all_flavour_of_geo() { - let mut index = TempIndex::new(); - index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments; - - index - .update_settings(|settings| { - settings.set_filterable_fields(hashset!(S("_geo"))); - }) - .unwrap(); - - index - .add_documents(documents!([ - { "id": 0, "_geo": { "lat": 31, "lng": [42] } }, - { "id": 1, "_geo": { "lat": "31" }, "_geo.lng": 42 }, - { "id": 2, "_geo": { "lng": "42" }, "_geo.lat": "31" }, - { "id": 3, "_geo.lat": 31, "_geo.lng": "42" }, - ])) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - - let mut search = crate::Search::new(&rtxn, &index); - search.filter(crate::Filter::from_str("_geoRadius(31, 42, 0.000001)").unwrap().unwrap()); - let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); - assert_eq!(documents_ids, vec![0, 1, 2, 3]); - } - #[test] fn geo_error() { let mut index = TempIndex::new(); From d1a31afdd654eb13c26c025840cd110b2575ccb3 Mon Sep 17 00:00:00 2001 From: curquiza Date: Thu, 19 Jan 2023 17:17:34 +0100 Subject: [PATCH 08/19] Modify README to prevent contributions --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 948752ee9..ea80410a5 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,14 @@

a concurrent indexer combined with fast and relevant search algorithms

+--- + +DO NOT CONTRIBUTE TO THIS REPOSITORY ANYMORE, IT WILL BE ARCHIVED SOON. ONLY MEILI TEAM IS ALLOWED TO CONTRIBUTE. + +The content of this repository is now available in the [Meilisearch repository](https://github.com/meilisearch/meilisearch) in the workspace `milli`. + +--- + ## Introduction This repository contains the core engine used in [Meilisearch]. From 30fc37671329ae479db035fc64004ebfa1fc911b Mon Sep 17 00:00:00 2001 From: Many the fish Date: Thu, 19 Jan 2023 17:37:30 +0100 Subject: [PATCH 09/19] Update deserr v0.3.0 --- milli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index dd0287331..3b709d638 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -12,7 +12,7 @@ byteorder = "1.4.3" charabia = { version = "0.7.0", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.6" -deserr = "0.1.5" +deserr = "0.3.0" either = "1.8.0" flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" From abd65d93079ab6abb6a5824c9214597fdcacc62c Mon Sep 17 00:00:00 2001 From: curquiza Date: Thu, 19 Jan 2023 16:43:45 +0000 Subject: [PATCH 10/19] Update version for the next release (v0.40.0) in Cargo.toml files --- benchmarks/Cargo.toml | 2 +- cli/Cargo.toml | 2 +- filter-parser/Cargo.toml | 2 +- flatten-serde-json/Cargo.toml | 2 +- json-depth-checker/Cargo.toml | 2 +- milli/Cargo.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 5fec31ee0..73ca8ec33 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "benchmarks" -version = "0.39.2" +version = "0.40.0" edition = "2018" publish = false diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 3a40384bc..5acbbc632 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cli" -version = "0.39.2" +version = "0.40.0" edition = "2018" description = "A CLI to interact with a milli index" publish = false diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index 73754df26..d7e96cebf 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "filter-parser" -version = "0.39.2" +version = "0.40.0" edition = "2021" description = "The parser for the Meilisearch filter syntax" publish = false diff --git a/flatten-serde-json/Cargo.toml b/flatten-serde-json/Cargo.toml index e84a21798..802bf5f7c 100644 --- a/flatten-serde-json/Cargo.toml +++ b/flatten-serde-json/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flatten-serde-json" -version = "0.39.2" +version = "0.40.0" edition = "2021" description = "Flatten serde-json objects like elastic search" readme = "README.md" diff --git a/json-depth-checker/Cargo.toml b/json-depth-checker/Cargo.toml index 95ee2aa88..85e52c4fd 100644 --- a/json-depth-checker/Cargo.toml +++ b/json-depth-checker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json-depth-checker" -version = "0.39.2" +version = "0.40.0" edition = "2021" description = "A library that indicates if a JSON must be flattened" publish = false diff --git a/milli/Cargo.toml b/milli/Cargo.toml index dd0287331..44fb045ef 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "milli" -version = "0.39.2" +version = "0.40.0" authors = ["Kerollmops "] edition = "2018" From de3c4f1986f0a87b93911ed8ceb76bec16d4b28e Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Jan 2023 12:20:50 +0100 Subject: [PATCH 11/19] throw an error on unknown fields specified in the _geo field --- milli/src/error.rs | 2 ++ milli/src/index.rs | 31 +++++++++++++++++++ .../geo_faceted_documents_ids.snap | 4 +++ milli/src/update/index_documents/enrich.rs | 4 +++ 4 files changed, 41 insertions(+) create mode 100644 milli/src/snapshots/index.rs/unexpected_extra_fields_in_geo_field/geo_faceted_documents_ids.snap diff --git a/milli/src/error.rs b/milli/src/error.rs index 87cb3f360..f96c633f2 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -154,6 +154,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco pub enum GeoError { #[error("The `_geo` field in the document with the id: `{document_id}` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `{value}`.")] NotAnObject { document_id: Value, value: Value }, + #[error("The `_geo` field in the document with the id: `{document_id}` contains the following unexpected fields: `{value}`.")] + UnexpectedExtraFields { document_id: Value, value: Value }, #[error("Could not find latitude nor longitude in the document with the id: `{document_id}`. Was expecting `_geo.lat` and `_geo.lng` fields.")] MissingLatitudeAndLongitude { document_id: Value }, #[error("Could not find latitude in the document with the id: `{document_id}`. Was expecting a `_geo.lat` field.")] diff --git a/milli/src/index.rs b/milli/src/index.rs index 0ab596fa9..8a17cebf4 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2326,4 +2326,35 @@ pub(crate) mod tests { db_snap!(index, geo_faceted_documents_ids); // ensure that no more document was inserted } + + #[test] + fn unexpected_extra_fields_in_geo_field() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_primary_key("id".to_string()); + settings.set_filterable_fields(HashSet::from(["_geo".to_string()])); + }) + .unwrap(); + + let err = index + .add_documents( + documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best" }}), + ) + .unwrap_err(); + insta::assert_display_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"doggo":"are the best"}`."###); + + db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted + + // multiple fields and complex values + let err = index + .add_documents( + documents!({ "id" : "doggo", "_geo": { "lat": 1, "lng": 2, "doggo": "are the best", "and": { "all": ["cats", { "are": "beautiful" } ] } } }), + ) + .unwrap_err(); + insta::assert_display_snapshot!(err, @r###"The `_geo` field in the document with the id: `"\"doggo\""` contains the following unexpected fields: `{"and":{"all":["cats",{"are":"beautiful"}]},"doggo":"are the best"}`."###); + + db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted + } } diff --git a/milli/src/snapshots/index.rs/unexpected_extra_fields_in_geo_field/geo_faceted_documents_ids.snap b/milli/src/snapshots/index.rs/unexpected_extra_fields_in_geo_field/geo_faceted_documents_ids.snap new file mode 100644 index 000000000..89fb1856a --- /dev/null +++ b/milli/src/snapshots/index.rs/unexpected_extra_fields_in_geo_field/geo_faceted_documents_ids.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/index.rs +--- +[] diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 4c735856d..ed04e9962 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -379,6 +379,10 @@ pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result match (object.remove("lat"), object.remove("lng")) { (Some(lat), Some(lng)) => { match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) { + (Ok(_), Ok(_)) if !object.is_empty() => Ok(Err(UnexpectedExtraFields { + document_id: debug_id(), + value: object.into(), + })), (Ok(_), Ok(_)) => Ok(Ok(())), (Err(value), Ok(_)) => Ok(Err(BadLatitude { document_id: debug_id(), value })), (Ok(_), Err(value)) => Ok(Err(BadLongitude { document_id: debug_id(), value })), From 55e80465510ed6ea2f527406ab4c25a722517bff Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 24 Jan 2023 13:52:21 +0100 Subject: [PATCH 12/19] bump milli --- benchmarks/Cargo.toml | 2 +- cli/Cargo.toml | 2 +- filter-parser/Cargo.toml | 2 +- flatten-serde-json/Cargo.toml | 2 +- json-depth-checker/Cargo.toml | 2 +- milli/Cargo.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 73ca8ec33..bf96ca84c 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "benchmarks" -version = "0.40.0" +version = "0.41.0" edition = "2018" publish = false diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 5acbbc632..c8e63a764 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cli" -version = "0.40.0" +version = "0.41.0" edition = "2018" description = "A CLI to interact with a milli index" publish = false diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index d7e96cebf..8f47bf2bc 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "filter-parser" -version = "0.40.0" +version = "0.41.0" edition = "2021" description = "The parser for the Meilisearch filter syntax" publish = false diff --git a/flatten-serde-json/Cargo.toml b/flatten-serde-json/Cargo.toml index 802bf5f7c..9191364ae 100644 --- a/flatten-serde-json/Cargo.toml +++ b/flatten-serde-json/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flatten-serde-json" -version = "0.40.0" +version = "0.41.0" edition = "2021" description = "Flatten serde-json objects like elastic search" readme = "README.md" diff --git a/json-depth-checker/Cargo.toml b/json-depth-checker/Cargo.toml index 85e52c4fd..63906a276 100644 --- a/json-depth-checker/Cargo.toml +++ b/json-depth-checker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json-depth-checker" -version = "0.40.0" +version = "0.41.0" edition = "2021" description = "A library that indicates if a JSON must be flattened" publish = false diff --git a/milli/Cargo.toml b/milli/Cargo.toml index b64d96368..c3fccc9e2 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "milli" -version = "0.40.0" +version = "0.41.0" authors = ["Kerollmops "] edition = "2018" From 3d8a3d22d15becfad9ed17c324180fed42784b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar=20-=20curqui?= Date: Tue, 24 Jan 2023 15:58:34 +0100 Subject: [PATCH 13/19] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ea80410a5..a992b9440 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ DO NOT CONTRIBUTE TO THIS REPOSITORY ANYMORE, IT WILL BE ARCHIVED SOON. ONLY MEILI TEAM IS ALLOWED TO CONTRIBUTE. -The content of this repository is now available in the [Meilisearch repository](https://github.com/meilisearch/meilisearch) in the workspace `milli`. +The content of this repository is now available in the [Meilisearch repository in the workspace `milli`](https://github.com/meilisearch/meilisearch/tree/main/milli). --- From f7ae8bc0650a52515d4dc45f3e5d174715fb460c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar=20-=20curqui?= Date: Tue, 24 Jan 2023 15:58:41 +0100 Subject: [PATCH 14/19] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a992b9440..9450d5824 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ --- -DO NOT CONTRIBUTE TO THIS REPOSITORY ANYMORE, IT WILL BE ARCHIVED SOON. ONLY MEILI TEAM IS ALLOWED TO CONTRIBUTE. +DO NOT CONTRIBUTE TO THIS REPOSITORY ANYMORE. IT WILL BE ARCHIVED SOON. ONLY THE MEILISEARCH TEAM IS ALLOWED TO CONTRIBUTE. The content of this repository is now available in the [Meilisearch repository in the workspace `milli`](https://github.com/meilisearch/meilisearch/tree/main/milli). From bffabf9cc63f29a73f74faab15da11020a1329aa Mon Sep 17 00:00:00 2001 From: curquiza Date: Tue, 31 Jan 2023 09:56:22 +0000 Subject: [PATCH 15/19] Update version for the next release (v0.41.1) in Cargo.toml files --- benchmarks/Cargo.toml | 2 +- cli/Cargo.toml | 2 +- filter-parser/Cargo.toml | 2 +- flatten-serde-json/Cargo.toml | 2 +- json-depth-checker/Cargo.toml | 2 +- milli/Cargo.toml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index bf96ca84c..6d3608a5e 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "benchmarks" -version = "0.41.0" +version = "0.41.1" edition = "2018" publish = false diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c8e63a764..ede953756 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cli" -version = "0.41.0" +version = "0.41.1" edition = "2018" description = "A CLI to interact with a milli index" publish = false diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index 8f47bf2bc..6dd13c645 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "filter-parser" -version = "0.41.0" +version = "0.41.1" edition = "2021" description = "The parser for the Meilisearch filter syntax" publish = false diff --git a/flatten-serde-json/Cargo.toml b/flatten-serde-json/Cargo.toml index 9191364ae..136fd3f7e 100644 --- a/flatten-serde-json/Cargo.toml +++ b/flatten-serde-json/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "flatten-serde-json" -version = "0.41.0" +version = "0.41.1" edition = "2021" description = "Flatten serde-json objects like elastic search" readme = "README.md" diff --git a/json-depth-checker/Cargo.toml b/json-depth-checker/Cargo.toml index 63906a276..50a66da4b 100644 --- a/json-depth-checker/Cargo.toml +++ b/json-depth-checker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json-depth-checker" -version = "0.41.0" +version = "0.41.1" edition = "2021" description = "A library that indicates if a JSON must be flattened" publish = false diff --git a/milli/Cargo.toml b/milli/Cargo.toml index c3fccc9e2..b3d87304d 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "milli" -version = "0.41.0" +version = "0.41.1" authors = ["Kerollmops "] edition = "2018" From cbf029f64c6d014d29930d4a71a3b7b17763f306 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 31 Jan 2023 11:06:43 +0100 Subject: [PATCH 16/19] clippy: --fix --- .../update/index_documents/helpers/grenad_helpers.rs | 4 ++-- milli/src/update/index_documents/transform.rs | 10 +++++----- milli/tests/search/phrase_search.rs | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index 03f15945a..eb66a28fe 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; use std::fs::File; -use std::io::{self, Seek, SeekFrom}; +use std::io::{self, Seek}; use std::time::Instant; use grenad::{CompressionType, Sorter}; @@ -66,7 +66,7 @@ pub fn sorter_into_reader( pub fn writer_into_reader(writer: grenad::Writer) -> Result> { let mut file = writer.into_inner()?; - file.seek(SeekFrom::Start(0))?; + file.rewind()?; grenad::Reader::new(file).map_err(Into::into) } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 68ef2b7ee..9e07e78ad 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fs::File; -use std::io::{Read, Seek, SeekFrom}; +use std::io::{Read, Seek}; use fxhash::FxHashMap; use heed::RoTxn; @@ -510,7 +510,7 @@ impl<'a, 'i> Transform<'a, 'i> { let mut original_documents = writer.into_inner()?; // We then extract the file and reset the seek to be able to read it again. - original_documents.seek(SeekFrom::Start(0))?; + original_documents.rewind()?; // We create a final writer to write the new documents in order from the sorter. let mut writer = create_writer( @@ -522,7 +522,7 @@ impl<'a, 'i> Transform<'a, 'i> { // into this writer, extract the file and reset the seek to be able to read it again. self.flattened_sorter.write_into_stream_writer(&mut writer)?; let mut flattened_documents = writer.into_inner()?; - flattened_documents.seek(SeekFrom::Start(0))?; + flattened_documents.rewind()?; let mut new_external_documents_ids_builder: Vec<_> = self.new_external_documents_ids_builder.into_iter().collect(); @@ -650,10 +650,10 @@ impl<'a, 'i> Transform<'a, 'i> { // Once we have written all the documents, we extract // the file and reset the seek to be able to read it again. let mut original_documents = original_writer.into_inner()?; - original_documents.seek(SeekFrom::Start(0))?; + original_documents.rewind()?; let mut flattened_documents = flattened_writer.into_inner()?; - flattened_documents.seek(SeekFrom::Start(0))?; + flattened_documents.rewind()?; let output = TransformOutput { primary_key, diff --git a/milli/tests/search/phrase_search.rs b/milli/tests/search/phrase_search.rs index ca5eaad48..2e63c96c4 100644 --- a/milli/tests/search/phrase_search.rs +++ b/milli/tests/search/phrase_search.rs @@ -7,15 +7,15 @@ fn set_stop_words(index: &Index, stop_words: &[&str]) { let mut wtxn = index.write_txn().unwrap(); let config = IndexerConfig::default(); - let mut builder = Settings::new(&mut wtxn, &index, &config); - let stop_words = stop_words.into_iter().map(|s| s.to_string()).collect(); + let mut builder = Settings::new(&mut wtxn, index, &config); + let stop_words = stop_words.iter().map(|s| s.to_string()).collect(); builder.set_stop_words(stop_words); builder.execute(|_| (), || false).unwrap(); wtxn.commit().unwrap(); } fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) { - let index = super::setup_search_index_with_criteria(&criteria); + let index = super::setup_search_index_with_criteria(criteria); // Add stop_words set_stop_words(&index, &["a", "an", "the", "of"]); From 20f05efb3c423a272124e650a54df0c68952abd4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 31 Jan 2023 11:11:49 +0100 Subject: [PATCH 17/19] clippy: needless_lifetimes --- milli/src/index.rs | 6 +++--- milli/src/search/criteria/proximity.rs | 16 ++++++++-------- milli/src/search/criteria/typo.rs | 12 ++++++------ milli/src/update/delete_documents.rs | 6 +++--- milli/src/update/facet/incremental.rs | 24 ++++++++++++------------ 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 8a17cebf4..32ffe45cf 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -348,10 +348,10 @@ impl Index { /* external documents ids */ /// Writes the external documents ids and internal ids (i.e. `u32`). - pub(crate) fn put_external_documents_ids<'a>( + pub(crate) fn put_external_documents_ids( &self, wtxn: &mut RwTxn, - external_documents_ids: &ExternalDocumentsIds<'a>, + external_documents_ids: &ExternalDocumentsIds<'_>, ) -> heed::Result<()> { let ExternalDocumentsIds { hard, soft, .. } = external_documents_ids; let hard = hard.as_fst().as_bytes(); @@ -426,7 +426,7 @@ impl Index { } /// Returns the `rtree` which associates coordinates to documents ids. - pub fn geo_rtree<'t>(&self, rtxn: &'t RoTxn) -> Result>> { + pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result>> { match self .main .get::<_, Str, SerdeBincode>>(rtxn, main_key::GEO_RTREE_KEY)? diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index 66e5c95bf..182f9fbea 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -182,15 +182,15 @@ impl<'t> Criterion for Proximity<'t> { } } -fn resolve_candidates<'t>( - ctx: &'t dyn Context, +fn resolve_candidates( + ctx: &dyn Context, query_tree: &Operation, proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, ) -> Result { - fn resolve_operation<'t>( - ctx: &'t dyn Context, + fn resolve_operation( + ctx: &dyn Context, query_tree: &Operation, proximity: u8, cache: &mut Cache, @@ -243,8 +243,8 @@ fn resolve_candidates<'t>( Ok(result) } - fn mdfs_pair<'t>( - ctx: &'t dyn Context, + fn mdfs_pair( + ctx: &dyn Context, left: &Operation, right: &Operation, proximity: u8, @@ -298,8 +298,8 @@ fn resolve_candidates<'t>( Ok(output) } - fn mdfs<'t>( - ctx: &'t dyn Context, + fn mdfs( + ctx: &dyn Context, branches: &[Operation], proximity: u8, cache: &mut Cache, diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index 20bc718fd..ff2567304 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -239,15 +239,15 @@ fn alterate_query_tree( Ok(query_tree) } -fn resolve_candidates<'t>( - ctx: &'t dyn Context, +fn resolve_candidates( + ctx: &dyn Context, query_tree: &Operation, number_typos: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, wdcache: &mut WordDerivationsCache, ) -> Result { - fn resolve_operation<'t>( - ctx: &'t dyn Context, + fn resolve_operation( + ctx: &dyn Context, query_tree: &Operation, number_typos: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, @@ -276,8 +276,8 @@ fn resolve_candidates<'t>( } } - fn mdfs<'t>( - ctx: &'t dyn Context, + fn mdfs( + ctx: &dyn Context, branches: &[Operation], mana: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 635ce85be..90118af18 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -574,9 +574,9 @@ fn remove_from_word_docids( Ok(()) } -fn remove_docids_from_field_id_docid_facet_value<'i, 'a>( - index: &'i Index, - wtxn: &'a mut heed::RwTxn, +fn remove_docids_from_field_id_docid_facet_value( + index: &Index, + wtxn: &mut heed::RwTxn, facet_type: FacetType, field_id: FieldId, to_remove: &RoaringBitmap, diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index cffce5525..a5840dc6e 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -157,9 +157,9 @@ impl FacetsUpdateIncrementalInner { /// /// ## Return /// See documentation of `insert_in_level` - fn insert_in_level_0<'t>( + fn insert_in_level_0( &self, - txn: &'t mut RwTxn, + txn: &mut RwTxn, field_id: u16, facet_value: &[u8], docids: &RoaringBitmap, @@ -211,9 +211,9 @@ impl FacetsUpdateIncrementalInner { /// - `InsertionResult::Insert` means that inserting the `facet_value` into the `level` resulted /// in the addition of a new key in that level, and that therefore the number of children /// of the parent node should be incremented. - fn insert_in_level<'t>( + fn insert_in_level( &self, - txn: &'t mut RwTxn, + txn: &mut RwTxn, field_id: u16, level: u8, facet_value: &[u8], @@ -348,9 +348,9 @@ impl FacetsUpdateIncrementalInner { } /// Insert the given facet value and corresponding document ids in the database. - pub fn insert<'t>( + pub fn insert( &self, - txn: &'t mut RwTxn, + txn: &mut RwTxn, field_id: u16, facet_value: &[u8], docids: &RoaringBitmap, @@ -470,9 +470,9 @@ impl FacetsUpdateIncrementalInner { /// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4). /// In that case `DeletionResult::Reduce` is returned. The parent of the reduced key may need to adjust /// its left bound as well. - fn delete_in_level<'t>( + fn delete_in_level( &self, - txn: &'t mut RwTxn, + txn: &mut RwTxn, field_id: u16, level: u8, facet_value: &[u8], @@ -529,9 +529,9 @@ impl FacetsUpdateIncrementalInner { } } - fn delete_in_level_0<'t>( + fn delete_in_level_0( &self, - txn: &'t mut RwTxn, + txn: &mut RwTxn, field_id: u16, facet_value: &[u8], docids: &RoaringBitmap, @@ -557,9 +557,9 @@ impl FacetsUpdateIncrementalInner { } } - pub fn delete<'t>( + pub fn delete( &self, - txn: &'t mut RwTxn, + txn: &mut RwTxn, field_id: u16, facet_value: &[u8], docids: &RoaringBitmap, From 5c0668afcfca87b525f6101dd4b5515c7423de1c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 31 Jan 2023 11:13:47 +0100 Subject: [PATCH 18/19] clippy: allow uninlined_format_args --- .github/workflows/rust.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index abe227db0..119be47f9 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -65,6 +65,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: clippy + args: -- --allow clippy::uninlined_format_args fmt: name: Run Rustfmt From a2690ea8d47d6104bf9e6a011b65e33ada822a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 31 Jan 2023 11:42:24 +0100 Subject: [PATCH 19/19] Reduce incremental indexing time of `words_prefix_position_docids` DB This database can easily contain millions of entries. Thus, iterating over it can be very expensive. For regular `documentAdditionOrUpdate` tasks, `del_prefix_fst_words` will always be empty. Thus, we can save a significant amount of time by adding this `if !del_prefix_fst_words.is_empty()` condition. The code's behaviour remains completely unchanged. --- .../src/update/words_prefix_position_docids.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/milli/src/update/words_prefix_position_docids.rs b/milli/src/update/words_prefix_position_docids.rs index 5dbc9f89b..6f12dde38 100644 --- a/milli/src/update/words_prefix_position_docids.rs +++ b/milli/src/update/words_prefix_position_docids.rs @@ -140,16 +140,20 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> { // We remove all the entries that are no more required in this word prefix position // docids database. - let mut iter = - self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data(); - while let Some(((prefix, _), _)) = iter.next().transpose()? { - if del_prefix_fst_words.contains(prefix.as_bytes()) { - unsafe { iter.del_current()? }; + // We also avoid iterating over the whole `word_prefix_position_docids` database if we know in + // advance that the `if del_prefix_fst_words.contains(prefix.as_bytes()) {` condition below + // will always be false (i.e. if `del_prefix_fst_words` is empty). + if !del_prefix_fst_words.is_empty() { + let mut iter = + self.index.word_prefix_position_docids.iter_mut(self.wtxn)?.lazily_decode_data(); + while let Some(((prefix, _), _)) = iter.next().transpose()? { + if del_prefix_fst_words.contains(prefix.as_bytes()) { + unsafe { iter.del_current()? }; + } } + drop(iter); } - drop(iter); - // We finally write all the word prefix position docids into the LMDB database. sorter_into_lmdb_database( self.wtxn,