From 61b383f4225553b6295d5ae7cbdc790895151fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 4 Dec 2020 12:02:22 +0100 Subject: [PATCH] Introduce the criteria update setting --- Cargo.lock | 60 +++++++++++++++++++++++++++++------------- Cargo.toml | 3 ++- http-ui/Cargo.lock | 55 ++++++++++++++++++++++++++++++++------ http-ui/Cargo.toml | 2 +- http-ui/src/main.rs | 15 +++++++++++ src/criterion.rs | 36 ++++++++++++++++++++++++- src/update/settings.rs | 44 +++++++++++++++++++++++++++---- 7 files changed, 181 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 70128cfa9..de41e6a19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,6 +6,15 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccc9a9dd069569f212bc4330af9f17c4afb5e8ce185e83dbb14f1349dda18b10" +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + [[package]] name = "anyhow" version = "1.0.31" @@ -345,6 +354,16 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "form_urlencoded" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00" +dependencies = [ + "matches", + "percent-encoding", +] + [[package]] name = "fs_extra" version = "1.1.0" @@ -429,9 +448,9 @@ dependencies = [ [[package]] name = "heed" -version = "0.10.4" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddc0d0d20adfc803b3e57c2d84447e134cad636202e68e275c65e3cbe63c616" +checksum = "2eaba3b0edee6a9cd551f24caca2027922b03259f7203a15f0b86af4c1348fcc" dependencies = [ "byteorder", "heed-traits", @@ -453,9 +472,9 @@ checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c" [[package]] name = "heed-types" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72fc61caee13e85ea330eabf0c6c7098c511ff173bcb57a760b1eda3bba9f6eb" +checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405" dependencies = [ "bincode", "heed-traits", @@ -571,9 +590,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.79" +version = "0.2.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" +checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614" [[package]] name = "linked-hash-map" @@ -675,6 +694,7 @@ dependencies = [ "pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)", "pest_derive", "rayon", + "regex", "ringtail", "roaring", "serde", @@ -760,9 +780,9 @@ checksum = "ce30a214135d83e7250f2e8fad781f7cb987e3a3f1b4529712d891594bda311c" [[package]] name = "once_cell" -version = "1.4.0" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d" +checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" [[package]] name = "oorandom" @@ -1003,11 +1023,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8963b85b8ce3074fecffde43b4b0dded83ce2f367dc8d363afc56679f3ee820b" +checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c" dependencies = [ + "aho-corasick", + "memchr", "regex-syntax", + "thread_local", ] [[package]] @@ -1021,9 +1044,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.20" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cab7a364d15cde1e505267766a2d3c4e22a843e1a601f0fa7564c0f82ced11c" +checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189" [[package]] name = "remove_dir_all" @@ -1096,9 +1119,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.110" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c" +checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a" dependencies = [ "serde_derive", ] @@ -1115,9 +1138,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.110" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984" +checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e" dependencies = [ "proc-macro2", "quote", @@ -1406,10 +1429,11 @@ checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" [[package]] name = "url" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb" +checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e" dependencies = [ + "form_urlencoded", "idna", "matches", "percent-encoding", diff --git a/Cargo.toml b/Cargo.toml index 37c83b4f0..a6f0ff911 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ flate2 = "1.0.17" fst = "0.4.4" fxhash = "0.2.1" grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3eb7ad9" } -heed = { version = "0.10.4", default-features = false, features = ["lmdb", "sync-read-txn"] } +heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] } human_format = "1.0.3" jemallocator = "0.3.2" levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } @@ -26,6 +26,7 @@ obkv = "0.1.0" once_cell = "1.4.0" ordered-float = "2.0.0" rayon = "1.3.1" +regex = "1.4.2" ringtail = "0.3.0" roaring = "0.6.1" serde = { version = "1.0", features = ["derive"] } diff --git a/http-ui/Cargo.lock b/http-ui/Cargo.lock index b15700ce5..b3136fd77 100644 --- a/http-ui/Cargo.lock +++ b/http-ui/Cargo.lock @@ -6,6 +6,15 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + [[package]] name = "anyhow" version = "1.0.34" @@ -404,6 +413,16 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "form_urlencoded" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00" +dependencies = [ + "matches", + "percent-encoding", +] + [[package]] name = "fs_extra" version = "1.2.0" @@ -654,9 +673,9 @@ dependencies = [ [[package]] name = "heed" -version = "0.10.4" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddc0d0d20adfc803b3e57c2d84447e134cad636202e68e275c65e3cbe63c616" +checksum = "2eaba3b0edee6a9cd551f24caca2027922b03259f7203a15f0b86af4c1348fcc" dependencies = [ "byteorder", "heed-traits", @@ -678,9 +697,9 @@ checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c" [[package]] name = "heed-types" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72fc61caee13e85ea330eabf0c6c7098c511ff173bcb57a760b1eda3bba9f6eb" +checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405" dependencies = [ "bincode", "heed-traits", @@ -1000,6 +1019,7 @@ dependencies = [ "pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)", "pest_derive", "rayon", + "regex", "ringtail", "roaring", "serde", @@ -1199,9 +1219,9 @@ checksum = "ddd8a5a0aa2f3adafe349259a5b3e21a19c388b792414c1161d60a69c1fa48e8" [[package]] name = "once_cell" -version = "1.4.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad" +checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" [[package]] name = "opaque-debug" @@ -1602,6 +1622,18 @@ version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" +[[package]] +name = "regex" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + [[package]] name = "regex-automata" version = "0.1.9" @@ -1611,6 +1643,12 @@ dependencies = [ "byteorder", ] +[[package]] +name = "regex-syntax" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189" + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -2137,10 +2175,11 @@ checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" [[package]] name = "url" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb" +checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e" dependencies = [ + "form_urlencoded", "idna", "matches", "percent-encoding", diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml index b30fb95c2..bfe39dcf6 100644 --- a/http-ui/Cargo.toml +++ b/http-ui/Cargo.toml @@ -8,7 +8,7 @@ edition = "2018" [dependencies] anyhow = "1.0.28" grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3eb7ad9" } -heed = "0.10.4" +heed = "0.10.5" memmap = "0.7.0" milli = { path = ".." } once_cell = "1.4.1" diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 62d3d75bd..b14e7f892 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -243,6 +243,13 @@ struct Settings { #[serde(default)] faceted_attributes: Option>, + + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none", + )] + criteria: Option>>, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -399,6 +406,14 @@ async fn main() -> anyhow::Result<()> { builder.set_faceted_fields(facet_types); } + // We transpose the settings JSON struct into a real setting update. + if let Some(criteria) = settings.criteria { + match criteria { + Some(criteria) => builder.set_criteria(criteria), + None => builder.reset_criteria(), + } + } + let result = builder.execute(|indexing_step| { let (current, total) = match indexing_step { TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None), diff --git a/src/criterion.rs b/src/criterion.rs index 176630950..ea82055f2 100644 --- a/src/criterion.rs +++ b/src/criterion.rs @@ -1,5 +1,7 @@ -use crate::FieldId; +use crate::{FieldsIdsMap, FieldId}; +use anyhow::{Context, bail}; +use regex::Regex; use serde::{Serialize, Deserialize}; #[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, Eq)] @@ -24,6 +26,38 @@ pub enum Criterion { Desc(FieldId), } +impl Criterion { + pub fn from_str(fields_ids_map: &mut FieldsIdsMap, txt: &str) -> anyhow::Result { + match txt { + "typo" => Ok(Criterion::Typo), + "words" => Ok(Criterion::Words), + "proximity" => Ok(Criterion::Proximity), + "attribute" => Ok(Criterion::Attribute), + "wordsposition" => Ok(Criterion::WordsPosition), + "exactness" => Ok(Criterion::Exactness), + text => { + let re = Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#)?; + let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; + let order = caps.get(1).unwrap().as_str(); + let field_name = caps.get(2).unwrap().as_str(); + let field_id = fields_ids_map.insert(field_name).context("field id limit reached")?; + match order { + "asc" => Ok(Criterion::Asc(field_id)), + "desc" => Ok(Criterion::Desc(field_id)), + otherwise => bail!("unknown criterion name: {}", otherwise), + } + }, + } + } + + pub fn field_id(&self) -> Option { + match *self { + Criterion::Asc(fid) | Criterion::Desc(fid) => Some(fid), + _ => None, + } + } +} + pub fn default_criteria() -> Vec { vec![ Criterion::Typo, diff --git a/src/update/settings.rs b/src/update/settings.rs index cddd68ca3..ea0f9b5be 100644 --- a/src/update/settings.rs +++ b/src/update/settings.rs @@ -8,7 +8,7 @@ use rayon::ThreadPool; use crate::update::index_documents::{Transform, IndexDocumentsMethod}; use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; use crate::facet::FacetType; -use crate::{Index, FieldsIdsMap}; +use crate::{Index, FieldsIdsMap, Criterion}; pub struct Settings<'a, 't, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -27,6 +27,7 @@ pub struct Settings<'a, 't, 'u, 'i> { searchable_fields: Option>>, displayed_fields: Option>>, faceted_fields: Option>, + criteria: Option>>, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -45,6 +46,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { searchable_fields: None, displayed_fields: None, faceted_fields: None, + criteria: None, } } @@ -68,6 +70,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.faceted_fields = Some(names_facet_types); } + pub fn reset_criteria(&mut self) { + self.criteria = Some(None); + } + + pub fn set_criteria(&mut self, criteria: Vec) { + self.criteria = Some(Some(criteria)); + } + pub fn execute(self, progress_callback: F) -> anyhow::Result<()> where F: Fn(UpdateIndexingStep) + Sync @@ -75,6 +85,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { let mut updated_searchable_fields = None; let mut updated_faceted_fields = None; let mut updated_displayed_fields = None; + let mut updated_criteria = None; // Construct the new FieldsIdsMap based on the searchable fields order. let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; @@ -113,9 +124,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { None => fields_ids_map.insert("id").context("field id limit reached")?, }; + let current_faceted_fields = self.index.faceted_fields(self.wtxn)?; if let Some(fields_names_facet_types) = self.faceted_fields { - let current_faceted_fields = self.index.faceted_fields(self.wtxn)?; - let mut faceted_fields = HashMap::new(); for (name, sftype) in fields_names_facet_types { let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?; @@ -147,6 +157,25 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } + if let Some(criteria) = self.criteria { + match criteria { + Some(criteria_names) => { + let mut new_criteria = Vec::new(); + for name in criteria_names { + let criterion = Criterion::from_str(&mut fields_ids_map, &name)?; + if let Some(fid) = criterion.field_id() { + let name = fields_ids_map.name(fid).unwrap(); + let faceted_fields = updated_faceted_fields.as_ref().unwrap_or(¤t_faceted_fields); + ensure!(faceted_fields.contains_key(&fid), "criterion field {} must be faceted", name); + } + new_criteria.push(criterion); + } + updated_criteria = Some(Some(new_criteria)); + }, + None => updated_criteria = Some(None), + } + } + // If any setting have modified any of the datastructures it means that we need // to retrieve the documents and then reindex then with the new settings. if updated_searchable_fields.is_some() || updated_faceted_fields.is_some() { @@ -202,14 +231,19 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } if let Some(displayed_fields) = updated_displayed_fields { - // We write the displayed fields into the database here - // to make sure that the right fields are displayed. match displayed_fields { Some(fields) => self.index.put_displayed_fields(self.wtxn, &fields)?, None => self.index.delete_displayed_fields(self.wtxn).map(drop)?, } } + if let Some(criteria) = updated_criteria { + match criteria { + Some(criteria) => self.index.put_criteria(self.wtxn, &criteria)?, + None => self.index.delete_criteria(self.wtxn).map(drop)?, + } + } + Ok(()) } }