Introduce the criteria update setting

This commit is contained in:
Clément Renault 2020-12-04 12:02:22 +01:00
parent f8f33d35e0
commit 61b383f422
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
7 changed files with 181 additions and 34 deletions

60
Cargo.lock generated
View File

@ -6,6 +6,15 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccc9a9dd069569f212bc4330af9f17c4afb5e8ce185e83dbb14f1349dda18b10"
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.31"
@ -345,6 +354,16 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "form_urlencoded"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00"
dependencies = [
"matches",
"percent-encoding",
]
[[package]]
name = "fs_extra"
version = "1.1.0"
@ -429,9 +448,9 @@ dependencies = [
[[package]]
name = "heed"
version = "0.10.4"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cddc0d0d20adfc803b3e57c2d84447e134cad636202e68e275c65e3cbe63c616"
checksum = "2eaba3b0edee6a9cd551f24caca2027922b03259f7203a15f0b86af4c1348fcc"
dependencies = [
"byteorder",
"heed-traits",
@ -453,9 +472,9 @@ checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c"
[[package]]
name = "heed-types"
version = "0.7.1"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72fc61caee13e85ea330eabf0c6c7098c511ff173bcb57a760b1eda3bba9f6eb"
checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405"
dependencies = [
"bincode",
"heed-traits",
@ -571,9 +590,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.79"
version = "0.2.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743"
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
[[package]]
name = "linked-hash-map"
@ -675,6 +694,7 @@ dependencies = [
"pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)",
"pest_derive",
"rayon",
"regex",
"ringtail",
"roaring",
"serde",
@ -760,9 +780,9 @@ checksum = "ce30a214135d83e7250f2e8fad781f7cb987e3a3f1b4529712d891594bda311c"
[[package]]
name = "once_cell"
version = "1.4.0"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d"
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
[[package]]
name = "oorandom"
@ -1003,11 +1023,14 @@ dependencies = [
[[package]]
name = "regex"
version = "1.4.1"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8963b85b8ce3074fecffde43b4b0dded83ce2f367dc8d363afc56679f3ee820b"
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
@ -1021,9 +1044,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
version = "0.6.20"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cab7a364d15cde1e505267766a2d3c4e22a843e1a601f0fa7564c0f82ced11c"
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
[[package]]
name = "remove_dir_all"
@ -1096,9 +1119,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
version = "1.0.110"
version = "1.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c"
checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a"
dependencies = [
"serde_derive",
]
@ -1115,9 +1138,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.110"
version = "1.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984"
checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e"
dependencies = [
"proc-macro2",
"quote",
@ -1406,10 +1429,11 @@ checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
[[package]]
name = "url"
version = "2.1.1"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb"
checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
dependencies = [
"form_urlencoded",
"idna",
"matches",
"percent-encoding",

View File

@ -14,7 +14,7 @@ flate2 = "1.0.17"
fst = "0.4.4"
fxhash = "0.2.1"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3eb7ad9" }
heed = { version = "0.10.4", default-features = false, features = ["lmdb", "sync-read-txn"] }
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
human_format = "1.0.3"
jemallocator = "0.3.2"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
@ -26,6 +26,7 @@ obkv = "0.1.0"
once_cell = "1.4.0"
ordered-float = "2.0.0"
rayon = "1.3.1"
regex = "1.4.2"
ringtail = "0.3.0"
roaring = "0.6.1"
serde = { version = "1.0", features = ["derive"] }

55
http-ui/Cargo.lock generated
View File

@ -6,6 +6,15 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.34"
@ -404,6 +413,16 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00"
dependencies = [
"matches",
"percent-encoding",
]
[[package]]
name = "fs_extra"
version = "1.2.0"
@ -654,9 +673,9 @@ dependencies = [
[[package]]
name = "heed"
version = "0.10.4"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cddc0d0d20adfc803b3e57c2d84447e134cad636202e68e275c65e3cbe63c616"
checksum = "2eaba3b0edee6a9cd551f24caca2027922b03259f7203a15f0b86af4c1348fcc"
dependencies = [
"byteorder",
"heed-traits",
@ -678,9 +697,9 @@ checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c"
[[package]]
name = "heed-types"
version = "0.7.1"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72fc61caee13e85ea330eabf0c6c7098c511ff173bcb57a760b1eda3bba9f6eb"
checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405"
dependencies = [
"bincode",
"heed-traits",
@ -1000,6 +1019,7 @@ dependencies = [
"pest 2.1.3 (git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67)",
"pest_derive",
"rayon",
"regex",
"ringtail",
"roaring",
"serde",
@ -1199,9 +1219,9 @@ checksum = "ddd8a5a0aa2f3adafe349259a5b3e21a19c388b792414c1161d60a69c1fa48e8"
[[package]]
name = "once_cell"
version = "1.4.1"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad"
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0"
[[package]]
name = "opaque-debug"
@ -1602,6 +1622,18 @@ version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
[[package]]
name = "regex"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"thread_local",
]
[[package]]
name = "regex-automata"
version = "0.1.9"
@ -1611,6 +1643,12 @@ dependencies = [
"byteorder",
]
[[package]]
name = "regex-syntax"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
[[package]]
name = "remove_dir_all"
version = "0.5.3"
@ -2137,10 +2175,11 @@ checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
[[package]]
name = "url"
version = "2.1.1"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb"
checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
dependencies = [
"form_urlencoded",
"idna",
"matches",
"percent-encoding",

View File

@ -8,7 +8,7 @@ edition = "2018"
[dependencies]
anyhow = "1.0.28"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3eb7ad9" }
heed = "0.10.4"
heed = "0.10.5"
memmap = "0.7.0"
milli = { path = ".." }
once_cell = "1.4.1"

View File

@ -243,6 +243,13 @@ struct Settings {
#[serde(default)]
faceted_attributes: Option<HashMap<String, String>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none",
)]
criteria: Option<Option<Vec<String>>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -399,6 +406,14 @@ async fn main() -> anyhow::Result<()> {
builder.set_faceted_fields(facet_types);
}
// We transpose the settings JSON struct into a real setting update.
if let Some(criteria) = settings.criteria {
match criteria {
Some(criteria) => builder.set_criteria(criteria),
None => builder.reset_criteria(),
}
}
let result = builder.execute(|indexing_step| {
let (current, total) = match indexing_step {
TransformFromUserIntoGenericFormat { documents_seen } => (documents_seen, None),

View File

@ -1,5 +1,7 @@
use crate::FieldId;
use crate::{FieldsIdsMap, FieldId};
use anyhow::{Context, bail};
use regex::Regex;
use serde::{Serialize, Deserialize};
#[derive(Debug, Serialize, Deserialize, Copy, Clone, PartialEq, Eq)]
@ -24,6 +26,38 @@ pub enum Criterion {
Desc(FieldId),
}
impl Criterion {
    /// Parses a criterion from its textual name.
    ///
    /// The fixed names `typo`, `words`, `proximity`, `attribute`,
    /// `wordsposition` and `exactness` map to their corresponding variants.
    /// Any other text must be exactly of the form `asc(field)` or
    /// `desc(field)`, where `field` is made of word characters, `_` or `-`.
    ///
    /// For the `asc`/`desc` forms the field name is inserted into
    /// `fields_ids_map` (this is why the map is taken mutably) and the
    /// resulting field id is stored in the returned criterion.
    ///
    /// # Errors
    ///
    /// Returns an error when the text is not a known criterion name, or when
    /// `fields_ids_map.insert` cannot hand out an id ("field id limit reached").
    pub fn from_str(fields_ids_map: &mut FieldsIdsMap, txt: &str) -> anyhow::Result<Criterion> {
        match txt {
            "typo" => Ok(Criterion::Typo),
            "words" => Ok(Criterion::Words),
            "proximity" => Ok(Criterion::Proximity),
            "attribute" => Ok(Criterion::Attribute),
            "wordsposition" => Ok(Criterion::WordsPosition),
            "exactness" => Ok(Criterion::Exactness),
            text => {
                // The pattern is anchored on both ends so that the whole text must be
                // a criterion; without the anchors, inputs that merely contain a valid
                // form (e.g. "foo asc(price) bar") would be silently accepted.
                let re = Regex::new(r#"^(asc|desc)\(([\w_-]+)\)$"#)?;
                let caps = re.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
                // Both groups are mandatory in the pattern, so they are always
                // present once the regex has matched.
                let order = caps.get(1).unwrap().as_str();
                let field_name = caps.get(2).unwrap().as_str();
                let field_id = fields_ids_map.insert(field_name).context("field id limit reached")?;
                match order {
                    "asc" => Ok(Criterion::Asc(field_id)),
                    "desc" => Ok(Criterion::Desc(field_id)),
                    // The regex only lets "asc" and "desc" through; this arm is kept as a
                    // defensive fallback and reports the complete offending input.
                    _ => bail!("unknown criterion name: {}", text),
                }
            },
        }
    }

    /// Returns the field id carried by an `Asc` or `Desc` criterion,
    /// or `None` for every other variant.
    pub fn field_id(&self) -> Option<FieldId> {
        match *self {
            Criterion::Asc(fid) | Criterion::Desc(fid) => Some(fid),
            _ => None,
        }
    }
}
pub fn default_criteria() -> Vec<Criterion> {
vec![
Criterion::Typo,

View File

@ -8,7 +8,7 @@ use rayon::ThreadPool;
use crate::update::index_documents::{Transform, IndexDocumentsMethod};
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
use crate::facet::FacetType;
use crate::{Index, FieldsIdsMap};
use crate::{Index, FieldsIdsMap, Criterion};
pub struct Settings<'a, 't, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -27,6 +27,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
searchable_fields: Option<Option<Vec<String>>>,
displayed_fields: Option<Option<Vec<String>>>,
faceted_fields: Option<HashMap<String, String>>,
criteria: Option<Option<Vec<String>>>,
}
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
@ -45,6 +46,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
searchable_fields: None,
displayed_fields: None,
faceted_fields: None,
criteria: None,
}
}
@ -68,6 +70,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.faceted_fields = Some(names_facet_types);
}
/// Records that the criteria must be reset.
///
/// Nothing is written here: the request is stored as `Some(None)` and
/// applied by `execute`, which calls `delete_criteria` on the index.
pub fn reset_criteria(&mut self) {
self.criteria = Some(None);
}
/// Records the new list of criteria, given by their textual names.
///
/// Nothing is written here: the names are stored and later parsed by
/// `execute` with `Criterion::from_str`. `execute` fails if a name is not
/// a valid criterion or if an `asc`/`desc` criterion targets a field that
/// is not faceted.
pub fn set_criteria(&mut self, criteria: Vec<String>) {
self.criteria = Some(Some(criteria));
}
pub fn execute<F>(self, progress_callback: F) -> anyhow::Result<()>
where
F: Fn(UpdateIndexingStep) + Sync
@ -75,6 +85,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
let mut updated_searchable_fields = None;
let mut updated_faceted_fields = None;
let mut updated_displayed_fields = None;
let mut updated_criteria = None;
// Construct the new FieldsIdsMap based on the searchable fields order.
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
@ -113,9 +124,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
None => fields_ids_map.insert("id").context("field id limit reached")?,
};
if let Some(fields_names_facet_types) = self.faceted_fields {
let current_faceted_fields = self.index.faceted_fields(self.wtxn)?;
if let Some(fields_names_facet_types) = self.faceted_fields {
let mut faceted_fields = HashMap::new();
for (name, sftype) in fields_names_facet_types {
let ftype = FacetType::from_str(&sftype).with_context(|| format!("parsing facet type {:?}", sftype))?;
@ -147,6 +157,25 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
}
if let Some(criteria) = self.criteria {
match criteria {
Some(criteria_names) => {
let mut new_criteria = Vec::new();
for name in criteria_names {
let criterion = Criterion::from_str(&mut fields_ids_map, &name)?;
if let Some(fid) = criterion.field_id() {
let name = fields_ids_map.name(fid).unwrap();
let faceted_fields = updated_faceted_fields.as_ref().unwrap_or(&current_faceted_fields);
ensure!(faceted_fields.contains_key(&fid), "criterion field {} must be faceted", name);
}
new_criteria.push(criterion);
}
updated_criteria = Some(Some(new_criteria));
},
None => updated_criteria = Some(None),
}
}
// If any setting has modified any of the data structures, it means that we need
// to retrieve the documents and then reindex them with the new settings.
if updated_searchable_fields.is_some() || updated_faceted_fields.is_some() {
@ -202,14 +231,19 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
if let Some(displayed_fields) = updated_displayed_fields {
// We write the displayed fields into the database here
// to make sure that the right fields are displayed.
match displayed_fields {
Some(fields) => self.index.put_displayed_fields(self.wtxn, &fields)?,
None => self.index.delete_displayed_fields(self.wtxn).map(drop)?,
}
}
if let Some(criteria) = updated_criteria {
match criteria {
Some(criteria) => self.index.put_criteria(self.wtxn, &criteria)?,
None => self.index.delete_criteria(self.wtxn).map(drop)?,
}
}
Ok(())
}
}