From 03709910fd2da62ce261a6c7b4a7b4e6677eafba Mon Sep 17 00:00:00 2001
From: qdequele
Date: Wed, 18 Sep 2019 11:41:29 +0200
Subject: [PATCH 1/2] feat: add typed index custom settings for common uses

---
Reviewer notes (this area is ignored by `git am` and is not part of the commit):

* What the patch does: adds two private helpers on CustomSettingsIndex,
  `get` and `set`, which bincode-(de)serialize a value stored under a byte
  key in the wrapped CfTree (`self.0`). `get` returns Ok(None) when the key
  is absent. Eight public typed accessors wrap them for the keys
  "stop-words", "ranking-order", "distinct-field" and "ranking-rules".
  The new type aliases and the RankingOrdering enum are re-exported through
  index/mod.rs, database/mod.rs and lib.rs.

* This copy was rebuilt from a dump in which every newline had been folded
  into a space and angle-bracketed text had been dropped. The generic
  arguments below (HashSet<String>, Vec<String>, HashMap<String,
  RankingOrdering>, fn get<K, T> / fn set<K, T>, Result<Option<..>, Error>)
  were restored from the `where` clauses, the Ok(None)/Ok(Some(..)) returns
  and the way tests/updates.rs uses the accessors.
  TODO confirm against the upstream commit.

* Line breaks were re-derived so that every hunk matches its `@@` line
  counts and the diffstat. Indentation and the position of blank context
  lines in the two mod.rs hunks and the lib.rs hunk are assumptions; if
  `git apply` rejects a hunk, check those first.

* NOTE(review): the author e-mail address is missing from the From: header
  and could not be recovered; restore it before using `git am`.

 .../database/index/custom_settings_index.rs   | 77 +++++++++++++++++++
 meilidb-data/src/database/index/mod.rs        |  2 +-
 meilidb-data/src/database/mod.rs              |  2 +-
 meilidb-data/src/lib.rs                       |  2 +-
 4 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/meilidb-data/src/database/index/custom_settings_index.rs b/meilidb-data/src/database/index/custom_settings_index.rs
index 0fd0aade9..3404eff5b 100644
--- a/meilidb-data/src/database/index/custom_settings_index.rs
+++ b/meilidb-data/src/database/index/custom_settings_index.rs
@@ -1,4 +1,25 @@
+use serde::de::DeserializeOwned;
+use serde::{Serialize, Deserialize};
+use std::collections::{HashMap, HashSet};
 use std::ops::Deref;
+use super::Error;
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum RankingOrdering {
+    Asc,
+    Dsc
+}
+
+pub type StopWords = HashSet<String>;
+pub type RankingOrder = Vec<String>;
+pub type DistinctField = String;
+pub type RankingRules = HashMap<String, RankingOrdering>;
+
+const STOP_WORDS_KEY: &str = "stop-words";
+const RANKING_ORDER_KEY: &str = "ranking-order";
+const DISTINCT_FIELD_KEY: &str = "distinct-field";
+const RANKING_RULES_KEY: &str = "ranking-rules";
 
 #[derive(Clone)]
 pub struct CustomSettingsIndex(pub(crate) crate::CfTree);
@@ -10,3 +31,59 @@ impl Deref for CustomSettingsIndex {
         &self.0
     }
 }
+
+impl CustomSettingsIndex {
+    fn get<K, T>(&self, key: K) -> Result<Option<T>, Error>
+    where K: AsRef<[u8]>,
+          T: DeserializeOwned,
+    {
+        let setting = self.0.get(key)?;
+        let raw = match setting {
+            Some(raw) => raw,
+            None => return Ok(None)
+        };
+
+        Ok(Some(bincode::deserialize(&raw)?))
+    }
+
+    fn set<K, T>(&self, key: K, data: &T) -> Result<(), Error>
+    where K: AsRef<[u8]>,
+          T: Serialize,
+    {
+        let raw = bincode::serialize(data)?;
+        self.0.insert(key, &raw)?;
+        Ok(())
+    }
+
+    pub fn get_stop_words(&self) -> Result<Option<StopWords>, Error> {
+        self.get(STOP_WORDS_KEY)
+    }
+
+    pub fn get_ranking_order(&self) -> Result<Option<RankingOrder>, Error> {
+        self.get(RANKING_ORDER_KEY)
+    }
+
+    pub fn get_distinct_field(&self) -> Result<Option<DistinctField>, Error> {
+        self.get(DISTINCT_FIELD_KEY)
+    }
+
+    pub fn get_ranking_rules(&self) -> Result<Option<RankingRules>, Error> {
+        self.get(RANKING_RULES_KEY)
+    }
+
+    pub fn set_stop_words(&self, value: &StopWords) -> Result<(), Error> {
+        self.set(STOP_WORDS_KEY, value)
+    }
+
+    pub fn set_ranking_order(&self, value: &RankingOrder) -> Result<(), Error> {
+        self.set(RANKING_ORDER_KEY, value)
+    }
+
+    pub fn set_distinct_field(&self, value: &DistinctField) -> Result<(), Error> {
+        self.set(DISTINCT_FIELD_KEY, value)
+    }
+
+    pub fn set_ranking_rules(&self, value: &RankingRules) -> Result<(), Error> {
+        self.set(RANKING_RULES_KEY, value)
+    }
+}
diff --git a/meilidb-data/src/database/index/mod.rs b/meilidb-data/src/database/index/mod.rs
index 1bcfaec95..5ecfedafa 100644
--- a/meilidb-data/src/database/index/mod.rs
+++ b/meilidb-data/src/database/index/mod.rs
@@ -17,7 +17,7 @@ use crate::CfTree;
 use crate::ranked_map::RankedMap;
 use crate::serde::{Deserializer, DeserializerError};
 
-pub use self::custom_settings_index::CustomSettingsIndex;
+pub use self::custom_settings_index::{CustomSettingsIndex, RankingOrdering, StopWords, RankingOrder, DistinctField, RankingRules};
 pub use self::common_index::CommonIndex;
 use self::docs_words_index::DocsWordsIndex;
 use self::documents_index::DocumentsIndex;
diff --git a/meilidb-data/src/database/mod.rs b/meilidb-data/src/database/mod.rs
index 1975dd3e8..b1391ac49 100644
--- a/meilidb-data/src/database/mod.rs
+++ b/meilidb-data/src/database/mod.rs
@@ -12,7 +12,7 @@ mod update;
 use crate::CfTree;
 
 pub use self::error::Error;
-pub use self::index::{Index, CustomSettingsIndex, CommonIndex};
+pub use self::index::{Index, CustomSettingsIndex, CommonIndex, RankingOrdering, StopWords, RankingOrder, DistinctField, RankingRules};
 pub use self::update::DocumentsAddition;
 pub use self::update::DocumentsDeletion;
 
diff --git a/meilidb-data/src/lib.rs b/meilidb-data/src/lib.rs
index 9124dcc77..168311755 100644
--- a/meilidb-data/src/lib.rs
+++ b/meilidb-data/src/lib.rs
@@ -7,7 +7,7 @@ mod ranked_map;
 mod serde;
 
 pub use self::cf_tree::{CfTree, CfIter};
-pub use self::database::{Database, Index, CustomSettingsIndex};
+pub use self::database::{Database, Index, CustomSettingsIndex, RankingOrdering, StopWords, RankingOrder, DistinctField, RankingRules};
 pub use self::number::Number;
 pub use self::ranked_map::RankedMap;
 pub use self::serde::{compute_document_id, extract_document_id, value_to_string};

From e1c119b5a8d627b56b5c883b743885896cc866a2 Mon Sep 17 00:00:00 2001
From: qdequele
Date: Wed, 18 Sep 2019 11:42:30 +0200
Subject: [PATCH 2/2] chore: add test for custom settings

---
Reviewer notes (this area is ignored by `git am` and is not part of the commit):

* What the patch does: adds the `custom_settings` integration test, which
  stores one value through each of the four typed setters, reads it back
  through the matching getter and asserts equality. It also replaces
  `"..".to_string()` with `S("..")` in `database_stats`, using the new
  `big_s` dev-dependency.

* Line breaks and indentation were re-derived from a whitespace-collapsed
  dump so that each hunk matches its `@@` counts and the diffstat;
  indentation is an assumption.

* NOTE(review): the author e-mail address is missing from the From: header
  and could not be recovered; restore it before using `git am`.

 meilidb-data/Cargo.toml       |  1 +
 meilidb-data/tests/updates.rs | 45 ++++++++++++++++++++++++++++-------
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/meilidb-data/Cargo.toml b/meilidb-data/Cargo.toml
index d08f3664d..a2428eea2 100644
--- a/meilidb-data/Cargo.toml
+++ b/meilidb-data/Cargo.toml
@@ -38,3 +38,4 @@ branch = "arc-byte-slice"
 [dev-dependencies]
 tempfile = "3.1.0"
 maplit = "1.0.2"
+big_s = "1.0.2"
diff --git a/meilidb-data/tests/updates.rs b/meilidb-data/tests/updates.rs
index 7afbbc343..8654b8ff5 100644
--- a/meilidb-data/tests/updates.rs
+++ b/meilidb-data/tests/updates.rs
@@ -3,8 +3,9 @@
 use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
 use std::sync::Arc;
 
+use big_s::S;
 use serde_json::json;
-use meilidb_data::Database;
+use meilidb_data::{Database, RankingOrdering};
 use meilidb_schema::{Schema, SchemaBuilder, DISPLAYED, INDEXED};
 
 fn simple_schema() -> Schema {
@@ -120,8 +121,8 @@ fn database_stats() {
     assert!(status.result.is_ok());
     let stats = index.stats().unwrap();
     let repartition = hashmap!{
-        "objectId".to_string() => 1u64,
-        "title".to_string() => 1u64,
+        S("objectId") => 1u64,
+        S("title") => 1u64,
     };
     assert_eq!(stats.number_of_documents, 1);
     assert_eq!(stats.documents_fields_repartition, repartition);
@@ -136,8 +137,8 @@ fn database_stats() {
     assert!(status.result.is_ok());
     let stats = index.stats().unwrap();
     let repartition = hashmap!{
-        "objectId".to_string() => 2u64,
-        "title".to_string() => 2u64,
+        S("objectId") => 2u64,
+        S("title") => 2u64,
     };
     assert_eq!(stats.number_of_documents, 2);
     assert_eq!(stats.documents_fields_repartition, repartition);
@@ -153,10 +154,38 @@ fn database_stats() {
     assert!(status.result.is_ok());
     let stats = index.stats().unwrap();
     let repartition = hashmap!{
-        "objectId".to_string() => 3u64,
-        "title".to_string() => 2u64,
+        S("objectId") => 3u64,
+        S("title") => 2u64,
     };
     assert_eq!(stats.number_of_documents, 3);
     assert_eq!(stats.documents_fields_repartition, repartition);
-
+}
+
+#[test]
+fn custom_settings() {
+    let tmp_dir = tempfile::tempdir().unwrap();
+    let database = Database::open(&tmp_dir).unwrap();
+
+    let schema = simple_schema();
+    let index = database.create_index("hello", schema).unwrap();
+
+    let stop_words = hashset!{ S("le"), S("la"), S("les"), };
+    let ranking_order = vec![S("SumOfTypos"), S("NumberOfWords"), S("WordsProximity"), S("SumOfWordsAttribute"), S("SumOfWordsPosition"), S("Exact"), S("DocumentId")];
+    let distinct_field = S("title");
+    let ranking_rules = hashmap!{ S("objectId") => RankingOrdering::Asc };
+
+    index.custom_settings().set_stop_words(&stop_words).unwrap();
+    index.custom_settings().set_ranking_order(&ranking_order).unwrap();
+    index.custom_settings().set_distinct_field(&distinct_field).unwrap();
+    index.custom_settings().set_ranking_rules(&ranking_rules).unwrap();
+
+    let ret_stop_words = index.custom_settings().get_stop_words().unwrap().unwrap();
+    let ret_ranking_orderer = index.custom_settings().get_ranking_order().unwrap().unwrap();
+    let ret_distinct_field = index.custom_settings().get_distinct_field().unwrap().unwrap();
+    let ret_ranking_rules = index.custom_settings().get_ranking_rules().unwrap().unwrap();
+
+    assert_eq!(ret_stop_words, stop_words);
+    assert_eq!(ret_ranking_orderer, ranking_order);
+    assert_eq!(ret_distinct_field, distinct_field);
+    assert_eq!(ret_ranking_rules, ranking_rules);
 }