From 95dfbd1fe093ca180802fa4e0ffbab8664a0e201 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Fri, 29 Mar 2019 17:28:54 +0100
Subject: [PATCH] feat: Introduce the meilidb-data schema module

---
Notes (free-form text in this area is not part of the diff):

  What the patch does
    * Adds meilidb-data/src/schema.rs: SchemaProps (stored / indexed /
      ranked flags, combinable with `|`), SchemaBuilder (insertion-ordered
      attribute map), Schema (Arc-shared, with TOML / JSON / bincode
      round-trips) and SchemaAttr (a u16 attribute index).
    * Switches meilidb-core from the serde_derive crate to serde's "derive"
      feature, and makes meilidb depend on meilidb-data.

  About this copy
    * Rebuilt from an extract in which every newline had been collapsed and
      every angle-bracketed span removed. Line breaks, indentation and blank
      context lines were re-created; they agree with every hunk header and
      with the diffstat (schema.rs is exactly 309 added lines).
    * Generic parameter lists were restored from how the values are used in
      the same file. The boxed error type is written `Box<dyn Error>`; the
      original may have used the older bare form `Box<Error>`.
    * The mail addresses in `From:` and in the `authors = [...]` hunk labels
      were lost as well and are NOT restored here. TODO: restore the `From:`
      address before feeding this to `git am`.

  Review remarks (not applied, so the diff stays faithful to the commit)
    * lib.rs re-exports only Schema, SchemaAttr and SchemaBuilder.
      SchemaProps and the STORED / INDEXED / RANKED constants stay inside
      the private `schema` module, so code outside the crate has no path to
      a value it could pass to SchemaBuilder::new_attribute.
    * meilidb/src/sort_by_attr.rs now imports `meilidb_data::RankedMap`, but
      lib.rs as patched here does not export that name. TODO: confirm the
      file is not part of the build yet, or export RankedMap.
    * SchemaBuilder::new_attribute and SchemaBuilder::build narrow the
      attribute position with `as u16`, which wraps silently once a schema
      holds more than 65536 attributes.
    * `use meilidb_core::DocumentId;` is not used anywhere in schema.rs.

 meilidb-core/Cargo.toml     |   3 +-
 meilidb-core/src/lib.rs     |   2 +-
 meilidb-data/Cargo.toml     |   6 +
 meilidb-data/src/lib.rs     |   2 +
 meilidb-data/src/schema.rs  | 309 ++++++++++++++++++++++++++++++++++++
 meilidb/Cargo.toml          |   1 +
 meilidb/src/sort_by_attr.rs |   4 +-
 7 files changed, 322 insertions(+), 5 deletions(-)
 create mode 100644 meilidb-data/src/schema.rs

diff --git a/meilidb-core/Cargo.toml b/meilidb-core/Cargo.toml
index fbac7dbe2..233243016 100644
--- a/meilidb-core/Cargo.toml
+++ b/meilidb-core/Cargo.toml
@@ -14,8 +14,7 @@ log = "0.4.6"
 meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
 rayon = "1.0.3"
 sdset = "0.3.1"
-serde = "1.0.88"
-serde_derive = "1.0.88"
+serde = { version = "1.0.88", features = ["derive"] }
 slice-group-by = "0.2.4"
 
 [features]
diff --git a/meilidb-core/src/lib.rs b/meilidb-core/src/lib.rs
index 11c734e37..18e9a99cc 100644
--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@@ -9,7 +9,7 @@ pub mod shared_data_cursor;
 pub mod write_to_bytes;
 
 use std::sync::Arc;
-use serde_derive::{Serialize, Deserialize};
+use serde::{Serialize, Deserialize};
 use slice_group_by::GroupBy;
 use rayon::slice::ParallelSliceMut;
 
diff --git a/meilidb-data/Cargo.toml b/meilidb-data/Cargo.toml
index 82d1bd8d4..f0046bc1a 100644
--- a/meilidb-data/Cargo.toml
+++ b/meilidb-data/Cargo.toml
@@ -5,4 +5,10 @@ authors = ["Kerollmops "]
 edition = "2018"
 
 [dependencies]
+bincode = "1.1.2"
+linked-hash-map = { version = "0.5.2", features = ["serde_impl"] }
+meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
+serde = { version = "1.0.88", features = ["derive"] }
+serde_json = { version = "1.0.39", features = ["preserve_order"] }
 sled = "0.20.0"
+toml = { version = "0.5.0", features = ["preserve_order"] }
diff --git a/meilidb-data/src/lib.rs b/meilidb-data/src/lib.rs
index 4308ec6ad..a2f028ecd 100644
--- a/meilidb-data/src/lib.rs
+++ b/meilidb-data/src/lib.rs
@@ -1,3 +1,5 @@
 mod database;
+mod schema;
 
 pub use self::database::{Database, Index};
+pub use self::schema::{Schema, SchemaAttr, SchemaBuilder};
diff --git a/meilidb-data/src/schema.rs b/meilidb-data/src/schema.rs
new file mode 100644
index 000000000..c73b8b067
--- /dev/null
+++ b/meilidb-data/src/schema.rs
@@ -0,0 +1,309 @@
+use std::collections::{HashMap, BTreeMap};
+use std::io::{Read, Write};
+use std::error::Error;
+use std::{fmt, u16};
+use std::ops::BitOr;
+use std::sync::Arc;
+
+use serde::{Serialize, Deserialize};
+use linked_hash_map::LinkedHashMap;
+
+use meilidb_core::DocumentId;
+
+pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false, ranked: false };
+pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true, ranked: false };
+pub const RANKED: SchemaProps = SchemaProps { stored: false, indexed: false, ranked: true };
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct SchemaProps {
+    #[serde(default)]
+    stored: bool,
+
+    #[serde(default)]
+    indexed: bool,
+
+    #[serde(default)]
+    ranked: bool,
+}
+
+impl SchemaProps {
+    pub fn is_stored(self) -> bool {
+        self.stored
+    }
+
+    pub fn is_indexed(self) -> bool {
+        self.indexed
+    }
+
+    pub fn is_ranked(self) -> bool {
+        self.ranked
+    }
+}
+
+impl BitOr for SchemaProps {
+    type Output = Self;
+
+    fn bitor(self, other: Self) -> Self::Output {
+        SchemaProps {
+            stored: self.stored | other.stored,
+            indexed: self.indexed | other.indexed,
+            ranked: self.ranked | other.ranked,
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize)]
+pub struct SchemaBuilder {
+    identifier: String,
+    attributes: LinkedHashMap<String, SchemaProps>,
+}
+
+impl SchemaBuilder {
+    pub fn with_identifier<S: Into<String>>(name: S) -> SchemaBuilder {
+        SchemaBuilder {
+            identifier: name.into(),
+            attributes: LinkedHashMap::new(),
+        }
+    }
+
+    pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr {
+        let len = self.attributes.len();
+        if self.attributes.insert(name.into(), props).is_some() {
+            panic!("Field already inserted.")
+        }
+        SchemaAttr(len as u16)
+    }
+
+    pub fn build(self) -> Schema {
+        let mut attrs = HashMap::new();
+        let mut props = Vec::new();
+
+        for (i, (name, prop)) in self.attributes.into_iter().enumerate() {
+            attrs.insert(name.clone(), SchemaAttr(i as u16));
+            props.push((name, prop));
+        }
+
+        let identifier = self.identifier;
+        Schema { inner: Arc::new(InnerSchema { identifier, attrs, props }) }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Schema {
+    inner: Arc<InnerSchema>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct InnerSchema {
+    identifier: String,
+    attrs: HashMap<String, SchemaAttr>,
+    props: Vec<(String, SchemaProps)>,
+}
+
+impl Schema {
+    pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<dyn Error>> {
+        let mut buffer = Vec::new();
+        reader.read_to_end(&mut buffer)?;
+        let builder: SchemaBuilder = toml::from_slice(&buffer)?;
+        Ok(builder.build())
+    }
+
+    pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<dyn Error>> {
+        let identifier = self.inner.identifier.clone();
+        let attributes = self.attributes_ordered();
+        let builder = SchemaBuilder { identifier, attributes };
+
+        let string = toml::to_string_pretty(&builder)?;
+        writer.write_all(string.as_bytes())?;
+
+        Ok(())
+    }
+
+    pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<dyn Error>> {
+        let mut buffer = Vec::new();
+        reader.read_to_end(&mut buffer)?;
+        let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
+        Ok(builder.build())
+    }
+
+    pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<dyn Error>> {
+        let identifier = self.inner.identifier.clone();
+        let attributes = self.attributes_ordered();
+        let builder = SchemaBuilder { identifier, attributes };
+        let string = serde_json::to_string_pretty(&builder)?;
+        writer.write_all(string.as_bytes())?;
+
+        Ok(())
+    }
+
+    pub(crate) fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> {
+        let builder: SchemaBuilder = bincode::deserialize_from(reader)?;
+        Ok(builder.build())
+    }
+
+    pub(crate) fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
+        let identifier = self.inner.identifier.clone();
+        let attributes = self.attributes_ordered();
+        let builder = SchemaBuilder { identifier, attributes };
+
+        bincode::serialize_into(writer, &builder)
+    }
+
+    fn attributes_ordered(&self) -> LinkedHashMap<String, SchemaProps> {
+        let mut ordered = BTreeMap::new();
+        for (name, attr) in &self.inner.attrs {
+            let (_, props) = self.inner.props[attr.0 as usize];
+            ordered.insert(attr.0, (name, props));
+        }
+
+        let mut attributes = LinkedHashMap::with_capacity(ordered.len());
+        for (_, (name, props)) in ordered {
+            attributes.insert(name.clone(), props);
+        }
+
+        attributes
+    }
+
+    pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
+        let (_, props) = self.inner.props[attr.0 as usize];
+        props
+    }
+
+    pub fn identifier_name(&self) -> &str {
+        &self.inner.identifier
+    }
+
+    pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
+        self.inner.attrs.get(name.as_ref()).cloned()
+    }
+
+    pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
+        let (name, _) = &self.inner.props[attr.0 as usize];
+        name
+    }
+}
+
+#[derive(Serialize, Deserialize)]
+#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
+pub struct SchemaAttr(pub u16);
+
+impl SchemaAttr {
+    pub fn new(value: u16) -> SchemaAttr {
+        SchemaAttr(value)
+    }
+
+    pub fn min() -> SchemaAttr {
+        SchemaAttr(0)
+    }
+
+    pub fn next(self) -> Option<SchemaAttr> {
+        self.0.checked_add(1).map(SchemaAttr)
+    }
+
+    pub fn prev(self) -> Option<SchemaAttr> {
+        self.0.checked_sub(1).map(SchemaAttr)
+    }
+
+    pub fn max() -> SchemaAttr {
+        SchemaAttr(u16::MAX)
+    }
+}
+
+impl fmt::Display for SchemaAttr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::error::Error;
+
+    #[test]
+    fn serialize_deserialize() -> bincode::Result<()> {
+        let mut builder = SchemaBuilder::with_identifier("id");
+        builder.new_attribute("alpha", STORED);
+        builder.new_attribute("beta", STORED | INDEXED);
+        builder.new_attribute("gamma", INDEXED);
+        let schema = builder.build();
+
+        let mut buffer = Vec::new();
+
+        schema.write_to_bin(&mut buffer)?;
+        let schema2 = Schema::read_from_bin(buffer.as_slice())?;
+
+        assert_eq!(schema, schema2);
+
+        Ok(())
+    }
+
+    #[test]
+    fn serialize_deserialize_toml() -> Result<(), Box<dyn Error>> {
+        let mut builder = SchemaBuilder::with_identifier("id");
+        builder.new_attribute("alpha", STORED);
+        builder.new_attribute("beta", STORED | INDEXED);
+        builder.new_attribute("gamma", INDEXED);
+        let schema = builder.build();
+
+        let mut buffer = Vec::new();
+        schema.to_toml(&mut buffer)?;
+
+        let schema2 = Schema::from_toml(buffer.as_slice())?;
+        assert_eq!(schema, schema2);
+
+        let data = r#"
+            identifier = "id"
+
+            [attributes."alpha"]
+            stored = true
+
+            [attributes."beta"]
+            stored = true
+            indexed = true
+
+            [attributes."gamma"]
+            indexed = true
+        "#;
+        let schema2 = Schema::from_toml(data.as_bytes())?;
+        assert_eq!(schema, schema2);
+
+        Ok(())
+    }
+
+    #[test]
+    fn serialize_deserialize_json() -> Result<(), Box<dyn Error>> {
+        let mut builder = SchemaBuilder::with_identifier("id");
+        builder.new_attribute("alpha", STORED);
+        builder.new_attribute("beta", STORED | INDEXED);
+        builder.new_attribute("gamma", INDEXED);
+        let schema = builder.build();
+
+        let mut buffer = Vec::new();
+        schema.to_json(&mut buffer)?;
+
+        let schema2 = Schema::from_json(buffer.as_slice())?;
+        assert_eq!(schema, schema2);
+
+        let data = r#"
+            {
+                "identifier": "id",
+                "attributes": {
+                    "alpha": {
+                        "stored": true
+                    },
+                    "beta": {
+                        "stored": true,
+                        "indexed": true
+                    },
+                    "gamma": {
+                        "indexed": true
+                    }
+                }
+            }"#;
+        let schema2 = Schema::from_json(data.as_bytes())?;
+        assert_eq!(schema, schema2);
+
+        Ok(())
+    }
+}
diff --git a/meilidb/Cargo.toml b/meilidb/Cargo.toml
index 8dc6f0db5..e8cdb8d56 100644
--- a/meilidb/Cargo.toml
+++ b/meilidb/Cargo.toml
@@ -6,6 +6,7 @@ authors = ["Kerollmops "]
 
 [dependencies]
 meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
+meilidb-data = { path = "../meilidb-data", version = "0.1.0" }
 meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
 
 [features]
diff --git a/meilidb/src/sort_by_attr.rs b/meilidb/src/sort_by_attr.rs
index 24364aaf4..b7a1013fd 100644
--- a/meilidb/src/sort_by_attr.rs
+++ b/meilidb/src/sort_by_attr.rs
@@ -5,8 +5,8 @@ use std::fmt;
 use meilidb_core::criterion::Criterion;
 use meilidb_core::RawDocument;
 
-use crate::database::schema::{Schema, SchemaAttr};
-use crate::database::RankedMap;
+use meilidb_data::{Schema, SchemaAttr};
+use meilidb_data::RankedMap;
 
 /// An helper struct that permit to sort documents by
 /// some of their stored attributes.