From 3d0b334942214d8979a9a911391f5fab6a172152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 3 Dec 2018 14:39:56 +0100 Subject: [PATCH] feat: Introduce the DocumentKey and DocumentKeyAttr structs --- src/automaton.rs | 1 + src/blob/mod.rs | 1 + src/database/mod.rs | 335 ++++++++++++++------------------------------ src/index/schema.rs | 5 + src/lib.rs | 3 - 5 files changed, 116 insertions(+), 229 deletions(-) diff --git a/src/automaton.rs b/src/automaton.rs index 90ceb407a..f8898424d 100644 --- a/src/automaton.rs +++ b/src/automaton.rs @@ -1,6 +1,7 @@ use std::ops::Deref; use fst::Automaton; +use lazy_static::lazy_static; use levenshtein_automata::{ LevenshteinAutomatonBuilder as LevBuilder, DFA, Distance, diff --git a/src/blob/mod.rs b/src/blob/mod.rs index 10357e7dc..d2c9d4253 100644 --- a/src/blob/mod.rs +++ b/src/blob/mod.rs @@ -8,6 +8,7 @@ pub use self::ops::OpBuilder; use std::fmt; +use serde_derive::{Serialize, Deserialize}; use serde::ser::{Serialize, Serializer, SerializeTuple}; use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; diff --git a/src/database/mod.rs b/src/database/mod.rs index 961b98ab2..3d8e6e184 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -1,13 +1,15 @@ -use std::io::{Cursor, Write}; +use std::io::{Cursor, Read, Write}; use std::{fmt, marker}; use std::error::Error; +use std::mem::size_of; use std::path::Path; use rocksdb::rocksdb::{DB, Snapshot, DBVector}; use rocksdb::rocksdb_options::ReadOptions; -use byteorder::{NetworkEndian, WriteBytesExt}; +use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt}; use serde::de::{DeserializeOwned, Visitor}; use serde::de::value::MapDeserializer; +use serde::forward_to_deserialize_any; use crate::index::schema::{Schema, SchemaAttr}; use crate::blob::positive::PositiveBlob; @@ -17,8 +19,8 @@ use crate::DocumentId; const DATA_INDEX: &[u8] = b"data-index"; const DATA_SCHEMA: &[u8] = b"data-schema"; -const DOC_KEY_LEN: usize = 4 + std::mem::size_of::(); -const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + std::mem::size_of::(); +const DOC_KEY_LEN: usize = 4 + size_of::(); +const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + size_of::(); // FIXME Do not panic! fn retrieve_data_schema(snapshot: &Snapshot<&DB>) -> Result> { @@ -35,36 +37,90 @@ fn retrieve_data_index(snapshot: &Snapshot<&DB>) -> Result, - id: DocumentId, - attr: SchemaAttr -) -> Result, Box> -{ - let attribute_key = document_key_attr(id, attr); - Ok(snapshot.get(&attribute_key)?) +#[derive(Copy, Clone)] +pub struct DocumentKey([u8; DOC_KEY_LEN]); + +impl DocumentKey { + pub fn new(id: DocumentId) -> DocumentKey { + let mut buffer = [0; DOC_KEY_LEN]; + + let mut wtr = Cursor::new(&mut buffer[..]); + wtr.write_all(b"doc-").unwrap(); + wtr.write_u64::(id).unwrap(); + + DocumentKey(buffer) + } + + pub fn from_bytes(mut bytes: &[u8]) -> DocumentKey { + assert!(bytes.len() >= DOC_KEY_LEN); + assert_eq!(&bytes[..4], b"doc-"); + + let mut buffer = [0; DOC_KEY_LEN]; + bytes.read_exact(&mut buffer).unwrap(); + + DocumentKey(buffer) + } + + pub fn with_attribute(&self, attr: SchemaAttr) -> DocumentKeyAttr { + DocumentKeyAttr::new(self.document_id(), attr) + } + + pub fn document_id(&self) -> DocumentId { + (&self.0[4..]).read_u64::().unwrap() + } } -fn document_key(id: DocumentId) -> [u8; DOC_KEY_LEN] { - let mut key = [0; DOC_KEY_LEN]; - - let mut wtr = Cursor::new(&mut key[..]); - wtr.write_all(b"doc-").unwrap(); - wtr.write_u64::(id).unwrap(); - - key +impl AsRef<[u8]> for DocumentKey { + fn as_ref(&self) -> &[u8] { + &self.0 + } } -fn document_key_attr(id: DocumentId, attr: SchemaAttr) -> [u8; DOC_KEY_ATTR_LEN] { - let mut key = [0; DOC_KEY_ATTR_LEN]; - let raw_key = document_key(id); +#[derive(Copy, Clone)] +pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]); - let mut wtr = Cursor::new(&mut key[..]); - wtr.write_all(&raw_key).unwrap(); - wtr.write_all(b"-").unwrap(); - wtr.write_u32::(attr.as_u32()).unwrap(); +impl DocumentKeyAttr { + pub fn new(id: DocumentId, attr: SchemaAttr) -> DocumentKeyAttr { + let mut buffer = [0; DOC_KEY_ATTR_LEN]; + let DocumentKey(raw_key) = DocumentKey::new(id); - key + let mut wtr = Cursor::new(&mut buffer[..]); + wtr.write_all(&raw_key).unwrap(); + wtr.write_all(b"-").unwrap(); + wtr.write_u32::(attr.as_u32()).unwrap(); + + DocumentKeyAttr(buffer) + } + + pub fn from_bytes(mut bytes: &[u8]) -> DocumentKeyAttr { + assert!(bytes.len() >= DOC_KEY_ATTR_LEN); + assert_eq!(&bytes[..4], b"doc-"); + + let mut buffer = [0; DOC_KEY_ATTR_LEN]; + bytes.read_exact(&mut buffer).unwrap(); + + DocumentKeyAttr(buffer) + } + + pub fn document_id(&self) -> DocumentId { + (&self.0[4..]).read_u64::().unwrap() + } + + pub fn attribute(&self) -> SchemaAttr { + let offset = 4 + size_of::() + 1; + let value = (&self.0[offset..]).read_u32::().unwrap(); + SchemaAttr::new(value) + } + + pub fn into_document_key(self) -> DocumentKey { + DocumentKey::new(self.document_id()) + } +} + +impl AsRef<[u8]> for DocumentKeyAttr { + fn as_ref(&self) -> &[u8] { + &self.0 + } } pub struct Database(DB); @@ -162,229 +218,56 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de> { - unimplemented!() + self.deserialize_map(visitor) } - fn deserialize_bool(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_i8(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_i16(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_i32(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_i64(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_u8(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_u16(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_u32(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_u64(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_f32(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_f64(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_char(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_str(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_string(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_bytes(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_option(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_unit(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_unit_struct( - self, - name: &'static str, - visitor: V - ) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_newtype_struct( - self, - name: &'static str, - visitor: V - ) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_seq(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_tuple( - self, - len: usize, - visitor: V - ) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_tuple_struct( - self, - name: &'static str, - len: usize, - visitor: V - ) -> Result - where V: Visitor<'de> - { - unimplemented!() + forward_to_deserialize_any! { + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq + bytes byte_buf unit_struct tuple_struct + identifier tuple ignored_any option newtype_struct enum + struct } fn deserialize_map(self, visitor: V) -> Result where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_struct( - self, - name: &'static str, - fields: &'static [&'static str], - visitor: V - ) -> Result - where V: Visitor<'de> { let mut options = ReadOptions::new(); - options.set_iterate_lower_bound(&document_key(self.document_id)); - options.set_iterate_upper_bound(&document_key(self.document_id + 1)); + let lower = DocumentKey::new(self.document_id); + let upper = DocumentKey::new(self.document_id + 1); + options.set_iterate_lower_bound(lower.as_ref()); + options.set_iterate_upper_bound(upper.as_ref()); let mut db_iter = self.snapshot.iter_opt(options); - let iter = db_iter.map(|(key, value)| ("hello", "ok")); - - // Create the DocumentKey and DocumentKeyAttr types - // to help create and parse document keys attributes... - unimplemented!(); + let iter = db_iter.map(|(key, value)| { + // retrieve the schema attribute name + // from the schema attribute number + let document_key_attr = DocumentKeyAttr::from_bytes(&key); + let schema_attr = document_key_attr.attribute(); + let attribute_name = self.schema.attribute_name(schema_attr); + (attribute_name, value) + }); let map_deserializer = MapDeserializer::new(iter); visitor.visit_map(map_deserializer) } - - fn deserialize_enum( - self, - name: &'static str, - variants: &'static [&'static str], - visitor: V - ) -> Result - where V: Visitor<'de> - { - unimplemented!() - } - - fn deserialize_identifier(self, visitor: V) -> Result - where V: Visitor<'de>, - { - unimplemented!() - } - - fn deserialize_ignored_any(self, visitor: V) -> Result - where V: Visitor<'de> - { - unimplemented!() - } } #[derive(Debug)] -struct DeserializerError; +enum DeserializerError { + Custom(String), +} impl serde::de::Error for DeserializerError { fn custom(msg: T) -> Self { - unimplemented!() + DeserializerError::Custom(msg.to_string()) } } impl fmt::Display for DeserializerError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - unimplemented!() + match self { + DeserializerError::Custom(s) => f.write_str(&s), + } } } diff --git a/src/index/schema.rs b/src/index/schema.rs index 286d48398..2ff190c69 100644 --- a/src/index/schema.rs +++ b/src/index/schema.rs @@ -5,6 +5,7 @@ use std::ops::BitOr; use std::fs::File; use std::fmt; +use serde_derive::{Serialize, Deserialize}; use linked_hash_map::LinkedHashMap; pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false }; @@ -118,6 +119,10 @@ impl Schema { pub struct SchemaAttr(u32); impl SchemaAttr { + pub fn new(value: u32) -> SchemaAttr { + SchemaAttr(value) + } + pub fn as_u32(&self) -> u32 { self.0 } diff --git a/src/lib.rs b/src/lib.rs index 9c8f84c8a..ecb48b9b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,3 @@ -#[macro_use] extern crate lazy_static; -#[macro_use] extern crate serde_derive; - pub mod automaton; pub mod blob; pub mod database;