diff --git a/src/database/database_view.rs b/src/database/database_view.rs index b72a24bbb..91f097660 100644 --- a/src/database/database_view.rs +++ b/src/database/database_view.rs @@ -1,13 +1,15 @@ use std::error::Error; -use std::marker; +use std::{fmt, marker}; -use rocksdb::rocksdb::{DB, Snapshot}; +use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey}; +use rocksdb::rocksdb_options::ReadOptions; use serde::de::DeserializeOwned; -use crate::index::schema::Schema; -use crate::blob::positive::PositiveBlob; use crate::database::deserializer::{Deserializer, DeserializerError}; use crate::database::{DATA_INDEX, DATA_SCHEMA}; +use crate::blob::positive::PositiveBlob; +use crate::index::schema::Schema; +use crate::database::{DocumentKey, DocumentKeyAttr}; use crate::DocumentId; // FIXME Do not panic! @@ -40,6 +42,10 @@ impl<'a> DatabaseView<'a> { self.snapshot } + pub fn get(&self, key: &[u8]) -> Result, Box> { + Ok(self.snapshot.get(key)?) + } + // TODO create an enum error type pub fn retrieve_document(&self, id: DocumentId) -> Result> where D: DeserializeOwned @@ -60,6 +66,36 @@ impl<'a> DatabaseView<'a> { } } +impl<'a> fmt::Debug for DatabaseView<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut options = ReadOptions::new(); + let lower = DocumentKey::new(0); + options.set_iterate_lower_bound(lower.as_ref()); + + let mut iter = self.snapshot.iter_opt(options); + iter.seek(SeekKey::Start); + let iter = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key)); + + if f.alternate() { + writeln!(f, "DatabaseView(")?; + } else { + write!(f, "DatabaseView(")?; + } + + self.schema.fmt(f)?; + + if f.alternate() { + writeln!(f, ",")?; + } else { + write!(f, ", ")?; + } + + f.debug_list().entries(iter).finish()?; + + write!(f, ")") + } +} + // TODO this is just an iter::Map !!! pub struct DocumentIter<'a, D, I> { database_view: &'a DatabaseView<'a>, diff --git a/src/database/deserializer.rs b/src/database/deserializer.rs index 3069717f0..2591b92d2 100644 --- a/src/database/deserializer.rs +++ b/src/database/deserializer.rs @@ -1,11 +1,11 @@ use std::error::Error; use std::fmt; -use rocksdb::rocksdb::{DB, Snapshot}; +use rocksdb::rocksdb::{DB, Snapshot, SeekKey}; use rocksdb::rocksdb_options::ReadOptions; -use serde::de::value::MapDeserializer; use serde::forward_to_deserialize_any; -use serde::de::Visitor; +use serde::de::value::MapDeserializer; +use serde::de::{self, Visitor, IntoDeserializer}; use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; use crate::index::schema::Schema; @@ -23,7 +23,7 @@ impl<'a> Deserializer<'a> { } } -impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { +impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> { type Error = DeserializerError; fn deserialize_any(self, visitor: V) -> Result @@ -35,8 +35,7 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { forward_to_deserialize_any! { bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq bytes byte_buf unit_struct tuple_struct - identifier tuple ignored_any option newtype_struct enum - struct + identifier tuple ignored_any option newtype_struct enum struct } fn deserialize_map(self, visitor: V) -> Result @@ -48,14 +47,20 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { options.set_iterate_lower_bound(lower.as_ref()); options.set_iterate_upper_bound(upper.as_ref()); - let mut db_iter = self.snapshot.iter_opt(options); - let iter = db_iter.map(|(key, value)| { + let mut iter = self.snapshot.iter_opt(options); + iter.seek(SeekKey::Start); + + if iter.kv().is_none() { + // FIXME return an error + } + + let iter = iter.map(|(key, value)| { // retrieve the schema attribute name // from the schema attribute number let document_key_attr = DocumentKeyAttr::from_bytes(&key); let schema_attr = document_key_attr.attribute(); let attribute_name = self.schema.attribute_name(schema_attr); - (attribute_name, value) + (attribute_name, Value(value)) }); let map_deserializer = MapDeserializer::new(iter); @@ -63,12 +68,101 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> { } } +struct Value(Vec); + +impl<'de> IntoDeserializer<'de, DeserializerError> for Value { + type Deserializer = Self; + + fn into_deserializer(self) -> Self::Deserializer { + self + } +} + +macro_rules! forward_to_bincode_values { + ($($ty:ident => $de_method:ident,)*) => { + $( + fn $de_method(self, visitor: V) -> Result + where V: de::Visitor<'de> + { + match bincode::deserialize::<$ty>(&self.0) { + Ok(val) => val.into_deserializer().$de_method(visitor), + Err(e) => Err(de::Error::custom(e)), + } + } + )* + } +} + +impl<'de, 'a> de::Deserializer<'de> for Value { + type Error = DeserializerError; + + fn deserialize_any(self, visitor: V) -> Result + where V: Visitor<'de> + { + self.0.into_deserializer().deserialize_any(visitor) + } + + fn deserialize_str(self, visitor: V) -> Result + where V: Visitor<'de> + { + self.deserialize_string(visitor) + } + + fn deserialize_string(self, visitor: V) -> Result + where V: Visitor<'de> + { + match bincode::deserialize::(&self.0) { + Ok(val) => val.into_deserializer().deserialize_string(visitor), + Err(e) => Err(de::Error::custom(e)), + } + } + + fn deserialize_bytes(self, visitor: V) -> Result + where V: Visitor<'de> + { + self.deserialize_byte_buf(visitor) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where V: Visitor<'de> + { + match bincode::deserialize::>(&self.0) { + Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor), + Err(e) => Err(de::Error::custom(e)), + } + } + + forward_to_bincode_values! { + char => deserialize_char, + bool => deserialize_bool, + + u8 => deserialize_u8, + u16 => deserialize_u16, + u32 => deserialize_u32, + u64 => deserialize_u64, + + i8 => deserialize_i8, + i16 => deserialize_i16, + i32 => deserialize_i32, + i64 => deserialize_i64, + + f32 => deserialize_f32, + f64 => deserialize_f64, + } + + forward_to_deserialize_any! { + unit seq map + unit_struct tuple_struct + identifier tuple ignored_any option newtype_struct enum struct + } +} + #[derive(Debug)] pub enum DeserializerError { Custom(String), } -impl serde::de::Error for DeserializerError { +impl de::Error for DeserializerError { fn custom(msg: T) -> Self { DeserializerError::Custom(msg.to_string()) } diff --git a/src/database/document_key.rs b/src/database/document_key.rs index 09eba8067..62010f008 100644 --- a/src/database/document_key.rs +++ b/src/database/document_key.rs @@ -1,5 +1,6 @@ use std::io::{Cursor, Read, Write}; use std::mem::size_of; +use std::fmt; use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt}; @@ -48,6 +49,14 @@ impl AsRef<[u8]> for DocumentKey { } } +impl fmt::Debug for DocumentKey { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("DocumentKey") + .field("document_id", &self.document_id()) + .finish() + } +} + #[derive(Copy, Clone)] pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]); @@ -94,3 +103,12 @@ impl AsRef<[u8]> for DocumentKeyAttr { &self.0 } } + +impl fmt::Debug for DocumentKeyAttr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("DocumentKeyAttr") + .field("document_id", &self.document_id()) + .field("attribute", &self.attribute().as_u32()) + .finish() + } +} diff --git a/src/database/mod.rs b/src/database/mod.rs index ae2f0cf00..d6fff8e7d 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -1,11 +1,13 @@ use std::error::Error; use std::path::Path; +use std::fmt; use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions}; -use rocksdb::{DB, MergeOperands}; +use rocksdb::{DB, DBVector, MergeOperands, SeekKey}; use rocksdb::rocksdb::Writable; pub use crate::database::database_view::DatabaseView; +pub use crate::database::document_key::{DocumentKey, DocumentKeyAttr}; use crate::index::update::Update; use crate::index::schema::Schema; use crate::blob::{self, Blob}; @@ -30,6 +32,7 @@ impl Database { let path = path.to_string_lossy(); let mut opts = DBOptions::new(); opts.create_if_missing(true); + // opts.error_if_exists(true); // FIXME pull request that let mut cf_opts = ColumnFamilyOptions::new(); cf_opts.add_merge_operator("data-index merge operator", merge_indexes); @@ -80,14 +83,40 @@ impl Database { Ok(()) } + pub fn get(&self, key: &[u8]) -> Result, Box> { + Ok(self.0.get(key)?) + } + + pub fn flush(&self) -> Result<(), Box> { + Ok(self.0.flush(true)?) + } + pub fn view(&self) -> Result> { let snapshot = self.0.snapshot(); DatabaseView::new(snapshot) } } +impl fmt::Debug for Database { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Database([")?; + let mut iter = self.0.iter(); + iter.seek(SeekKey::Start); + let mut first = true; + for (key, value) in &mut iter { + if !first { write!(f, ", ")?; } + first = false; + let key = String::from_utf8_lossy(&key); + write!(f, "{:?}", key)?; + } + write!(f, "])") + } +} + fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec { - if key != DATA_INDEX { panic!("The merge operator only supports \"data-index\" merging") } + if key != DATA_INDEX { + panic!("The merge operator only supports \"data-index\" merging") + } let capacity = { let remaining = operands.size_hint().0; @@ -109,3 +138,90 @@ fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut Merge let blob = op.merge().expect("BUG: could not merge blobs"); bincode::serialize(&blob).expect("BUG: could not serialize merged blob") } + +#[cfg(test)] +mod tests { + use super::*; + use std::error::Error; + use std::path::PathBuf; + + use serde_derive::{Serialize, Deserialize}; + use tempfile::tempdir; + + use crate::tokenizer::DefaultBuilder; + use crate::index::update::PositiveUpdateBuilder; + use crate::index::schema::{Schema, SchemaBuilder, STORED, INDEXED}; + + #[test] + fn ingest_update_file() -> Result<(), Box> { + let dir = tempdir()?; + + let rocksdb_path = dir.path().join("rocksdb.rdb"); + + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] + struct SimpleDoc { + title: String, + description: String, + } + + let title; + let description; + let schema = { + let mut builder = SchemaBuilder::new(); + title = builder.new_attribute("title", STORED | INDEXED); + description = builder.new_attribute("description", STORED | INDEXED); + builder.build() + }; + + let database = Database::create(&rocksdb_path, schema.clone())?; + let tokenizer_builder = DefaultBuilder::new(); + + let update_path = dir.path().join("update.sst"); + + let doc0 = SimpleDoc { + title: String::from("I am a title"), + description: String::from("I am a description"), + }; + let doc1 = SimpleDoc { + title: String::from("I am the second title"), + description: String::from("I am the second description"), + }; + + let mut update = { + let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder); + + // builder.update_field(0, title, doc0.title.clone()); + // builder.update_field(0, description, doc0.description.clone()); + + // builder.update_field(1, title, doc1.title.clone()); + // builder.update_field(1, description, doc1.description.clone()); + + builder.update(0, &doc0).unwrap(); + builder.update(1, &doc1).unwrap(); + + builder.build()? + }; + + update.set_move(true); + database.ingest_update_file(update)?; + let view = database.view()?; + + println!("{:?}", view); + + #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] + struct DeSimpleDoc { + title: char, + } + + let de_doc0: DeSimpleDoc = view.retrieve_document(0)?; + let de_doc1: DeSimpleDoc = view.retrieve_document(1)?; + + println!("{:?}", de_doc0); + println!("{:?}", de_doc1); + + // assert_eq!(doc0, de_doc0); + // assert_eq!(doc1, de_doc1); + + Ok(dir.close()?) + } +} diff --git a/src/index/schema.rs b/src/index/schema.rs index 2ff190c69..9467ce18c 100644 --- a/src/index/schema.rs +++ b/src/index/schema.rs @@ -111,7 +111,11 @@ impl Schema { } pub fn attribute_name(&self, attr: SchemaAttr) -> &str { - unimplemented!("cannot retrieve the attribute name by its attribute number") + // FIXME complexity is insane ! + for (key, &value) in &self.attrs { + if value == attr { return &key } + } + panic!("schema attribute name not found for {:?}", attr) } } diff --git a/src/index/update/mod.rs b/src/index/update/mod.rs index a12f547e6..6d99ab8cb 100644 --- a/src/index/update/mod.rs +++ b/src/index/update/mod.rs @@ -13,9 +13,6 @@ mod positive; pub use self::positive::{PositiveUpdateBuilder, NewState}; pub use self::negative::NegativeUpdateBuilder; -const DOC_KEY_LEN: usize = 4 + std::mem::size_of::(); -const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + std::mem::size_of::(); - pub struct Update { path: PathBuf, can_be_moved: bool, @@ -30,6 +27,10 @@ impl Update { Ok(Update { path: path.into(), can_be_moved: true }) } + pub fn set_move(&mut self, can_be_moved: bool) { + self.can_be_moved = can_be_moved + } + pub fn can_be_moved(&self) -> bool { self.can_be_moved } @@ -38,27 +39,3 @@ impl Update { self.path } } - -// "doc-{ID_8_BYTES}" -fn raw_document_key(id: DocumentId) -> [u8; DOC_KEY_LEN] { - let mut key = [0; DOC_KEY_LEN]; - - let mut wtr = Cursor::new(&mut key[..]); - wtr.write_all(b"doc-").unwrap(); - wtr.write_u64::(id).unwrap(); - - key -} - -// "doc-{ID_8_BYTES}-{ATTR_4_BYTES}" -fn raw_document_key_attr(id: DocumentId, attr: SchemaAttr) -> [u8; DOC_KEY_ATTR_LEN] { - let mut key = [0; DOC_KEY_ATTR_LEN]; - let raw_key = raw_document_key(id); - - let mut wtr = Cursor::new(&mut key[..]); - wtr.write_all(&raw_key).unwrap(); - wtr.write_all(b"-").unwrap(); - wtr.write_u32::(attr.as_u32()).unwrap(); - - key -} diff --git a/src/index/update/negative/update.rs b/src/index/update/negative/update.rs index 54355c6e0..ddf2fe768 100644 --- a/src/index/update/negative/update.rs +++ b/src/index/update/negative/update.rs @@ -4,7 +4,8 @@ use std::error::Error; use ::rocksdb::rocksdb_options; use crate::index::update::negative::unordered_builder::UnorderedNegativeBlobBuilder; -use crate::index::update::{Update, raw_document_key}; +use crate::index::update::Update; +use crate::database::{DocumentKey, DocumentKeyAttr}; use crate::blob::{Blob, NegativeBlob}; use crate::index::DATA_INDEX; use crate::DocumentId; @@ -48,9 +49,9 @@ impl NegativeUpdateBuilder { }; for &document_id in negative_blob.as_ref() { - let start = raw_document_key(document_id); - let end = raw_document_key(document_id + 1); - file_writer.delete_range(&start, &end)?; + let start = DocumentKey::new(document_id); + let end = DocumentKey::new(document_id + 1); + file_writer.delete_range(start.as_ref(), end.as_ref())?; } file_writer.finish()?; diff --git a/src/index/update/positive/update.rs b/src/index/update/positive/update.rs index 8551867b1..852d4572f 100644 --- a/src/index/update/positive/update.rs +++ b/src/index/update/positive/update.rs @@ -1,12 +1,15 @@ use std::collections::BTreeMap; use std::path::PathBuf; use std::error::Error; +use std::fmt; use ::rocksdb::rocksdb_options; +use serde::ser::{self, Serialize}; use crate::index::update::positive::unordered_builder::UnorderedPositiveBlobBuilder; use crate::index::schema::{SchemaProps, Schema, SchemaAttr}; -use crate::index::update::{Update, raw_document_key_attr}; +use crate::index::update::Update; +use crate::database::{DocumentKey, DocumentKeyAttr}; use crate::blob::positive::PositiveBlob; use crate::tokenizer::TokenizerBuilder; use crate::{DocumentId, DocIndex}; @@ -14,10 +17,7 @@ use crate::index::DATA_INDEX; use crate::blob::Blob; pub enum NewState { - Updated { - value: String, - props: SchemaProps, - }, + Updated { value: String }, Removed, } @@ -38,10 +38,19 @@ impl PositiveUpdateBuilder { } } + pub fn update(&mut self, id: DocumentId, document: &T) -> Result<(), Box> { + let serializer = Serializer { + schema: &self.schema, + document_id: id, + new_states: &mut self.new_states + }; + + Ok(ser::Serialize::serialize(document, serializer)?) + } + // TODO value must be a field that can be indexed pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) { - let state = NewState::Updated { value, props: self.schema.props(field) }; - self.new_states.insert((id, field), state); + self.new_states.insert((id, field), NewState::Updated { value }); } pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) { @@ -49,6 +58,298 @@ impl PositiveUpdateBuilder { } } +#[derive(Debug)] +pub enum SerializerError { + SchemaDontMatch { attribute: String }, + UnserializableType { name: &'static str }, + Custom(String), +} + +impl ser::Error for SerializerError { + fn custom(msg: T) -> Self { + SerializerError::Custom(msg.to_string()) + } +} + +impl fmt::Display for SerializerError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SerializerError::SchemaDontMatch { attribute } => { + write!(f, "serialized document try to specify the \ + {:?} attribute that is not known by the schema", attribute) + }, + SerializerError::UnserializableType { name } => { + write!(f, "Only struct and map types are considered valid documents and + can be serialized, not {} types directly.", name) + }, + SerializerError::Custom(s) => f.write_str(&s), + } + } +} + +impl Error for SerializerError {} + +struct Serializer<'a> { + schema: &'a Schema, + document_id: DocumentId, + new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>, +} + +macro_rules! forward_to_unserializable_type { + ($($ty:ident => $se_method:ident,)*) => { + $( + fn $se_method(self, v: $ty) -> Result { + Err(SerializerError::UnserializableType { name: "$ty" }) + } + )* + } +} + +impl<'a> ser::Serializer for Serializer<'a> { + type Ok = (); + type Error = SerializerError; + type SerializeSeq = ser::Impossible; + type SerializeTuple = ser::Impossible; + type SerializeTupleStruct = ser::Impossible; + type SerializeTupleVariant = ser::Impossible; + type SerializeMap = MapSerializer<'a>; + type SerializeStruct = StructSerializer<'a>; + type SerializeStructVariant = ser::Impossible; + + forward_to_unserializable_type! { + bool => serialize_bool, + char => serialize_char, + + i8 => serialize_i8, + i16 => serialize_i16, + i32 => serialize_i32, + i64 => serialize_i64, + + u8 => serialize_u8, + u16 => serialize_u16, + u32 => serialize_u32, + u64 => serialize_u64, + + f32 => serialize_f32, + f64 => serialize_f64, + } + + fn serialize_str(self, v: &str) -> Result { + Err(SerializerError::UnserializableType { name: "str" }) + } + + fn serialize_bytes(self, v: &[u8]) -> Result { + Err(SerializerError::UnserializableType { name: "&[u8]" }) + } + + fn serialize_none(self) -> Result { + Err(SerializerError::UnserializableType { name: "Option" }) + } + + fn serialize_some(self, _value: &T) -> Result + where T: Serialize, + { + Err(SerializerError::UnserializableType { name: "Option" }) + } + + fn serialize_unit(self) -> Result { + Err(SerializerError::UnserializableType { name: "()" }) + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + Err(SerializerError::UnserializableType { name: "unit struct" }) + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str + ) -> Result + { + Err(SerializerError::UnserializableType { name: "unit variant" }) + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + value: &T + ) -> Result + where T: Serialize, + { + value.serialize(self) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T + ) -> Result + where T: Serialize, + { + Err(SerializerError::UnserializableType { name: "newtype variant" }) + } + + fn serialize_seq(self, _len: Option) -> Result { + Err(SerializerError::UnserializableType { name: "sequence" }) + } + + fn serialize_tuple(self, _len: usize) -> Result { + Err(SerializerError::UnserializableType { name: "tuple" }) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize + ) -> Result + { + Err(SerializerError::UnserializableType { name: "tuple struct" }) + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize + ) -> Result + { + Err(SerializerError::UnserializableType { name: "tuple variant" }) + } + + fn serialize_map(self, _len: Option) -> Result { + Ok(MapSerializer { + schema: self.schema, + document_id: self.document_id, + new_states: self.new_states, + }) + } + + fn serialize_struct( + self, + _name: &'static str, + _len: usize + ) -> Result + { + Ok(StructSerializer { + schema: self.schema, + document_id: self.document_id, + new_states: self.new_states, + }) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize + ) -> Result + { + Err(SerializerError::UnserializableType { name: "struct variant" }) + } +} + +fn serialize_field( + schema: &Schema, + document_id: DocumentId, + new_states: &mut BTreeMap<(DocumentId, SchemaAttr), NewState>, + name: &str, + value: &T +) -> Result<(), SerializerError> +where T: Serialize, +{ + match schema.attribute(name) { + Some(attr) => { + if schema.props(attr).is_stored() { + let value = unimplemented!(); + new_states.insert((document_id, attr), NewState::Updated { value }); + } + Ok(()) + }, + None => Err(SerializerError::SchemaDontMatch { attribute: name.to_owned() }), + } +} + +struct StructSerializer<'a> { + schema: &'a Schema, + document_id: DocumentId, + new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>, +} + +impl<'a> ser::SerializeStruct for StructSerializer<'a> { + type Ok = (); + type Error = SerializerError; + + fn serialize_field( + &mut self, + key: &'static str, + value: &T + ) -> Result<(), Self::Error> + where T: Serialize, + { + serialize_field(self.schema, self.document_id, self.new_states, key, value) + } + + fn end(self) -> Result { + Ok(()) + } +} + +struct MapSerializer<'a> { + schema: &'a Schema, + document_id: DocumentId, + new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>, + // pending_key: Option, +} + +impl<'a> ser::SerializeMap for MapSerializer<'a> { + type Ok = (); + type Error = SerializerError; + + fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> + where T: Serialize + { + Err(SerializerError::UnserializableType { name: "setmap" }) + } + + fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> + where T: Serialize + { + unimplemented!() + } + + fn end(self) -> Result { + Ok(()) + } + + fn serialize_entry( + &mut self, + key: &K, + value: &V + ) -> Result<(), Self::Error> + where K: Serialize, V: Serialize, + { + let key = unimplemented!(); + serialize_field(self.schema, self.document_id, self.new_states, key, value) + } +} + +// struct MapKeySerializer; + +// impl ser::Serializer for MapKeySerializer { +// type Ok = String; +// type Error = SerializerError; + +// #[inline] +// fn serialize_str(self, value: &str) -> Result<()> { +// unimplemented!() +// } +// } + impl PositiveUpdateBuilder where B: TokenizerBuilder { @@ -60,8 +361,9 @@ where B: TokenizerBuilder let mut builder = UnorderedPositiveBlobBuilder::memory(); for ((document_id, attr), state) in &self.new_states { + let props = self.schema.props(*attr); let value = match state { - NewState::Updated { value, props } if props.is_indexed() => value, + NewState::Updated { value } if props.is_indexed() => value, _ => continue, }; @@ -95,12 +397,13 @@ where B: TokenizerBuilder // write all the documents fields updates for ((id, attr), state) in self.new_states { - let key = raw_document_key_attr(id, attr); + let key = DocumentKeyAttr::new(id, attr); + let props = self.schema.props(attr); match state { - NewState::Updated { value, props } => if props.is_stored() { - file_writer.put(&key, value.as_bytes())? + NewState::Updated { value } => if props.is_stored() { + file_writer.put(key.as_ref(), value.as_bytes())? }, - NewState::Removed => file_writer.delete(&key)?, + NewState::Removed => file_writer.delete(key.as_ref())?, } } diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 9b075786b..58ef52d5c 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -1,6 +1,52 @@ use std::mem; use self::Separator::*; +struct MegaTokenizer { + strings: I, +} + +impl From for MegaTokenizer> { + fn from(string: String) -> Self { + MegaTokenizer { strings: Some(string) } + } +} + +impl From> for MegaTokenizer> { + fn from(strings: Vec) -> Self { + MegaTokenizer { strings } + } +} + +impl Iterator for MegaTokenizer { + type Item = (usize, String); + + fn next(&mut self) -> Option { + unimplemented!() + } +} + +#[test] +fn xxx() { + let s1 = "hello world!"; + let mut s1 = MegaTokenizer::from(s1.to_owned()); + + assert_eq!(s1.next(), Some((0, "hello".into()))); + assert_eq!(s1.next(), Some((1, "world".into()))); + + assert_eq!(s1.next(), None); + + let v1 = vec!["Vin Diesel".to_owned(), "Quentin Tarantino".to_owned()]; + let mut v1 = MegaTokenizer::from(v1); + + assert_eq!(v1.next(), Some((0, "Vin".into()))); + assert_eq!(v1.next(), Some((1, "Diesel".into()))); + + assert_eq!(v1.next(), Some((8, "Quentin".into()))); + assert_eq!(v1.next(), Some((9, "Tarantino".into()))); + + assert_eq!(v1.next(), None); +} + pub trait TokenizerBuilder { fn build<'a>(&self, text: &'a str) -> Box + 'a>; }