mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
feat: Implemented a basic deserialization
This commit is contained in:
parent
2a35d72fe2
commit
b2cec98805
@ -1,13 +1,15 @@
|
|||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::marker;
|
use std::{fmt, marker};
|
||||||
|
|
||||||
use rocksdb::rocksdb::{DB, Snapshot};
|
use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey};
|
||||||
|
use rocksdb::rocksdb_options::ReadOptions;
|
||||||
use serde::de::DeserializeOwned;
|
use serde::de::DeserializeOwned;
|
||||||
|
|
||||||
use crate::index::schema::Schema;
|
|
||||||
use crate::blob::positive::PositiveBlob;
|
|
||||||
use crate::database::deserializer::{Deserializer, DeserializerError};
|
use crate::database::deserializer::{Deserializer, DeserializerError};
|
||||||
use crate::database::{DATA_INDEX, DATA_SCHEMA};
|
use crate::database::{DATA_INDEX, DATA_SCHEMA};
|
||||||
|
use crate::blob::positive::PositiveBlob;
|
||||||
|
use crate::index::schema::Schema;
|
||||||
|
use crate::database::{DocumentKey, DocumentKeyAttr};
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
|
|
||||||
// FIXME Do not panic!
|
// FIXME Do not panic!
|
||||||
@ -40,6 +42,10 @@ impl<'a> DatabaseView<'a> {
|
|||||||
self.snapshot
|
self.snapshot
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
|
||||||
|
Ok(self.snapshot.get(key)?)
|
||||||
|
}
|
||||||
|
|
||||||
// TODO create an enum error type
|
// TODO create an enum error type
|
||||||
pub fn retrieve_document<D>(&self, id: DocumentId) -> Result<D, Box<Error>>
|
pub fn retrieve_document<D>(&self, id: DocumentId) -> Result<D, Box<Error>>
|
||||||
where D: DeserializeOwned
|
where D: DeserializeOwned
|
||||||
@ -60,6 +66,36 @@ impl<'a> DatabaseView<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> fmt::Debug for DatabaseView<'a> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
let mut options = ReadOptions::new();
|
||||||
|
let lower = DocumentKey::new(0);
|
||||||
|
options.set_iterate_lower_bound(lower.as_ref());
|
||||||
|
|
||||||
|
let mut iter = self.snapshot.iter_opt(options);
|
||||||
|
iter.seek(SeekKey::Start);
|
||||||
|
let iter = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key));
|
||||||
|
|
||||||
|
if f.alternate() {
|
||||||
|
writeln!(f, "DatabaseView(")?;
|
||||||
|
} else {
|
||||||
|
write!(f, "DatabaseView(")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.schema.fmt(f)?;
|
||||||
|
|
||||||
|
if f.alternate() {
|
||||||
|
writeln!(f, ",")?;
|
||||||
|
} else {
|
||||||
|
write!(f, ", ")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
f.debug_list().entries(iter).finish()?;
|
||||||
|
|
||||||
|
write!(f, ")")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO this is just an iter::Map !!!
|
// TODO this is just an iter::Map !!!
|
||||||
pub struct DocumentIter<'a, D, I> {
|
pub struct DocumentIter<'a, D, I> {
|
||||||
database_view: &'a DatabaseView<'a>,
|
database_view: &'a DatabaseView<'a>,
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use rocksdb::rocksdb::{DB, Snapshot};
|
use rocksdb::rocksdb::{DB, Snapshot, SeekKey};
|
||||||
use rocksdb::rocksdb_options::ReadOptions;
|
use rocksdb::rocksdb_options::ReadOptions;
|
||||||
use serde::de::value::MapDeserializer;
|
|
||||||
use serde::forward_to_deserialize_any;
|
use serde::forward_to_deserialize_any;
|
||||||
use serde::de::Visitor;
|
use serde::de::value::MapDeserializer;
|
||||||
|
use serde::de::{self, Visitor, IntoDeserializer};
|
||||||
|
|
||||||
use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
|
use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
|
||||||
use crate::index::schema::Schema;
|
use crate::index::schema::Schema;
|
||||||
@ -23,7 +23,7 @@ impl<'a> Deserializer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
||||||
type Error = DeserializerError;
|
type Error = DeserializerError;
|
||||||
|
|
||||||
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
@ -35,8 +35,7 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
|||||||
forward_to_deserialize_any! {
|
forward_to_deserialize_any! {
|
||||||
bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
|
bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
|
||||||
bytes byte_buf unit_struct tuple_struct
|
bytes byte_buf unit_struct tuple_struct
|
||||||
identifier tuple ignored_any option newtype_struct enum
|
identifier tuple ignored_any option newtype_struct enum struct
|
||||||
struct
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
@ -48,14 +47,20 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
|||||||
options.set_iterate_lower_bound(lower.as_ref());
|
options.set_iterate_lower_bound(lower.as_ref());
|
||||||
options.set_iterate_upper_bound(upper.as_ref());
|
options.set_iterate_upper_bound(upper.as_ref());
|
||||||
|
|
||||||
let mut db_iter = self.snapshot.iter_opt(options);
|
let mut iter = self.snapshot.iter_opt(options);
|
||||||
let iter = db_iter.map(|(key, value)| {
|
iter.seek(SeekKey::Start);
|
||||||
|
|
||||||
|
if iter.kv().is_none() {
|
||||||
|
// FIXME return an error
|
||||||
|
}
|
||||||
|
|
||||||
|
let iter = iter.map(|(key, value)| {
|
||||||
// retrieve the schema attribute name
|
// retrieve the schema attribute name
|
||||||
// from the schema attribute number
|
// from the schema attribute number
|
||||||
let document_key_attr = DocumentKeyAttr::from_bytes(&key);
|
let document_key_attr = DocumentKeyAttr::from_bytes(&key);
|
||||||
let schema_attr = document_key_attr.attribute();
|
let schema_attr = document_key_attr.attribute();
|
||||||
let attribute_name = self.schema.attribute_name(schema_attr);
|
let attribute_name = self.schema.attribute_name(schema_attr);
|
||||||
(attribute_name, value)
|
(attribute_name, Value(value))
|
||||||
});
|
});
|
||||||
|
|
||||||
let map_deserializer = MapDeserializer::new(iter);
|
let map_deserializer = MapDeserializer::new(iter);
|
||||||
@ -63,12 +68,101 @@ impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct Value(Vec<u8>);
|
||||||
|
|
||||||
|
impl<'de> IntoDeserializer<'de, DeserializerError> for Value {
|
||||||
|
type Deserializer = Self;
|
||||||
|
|
||||||
|
fn into_deserializer(self) -> Self::Deserializer {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! forward_to_bincode_values {
|
||||||
|
($($ty:ident => $de_method:ident,)*) => {
|
||||||
|
$(
|
||||||
|
fn $de_method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
|
where V: de::Visitor<'de>
|
||||||
|
{
|
||||||
|
match bincode::deserialize::<$ty>(&self.0) {
|
||||||
|
Ok(val) => val.into_deserializer().$de_method(visitor),
|
||||||
|
Err(e) => Err(de::Error::custom(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de, 'a> de::Deserializer<'de> for Value {
|
||||||
|
type Error = DeserializerError;
|
||||||
|
|
||||||
|
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
|
where V: Visitor<'de>
|
||||||
|
{
|
||||||
|
self.0.into_deserializer().deserialize_any(visitor)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
|
where V: Visitor<'de>
|
||||||
|
{
|
||||||
|
self.deserialize_string(visitor)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
|
where V: Visitor<'de>
|
||||||
|
{
|
||||||
|
match bincode::deserialize::<String>(&self.0) {
|
||||||
|
Ok(val) => val.into_deserializer().deserialize_string(visitor),
|
||||||
|
Err(e) => Err(de::Error::custom(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
|
where V: Visitor<'de>
|
||||||
|
{
|
||||||
|
self.deserialize_byte_buf(visitor)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
|
where V: Visitor<'de>
|
||||||
|
{
|
||||||
|
match bincode::deserialize::<Vec<u8>>(&self.0) {
|
||||||
|
Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor),
|
||||||
|
Err(e) => Err(de::Error::custom(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
forward_to_bincode_values! {
|
||||||
|
char => deserialize_char,
|
||||||
|
bool => deserialize_bool,
|
||||||
|
|
||||||
|
u8 => deserialize_u8,
|
||||||
|
u16 => deserialize_u16,
|
||||||
|
u32 => deserialize_u32,
|
||||||
|
u64 => deserialize_u64,
|
||||||
|
|
||||||
|
i8 => deserialize_i8,
|
||||||
|
i16 => deserialize_i16,
|
||||||
|
i32 => deserialize_i32,
|
||||||
|
i64 => deserialize_i64,
|
||||||
|
|
||||||
|
f32 => deserialize_f32,
|
||||||
|
f64 => deserialize_f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
forward_to_deserialize_any! {
|
||||||
|
unit seq map
|
||||||
|
unit_struct tuple_struct
|
||||||
|
identifier tuple ignored_any option newtype_struct enum struct
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum DeserializerError {
|
pub enum DeserializerError {
|
||||||
Custom(String),
|
Custom(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl serde::de::Error for DeserializerError {
|
impl de::Error for DeserializerError {
|
||||||
fn custom<T: fmt::Display>(msg: T) -> Self {
|
fn custom<T: fmt::Display>(msg: T) -> Self {
|
||||||
DeserializerError::Custom(msg.to_string())
|
DeserializerError::Custom(msg.to_string())
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use std::io::{Cursor, Read, Write};
|
use std::io::{Cursor, Read, Write};
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt};
|
use byteorder::{NativeEndian, WriteBytesExt, ReadBytesExt};
|
||||||
|
|
||||||
@ -48,6 +49,14 @@ impl AsRef<[u8]> for DocumentKey {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for DocumentKey {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
f.debug_struct("DocumentKey")
|
||||||
|
.field("document_id", &self.document_id())
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]);
|
pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]);
|
||||||
|
|
||||||
@ -94,3 +103,12 @@ impl AsRef<[u8]> for DocumentKeyAttr {
|
|||||||
&self.0
|
&self.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for DocumentKeyAttr {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
f.debug_struct("DocumentKeyAttr")
|
||||||
|
.field("document_id", &self.document_id())
|
||||||
|
.field("attribute", &self.attribute().as_u32())
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions};
|
use rocksdb::rocksdb_options::{DBOptions, IngestExternalFileOptions, ColumnFamilyOptions};
|
||||||
use rocksdb::{DB, MergeOperands};
|
use rocksdb::{DB, DBVector, MergeOperands, SeekKey};
|
||||||
use rocksdb::rocksdb::Writable;
|
use rocksdb::rocksdb::Writable;
|
||||||
|
|
||||||
pub use crate::database::database_view::DatabaseView;
|
pub use crate::database::database_view::DatabaseView;
|
||||||
|
pub use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
|
||||||
use crate::index::update::Update;
|
use crate::index::update::Update;
|
||||||
use crate::index::schema::Schema;
|
use crate::index::schema::Schema;
|
||||||
use crate::blob::{self, Blob};
|
use crate::blob::{self, Blob};
|
||||||
@ -30,6 +32,7 @@ impl Database {
|
|||||||
let path = path.to_string_lossy();
|
let path = path.to_string_lossy();
|
||||||
let mut opts = DBOptions::new();
|
let mut opts = DBOptions::new();
|
||||||
opts.create_if_missing(true);
|
opts.create_if_missing(true);
|
||||||
|
// opts.error_if_exists(true); // FIXME pull request that
|
||||||
|
|
||||||
let mut cf_opts = ColumnFamilyOptions::new();
|
let mut cf_opts = ColumnFamilyOptions::new();
|
||||||
cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
|
cf_opts.add_merge_operator("data-index merge operator", merge_indexes);
|
||||||
@ -80,14 +83,40 @@ impl Database {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
|
||||||
|
Ok(self.0.get(key)?)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn flush(&self) -> Result<(), Box<Error>> {
|
||||||
|
Ok(self.0.flush(true)?)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn view(&self) -> Result<DatabaseView, Box<Error>> {
|
pub fn view(&self) -> Result<DatabaseView, Box<Error>> {
|
||||||
let snapshot = self.0.snapshot();
|
let snapshot = self.0.snapshot();
|
||||||
DatabaseView::new(snapshot)
|
DatabaseView::new(snapshot)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for Database {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "Database([")?;
|
||||||
|
let mut iter = self.0.iter();
|
||||||
|
iter.seek(SeekKey::Start);
|
||||||
|
let mut first = true;
|
||||||
|
for (key, value) in &mut iter {
|
||||||
|
if !first { write!(f, ", ")?; }
|
||||||
|
first = false;
|
||||||
|
let key = String::from_utf8_lossy(&key);
|
||||||
|
write!(f, "{:?}", key)?;
|
||||||
|
}
|
||||||
|
write!(f, "])")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
|
fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
|
||||||
if key != DATA_INDEX { panic!("The merge operator only supports \"data-index\" merging") }
|
if key != DATA_INDEX {
|
||||||
|
panic!("The merge operator only supports \"data-index\" merging")
|
||||||
|
}
|
||||||
|
|
||||||
let capacity = {
|
let capacity = {
|
||||||
let remaining = operands.size_hint().0;
|
let remaining = operands.size_hint().0;
|
||||||
@ -109,3 +138,90 @@ fn merge_indexes(key: &[u8], existing_value: Option<&[u8]>, operands: &mut Merge
|
|||||||
let blob = op.merge().expect("BUG: could not merge blobs");
|
let blob = op.merge().expect("BUG: could not merge blobs");
|
||||||
bincode::serialize(&blob).expect("BUG: could not serialize merged blob")
|
bincode::serialize(&blob).expect("BUG: could not serialize merged blob")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::error::Error;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use serde_derive::{Serialize, Deserialize};
|
||||||
|
use tempfile::tempdir;
|
||||||
|
|
||||||
|
use crate::tokenizer::DefaultBuilder;
|
||||||
|
use crate::index::update::PositiveUpdateBuilder;
|
||||||
|
use crate::index::schema::{Schema, SchemaBuilder, STORED, INDEXED};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn ingest_update_file() -> Result<(), Box<Error>> {
|
||||||
|
let dir = tempdir()?;
|
||||||
|
|
||||||
|
let rocksdb_path = dir.path().join("rocksdb.rdb");
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||||
|
struct SimpleDoc {
|
||||||
|
title: String,
|
||||||
|
description: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
let title;
|
||||||
|
let description;
|
||||||
|
let schema = {
|
||||||
|
let mut builder = SchemaBuilder::new();
|
||||||
|
title = builder.new_attribute("title", STORED | INDEXED);
|
||||||
|
description = builder.new_attribute("description", STORED | INDEXED);
|
||||||
|
builder.build()
|
||||||
|
};
|
||||||
|
|
||||||
|
let database = Database::create(&rocksdb_path, schema.clone())?;
|
||||||
|
let tokenizer_builder = DefaultBuilder::new();
|
||||||
|
|
||||||
|
let update_path = dir.path().join("update.sst");
|
||||||
|
|
||||||
|
let doc0 = SimpleDoc {
|
||||||
|
title: String::from("I am a title"),
|
||||||
|
description: String::from("I am a description"),
|
||||||
|
};
|
||||||
|
let doc1 = SimpleDoc {
|
||||||
|
title: String::from("I am the second title"),
|
||||||
|
description: String::from("I am the second description"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut update = {
|
||||||
|
let mut builder = PositiveUpdateBuilder::new(update_path, schema, tokenizer_builder);
|
||||||
|
|
||||||
|
// builder.update_field(0, title, doc0.title.clone());
|
||||||
|
// builder.update_field(0, description, doc0.description.clone());
|
||||||
|
|
||||||
|
// builder.update_field(1, title, doc1.title.clone());
|
||||||
|
// builder.update_field(1, description, doc1.description.clone());
|
||||||
|
|
||||||
|
builder.update(0, &doc0).unwrap();
|
||||||
|
builder.update(1, &doc1).unwrap();
|
||||||
|
|
||||||
|
builder.build()?
|
||||||
|
};
|
||||||
|
|
||||||
|
update.set_move(true);
|
||||||
|
database.ingest_update_file(update)?;
|
||||||
|
let view = database.view()?;
|
||||||
|
|
||||||
|
println!("{:?}", view);
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||||
|
struct DeSimpleDoc {
|
||||||
|
title: char,
|
||||||
|
}
|
||||||
|
|
||||||
|
let de_doc0: DeSimpleDoc = view.retrieve_document(0)?;
|
||||||
|
let de_doc1: DeSimpleDoc = view.retrieve_document(1)?;
|
||||||
|
|
||||||
|
println!("{:?}", de_doc0);
|
||||||
|
println!("{:?}", de_doc1);
|
||||||
|
|
||||||
|
// assert_eq!(doc0, de_doc0);
|
||||||
|
// assert_eq!(doc1, de_doc1);
|
||||||
|
|
||||||
|
Ok(dir.close()?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -111,7 +111,11 @@ impl Schema {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
|
pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
|
||||||
unimplemented!("cannot retrieve the attribute name by its attribute number")
|
// FIXME complexity is insane !
|
||||||
|
for (key, &value) in &self.attrs {
|
||||||
|
if value == attr { return &key }
|
||||||
|
}
|
||||||
|
panic!("schema attribute name not found for {:?}", attr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,9 +13,6 @@ mod positive;
|
|||||||
pub use self::positive::{PositiveUpdateBuilder, NewState};
|
pub use self::positive::{PositiveUpdateBuilder, NewState};
|
||||||
pub use self::negative::NegativeUpdateBuilder;
|
pub use self::negative::NegativeUpdateBuilder;
|
||||||
|
|
||||||
const DOC_KEY_LEN: usize = 4 + std::mem::size_of::<u64>();
|
|
||||||
const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + std::mem::size_of::<u32>();
|
|
||||||
|
|
||||||
pub struct Update {
|
pub struct Update {
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
can_be_moved: bool,
|
can_be_moved: bool,
|
||||||
@ -30,6 +27,10 @@ impl Update {
|
|||||||
Ok(Update { path: path.into(), can_be_moved: true })
|
Ok(Update { path: path.into(), can_be_moved: true })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_move(&mut self, can_be_moved: bool) {
|
||||||
|
self.can_be_moved = can_be_moved
|
||||||
|
}
|
||||||
|
|
||||||
pub fn can_be_moved(&self) -> bool {
|
pub fn can_be_moved(&self) -> bool {
|
||||||
self.can_be_moved
|
self.can_be_moved
|
||||||
}
|
}
|
||||||
@ -38,27 +39,3 @@ impl Update {
|
|||||||
self.path
|
self.path
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// "doc-{ID_8_BYTES}"
|
|
||||||
fn raw_document_key(id: DocumentId) -> [u8; DOC_KEY_LEN] {
|
|
||||||
let mut key = [0; DOC_KEY_LEN];
|
|
||||||
|
|
||||||
let mut wtr = Cursor::new(&mut key[..]);
|
|
||||||
wtr.write_all(b"doc-").unwrap();
|
|
||||||
wtr.write_u64::<NetworkEndian>(id).unwrap();
|
|
||||||
|
|
||||||
key
|
|
||||||
}
|
|
||||||
|
|
||||||
// "doc-{ID_8_BYTES}-{ATTR_4_BYTES}"
|
|
||||||
fn raw_document_key_attr(id: DocumentId, attr: SchemaAttr) -> [u8; DOC_KEY_ATTR_LEN] {
|
|
||||||
let mut key = [0; DOC_KEY_ATTR_LEN];
|
|
||||||
let raw_key = raw_document_key(id);
|
|
||||||
|
|
||||||
let mut wtr = Cursor::new(&mut key[..]);
|
|
||||||
wtr.write_all(&raw_key).unwrap();
|
|
||||||
wtr.write_all(b"-").unwrap();
|
|
||||||
wtr.write_u32::<NetworkEndian>(attr.as_u32()).unwrap();
|
|
||||||
|
|
||||||
key
|
|
||||||
}
|
|
||||||
|
@ -4,7 +4,8 @@ use std::error::Error;
|
|||||||
use ::rocksdb::rocksdb_options;
|
use ::rocksdb::rocksdb_options;
|
||||||
|
|
||||||
use crate::index::update::negative::unordered_builder::UnorderedNegativeBlobBuilder;
|
use crate::index::update::negative::unordered_builder::UnorderedNegativeBlobBuilder;
|
||||||
use crate::index::update::{Update, raw_document_key};
|
use crate::index::update::Update;
|
||||||
|
use crate::database::{DocumentKey, DocumentKeyAttr};
|
||||||
use crate::blob::{Blob, NegativeBlob};
|
use crate::blob::{Blob, NegativeBlob};
|
||||||
use crate::index::DATA_INDEX;
|
use crate::index::DATA_INDEX;
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
@ -48,9 +49,9 @@ impl NegativeUpdateBuilder {
|
|||||||
};
|
};
|
||||||
|
|
||||||
for &document_id in negative_blob.as_ref() {
|
for &document_id in negative_blob.as_ref() {
|
||||||
let start = raw_document_key(document_id);
|
let start = DocumentKey::new(document_id);
|
||||||
let end = raw_document_key(document_id + 1);
|
let end = DocumentKey::new(document_id + 1);
|
||||||
file_writer.delete_range(&start, &end)?;
|
file_writer.delete_range(start.as_ref(), end.as_ref())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
file_writer.finish()?;
|
file_writer.finish()?;
|
||||||
|
@ -1,12 +1,15 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
use ::rocksdb::rocksdb_options;
|
use ::rocksdb::rocksdb_options;
|
||||||
|
use serde::ser::{self, Serialize};
|
||||||
|
|
||||||
use crate::index::update::positive::unordered_builder::UnorderedPositiveBlobBuilder;
|
use crate::index::update::positive::unordered_builder::UnorderedPositiveBlobBuilder;
|
||||||
use crate::index::schema::{SchemaProps, Schema, SchemaAttr};
|
use crate::index::schema::{SchemaProps, Schema, SchemaAttr};
|
||||||
use crate::index::update::{Update, raw_document_key_attr};
|
use crate::index::update::Update;
|
||||||
|
use crate::database::{DocumentKey, DocumentKeyAttr};
|
||||||
use crate::blob::positive::PositiveBlob;
|
use crate::blob::positive::PositiveBlob;
|
||||||
use crate::tokenizer::TokenizerBuilder;
|
use crate::tokenizer::TokenizerBuilder;
|
||||||
use crate::{DocumentId, DocIndex};
|
use crate::{DocumentId, DocIndex};
|
||||||
@ -14,10 +17,7 @@ use crate::index::DATA_INDEX;
|
|||||||
use crate::blob::Blob;
|
use crate::blob::Blob;
|
||||||
|
|
||||||
pub enum NewState {
|
pub enum NewState {
|
||||||
Updated {
|
Updated { value: String },
|
||||||
value: String,
|
|
||||||
props: SchemaProps,
|
|
||||||
},
|
|
||||||
Removed,
|
Removed,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -38,10 +38,19 @@ impl<B> PositiveUpdateBuilder<B> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn update<T: Serialize>(&mut self, id: DocumentId, document: &T) -> Result<(), Box<Error>> {
|
||||||
|
let serializer = Serializer {
|
||||||
|
schema: &self.schema,
|
||||||
|
document_id: id,
|
||||||
|
new_states: &mut self.new_states
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(ser::Serialize::serialize(document, serializer)?)
|
||||||
|
}
|
||||||
|
|
||||||
// TODO value must be a field that can be indexed
|
// TODO value must be a field that can be indexed
|
||||||
pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) {
|
pub fn update_field(&mut self, id: DocumentId, field: SchemaAttr, value: String) {
|
||||||
let state = NewState::Updated { value, props: self.schema.props(field) };
|
self.new_states.insert((id, field), NewState::Updated { value });
|
||||||
self.new_states.insert((id, field), state);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) {
|
pub fn remove_field(&mut self, id: DocumentId, field: SchemaAttr) {
|
||||||
@ -49,6 +58,298 @@ impl<B> PositiveUpdateBuilder<B> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum SerializerError {
|
||||||
|
SchemaDontMatch { attribute: String },
|
||||||
|
UnserializableType { name: &'static str },
|
||||||
|
Custom(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ser::Error for SerializerError {
|
||||||
|
fn custom<T: fmt::Display>(msg: T) -> Self {
|
||||||
|
SerializerError::Custom(msg.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for SerializerError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
SerializerError::SchemaDontMatch { attribute } => {
|
||||||
|
write!(f, "serialized document try to specify the \
|
||||||
|
{:?} attribute that is not known by the schema", attribute)
|
||||||
|
},
|
||||||
|
SerializerError::UnserializableType { name } => {
|
||||||
|
write!(f, "Only struct and map types are considered valid documents and
|
||||||
|
can be serialized, not {} types directly.", name)
|
||||||
|
},
|
||||||
|
SerializerError::Custom(s) => f.write_str(&s),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for SerializerError {}
|
||||||
|
|
||||||
|
struct Serializer<'a> {
|
||||||
|
schema: &'a Schema,
|
||||||
|
document_id: DocumentId,
|
||||||
|
new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>,
|
||||||
|
}
|
||||||
|
|
||||||
|
macro_rules! forward_to_unserializable_type {
|
||||||
|
($($ty:ident => $se_method:ident,)*) => {
|
||||||
|
$(
|
||||||
|
fn $se_method(self, v: $ty) -> Result<Self::Ok, Self::Error> {
|
||||||
|
Err(SerializerError::UnserializableType { name: "$ty" })
|
||||||
|
}
|
||||||
|
)*
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ser::Serializer for Serializer<'a> {
|
||||||
|
type Ok = ();
|
||||||
|
type Error = SerializerError;
|
||||||
|
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
||||||
|
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
||||||
|
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
||||||
|
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||||
|
type SerializeMap = MapSerializer<'a>;
|
||||||
|
type SerializeStruct = StructSerializer<'a>;
|
||||||
|
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||||
|
|
||||||
|
forward_to_unserializable_type! {
|
||||||
|
bool => serialize_bool,
|
||||||
|
char => serialize_char,
|
||||||
|
|
||||||
|
i8 => serialize_i8,
|
||||||
|
i16 => serialize_i16,
|
||||||
|
i32 => serialize_i32,
|
||||||
|
i64 => serialize_i64,
|
||||||
|
|
||||||
|
u8 => serialize_u8,
|
||||||
|
u16 => serialize_u16,
|
||||||
|
u32 => serialize_u32,
|
||||||
|
u64 => serialize_u64,
|
||||||
|
|
||||||
|
f32 => serialize_f32,
|
||||||
|
f64 => serialize_f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
|
||||||
|
Err(SerializerError::UnserializableType { name: "str" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
||||||
|
Err(SerializerError::UnserializableType { name: "&[u8]" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
||||||
|
Err(SerializerError::UnserializableType { name: "Option" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
|
||||||
|
where T: Serialize,
|
||||||
|
{
|
||||||
|
Err(SerializerError::UnserializableType { name: "Option" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
||||||
|
Err(SerializerError::UnserializableType { name: "()" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
||||||
|
Err(SerializerError::UnserializableType { name: "unit struct" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_unit_variant(
|
||||||
|
self,
|
||||||
|
_name: &'static str,
|
||||||
|
_variant_index: u32,
|
||||||
|
_variant: &'static str
|
||||||
|
) -> Result<Self::Ok, Self::Error>
|
||||||
|
{
|
||||||
|
Err(SerializerError::UnserializableType { name: "unit variant" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_newtype_struct<T: ?Sized>(
|
||||||
|
self,
|
||||||
|
_name: &'static str,
|
||||||
|
value: &T
|
||||||
|
) -> Result<Self::Ok, Self::Error>
|
||||||
|
where T: Serialize,
|
||||||
|
{
|
||||||
|
value.serialize(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_newtype_variant<T: ?Sized>(
|
||||||
|
self,
|
||||||
|
_name: &'static str,
|
||||||
|
_variant_index: u32,
|
||||||
|
_variant: &'static str,
|
||||||
|
_value: &T
|
||||||
|
) -> Result<Self::Ok, Self::Error>
|
||||||
|
where T: Serialize,
|
||||||
|
{
|
||||||
|
Err(SerializerError::UnserializableType { name: "newtype variant" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
||||||
|
Err(SerializerError::UnserializableType { name: "sequence" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
||||||
|
Err(SerializerError::UnserializableType { name: "tuple" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_tuple_struct(
|
||||||
|
self,
|
||||||
|
_name: &'static str,
|
||||||
|
_len: usize
|
||||||
|
) -> Result<Self::SerializeTupleStruct, Self::Error>
|
||||||
|
{
|
||||||
|
Err(SerializerError::UnserializableType { name: "tuple struct" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_tuple_variant(
|
||||||
|
self,
|
||||||
|
_name: &'static str,
|
||||||
|
_variant_index: u32,
|
||||||
|
_variant: &'static str,
|
||||||
|
_len: usize
|
||||||
|
) -> Result<Self::SerializeTupleVariant, Self::Error>
|
||||||
|
{
|
||||||
|
Err(SerializerError::UnserializableType { name: "tuple variant" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
||||||
|
Ok(MapSerializer {
|
||||||
|
schema: self.schema,
|
||||||
|
document_id: self.document_id,
|
||||||
|
new_states: self.new_states,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_struct(
|
||||||
|
self,
|
||||||
|
_name: &'static str,
|
||||||
|
_len: usize
|
||||||
|
) -> Result<Self::SerializeStruct, Self::Error>
|
||||||
|
{
|
||||||
|
Ok(StructSerializer {
|
||||||
|
schema: self.schema,
|
||||||
|
document_id: self.document_id,
|
||||||
|
new_states: self.new_states,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_struct_variant(
|
||||||
|
self,
|
||||||
|
_name: &'static str,
|
||||||
|
_variant_index: u32,
|
||||||
|
_variant: &'static str,
|
||||||
|
_len: usize
|
||||||
|
) -> Result<Self::SerializeStructVariant, Self::Error>
|
||||||
|
{
|
||||||
|
Err(SerializerError::UnserializableType { name: "struct variant" })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_field<T: ?Sized>(
|
||||||
|
schema: &Schema,
|
||||||
|
document_id: DocumentId,
|
||||||
|
new_states: &mut BTreeMap<(DocumentId, SchemaAttr), NewState>,
|
||||||
|
name: &str,
|
||||||
|
value: &T
|
||||||
|
) -> Result<(), SerializerError>
|
||||||
|
where T: Serialize,
|
||||||
|
{
|
||||||
|
match schema.attribute(name) {
|
||||||
|
Some(attr) => {
|
||||||
|
if schema.props(attr).is_stored() {
|
||||||
|
let value = unimplemented!();
|
||||||
|
new_states.insert((document_id, attr), NewState::Updated { value });
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
None => Err(SerializerError::SchemaDontMatch { attribute: name.to_owned() }),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct StructSerializer<'a> {
|
||||||
|
schema: &'a Schema,
|
||||||
|
document_id: DocumentId,
|
||||||
|
new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
|
||||||
|
type Ok = ();
|
||||||
|
type Error = SerializerError;
|
||||||
|
|
||||||
|
fn serialize_field<T: ?Sized>(
|
||||||
|
&mut self,
|
||||||
|
key: &'static str,
|
||||||
|
value: &T
|
||||||
|
) -> Result<(), Self::Error>
|
||||||
|
where T: Serialize,
|
||||||
|
{
|
||||||
|
serialize_field(self.schema, self.document_id, self.new_states, key, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct MapSerializer<'a> {
|
||||||
|
schema: &'a Schema,
|
||||||
|
document_id: DocumentId,
|
||||||
|
new_states: &'a mut BTreeMap<(DocumentId, SchemaAttr), NewState>,
|
||||||
|
// pending_key: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ser::SerializeMap for MapSerializer<'a> {
|
||||||
|
type Ok = ();
|
||||||
|
type Error = SerializerError;
|
||||||
|
|
||||||
|
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
|
||||||
|
where T: Serialize
|
||||||
|
{
|
||||||
|
Err(SerializerError::UnserializableType { name: "setmap" })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||||
|
where T: Serialize
|
||||||
|
{
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_entry<K: ?Sized, V: ?Sized>(
|
||||||
|
&mut self,
|
||||||
|
key: &K,
|
||||||
|
value: &V
|
||||||
|
) -> Result<(), Self::Error>
|
||||||
|
where K: Serialize, V: Serialize,
|
||||||
|
{
|
||||||
|
let key = unimplemented!();
|
||||||
|
serialize_field(self.schema, self.document_id, self.new_states, key, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// struct MapKeySerializer;
|
||||||
|
|
||||||
|
// impl ser::Serializer for MapKeySerializer {
|
||||||
|
// type Ok = String;
|
||||||
|
// type Error = SerializerError;
|
||||||
|
|
||||||
|
// #[inline]
|
||||||
|
// fn serialize_str(self, value: &str) -> Result<()> {
|
||||||
|
// unimplemented!()
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
impl<B> PositiveUpdateBuilder<B>
|
impl<B> PositiveUpdateBuilder<B>
|
||||||
where B: TokenizerBuilder
|
where B: TokenizerBuilder
|
||||||
{
|
{
|
||||||
@ -60,8 +361,9 @@ where B: TokenizerBuilder
|
|||||||
|
|
||||||
let mut builder = UnorderedPositiveBlobBuilder::memory();
|
let mut builder = UnorderedPositiveBlobBuilder::memory();
|
||||||
for ((document_id, attr), state) in &self.new_states {
|
for ((document_id, attr), state) in &self.new_states {
|
||||||
|
let props = self.schema.props(*attr);
|
||||||
let value = match state {
|
let value = match state {
|
||||||
NewState::Updated { value, props } if props.is_indexed() => value,
|
NewState::Updated { value } if props.is_indexed() => value,
|
||||||
_ => continue,
|
_ => continue,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -95,12 +397,13 @@ where B: TokenizerBuilder
|
|||||||
|
|
||||||
// write all the documents fields updates
|
// write all the documents fields updates
|
||||||
for ((id, attr), state) in self.new_states {
|
for ((id, attr), state) in self.new_states {
|
||||||
let key = raw_document_key_attr(id, attr);
|
let key = DocumentKeyAttr::new(id, attr);
|
||||||
|
let props = self.schema.props(attr);
|
||||||
match state {
|
match state {
|
||||||
NewState::Updated { value, props } => if props.is_stored() {
|
NewState::Updated { value } => if props.is_stored() {
|
||||||
file_writer.put(&key, value.as_bytes())?
|
file_writer.put(key.as_ref(), value.as_bytes())?
|
||||||
},
|
},
|
||||||
NewState::Removed => file_writer.delete(&key)?,
|
NewState::Removed => file_writer.delete(key.as_ref())?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,6 +1,52 @@
|
|||||||
use std::mem;
|
use std::mem;
|
||||||
use self::Separator::*;
|
use self::Separator::*;
|
||||||
|
|
||||||
|
/// Work-in-progress tokenizer over one or several owned strings.
///
/// `I` is the container holding the strings to tokenize: `Option<String>`
/// when built from a single `String`, `Vec<String>` when built from a vector.
struct MegaTokenizer<I> {
    strings: I,
}

impl From<String> for MegaTokenizer<Option<String>> {
    /// Wraps a single string; it is stored as `Some(string)`.
    fn from(string: String) -> Self {
        let strings = Some(string);
        Self { strings }
    }
}

impl From<Vec<String>> for MegaTokenizer<Vec<String>> {
    /// Takes ownership of the vector as is.
    fn from(strings: Vec<String>) -> Self {
        Self { strings: strings }
    }
}

impl<I> Iterator for MegaTokenizer<I> {
    /// A `(usize, String)` pair per token.
    type Item = (usize, String);

    /// Not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics through `unimplemented!()`.
    fn next(&mut self) -> Option<Self::Item> {
        unimplemented!()
    }
}
|
||||||
|
|
||||||
|
// Pins the expected output of `MegaTokenizer`: one `(index, word)` pair per
// word, with the index jumping to 8 when moving on to the second string.
// `MegaTokenizer::next` is still `unimplemented!()`, so this test panics
// until the iterator is written.
#[test]
fn xxx() {
    let mut single = MegaTokenizer::from("hello world!".to_owned());

    assert_eq!(single.next(), Some((0, "hello".into())));
    assert_eq!(single.next(), Some((1, "world".into())));

    assert_eq!(single.next(), None);

    let names = vec!["Vin Diesel".to_owned(), "Quentin Tarantino".to_owned()];
    let mut multiple = MegaTokenizer::from(names);

    assert_eq!(multiple.next(), Some((0, "Vin".into())));
    assert_eq!(multiple.next(), Some((1, "Diesel".into())));

    assert_eq!(multiple.next(), Some((8, "Quentin".into())));
    assert_eq!(multiple.next(), Some((9, "Tarantino".into())));

    assert_eq!(multiple.next(), None);
}
|
||||||
|
|
||||||
/// Factory for tokenizers over a piece of text.
pub trait TokenizerBuilder {
    /// Builds a boxed iterator over `text`.
    ///
    /// Each item is a `(usize, &str)` pair whose string slice borrows from
    /// `text`, so the iterator cannot outlive the input.
    // `dyn` spells out the trait object explicitly; the bare `Box<Iterator<..>>`
    // form is deprecated and rejected by newer editions. The type is unchanged.
    fn build<'a>(&self, text: &'a str) -> Box<dyn Iterator<Item=(usize, &'a str)> + 'a>;
}
|
||||||
|
Loading…
Reference in New Issue
Block a user