mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 09:04:59 +08:00
chore: Remove the database module from meilidb
This commit is contained in:
parent
abf7191eec
commit
77405cc103
@ -5,39 +5,13 @@ version = "0.3.1"
|
|||||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arc-swap = "0.3.7"
|
|
||||||
bincode = "1.1.2"
|
|
||||||
byteorder = "1.3.1"
|
|
||||||
fst = "0.3.3"
|
|
||||||
hashbrown = { version = "0.1.8", features = ["serde"] }
|
|
||||||
linked-hash-map = { version = "0.5.1", features = ["serde_impl"] }
|
|
||||||
lockfree = "0.5.1"
|
|
||||||
log = "0.4.6"
|
|
||||||
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
|
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
|
||||||
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
||||||
sdset = "0.3.1"
|
|
||||||
serde = "1.0.88"
|
|
||||||
serde_derive = "1.0.88"
|
|
||||||
serde_json = { version = "1.0.38", features = ["preserve_order"] }
|
|
||||||
size_format = "1.0.2"
|
|
||||||
slice-group-by = "0.2.4"
|
|
||||||
toml = { version = "0.5.0", features = ["preserve_order"] }
|
|
||||||
unidecode = "0.3.0"
|
|
||||||
|
|
||||||
[dependencies.rocksdb]
|
|
||||||
git = "https://github.com/pingcap/rust-rocksdb.git"
|
|
||||||
rev = "306e201"
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["simd"]
|
default = []
|
||||||
i128 = ["bincode/i128", "meilidb-core/i128"]
|
i128 = ["meilidb-core/i128"]
|
||||||
portable = ["rocksdb/portable"]
|
nightly = ["meilidb-core/nightly"]
|
||||||
simd = ["rocksdb/sse"]
|
|
||||||
nightly = [
|
|
||||||
"hashbrown/nightly",
|
|
||||||
"slice-group-by/nightly",
|
|
||||||
"meilidb-core/nightly"
|
|
||||||
]
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
csv = "1.0.5"
|
csv = "1.0.5"
|
||||||
|
@ -1,46 +0,0 @@
|
|||||||
use std::collections::{HashSet, HashMap};
|
|
||||||
use serde_derive::{Serialize, Deserialize};
|
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
||||||
#[serde(rename_all = "lowercase")]
|
|
||||||
pub enum RankingOrdering {
|
|
||||||
Asc,
|
|
||||||
Dsc
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
||||||
pub struct AccessToken {
|
|
||||||
pub read_key: String,
|
|
||||||
pub write_key: String,
|
|
||||||
pub admin_key: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
||||||
pub struct Config {
|
|
||||||
pub stop_words: Option<HashSet<String>>,
|
|
||||||
pub ranking_order: Option<Vec<String>>,
|
|
||||||
pub distinct_field: Option<String>,
|
|
||||||
pub ranking_rules: Option<HashMap<String, RankingOrdering>>,
|
|
||||||
pub access_token: Option<AccessToken>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Config {
|
|
||||||
pub fn update_with(&mut self, new: Config) {
|
|
||||||
if let Some(stop_words) = new.stop_words {
|
|
||||||
self.stop_words = Some(stop_words);
|
|
||||||
};
|
|
||||||
if let Some(ranking_order) = new.ranking_order {
|
|
||||||
self.ranking_order = Some(ranking_order);
|
|
||||||
};
|
|
||||||
if let Some(distinct_field) = new.distinct_field {
|
|
||||||
self.distinct_field = Some(distinct_field);
|
|
||||||
};
|
|
||||||
if let Some(ranking_rules) = new.ranking_rules {
|
|
||||||
self.ranking_rules = Some(ranking_rules);
|
|
||||||
};
|
|
||||||
if let Some(access_token) = new.access_token {
|
|
||||||
self.access_token = Some(access_token);
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,149 +0,0 @@
|
|||||||
use std::io::{Cursor, Read, Write};
|
|
||||||
use std::mem::size_of;
|
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use byteorder::{BigEndian, WriteBytesExt, ReadBytesExt};
|
|
||||||
|
|
||||||
use crate::database::schema::SchemaAttr;
|
|
||||||
use meilidb_core::DocumentId;
|
|
||||||
|
|
||||||
const DOC_KEY_LEN: usize = 4 + size_of::<u64>();
|
|
||||||
const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + size_of::<u16>();
|
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
|
||||||
pub struct DocumentKey([u8; DOC_KEY_LEN]);
|
|
||||||
|
|
||||||
impl DocumentKey {
|
|
||||||
pub fn new(id: DocumentId) -> DocumentKey {
|
|
||||||
let mut buffer = [0; DOC_KEY_LEN];
|
|
||||||
|
|
||||||
let mut wtr = Cursor::new(&mut buffer[..]);
|
|
||||||
wtr.write_all(b"doc-").unwrap();
|
|
||||||
wtr.write_u64::<BigEndian>(id.0).unwrap();
|
|
||||||
|
|
||||||
DocumentKey(buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_bytes(mut bytes: &[u8]) -> DocumentKey {
|
|
||||||
assert!(bytes.len() >= DOC_KEY_LEN);
|
|
||||||
assert_eq!(&bytes[..4], b"doc-");
|
|
||||||
|
|
||||||
let mut buffer = [0; DOC_KEY_LEN];
|
|
||||||
bytes.read_exact(&mut buffer).unwrap();
|
|
||||||
|
|
||||||
DocumentKey(buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_attribute(&self, attr: SchemaAttr) -> DocumentKeyAttr {
|
|
||||||
DocumentKeyAttr::new(self.document_id(), attr)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_attribute_min(&self) -> DocumentKeyAttr {
|
|
||||||
DocumentKeyAttr::new(self.document_id(), SchemaAttr::min())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_attribute_max(&self) -> DocumentKeyAttr {
|
|
||||||
DocumentKeyAttr::new(self.document_id(), SchemaAttr::max())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn document_id(&self) -> DocumentId {
|
|
||||||
let id = (&self.0[4..]).read_u64::<BigEndian>().unwrap();
|
|
||||||
DocumentId(id)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AsRef<[u8]> for DocumentKey {
|
|
||||||
fn as_ref(&self) -> &[u8] {
|
|
||||||
&self.0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Debug for DocumentKey {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
f.debug_struct("DocumentKey")
|
|
||||||
.field("document_id", &self.document_id())
|
|
||||||
.finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
|
|
||||||
pub struct DocumentKeyAttr([u8; DOC_KEY_ATTR_LEN]);
|
|
||||||
|
|
||||||
impl DocumentKeyAttr {
|
|
||||||
pub fn new(id: DocumentId, attr: SchemaAttr) -> DocumentKeyAttr {
|
|
||||||
let mut buffer = [0; DOC_KEY_ATTR_LEN];
|
|
||||||
let DocumentKey(raw_key) = DocumentKey::new(id);
|
|
||||||
|
|
||||||
let mut wtr = Cursor::new(&mut buffer[..]);
|
|
||||||
wtr.write_all(&raw_key).unwrap();
|
|
||||||
wtr.write_all(b"-").unwrap();
|
|
||||||
wtr.write_u16::<BigEndian>(attr.0).unwrap();
|
|
||||||
|
|
||||||
DocumentKeyAttr(buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_attribute_min(id: DocumentId) -> DocumentKeyAttr {
|
|
||||||
DocumentKeyAttr::new(id, SchemaAttr::min())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_attribute_max(id: DocumentId) -> DocumentKeyAttr {
|
|
||||||
DocumentKeyAttr::new(id, SchemaAttr::max())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_bytes(mut bytes: &[u8]) -> DocumentKeyAttr {
|
|
||||||
assert!(bytes.len() >= DOC_KEY_ATTR_LEN);
|
|
||||||
assert_eq!(&bytes[..4], b"doc-");
|
|
||||||
|
|
||||||
let mut buffer = [0; DOC_KEY_ATTR_LEN];
|
|
||||||
bytes.read_exact(&mut buffer).unwrap();
|
|
||||||
|
|
||||||
DocumentKeyAttr(buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn document_id(&self) -> DocumentId {
|
|
||||||
let id = (&self.0[4..]).read_u64::<BigEndian>().unwrap();
|
|
||||||
DocumentId(id)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn attribute(&self) -> SchemaAttr {
|
|
||||||
let offset = 4 + size_of::<u64>() + 1;
|
|
||||||
let value = (&self.0[offset..]).read_u16::<BigEndian>().unwrap();
|
|
||||||
SchemaAttr::new(value)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn into_document_key(self) -> DocumentKey {
|
|
||||||
DocumentKey::new(self.document_id())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AsRef<[u8]> for DocumentKeyAttr {
|
|
||||||
fn as_ref(&self) -> &[u8] {
|
|
||||||
&self.0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Debug for DocumentKeyAttr {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
f.debug_struct("DocumentKeyAttr")
|
|
||||||
.field("document_id", &self.document_id())
|
|
||||||
.field("attribute", &self.attribute().0)
|
|
||||||
.finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn keep_as_ref_order() {
|
|
||||||
for (a, b) in (0..).zip(1..).take(u16::max_value() as usize - 1) {
|
|
||||||
let id = DocumentId(0);
|
|
||||||
let a = DocumentKeyAttr::new(id, SchemaAttr(a));
|
|
||||||
let b = DocumentKeyAttr::new(id, SchemaAttr(b));
|
|
||||||
|
|
||||||
assert!(a < b);
|
|
||||||
assert!(a.as_ref() < b.as_ref());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,899 +0,0 @@
|
|||||||
use std::time::Instant;
|
|
||||||
use std::error::Error;
|
|
||||||
use std::ffi::OsStr;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::fs;
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
|
||||||
use std::ops::{Deref, DerefMut};
|
|
||||||
|
|
||||||
use rocksdb::rocksdb_options::{DBOptions, ColumnFamilyOptions};
|
|
||||||
use rocksdb::rocksdb::{Writable, Snapshot};
|
|
||||||
use rocksdb::{DB, MergeOperands};
|
|
||||||
use size_format::SizeFormatterBinary;
|
|
||||||
use arc_swap::ArcSwap;
|
|
||||||
use lockfree::map::Map;
|
|
||||||
use hashbrown::HashMap;
|
|
||||||
use log::{info, error, warn};
|
|
||||||
|
|
||||||
use crate::database::schema::SchemaAttr;
|
|
||||||
use meilidb_core::shared_data_cursor::FromSharedDataCursor;
|
|
||||||
use meilidb_core::write_to_bytes::WriteToBytes;
|
|
||||||
use meilidb_core::{Index, DocumentId};
|
|
||||||
|
|
||||||
use self::update::{ReadIndexEvent, ReadRankedMapEvent};
|
|
||||||
|
|
||||||
pub use self::config::Config;
|
|
||||||
pub use self::document_key::{DocumentKey, DocumentKeyAttr};
|
|
||||||
pub use self::view::{DatabaseView, DocumentIter};
|
|
||||||
pub use self::update::Update;
|
|
||||||
pub use self::serde::SerializerError;
|
|
||||||
pub use self::schema::Schema;
|
|
||||||
pub use self::number::{Number, ParseNumberError};
|
|
||||||
|
|
||||||
pub type RankedMap = HashMap<(DocumentId, SchemaAttr), Number>;
|
|
||||||
|
|
||||||
const DATA_INDEX: &[u8] = b"data-index";
|
|
||||||
const DATA_RANKED_MAP: &[u8] = b"data-ranked-map";
|
|
||||||
const DATA_SCHEMA: &[u8] = b"data-schema";
|
|
||||||
const CONFIG: &[u8] = b"config";
|
|
||||||
|
|
||||||
pub mod config;
|
|
||||||
pub mod schema;
|
|
||||||
mod number;
|
|
||||||
mod document_key;
|
|
||||||
mod serde;
|
|
||||||
mod update;
|
|
||||||
mod view;
|
|
||||||
|
|
||||||
fn retrieve_data_schema<D>(snapshot: &Snapshot<D>) -> Result<Schema, Box<Error>>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
match snapshot.get(DATA_SCHEMA)? {
|
|
||||||
Some(vector) => Ok(Schema::read_from_bin(&*vector)?),
|
|
||||||
None => Err(String::from("BUG: no schema found in the database").into()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn retrieve_data_index<D>(snapshot: &Snapshot<D>) -> Result<Index, Box<Error>>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
let start = Instant::now();
|
|
||||||
let vector = snapshot.get(DATA_INDEX)?;
|
|
||||||
info!("loading index from kv-store took {:.2?}", start.elapsed());
|
|
||||||
|
|
||||||
match vector {
|
|
||||||
Some(vector) => {
|
|
||||||
let start = Instant::now();
|
|
||||||
|
|
||||||
let bytes = vector.as_ref().to_vec();
|
|
||||||
info!("index size is {}B", SizeFormatterBinary::new(bytes.len() as u64));
|
|
||||||
|
|
||||||
let event = ReadIndexEvent::from_bytes(bytes)?;
|
|
||||||
let index = event.updated_documents().expect("BUG: invalid event deserialized");
|
|
||||||
|
|
||||||
info!("loading index from bytes took {:.2?}", start.elapsed());
|
|
||||||
|
|
||||||
Ok(index)
|
|
||||||
},
|
|
||||||
None => Ok(Index::default()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn retrieve_data_ranked_map<D>(snapshot: &Snapshot<D>) -> Result<RankedMap, Box<Error>>
|
|
||||||
where D: Deref<Target=DB>,
|
|
||||||
{
|
|
||||||
let start = Instant::now();
|
|
||||||
let vector = snapshot.get(DATA_RANKED_MAP)?;
|
|
||||||
info!("loading ranked map from kv-store took {:.2?}", start.elapsed());
|
|
||||||
|
|
||||||
match vector {
|
|
||||||
Some(vector) => {
|
|
||||||
let start = Instant::now();
|
|
||||||
|
|
||||||
let bytes = vector.as_ref().to_vec();
|
|
||||||
info!("ranked map size is {}B", SizeFormatterBinary::new(bytes.len() as u64));
|
|
||||||
|
|
||||||
let event = ReadRankedMapEvent::from_bytes(bytes)?;
|
|
||||||
let ranked_map = event.updated_documents().expect("BUG: invalid event deserialized");
|
|
||||||
|
|
||||||
info!("loading ranked map from bytes took {:.2?}", start.elapsed());
|
|
||||||
|
|
||||||
Ok(ranked_map)
|
|
||||||
},
|
|
||||||
None => Ok(RankedMap::new()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn retrieve_config<D>(snapshot: &Snapshot<D>) -> Result<Config, Box<Error>>
|
|
||||||
where D: Deref<Target=DB>,
|
|
||||||
{
|
|
||||||
match snapshot.get(CONFIG)? {
|
|
||||||
Some(vector) => Ok(bincode::deserialize(&*vector)?),
|
|
||||||
None => Ok(Config::default()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn merge_indexes(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
|
|
||||||
use self::update::ReadIndexEvent::{self, *};
|
|
||||||
use self::update::WriteIndexEvent;
|
|
||||||
|
|
||||||
let mut index = Index::default();
|
|
||||||
for bytes in existing.into_iter().chain(operands) {
|
|
||||||
match ReadIndexEvent::from_bytes(bytes.to_vec()).unwrap() {
|
|
||||||
RemovedDocuments(d) => index = index.remove_documents(d.as_ref()),
|
|
||||||
UpdatedDocuments(i) => index = index.union(&i),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
WriteIndexEvent::UpdatedDocuments(&index).into_bytes()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn merge_ranked_maps(existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
|
|
||||||
use self::update::ReadRankedMapEvent::{self, *};
|
|
||||||
use self::update::WriteRankedMapEvent;
|
|
||||||
|
|
||||||
let mut ranked_map = RankedMap::default();
|
|
||||||
for bytes in existing.into_iter().chain(operands) {
|
|
||||||
match ReadRankedMapEvent::from_bytes(bytes.to_vec()).unwrap() {
|
|
||||||
RemovedDocuments(d) => ranked_map.retain(|(k, _), _| !d.as_ref().binary_search(k).is_ok()),
|
|
||||||
UpdatedDocuments(i) => ranked_map.extend(i),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
WriteRankedMapEvent::UpdatedDocuments(&ranked_map).into_bytes()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn merge_operator(key: &[u8], existing: Option<&[u8]>, operands: &mut MergeOperands) -> Vec<u8> {
|
|
||||||
match key {
|
|
||||||
DATA_INDEX => merge_indexes(existing, operands),
|
|
||||||
DATA_RANKED_MAP => merge_ranked_maps(existing, operands),
|
|
||||||
key => panic!("The merge operator does not support merging {:?}", key),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct IndexUpdate {
|
|
||||||
index: String,
|
|
||||||
update: Update,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Deref for IndexUpdate {
|
|
||||||
type Target = Update;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Update {
|
|
||||||
&self.update
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DerefMut for IndexUpdate {
|
|
||||||
fn deref_mut(&mut self) -> &mut Update {
|
|
||||||
&mut self.update
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct DatabaseIndex {
|
|
||||||
db: Arc<DB>,
|
|
||||||
|
|
||||||
// This view is updated each time the DB ingests an update.
|
|
||||||
view: ArcSwap<DatabaseView<Arc<DB>>>,
|
|
||||||
|
|
||||||
// The path of the mdb folder stored on disk.
|
|
||||||
path: PathBuf,
|
|
||||||
|
|
||||||
// must_die false by default, must be set as true when the Index is dropped.
|
|
||||||
// It is used to erase the folder saved on disk when the user request to delete an index.
|
|
||||||
must_die: AtomicBool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DatabaseIndex {
|
|
||||||
fn create<P: AsRef<Path>>(path: P, schema: &Schema) -> Result<DatabaseIndex, Box<Error>> {
|
|
||||||
let path = path.as_ref();
|
|
||||||
if path.exists() {
|
|
||||||
return Err(format!("File already exists at path: {}, cannot create database.",
|
|
||||||
path.display()).into())
|
|
||||||
}
|
|
||||||
|
|
||||||
let path_lossy = path.to_string_lossy();
|
|
||||||
let mut opts = DBOptions::new();
|
|
||||||
opts.create_if_missing(true);
|
|
||||||
// opts.error_if_exists(true); // FIXME pull request that
|
|
||||||
|
|
||||||
let mut cf_opts = ColumnFamilyOptions::new();
|
|
||||||
cf_opts.add_merge_operator("data merge operator", merge_operator);
|
|
||||||
|
|
||||||
let db = DB::open_cf(opts, &path_lossy, vec![("default", cf_opts)])?;
|
|
||||||
|
|
||||||
let mut schema_bytes = Vec::new();
|
|
||||||
schema.write_to_bin(&mut schema_bytes)?;
|
|
||||||
db.put(DATA_SCHEMA, &schema_bytes)?;
|
|
||||||
|
|
||||||
let db = Arc::new(db);
|
|
||||||
let snapshot = Snapshot::new(db.clone());
|
|
||||||
let view = ArcSwap::new(Arc::new(DatabaseView::new(snapshot)?));
|
|
||||||
|
|
||||||
Ok(DatabaseIndex {
|
|
||||||
db: db,
|
|
||||||
view: view,
|
|
||||||
path: path.to_path_buf(),
|
|
||||||
must_die: AtomicBool::new(false)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn open<P: AsRef<Path>>(path: P) -> Result<DatabaseIndex, Box<Error>> {
|
|
||||||
let path_lossy = path.as_ref().to_string_lossy();
|
|
||||||
|
|
||||||
let mut opts = DBOptions::new();
|
|
||||||
opts.create_if_missing(false);
|
|
||||||
|
|
||||||
let mut cf_opts = ColumnFamilyOptions::new();
|
|
||||||
cf_opts.add_merge_operator("data merge operator", merge_operator);
|
|
||||||
|
|
||||||
let db = DB::open_cf(opts, &path_lossy, vec![("default", cf_opts)])?;
|
|
||||||
|
|
||||||
// FIXME create a generic function to do that !
|
|
||||||
let _schema = match db.get(DATA_SCHEMA)? {
|
|
||||||
Some(value) => Schema::read_from_bin(&*value)?,
|
|
||||||
None => return Err(String::from("Database does not contain a schema").into()),
|
|
||||||
};
|
|
||||||
|
|
||||||
let db = Arc::new(db);
|
|
||||||
let snapshot = Snapshot::new(db.clone());
|
|
||||||
let view = ArcSwap::new(Arc::new(DatabaseView::new(snapshot)?));
|
|
||||||
|
|
||||||
Ok(DatabaseIndex {
|
|
||||||
db: db,
|
|
||||||
view: view,
|
|
||||||
path: path.as_ref().to_path_buf(),
|
|
||||||
must_die: AtomicBool::new(false)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn must_die(&self) {
|
|
||||||
self.must_die.store(true, Ordering::Relaxed)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn start_update(&self) -> Result<Update, Box<Error>> {
|
|
||||||
let schema = match self.db.get(DATA_SCHEMA)? {
|
|
||||||
Some(value) => Schema::read_from_bin(&*value)?,
|
|
||||||
None => panic!("Database does not contain a schema"),
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(Update::new(schema))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn commit_update(&self, update: Update) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
|
|
||||||
let batch = update.build()?;
|
|
||||||
self.db.write(batch)?;
|
|
||||||
self.db.compact_range(None, None);
|
|
||||||
self.db.flush(true)?;
|
|
||||||
|
|
||||||
let snapshot = Snapshot::new(self.db.clone());
|
|
||||||
let view = Arc::new(DatabaseView::new(snapshot)?);
|
|
||||||
self.view.store(view.clone());
|
|
||||||
|
|
||||||
Ok(view)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn view(&self) -> Arc<DatabaseView<Arc<DB>>> {
|
|
||||||
self.view.load()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_config(&self) -> Config {
|
|
||||||
self.view().config().clone()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn update_config(&self, config: Config) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>>{
|
|
||||||
let data = bincode::serialize(&config)?;
|
|
||||||
self.db.put(CONFIG, &data)?;
|
|
||||||
|
|
||||||
let snapshot = Snapshot::new(self.db.clone());
|
|
||||||
let view = Arc::new(DatabaseView::new(snapshot)?);
|
|
||||||
self.view.store(view.clone());
|
|
||||||
|
|
||||||
Ok(view)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn path(&self) -> &Path {
|
|
||||||
self.path.as_path()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for DatabaseIndex {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
if self.must_die.load(Ordering::Relaxed) {
|
|
||||||
if let Err(err) = fs::remove_dir_all(&self.path) {
|
|
||||||
error!("Impossible to remove mdb when Database is dropped; {}", err);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Database {
|
|
||||||
indexes: Map<String, Arc<DatabaseIndex>>,
|
|
||||||
path: PathBuf,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Database {
|
|
||||||
pub fn create<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
|
|
||||||
Ok(Database {
|
|
||||||
indexes: Map::new(),
|
|
||||||
path: path.as_ref().to_path_buf(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Box<Error>> {
|
|
||||||
let entries = fs::read_dir(&path)?;
|
|
||||||
|
|
||||||
let indexes = Map::new();
|
|
||||||
for entry in entries {
|
|
||||||
let path = match entry {
|
|
||||||
Ok(p) => p.path(),
|
|
||||||
Err(err) => {
|
|
||||||
warn!("Impossible to retrieve the path from an entry; {}", err);
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let name = match path.file_stem().and_then(OsStr::to_str) {
|
|
||||||
Some(name) => name.to_owned(),
|
|
||||||
None => continue
|
|
||||||
};
|
|
||||||
|
|
||||||
let db = match DatabaseIndex::open(path.clone()) {
|
|
||||||
Ok(db) => db,
|
|
||||||
Err(err) => {
|
|
||||||
warn!("Impossible to open the database; {}", err);
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
info!("Load database {}", name);
|
|
||||||
indexes.insert(name, Arc::new(db));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Database {
|
|
||||||
indexes: indexes,
|
|
||||||
path: path.as_ref().to_path_buf(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn create_index(&self, name: &str, schema: &Schema) -> Result<(), Box<Error>> {
|
|
||||||
let index_path = self.path.join(name);
|
|
||||||
|
|
||||||
if index_path.exists() {
|
|
||||||
return Err("Index already exists".into());
|
|
||||||
}
|
|
||||||
|
|
||||||
let index = DatabaseIndex::create(index_path, schema)?;
|
|
||||||
self.indexes.insert(name.to_owned(), Arc::new(index));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn delete_index(&self, name: &str) -> Result<(), Box<Error>> {
|
|
||||||
let index_guard = self.indexes.remove(name).ok_or("Index not found")?;
|
|
||||||
index_guard.val().must_die();
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn list_indexes(&self) -> Vec<String> {
|
|
||||||
self.indexes.iter().map(|g| g.key().clone()).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn start_update(&self, index: &str) -> Result<IndexUpdate, Box<Error>> {
|
|
||||||
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
|
|
||||||
let update = index_guard.val().start_update()?;
|
|
||||||
|
|
||||||
Ok(IndexUpdate { index: index.to_owned(), update })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn commit_update(&self, update: IndexUpdate)-> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
|
|
||||||
let index_guard = self.indexes.get(&update.index).ok_or("Index not found")?;
|
|
||||||
|
|
||||||
index_guard.val().commit_update(update.update)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn view(&self, index: &str) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>> {
|
|
||||||
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
|
|
||||||
|
|
||||||
Ok(index_guard.val().view())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_config(&self, index: &str) -> Result<Config, Box<Error>> {
|
|
||||||
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
|
|
||||||
|
|
||||||
Ok(index_guard.val().get_config())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn update_config(&self, index: &str, config: Config) -> Result<Arc<DatabaseView<Arc<DB>>>, Box<Error>>{
|
|
||||||
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
|
|
||||||
|
|
||||||
Ok(index_guard.val().update_config(config)?)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn path(&self) -> &Path {
|
|
||||||
self.path.as_path()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn index_path(&self, index: &str) -> Result<PathBuf, Box<Error>> {
|
|
||||||
let index_guard = self.indexes.get(index).ok_or("Index not found")?;
|
|
||||||
let path = index_guard.val().path();
|
|
||||||
Ok(path.to_path_buf())
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::error::Error;
|
|
||||||
|
|
||||||
use serde_derive::{Serialize, Deserialize};
|
|
||||||
|
|
||||||
use crate::database::schema::{SchemaBuilder, STORED, INDEXED};
|
|
||||||
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn ingest_one_easy_update() -> Result<(), Box<Error>> {
|
|
||||||
let dir = tempfile::tempdir()?;
|
|
||||||
let stop_words = HashSet::new();
|
|
||||||
|
|
||||||
let meilidb_path = dir.path().join("meilidb.mdb");
|
|
||||||
let meilidb_index_name = "default";
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
|
||||||
struct SimpleDoc {
|
|
||||||
id: u64,
|
|
||||||
title: String,
|
|
||||||
description: String,
|
|
||||||
timestamp: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
let schema = {
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("id", STORED);
|
|
||||||
builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
builder.new_attribute("timestamp", STORED);
|
|
||||||
builder.build()
|
|
||||||
};
|
|
||||||
|
|
||||||
let database = Database::create(&meilidb_path)?;
|
|
||||||
|
|
||||||
database.create_index(meilidb_index_name, &schema)?;
|
|
||||||
|
|
||||||
let doc0 = SimpleDoc {
|
|
||||||
id: 0,
|
|
||||||
title: String::from("I am a title"),
|
|
||||||
description: String::from("I am a description"),
|
|
||||||
timestamp: 1234567,
|
|
||||||
};
|
|
||||||
let doc1 = SimpleDoc {
|
|
||||||
id: 1,
|
|
||||||
title: String::from("I am the second title"),
|
|
||||||
description: String::from("I am the second description"),
|
|
||||||
timestamp: 7654321,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut builder = database.start_update(meilidb_index_name)?;
|
|
||||||
|
|
||||||
let docid0 = builder.update_document(&doc0, &stop_words)?;
|
|
||||||
let docid1 = builder.update_document(&doc1, &stop_words)?;
|
|
||||||
|
|
||||||
let view = database.commit_update(builder)?;
|
|
||||||
|
|
||||||
let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
|
|
||||||
let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
|
|
||||||
|
|
||||||
assert_eq!(doc0, de_doc0);
|
|
||||||
assert_eq!(doc1, de_doc1);
|
|
||||||
|
|
||||||
Ok(dir.close()?)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn ingest_two_easy_updates() -> Result<(), Box<Error>> {
|
|
||||||
let dir = tempfile::tempdir()?;
|
|
||||||
let stop_words = HashSet::new();
|
|
||||||
|
|
||||||
let meilidb_path = dir.path().join("meilidb.mdb");
|
|
||||||
let meilidb_index_name = "default";
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
|
||||||
struct SimpleDoc {
|
|
||||||
id: u64,
|
|
||||||
title: String,
|
|
||||||
description: String,
|
|
||||||
timestamp: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
let schema = {
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("id", STORED);
|
|
||||||
builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
builder.new_attribute("timestamp", STORED);
|
|
||||||
builder.build()
|
|
||||||
};
|
|
||||||
|
|
||||||
let database = Database::create(&meilidb_path)?;
|
|
||||||
|
|
||||||
database.create_index(meilidb_index_name, &schema)?;
|
|
||||||
|
|
||||||
let doc0 = SimpleDoc {
|
|
||||||
id: 0,
|
|
||||||
title: String::from("I am a title"),
|
|
||||||
description: String::from("I am a description"),
|
|
||||||
timestamp: 1234567,
|
|
||||||
};
|
|
||||||
let doc1 = SimpleDoc {
|
|
||||||
id: 1,
|
|
||||||
title: String::from("I am the second title"),
|
|
||||||
description: String::from("I am the second description"),
|
|
||||||
timestamp: 7654321,
|
|
||||||
};
|
|
||||||
let doc2 = SimpleDoc {
|
|
||||||
id: 2,
|
|
||||||
title: String::from("I am the third title"),
|
|
||||||
description: String::from("I am the third description"),
|
|
||||||
timestamp: 7654321,
|
|
||||||
};
|
|
||||||
let doc3 = SimpleDoc {
|
|
||||||
id: 3,
|
|
||||||
title: String::from("I am the fourth title"),
|
|
||||||
description: String::from("I am the fourth description"),
|
|
||||||
timestamp: 7654321,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut builder = database.start_update(meilidb_index_name)?;
|
|
||||||
let docid0 = builder.update_document(&doc0, &stop_words)?;
|
|
||||||
let docid1 = builder.update_document(&doc1, &stop_words)?;
|
|
||||||
database.commit_update(builder)?;
|
|
||||||
|
|
||||||
let mut builder = database.start_update(meilidb_index_name)?;
|
|
||||||
let docid2 = builder.update_document(&doc2, &stop_words)?;
|
|
||||||
let docid3 = builder.update_document(&doc3, &stop_words)?;
|
|
||||||
let view = database.commit_update(builder)?;
|
|
||||||
|
|
||||||
let de_doc0: SimpleDoc = view.document_by_id(docid0)?;
|
|
||||||
let de_doc1: SimpleDoc = view.document_by_id(docid1)?;
|
|
||||||
|
|
||||||
assert_eq!(doc0, de_doc0);
|
|
||||||
assert_eq!(doc1, de_doc1);
|
|
||||||
|
|
||||||
let de_doc2: SimpleDoc = view.document_by_id(docid2)?;
|
|
||||||
let de_doc3: SimpleDoc = view.document_by_id(docid3)?;
|
|
||||||
|
|
||||||
assert_eq!(doc2, de_doc2);
|
|
||||||
assert_eq!(doc3, de_doc3);
|
|
||||||
|
|
||||||
Ok(dir.close()?)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(all(feature = "nightly", test))]
|
|
||||||
mod bench {
|
|
||||||
extern crate test;
|
|
||||||
|
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::error::Error;
|
|
||||||
use std::iter::repeat_with;
|
|
||||||
use self::test::Bencher;
|
|
||||||
|
|
||||||
use rand::distributions::Alphanumeric;
|
|
||||||
use rand_xorshift::XorShiftRng;
|
|
||||||
use rand::{Rng, SeedableRng};
|
|
||||||
use serde_derive::Serialize;
|
|
||||||
use rand::seq::SliceRandom;
|
|
||||||
|
|
||||||
use crate::tokenizer::DefaultBuilder;
|
|
||||||
use crate::database::schema::*;
|
|
||||||
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
fn random_sentences<R: Rng>(number: usize, rng: &mut R) -> String {
|
|
||||||
let mut words = String::new();
|
|
||||||
|
|
||||||
for i in 0..number {
|
|
||||||
let word_len = rng.gen_range(1, 12);
|
|
||||||
let iter = repeat_with(|| rng.sample(Alphanumeric)).take(word_len);
|
|
||||||
words.extend(iter);
|
|
||||||
|
|
||||||
if i == number - 1 { // last word
|
|
||||||
let final_ = [".", "?", "!", "..."].choose(rng).cloned();
|
|
||||||
words.extend(final_);
|
|
||||||
} else {
|
|
||||||
let middle = [",", ", "].choose(rng).cloned();
|
|
||||||
words.extend(middle);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
words
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn open_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
|
|
||||||
let dir = tempfile::tempdir()?;
|
|
||||||
let stop_words = HashSet::new();
|
|
||||||
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let db_path = dir.path().join("bench.mdb");
|
|
||||||
let index_name = "default";
|
|
||||||
|
|
||||||
let database = Database::create(&db_path)?;
|
|
||||||
database.create_index(index_name, &schema)?;
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Document {
|
|
||||||
id: u64,
|
|
||||||
title: String,
|
|
||||||
description: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut builder = database.start_update(index_name)?;
|
|
||||||
let mut rng = XorShiftRng::seed_from_u64(42);
|
|
||||||
|
|
||||||
for i in 0..300 {
|
|
||||||
let document = Document {
|
|
||||||
id: i,
|
|
||||||
title: random_sentences(rng.gen_range(1, 8), &mut rng),
|
|
||||||
description: random_sentences(rng.gen_range(20, 200), &mut rng),
|
|
||||||
};
|
|
||||||
builder.update_document(&document, &stop_words)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
database.commit_update(builder)?;
|
|
||||||
|
|
||||||
drop(database);
|
|
||||||
|
|
||||||
bench.iter(|| {
|
|
||||||
let database = Database::open(db_path.clone()).unwrap();
|
|
||||||
test::black_box(|| database);
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn open_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
|
|
||||||
let dir = tempfile::tempdir()?;
|
|
||||||
let stop_words = HashSet::new();
|
|
||||||
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let db_path = dir.path().join("bench.mdb");
|
|
||||||
let index_name = "default";
|
|
||||||
|
|
||||||
let database = Database::create(&db_path)?;
|
|
||||||
database.create_index(index_name, &schema)?;
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Document {
|
|
||||||
id: u64,
|
|
||||||
title: String,
|
|
||||||
description: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut builder = database.start_update(index_name)?;
|
|
||||||
let mut rng = XorShiftRng::seed_from_u64(42);
|
|
||||||
|
|
||||||
for i in 0..3000 {
|
|
||||||
let document = Document {
|
|
||||||
id: i,
|
|
||||||
title: random_sentences(rng.gen_range(1, 8), &mut rng),
|
|
||||||
description: random_sentences(rng.gen_range(20, 200), &mut rng),
|
|
||||||
};
|
|
||||||
builder.update_document(&document, &stop_words)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
database.commit_update(builder)?;
|
|
||||||
|
|
||||||
drop(database);
|
|
||||||
|
|
||||||
bench.iter(|| {
|
|
||||||
let database = Database::open(db_path.clone()).unwrap();
|
|
||||||
test::black_box(|| database);
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
#[ignore]
|
|
||||||
fn open_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
|
|
||||||
let dir = tempfile::tempdir()?;
|
|
||||||
let stop_words = HashSet::new();
|
|
||||||
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let db_path = dir.path().join("bench.mdb");
|
|
||||||
let index_name = "default";
|
|
||||||
|
|
||||||
let database = Database::create(&db_path)?;
|
|
||||||
database.create_index(index_name, &schema)?;
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Document {
|
|
||||||
id: u64,
|
|
||||||
title: String,
|
|
||||||
description: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut builder = database.start_update(index_name)?;
|
|
||||||
let mut rng = XorShiftRng::seed_from_u64(42);
|
|
||||||
|
|
||||||
for i in 0..30_000 {
|
|
||||||
let document = Document {
|
|
||||||
id: i,
|
|
||||||
title: random_sentences(rng.gen_range(1, 8), &mut rng),
|
|
||||||
description: random_sentences(rng.gen_range(20, 200), &mut rng),
|
|
||||||
};
|
|
||||||
builder.update_document(&document, &stop_words)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
database.commit_update(builder)?;
|
|
||||||
|
|
||||||
drop(database);
|
|
||||||
|
|
||||||
bench.iter(|| {
|
|
||||||
let database = Database::open(db_path.clone()).unwrap();
|
|
||||||
test::black_box(|| database);
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn search_oneletter_little_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
|
|
||||||
let dir = tempfile::tempdir()?;
|
|
||||||
let stop_words = HashSet::new();
|
|
||||||
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let db_path = dir.path().join("bench.mdb");
|
|
||||||
let index_name = "default";
|
|
||||||
|
|
||||||
let database = Database::create(&db_path)?;
|
|
||||||
database.create_index(index_name, &schema)?;
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Document {
|
|
||||||
id: u64,
|
|
||||||
title: String,
|
|
||||||
description: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut builder = database.start_update(index_name)?;
|
|
||||||
let mut rng = XorShiftRng::seed_from_u64(42);
|
|
||||||
|
|
||||||
for i in 0..300 {
|
|
||||||
let document = Document {
|
|
||||||
id: i,
|
|
||||||
title: random_sentences(rng.gen_range(1, 8), &mut rng),
|
|
||||||
description: random_sentences(rng.gen_range(20, 200), &mut rng),
|
|
||||||
};
|
|
||||||
builder.update_document(&document, &stop_words)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let view = database.commit_update(builder)?;
|
|
||||||
|
|
||||||
bench.iter(|| {
|
|
||||||
for q in &["a", "b", "c", "d", "e"] {
|
|
||||||
let documents = view.query_builder().query(q, 0..20);
|
|
||||||
test::black_box(|| documents);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
fn search_oneletter_medium_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
|
|
||||||
let dir = tempfile::tempdir()?;
|
|
||||||
let stop_words = HashSet::new();
|
|
||||||
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let db_path = dir.path().join("bench.mdb");
|
|
||||||
let index_name = "default";
|
|
||||||
|
|
||||||
let database = Database::create(&db_path)?;
|
|
||||||
database.create_index(index_name, &schema)?;
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Document {
|
|
||||||
id: u64,
|
|
||||||
title: String,
|
|
||||||
description: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut builder = database.start_update(index_name)?;
|
|
||||||
let mut rng = XorShiftRng::seed_from_u64(42);
|
|
||||||
|
|
||||||
for i in 0..3000 {
|
|
||||||
let document = Document {
|
|
||||||
id: i,
|
|
||||||
title: random_sentences(rng.gen_range(1, 8), &mut rng),
|
|
||||||
description: random_sentences(rng.gen_range(20, 200), &mut rng),
|
|
||||||
};
|
|
||||||
builder.update_document(&document, &stop_words)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let view = database.commit_update(builder)?;
|
|
||||||
|
|
||||||
bench.iter(|| {
|
|
||||||
for q in &["a", "b", "c", "d", "e"] {
|
|
||||||
let documents = view.query_builder().query(q, 0..20);
|
|
||||||
test::black_box(|| documents);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[bench]
|
|
||||||
#[ignore]
|
|
||||||
fn search_oneletter_big_database(bench: &mut Bencher) -> Result<(), Box<Error>> {
|
|
||||||
let dir = tempfile::tempdir()?;
|
|
||||||
let stop_words = HashSet::new();
|
|
||||||
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("title", STORED | INDEXED);
|
|
||||||
builder.new_attribute("description", STORED | INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let db_path = dir.path().join("bench.mdb");
|
|
||||||
let index_name = "default";
|
|
||||||
|
|
||||||
let database = Database::create(&db_path)?;
|
|
||||||
database.create_index(index_name, &schema)?;
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Document {
|
|
||||||
id: u64,
|
|
||||||
title: String,
|
|
||||||
description: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut builder = database.start_update(index_name)?;
|
|
||||||
let mut rng = XorShiftRng::seed_from_u64(42);
|
|
||||||
|
|
||||||
for i in 0..30_000 {
|
|
||||||
let document = Document {
|
|
||||||
id: i,
|
|
||||||
title: random_sentences(rng.gen_range(1, 8), &mut rng),
|
|
||||||
description: random_sentences(rng.gen_range(20, 200), &mut rng),
|
|
||||||
};
|
|
||||||
builder.update_document(&document, &stop_words)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let view = database.commit_update(builder)?;
|
|
||||||
|
|
||||||
bench.iter(|| {
|
|
||||||
for q in &["a", "b", "c", "d", "e"] {
|
|
||||||
let documents = view.query_builder().query(q, 0..20);
|
|
||||||
test::black_box(|| documents);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,98 +0,0 @@
|
|||||||
use std::cmp::Ordering;
|
|
||||||
use std::str::FromStr;
|
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use serde_derive::{Serialize, Deserialize};
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
|
||||||
pub enum Number {
|
|
||||||
Unsigned(u64),
|
|
||||||
Signed(i64),
|
|
||||||
Float(f64),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FromStr for Number {
|
|
||||||
type Err = ParseNumberError;
|
|
||||||
|
|
||||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
||||||
if let Ok(unsigned) = u64::from_str(s) {
|
|
||||||
return Ok(Number::Unsigned(unsigned))
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Ok(signed) = i64::from_str(s) {
|
|
||||||
return Ok(Number::Signed(signed))
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Ok(float) = f64::from_str(s) {
|
|
||||||
if float == 0.0 || float.is_normal() {
|
|
||||||
return Ok(Number::Float(float))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Err(ParseNumberError)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PartialOrd for Number {
|
|
||||||
fn partial_cmp(&self, other: &Number) -> Option<Ordering> {
|
|
||||||
Some(self.cmp(other))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Ord for Number {
|
|
||||||
fn cmp(&self, other: &Number) -> Ordering {
|
|
||||||
use Number::*;
|
|
||||||
match (self, other) {
|
|
||||||
(Unsigned(s), Unsigned(o)) => s.cmp(o),
|
|
||||||
(Unsigned(s), Signed(o)) => {
|
|
||||||
let s = i128::from(*s);
|
|
||||||
let o = i128::from(*o);
|
|
||||||
s.cmp(&o)
|
|
||||||
},
|
|
||||||
(Unsigned(s), Float(o)) => {
|
|
||||||
let s = *s as f64;
|
|
||||||
s.partial_cmp(&o).unwrap_or(Ordering::Equal)
|
|
||||||
},
|
|
||||||
|
|
||||||
(Signed(s), Unsigned(o)) => {
|
|
||||||
let s = i128::from(*s);
|
|
||||||
let o = i128::from(*o);
|
|
||||||
s.cmp(&o)
|
|
||||||
},
|
|
||||||
(Signed(s), Signed(o)) => s.cmp(o),
|
|
||||||
(Signed(s), Float(o)) => {
|
|
||||||
let s = *s as f64;
|
|
||||||
s.partial_cmp(o).unwrap_or(Ordering::Equal)
|
|
||||||
},
|
|
||||||
|
|
||||||
(Float(s), Unsigned(o)) => {
|
|
||||||
let o = *o as f64;
|
|
||||||
s.partial_cmp(&o).unwrap_or(Ordering::Equal)
|
|
||||||
},
|
|
||||||
(Float(s), Signed(o)) => {
|
|
||||||
let o = *o as f64;
|
|
||||||
s.partial_cmp(&o).unwrap_or(Ordering::Equal)
|
|
||||||
},
|
|
||||||
(Float(s), Float(o)) => {
|
|
||||||
s.partial_cmp(o).unwrap_or(Ordering::Equal)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PartialEq for Number {
|
|
||||||
fn eq(&self, other: &Number) -> bool {
|
|
||||||
self.cmp(other) == Ordering::Equal
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Eq for Number { }
|
|
||||||
|
|
||||||
pub struct ParseNumberError;
|
|
||||||
|
|
||||||
impl fmt::Display for ParseNumberError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
f.write_str("can not parse number")
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,319 +0,0 @@
|
|||||||
use std::collections::{HashMap, BTreeMap};
|
|
||||||
use std::io::{Read, Write};
|
|
||||||
use std::error::Error;
|
|
||||||
use std::{fmt, u16};
|
|
||||||
use std::ops::BitOr;
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use serde_derive::{Serialize, Deserialize};
|
|
||||||
use linked_hash_map::LinkedHashMap;
|
|
||||||
|
|
||||||
use crate::database::serde::find_id::FindDocumentIdSerializer;
|
|
||||||
use crate::database::serde::SerializerError;
|
|
||||||
use meilidb_core::DocumentId;
|
|
||||||
|
|
||||||
pub const STORED: SchemaProps = SchemaProps { stored: true, indexed: false, ranked: false };
|
|
||||||
pub const INDEXED: SchemaProps = SchemaProps { stored: false, indexed: true, ranked: false };
|
|
||||||
pub const RANKED: SchemaProps = SchemaProps { stored: false, indexed: false, ranked: true };
|
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
||||||
pub struct SchemaProps {
|
|
||||||
#[serde(default)]
|
|
||||||
stored: bool,
|
|
||||||
|
|
||||||
#[serde(default)]
|
|
||||||
indexed: bool,
|
|
||||||
|
|
||||||
#[serde(default)]
|
|
||||||
ranked: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SchemaProps {
|
|
||||||
pub fn is_stored(self) -> bool {
|
|
||||||
self.stored
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_indexed(self) -> bool {
|
|
||||||
self.indexed
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_ranked(self) -> bool {
|
|
||||||
self.ranked
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BitOr for SchemaProps {
|
|
||||||
type Output = Self;
|
|
||||||
|
|
||||||
fn bitor(self, other: Self) -> Self::Output {
|
|
||||||
SchemaProps {
|
|
||||||
stored: self.stored | other.stored,
|
|
||||||
indexed: self.indexed | other.indexed,
|
|
||||||
ranked: self.ranked | other.ranked,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
pub struct SchemaBuilder {
|
|
||||||
identifier: String,
|
|
||||||
attributes: LinkedHashMap<String, SchemaProps>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SchemaBuilder {
|
|
||||||
pub fn with_identifier<S: Into<String>>(name: S) -> SchemaBuilder {
|
|
||||||
SchemaBuilder {
|
|
||||||
identifier: name.into(),
|
|
||||||
attributes: LinkedHashMap::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn new_attribute<S: Into<String>>(&mut self, name: S, props: SchemaProps) -> SchemaAttr {
|
|
||||||
let len = self.attributes.len();
|
|
||||||
if self.attributes.insert(name.into(), props).is_some() {
|
|
||||||
panic!("Field already inserted.")
|
|
||||||
}
|
|
||||||
SchemaAttr(len as u16)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn build(self) -> Schema {
|
|
||||||
let mut attrs = HashMap::new();
|
|
||||||
let mut props = Vec::new();
|
|
||||||
|
|
||||||
for (i, (name, prop)) in self.attributes.into_iter().enumerate() {
|
|
||||||
attrs.insert(name.clone(), SchemaAttr(i as u16));
|
|
||||||
props.push((name, prop));
|
|
||||||
}
|
|
||||||
|
|
||||||
let identifier = self.identifier;
|
|
||||||
Schema { inner: Arc::new(InnerSchema { identifier, attrs, props }) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub struct Schema {
|
|
||||||
inner: Arc<InnerSchema>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
struct InnerSchema {
|
|
||||||
identifier: String,
|
|
||||||
attrs: HashMap<String, SchemaAttr>,
|
|
||||||
props: Vec<(String, SchemaProps)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Schema {
|
|
||||||
pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
|
|
||||||
let mut buffer = Vec::new();
|
|
||||||
reader.read_to_end(&mut buffer)?;
|
|
||||||
let builder: SchemaBuilder = toml::from_slice(&buffer)?;
|
|
||||||
Ok(builder.build())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
|
|
||||||
let identifier = self.inner.identifier.clone();
|
|
||||||
let attributes = self.attributes_ordered();
|
|
||||||
let builder = SchemaBuilder { identifier, attributes };
|
|
||||||
|
|
||||||
let string = toml::to_string_pretty(&builder)?;
|
|
||||||
writer.write_all(string.as_bytes())?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
|
|
||||||
let mut buffer = Vec::new();
|
|
||||||
reader.read_to_end(&mut buffer)?;
|
|
||||||
let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
|
|
||||||
Ok(builder.build())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
|
|
||||||
let identifier = self.inner.identifier.clone();
|
|
||||||
let attributes = self.attributes_ordered();
|
|
||||||
let builder = SchemaBuilder { identifier, attributes };
|
|
||||||
let string = serde_json::to_string_pretty(&builder)?;
|
|
||||||
writer.write_all(string.as_bytes())?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn read_from_bin<R: Read>(reader: R) -> bincode::Result<Schema> {
|
|
||||||
let builder: SchemaBuilder = bincode::deserialize_from(reader)?;
|
|
||||||
Ok(builder.build())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn write_to_bin<W: Write>(&self, writer: W) -> bincode::Result<()> {
|
|
||||||
let identifier = self.inner.identifier.clone();
|
|
||||||
let attributes = self.attributes_ordered();
|
|
||||||
let builder = SchemaBuilder { identifier, attributes };
|
|
||||||
|
|
||||||
bincode::serialize_into(writer, &builder)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn attributes_ordered(&self) -> LinkedHashMap<String, SchemaProps> {
|
|
||||||
let mut ordered = BTreeMap::new();
|
|
||||||
for (name, attr) in &self.inner.attrs {
|
|
||||||
let (_, props) = self.inner.props[attr.0 as usize];
|
|
||||||
ordered.insert(attr.0, (name, props));
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut attributes = LinkedHashMap::with_capacity(ordered.len());
|
|
||||||
for (_, (name, props)) in ordered {
|
|
||||||
attributes.insert(name.clone(), props);
|
|
||||||
}
|
|
||||||
|
|
||||||
attributes
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn document_id<T>(&self, document: T) -> Result<DocumentId, SerializerError>
|
|
||||||
where T: serde::Serialize,
|
|
||||||
{
|
|
||||||
let id_attribute_name = &self.inner.identifier;
|
|
||||||
let serializer = FindDocumentIdSerializer { id_attribute_name };
|
|
||||||
document.serialize(serializer)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn props(&self, attr: SchemaAttr) -> SchemaProps {
|
|
||||||
let (_, props) = self.inner.props[attr.0 as usize];
|
|
||||||
props
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn identifier_name(&self) -> &str {
|
|
||||||
&self.inner.identifier
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
|
|
||||||
self.inner.attrs.get(name.as_ref()).cloned()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
|
|
||||||
let (name, _) = &self.inner.props[attr.0 as usize];
|
|
||||||
name
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
|
||||||
pub struct SchemaAttr(pub u16);
|
|
||||||
|
|
||||||
impl SchemaAttr {
|
|
||||||
pub fn new(value: u16) -> SchemaAttr {
|
|
||||||
SchemaAttr(value)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn min() -> SchemaAttr {
|
|
||||||
SchemaAttr(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn next(self) -> Option<SchemaAttr> {
|
|
||||||
self.0.checked_add(1).map(SchemaAttr)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn prev(self) -> Option<SchemaAttr> {
|
|
||||||
self.0.checked_sub(1).map(SchemaAttr)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn max() -> SchemaAttr {
|
|
||||||
SchemaAttr(u16::MAX)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for SchemaAttr {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
self.0.fmt(f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
use std::error::Error;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn serialize_deserialize() -> bincode::Result<()> {
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("alpha", STORED);
|
|
||||||
builder.new_attribute("beta", STORED | INDEXED);
|
|
||||||
builder.new_attribute("gamma", INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
|
||||||
|
|
||||||
schema.write_to_bin(&mut buffer)?;
|
|
||||||
let schema2 = Schema::read_from_bin(buffer.as_slice())?;
|
|
||||||
|
|
||||||
assert_eq!(schema, schema2);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn serialize_deserialize_toml() -> Result<(), Box<Error>> {
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("alpha", STORED);
|
|
||||||
builder.new_attribute("beta", STORED | INDEXED);
|
|
||||||
builder.new_attribute("gamma", INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
|
||||||
schema.to_toml(&mut buffer)?;
|
|
||||||
|
|
||||||
let schema2 = Schema::from_toml(buffer.as_slice())?;
|
|
||||||
assert_eq!(schema, schema2);
|
|
||||||
|
|
||||||
let data = r#"
|
|
||||||
identifier = "id"
|
|
||||||
|
|
||||||
[attributes."alpha"]
|
|
||||||
stored = true
|
|
||||||
|
|
||||||
[attributes."beta"]
|
|
||||||
stored = true
|
|
||||||
indexed = true
|
|
||||||
|
|
||||||
[attributes."gamma"]
|
|
||||||
indexed = true
|
|
||||||
"#;
|
|
||||||
let schema2 = Schema::from_toml(data.as_bytes())?;
|
|
||||||
assert_eq!(schema, schema2);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn serialize_deserialize_json() -> Result<(), Box<Error>> {
|
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
|
||||||
builder.new_attribute("alpha", STORED);
|
|
||||||
builder.new_attribute("beta", STORED | INDEXED);
|
|
||||||
builder.new_attribute("gamma", INDEXED);
|
|
||||||
let schema = builder.build();
|
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
|
||||||
schema.to_json(&mut buffer)?;
|
|
||||||
|
|
||||||
let schema2 = Schema::from_json(buffer.as_slice())?;
|
|
||||||
assert_eq!(schema, schema2);
|
|
||||||
|
|
||||||
let data = r#"
|
|
||||||
{
|
|
||||||
"identifier": "id",
|
|
||||||
"attributes": {
|
|
||||||
"alpha": {
|
|
||||||
"stored": true
|
|
||||||
},
|
|
||||||
"beta": {
|
|
||||||
"stored": true,
|
|
||||||
"indexed": true
|
|
||||||
},
|
|
||||||
"gamma": {
|
|
||||||
"indexed": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}"#;
|
|
||||||
let schema2 = Schema::from_json(data.as_bytes())?;
|
|
||||||
assert_eq!(schema, schema2);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,186 +0,0 @@
|
|||||||
use std::error::Error;
|
|
||||||
use std::ops::Deref;
|
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use rocksdb::rocksdb::{DB, Snapshot, SeekKey};
|
|
||||||
use rocksdb::rocksdb_options::ReadOptions;
|
|
||||||
use serde::forward_to_deserialize_any;
|
|
||||||
use serde::de::value::MapDeserializer;
|
|
||||||
use serde::de::{self, Visitor, IntoDeserializer};
|
|
||||||
|
|
||||||
use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
|
|
||||||
use crate::database::schema::Schema;
|
|
||||||
use meilidb_core::DocumentId;
|
|
||||||
|
|
||||||
pub struct Deserializer<'a, D>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
snapshot: &'a Snapshot<D>,
|
|
||||||
schema: &'a Schema,
|
|
||||||
document_id: DocumentId,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, D> Deserializer<'a, D>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
pub fn new(snapshot: &'a Snapshot<D>, schema: &'a Schema, doc: DocumentId) -> Self {
|
|
||||||
Deserializer { snapshot, schema, document_id: doc }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'de, 'a, 'b, D> de::Deserializer<'de> for &'b mut Deserializer<'a, D>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
type Error = DeserializerError;
|
|
||||||
|
|
||||||
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
|
||||||
where V: Visitor<'de>
|
|
||||||
{
|
|
||||||
self.deserialize_map(visitor)
|
|
||||||
}
|
|
||||||
|
|
||||||
forward_to_deserialize_any! {
|
|
||||||
bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
|
|
||||||
bytes byte_buf unit_struct tuple_struct
|
|
||||||
identifier tuple ignored_any option newtype_struct enum struct
|
|
||||||
}
|
|
||||||
|
|
||||||
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
|
||||||
where V: Visitor<'de>
|
|
||||||
{
|
|
||||||
let mut options = ReadOptions::new();
|
|
||||||
let lower = DocumentKey::new(self.document_id);
|
|
||||||
let upper = lower.with_attribute_max();
|
|
||||||
options.set_iterate_lower_bound(lower.as_ref());
|
|
||||||
options.set_iterate_upper_bound(upper.as_ref());
|
|
||||||
|
|
||||||
let mut iter = self.snapshot.iter_opt(options);
|
|
||||||
iter.seek(SeekKey::Start);
|
|
||||||
|
|
||||||
if iter.kv().is_none() {
|
|
||||||
// FIXME return an error
|
|
||||||
}
|
|
||||||
|
|
||||||
let iter = iter.map(|(key, value)| {
|
|
||||||
// retrieve the schema attribute name
|
|
||||||
// from the schema attribute number
|
|
||||||
let document_key_attr = DocumentKeyAttr::from_bytes(&key);
|
|
||||||
let schema_attr = document_key_attr.attribute();
|
|
||||||
let attribute_name = self.schema.attribute_name(schema_attr);
|
|
||||||
(attribute_name, Value(value))
|
|
||||||
});
|
|
||||||
|
|
||||||
let map_deserializer = MapDeserializer::new(iter);
|
|
||||||
visitor.visit_map(map_deserializer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Value(Vec<u8>);
|
|
||||||
|
|
||||||
impl<'de> IntoDeserializer<'de, DeserializerError> for Value {
|
|
||||||
type Deserializer = Self;
|
|
||||||
|
|
||||||
fn into_deserializer(self) -> Self::Deserializer {
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! forward_to_bincode_values {
|
|
||||||
($($ty:ident => $de_method:ident,)*) => {
|
|
||||||
$(
|
|
||||||
fn $de_method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
|
||||||
where V: de::Visitor<'de>
|
|
||||||
{
|
|
||||||
match bincode::deserialize::<$ty>(&self.0) {
|
|
||||||
Ok(val) => val.into_deserializer().$de_method(visitor),
|
|
||||||
Err(e) => Err(de::Error::custom(e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)*
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'de, 'a> de::Deserializer<'de> for Value {
|
|
||||||
type Error = DeserializerError;
|
|
||||||
|
|
||||||
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
|
||||||
where V: Visitor<'de>
|
|
||||||
{
|
|
||||||
self.0.into_deserializer().deserialize_any(visitor)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
|
||||||
where V: Visitor<'de>
|
|
||||||
{
|
|
||||||
self.deserialize_string(visitor)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
|
||||||
where V: Visitor<'de>
|
|
||||||
{
|
|
||||||
match bincode::deserialize::<String>(&self.0) {
|
|
||||||
Ok(val) => val.into_deserializer().deserialize_string(visitor),
|
|
||||||
Err(e) => Err(de::Error::custom(e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
|
||||||
where V: Visitor<'de>
|
|
||||||
{
|
|
||||||
self.deserialize_byte_buf(visitor)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
|
||||||
where V: Visitor<'de>
|
|
||||||
{
|
|
||||||
match bincode::deserialize::<Vec<u8>>(&self.0) {
|
|
||||||
Ok(val) => val.into_deserializer().deserialize_byte_buf(visitor),
|
|
||||||
Err(e) => Err(de::Error::custom(e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
forward_to_bincode_values! {
|
|
||||||
char => deserialize_char,
|
|
||||||
bool => deserialize_bool,
|
|
||||||
|
|
||||||
u8 => deserialize_u8,
|
|
||||||
u16 => deserialize_u16,
|
|
||||||
u32 => deserialize_u32,
|
|
||||||
u64 => deserialize_u64,
|
|
||||||
|
|
||||||
i8 => deserialize_i8,
|
|
||||||
i16 => deserialize_i16,
|
|
||||||
i32 => deserialize_i32,
|
|
||||||
i64 => deserialize_i64,
|
|
||||||
|
|
||||||
f32 => deserialize_f32,
|
|
||||||
f64 => deserialize_f64,
|
|
||||||
}
|
|
||||||
|
|
||||||
forward_to_deserialize_any! {
|
|
||||||
unit seq map
|
|
||||||
unit_struct tuple_struct
|
|
||||||
identifier tuple ignored_any option newtype_struct enum struct
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum DeserializerError {
|
|
||||||
Custom(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl de::Error for DeserializerError {
|
|
||||||
fn custom<T: fmt::Display>(msg: T) -> Self {
|
|
||||||
DeserializerError::Custom(msg.to_string())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for DeserializerError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
DeserializerError::Custom(s) => f.write_str(&s),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Error for DeserializerError {}
|
|
@ -1,243 +0,0 @@
|
|||||||
use serde::Serialize;
|
|
||||||
use serde::ser;
|
|
||||||
|
|
||||||
use crate::database::serde::key_to_string::KeyToStringSerializer;
|
|
||||||
use crate::database::serde::{SerializerError, calculate_hash};
|
|
||||||
use meilidb_core::DocumentId;
|
|
||||||
|
|
||||||
pub struct FindDocumentIdSerializer<'a> {
|
|
||||||
pub id_attribute_name: &'a str,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> ser::Serializer for FindDocumentIdSerializer<'a> {
|
|
||||||
type Ok = DocumentId;
|
|
||||||
type Error = SerializerError;
|
|
||||||
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeMap = FindDocumentIdMapSerializer<'a>;
|
|
||||||
type SerializeStruct = FindDocumentIdStructSerializer<'a>;
|
|
||||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
|
|
||||||
forward_to_unserializable_type! {
|
|
||||||
bool => serialize_bool,
|
|
||||||
char => serialize_char,
|
|
||||||
|
|
||||||
i8 => serialize_i8,
|
|
||||||
i16 => serialize_i16,
|
|
||||||
i32 => serialize_i32,
|
|
||||||
i64 => serialize_i64,
|
|
||||||
|
|
||||||
u8 => serialize_u8,
|
|
||||||
u16 => serialize_u16,
|
|
||||||
u32 => serialize_u32,
|
|
||||||
u64 => serialize_u64,
|
|
||||||
|
|
||||||
f32 => serialize_f32,
|
|
||||||
f64 => serialize_f64,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "str" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "&[u8]" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "()" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_struct<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
value.serialize(self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_variant<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "newtype variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "sequence" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
|
||||||
Ok(FindDocumentIdMapSerializer {
|
|
||||||
id_attribute_name: self.id_attribute_name,
|
|
||||||
document_id: None,
|
|
||||||
current_key_name: None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Ok(FindDocumentIdStructSerializer {
|
|
||||||
id_attribute_name: self.id_attribute_name,
|
|
||||||
document_id: None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStructVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "struct variant" })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FindDocumentIdMapSerializer<'a> {
|
|
||||||
id_attribute_name: &'a str,
|
|
||||||
document_id: Option<DocumentId>,
|
|
||||||
current_key_name: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> ser::SerializeMap for FindDocumentIdMapSerializer<'a> {
|
|
||||||
type Ok = DocumentId;
|
|
||||||
type Error = SerializerError;
|
|
||||||
|
|
||||||
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
let key = key.serialize(KeyToStringSerializer)?;
|
|
||||||
self.current_key_name = Some(key);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
let key = self.current_key_name.take().unwrap();
|
|
||||||
self.serialize_entry(&key, value)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_entry<K: ?Sized, V: ?Sized>(
|
|
||||||
&mut self,
|
|
||||||
key: &K,
|
|
||||||
value: &V
|
|
||||||
) -> Result<(), Self::Error>
|
|
||||||
where K: Serialize, V: Serialize,
|
|
||||||
{
|
|
||||||
let key = key.serialize(KeyToStringSerializer)?;
|
|
||||||
|
|
||||||
if self.id_attribute_name == key {
|
|
||||||
// TODO is it possible to have multiple ids?
|
|
||||||
let id = bincode::serialize(value).unwrap();
|
|
||||||
let hash = calculate_hash(&id);
|
|
||||||
self.document_id = Some(DocumentId(hash));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
match self.document_id {
|
|
||||||
Some(document_id) => Ok(document_id),
|
|
||||||
None => Err(SerializerError::DocumentIdNotFound)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FindDocumentIdStructSerializer<'a> {
|
|
||||||
id_attribute_name: &'a str,
|
|
||||||
document_id: Option<DocumentId>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> ser::SerializeStruct for FindDocumentIdStructSerializer<'a> {
|
|
||||||
type Ok = DocumentId;
|
|
||||||
type Error = SerializerError;
|
|
||||||
|
|
||||||
fn serialize_field<T: ?Sized>(
|
|
||||||
&mut self,
|
|
||||||
key: &'static str,
|
|
||||||
value: &T
|
|
||||||
) -> Result<(), Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
if self.id_attribute_name == key {
|
|
||||||
// TODO can it be possible to have multiple ids?
|
|
||||||
let id = bincode::serialize(value).unwrap();
|
|
||||||
let hash = calculate_hash(&id);
|
|
||||||
self.document_id = Some(DocumentId(hash));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
match self.document_id {
|
|
||||||
Some(document_id) => Ok(document_id),
|
|
||||||
None => Err(SerializerError::DocumentIdNotFound)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,190 +0,0 @@
|
|||||||
use std::collections::HashSet;
|
|
||||||
|
|
||||||
use serde::Serialize;
|
|
||||||
use serde::ser;
|
|
||||||
use meilidb_core::{DocumentId, DocIndex};
|
|
||||||
use meilidb_tokenizer::{Tokenizer, Token, is_cjk};
|
|
||||||
|
|
||||||
use crate::database::update::DocumentUpdate;
|
|
||||||
use crate::database::serde::SerializerError;
|
|
||||||
use crate::database::schema::SchemaAttr;
|
|
||||||
|
|
||||||
pub struct IndexerSerializer<'a, 'b> {
|
|
||||||
pub update: &'a mut DocumentUpdate<'b>,
|
|
||||||
pub document_id: DocumentId,
|
|
||||||
pub attribute: SchemaAttr,
|
|
||||||
pub stop_words: &'a HashSet<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, 'b> ser::Serializer for IndexerSerializer<'a, 'b> {
|
|
||||||
type Ok = ();
|
|
||||||
type Error = SerializerError;
|
|
||||||
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
|
|
||||||
forward_to_unserializable_type! {
|
|
||||||
bool => serialize_bool,
|
|
||||||
char => serialize_char,
|
|
||||||
|
|
||||||
i8 => serialize_i8,
|
|
||||||
i16 => serialize_i16,
|
|
||||||
i32 => serialize_i32,
|
|
||||||
i64 => serialize_i64,
|
|
||||||
|
|
||||||
u8 => serialize_u8,
|
|
||||||
u16 => serialize_u16,
|
|
||||||
u32 => serialize_u32,
|
|
||||||
u64 => serialize_u64,
|
|
||||||
|
|
||||||
f32 => serialize_f32,
|
|
||||||
f64 => serialize_f64,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
for token in Tokenizer::new(v) {
|
|
||||||
let Token { word, word_index, char_index } = token;
|
|
||||||
let document_id = self.document_id;
|
|
||||||
|
|
||||||
// FIXME must u32::try_from instead
|
|
||||||
let attribute = self.attribute.0;
|
|
||||||
let word_index = word_index as u16;
|
|
||||||
|
|
||||||
// insert the exact representation
|
|
||||||
let word_lower = word.to_lowercase();
|
|
||||||
let length = word.chars().count() as u16;
|
|
||||||
|
|
||||||
if self.stop_words.contains(&word_lower) { continue }
|
|
||||||
|
|
||||||
// and the unidecoded lowercased version
|
|
||||||
if !word_lower.chars().any(is_cjk) {
|
|
||||||
let word_unidecoded = unidecode::unidecode(word).to_lowercase();
|
|
||||||
let word_unidecoded = word_unidecoded.trim();
|
|
||||||
if word_lower != word_unidecoded {
|
|
||||||
let char_index = char_index as u16;
|
|
||||||
let char_length = length;
|
|
||||||
|
|
||||||
let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
|
|
||||||
self.update.insert_doc_index(word_unidecoded.as_bytes().to_vec(), doc_index)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let char_index = char_index as u16;
|
|
||||||
let char_length = length;
|
|
||||||
|
|
||||||
let doc_index = DocIndex { document_id, attribute, word_index, char_index, char_length };
|
|
||||||
self.update.insert_doc_index(word_lower.into_bytes(), doc_index)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "&[u8]" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "()" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_struct<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
value.serialize(self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_variant<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "newtype variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "seq" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "map" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStructVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "struct variant" })
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,146 +0,0 @@
|
|||||||
use serde::Serialize;
|
|
||||||
use serde::ser;
|
|
||||||
|
|
||||||
use crate::database::serde::SerializerError;
|
|
||||||
|
|
||||||
pub struct KeyToStringSerializer;
|
|
||||||
|
|
||||||
impl ser::Serializer for KeyToStringSerializer {
|
|
||||||
type Ok = String;
|
|
||||||
type Error = SerializerError;
|
|
||||||
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
|
|
||||||
forward_to_unserializable_type! {
|
|
||||||
bool => serialize_bool,
|
|
||||||
char => serialize_char,
|
|
||||||
|
|
||||||
i8 => serialize_i8,
|
|
||||||
i16 => serialize_i16,
|
|
||||||
i32 => serialize_i32,
|
|
||||||
i64 => serialize_i64,
|
|
||||||
|
|
||||||
u8 => serialize_u8,
|
|
||||||
u16 => serialize_u16,
|
|
||||||
u32 => serialize_u32,
|
|
||||||
u64 => serialize_u64,
|
|
||||||
|
|
||||||
f32 => serialize_f32,
|
|
||||||
f64 => serialize_f64,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(value.to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "&[u8]" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "()" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_struct<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
value.serialize(self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_variant<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "newtype variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "sequence" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "map" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStructVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "struct variant" })
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,65 +0,0 @@
|
|||||||
use std::collections::hash_map::DefaultHasher;
|
|
||||||
use std::hash::{Hash, Hasher};
|
|
||||||
use std::error::Error;
|
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use serde::ser;
|
|
||||||
|
|
||||||
macro_rules! forward_to_unserializable_type {
|
|
||||||
($($ty:ident => $se_method:ident,)*) => {
|
|
||||||
$(
|
|
||||||
fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "$ty" })
|
|
||||||
}
|
|
||||||
)*
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub mod find_id;
|
|
||||||
pub mod key_to_string;
|
|
||||||
pub mod value_to_number;
|
|
||||||
pub mod serializer;
|
|
||||||
pub mod indexer_serializer;
|
|
||||||
pub mod deserializer;
|
|
||||||
|
|
||||||
pub fn calculate_hash<T: Hash>(t: &T) -> u64 {
|
|
||||||
let mut s = DefaultHasher::new();
|
|
||||||
t.hash(&mut s);
|
|
||||||
s.finish()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum SerializerError {
|
|
||||||
DocumentIdNotFound,
|
|
||||||
UnserializableType { name: &'static str },
|
|
||||||
Custom(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ser::Error for SerializerError {
|
|
||||||
fn custom<T: fmt::Display>(msg: T) -> Self {
|
|
||||||
SerializerError::Custom(msg.to_string())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for SerializerError {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
SerializerError::DocumentIdNotFound => {
|
|
||||||
write!(f, "serialized document does not have an id according to the schema")
|
|
||||||
}
|
|
||||||
SerializerError::UnserializableType { name } => {
|
|
||||||
write!(f, "Only struct and map types are considered valid documents and
|
|
||||||
can be serialized, not {} types directly.", name)
|
|
||||||
},
|
|
||||||
SerializerError::Custom(s) => f.write_str(&s),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Error for SerializerError {}
|
|
||||||
|
|
||||||
impl From<String> for SerializerError {
|
|
||||||
fn from(value: String) -> SerializerError {
|
|
||||||
SerializerError::Custom(value)
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,282 +0,0 @@
|
|||||||
use std::collections::HashSet;
|
|
||||||
|
|
||||||
use serde::Serialize;
|
|
||||||
use serde::ser;
|
|
||||||
|
|
||||||
use crate::database::serde::indexer_serializer::IndexerSerializer;
|
|
||||||
use crate::database::serde::key_to_string::KeyToStringSerializer;
|
|
||||||
use crate::database::serde::value_to_number::ValueToNumberSerializer;
|
|
||||||
use crate::database::update::DocumentUpdate;
|
|
||||||
use crate::database::serde::SerializerError;
|
|
||||||
use crate::database::schema::Schema;
|
|
||||||
use meilidb_core::DocumentId;
|
|
||||||
|
|
||||||
pub struct Serializer<'a, 'b> {
|
|
||||||
pub schema: &'a Schema,
|
|
||||||
pub update: &'a mut DocumentUpdate<'b>,
|
|
||||||
pub document_id: DocumentId,
|
|
||||||
pub stop_words: &'a HashSet<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, 'b> ser::Serializer for Serializer<'a, 'b> {
|
|
||||||
type Ok = ();
|
|
||||||
type Error = SerializerError;
|
|
||||||
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeMap = MapSerializer<'a, 'b>;
|
|
||||||
type SerializeStruct = StructSerializer<'a, 'b>;
|
|
||||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
|
|
||||||
forward_to_unserializable_type! {
|
|
||||||
bool => serialize_bool,
|
|
||||||
char => serialize_char,
|
|
||||||
|
|
||||||
i8 => serialize_i8,
|
|
||||||
i16 => serialize_i16,
|
|
||||||
i32 => serialize_i32,
|
|
||||||
i64 => serialize_i64,
|
|
||||||
|
|
||||||
u8 => serialize_u8,
|
|
||||||
u16 => serialize_u16,
|
|
||||||
u32 => serialize_u32,
|
|
||||||
u64 => serialize_u64,
|
|
||||||
|
|
||||||
f32 => serialize_f32,
|
|
||||||
f64 => serialize_f64,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "str" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "&[u8]" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "()" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_struct<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
value.serialize(self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_variant<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "newtype variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "sequence" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
|
||||||
Ok(MapSerializer {
|
|
||||||
schema: self.schema,
|
|
||||||
document_id: self.document_id,
|
|
||||||
update: self.update,
|
|
||||||
stop_words: self.stop_words,
|
|
||||||
current_key_name: None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Ok(StructSerializer {
|
|
||||||
schema: self.schema,
|
|
||||||
document_id: self.document_id,
|
|
||||||
update: self.update,
|
|
||||||
stop_words: self.stop_words,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStructVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "struct variant" })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct MapSerializer<'a, 'b> {
|
|
||||||
pub schema: &'a Schema,
|
|
||||||
pub document_id: DocumentId,
|
|
||||||
pub update: &'a mut DocumentUpdate<'b>,
|
|
||||||
pub stop_words: &'a HashSet<String>,
|
|
||||||
pub current_key_name: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, 'b> ser::SerializeMap for MapSerializer<'a, 'b> {
|
|
||||||
type Ok = ();
|
|
||||||
type Error = SerializerError;
|
|
||||||
|
|
||||||
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
let key = key.serialize(KeyToStringSerializer)?;
|
|
||||||
self.current_key_name = Some(key);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
let key = self.current_key_name.take().unwrap();
|
|
||||||
self.serialize_entry(&key, value)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_entry<K: ?Sized, V: ?Sized>(
|
|
||||||
&mut self,
|
|
||||||
key: &K,
|
|
||||||
value: &V,
|
|
||||||
) -> Result<(), Self::Error>
|
|
||||||
where K: Serialize, V: Serialize,
|
|
||||||
{
|
|
||||||
let key = key.serialize(KeyToStringSerializer)?;
|
|
||||||
|
|
||||||
if let Some(attr) = self.schema.attribute(key) {
|
|
||||||
let props = self.schema.props(attr);
|
|
||||||
if props.is_stored() {
|
|
||||||
let value = bincode::serialize(value).unwrap();
|
|
||||||
self.update.insert_attribute_value(attr, &value)?;
|
|
||||||
}
|
|
||||||
if props.is_indexed() {
|
|
||||||
let serializer = IndexerSerializer {
|
|
||||||
update: self.update,
|
|
||||||
document_id: self.document_id,
|
|
||||||
attribute: attr,
|
|
||||||
stop_words: self.stop_words,
|
|
||||||
};
|
|
||||||
value.serialize(serializer)?;
|
|
||||||
}
|
|
||||||
if props.is_ranked() {
|
|
||||||
let number = value.serialize(ValueToNumberSerializer)?;
|
|
||||||
self.update.register_ranked_attribute(attr, number)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct StructSerializer<'a, 'b> {
|
|
||||||
pub schema: &'a Schema,
|
|
||||||
pub document_id: DocumentId,
|
|
||||||
pub update: &'a mut DocumentUpdate<'b>,
|
|
||||||
pub stop_words: &'a HashSet<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, 'b> ser::SerializeStruct for StructSerializer<'a, 'b> {
|
|
||||||
type Ok = ();
|
|
||||||
type Error = SerializerError;
|
|
||||||
|
|
||||||
fn serialize_field<T: ?Sized>(
|
|
||||||
&mut self,
|
|
||||||
key: &'static str,
|
|
||||||
value: &T
|
|
||||||
) -> Result<(), Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
if let Some(attr) = self.schema.attribute(key) {
|
|
||||||
let props = self.schema.props(attr);
|
|
||||||
if props.is_stored() {
|
|
||||||
let value = bincode::serialize(value).unwrap();
|
|
||||||
self.update.insert_attribute_value(attr, &value)?;
|
|
||||||
}
|
|
||||||
if props.is_indexed() {
|
|
||||||
let serializer = IndexerSerializer {
|
|
||||||
update: self.update,
|
|
||||||
document_id: self.document_id,
|
|
||||||
attribute: attr,
|
|
||||||
stop_words: self.stop_words,
|
|
||||||
};
|
|
||||||
value.serialize(serializer)?;
|
|
||||||
}
|
|
||||||
if props.is_ranked() {
|
|
||||||
let integer = value.serialize(ValueToNumberSerializer)?;
|
|
||||||
self.update.register_ranked_attribute(attr, integer)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,176 +0,0 @@
|
|||||||
use std::str::FromStr;
|
|
||||||
|
|
||||||
use serde::Serialize;
|
|
||||||
use serde::{ser, ser::Error};
|
|
||||||
|
|
||||||
use crate::database::serde::SerializerError;
|
|
||||||
use crate::database::Number;
|
|
||||||
|
|
||||||
pub struct ValueToNumberSerializer;
|
|
||||||
|
|
||||||
impl ser::Serializer for ValueToNumberSerializer {
|
|
||||||
type Ok = Number;
|
|
||||||
type Error = SerializerError;
|
|
||||||
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
|
||||||
|
|
||||||
forward_to_unserializable_type! {
|
|
||||||
bool => serialize_bool,
|
|
||||||
char => serialize_char,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_i8(self, value: i8) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Signed(value as i64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_i16(self, value: i16) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Signed(value as i64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_i32(self, value: i32) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Signed(value as i64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_i64(self, value: i64) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Signed(value as i64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_u8(self, value: u8) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Unsigned(value as u64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_u16(self, value: u16) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Unsigned(value as u64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_u32(self, value: u32) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Unsigned(value as u64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_u64(self, value: u64) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Unsigned(value as u64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_f32(self, value: f32) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Float(value as f64))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_f64(self, value: f64) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Ok(Number::Float(value))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Number::from_str(value).map_err(SerializerError::custom)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "&[u8]" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "Option" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "()" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_unit_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "unit variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_struct<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
value.serialize(self)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_newtype_variant<T: ?Sized>(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_value: &T
|
|
||||||
) -> Result<Self::Ok, Self::Error>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "newtype variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "sequence" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_tuple_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeTupleVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "tuple variant" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
|
||||||
Err(SerializerError::UnserializableType { name: "map" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStruct, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "struct" })
|
|
||||||
}
|
|
||||||
|
|
||||||
fn serialize_struct_variant(
|
|
||||||
self,
|
|
||||||
_name: &'static str,
|
|
||||||
_variant_index: u32,
|
|
||||||
_variant: &'static str,
|
|
||||||
_len: usize
|
|
||||||
) -> Result<Self::SerializeStructVariant, Self::Error>
|
|
||||||
{
|
|
||||||
Err(SerializerError::UnserializableType { name: "struct variant" })
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,55 +0,0 @@
|
|||||||
use std::error::Error;
|
|
||||||
|
|
||||||
use byteorder::{ReadBytesExt, WriteBytesExt};
|
|
||||||
use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
|
|
||||||
use meilidb_core::write_to_bytes::WriteToBytes;
|
|
||||||
use meilidb_core::data::DocIds;
|
|
||||||
|
|
||||||
use crate::database::Index;
|
|
||||||
|
|
||||||
pub enum WriteIndexEvent<'a> {
|
|
||||||
RemovedDocuments(&'a DocIds),
|
|
||||||
UpdatedDocuments(&'a Index),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> WriteToBytes for WriteIndexEvent<'a> {
|
|
||||||
fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
|
|
||||||
match self {
|
|
||||||
WriteIndexEvent::RemovedDocuments(doc_ids) => {
|
|
||||||
let _ = bytes.write_u8(0);
|
|
||||||
doc_ids.write_to_bytes(bytes);
|
|
||||||
},
|
|
||||||
WriteIndexEvent::UpdatedDocuments(index) => {
|
|
||||||
let _ = bytes.write_u8(1);
|
|
||||||
index.write_to_bytes(bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub enum ReadIndexEvent {
|
|
||||||
RemovedDocuments(DocIds),
|
|
||||||
UpdatedDocuments(Index),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ReadIndexEvent {
|
|
||||||
pub fn updated_documents(self) -> Option<Index> {
|
|
||||||
use ReadIndexEvent::*;
|
|
||||||
match self {
|
|
||||||
RemovedDocuments(_) => None,
|
|
||||||
UpdatedDocuments(index) => Some(index),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FromSharedDataCursor for ReadIndexEvent {
|
|
||||||
type Error = Box<Error>;
|
|
||||||
|
|
||||||
fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> {
|
|
||||||
match cursor.read_u8()? {
|
|
||||||
0 => DocIds::from_shared_data_cursor(cursor).map(ReadIndexEvent::RemovedDocuments),
|
|
||||||
1 => Index::from_shared_data_cursor(cursor).map(ReadIndexEvent::UpdatedDocuments),
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,234 +0,0 @@
|
|||||||
use std::collections::{HashSet, BTreeMap};
|
|
||||||
use std::error::Error;
|
|
||||||
|
|
||||||
use rocksdb::rocksdb::{Writable, WriteBatch};
|
|
||||||
use hashbrown::hash_map::HashMap;
|
|
||||||
use sdset::{Set, SetBuf};
|
|
||||||
use serde::Serialize;
|
|
||||||
use meilidb_core::write_to_bytes::WriteToBytes;
|
|
||||||
use meilidb_core::data::DocIds;
|
|
||||||
use meilidb_core::{IndexBuilder, DocumentId, DocIndex};
|
|
||||||
|
|
||||||
use crate::database::document_key::{DocumentKey, DocumentKeyAttr};
|
|
||||||
use crate::database::serde::serializer::Serializer;
|
|
||||||
use crate::database::serde::SerializerError;
|
|
||||||
use crate::database::schema::SchemaAttr;
|
|
||||||
use crate::database::schema::Schema;
|
|
||||||
use crate::database::{DATA_INDEX, DATA_RANKED_MAP};
|
|
||||||
use crate::database::{RankedMap, Number};
|
|
||||||
|
|
||||||
pub use self::index_event::{ReadIndexEvent, WriteIndexEvent};
|
|
||||||
pub use self::ranked_map_event::{ReadRankedMapEvent, WriteRankedMapEvent};
|
|
||||||
|
|
||||||
mod index_event;
|
|
||||||
mod ranked_map_event;
|
|
||||||
|
|
||||||
pub type Token = Vec<u8>; // TODO could be replaced by a SmallVec
|
|
||||||
|
|
||||||
pub struct Update {
|
|
||||||
schema: Schema,
|
|
||||||
raw_builder: RawUpdateBuilder,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Update {
|
|
||||||
pub(crate) fn new(schema: Schema) -> Update {
|
|
||||||
Update { schema, raw_builder: RawUpdateBuilder::new() }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn update_document<T>(
|
|
||||||
&mut self,
|
|
||||||
document: T,
|
|
||||||
stop_words: &HashSet<String>,
|
|
||||||
) -> Result<DocumentId, SerializerError>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
let document_id = self.schema.document_id(&document)?;
|
|
||||||
|
|
||||||
let serializer = Serializer {
|
|
||||||
schema: &self.schema,
|
|
||||||
document_id: document_id,
|
|
||||||
update: &mut self.raw_builder.document_update(document_id)?,
|
|
||||||
stop_words: stop_words,
|
|
||||||
};
|
|
||||||
|
|
||||||
document.serialize(serializer)?;
|
|
||||||
|
|
||||||
Ok(document_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn remove_document<T>(&mut self, document: T) -> Result<DocumentId, SerializerError>
|
|
||||||
where T: Serialize,
|
|
||||||
{
|
|
||||||
let document_id = self.schema.document_id(&document)?;
|
|
||||||
self.raw_builder.document_update(document_id)?.remove()?;
|
|
||||||
Ok(document_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn build(self) -> Result<WriteBatch, Box<Error>> {
|
|
||||||
self.raw_builder.build()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
|
||||||
enum UpdateType {
|
|
||||||
Updated,
|
|
||||||
Deleted,
|
|
||||||
}
|
|
||||||
|
|
||||||
use UpdateType::{Updated, Deleted};
|
|
||||||
|
|
||||||
pub struct RawUpdateBuilder {
|
|
||||||
documents_update: HashMap<DocumentId, UpdateType>,
|
|
||||||
documents_ranked_fields: RankedMap,
|
|
||||||
indexed_words: BTreeMap<Token, Vec<DocIndex>>,
|
|
||||||
batch: WriteBatch,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl RawUpdateBuilder {
|
|
||||||
pub fn new() -> RawUpdateBuilder {
|
|
||||||
RawUpdateBuilder {
|
|
||||||
documents_update: HashMap::new(),
|
|
||||||
documents_ranked_fields: HashMap::new(),
|
|
||||||
indexed_words: BTreeMap::new(),
|
|
||||||
batch: WriteBatch::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn document_update(&mut self, document_id: DocumentId) -> Result<DocumentUpdate, SerializerError> {
|
|
||||||
use serde::ser::Error;
|
|
||||||
|
|
||||||
match self.documents_update.get(&document_id) {
|
|
||||||
Some(Deleted) | None => Ok(DocumentUpdate { document_id, inner: self }),
|
|
||||||
Some(Updated) => Err(SerializerError::custom(
|
|
||||||
"This document has already been removed and cannot be updated in the same update"
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn build(self) -> Result<WriteBatch, Box<Error>> {
|
|
||||||
// create the list of all the removed documents
|
|
||||||
let removed_documents = {
|
|
||||||
let mut document_ids = Vec::new();
|
|
||||||
for (id, update_type) in self.documents_update {
|
|
||||||
if update_type == Deleted {
|
|
||||||
document_ids.push(id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
document_ids.sort_unstable();
|
|
||||||
let setbuf = SetBuf::new_unchecked(document_ids);
|
|
||||||
DocIds::new(&setbuf)
|
|
||||||
};
|
|
||||||
|
|
||||||
// create the Index of all the document updates
|
|
||||||
let index = {
|
|
||||||
let mut builder = IndexBuilder::new();
|
|
||||||
for (key, mut indexes) in self.indexed_words {
|
|
||||||
indexes.sort_unstable();
|
|
||||||
let indexes = Set::new_unchecked(&indexes);
|
|
||||||
builder.insert(key, indexes).unwrap();
|
|
||||||
}
|
|
||||||
builder.build()
|
|
||||||
};
|
|
||||||
|
|
||||||
// WARN: removed documents must absolutely
|
|
||||||
// be merged *before* document updates
|
|
||||||
|
|
||||||
// === index ===
|
|
||||||
|
|
||||||
if !removed_documents.is_empty() {
|
|
||||||
// remove the documents using the appropriate IndexEvent
|
|
||||||
let event_bytes = WriteIndexEvent::RemovedDocuments(&removed_documents).into_bytes();
|
|
||||||
self.batch.merge(DATA_INDEX, &event_bytes)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update the documents using the appropriate IndexEvent
|
|
||||||
let event_bytes = WriteIndexEvent::UpdatedDocuments(&index).into_bytes();
|
|
||||||
self.batch.merge(DATA_INDEX, &event_bytes)?;
|
|
||||||
|
|
||||||
// === ranked map ===
|
|
||||||
|
|
||||||
if !removed_documents.is_empty() {
|
|
||||||
// update the ranked map using the appropriate RankedMapEvent
|
|
||||||
let event_bytes = WriteRankedMapEvent::RemovedDocuments(&removed_documents).into_bytes();
|
|
||||||
self.batch.merge(DATA_RANKED_MAP, &event_bytes)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update the documents using the appropriate IndexEvent
|
|
||||||
let event_bytes = WriteRankedMapEvent::UpdatedDocuments(&self.documents_ranked_fields).into_bytes();
|
|
||||||
self.batch.merge(DATA_RANKED_MAP, &event_bytes)?;
|
|
||||||
|
|
||||||
Ok(self.batch)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct DocumentUpdate<'a> {
|
|
||||||
document_id: DocumentId,
|
|
||||||
inner: &'a mut RawUpdateBuilder,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> DocumentUpdate<'a> {
|
|
||||||
pub fn remove(&mut self) -> Result<(), SerializerError> {
|
|
||||||
use serde::ser::Error;
|
|
||||||
|
|
||||||
if let Updated = self.inner.documents_update.entry(self.document_id).or_insert(Deleted) {
|
|
||||||
return Err(SerializerError::custom(
|
|
||||||
"This document has already been updated and cannot be removed in the same update"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let start = DocumentKey::new(self.document_id).with_attribute_min();
|
|
||||||
let end = DocumentKey::new(self.document_id).with_attribute_max(); // FIXME max + 1
|
|
||||||
self.inner.batch.delete_range(start.as_ref(), end.as_ref())?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert_attribute_value(&mut self, attr: SchemaAttr, value: &[u8]) -> Result<(), SerializerError> {
|
|
||||||
use serde::ser::Error;
|
|
||||||
|
|
||||||
if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
|
|
||||||
return Err(SerializerError::custom(
|
|
||||||
"This document has already been deleted and cannot be updated in the same update"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let key = DocumentKeyAttr::new(self.document_id, attr);
|
|
||||||
self.inner.batch.put(key.as_ref(), &value)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert_doc_index(&mut self, token: Token, doc_index: DocIndex) -> Result<(), SerializerError> {
|
|
||||||
use serde::ser::Error;
|
|
||||||
|
|
||||||
if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
|
|
||||||
return Err(SerializerError::custom(
|
|
||||||
"This document has already been deleted and cannot be updated in the same update"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
self.inner.indexed_words.entry(token).or_insert_with(Vec::new).push(doc_index);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn register_ranked_attribute(
|
|
||||||
&mut self,
|
|
||||||
attr: SchemaAttr,
|
|
||||||
number: Number,
|
|
||||||
) -> Result<(), SerializerError>
|
|
||||||
{
|
|
||||||
use serde::ser::Error;
|
|
||||||
|
|
||||||
if let Deleted = self.inner.documents_update.entry(self.document_id).or_insert(Updated) {
|
|
||||||
return Err(SerializerError::custom(
|
|
||||||
"This document has already been deleted, ranked attributes cannot be added in the same update"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
self.inner.documents_ranked_fields.insert((self.document_id, attr), number);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,58 +0,0 @@
|
|||||||
use std::error::Error;
|
|
||||||
|
|
||||||
use byteorder::{ReadBytesExt, WriteBytesExt};
|
|
||||||
use meilidb_core::shared_data_cursor::{SharedDataCursor, FromSharedDataCursor};
|
|
||||||
use meilidb_core::write_to_bytes::WriteToBytes;
|
|
||||||
use meilidb_core::data::DocIds;
|
|
||||||
|
|
||||||
use crate::database::RankedMap;
|
|
||||||
|
|
||||||
pub enum WriteRankedMapEvent<'a> {
|
|
||||||
RemovedDocuments(&'a DocIds),
|
|
||||||
UpdatedDocuments(&'a RankedMap),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> WriteToBytes for WriteRankedMapEvent<'a> {
|
|
||||||
fn write_to_bytes(&self, bytes: &mut Vec<u8>) {
|
|
||||||
match self {
|
|
||||||
WriteRankedMapEvent::RemovedDocuments(doc_ids) => {
|
|
||||||
let _ = bytes.write_u8(0);
|
|
||||||
doc_ids.write_to_bytes(bytes);
|
|
||||||
},
|
|
||||||
WriteRankedMapEvent::UpdatedDocuments(ranked_map) => {
|
|
||||||
let _ = bytes.write_u8(1);
|
|
||||||
bincode::serialize_into(bytes, ranked_map).unwrap()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub enum ReadRankedMapEvent {
|
|
||||||
RemovedDocuments(DocIds),
|
|
||||||
UpdatedDocuments(RankedMap),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ReadRankedMapEvent {
|
|
||||||
pub fn updated_documents(self) -> Option<RankedMap> {
|
|
||||||
use ReadRankedMapEvent::*;
|
|
||||||
match self {
|
|
||||||
RemovedDocuments(_) => None,
|
|
||||||
UpdatedDocuments(ranked_map) => Some(ranked_map),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FromSharedDataCursor for ReadRankedMapEvent {
|
|
||||||
type Error = Box<Error>;
|
|
||||||
|
|
||||||
fn from_shared_data_cursor(cursor: &mut SharedDataCursor) -> Result<Self, Self::Error> {
|
|
||||||
match cursor.read_u8()? {
|
|
||||||
0 => DocIds::from_shared_data_cursor(cursor).map(ReadRankedMapEvent::RemovedDocuments),
|
|
||||||
1 => {
|
|
||||||
let ranked_map = bincode::deserialize_from(cursor)?;
|
|
||||||
Ok(ReadRankedMapEvent::UpdatedDocuments(ranked_map))
|
|
||||||
},
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,199 +0,0 @@
|
|||||||
use std::error::Error;
|
|
||||||
use std::path::Path;
|
|
||||||
use std::ops::Deref;
|
|
||||||
use std::{fmt, marker};
|
|
||||||
|
|
||||||
use rocksdb::rocksdb_options::{ReadOptions, EnvOptions, ColumnFamilyOptions};
|
|
||||||
use rocksdb::rocksdb::{DB, DBVector, Snapshot, SeekKey, SstFileWriter};
|
|
||||||
use serde::de::DeserializeOwned;
|
|
||||||
use meilidb_core::{Index, QueryBuilder, DocumentId};
|
|
||||||
|
|
||||||
use crate::database::{retrieve_data_schema, retrieve_data_index, retrieve_data_ranked_map, retrieve_config};
|
|
||||||
use crate::database::serde::deserializer::Deserializer;
|
|
||||||
use crate::database::{DocumentKey, DocumentKeyAttr};
|
|
||||||
use crate::database::schema::Schema;
|
|
||||||
use crate::database::RankedMap;
|
|
||||||
use crate::database::Config;
|
|
||||||
|
|
||||||
pub struct DatabaseView<D>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
snapshot: Snapshot<D>,
|
|
||||||
index: Index,
|
|
||||||
ranked_map: RankedMap,
|
|
||||||
schema: Schema,
|
|
||||||
config: Config,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<D> DatabaseView<D>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
pub fn new(snapshot: Snapshot<D>) -> Result<DatabaseView<D>, Box<Error>> {
|
|
||||||
let schema = retrieve_data_schema(&snapshot)?;
|
|
||||||
let index = retrieve_data_index(&snapshot)?;
|
|
||||||
let ranked_map = retrieve_data_ranked_map(&snapshot)?;
|
|
||||||
let config = retrieve_config(&snapshot)?;
|
|
||||||
Ok(DatabaseView { snapshot, index, ranked_map, schema, config })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn schema(&self) -> &Schema {
|
|
||||||
&self.schema
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn index(&self) -> &Index {
|
|
||||||
&self.index
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn ranked_map(&self) -> &RankedMap {
|
|
||||||
&self.ranked_map
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn into_snapshot(self) -> Snapshot<D> {
|
|
||||||
self.snapshot
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn snapshot(&self) -> &Snapshot<D> {
|
|
||||||
&self.snapshot
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn config(&self) -> &Config {
|
|
||||||
&self.config
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get(&self, key: &[u8]) -> Result<Option<DBVector>, Box<Error>> {
|
|
||||||
Ok(self.snapshot.get(key)?)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn dump_all<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<Error>> {
|
|
||||||
let path = path.as_ref().to_string_lossy();
|
|
||||||
|
|
||||||
let env_options = EnvOptions::new();
|
|
||||||
let column_family_options = ColumnFamilyOptions::new();
|
|
||||||
let mut file_writer = SstFileWriter::new(env_options, column_family_options);
|
|
||||||
file_writer.open(&path)?;
|
|
||||||
|
|
||||||
let mut iter = self.snapshot.iter();
|
|
||||||
iter.seek(SeekKey::Start);
|
|
||||||
|
|
||||||
for (key, value) in &mut iter {
|
|
||||||
file_writer.put(&key, &value)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
file_writer.finish()?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn query_builder(&self) -> QueryBuilder {
|
|
||||||
QueryBuilder::new(self.index())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn raw_field_by_document_id(
|
|
||||||
&self,
|
|
||||||
name: &str,
|
|
||||||
id: DocumentId
|
|
||||||
) -> Result<Option<Vec<u8>>, Box<Error>>
|
|
||||||
{
|
|
||||||
let attr = self.schema.attribute(name).ok_or("field not found")?;
|
|
||||||
let key = DocumentKeyAttr::new(id, attr);
|
|
||||||
let vector = self.snapshot.get(key.as_ref())?;
|
|
||||||
|
|
||||||
Ok(vector.map(|v| v.to_vec()))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn document_by_id<T>(&self, id: DocumentId) -> Result<T, Box<Error>>
|
|
||||||
where T: DeserializeOwned,
|
|
||||||
{
|
|
||||||
let mut deserializer = Deserializer::new(&self.snapshot, &self.schema, id);
|
|
||||||
Ok(T::deserialize(&mut deserializer)?)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn documents_by_id<T, I>(&self, ids: I) -> DocumentIter<D, T, I::IntoIter>
|
|
||||||
where T: DeserializeOwned,
|
|
||||||
I: IntoIterator<Item=DocumentId>,
|
|
||||||
{
|
|
||||||
DocumentIter {
|
|
||||||
database_view: self,
|
|
||||||
document_ids: ids.into_iter(),
|
|
||||||
_phantom: marker::PhantomData,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<D> fmt::Debug for DatabaseView<D>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
let mut options = ReadOptions::new();
|
|
||||||
let lower = DocumentKey::new(DocumentId(0));
|
|
||||||
options.set_iterate_lower_bound(lower.as_ref());
|
|
||||||
|
|
||||||
let mut iter = self.snapshot.iter_opt(options);
|
|
||||||
iter.seek(SeekKey::Start);
|
|
||||||
let iter = iter.map(|(key, _)| DocumentKeyAttr::from_bytes(&key));
|
|
||||||
|
|
||||||
if f.alternate() {
|
|
||||||
writeln!(f, "DatabaseView(")?;
|
|
||||||
} else {
|
|
||||||
write!(f, "DatabaseView(")?;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.schema.fmt(f)?;
|
|
||||||
|
|
||||||
if f.alternate() {
|
|
||||||
writeln!(f, ",")?;
|
|
||||||
} else {
|
|
||||||
write!(f, ", ")?;
|
|
||||||
}
|
|
||||||
|
|
||||||
f.debug_list().entries(iter).finish()?;
|
|
||||||
|
|
||||||
write!(f, ")")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO this is just an iter::Map !!!
|
|
||||||
pub struct DocumentIter<'a, D, T, I>
|
|
||||||
where D: Deref<Target=DB>
|
|
||||||
{
|
|
||||||
database_view: &'a DatabaseView<D>,
|
|
||||||
document_ids: I,
|
|
||||||
_phantom: marker::PhantomData<T>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, D, T, I> Iterator for DocumentIter<'a, D, T, I>
|
|
||||||
where D: Deref<Target=DB>,
|
|
||||||
T: DeserializeOwned,
|
|
||||||
I: Iterator<Item=DocumentId>,
|
|
||||||
{
|
|
||||||
type Item = Result<T, Box<Error>>;
|
|
||||||
|
|
||||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
|
||||||
self.document_ids.size_hint()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
match self.document_ids.next() {
|
|
||||||
Some(id) => Some(self.database_view.document_by_id(id)),
|
|
||||||
None => None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, D, T, I> ExactSizeIterator for DocumentIter<'a, D, T, I>
|
|
||||||
where D: Deref<Target=DB>,
|
|
||||||
T: DeserializeOwned,
|
|
||||||
I: ExactSizeIterator + Iterator<Item=DocumentId>,
|
|
||||||
{ }
|
|
||||||
|
|
||||||
impl<'a, D, T, I> DoubleEndedIterator for DocumentIter<'a, D, T, I>
|
|
||||||
where D: Deref<Target=DB>,
|
|
||||||
T: DeserializeOwned,
|
|
||||||
I: DoubleEndedIterator + Iterator<Item=DocumentId>,
|
|
||||||
{
|
|
||||||
fn next_back(&mut self) -> Option<Self::Item> {
|
|
||||||
match self.document_ids.next_back() {
|
|
||||||
Some(id) => Some(self.database_view.document_by_id(id)),
|
|
||||||
None => None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,10 +1,7 @@
|
|||||||
#![cfg_attr(feature = "nightly", feature(test))]
|
#![cfg_attr(feature = "nightly", feature(test))]
|
||||||
|
|
||||||
pub mod database;
|
|
||||||
mod common_words;
|
mod common_words;
|
||||||
mod sort_by_attr;
|
mod sort_by_attr;
|
||||||
|
|
||||||
pub use rocksdb;
|
|
||||||
|
|
||||||
pub use self::sort_by_attr::SortByAttr;
|
pub use self::sort_by_attr::SortByAttr;
|
||||||
pub use self::common_words::CommonWords;
|
pub use self::common_words::CommonWords;
|
||||||
|
Loading…
Reference in New Issue
Block a user