mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
Merge pull request #175 from meilisearch/moving-back-to-sled
Moving back to sled
This commit is contained in:
commit
fd880e0a0e
@ -5,28 +5,34 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
|
|||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arc-swap = "0.3.11"
|
arc-swap = "0.4.2"
|
||||||
bincode = "1.1.2"
|
bincode = "1.1.4"
|
||||||
deunicode = "1.0.0"
|
deunicode = "1.0.0"
|
||||||
hashbrown = { version = "0.2.2", features = ["serde"] }
|
hashbrown = { version = "0.6.0", features = ["serde"] }
|
||||||
|
log = "0.4.6"
|
||||||
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
|
meilidb-core = { path = "../meilidb-core", version = "0.1.0" }
|
||||||
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
|
meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
|
||||||
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
||||||
ordered-float = { version = "1.0.2", features = ["serde"] }
|
ordered-float = { version = "1.0.2", features = ["serde"] }
|
||||||
rocksdb = { version = "0.12.2", default-features = false }
|
|
||||||
sdset = "0.3.2"
|
sdset = "0.3.2"
|
||||||
serde = { version = "1.0.99", features = ["derive"] }
|
serde = { version = "1.0.99", features = ["derive"] }
|
||||||
serde_json = "1.0.40"
|
serde_json = "1.0.40"
|
||||||
siphasher = "0.3.0"
|
siphasher = "0.3.0"
|
||||||
zerocopy = "0.2.2"
|
sled = "0.26.0"
|
||||||
|
zerocopy = "0.2.8"
|
||||||
|
|
||||||
[dependencies.rmp-serde]
|
[dependencies.rmp-serde]
|
||||||
git = "https://github.com/3Hren/msgpack-rust.git"
|
git = "https://github.com/3Hren/msgpack-rust.git"
|
||||||
rev = "40b3d48"
|
rev = "40b3d48"
|
||||||
|
|
||||||
|
[dependencies.rmpv]
|
||||||
|
git = "https://github.com/3Hren/msgpack-rust.git"
|
||||||
|
rev = "40b3d48"
|
||||||
|
features = ["with-serde"]
|
||||||
|
|
||||||
[dependencies.fst]
|
[dependencies.fst]
|
||||||
git = "https://github.com/Kerollmops/fst.git"
|
git = "https://github.com/Kerollmops/fst.git"
|
||||||
branch = "arc-byte-slice"
|
branch = "arc-byte-slice"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.0.7"
|
tempfile = "3.1.0"
|
||||||
|
@ -1,13 +0,0 @@
|
|||||||
use std::ops::Deref;
|
|
||||||
use crate::database::raw_index::InnerRawIndex;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct CustomSettings(pub(crate) InnerRawIndex);
|
|
||||||
|
|
||||||
impl Deref for CustomSettings {
|
|
||||||
type Target = InnerRawIndex;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
&self.0
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,134 +0,0 @@
|
|||||||
use std::collections::HashSet;
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use meilidb_core::DocumentId;
|
|
||||||
use fst::{SetBuilder, set::OpBuilder};
|
|
||||||
use sdset::{SetOperation, duo::Union};
|
|
||||||
|
|
||||||
use crate::indexer::Indexer;
|
|
||||||
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
|
|
||||||
use crate::RankedMap;
|
|
||||||
|
|
||||||
use super::{Error, Index, InnerIndex, DocumentsDeletion};
|
|
||||||
|
|
||||||
pub struct DocumentsAddition<'a> {
|
|
||||||
inner: &'a Index,
|
|
||||||
document_ids: HashSet<DocumentId>,
|
|
||||||
document_store: RamDocumentStore,
|
|
||||||
indexer: Indexer,
|
|
||||||
ranked_map: RankedMap,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> DocumentsAddition<'a> {
|
|
||||||
pub fn new(inner: &'a Index, ranked_map: RankedMap) -> DocumentsAddition<'a> {
|
|
||||||
DocumentsAddition {
|
|
||||||
inner,
|
|
||||||
document_ids: HashSet::new(),
|
|
||||||
document_store: RamDocumentStore::new(),
|
|
||||||
indexer: Indexer::new(),
|
|
||||||
ranked_map,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
|
|
||||||
where D: serde::Serialize,
|
|
||||||
{
|
|
||||||
let schema = &self.inner.lease_inner().schema;
|
|
||||||
let identifier = schema.identifier_name();
|
|
||||||
|
|
||||||
let document_id = match extract_document_id(identifier, &document)? {
|
|
||||||
Some(id) => id,
|
|
||||||
None => return Err(Error::MissingDocumentId),
|
|
||||||
};
|
|
||||||
|
|
||||||
// 1. store the document id for future deletion
|
|
||||||
self.document_ids.insert(document_id);
|
|
||||||
|
|
||||||
// 2. index the document fields in ram stores
|
|
||||||
let serializer = Serializer {
|
|
||||||
schema,
|
|
||||||
document_store: &mut self.document_store,
|
|
||||||
indexer: &mut self.indexer,
|
|
||||||
ranked_map: &mut self.ranked_map,
|
|
||||||
document_id,
|
|
||||||
};
|
|
||||||
|
|
||||||
document.serialize(serializer)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn finalize(self) -> Result<(), Error> {
|
|
||||||
let lease_inner = self.inner.lease_inner();
|
|
||||||
let docs_words = &lease_inner.raw.docs_words;
|
|
||||||
let documents = &lease_inner.raw.documents;
|
|
||||||
let main = &lease_inner.raw.main;
|
|
||||||
let words = &lease_inner.raw.words;
|
|
||||||
|
|
||||||
// 1. remove the previous documents match indexes
|
|
||||||
let mut documents_deletion = DocumentsDeletion::new(self.inner, self.ranked_map.clone());
|
|
||||||
documents_deletion.extend(self.document_ids);
|
|
||||||
documents_deletion.finalize()?;
|
|
||||||
|
|
||||||
// 2. insert new document attributes in the database
|
|
||||||
for ((id, attr), value) in self.document_store.into_inner() {
|
|
||||||
documents.set_document_field(id, attr, value)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let indexed = self.indexer.build();
|
|
||||||
let mut delta_words_builder = SetBuilder::memory();
|
|
||||||
|
|
||||||
for (word, delta_set) in indexed.words_doc_indexes {
|
|
||||||
delta_words_builder.insert(&word).unwrap();
|
|
||||||
|
|
||||||
let set = match words.doc_indexes(&word)? {
|
|
||||||
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
|
|
||||||
None => delta_set,
|
|
||||||
};
|
|
||||||
|
|
||||||
words.set_doc_indexes(&word, &set)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (id, words) in indexed.docs_words {
|
|
||||||
docs_words.set_doc_words(id, &words)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let delta_words = delta_words_builder
|
|
||||||
.into_inner()
|
|
||||||
.and_then(fst::Set::from_bytes)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let words = match main.words_set()? {
|
|
||||||
Some(words) => {
|
|
||||||
let op = OpBuilder::new()
|
|
||||||
.add(words.stream())
|
|
||||||
.add(delta_words.stream())
|
|
||||||
.r#union();
|
|
||||||
|
|
||||||
let mut words_builder = SetBuilder::memory();
|
|
||||||
words_builder.extend_stream(op).unwrap();
|
|
||||||
words_builder
|
|
||||||
.into_inner()
|
|
||||||
.and_then(fst::Set::from_bytes)
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
None => delta_words,
|
|
||||||
};
|
|
||||||
|
|
||||||
main.set_words_set(&words)?;
|
|
||||||
main.set_ranked_map(&self.ranked_map)?;
|
|
||||||
|
|
||||||
// update the "consistent" view of the Index
|
|
||||||
let words = Arc::new(words);
|
|
||||||
let ranked_map = self.ranked_map;
|
|
||||||
let synonyms = lease_inner.synonyms.clone();
|
|
||||||
let schema = lease_inner.schema.clone();
|
|
||||||
let raw = lease_inner.raw.clone();
|
|
||||||
lease_inner.raw.compact();
|
|
||||||
|
|
||||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
|
||||||
self.inner.0.store(Arc::new(inner));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,139 +0,0 @@
|
|||||||
use std::collections::{HashMap, BTreeSet};
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use fst::{SetBuilder, Streamer};
|
|
||||||
use meilidb_core::DocumentId;
|
|
||||||
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};
|
|
||||||
|
|
||||||
use crate::RankedMap;
|
|
||||||
use crate::serde::extract_document_id;
|
|
||||||
|
|
||||||
use super::{Index, Error, InnerIndex};
|
|
||||||
|
|
||||||
pub struct DocumentsDeletion<'a> {
|
|
||||||
inner: &'a Index,
|
|
||||||
documents: Vec<DocumentId>,
|
|
||||||
ranked_map: RankedMap,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> DocumentsDeletion<'a> {
|
|
||||||
pub fn new(inner: &'a Index, ranked_map: RankedMap) -> DocumentsDeletion {
|
|
||||||
DocumentsDeletion { inner, documents: Vec::new(), ranked_map }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delete_document_by_id(&mut self, id: DocumentId) {
|
|
||||||
self.documents.push(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn delete_document<D>(&mut self, document: D) -> Result<(), Error>
|
|
||||||
where D: serde::Serialize,
|
|
||||||
{
|
|
||||||
let schema = &self.inner.lease_inner().schema;
|
|
||||||
let identifier = schema.identifier_name();
|
|
||||||
|
|
||||||
let document_id = match extract_document_id(identifier, &document)? {
|
|
||||||
Some(id) => id,
|
|
||||||
None => return Err(Error::MissingDocumentId),
|
|
||||||
};
|
|
||||||
|
|
||||||
self.delete_document_by_id(document_id);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn finalize(mut self) -> Result<(), Error> {
|
|
||||||
let lease_inner = self.inner.lease_inner();
|
|
||||||
let docs_words = &lease_inner.raw.docs_words;
|
|
||||||
let documents = &lease_inner.raw.documents;
|
|
||||||
let main = &lease_inner.raw.main;
|
|
||||||
let schema = &lease_inner.schema;
|
|
||||||
let words = &lease_inner.raw.words;
|
|
||||||
|
|
||||||
let idset = SetBuf::from_dirty(self.documents);
|
|
||||||
|
|
||||||
// collect the ranked attributes according to the schema
|
|
||||||
let ranked_attrs: Vec<_> = schema.iter()
|
|
||||||
.filter_map(|(_, attr, prop)| {
|
|
||||||
if prop.is_ranked() { Some(attr) } else { None }
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let mut words_document_ids = HashMap::new();
|
|
||||||
for id in idset {
|
|
||||||
// remove all the ranked attributes from the ranked_map
|
|
||||||
for ranked_attr in &ranked_attrs {
|
|
||||||
self.ranked_map.remove(id, *ranked_attr);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(words) = docs_words.doc_words(id)? {
|
|
||||||
let mut stream = words.stream();
|
|
||||||
while let Some(word) = stream.next() {
|
|
||||||
let word = word.to_vec();
|
|
||||||
words_document_ids.entry(word).or_insert_with(Vec::new).push(id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut removed_words = BTreeSet::new();
|
|
||||||
for (word, document_ids) in words_document_ids {
|
|
||||||
let document_ids = SetBuf::from_dirty(document_ids);
|
|
||||||
|
|
||||||
if let Some(doc_indexes) = words.doc_indexes(&word)? {
|
|
||||||
let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
|
|
||||||
let doc_indexes = op.into_set_buf();
|
|
||||||
|
|
||||||
if !doc_indexes.is_empty() {
|
|
||||||
words.set_doc_indexes(&word, &doc_indexes)?;
|
|
||||||
} else {
|
|
||||||
words.del_doc_indexes(&word)?;
|
|
||||||
removed_words.insert(word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for id in document_ids {
|
|
||||||
documents.del_all_document_fields(id)?;
|
|
||||||
docs_words.del_doc_words(id)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let removed_words = fst::Set::from_iter(removed_words).unwrap();
|
|
||||||
let words = match main.words_set()? {
|
|
||||||
Some(words_set) => {
|
|
||||||
let op = fst::set::OpBuilder::new()
|
|
||||||
.add(words_set.stream())
|
|
||||||
.add(removed_words.stream())
|
|
||||||
.difference();
|
|
||||||
|
|
||||||
let mut words_builder = SetBuilder::memory();
|
|
||||||
words_builder.extend_stream(op).unwrap();
|
|
||||||
words_builder
|
|
||||||
.into_inner()
|
|
||||||
.and_then(fst::Set::from_bytes)
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
None => fst::Set::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
main.set_words_set(&words)?;
|
|
||||||
main.set_ranked_map(&self.ranked_map)?;
|
|
||||||
|
|
||||||
// update the "consistent" view of the Index
|
|
||||||
let words = Arc::new(words);
|
|
||||||
let ranked_map = lease_inner.ranked_map.clone();
|
|
||||||
let synonyms = lease_inner.synonyms.clone();
|
|
||||||
let schema = lease_inner.schema.clone();
|
|
||||||
let raw = lease_inner.raw.clone();
|
|
||||||
lease_inner.raw.compact();
|
|
||||||
|
|
||||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
|
||||||
self.inner.0.store(Arc::new(inner));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Extend<DocumentId> for DocumentsDeletion<'a> {
|
|
||||||
fn extend<T: IntoIterator<Item=DocumentId>>(&mut self, iter: T) {
|
|
||||||
self.documents.extend(iter)
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,90 +0,0 @@
|
|||||||
use std::convert::TryInto;
|
|
||||||
|
|
||||||
use meilidb_core::DocumentId;
|
|
||||||
use meilidb_schema::SchemaAttr;
|
|
||||||
use rocksdb::DBVector;
|
|
||||||
|
|
||||||
use crate::database::raw_index::InnerRawIndex;
|
|
||||||
use crate::document_attr_key::DocumentAttrKey;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct DocumentsIndex(pub(crate) InnerRawIndex);
|
|
||||||
|
|
||||||
impl DocumentsIndex {
|
|
||||||
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<Option<DBVector>, rocksdb::Error> {
|
|
||||||
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
|
||||||
self.0.get(key)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> Result<(), rocksdb::Error> {
|
|
||||||
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
|
||||||
self.0.set(key, value)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<(), rocksdb::Error> {
|
|
||||||
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
|
||||||
self.0.delete(key)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn del_all_document_fields(&self, id: DocumentId) -> Result<(), rocksdb::Error> {
|
|
||||||
let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
|
|
||||||
let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();
|
|
||||||
self.0.delete_range(start, end)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
|
|
||||||
let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
|
|
||||||
let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();
|
|
||||||
|
|
||||||
let from = rocksdb::IteratorMode::From(&start[..], rocksdb::Direction::Forward);
|
|
||||||
let iter = self.0.iterator(from).unwrap();
|
|
||||||
|
|
||||||
DocumentFieldsIter(iter, end.to_vec())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn len(&self) -> Result<usize, rocksdb::Error> {
|
|
||||||
let mut last_document_id = None;
|
|
||||||
let mut count = 0;
|
|
||||||
|
|
||||||
let from = rocksdb::IteratorMode::Start;
|
|
||||||
let iterator = self.0.iterator(from)?;
|
|
||||||
|
|
||||||
for (key, _) in iterator {
|
|
||||||
let slice = key.as_ref().try_into().unwrap();
|
|
||||||
let document_id = DocumentAttrKey::from_be_bytes(slice).document_id;
|
|
||||||
|
|
||||||
if Some(document_id) != last_document_id {
|
|
||||||
last_document_id = Some(document_id);
|
|
||||||
count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(count)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct DocumentFieldsIter<'a>(rocksdb::DBIterator<'a>, Vec<u8>);
|
|
||||||
|
|
||||||
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
|
||||||
type Item = Result<(SchemaAttr, Box<[u8]>), rocksdb::Error>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
match self.0.next() {
|
|
||||||
Some((key, value)) => {
|
|
||||||
|
|
||||||
if key.as_ref() > self.1.as_ref() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let slice: &[u8] = key.as_ref();
|
|
||||||
let array = slice.try_into().unwrap();
|
|
||||||
let key = DocumentAttrKey::from_be_bytes(array);
|
|
||||||
Some(Ok((key.attribute, value)))
|
|
||||||
},
|
|
||||||
None => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -7,15 +7,17 @@ pub enum Error {
|
|||||||
SchemaMissing,
|
SchemaMissing,
|
||||||
WordIndexMissing,
|
WordIndexMissing,
|
||||||
MissingDocumentId,
|
MissingDocumentId,
|
||||||
RocksdbError(rocksdb::Error),
|
SledError(sled::Error),
|
||||||
FstError(fst::Error),
|
FstError(fst::Error),
|
||||||
|
RmpDecodeError(rmp_serde::decode::Error),
|
||||||
|
RmpEncodeError(rmp_serde::encode::Error),
|
||||||
BincodeError(bincode::Error),
|
BincodeError(bincode::Error),
|
||||||
SerializerError(SerializerError),
|
SerializerError(SerializerError),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<rocksdb::Error> for Error {
|
impl From<sled::Error> for Error {
|
||||||
fn from(error: rocksdb::Error) -> Error {
|
fn from(error: sled::Error) -> Error {
|
||||||
Error::RocksdbError(error)
|
Error::SledError(error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,6 +27,18 @@ impl From<fst::Error> for Error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<rmp_serde::decode::Error> for Error {
|
||||||
|
fn from(error: rmp_serde::decode::Error) -> Error {
|
||||||
|
Error::RmpDecodeError(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<rmp_serde::encode::Error> for Error {
|
||||||
|
fn from(error: rmp_serde::encode::Error) -> Error {
|
||||||
|
Error::RmpEncodeError(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<bincode::Error> for Error {
|
impl From<bincode::Error> for Error {
|
||||||
fn from(error: bincode::Error) -> Error {
|
fn from(error: bincode::Error) -> Error {
|
||||||
Error::BincodeError(error)
|
Error::BincodeError(error)
|
||||||
@ -45,8 +59,10 @@ impl fmt::Display for Error {
|
|||||||
SchemaMissing => write!(f, "this index does not have a schema"),
|
SchemaMissing => write!(f, "this index does not have a schema"),
|
||||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||||
MissingDocumentId => write!(f, "document id is missing"),
|
MissingDocumentId => write!(f, "document id is missing"),
|
||||||
RocksdbError(e) => write!(f, "RocksDB error; {}", e),
|
SledError(e) => write!(f, "Sled error; {}", e),
|
||||||
FstError(e) => write!(f, "fst error; {}", e),
|
FstError(e) => write!(f, "fst error; {}", e),
|
||||||
|
RmpDecodeError(e) => write!(f, "rmp decode error; {}", e),
|
||||||
|
RmpEncodeError(e) => write!(f, "rmp encode error; {}", e),
|
||||||
BincodeError(e) => write!(f, "bincode error; {}", e),
|
BincodeError(e) => write!(f, "bincode error; {}", e),
|
||||||
SerializerError(e) => write!(f, "serializer error; {}", e),
|
SerializerError(e) => write!(f, "serializer error; {}", e),
|
||||||
}
|
}
|
||||||
|
@ -1,170 +0,0 @@
|
|||||||
use sdset::SetBuf;
|
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use arc_swap::{ArcSwap, Lease};
|
|
||||||
use meilidb_core::criterion::Criteria;
|
|
||||||
use meilidb_core::{DocIndex, Store, DocumentId, QueryBuilder};
|
|
||||||
use meilidb_schema::Schema;
|
|
||||||
use rmp_serde::decode::Error as RmpError;
|
|
||||||
use serde::de;
|
|
||||||
|
|
||||||
use crate::ranked_map::RankedMap;
|
|
||||||
use crate::serde::Deserializer;
|
|
||||||
|
|
||||||
use super::{Error, CustomSettings};
|
|
||||||
use super::{
|
|
||||||
RawIndex,
|
|
||||||
DocumentsAddition, DocumentsDeletion,
|
|
||||||
SynonymsAddition, SynonymsDeletion,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
|
||||||
pub struct IndexStats {
|
|
||||||
pub number_of_words: usize,
|
|
||||||
pub number_of_documents: usize,
|
|
||||||
pub number_attrs_in_ranked_map: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Index(pub ArcSwap<InnerIndex>);
|
|
||||||
|
|
||||||
pub struct InnerIndex {
|
|
||||||
pub words: Arc<fst::Set>,
|
|
||||||
pub synonyms: Arc<fst::Set>,
|
|
||||||
pub schema: Schema,
|
|
||||||
pub ranked_map: RankedMap,
|
|
||||||
pub raw: RawIndex, // TODO this will be a snapshot in the future
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Index {
|
|
||||||
pub fn from_raw(raw: RawIndex) -> Result<Index, Error> {
|
|
||||||
let words = match raw.main.words_set()? {
|
|
||||||
Some(words) => Arc::new(words),
|
|
||||||
None => Arc::new(fst::Set::default()),
|
|
||||||
};
|
|
||||||
|
|
||||||
let synonyms = match raw.main.synonyms_set()? {
|
|
||||||
Some(synonyms) => Arc::new(synonyms),
|
|
||||||
None => Arc::new(fst::Set::default()),
|
|
||||||
};
|
|
||||||
|
|
||||||
let schema = match raw.main.schema()? {
|
|
||||||
Some(schema) => schema,
|
|
||||||
None => return Err(Error::SchemaMissing),
|
|
||||||
};
|
|
||||||
|
|
||||||
let ranked_map = match raw.main.ranked_map()? {
|
|
||||||
Some(map) => map,
|
|
||||||
None => RankedMap::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
|
||||||
let index = Index(ArcSwap::new(Arc::new(inner)));
|
|
||||||
|
|
||||||
Ok(index)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn stats(&self) -> Result<IndexStats, rocksdb::Error> {
|
|
||||||
let lease = self.0.lease();
|
|
||||||
|
|
||||||
Ok(IndexStats {
|
|
||||||
number_of_words: lease.words.len(),
|
|
||||||
number_of_documents: lease.raw.documents.len()?,
|
|
||||||
number_attrs_in_ranked_map: lease.ranked_map.len(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn query_builder(&self) -> QueryBuilder<IndexLease> {
|
|
||||||
let lease = IndexLease(self.0.lease());
|
|
||||||
QueryBuilder::new(lease)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn query_builder_with_criteria<'c>(
|
|
||||||
&self,
|
|
||||||
criteria: Criteria<'c>,
|
|
||||||
) -> QueryBuilder<'c, IndexLease>
|
|
||||||
{
|
|
||||||
let lease = IndexLease(self.0.lease());
|
|
||||||
QueryBuilder::with_criteria(lease, criteria)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn lease_inner(&self) -> Lease<Arc<InnerIndex>> {
|
|
||||||
self.0.lease()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn schema(&self) -> Schema {
|
|
||||||
self.0.lease().schema.clone()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn custom_settings(&self) -> CustomSettings {
|
|
||||||
self.0.lease().raw.custom.clone()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn documents_addition(&self) -> DocumentsAddition {
|
|
||||||
let ranked_map = self.0.lease().ranked_map.clone();
|
|
||||||
DocumentsAddition::new(self, ranked_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn documents_deletion(&self) -> DocumentsDeletion {
|
|
||||||
let ranked_map = self.0.lease().ranked_map.clone();
|
|
||||||
DocumentsDeletion::new(self, ranked_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn synonyms_addition(&self) -> SynonymsAddition {
|
|
||||||
SynonymsAddition::new(self)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn synonyms_deletion(&self) -> SynonymsDeletion {
|
|
||||||
SynonymsDeletion::new(self)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn document<T>(
|
|
||||||
&self,
|
|
||||||
fields: Option<&HashSet<&str>>,
|
|
||||||
id: DocumentId,
|
|
||||||
) -> Result<Option<T>, RmpError>
|
|
||||||
where T: de::DeserializeOwned,
|
|
||||||
{
|
|
||||||
let schema = &self.lease_inner().schema;
|
|
||||||
let fields = fields
|
|
||||||
.map(|fields| {
|
|
||||||
fields
|
|
||||||
.iter()
|
|
||||||
.filter_map(|name| schema.attribute(name))
|
|
||||||
.collect()
|
|
||||||
});
|
|
||||||
|
|
||||||
let mut deserializer = Deserializer {
|
|
||||||
document_id: id,
|
|
||||||
index: &self,
|
|
||||||
fields: fields.as_ref(),
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO: currently we return an error if all document fields are missing,
|
|
||||||
// returning None would have been better
|
|
||||||
T::deserialize(&mut deserializer).map(Some)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct IndexLease(Lease<Arc<InnerIndex>>);
|
|
||||||
|
|
||||||
impl Store for IndexLease {
|
|
||||||
type Error = Error;
|
|
||||||
|
|
||||||
fn words(&self) -> Result<&fst::Set, Self::Error> {
|
|
||||||
Ok(&self.0.words)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
|
||||||
Ok(self.0.raw.words.doc_indexes(word)?)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn synonyms(&self) -> Result<&fst::Set, Self::Error> {
|
|
||||||
Ok(&self.0.synonyms)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, Self::Error> {
|
|
||||||
Ok(self.0.raw.synonyms.alternatives_to(word)?)
|
|
||||||
}
|
|
||||||
}
|
|
13
meilidb-data/src/database/index/custom_settings_index.rs
Normal file
13
meilidb-data/src/database/index/custom_settings_index.rs
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct CustomSettingsIndex(pub(crate) Arc<sled::Tree>);
|
||||||
|
|
||||||
|
impl Deref for CustomSettingsIndex {
|
||||||
|
type Target = sled::Tree;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
@ -1,17 +1,14 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use meilidb_core::DocumentId;
|
use meilidb_core::DocumentId;
|
||||||
|
use crate::database::Error;
|
||||||
use crate::database::raw_index::InnerRawIndex;
|
|
||||||
use super::Error;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct DocsWordsIndex(pub(crate) InnerRawIndex);
|
pub struct DocsWordsIndex(pub Arc<sled::Tree>);
|
||||||
|
|
||||||
impl DocsWordsIndex {
|
impl DocsWordsIndex {
|
||||||
pub fn doc_words(&self, id: DocumentId) -> Result<Option<fst::Set>, Error> {
|
pub fn doc_words(&self, id: DocumentId) -> Result<Option<fst::Set>, Error> {
|
||||||
let key = id.0.to_be_bytes();
|
let key = id.0.to_be_bytes();
|
||||||
match self.0.get_pinned(key)? {
|
match self.0.get(key)? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let len = bytes.len();
|
let len = bytes.len();
|
||||||
let value = Arc::from(bytes.as_ref());
|
let value = Arc::from(bytes.as_ref());
|
||||||
@ -24,13 +21,13 @@ impl DocsWordsIndex {
|
|||||||
|
|
||||||
pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> {
|
pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> {
|
||||||
let key = id.0.to_be_bytes();
|
let key = id.0.to_be_bytes();
|
||||||
self.0.set(key, words.as_fst().as_bytes())?;
|
self.0.insert(key, words.as_fst().as_bytes())?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> {
|
pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> {
|
||||||
let key = id.0.to_be_bytes();
|
let key = id.0.to_be_bytes();
|
||||||
self.0.delete(key)?;
|
self.0.remove(key)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
91
meilidb-data/src/database/index/documents_index.rs
Normal file
91
meilidb-data/src/database/index/documents_index.rs
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
use std::convert::TryInto;
|
||||||
|
use std::ops::Bound;
|
||||||
|
|
||||||
|
use meilidb_core::DocumentId;
|
||||||
|
use meilidb_schema::SchemaAttr;
|
||||||
|
|
||||||
|
use crate::document_attr_key::DocumentAttrKey;
|
||||||
|
|
||||||
|
fn document_fields_range(id: DocumentId) -> (Bound<[u8; 10]>, Bound<[u8; 10]>) {
|
||||||
|
let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
|
||||||
|
let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();
|
||||||
|
|
||||||
|
(Bound::Included(start), Bound::Included(end))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct DocumentsIndex(pub(crate) Arc<sled::Tree>);
|
||||||
|
|
||||||
|
impl DocumentsIndex {
|
||||||
|
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> sled::Result<Option<sled::IVec>> {
|
||||||
|
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||||
|
self.0.get(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> sled::Result<()> {
|
||||||
|
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||||
|
self.0.insert(key, value)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> sled::Result<()> {
|
||||||
|
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||||
|
self.0.remove(key)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn del_all_document_fields(&self, id: DocumentId) -> sled::Result<()> {
|
||||||
|
let range = document_fields_range(id);
|
||||||
|
|
||||||
|
for result in self.0.range(range) {
|
||||||
|
let (key, _) = result?;
|
||||||
|
self.0.remove(key)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
|
||||||
|
let range = document_fields_range(id);
|
||||||
|
|
||||||
|
let iter = self.0.range(range);
|
||||||
|
DocumentFieldsIter(iter)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> sled::Result<usize> {
|
||||||
|
let mut last_document_id = None;
|
||||||
|
let mut count = 0;
|
||||||
|
|
||||||
|
for result in self.0.iter() {
|
||||||
|
let (key, _) = result?;
|
||||||
|
let array = key.as_ref().try_into().unwrap();
|
||||||
|
let document_id = DocumentAttrKey::from_be_bytes(array).document_id;
|
||||||
|
|
||||||
|
if Some(document_id) != last_document_id {
|
||||||
|
last_document_id = Some(document_id);
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
||||||
|
|
||||||
|
impl Iterator for DocumentFieldsIter<'_> {
|
||||||
|
type Item = sled::Result<(SchemaAttr, sled::IVec)>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self.0.next() {
|
||||||
|
Some(Ok((key, value))) => {
|
||||||
|
let array = key.as_ref().try_into().unwrap();
|
||||||
|
let key = DocumentAttrKey::from_be_bytes(array);
|
||||||
|
Some(Ok((key.attribute, value)))
|
||||||
|
},
|
||||||
|
Some(Err(e)) => return Some(Err(e)),
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -2,10 +2,8 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use meilidb_schema::Schema;
|
use meilidb_schema::Schema;
|
||||||
|
|
||||||
use crate::database::raw_index::InnerRawIndex;
|
|
||||||
use crate::ranked_map::RankedMap;
|
use crate::ranked_map::RankedMap;
|
||||||
|
use crate::database::Error;
|
||||||
use super::Error;
|
|
||||||
|
|
||||||
const SCHEMA_KEY: &str = "schema";
|
const SCHEMA_KEY: &str = "schema";
|
||||||
const WORDS_KEY: &str = "words";
|
const WORDS_KEY: &str = "words";
|
||||||
@ -13,11 +11,11 @@ const SYNONYMS_KEY: &str = "synonyms";
|
|||||||
const RANKED_MAP_KEY: &str = "ranked-map";
|
const RANKED_MAP_KEY: &str = "ranked-map";
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct MainIndex(pub(crate) InnerRawIndex);
|
pub struct MainIndex(pub(crate) Arc<sled::Tree>);
|
||||||
|
|
||||||
impl MainIndex {
|
impl MainIndex {
|
||||||
pub fn schema(&self) -> Result<Option<Schema>, Error> {
|
pub fn schema(&self) -> Result<Option<Schema>, Error> {
|
||||||
match self.0.get_pinned(SCHEMA_KEY)? {
|
match self.0.get(SCHEMA_KEY)? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let schema = Schema::read_from_bin(bytes.as_ref())?;
|
let schema = Schema::read_from_bin(bytes.as_ref())?;
|
||||||
Ok(Some(schema))
|
Ok(Some(schema))
|
||||||
@ -29,12 +27,12 @@ impl MainIndex {
|
|||||||
pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> {
|
pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> {
|
||||||
let mut bytes = Vec::new();
|
let mut bytes = Vec::new();
|
||||||
schema.write_to_bin(&mut bytes)?;
|
schema.write_to_bin(&mut bytes)?;
|
||||||
self.0.set(SCHEMA_KEY, bytes)?;
|
self.0.insert(SCHEMA_KEY, bytes)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
|
pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
|
||||||
match self.0.get_pinned(WORDS_KEY)? {
|
match self.0.get(WORDS_KEY)? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let len = bytes.len();
|
let len = bytes.len();
|
||||||
let value = Arc::from(bytes.as_ref());
|
let value = Arc::from(bytes.as_ref());
|
||||||
@ -46,11 +44,11 @@ impl MainIndex {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
|
pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
|
||||||
self.0.set(WORDS_KEY, value.as_fst().as_bytes()).map_err(Into::into)
|
self.0.insert(WORDS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn synonyms_set(&self) -> Result<Option<fst::Set>, Error> {
|
pub fn synonyms_set(&self) -> Result<Option<fst::Set>, Error> {
|
||||||
match self.0.get_pinned(SYNONYMS_KEY)? {
|
match self.0.get(SYNONYMS_KEY)? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let len = bytes.len();
|
let len = bytes.len();
|
||||||
let value = Arc::from(bytes.as_ref());
|
let value = Arc::from(bytes.as_ref());
|
||||||
@ -62,11 +60,11 @@ impl MainIndex {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_synonyms_set(&self, value: &fst::Set) -> Result<(), Error> {
|
pub fn set_synonyms_set(&self, value: &fst::Set) -> Result<(), Error> {
|
||||||
self.0.set(SYNONYMS_KEY, value.as_fst().as_bytes()).map_err(Into::into)
|
self.0.insert(SYNONYMS_KEY, value.as_fst().as_bytes()).map(drop).map_err(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
|
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
|
||||||
match self.0.get_pinned(RANKED_MAP_KEY)? {
|
match self.0.get(RANKED_MAP_KEY)? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
|
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
|
||||||
Ok(Some(ranked_map))
|
Ok(Some(ranked_map))
|
||||||
@ -78,7 +76,7 @@ impl MainIndex {
|
|||||||
pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> {
|
pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> {
|
||||||
let mut bytes = Vec::new();
|
let mut bytes = Vec::new();
|
||||||
value.write_to_bin(&mut bytes)?;
|
value.write_to_bin(&mut bytes)?;
|
||||||
self.0.set(RANKED_MAP_KEY, bytes)?;
|
self.0.insert(RANKED_MAP_KEY, bytes)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
469
meilidb-data/src/database/index/mod.rs
Normal file
469
meilidb-data/src/database/index/mod.rs
Normal file
@ -0,0 +1,469 @@
|
|||||||
|
use std::collections::{HashSet, BTreeMap};
|
||||||
|
use std::convert::TryInto;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::thread;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
use arc_swap::{ArcSwap, ArcSwapOption, Guard};
|
||||||
|
use meilidb_core::criterion::Criteria;
|
||||||
|
use meilidb_core::{DocIndex, Store, DocumentId, QueryBuilder};
|
||||||
|
use meilidb_schema::Schema;
|
||||||
|
use sdset::SetBuf;
|
||||||
|
use serde::{de, Serialize, Deserialize};
|
||||||
|
use sled::Transactional;
|
||||||
|
|
||||||
|
use crate::ranked_map::RankedMap;
|
||||||
|
use crate::serde::{Deserializer, DeserializerError};
|
||||||
|
|
||||||
|
pub use self::custom_settings_index::CustomSettingsIndex;
|
||||||
|
use self::docs_words_index::DocsWordsIndex;
|
||||||
|
use self::documents_index::DocumentsIndex;
|
||||||
|
use self::main_index::MainIndex;
|
||||||
|
use self::synonyms_index::SynonymsIndex;
|
||||||
|
use self::words_index::WordsIndex;
|
||||||
|
|
||||||
|
use crate::database::{
|
||||||
|
Error,
|
||||||
|
DocumentsAddition, DocumentsDeletion,
|
||||||
|
SynonymsAddition, SynonymsDeletion,
|
||||||
|
apply_documents_addition, apply_documents_deletion,
|
||||||
|
apply_synonyms_addition, apply_synonyms_deletion,
|
||||||
|
};
|
||||||
|
|
||||||
|
mod custom_settings_index;
|
||||||
|
mod docs_words_index;
|
||||||
|
mod documents_index;
|
||||||
|
mod main_index;
|
||||||
|
mod synonyms_index;
|
||||||
|
mod words_index;
|
||||||
|
|
||||||
|
/// Tells whether a sled subscription event is a `Set` (an insertion or an
/// overwrite); every other kind of event yields `false`.
fn event_is_set(event: &sled::Event) -> bool {
    if let sled::Event::Set(_, _) = event {
        true
    } else {
        false
    }
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
enum UpdateOwned {
|
||||||
|
DocumentsAddition(Vec<rmpv::Value>),
|
||||||
|
DocumentsDeletion(Vec<DocumentId>),
|
||||||
|
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
||||||
|
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
enum Update {
|
||||||
|
DocumentsAddition(Vec<rmpv::Value>),
|
||||||
|
DocumentsDeletion(Vec<DocumentId>),
|
||||||
|
SynonymsAddition(BTreeMap<String, Vec<String>>),
|
||||||
|
SynonymsDeletion(BTreeMap<String, Option<Vec<String>>>),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub enum UpdateType {
|
||||||
|
DocumentsAddition { number: usize },
|
||||||
|
DocumentsDeletion { number: usize },
|
||||||
|
SynonymsAddition { number: usize },
|
||||||
|
SynonymsDeletion { number: usize },
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct DetailedDuration {
|
||||||
|
main: Duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize)]
|
||||||
|
pub struct UpdateStatus {
|
||||||
|
pub update_id: u64,
|
||||||
|
pub update_type: UpdateType,
|
||||||
|
pub result: Result<(), String>,
|
||||||
|
pub detailed_duration: DetailedDuration,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn spawn_update_system(index: Index) -> thread::JoinHandle<()> {
|
||||||
|
thread::spawn(move || {
|
||||||
|
loop {
|
||||||
|
let subscription = index.updates_index.watch_prefix(vec![]);
|
||||||
|
while let Some(result) = index.updates_index.iter().next() {
|
||||||
|
let (key, _) = result.unwrap();
|
||||||
|
let update_id = key.as_ref().try_into().map(u64::from_be_bytes).unwrap();
|
||||||
|
|
||||||
|
let updates = &index.updates_index;
|
||||||
|
let results = &index.updates_results_index;
|
||||||
|
|
||||||
|
(updates, results).transaction(|(updates, results)| {
|
||||||
|
let update = updates.remove(&key)?.unwrap();
|
||||||
|
|
||||||
|
let (update_type, result, duration) = match rmp_serde::from_read_ref(&update).unwrap() {
|
||||||
|
UpdateOwned::DocumentsAddition(documents) => {
|
||||||
|
let update_type = UpdateType::DocumentsAddition { number: documents.len() };
|
||||||
|
let ranked_map = index.cache.load().ranked_map.clone();
|
||||||
|
let start = Instant::now();
|
||||||
|
let result = apply_documents_addition(&index, ranked_map, documents);
|
||||||
|
(update_type, result, start.elapsed())
|
||||||
|
},
|
||||||
|
UpdateOwned::DocumentsDeletion(documents) => {
|
||||||
|
let update_type = UpdateType::DocumentsDeletion { number: documents.len() };
|
||||||
|
let ranked_map = index.cache.load().ranked_map.clone();
|
||||||
|
let start = Instant::now();
|
||||||
|
let result = apply_documents_deletion(&index, ranked_map, documents);
|
||||||
|
(update_type, result, start.elapsed())
|
||||||
|
},
|
||||||
|
UpdateOwned::SynonymsAddition(synonyms) => {
|
||||||
|
let update_type = UpdateType::SynonymsAddition { number: synonyms.len() };
|
||||||
|
let start = Instant::now();
|
||||||
|
let result = apply_synonyms_addition(&index, synonyms);
|
||||||
|
(update_type, result, start.elapsed())
|
||||||
|
},
|
||||||
|
UpdateOwned::SynonymsDeletion(synonyms) => {
|
||||||
|
let update_type = UpdateType::SynonymsDeletion { number: synonyms.len() };
|
||||||
|
let start = Instant::now();
|
||||||
|
let result = apply_synonyms_deletion(&index, synonyms);
|
||||||
|
(update_type, result, start.elapsed())
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let detailed_duration = DetailedDuration { main: duration };
|
||||||
|
let status = UpdateStatus {
|
||||||
|
update_id,
|
||||||
|
update_type,
|
||||||
|
result: result.map_err(|e| e.to_string()),
|
||||||
|
detailed_duration,
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(callback) = &*index.update_callback.load() {
|
||||||
|
(callback)(status.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = bincode::serialize(&status).unwrap();
|
||||||
|
results.insert(&key, value)
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// this subscription is just used to block
|
||||||
|
// the loop until a new update is inserted
|
||||||
|
subscription.filter(event_is_set).next();
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Counters describing the content of an index.
///
/// On top of `Copy` and `Clone`, the common `Debug`, `Default`, `PartialEq`
/// and `Eq` traits are derived so that statistics can be logged, compared
/// and built from zeroed counters.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
pub struct IndexStats {
    /// Number of entries in the words set.
    pub number_of_words: usize,
    /// Number of entries reported by the documents index.
    pub number_of_documents: usize,
    /// Number of entries in the ranked map.
    pub number_attrs_in_ranked_map: usize,
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Index {
|
||||||
|
pub(crate) cache: Arc<ArcSwap<Cache>>,
|
||||||
|
|
||||||
|
// TODO this will be a snapshot in the future
|
||||||
|
main_index: MainIndex,
|
||||||
|
synonyms_index: SynonymsIndex,
|
||||||
|
words_index: WordsIndex,
|
||||||
|
docs_words_index: DocsWordsIndex,
|
||||||
|
documents_index: DocumentsIndex,
|
||||||
|
custom_settings_index: CustomSettingsIndex,
|
||||||
|
|
||||||
|
// used by the update system
|
||||||
|
db: sled::Db,
|
||||||
|
updates_index: Arc<sled::Tree>,
|
||||||
|
updates_results_index: Arc<sled::Tree>,
|
||||||
|
update_callback: Arc<ArcSwapOption<Box<dyn Fn(UpdateStatus) + Send + Sync + 'static>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct Cache {
|
||||||
|
pub words: Arc<fst::Set>,
|
||||||
|
pub synonyms: Arc<fst::Set>,
|
||||||
|
pub schema: Schema,
|
||||||
|
pub ranked_map: RankedMap,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Index {
|
||||||
|
pub fn new(db: sled::Db, name: &str) -> Result<Index, Error> {
|
||||||
|
Index::new_raw(db, name, None)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_schema(db: sled::Db, name: &str, schema: Schema) -> Result<Index, Error> {
|
||||||
|
Index::new_raw(db, name, Some(schema))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_raw(db: sled::Db, name: &str, schema: Option<Schema>) -> Result<Index, Error> {
|
||||||
|
let main_index = db.open_tree(name).map(MainIndex)?;
|
||||||
|
let synonyms_index = db.open_tree(format!("{}-synonyms", name)).map(SynonymsIndex)?;
|
||||||
|
let words_index = db.open_tree(format!("{}-words", name)).map(WordsIndex)?;
|
||||||
|
let docs_words_index = db.open_tree(format!("{}-docs-words", name)).map(DocsWordsIndex)?;
|
||||||
|
let documents_index = db.open_tree(format!("{}-documents", name)).map(DocumentsIndex)?;
|
||||||
|
let custom_settings_index = db.open_tree(format!("{}-custom", name)).map(CustomSettingsIndex)?;
|
||||||
|
let updates_index = db.open_tree(format!("{}-updates", name))?;
|
||||||
|
let updates_results_index = db.open_tree(format!("{}-updates-results", name))?;
|
||||||
|
|
||||||
|
let words = match main_index.words_set()? {
|
||||||
|
Some(words) => Arc::new(words),
|
||||||
|
None => Arc::new(fst::Set::default()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let synonyms = match main_index.synonyms_set()? {
|
||||||
|
Some(synonyms) => Arc::new(synonyms),
|
||||||
|
None => Arc::new(fst::Set::default()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let schema = match (schema, main_index.schema()?) {
|
||||||
|
(Some(ref expected), Some(ref current)) if current != expected => {
|
||||||
|
return Err(Error::SchemaDiffer)
|
||||||
|
},
|
||||||
|
(Some(expected), Some(_)) => expected,
|
||||||
|
(Some(expected), None) => {
|
||||||
|
main_index.set_schema(&expected)?;
|
||||||
|
expected
|
||||||
|
},
|
||||||
|
(None, Some(current)) => current,
|
||||||
|
(None, None) => return Err(Error::SchemaMissing),
|
||||||
|
};
|
||||||
|
|
||||||
|
let ranked_map = match main_index.ranked_map()? {
|
||||||
|
Some(map) => map,
|
||||||
|
None => RankedMap::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let cache = Cache { words, synonyms, schema, ranked_map };
|
||||||
|
let cache = Arc::new(ArcSwap::from_pointee(cache));
|
||||||
|
|
||||||
|
let index = Index {
|
||||||
|
cache,
|
||||||
|
main_index,
|
||||||
|
synonyms_index,
|
||||||
|
words_index,
|
||||||
|
docs_words_index,
|
||||||
|
documents_index,
|
||||||
|
custom_settings_index,
|
||||||
|
db,
|
||||||
|
updates_index,
|
||||||
|
updates_results_index,
|
||||||
|
update_callback: Arc::new(ArcSwapOption::empty()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let _handle = spawn_update_system(index.clone());
|
||||||
|
|
||||||
|
Ok(index)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_update_callback<F>(&self, callback: F)
|
||||||
|
where F: Fn(UpdateStatus) + Send + Sync + 'static
|
||||||
|
{
|
||||||
|
self.update_callback.store(Some(Arc::new(Box::new(callback))));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn unset_update_callback(&self) {
|
||||||
|
self.update_callback.store(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn stats(&self) -> sled::Result<IndexStats> {
|
||||||
|
let cache = self.cache.load();
|
||||||
|
Ok(IndexStats {
|
||||||
|
number_of_words: cache.words.len(),
|
||||||
|
number_of_documents: self.documents_index.len()?,
|
||||||
|
number_attrs_in_ranked_map: cache.ranked_map.len(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn query_builder(&self) -> QueryBuilder<RefIndex> {
|
||||||
|
let ref_index = self.as_ref();
|
||||||
|
QueryBuilder::new(ref_index)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn query_builder_with_criteria<'c>(
|
||||||
|
&self,
|
||||||
|
criteria: Criteria<'c>,
|
||||||
|
) -> QueryBuilder<'c, RefIndex>
|
||||||
|
{
|
||||||
|
let ref_index = self.as_ref();
|
||||||
|
QueryBuilder::with_criteria(ref_index, criteria)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_ref(&self) -> RefIndex {
|
||||||
|
RefIndex {
|
||||||
|
cache: self.cache.load(),
|
||||||
|
main_index: &self.main_index,
|
||||||
|
synonyms_index: &self.synonyms_index,
|
||||||
|
words_index: &self.words_index,
|
||||||
|
docs_words_index: &self.docs_words_index,
|
||||||
|
documents_index: &self.documents_index,
|
||||||
|
custom_settings_index: &self.custom_settings_index,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn schema(&self) -> Schema {
|
||||||
|
self.cache.load().schema.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn custom_settings(&self) -> CustomSettingsIndex {
|
||||||
|
self.custom_settings_index.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn documents_addition<D>(&self) -> DocumentsAddition<D> {
|
||||||
|
DocumentsAddition::new(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn documents_deletion(&self) -> DocumentsDeletion {
|
||||||
|
DocumentsDeletion::new(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn synonyms_addition(&self) -> SynonymsAddition {
|
||||||
|
SynonymsAddition::new(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn synonyms_deletion(&self) -> SynonymsDeletion {
|
||||||
|
SynonymsDeletion::new(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update_status(
|
||||||
|
&self,
|
||||||
|
update_id: u64,
|
||||||
|
) -> Result<Option<UpdateStatus>, Error>
|
||||||
|
{
|
||||||
|
let update_id = update_id.to_be_bytes();
|
||||||
|
match self.updates_results_index.get(update_id)? {
|
||||||
|
Some(value) => {
|
||||||
|
let value = bincode::deserialize(&value)?;
|
||||||
|
Ok(Some(value))
|
||||||
|
},
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update_status_blocking(
|
||||||
|
&self,
|
||||||
|
update_id: u64,
|
||||||
|
) -> Result<UpdateStatus, Error>
|
||||||
|
{
|
||||||
|
let update_id_bytes = update_id.to_be_bytes().to_vec();
|
||||||
|
let mut subscription = self.updates_results_index.watch_prefix(update_id_bytes);
|
||||||
|
|
||||||
|
// if we find the update result return it now
|
||||||
|
if let Some(result) = self.update_status(update_id)? {
|
||||||
|
return Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// this subscription is used to block the thread
|
||||||
|
// until the update_id is inserted in the tree
|
||||||
|
subscription.next();
|
||||||
|
|
||||||
|
// the thread has been unblocked, it means that the update result
|
||||||
|
// has been inserted in the tree, retrieve it
|
||||||
|
Ok(self.update_status(update_id)?.unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn document<T>(
|
||||||
|
&self,
|
||||||
|
fields: Option<&HashSet<&str>>,
|
||||||
|
id: DocumentId,
|
||||||
|
) -> Result<Option<T>, DeserializerError>
|
||||||
|
where T: de::DeserializeOwned,
|
||||||
|
{
|
||||||
|
let schema = self.schema();
|
||||||
|
let fields = match fields {
|
||||||
|
Some(fields) => fields.into_iter().map(|name| schema.attribute(name)).collect(),
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut deserializer = Deserializer {
|
||||||
|
document_id: id,
|
||||||
|
index: &self,
|
||||||
|
fields: fields.as_ref(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: currently we return an error if all document fields are missing,
|
||||||
|
// returning None would have been better
|
||||||
|
T::deserialize(&mut deserializer).map(Some)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Index {
|
||||||
|
pub(crate) fn push_documents_addition<D>(&self, addition: Vec<D>) -> Result<u64, Error>
|
||||||
|
where D: serde::Serialize
|
||||||
|
{
|
||||||
|
let mut values = Vec::with_capacity(addition.len());
|
||||||
|
for add in addition {
|
||||||
|
let vec = rmp_serde::to_vec_named(&add)?;
|
||||||
|
let add = rmp_serde::from_read(&vec[..])?;
|
||||||
|
values.push(add);
|
||||||
|
}
|
||||||
|
|
||||||
|
let addition = Update::DocumentsAddition(values);
|
||||||
|
let update = rmp_serde::to_vec_named(&addition)?;
|
||||||
|
self.raw_push_update(update)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn push_documents_deletion(
|
||||||
|
&self,
|
||||||
|
deletion: Vec<DocumentId>,
|
||||||
|
) -> Result<u64, Error>
|
||||||
|
{
|
||||||
|
let deletion = Update::DocumentsDeletion(deletion);
|
||||||
|
let update = rmp_serde::to_vec_named(&deletion)?;
|
||||||
|
self.raw_push_update(update)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn push_synonyms_addition(
|
||||||
|
&self,
|
||||||
|
addition: BTreeMap<String, Vec<String>>,
|
||||||
|
) -> Result<u64, Error>
|
||||||
|
{
|
||||||
|
let addition = Update::SynonymsAddition(addition);
|
||||||
|
let update = rmp_serde::to_vec_named(&addition)?;
|
||||||
|
self.raw_push_update(update)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn push_synonyms_deletion(
|
||||||
|
&self,
|
||||||
|
deletion: BTreeMap<String, Option<Vec<String>>>,
|
||||||
|
) -> Result<u64, Error>
|
||||||
|
{
|
||||||
|
let deletion = Update::SynonymsDeletion(deletion);
|
||||||
|
let update = rmp_serde::to_vec_named(&deletion)?;
|
||||||
|
self.raw_push_update(update)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn raw_push_update(&self, raw_update: Vec<u8>) -> Result<u64, Error> {
|
||||||
|
let update_id = self.db.generate_id()?;
|
||||||
|
let update_id_array = update_id.to_be_bytes();
|
||||||
|
|
||||||
|
self.updates_index.insert(update_id_array, raw_update)?;
|
||||||
|
|
||||||
|
Ok(update_id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct RefIndex<'a> {
|
||||||
|
pub(crate) cache: Guard<'static, Arc<Cache>>,
|
||||||
|
pub main_index: &'a MainIndex,
|
||||||
|
pub synonyms_index: &'a SynonymsIndex,
|
||||||
|
pub words_index: &'a WordsIndex,
|
||||||
|
pub docs_words_index: &'a DocsWordsIndex,
|
||||||
|
pub documents_index: &'a DocumentsIndex,
|
||||||
|
pub custom_settings_index: &'a CustomSettingsIndex,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Store for RefIndex<'_> {
|
||||||
|
type Error = Error;
|
||||||
|
|
||||||
|
fn words(&self) -> Result<&fst::Set, Self::Error> {
|
||||||
|
Ok(&self.cache.words)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
||||||
|
Ok(self.words_index.doc_indexes(word)?)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn synonyms(&self) -> Result<&fst::Set, Self::Error> {
|
||||||
|
Ok(&self.cache.synonyms)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, Self::Error> {
|
||||||
|
Ok(self.synonyms_index.alternatives_to(word)?)
|
||||||
|
}
|
||||||
|
}
|
21
meilidb-data/src/database/index/synonyms_index.rs
Normal file
21
meilidb-data/src/database/index/synonyms_index.rs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct SynonymsIndex(pub(crate) Arc<sled::Tree>);
|
||||||
|
|
||||||
|
impl SynonymsIndex {
|
||||||
|
pub fn alternatives_to(&self, word: &[u8]) -> sled::Result<Option<fst::Set>> {
|
||||||
|
match self.0.get(word)? {
|
||||||
|
Some(vector) => Ok(Some(fst::Set::from_bytes(vector.to_vec()).unwrap())),
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_alternatives_to(&self, word: &[u8], value: Vec<u8>) -> sled::Result<()> {
|
||||||
|
self.0.insert(word, value).map(drop)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn del_alternatives_of(&self, word: &[u8]) -> sled::Result<()> {
|
||||||
|
self.0.remove(word).map(drop)
|
||||||
|
}
|
||||||
|
}
|
@ -1,14 +1,14 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use meilidb_core::DocIndex;
|
use meilidb_core::DocIndex;
|
||||||
use sdset::{Set, SetBuf};
|
use sdset::{Set, SetBuf};
|
||||||
use zerocopy::{LayoutVerified, AsBytes};
|
use zerocopy::{LayoutVerified, AsBytes};
|
||||||
|
|
||||||
use crate::database::raw_index::InnerRawIndex;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct WordsIndex(pub(crate) InnerRawIndex);
|
pub struct WordsIndex(pub(crate) Arc<sled::Tree>);
|
||||||
|
|
||||||
impl WordsIndex {
|
impl WordsIndex {
|
||||||
pub fn doc_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, rocksdb::Error> {
|
pub fn doc_indexes(&self, word: &[u8]) -> sled::Result<Option<SetBuf<DocIndex>>> {
|
||||||
// we must force an allocation to make the memory aligned
|
// we must force an allocation to make the memory aligned
|
||||||
match self.0.get(word)? {
|
match self.0.get(word)? {
|
||||||
Some(bytes) => {
|
Some(bytes) => {
|
||||||
@ -36,13 +36,11 @@ impl WordsIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> Result<(), rocksdb::Error> {
|
pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> sled::Result<()> {
|
||||||
self.0.set(word, set.as_bytes())?;
|
self.0.insert(word, set.as_bytes()).map(drop)
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn del_doc_indexes(&self, word: &[u8]) -> Result<(), rocksdb::Error> {
|
pub fn del_doc_indexes(&self, word: &[u8]) -> sled::Result<()> {
|
||||||
self.0.delete(word)?;
|
self.0.remove(word).map(drop)
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,88 +1,64 @@
|
|||||||
use std::collections::hash_map::Entry;
|
use std::collections::hash_map::Entry;
|
||||||
use std::collections::{HashSet, HashMap};
|
use std::collections::{HashSet, HashMap};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::{Arc, RwLock};
|
use std::sync::RwLock;
|
||||||
use meilidb_schema::Schema;
|
use meilidb_schema::Schema;
|
||||||
|
|
||||||
mod custom_settings;
|
|
||||||
mod docs_words_index;
|
|
||||||
mod documents_addition;
|
|
||||||
mod documents_deletion;
|
|
||||||
mod documents_index;
|
|
||||||
mod error;
|
mod error;
|
||||||
mod index;
|
mod index;
|
||||||
mod main_index;
|
mod update;
|
||||||
mod raw_index;
|
|
||||||
mod synonyms_addition;
|
|
||||||
mod synonyms_deletion;
|
|
||||||
mod synonyms_index;
|
|
||||||
mod words_index;
|
|
||||||
|
|
||||||
pub use self::error::Error;
|
pub use self::error::Error;
|
||||||
pub use self::index::Index;
|
pub use self::index::{Index, CustomSettingsIndex};
|
||||||
pub use self::custom_settings::CustomSettings;
|
|
||||||
|
|
||||||
use self::docs_words_index::DocsWordsIndex;
|
pub use self::update::DocumentsAddition;
|
||||||
use self::documents_addition::DocumentsAddition;
|
pub use self::update::DocumentsDeletion;
|
||||||
use self::documents_deletion::DocumentsDeletion;
|
pub use self::update::SynonymsAddition;
|
||||||
use self::synonyms_addition::SynonymsAddition;
|
pub use self::update::SynonymsDeletion;
|
||||||
use self::synonyms_deletion::SynonymsDeletion;
|
|
||||||
use self::documents_index::DocumentsIndex;
|
use self::update::apply_documents_addition;
|
||||||
use self::index::InnerIndex;
|
use self::update::apply_documents_deletion;
|
||||||
use self::main_index::MainIndex;
|
use self::update::apply_synonyms_addition;
|
||||||
use self::raw_index::{RawIndex, InnerRawIndex};
|
use self::update::apply_synonyms_deletion;
|
||||||
use self::words_index::WordsIndex;
|
|
||||||
use self::synonyms_index::SynonymsIndex;
|
fn load_indexes(tree: &sled::Tree) -> Result<HashSet<String>, Error> {
|
||||||
|
match tree.get("indexes")? {
|
||||||
|
Some(bytes) => Ok(bincode::deserialize(&bytes)?),
|
||||||
|
None => Ok(HashSet::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Database {
|
pub struct Database {
|
||||||
cache: RwLock<HashMap<String, Arc<Index>>>,
|
cache: RwLock<HashMap<String, Index>>,
|
||||||
inner: Arc<rocksdb::DB>,
|
inner: sled::Db,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Database {
|
impl Database {
|
||||||
pub fn start_default<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
|
pub fn open<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
|
||||||
let path = path.as_ref();
|
|
||||||
let cache = RwLock::new(HashMap::new());
|
let cache = RwLock::new(HashMap::new());
|
||||||
|
let inner = sled::Db::open(path)?;
|
||||||
|
|
||||||
let options = {
|
let indexes = load_indexes(&inner)?;
|
||||||
let mut options = rocksdb::Options::default();
|
|
||||||
options.create_if_missing(true);
|
|
||||||
options
|
|
||||||
};
|
|
||||||
let cfs = rocksdb::DB::list_cf(&options, path).unwrap_or(Vec::new());
|
|
||||||
let inner = Arc::new(rocksdb::DB::open_cf(&options, path, &cfs)?);
|
|
||||||
let database = Database { cache, inner };
|
let database = Database { cache, inner };
|
||||||
|
|
||||||
let mut indexes: Vec<_> = cfs.iter()
|
|
||||||
.filter_map(|c| c.split('-').nth(0).filter(|&c| c != "default"))
|
|
||||||
.collect();
|
|
||||||
indexes.sort_unstable();
|
|
||||||
indexes.dedup();
|
|
||||||
|
|
||||||
for index in indexes {
|
for index in indexes {
|
||||||
database.open_index(index)?;
|
database.open_index(&index)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(database)
|
Ok(database)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn indexes(&self) -> Result<Option<HashSet<String>>, Error> {
|
pub fn indexes(&self) -> Result<HashSet<String>, Error> {
|
||||||
let bytes = match self.inner.get("indexes")? {
|
load_indexes(&self.inner)
|
||||||
Some(bytes) => bytes,
|
|
||||||
None => return Ok(None),
|
|
||||||
};
|
|
||||||
|
|
||||||
let indexes = bincode::deserialize(&bytes)?;
|
|
||||||
Ok(Some(indexes))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_indexes(&self, value: &HashSet<String>) -> Result<(), Error> {
|
fn set_indexes(&self, value: &HashSet<String>) -> Result<(), Error> {
|
||||||
let bytes = bincode::serialize(value)?;
|
let bytes = bincode::serialize(value)?;
|
||||||
self.inner.put("indexes", bytes)?;
|
self.inner.insert("indexes", bytes)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn open_index(&self, name: &str) -> Result<Option<Arc<Index>>, Error> {
|
pub fn open_index(&self, name: &str) -> Result<Option<Index>, Error> {
|
||||||
{
|
{
|
||||||
let cache = self.cache.read().unwrap();
|
let cache = self.cache.read().unwrap();
|
||||||
if let Some(index) = cache.get(name).cloned() {
|
if let Some(index) = cache.get(name).cloned() {
|
||||||
@ -96,56 +72,19 @@ impl Database {
|
|||||||
occupied.get().clone()
|
occupied.get().clone()
|
||||||
},
|
},
|
||||||
Entry::Vacant(vacant) => {
|
Entry::Vacant(vacant) => {
|
||||||
if !self.indexes()?.map_or(false, |x| x.contains(name)) {
|
if !self.indexes()?.contains(name) {
|
||||||
return Ok(None)
|
return Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
let main = {
|
let index = Index::new(self.inner.clone(), name)?;
|
||||||
self.inner.cf_handle(name).expect("cf not found");
|
vacant.insert(index).clone()
|
||||||
MainIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let synonyms = {
|
|
||||||
let cf_name = format!("{}-synonyms", name);
|
|
||||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
|
||||||
SynonymsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let words = {
|
|
||||||
let cf_name = format!("{}-words", name);
|
|
||||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
|
||||||
WordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let docs_words = {
|
|
||||||
let cf_name = format!("{}-docs-words", name);
|
|
||||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
|
||||||
DocsWordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let documents = {
|
|
||||||
let cf_name = format!("{}-documents", name);
|
|
||||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
|
||||||
DocumentsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let custom = {
|
|
||||||
let cf_name = format!("{}-custom", name);
|
|
||||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
|
||||||
CustomSettings(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let raw_index = RawIndex { main, synonyms, words, docs_words, documents, custom };
|
|
||||||
let index = Index::from_raw(raw_index)?;
|
|
||||||
|
|
||||||
vacant.insert(Arc::new(index)).clone()
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Some(index))
|
Ok(Some(index))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn create_index(&self, name: &str, schema: Schema) -> Result<Arc<Index>, Error> {
|
pub fn create_index(&self, name: &str, schema: Schema) -> Result<Index, Error> {
|
||||||
let mut cache = self.cache.write().unwrap();
|
let mut cache = self.cache.write().unwrap();
|
||||||
|
|
||||||
let index = match cache.entry(name.to_string()) {
|
let index = match cache.entry(name.to_string()) {
|
||||||
@ -153,57 +92,13 @@ impl Database {
|
|||||||
occupied.get().clone()
|
occupied.get().clone()
|
||||||
},
|
},
|
||||||
Entry::Vacant(vacant) => {
|
Entry::Vacant(vacant) => {
|
||||||
let main = {
|
let index = Index::with_schema(self.inner.clone(), name, schema)?;
|
||||||
self.inner.create_cf(name, &rocksdb::Options::default())?;
|
|
||||||
MainIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(prev_schema) = main.schema()? {
|
let mut indexes = self.indexes()?;
|
||||||
if prev_schema != schema {
|
|
||||||
return Err(Error::SchemaDiffer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
main.set_schema(&schema)?;
|
|
||||||
|
|
||||||
let synonyms = {
|
|
||||||
let cf_name = format!("{}-synonyms", name);
|
|
||||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
|
||||||
SynonymsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let words = {
|
|
||||||
let cf_name = format!("{}-words", name);
|
|
||||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
|
||||||
WordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let docs_words = {
|
|
||||||
let cf_name = format!("{}-docs-words", name);
|
|
||||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
|
||||||
DocsWordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let documents = {
|
|
||||||
let cf_name = format!("{}-documents", name);
|
|
||||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
|
||||||
DocumentsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let custom = {
|
|
||||||
let cf_name = format!("{}-custom", name);
|
|
||||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
|
||||||
CustomSettings(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut indexes = self.indexes()?.unwrap_or_else(HashSet::new);
|
|
||||||
indexes.insert(name.to_string());
|
indexes.insert(name.to_string());
|
||||||
self.set_indexes(&indexes)?;
|
self.set_indexes(&indexes)?;
|
||||||
|
|
||||||
let raw_index = RawIndex { main, synonyms, words, docs_words, documents, custom };
|
vacant.insert(index).clone()
|
||||||
let index = Index::from_raw(raw_index)?;
|
|
||||||
|
|
||||||
vacant.insert(Arc::new(index)).clone()
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,88 +0,0 @@
|
|||||||
use std::sync::Arc;
|
|
||||||
use super::{MainIndex, SynonymsIndex, WordsIndex, DocsWordsIndex, DocumentsIndex, CustomSettings};
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct RawIndex {
|
|
||||||
pub main: MainIndex,
|
|
||||||
pub synonyms: SynonymsIndex,
|
|
||||||
pub words: WordsIndex,
|
|
||||||
pub docs_words: DocsWordsIndex,
|
|
||||||
pub documents: DocumentsIndex,
|
|
||||||
pub custom: CustomSettings,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl RawIndex {
|
|
||||||
pub(crate) fn compact(&self) {
|
|
||||||
self.main.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
|
||||||
self.synonyms.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
|
||||||
self.words.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
|
||||||
self.docs_words.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
|
||||||
self.documents.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
|
||||||
self.custom.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct InnerRawIndex {
|
|
||||||
database: Arc<rocksdb::DB>,
|
|
||||||
name: Arc<str>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl InnerRawIndex {
|
|
||||||
pub fn new(database: Arc<rocksdb::DB>, name: Arc<str>) -> InnerRawIndex {
|
|
||||||
InnerRawIndex { database, name }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get<K>(&self, key: K) -> Result<Option<rocksdb::DBVector>, rocksdb::Error>
|
|
||||||
where K: AsRef<[u8]>,
|
|
||||||
{
|
|
||||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
|
||||||
self.database.get_cf(cf, key)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_pinned<K>(&self, key: K) -> Result<Option<rocksdb::DBPinnableSlice>, rocksdb::Error>
|
|
||||||
where K: AsRef<[u8]>,
|
|
||||||
{
|
|
||||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
|
||||||
self.database.get_pinned_cf(cf, key)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn iterator(&self, from: rocksdb::IteratorMode) -> Result<rocksdb::DBIterator, rocksdb::Error> {
|
|
||||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
|
||||||
self.database.iterator_cf(cf, from)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set<K, V>(&self, key: K, value: V) -> Result<(), rocksdb::Error>
|
|
||||||
where K: AsRef<[u8]>,
|
|
||||||
V: AsRef<[u8]>,
|
|
||||||
{
|
|
||||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
|
||||||
self.database.put_cf(cf, key, value)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn delete<K>(&self, key: K) -> Result<(), rocksdb::Error>
|
|
||||||
where K: AsRef<[u8]>
|
|
||||||
{
|
|
||||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
|
||||||
self.database.delete_cf(cf, key)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn delete_range<K>(&self, start: K, end: K) -> Result<(), rocksdb::Error>
|
|
||||||
where K: AsRef<[u8]>,
|
|
||||||
{
|
|
||||||
let mut batch = rocksdb::WriteBatch::default();
|
|
||||||
|
|
||||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
|
||||||
batch.delete_range_cf(cf, start, end)?;
|
|
||||||
|
|
||||||
self.database.write(batch)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn compact_range<S, E>(&self, start: Option<S>, end: Option<E>)
|
|
||||||
where S: AsRef<[u8]>,
|
|
||||||
E: AsRef<[u8]>,
|
|
||||||
{
|
|
||||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
|
||||||
self.database.compact_range_cf(cf, start, end)
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,87 +0,0 @@
|
|||||||
use std::collections::BTreeMap;
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use fst::{SetBuilder, set::OpBuilder};
|
|
||||||
use meilidb_core::normalize_str;
|
|
||||||
use sdset::SetBuf;
|
|
||||||
|
|
||||||
use crate::database::index::InnerIndex;
|
|
||||||
use super::{Error, Index};
|
|
||||||
|
|
||||||
pub struct SynonymsAddition<'a> {
|
|
||||||
inner: &'a Index,
|
|
||||||
synonyms: BTreeMap<String, Vec<String>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> SynonymsAddition<'a> {
|
|
||||||
pub fn new(inner: &'a Index) -> SynonymsAddition<'a> {
|
|
||||||
SynonymsAddition { inner, synonyms: BTreeMap::new() }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
|
|
||||||
where S: AsRef<str>,
|
|
||||||
T: AsRef<str>,
|
|
||||||
I: IntoIterator<Item=T>,
|
|
||||||
{
|
|
||||||
let synonym = normalize_str(synonym.as_ref());
|
|
||||||
let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
|
|
||||||
self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn finalize(self) -> Result<(), Error> {
|
|
||||||
let lease_inner = self.inner.lease_inner();
|
|
||||||
let synonyms = &lease_inner.raw.synonyms;
|
|
||||||
let main = &lease_inner.raw.main;
|
|
||||||
|
|
||||||
let mut synonyms_builder = SetBuilder::memory();
|
|
||||||
|
|
||||||
for (synonym, alternatives) in self.synonyms {
|
|
||||||
synonyms_builder.insert(&synonym).unwrap();
|
|
||||||
|
|
||||||
let alternatives = {
|
|
||||||
let alternatives = SetBuf::from_dirty(alternatives);
|
|
||||||
let mut alternatives_builder = SetBuilder::memory();
|
|
||||||
alternatives_builder.extend_iter(alternatives).unwrap();
|
|
||||||
alternatives_builder.into_inner().unwrap()
|
|
||||||
};
|
|
||||||
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let delta_synonyms = synonyms_builder
|
|
||||||
.into_inner()
|
|
||||||
.and_then(fst::Set::from_bytes)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let synonyms = match main.synonyms_set()? {
|
|
||||||
Some(synonyms) => {
|
|
||||||
let op = OpBuilder::new()
|
|
||||||
.add(synonyms.stream())
|
|
||||||
.add(delta_synonyms.stream())
|
|
||||||
.r#union();
|
|
||||||
|
|
||||||
let mut synonyms_builder = SetBuilder::memory();
|
|
||||||
synonyms_builder.extend_stream(op).unwrap();
|
|
||||||
synonyms_builder
|
|
||||||
.into_inner()
|
|
||||||
.and_then(fst::Set::from_bytes)
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
None => delta_synonyms,
|
|
||||||
};
|
|
||||||
|
|
||||||
main.set_synonyms_set(&synonyms)?;
|
|
||||||
|
|
||||||
// update the "consistent" view of the Index
|
|
||||||
let words = Arc::new(main.words_set()?.unwrap_or_default());
|
|
||||||
let ranked_map = lease_inner.ranked_map.clone();
|
|
||||||
let synonyms = Arc::new(synonyms);
|
|
||||||
let schema = lease_inner.schema.clone();
|
|
||||||
let raw = lease_inner.raw.clone();
|
|
||||||
lease_inner.raw.compact();
|
|
||||||
|
|
||||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
|
||||||
self.inner.0.store(Arc::new(inner));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,130 +0,0 @@
|
|||||||
use std::collections::BTreeMap;
|
|
||||||
use std::iter::FromIterator;
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use fst::{SetBuilder, set::OpBuilder};
|
|
||||||
use meilidb_core::normalize_str;
|
|
||||||
use sdset::SetBuf;
|
|
||||||
|
|
||||||
use crate::database::index::InnerIndex;
|
|
||||||
use super::{Error, Index};
|
|
||||||
|
|
||||||
pub struct SynonymsDeletion<'a> {
|
|
||||||
inner: &'a Index,
|
|
||||||
synonyms: BTreeMap<String, Option<Vec<String>>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> SynonymsDeletion<'a> {
|
|
||||||
pub fn new(inner: &'a Index) -> SynonymsDeletion<'a> {
|
|
||||||
SynonymsDeletion { inner, synonyms: BTreeMap::new() }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
|
|
||||||
let synonym = normalize_str(synonym.as_ref());
|
|
||||||
self.synonyms.insert(synonym, None);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
|
|
||||||
where S: AsRef<str>,
|
|
||||||
T: AsRef<str>,
|
|
||||||
I: Iterator<Item=T>,
|
|
||||||
{
|
|
||||||
let synonym = normalize_str(synonym.as_ref());
|
|
||||||
let value = self.synonyms.entry(synonym).or_insert(None);
|
|
||||||
let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
|
|
||||||
match value {
|
|
||||||
Some(v) => v.extend(alternatives),
|
|
||||||
None => *value = Some(Vec::from_iter(alternatives)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn finalize(self) -> Result<(), Error> {
|
|
||||||
let lease_inner = self.inner.lease_inner();
|
|
||||||
let synonyms = &lease_inner.raw.synonyms;
|
|
||||||
let main = &lease_inner.raw.main;
|
|
||||||
|
|
||||||
let mut delete_whole_synonym_builder = SetBuilder::memory();
|
|
||||||
|
|
||||||
for (synonym, alternatives) in self.synonyms {
|
|
||||||
match alternatives {
|
|
||||||
Some(alternatives) => {
|
|
||||||
let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?;
|
|
||||||
let prev_alternatives = match prev_alternatives {
|
|
||||||
Some(alternatives) => alternatives,
|
|
||||||
None => continue,
|
|
||||||
};
|
|
||||||
|
|
||||||
let delta_alternatives = {
|
|
||||||
let alternatives = SetBuf::from_dirty(alternatives);
|
|
||||||
let mut builder = SetBuilder::memory();
|
|
||||||
builder.extend_iter(alternatives).unwrap();
|
|
||||||
builder.into_inner()
|
|
||||||
.and_then(fst::Set::from_bytes)
|
|
||||||
.unwrap()
|
|
||||||
};
|
|
||||||
|
|
||||||
let op = OpBuilder::new()
|
|
||||||
.add(prev_alternatives.stream())
|
|
||||||
.add(delta_alternatives.stream())
|
|
||||||
.difference();
|
|
||||||
|
|
||||||
let (alternatives, empty_alternatives) = {
|
|
||||||
let mut builder = SetBuilder::memory();
|
|
||||||
let len = builder.get_ref().len();
|
|
||||||
builder.extend_stream(op).unwrap();
|
|
||||||
let is_empty = len == builder.get_ref().len();
|
|
||||||
let alternatives = builder.into_inner().unwrap();
|
|
||||||
(alternatives, is_empty)
|
|
||||||
};
|
|
||||||
|
|
||||||
if empty_alternatives {
|
|
||||||
delete_whole_synonym_builder.insert(synonym.as_bytes())?;
|
|
||||||
} else {
|
|
||||||
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
None => {
|
|
||||||
delete_whole_synonym_builder.insert(&synonym).unwrap();
|
|
||||||
synonyms.del_alternatives_of(synonym.as_bytes())?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let delta_synonyms = delete_whole_synonym_builder
|
|
||||||
.into_inner()
|
|
||||||
.and_then(fst::Set::from_bytes)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let synonyms = match main.synonyms_set()? {
|
|
||||||
Some(synonyms) => {
|
|
||||||
let op = OpBuilder::new()
|
|
||||||
.add(synonyms.stream())
|
|
||||||
.add(delta_synonyms.stream())
|
|
||||||
.difference();
|
|
||||||
|
|
||||||
let mut synonyms_builder = SetBuilder::memory();
|
|
||||||
synonyms_builder.extend_stream(op).unwrap();
|
|
||||||
synonyms_builder
|
|
||||||
.into_inner()
|
|
||||||
.and_then(fst::Set::from_bytes)
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
None => fst::Set::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
main.set_synonyms_set(&synonyms)?;
|
|
||||||
|
|
||||||
// update the "consistent" view of the Index
|
|
||||||
let words = Arc::new(main.words_set()?.unwrap_or_default());
|
|
||||||
let ranked_map = lease_inner.ranked_map.clone();
|
|
||||||
let synonyms = Arc::new(synonyms);
|
|
||||||
let schema = lease_inner.schema.clone();
|
|
||||||
let raw = lease_inner.raw.clone();
|
|
||||||
lease_inner.raw.compact();
|
|
||||||
|
|
||||||
let inner = InnerIndex { words, synonyms, schema, ranked_map, raw };
|
|
||||||
self.inner.0.store(Arc::new(inner));
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,23 +0,0 @@
|
|||||||
use crate::database::raw_index::InnerRawIndex;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct SynonymsIndex(pub(crate) InnerRawIndex);
|
|
||||||
|
|
||||||
impl SynonymsIndex {
|
|
||||||
pub fn alternatives_to(&self, word: &[u8]) -> Result<Option<fst::Set>, rocksdb::Error> {
|
|
||||||
match self.0.get(word)? {
|
|
||||||
Some(vector) => Ok(Some(fst::Set::from_bytes(vector.to_vec()).unwrap())),
|
|
||||||
None => Ok(None),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set_alternatives_to(&self, word: &[u8], value: Vec<u8>) -> Result<(), rocksdb::Error> {
|
|
||||||
self.0.set(word, value)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn del_alternatives_of(&self, word: &[u8]) -> Result<(), rocksdb::Error> {
|
|
||||||
self.0.delete(word)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
137
meilidb-data/src/database/update/documents_addition.rs
Normal file
137
meilidb-data/src/database/update/documents_addition.rs
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use fst::{SetBuilder, set::OpBuilder};
|
||||||
|
use sdset::{SetOperation, duo::Union};
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
use crate::indexer::Indexer;
|
||||||
|
use crate::serde::{extract_document_id, Serializer, RamDocumentStore};
|
||||||
|
use crate::RankedMap;
|
||||||
|
|
||||||
|
use crate::database::{Error, Index, index::Cache, apply_documents_deletion};
|
||||||
|
|
||||||
|
pub struct DocumentsAddition<'a, D> {
|
||||||
|
index: &'a Index,
|
||||||
|
documents: Vec<D>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, D> DocumentsAddition<'a, D> {
|
||||||
|
pub fn new(index: &'a Index) -> DocumentsAddition<'a, D> {
|
||||||
|
DocumentsAddition { index, documents: Vec::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update_document(&mut self, document: D) {
|
||||||
|
self.documents.push(document);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finalize(self) -> Result<u64, Error>
|
||||||
|
where D: serde::Serialize
|
||||||
|
{
|
||||||
|
self.index.push_documents_addition(self.documents)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply_documents_addition(
|
||||||
|
index: &Index,
|
||||||
|
mut ranked_map: RankedMap,
|
||||||
|
addition: Vec<rmpv::Value>,
|
||||||
|
) -> Result<(), Error>
|
||||||
|
{
|
||||||
|
let mut document_ids = HashSet::new();
|
||||||
|
let mut document_store = RamDocumentStore::new();
|
||||||
|
let mut indexer = Indexer::new();
|
||||||
|
|
||||||
|
let schema = &index.schema();
|
||||||
|
let identifier = schema.identifier_name();
|
||||||
|
|
||||||
|
for document in addition {
|
||||||
|
let document_id = match extract_document_id(identifier, &document)? {
|
||||||
|
Some(id) => id,
|
||||||
|
None => return Err(Error::MissingDocumentId),
|
||||||
|
};
|
||||||
|
|
||||||
|
// 1. store the document id for future deletion
|
||||||
|
document_ids.insert(document_id);
|
||||||
|
|
||||||
|
// 2. index the document fields in ram stores
|
||||||
|
let serializer = Serializer {
|
||||||
|
schema,
|
||||||
|
document_store: &mut document_store,
|
||||||
|
indexer: &mut indexer,
|
||||||
|
ranked_map: &mut ranked_map,
|
||||||
|
document_id,
|
||||||
|
};
|
||||||
|
|
||||||
|
document.serialize(serializer)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ref_index = index.as_ref();
|
||||||
|
let docs_words = ref_index.docs_words_index;
|
||||||
|
let documents = ref_index.documents_index;
|
||||||
|
let main = ref_index.main_index;
|
||||||
|
let words = ref_index.words_index;
|
||||||
|
|
||||||
|
// 1. remove the previous documents match indexes
|
||||||
|
let document_ids = document_ids.into_iter().collect();
|
||||||
|
apply_documents_deletion(index, ranked_map.clone(), document_ids)?;
|
||||||
|
|
||||||
|
// 2. insert new document attributes in the database
|
||||||
|
for ((id, attr), value) in document_store.into_inner() {
|
||||||
|
documents.set_document_field(id, attr, value)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let indexed = indexer.build();
|
||||||
|
let mut delta_words_builder = SetBuilder::memory();
|
||||||
|
|
||||||
|
for (word, delta_set) in indexed.words_doc_indexes {
|
||||||
|
delta_words_builder.insert(&word).unwrap();
|
||||||
|
|
||||||
|
let set = match words.doc_indexes(&word)? {
|
||||||
|
Some(set) => Union::new(&set, &delta_set).into_set_buf(),
|
||||||
|
None => delta_set,
|
||||||
|
};
|
||||||
|
|
||||||
|
words.set_doc_indexes(&word, &set)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (id, words) in indexed.docs_words {
|
||||||
|
docs_words.set_doc_words(id, &words)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let delta_words = delta_words_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let words = match main.words_set()? {
|
||||||
|
Some(words) => {
|
||||||
|
let op = OpBuilder::new()
|
||||||
|
.add(words.stream())
|
||||||
|
.add(delta_words.stream())
|
||||||
|
.r#union();
|
||||||
|
|
||||||
|
let mut words_builder = SetBuilder::memory();
|
||||||
|
words_builder.extend_stream(op).unwrap();
|
||||||
|
words_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
None => delta_words,
|
||||||
|
};
|
||||||
|
|
||||||
|
main.set_words_set(&words)?;
|
||||||
|
main.set_ranked_map(&ranked_map)?;
|
||||||
|
|
||||||
|
// update the "consistent" view of the Index
|
||||||
|
let cache = ref_index.cache;
|
||||||
|
let words = Arc::new(words);
|
||||||
|
let synonyms = cache.synonyms.clone();
|
||||||
|
let schema = cache.schema.clone();
|
||||||
|
|
||||||
|
let cache = Cache { words, synonyms, schema, ranked_map };
|
||||||
|
index.cache.store(Arc::new(cache));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
144
meilidb-data/src/database/update/documents_deletion.rs
Normal file
144
meilidb-data/src/database/update/documents_deletion.rs
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
use std::collections::{HashMap, BTreeSet};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use fst::{SetBuilder, Streamer};
|
||||||
|
use meilidb_core::DocumentId;
|
||||||
|
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};
|
||||||
|
|
||||||
|
use crate::RankedMap;
|
||||||
|
use crate::serde::extract_document_id;
|
||||||
|
|
||||||
|
use crate::database::{Index, Error, index::Cache};
|
||||||
|
|
||||||
|
pub struct DocumentsDeletion<'a> {
|
||||||
|
index: &'a Index,
|
||||||
|
documents: Vec<DocumentId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> DocumentsDeletion<'a> {
|
||||||
|
pub fn new(index: &'a Index) -> DocumentsDeletion<'a> {
|
||||||
|
DocumentsDeletion { index, documents: Vec::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delete_document_by_id(&mut self, document_id: DocumentId) {
|
||||||
|
self.documents.push(document_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delete_document<D>(&mut self, document: D) -> Result<(), Error>
|
||||||
|
where D: serde::Serialize,
|
||||||
|
{
|
||||||
|
let schema = self.index.schema();
|
||||||
|
let identifier = schema.identifier_name();
|
||||||
|
let document_id = match extract_document_id(identifier, &document)? {
|
||||||
|
Some(id) => id,
|
||||||
|
None => return Err(Error::MissingDocumentId),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.delete_document_by_id(document_id);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finalize(self) -> Result<u64, Error> {
|
||||||
|
self.index.push_documents_deletion(self.documents)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Extend<DocumentId> for DocumentsDeletion<'_> {
|
||||||
|
fn extend<T: IntoIterator<Item=DocumentId>>(&mut self, iter: T) {
|
||||||
|
self.documents.extend(iter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply_documents_deletion(
|
||||||
|
index: &Index,
|
||||||
|
mut ranked_map: RankedMap,
|
||||||
|
deletion: Vec<DocumentId>,
|
||||||
|
) -> Result<(), Error>
|
||||||
|
{
|
||||||
|
let ref_index = index.as_ref();
|
||||||
|
let schema = index.schema();
|
||||||
|
let docs_words = ref_index.docs_words_index;
|
||||||
|
let documents = ref_index.documents_index;
|
||||||
|
let main = ref_index.main_index;
|
||||||
|
let words = ref_index.words_index;
|
||||||
|
|
||||||
|
let idset = SetBuf::from_dirty(deletion);
|
||||||
|
|
||||||
|
// collect the ranked attributes according to the schema
|
||||||
|
let ranked_attrs: Vec<_> = schema.iter()
|
||||||
|
.filter_map(|(_, attr, prop)| {
|
||||||
|
if prop.is_ranked() { Some(attr) } else { None }
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut words_document_ids = HashMap::new();
|
||||||
|
for id in idset {
|
||||||
|
// remove all the ranked attributes from the ranked_map
|
||||||
|
for ranked_attr in &ranked_attrs {
|
||||||
|
ranked_map.remove(id, *ranked_attr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(words) = docs_words.doc_words(id)? {
|
||||||
|
let mut stream = words.stream();
|
||||||
|
while let Some(word) = stream.next() {
|
||||||
|
let word = word.to_vec();
|
||||||
|
words_document_ids.entry(word).or_insert_with(Vec::new).push(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut removed_words = BTreeSet::new();
|
||||||
|
for (word, document_ids) in words_document_ids {
|
||||||
|
let document_ids = SetBuf::from_dirty(document_ids);
|
||||||
|
|
||||||
|
if let Some(doc_indexes) = words.doc_indexes(&word)? {
|
||||||
|
let op = DifferenceByKey::new(&doc_indexes, &document_ids, |d| d.document_id, |id| *id);
|
||||||
|
let doc_indexes = op.into_set_buf();
|
||||||
|
|
||||||
|
if !doc_indexes.is_empty() {
|
||||||
|
words.set_doc_indexes(&word, &doc_indexes)?;
|
||||||
|
} else {
|
||||||
|
words.del_doc_indexes(&word)?;
|
||||||
|
removed_words.insert(word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for id in document_ids {
|
||||||
|
documents.del_all_document_fields(id)?;
|
||||||
|
docs_words.del_doc_words(id)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let removed_words = fst::Set::from_iter(removed_words).unwrap();
|
||||||
|
let words = match main.words_set()? {
|
||||||
|
Some(words_set) => {
|
||||||
|
let op = fst::set::OpBuilder::new()
|
||||||
|
.add(words_set.stream())
|
||||||
|
.add(removed_words.stream())
|
||||||
|
.difference();
|
||||||
|
|
||||||
|
let mut words_builder = SetBuilder::memory();
|
||||||
|
words_builder.extend_stream(op).unwrap();
|
||||||
|
words_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
None => fst::Set::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
main.set_words_set(&words)?;
|
||||||
|
main.set_ranked_map(&ranked_map)?;
|
||||||
|
|
||||||
|
// update the "consistent" view of the Index
|
||||||
|
let cache = ref_index.cache;
|
||||||
|
let words = Arc::new(words);
|
||||||
|
let synonyms = cache.synonyms.clone();
|
||||||
|
let schema = cache.schema.clone();
|
||||||
|
|
||||||
|
let cache = Cache { words, synonyms, schema, ranked_map };
|
||||||
|
index.cache.store(Arc::new(cache));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
9
meilidb-data/src/database/update/mod.rs
Normal file
9
meilidb-data/src/database/update/mod.rs
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
mod documents_addition;
|
||||||
|
mod documents_deletion;
|
||||||
|
mod synonyms_addition;
|
||||||
|
mod synonyms_deletion;
|
||||||
|
|
||||||
|
pub use self::documents_addition::{DocumentsAddition, apply_documents_addition};
|
||||||
|
pub use self::documents_deletion::{DocumentsDeletion, apply_documents_deletion};
|
||||||
|
pub use self::synonyms_addition::{SynonymsAddition, apply_synonyms_addition};
|
||||||
|
pub use self::synonyms_deletion::{SynonymsDeletion, apply_synonyms_deletion};
|
93
meilidb-data/src/database/update/synonyms_addition.rs
Normal file
93
meilidb-data/src/database/update/synonyms_addition.rs
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use fst::{SetBuilder, set::OpBuilder};
|
||||||
|
use meilidb_core::normalize_str;
|
||||||
|
use sdset::SetBuf;
|
||||||
|
|
||||||
|
use crate::database::{Error, Index,index::Cache};
|
||||||
|
|
||||||
|
pub struct SynonymsAddition<'a> {
|
||||||
|
index: &'a Index,
|
||||||
|
synonyms: BTreeMap<String, Vec<String>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> SynonymsAddition<'a> {
|
||||||
|
pub fn new(index: &'a Index) -> SynonymsAddition<'a> {
|
||||||
|
SynonymsAddition { index, synonyms: BTreeMap::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_synonym<S, T, I>(&mut self, synonym: S, alternatives: I)
|
||||||
|
where S: AsRef<str>,
|
||||||
|
T: AsRef<str>,
|
||||||
|
I: IntoIterator<Item=T>,
|
||||||
|
{
|
||||||
|
let synonym = normalize_str(synonym.as_ref());
|
||||||
|
let alternatives = alternatives.into_iter().map(|s| s.as_ref().to_lowercase());
|
||||||
|
self.synonyms.entry(synonym).or_insert_with(Vec::new).extend(alternatives);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finalize(self) -> Result<u64, Error> {
|
||||||
|
self.index.push_synonyms_addition(self.synonyms)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply_synonyms_addition(
|
||||||
|
index: &Index,
|
||||||
|
addition: BTreeMap<String, Vec<String>>,
|
||||||
|
) -> Result<(), Error>
|
||||||
|
{
|
||||||
|
let ref_index = index.as_ref();
|
||||||
|
let synonyms = ref_index.synonyms_index;
|
||||||
|
let main = ref_index.main_index;
|
||||||
|
|
||||||
|
let mut synonyms_builder = SetBuilder::memory();
|
||||||
|
|
||||||
|
for (synonym, alternatives) in addition {
|
||||||
|
synonyms_builder.insert(&synonym).unwrap();
|
||||||
|
|
||||||
|
let alternatives = {
|
||||||
|
let alternatives = SetBuf::from_dirty(alternatives);
|
||||||
|
let mut alternatives_builder = SetBuilder::memory();
|
||||||
|
alternatives_builder.extend_iter(alternatives).unwrap();
|
||||||
|
alternatives_builder.into_inner().unwrap()
|
||||||
|
};
|
||||||
|
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let delta_synonyms = synonyms_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let synonyms = match main.synonyms_set()? {
|
||||||
|
Some(synonyms) => {
|
||||||
|
let op = OpBuilder::new()
|
||||||
|
.add(synonyms.stream())
|
||||||
|
.add(delta_synonyms.stream())
|
||||||
|
.r#union();
|
||||||
|
|
||||||
|
let mut synonyms_builder = SetBuilder::memory();
|
||||||
|
synonyms_builder.extend_stream(op).unwrap();
|
||||||
|
synonyms_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
None => delta_synonyms,
|
||||||
|
};
|
||||||
|
|
||||||
|
main.set_synonyms_set(&synonyms)?;
|
||||||
|
|
||||||
|
// update the "consistent" view of the Index
|
||||||
|
let cache = ref_index.cache;
|
||||||
|
let words = Arc::new(main.words_set()?.unwrap_or_default());
|
||||||
|
let ranked_map = cache.ranked_map.clone();
|
||||||
|
let synonyms = Arc::new(synonyms);
|
||||||
|
let schema = cache.schema.clone();
|
||||||
|
|
||||||
|
let cache = Cache { words, synonyms, schema, ranked_map };
|
||||||
|
index.cache.store(Arc::new(cache));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
136
meilidb-data/src/database/update/synonyms_deletion.rs
Normal file
136
meilidb-data/src/database/update/synonyms_deletion.rs
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::iter::FromIterator;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use fst::{SetBuilder, set::OpBuilder};
|
||||||
|
use meilidb_core::normalize_str;
|
||||||
|
use sdset::SetBuf;
|
||||||
|
|
||||||
|
use crate::database::{Error, Index, index::Cache};
|
||||||
|
|
||||||
|
pub struct SynonymsDeletion<'a> {
|
||||||
|
index: &'a Index,
|
||||||
|
synonyms: BTreeMap<String, Option<Vec<String>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> SynonymsDeletion<'a> {
|
||||||
|
pub fn new(index: &'a Index) -> SynonymsDeletion<'a> {
|
||||||
|
SynonymsDeletion { index, synonyms: BTreeMap::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delete_all_alternatives_of<S: AsRef<str>>(&mut self, synonym: S) {
|
||||||
|
let synonym = normalize_str(synonym.as_ref());
|
||||||
|
self.synonyms.insert(synonym, None);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delete_specific_alternatives_of<S, T, I>(&mut self, synonym: S, alternatives: I)
|
||||||
|
where S: AsRef<str>,
|
||||||
|
T: AsRef<str>,
|
||||||
|
I: Iterator<Item=T>,
|
||||||
|
{
|
||||||
|
let synonym = normalize_str(synonym.as_ref());
|
||||||
|
let value = self.synonyms.entry(synonym).or_insert(None);
|
||||||
|
let alternatives = alternatives.map(|s| s.as_ref().to_lowercase());
|
||||||
|
match value {
|
||||||
|
Some(v) => v.extend(alternatives),
|
||||||
|
None => *value = Some(Vec::from_iter(alternatives)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finalize(self) -> Result<u64, Error> {
|
||||||
|
self.index.push_synonyms_deletion(self.synonyms)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply_synonyms_deletion(
|
||||||
|
index: &Index,
|
||||||
|
deletion: BTreeMap<String, Option<Vec<String>>>,
|
||||||
|
) -> Result<(), Error>
|
||||||
|
{
|
||||||
|
let ref_index = index.as_ref();
|
||||||
|
let synonyms = ref_index.synonyms_index;
|
||||||
|
let main = ref_index.main_index;
|
||||||
|
|
||||||
|
let mut delete_whole_synonym_builder = SetBuilder::memory();
|
||||||
|
|
||||||
|
for (synonym, alternatives) in deletion {
|
||||||
|
match alternatives {
|
||||||
|
Some(alternatives) => {
|
||||||
|
let prev_alternatives = synonyms.alternatives_to(synonym.as_bytes())?;
|
||||||
|
let prev_alternatives = match prev_alternatives {
|
||||||
|
Some(alternatives) => alternatives,
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
let delta_alternatives = {
|
||||||
|
let alternatives = SetBuf::from_dirty(alternatives);
|
||||||
|
let mut builder = SetBuilder::memory();
|
||||||
|
builder.extend_iter(alternatives).unwrap();
|
||||||
|
builder.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap()
|
||||||
|
};
|
||||||
|
|
||||||
|
let op = OpBuilder::new()
|
||||||
|
.add(prev_alternatives.stream())
|
||||||
|
.add(delta_alternatives.stream())
|
||||||
|
.difference();
|
||||||
|
|
||||||
|
let (alternatives, empty_alternatives) = {
|
||||||
|
let mut builder = SetBuilder::memory();
|
||||||
|
let len = builder.get_ref().len();
|
||||||
|
builder.extend_stream(op).unwrap();
|
||||||
|
let is_empty = len == builder.get_ref().len();
|
||||||
|
let alternatives = builder.into_inner().unwrap();
|
||||||
|
(alternatives, is_empty)
|
||||||
|
};
|
||||||
|
|
||||||
|
if empty_alternatives {
|
||||||
|
delete_whole_synonym_builder.insert(synonym.as_bytes())?;
|
||||||
|
} else {
|
||||||
|
synonyms.set_alternatives_to(synonym.as_bytes(), alternatives)?;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
delete_whole_synonym_builder.insert(&synonym).unwrap();
|
||||||
|
synonyms.del_alternatives_of(synonym.as_bytes())?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let delta_synonyms = delete_whole_synonym_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let synonyms = match main.synonyms_set()? {
|
||||||
|
Some(synonyms) => {
|
||||||
|
let op = OpBuilder::new()
|
||||||
|
.add(synonyms.stream())
|
||||||
|
.add(delta_synonyms.stream())
|
||||||
|
.difference();
|
||||||
|
|
||||||
|
let mut synonyms_builder = SetBuilder::memory();
|
||||||
|
synonyms_builder.extend_stream(op).unwrap();
|
||||||
|
synonyms_builder
|
||||||
|
.into_inner()
|
||||||
|
.and_then(fst::Set::from_bytes)
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
None => fst::Set::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
main.set_synonyms_set(&synonyms)?;
|
||||||
|
|
||||||
|
// update the "consistent" view of the Index
|
||||||
|
let cache = ref_index.cache;
|
||||||
|
let words = Arc::new(main.words_set()?.unwrap_or_default());
|
||||||
|
let ranked_map = cache.ranked_map.clone();
|
||||||
|
let synonyms = Arc::new(synonyms);
|
||||||
|
let schema = cache.schema.clone();
|
||||||
|
|
||||||
|
let cache = Cache { words, synonyms, schema, ranked_map };
|
||||||
|
index.cache.store(Arc::new(cache));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
@ -5,8 +5,8 @@ mod number;
|
|||||||
mod ranked_map;
|
mod ranked_map;
|
||||||
mod serde;
|
mod serde;
|
||||||
|
|
||||||
pub use rocksdb;
|
pub use sled;
|
||||||
pub use self::database::{Database, Index, CustomSettings};
|
pub use self::database::{Database, Index, CustomSettingsIndex};
|
||||||
pub use self::number::Number;
|
pub use self::number::Number;
|
||||||
pub use self::ranked_map::RankedMap;
|
pub use self::ranked_map::RankedMap;
|
||||||
pub use self::serde::{compute_document_id, extract_document_id, value_to_string};
|
pub use self::serde::{compute_document_id, extract_document_id, value_to_string};
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::io::Cursor;
|
use std::io::Cursor;
|
||||||
|
use std::{fmt, error::Error};
|
||||||
|
|
||||||
use meilidb_core::DocumentId;
|
use meilidb_core::DocumentId;
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilidb_schema::SchemaAttr;
|
||||||
@ -9,6 +10,43 @@ use serde::{de, forward_to_deserialize_any};
|
|||||||
|
|
||||||
use crate::database::Index;
|
use crate::database::Index;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum DeserializerError {
|
||||||
|
RmpError(RmpError),
|
||||||
|
SledError(sled::Error),
|
||||||
|
Custom(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl de::Error for DeserializerError {
|
||||||
|
fn custom<T: fmt::Display>(msg: T) -> Self {
|
||||||
|
DeserializerError::Custom(msg.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for DeserializerError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
DeserializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
||||||
|
DeserializerError::SledError(e) => write!(f, "Sled related error: {}", e),
|
||||||
|
DeserializerError::Custom(s) => f.write_str(s),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for DeserializerError {}
|
||||||
|
|
||||||
|
impl From<RmpError> for DeserializerError {
|
||||||
|
fn from(error: RmpError) -> DeserializerError {
|
||||||
|
DeserializerError::RmpError(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<sled::Error> for DeserializerError {
|
||||||
|
fn from(error: sled::Error) -> DeserializerError {
|
||||||
|
DeserializerError::SledError(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Deserializer<'a> {
|
pub struct Deserializer<'a> {
|
||||||
pub document_id: DocumentId,
|
pub document_id: DocumentId,
|
||||||
pub index: &'a Index,
|
pub index: &'a Index,
|
||||||
@ -17,7 +55,7 @@ pub struct Deserializer<'a> {
|
|||||||
|
|
||||||
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
|
impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
|
||||||
{
|
{
|
||||||
type Error = RmpError;
|
type Error = DeserializerError;
|
||||||
|
|
||||||
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
where V: de::Visitor<'de>
|
where V: de::Visitor<'de>
|
||||||
@ -34,33 +72,41 @@ impl<'de, 'a, 'b> de::Deserializer<'de> for &'b mut Deserializer<'a>
|
|||||||
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
|
||||||
where V: de::Visitor<'de>
|
where V: de::Visitor<'de>
|
||||||
{
|
{
|
||||||
let schema = &self.index.lease_inner().schema;
|
let schema = self.index.schema();
|
||||||
let documents = &self.index.lease_inner().raw.documents;
|
let documents = self.index.as_ref().documents_index;
|
||||||
|
|
||||||
let document_attributes = documents.document_fields(self.document_id);
|
let mut error = None;
|
||||||
let document_attributes = document_attributes.filter_map(|result| {
|
|
||||||
match result {
|
|
||||||
Ok(value) => Some(value),
|
|
||||||
Err(e) => {
|
|
||||||
// TODO: must log the error
|
|
||||||
// error!("sled iter error; {}", e);
|
|
||||||
None
|
|
||||||
},
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let iter = document_attributes.filter_map(|(attr, value)| {
|
let iter = documents
|
||||||
let is_displayed = schema.props(attr).is_displayed();
|
.document_fields(self.document_id)
|
||||||
if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
|
.filter_map(|result| {
|
||||||
let attribute_name = schema.attribute_name(attr);
|
match result {
|
||||||
Some((attribute_name, Value::new(value)))
|
Ok((attr, value)) => {
|
||||||
} else {
|
let is_displayed = schema.props(attr).is_displayed();
|
||||||
None
|
if is_displayed && self.fields.map_or(true, |f| f.contains(&attr)) {
|
||||||
}
|
let attribute_name = schema.attribute_name(attr);
|
||||||
});
|
Some((attribute_name, Value::new(value)))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(e) => {
|
||||||
|
if error.is_none() {
|
||||||
|
error = Some(e);
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
let map_deserializer = de::value::MapDeserializer::new(iter);
|
let map_deserializer = de::value::MapDeserializer::new(iter);
|
||||||
visitor.visit_map(map_deserializer)
|
let result = visitor.visit_map(map_deserializer).map_err(DeserializerError::from);
|
||||||
|
|
||||||
|
if let Some(e) = error {
|
||||||
|
return Err(DeserializerError::from(e))
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ mod extract_document_id;
|
|||||||
mod indexer;
|
mod indexer;
|
||||||
mod serializer;
|
mod serializer;
|
||||||
|
|
||||||
pub use self::deserializer::Deserializer;
|
pub use self::deserializer::{Deserializer, DeserializerError};
|
||||||
pub use self::extract_document_id::{extract_document_id, compute_document_id, value_to_string};
|
pub use self::extract_document_id::{extract_document_id, compute_document_id, value_to_string};
|
||||||
pub use self::convert_to_string::ConvertToString;
|
pub use self::convert_to_string::ConvertToString;
|
||||||
pub use self::convert_to_number::ConvertToNumber;
|
pub use self::convert_to_number::ConvertToNumber;
|
||||||
@ -38,8 +38,8 @@ pub enum SerializerError {
|
|||||||
DocumentIdNotFound,
|
DocumentIdNotFound,
|
||||||
InvalidDocumentIdType,
|
InvalidDocumentIdType,
|
||||||
RmpError(RmpError),
|
RmpError(RmpError),
|
||||||
|
SledError(sled::Error),
|
||||||
SerdeJsonError(SerdeJsonError),
|
SerdeJsonError(SerdeJsonError),
|
||||||
RocksdbError(rocksdb::Error),
|
|
||||||
ParseNumberError(ParseNumberError),
|
ParseNumberError(ParseNumberError),
|
||||||
UnserializableType { type_name: &'static str },
|
UnserializableType { type_name: &'static str },
|
||||||
UnindexableType { type_name: &'static str },
|
UnindexableType { type_name: &'static str },
|
||||||
@ -63,8 +63,8 @@ impl fmt::Display for SerializerError {
|
|||||||
write!(f, "document identifier can only be of type string or number")
|
write!(f, "document identifier can only be of type string or number")
|
||||||
},
|
},
|
||||||
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
||||||
|
SerializerError::SledError(e) => write!(f, "Sled related error: {}", e),
|
||||||
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
|
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
|
||||||
SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e),
|
|
||||||
SerializerError::ParseNumberError(e) => {
|
SerializerError::ParseNumberError(e) => {
|
||||||
write!(f, "error while trying to parse a number: {}", e)
|
write!(f, "error while trying to parse a number: {}", e)
|
||||||
},
|
},
|
||||||
@ -102,9 +102,9 @@ impl From<SerdeJsonError> for SerializerError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<rocksdb::Error> for SerializerError {
|
impl From<sled::Error> for SerializerError {
|
||||||
fn from(error: rocksdb::Error) -> SerializerError {
|
fn from(error: sled::Error) -> SerializerError {
|
||||||
SerializerError::RocksdbError(error)
|
SerializerError::SledError(error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use meilidb_data::Database;
|
use meilidb_data::Database;
|
||||||
use meilidb_schema::{Schema, SchemaBuilder, DISPLAYED, INDEXED};
|
use meilidb_schema::{Schema, SchemaBuilder, DISPLAYED, INDEXED};
|
||||||
@ -12,16 +15,24 @@ fn simple_schema() -> Schema {
|
|||||||
#[test]
|
#[test]
|
||||||
fn insert_delete_document() {
|
fn insert_delete_document() {
|
||||||
let tmp_dir = tempfile::tempdir().unwrap();
|
let tmp_dir = tempfile::tempdir().unwrap();
|
||||||
let database = Database::start_default(&tmp_dir).unwrap();
|
let database = Database::open(&tmp_dir).unwrap();
|
||||||
|
|
||||||
|
let as_been_updated = Arc::new(AtomicBool::new(false));
|
||||||
|
|
||||||
let schema = simple_schema();
|
let schema = simple_schema();
|
||||||
let index = database.create_index("hello", schema).unwrap();
|
let index = database.create_index("hello", schema).unwrap();
|
||||||
|
|
||||||
|
let as_been_updated_clone = as_been_updated.clone();
|
||||||
|
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
|
||||||
|
|
||||||
let doc1 = json!({ "objectId": 123, "title": "hello" });
|
let doc1 = json!({ "objectId": 123, "title": "hello" });
|
||||||
|
|
||||||
let mut addition = index.documents_addition();
|
let mut addition = index.documents_addition();
|
||||||
addition.update_document(&doc1).unwrap();
|
addition.update_document(&doc1);
|
||||||
addition.finalize().unwrap();
|
let update_id = addition.finalize().unwrap();
|
||||||
|
let status = index.update_status_blocking(update_id).unwrap();
|
||||||
|
assert!(as_been_updated.swap(false, Relaxed));
|
||||||
|
assert!(status.result.is_ok());
|
||||||
|
|
||||||
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
||||||
assert_eq!(docs.len(), 1);
|
assert_eq!(docs.len(), 1);
|
||||||
@ -29,7 +40,10 @@ fn insert_delete_document() {
|
|||||||
|
|
||||||
let mut deletion = index.documents_deletion();
|
let mut deletion = index.documents_deletion();
|
||||||
deletion.delete_document(&doc1).unwrap();
|
deletion.delete_document(&doc1).unwrap();
|
||||||
deletion.finalize().unwrap();
|
let update_id = deletion.finalize().unwrap();
|
||||||
|
let status = index.update_status_blocking(update_id).unwrap();
|
||||||
|
assert!(as_been_updated.swap(false, Relaxed));
|
||||||
|
assert!(status.result.is_ok());
|
||||||
|
|
||||||
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
||||||
assert_eq!(docs.len(), 0);
|
assert_eq!(docs.len(), 0);
|
||||||
@ -38,25 +52,36 @@ fn insert_delete_document() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn replace_document() {
|
fn replace_document() {
|
||||||
let tmp_dir = tempfile::tempdir().unwrap();
|
let tmp_dir = tempfile::tempdir().unwrap();
|
||||||
let database = Database::start_default(&tmp_dir).unwrap();
|
let database = Database::open(&tmp_dir).unwrap();
|
||||||
|
|
||||||
|
let as_been_updated = Arc::new(AtomicBool::new(false));
|
||||||
|
|
||||||
let schema = simple_schema();
|
let schema = simple_schema();
|
||||||
let index = database.create_index("hello", schema).unwrap();
|
let index = database.create_index("hello", schema).unwrap();
|
||||||
|
|
||||||
|
let as_been_updated_clone = as_been_updated.clone();
|
||||||
|
index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
|
||||||
|
|
||||||
let doc1 = json!({ "objectId": 123, "title": "hello" });
|
let doc1 = json!({ "objectId": 123, "title": "hello" });
|
||||||
let doc2 = json!({ "objectId": 123, "title": "coucou" });
|
let doc2 = json!({ "objectId": 123, "title": "coucou" });
|
||||||
|
|
||||||
let mut addition = index.documents_addition();
|
let mut addition = index.documents_addition();
|
||||||
addition.update_document(&doc1).unwrap();
|
addition.update_document(&doc1);
|
||||||
addition.finalize().unwrap();
|
let update_id = addition.finalize().unwrap();
|
||||||
|
let status = index.update_status_blocking(update_id).unwrap();
|
||||||
|
assert!(as_been_updated.swap(false, Relaxed));
|
||||||
|
assert!(status.result.is_ok());
|
||||||
|
|
||||||
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
||||||
assert_eq!(docs.len(), 1);
|
assert_eq!(docs.len(), 1);
|
||||||
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
|
assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc1));
|
||||||
|
|
||||||
let mut deletion = index.documents_addition();
|
let mut deletion = index.documents_addition();
|
||||||
deletion.update_document(&doc2).unwrap();
|
deletion.update_document(&doc2);
|
||||||
deletion.finalize().unwrap();
|
let update_id = deletion.finalize().unwrap();
|
||||||
|
let status = index.update_status_blocking(update_id).unwrap();
|
||||||
|
assert!(as_been_updated.swap(false, Relaxed));
|
||||||
|
assert!(status.result.is_ok());
|
||||||
|
|
||||||
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
let docs = index.query_builder().query("hello", 0..10).unwrap();
|
||||||
assert_eq!(docs.len(), 0);
|
assert_eq!(docs.len(), 0);
|
||||||
|
@ -99,14 +99,14 @@ struct InnerSchema {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Schema {
|
impl Schema {
|
||||||
pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
|
pub fn from_toml<R: Read>(mut reader: R) -> Result<Schema, Box<dyn Error>> {
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
reader.read_to_end(&mut buffer)?;
|
reader.read_to_end(&mut buffer)?;
|
||||||
let builder: SchemaBuilder = toml::from_slice(&buffer)?;
|
let builder: SchemaBuilder = toml::from_slice(&buffer)?;
|
||||||
Ok(builder.build())
|
Ok(builder.build())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
|
pub fn to_toml<W: Write>(&self, mut writer: W) -> Result<(), Box<dyn Error>> {
|
||||||
let identifier = self.inner.identifier.clone();
|
let identifier = self.inner.identifier.clone();
|
||||||
let attributes = self.attributes_ordered();
|
let attributes = self.attributes_ordered();
|
||||||
let builder = SchemaBuilder { identifier, attributes };
|
let builder = SchemaBuilder { identifier, attributes };
|
||||||
@ -117,14 +117,14 @@ impl Schema {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<Error>> {
|
pub fn from_json<R: Read>(mut reader: R) -> Result<Schema, Box<dyn Error>> {
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
reader.read_to_end(&mut buffer)?;
|
reader.read_to_end(&mut buffer)?;
|
||||||
let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
|
let builder: SchemaBuilder = serde_json::from_slice(&buffer)?;
|
||||||
Ok(builder.build())
|
Ok(builder.build())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<Error>> {
|
pub fn to_json<W: Write>(&self, mut writer: W) -> Result<(), Box<dyn Error>> {
|
||||||
let identifier = self.inner.identifier.clone();
|
let identifier = self.inner.identifier.clone();
|
||||||
let attributes = self.attributes_ordered();
|
let attributes = self.attributes_ordered();
|
||||||
let builder = SchemaBuilder { identifier, attributes };
|
let builder = SchemaBuilder { identifier, attributes };
|
||||||
@ -245,7 +245,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn serialize_deserialize_toml() -> Result<(), Box<Error>> {
|
fn serialize_deserialize_toml() -> Result<(), Box<dyn Error>> {
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
let mut builder = SchemaBuilder::with_identifier("id");
|
||||||
builder.new_attribute("alpha", DISPLAYED);
|
builder.new_attribute("alpha", DISPLAYED);
|
||||||
builder.new_attribute("beta", DISPLAYED | INDEXED);
|
builder.new_attribute("beta", DISPLAYED | INDEXED);
|
||||||
@ -278,7 +278,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn serialize_deserialize_json() -> Result<(), Box<Error>> {
|
fn serialize_deserialize_json() -> Result<(), Box<dyn Error>> {
|
||||||
let mut builder = SchemaBuilder::with_identifier("id");
|
let mut builder = SchemaBuilder::with_identifier("id");
|
||||||
builder.new_attribute("alpha", DISPLAYED);
|
builder.new_attribute("alpha", DISPLAYED);
|
||||||
builder.new_attribute("beta", DISPLAYED | INDEXED);
|
builder.new_attribute("beta", DISPLAYED | INDEXED);
|
||||||
|
@ -6,7 +6,6 @@ use std::io::{self, BufRead, BufReader};
|
|||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
use diskus::Walk;
|
use diskus::Walk;
|
||||||
@ -44,9 +43,8 @@ pub struct Opt {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
struct Document<'a> (
|
struct Document (
|
||||||
#[serde(borrow)]
|
HashMap<String, String>
|
||||||
HashMap<Cow<'a, str>, Cow<'a, str>>
|
|
||||||
);
|
);
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@ -85,7 +83,7 @@ fn index(
|
|||||||
synonyms: Vec<Synonym>,
|
synonyms: Vec<Synonym>,
|
||||||
) -> Result<Database, Box<dyn Error>>
|
) -> Result<Database, Box<dyn Error>>
|
||||||
{
|
{
|
||||||
let database = Database::start_default(database_path)?;
|
let database = Database::open(database_path)?;
|
||||||
|
|
||||||
let mut wtr = csv::Writer::from_path("./stats.csv").unwrap();
|
let mut wtr = csv::Writer::from_path("./stats.csv").unwrap();
|
||||||
wtr.write_record(&["NumberOfDocuments", "DiskUsed", "MemoryUsed"])?;
|
wtr.write_record(&["NumberOfDocuments", "DiskUsed", "MemoryUsed"])?;
|
||||||
@ -138,7 +136,7 @@ fn index(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
update.update_document(&document)?;
|
update.update_document(document);
|
||||||
|
|
||||||
print!("\rindexing document {}", i);
|
print!("\rindexing document {}", i);
|
||||||
i += 1;
|
i += 1;
|
||||||
|
@ -143,7 +143,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
let opt = Opt::from_args();
|
let opt = Opt::from_args();
|
||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let database = Database::start_default(&opt.database_path)?;
|
let database = Database::open(&opt.database_path)?;
|
||||||
|
|
||||||
let index = database.open_index("test")?.unwrap();
|
let index = database.open_index("test")?.unwrap();
|
||||||
let schema = index.schema();
|
let schema = index.schema();
|
||||||
|
Loading…
Reference in New Issue
Block a user