meilisearch/meilidb-core/src/store/mod.rs

353 lines
11 KiB
Rust
Raw Normal View History

2019-10-03 21:04:11 +08:00
mod docs_words;
2019-10-03 17:49:13 +08:00
mod documents_fields;
mod documents_fields_counts;
2019-10-03 21:04:11 +08:00
mod main;
mod postings_lists;
mod synonyms;
2019-10-03 21:04:11 +08:00
mod updates;
2019-10-03 22:13:09 +08:00
mod updates_results;
2019-10-03 21:04:11 +08:00
pub use self::docs_words::DocsWords;
2019-10-18 19:05:28 +08:00
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
pub use self::documents_fields_counts::{
DocumentFieldsCountsIter, DocumentsFieldsCounts, DocumentsIdsIter,
};
2019-10-03 21:04:11 +08:00
pub use self::main::Main;
pub use self::postings_lists::PostingsLists;
pub use self::synonyms::Synonyms;
2019-10-03 21:04:11 +08:00
pub use self::updates::Updates;
2019-10-03 22:13:09 +08:00
pub use self::updates_results::UpdatesResults;
use std::collections::HashSet;
2019-10-21 18:05:53 +08:00
use heed::Result as ZResult;
use meilidb_schema::{Schema, SchemaAttr};
use serde::de;
2019-10-16 23:05:24 +08:00
use zerocopy::{AsBytes, FromBytes};
use crate::criterion::Criteria;
use crate::serde::Deserializer;
2019-10-18 19:05:28 +08:00
use crate::{query_builder::QueryBuilder, update, DocumentId, Error, MResult};
2019-10-16 23:05:24 +08:00
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
2019-10-18 19:05:28 +08:00
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
2019-10-16 23:05:24 +08:00
#[repr(C)]
2019-10-18 19:05:28 +08:00
pub struct DocumentAttrKey {
docid: BEU64,
attr: BEU16,
}
2019-10-16 23:05:24 +08:00
impl DocumentAttrKey {
fn new(docid: DocumentId, attr: SchemaAttr) -> DocumentAttrKey {
2019-10-18 19:05:28 +08:00
DocumentAttrKey {
docid: BEU64::new(docid.0),
attr: BEU16::new(attr.0),
}
2019-10-16 23:05:24 +08:00
}
}
/// Returns the name of the main store of the index `name`: `store-{name}`.
fn main_name(name: &str) -> String {
    ["store-", name].concat()
}
2019-10-03 21:04:11 +08:00
/// Returns the name of the postings lists store of the index `name`:
/// `store-{name}-postings-lists`.
fn postings_lists_name(name: &str) -> String {
    ["store-", name, "-postings-lists"].concat()
}
/// Returns the name of the documents fields store of the index `name`:
/// `store-{name}-documents-fields`.
fn documents_fields_name(name: &str) -> String {
    ["store-", name, "-documents-fields"].concat()
}
/// Returns the name of the documents fields counts store of the index
/// `name`: `store-{name}-documents-fields-counts`.
fn documents_fields_counts_name(name: &str) -> String {
    ["store-", name, "-documents-fields-counts"].concat()
}
2019-10-03 17:49:13 +08:00
/// Returns the name of the synonyms store of the index `name`:
/// `store-{name}-synonyms`.
fn synonyms_name(name: &str) -> String {
    ["store-", name, "-synonyms"].concat()
}
2019-10-03 21:04:11 +08:00
/// Returns the name of the docs words store of the index `name`:
/// `store-{name}-docs-words`.
fn docs_words_name(name: &str) -> String {
    ["store-", name, "-docs-words"].concat()
}
2019-10-03 21:04:11 +08:00
/// Returns the name of the updates store of the index `name`:
/// `store-{name}-updates`.
fn updates_name(name: &str) -> String {
    ["store-", name, "-updates"].concat()
}
2019-10-03 22:13:09 +08:00
/// Returns the name of the updates results store of the index `name`:
/// `store-{name}-updates-results`.
fn updates_results_name(name: &str) -> String {
    ["store-", name, "-updates-results"].concat()
}
#[derive(Clone)]
2019-10-03 21:04:11 +08:00
pub struct Index {
pub main: Main,
pub postings_lists: PostingsLists,
pub documents_fields: DocumentsFields,
pub documents_fields_counts: DocumentsFieldsCounts,
2019-10-03 21:04:11 +08:00
pub synonyms: Synonyms,
pub docs_words: DocsWords,
2019-10-03 21:04:11 +08:00
pub updates: Updates,
2019-10-03 22:13:09 +08:00
pub updates_results: UpdatesResults,
updates_notifier: crossbeam_channel::Sender<()>,
2019-10-03 21:04:11 +08:00
}
impl Index {
2019-10-16 23:05:24 +08:00
pub fn document<T: de::DeserializeOwned>(
&self,
2019-10-21 18:05:53 +08:00
reader: &heed::RoTxn,
attributes: Option<&HashSet<&str>>,
document_id: DocumentId,
2019-10-18 19:05:28 +08:00
) -> MResult<Option<T>> {
let schema = self.main.schema(reader)?;
let schema = schema.ok_or(Error::SchemaMissing)?;
let attributes = match attributes {
2019-10-18 19:05:28 +08:00
Some(attributes) => attributes
2019-10-18 19:21:41 +08:00
.iter()
2019-10-18 19:05:28 +08:00
.map(|name| schema.attribute(name))
.collect(),
None => None,
};
let mut deserializer = Deserializer {
document_id,
reader,
documents_fields: self.documents_fields,
schema: &schema,
attributes: attributes.as_ref(),
};
// TODO: currently we return an error if all document fields are missing,
// returning None would have been better
Ok(T::deserialize(&mut deserializer).map(Some)?)
}
2019-10-16 23:05:24 +08:00
pub fn document_attribute<T: de::DeserializeOwned>(
&self,
2019-10-21 18:05:53 +08:00
reader: &heed::RoTxn,
document_id: DocumentId,
attribute: SchemaAttr,
2019-10-18 19:05:28 +08:00
) -> MResult<Option<T>> {
let bytes = self
.documents_fields
.document_attribute(reader, document_id, attribute)?;
match bytes {
2019-10-11 22:16:21 +08:00
Some(bytes) => Ok(Some(serde_json::from_slice(bytes)?)),
None => Ok(None),
}
}
2019-10-21 18:05:53 +08:00
pub fn schema_update(&self, writer: &mut heed::RwTxn, schema: Schema) -> MResult<u64> {
let _ = self.updates_notifier.send(());
2019-10-11 21:33:35 +08:00
update::push_schema_update(writer, self.updates, self.updates_results, schema)
}
2019-10-21 18:05:53 +08:00
pub fn customs_update(&self, writer: &mut heed::RwTxn, customs: Vec<u8>) -> ZResult<u64> {
2019-10-11 21:33:35 +08:00
let _ = self.updates_notifier.send(());
update::push_customs_update(writer, self.updates, self.updates_results, customs)
}
pub fn documents_addition<D>(&self) -> update::DocumentsAddition<D> {
update::DocumentsAddition::new(
self.updates,
self.updates_results,
self.updates_notifier.clone(),
)
}
pub fn documents_deletion(&self) -> update::DocumentsDeletion {
update::DocumentsDeletion::new(
self.updates,
self.updates_results,
self.updates_notifier.clone(),
)
}
2019-10-23 22:32:11 +08:00
pub fn clear_all(&self, writer: &mut heed::RwTxn) -> MResult<u64> {
let _ = self.updates_notifier.send(());
2019-10-23 22:32:11 +08:00
update::push_clear_all(writer, self.updates, self.updates_results)
}
pub fn synonyms_addition(&self) -> update::SynonymsAddition {
update::SynonymsAddition::new(
self.updates,
self.updates_results,
self.updates_notifier.clone(),
)
}
pub fn synonyms_deletion(&self) -> update::SynonymsDeletion {
update::SynonymsDeletion::new(
self.updates,
self.updates_results,
self.updates_notifier.clone(),
)
}
pub fn stop_words_addition(&self) -> update::StopWordsAddition {
update::StopWordsAddition::new(
self.updates,
self.updates_results,
self.updates_notifier.clone(),
)
}
2019-10-21 18:05:53 +08:00
pub fn current_update_id(&self, reader: &heed::RoTxn) -> MResult<Option<u64>> {
match self.updates.last_update_id(reader)? {
Some((id, _)) => Ok(Some(id)),
None => Ok(None),
}
}
2019-10-16 23:05:24 +08:00
pub fn update_status(
&self,
2019-10-21 18:05:53 +08:00
reader: &heed::RoTxn,
update_id: u64,
2019-10-18 19:05:28 +08:00
) -> MResult<update::UpdateStatus> {
update::update_status(reader, self.updates, self.updates_results, update_id)
}
pub fn all_updates_status(&self, reader: &heed::RoTxn) -> MResult<Vec<update::UpdateStatus>> {
match self.updates_results.last_update_id(reader)? {
Some((last_id, _)) => {
let mut updates = Vec::with_capacity(last_id as usize + 1);
for id in 0..=last_id {
let update = self.update_status(reader, id)?;
updates.push(update);
}
Ok(updates)
}
None => Ok(Vec::new()),
}
}
pub fn query_builder(&self) -> QueryBuilder {
QueryBuilder::new(
self.main,
self.postings_lists,
self.documents_fields_counts,
self.synonyms,
)
}
pub fn query_builder_with_criteria<'c, 'f, 'd>(
&self,
criteria: Criteria<'c>,
2019-10-18 19:05:28 +08:00
) -> QueryBuilder<'c, 'f, 'd> {
QueryBuilder::with_criteria(
self.main,
self.postings_lists,
self.documents_fields_counts,
self.synonyms,
criteria,
)
}
2019-10-03 17:49:13 +08:00
}
pub fn create(
2019-10-21 18:05:53 +08:00
env: &heed::Env,
name: &str,
updates_notifier: crossbeam_channel::Sender<()>,
2019-10-18 19:05:28 +08:00
) -> MResult<Index> {
2019-10-16 23:05:24 +08:00
// create all the store names
let main_name = main_name(name);
let postings_lists_name = postings_lists_name(name);
let documents_fields_name = documents_fields_name(name);
let documents_fields_counts_name = documents_fields_counts_name(name);
let synonyms_name = synonyms_name(name);
let docs_words_name = docs_words_name(name);
let updates_name = updates_name(name);
let updates_results_name = updates_results_name(name);
2019-10-16 23:05:24 +08:00
// open all the stores
let main = env.create_dyn_database(Some(&main_name))?;
let postings_lists = env.create_database(Some(&postings_lists_name))?;
let documents_fields = env.create_database(Some(&documents_fields_name))?;
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
let synonyms = env.create_database(Some(&synonyms_name))?;
let docs_words = env.create_database(Some(&docs_words_name))?;
let updates = env.create_database(Some(&updates_name))?;
let updates_results = env.create_database(Some(&updates_results_name))?;
Ok(Index {
main: Main { main },
postings_lists: PostingsLists { postings_lists },
documents_fields: DocumentsFields { documents_fields },
2019-10-18 19:05:28 +08:00
documents_fields_counts: DocumentsFieldsCounts {
documents_fields_counts,
},
2019-10-16 23:05:24 +08:00
synonyms: Synonyms { synonyms },
docs_words: DocsWords { docs_words },
updates: Updates { updates },
updates_results: UpdatesResults { updates_results },
updates_notifier,
})
2019-10-03 17:49:13 +08:00
}
2019-10-16 23:05:24 +08:00
pub fn open(
2019-10-21 18:05:53 +08:00
env: &heed::Env,
2019-10-03 17:49:13 +08:00
name: &str,
updates_notifier: crossbeam_channel::Sender<()>,
2019-10-18 19:05:28 +08:00
) -> MResult<Option<Index>> {
// create all the store names
let main_name = main_name(name);
2019-10-03 21:04:11 +08:00
let postings_lists_name = postings_lists_name(name);
2019-10-03 17:49:13 +08:00
let documents_fields_name = documents_fields_name(name);
let documents_fields_counts_name = documents_fields_counts_name(name);
2019-10-03 21:04:11 +08:00
let synonyms_name = synonyms_name(name);
let docs_words_name = docs_words_name(name);
let updates_name = updates_name(name);
2019-10-03 22:13:09 +08:00
let updates_results_name = updates_results_name(name);
2019-10-03 17:49:13 +08:00
// open all the stores
2019-10-16 23:05:24 +08:00
let main = match env.open_dyn_database(Some(&main_name))? {
Some(main) => main,
None => return Ok(None),
};
let postings_lists = match env.open_database(Some(&postings_lists_name))? {
Some(postings_lists) => postings_lists,
None => return Ok(None),
};
let documents_fields = match env.open_database(Some(&documents_fields_name))? {
Some(documents_fields) => documents_fields,
None => return Ok(None),
};
let documents_fields_counts = match env.open_database(Some(&documents_fields_counts_name))? {
Some(documents_fields_counts) => documents_fields_counts,
None => return Ok(None),
};
let synonyms = match env.open_database(Some(&synonyms_name))? {
Some(synonyms) => synonyms,
None => return Ok(None),
};
let docs_words = match env.open_database(Some(&docs_words_name))? {
Some(docs_words) => docs_words,
None => return Ok(None),
};
let updates = match env.open_database(Some(&updates_name))? {
Some(updates) => updates,
None => return Ok(None),
};
let updates_results = match env.open_database(Some(&updates_results_name))? {
Some(updates_results) => updates_results,
None => return Ok(None),
};
2019-10-16 23:05:24 +08:00
Ok(Some(Index {
2019-10-03 21:04:11 +08:00
main: Main { main },
postings_lists: PostingsLists { postings_lists },
documents_fields: DocumentsFields { documents_fields },
2019-10-18 19:05:28 +08:00
documents_fields_counts: DocumentsFieldsCounts {
documents_fields_counts,
},
2019-10-03 21:04:11 +08:00
synonyms: Synonyms { synonyms },
docs_words: DocsWords { docs_words },
updates: Updates { updates },
2019-10-03 22:13:09 +08:00
updates_results: UpdatesResults { updates_results },
updates_notifier,
2019-10-16 23:05:24 +08:00
}))
}