mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 17:11:15 +08:00
Introduce the reindex_all_documents indexing function
This commit is contained in:
parent
e86a547e93
commit
0f30a221fa
@ -12,6 +12,7 @@ pub enum Error {
|
|||||||
SchemaMissing,
|
SchemaMissing,
|
||||||
WordIndexMissing,
|
WordIndexMissing,
|
||||||
MissingDocumentId,
|
MissingDocumentId,
|
||||||
|
DuplicateDocument,
|
||||||
Zlmdb(heed::Error),
|
Zlmdb(heed::Error),
|
||||||
Fst(fst::Error),
|
Fst(fst::Error),
|
||||||
SerdeJson(SerdeJsonError),
|
SerdeJson(SerdeJsonError),
|
||||||
@ -79,6 +80,7 @@ impl fmt::Display for Error {
|
|||||||
SchemaMissing => write!(f, "this index does not have a schema"),
|
SchemaMissing => write!(f, "this index does not have a schema"),
|
||||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||||
MissingDocumentId => write!(f, "document id is missing"),
|
MissingDocumentId => write!(f, "document id is missing"),
|
||||||
|
DuplicateDocument => write!(f, "update contains documents with the same id"),
|
||||||
Zlmdb(e) => write!(f, "heed error; {}", e),
|
Zlmdb(e) => write!(f, "heed error; {}", e),
|
||||||
Fst(e) => write!(f, "fst error; {}", e),
|
Fst(e) => write!(f, "fst error; {}", e),
|
||||||
SerdeJson(e) => write!(f, "serde json error; {}", e),
|
SerdeJson(e) => write!(f, "serde json error; {}", e),
|
||||||
@ -95,6 +97,10 @@ impl error::Error for Error {}
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum UnsupportedOperation {
|
pub enum UnsupportedOperation {
|
||||||
SchemaAlreadyExists,
|
SchemaAlreadyExists,
|
||||||
|
CannotUpdateSchemaIdentifier,
|
||||||
|
CannotReorderSchemaAttribute,
|
||||||
|
CannotIntroduceNewSchemaAttribute,
|
||||||
|
CannotRemoveSchemaAttribute,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for UnsupportedOperation {
|
impl fmt::Display for UnsupportedOperation {
|
||||||
@ -102,6 +108,12 @@ impl fmt::Display for UnsupportedOperation {
|
|||||||
use self::UnsupportedOperation::*;
|
use self::UnsupportedOperation::*;
|
||||||
match self {
|
match self {
|
||||||
SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"),
|
SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"),
|
||||||
|
CannotUpdateSchemaIdentifier => write!(f, "Cannot update the identifier of a schema"),
|
||||||
|
CannotReorderSchemaAttribute => write!(f, "Cannot reorder the attributes of a schema"),
|
||||||
|
CannotIntroduceNewSchemaAttribute => {
|
||||||
|
write!(f, "Cannot introduce new attributes in a schema")
|
||||||
|
}
|
||||||
|
CannotRemoveSchemaAttribute => write!(f, "Cannot remove attributes from a schema"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,16 +20,14 @@ pub use self::convert_to_string::ConvertToString;
|
|||||||
pub use self::deserializer::{Deserializer, DeserializerError};
|
pub use self::deserializer::{Deserializer, DeserializerError};
|
||||||
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
|
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
|
||||||
pub use self::indexer::Indexer;
|
pub use self::indexer::Indexer;
|
||||||
pub use self::serializer::Serializer;
|
pub use self::serializer::{serialize_value, Serializer};
|
||||||
|
|
||||||
use std::collections::BTreeMap;
|
|
||||||
use std::{error::Error, fmt};
|
use std::{error::Error, fmt};
|
||||||
|
|
||||||
use meilidb_schema::SchemaAttr;
|
|
||||||
use serde::ser;
|
use serde::ser;
|
||||||
use serde_json::Error as SerdeJsonError;
|
use serde_json::Error as SerdeJsonError;
|
||||||
|
|
||||||
use crate::{DocumentId, ParseNumberError};
|
use crate::ParseNumberError;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum SerializerError {
|
pub enum SerializerError {
|
||||||
@ -103,25 +101,3 @@ impl From<ParseNumberError> for SerializerError {
|
|||||||
SerializerError::ParseNumber(error)
|
SerializerError::ParseNumber(error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct RamDocumentStore(BTreeMap<(DocumentId, SchemaAttr), Vec<u8>>);
|
|
||||||
|
|
||||||
impl RamDocumentStore {
|
|
||||||
pub fn new() -> RamDocumentStore {
|
|
||||||
RamDocumentStore(BTreeMap::new())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set_document_field(&mut self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) {
|
|
||||||
self.0.insert((id, attr), value);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn into_inner(self) -> BTreeMap<(DocumentId, SchemaAttr), Vec<u8>> {
|
|
||||||
self.0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for RamDocumentStore {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -1,17 +1,17 @@
|
|||||||
use meilidb_schema::{Schema, SchemaAttr};
|
use meilidb_schema::{Schema, SchemaAttr, SchemaProps};
|
||||||
use serde::ser;
|
use serde::ser;
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
use crate::raw_indexer::RawIndexer;
|
use crate::raw_indexer::RawIndexer;
|
||||||
use crate::serde::RamDocumentStore;
|
use crate::store::{DocumentsFields, DocumentsFieldsCounts};
|
||||||
use crate::{DocumentId, RankedMap};
|
use crate::{DocumentId, RankedMap};
|
||||||
|
|
||||||
use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};
|
use super::{ConvertToNumber, ConvertToString, Indexer, SerializerError};
|
||||||
|
|
||||||
pub struct Serializer<'a> {
|
pub struct Serializer<'a> {
|
||||||
|
pub txn: &'a mut heed::RwTxn,
|
||||||
pub schema: &'a Schema,
|
pub schema: &'a Schema,
|
||||||
pub document_store: &'a mut RamDocumentStore,
|
pub document_store: DocumentsFields,
|
||||||
pub document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
|
pub document_fields_counts: DocumentsFieldsCounts,
|
||||||
pub indexer: &'a mut RawIndexer,
|
pub indexer: &'a mut RawIndexer,
|
||||||
pub ranked_map: &'a mut RankedMap,
|
pub ranked_map: &'a mut RankedMap,
|
||||||
pub document_id: DocumentId,
|
pub document_id: DocumentId,
|
||||||
@ -150,6 +150,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
|
|||||||
|
|
||||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
||||||
Ok(MapSerializer {
|
Ok(MapSerializer {
|
||||||
|
txn: self.txn,
|
||||||
schema: self.schema,
|
schema: self.schema,
|
||||||
document_id: self.document_id,
|
document_id: self.document_id,
|
||||||
document_store: self.document_store,
|
document_store: self.document_store,
|
||||||
@ -166,6 +167,7 @@ impl<'a> ser::Serializer for Serializer<'a> {
|
|||||||
_len: usize,
|
_len: usize,
|
||||||
) -> Result<Self::SerializeStruct, Self::Error> {
|
) -> Result<Self::SerializeStruct, Self::Error> {
|
||||||
Ok(StructSerializer {
|
Ok(StructSerializer {
|
||||||
|
txn: self.txn,
|
||||||
schema: self.schema,
|
schema: self.schema,
|
||||||
document_id: self.document_id,
|
document_id: self.document_id,
|
||||||
document_store: self.document_store,
|
document_store: self.document_store,
|
||||||
@ -189,10 +191,11 @@ impl<'a> ser::Serializer for Serializer<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct MapSerializer<'a> {
|
pub struct MapSerializer<'a> {
|
||||||
|
txn: &'a mut heed::RwTxn,
|
||||||
schema: &'a Schema,
|
schema: &'a Schema,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
document_store: &'a mut RamDocumentStore,
|
document_store: DocumentsFields,
|
||||||
document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
|
document_fields_counts: DocumentsFieldsCounts,
|
||||||
indexer: &'a mut RawIndexer,
|
indexer: &'a mut RawIndexer,
|
||||||
ranked_map: &'a mut RankedMap,
|
ranked_map: &'a mut RankedMap,
|
||||||
current_key_name: Option<String>,
|
current_key_name: Option<String>,
|
||||||
@ -229,17 +232,20 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
|
|||||||
V: ser::Serialize,
|
V: ser::Serialize,
|
||||||
{
|
{
|
||||||
let key = key.serialize(ConvertToString)?;
|
let key = key.serialize(ConvertToString)?;
|
||||||
|
match self.schema.attribute(&key) {
|
||||||
serialize_value(
|
Some(attribute) => serialize_value(
|
||||||
self.schema,
|
self.txn,
|
||||||
self.document_id,
|
attribute,
|
||||||
self.document_store,
|
self.schema.props(attribute),
|
||||||
self.document_fields_counts,
|
self.document_id,
|
||||||
self.indexer,
|
self.document_store,
|
||||||
self.ranked_map,
|
self.document_fields_counts,
|
||||||
&key,
|
self.indexer,
|
||||||
value,
|
self.ranked_map,
|
||||||
)
|
value,
|
||||||
|
),
|
||||||
|
None => Ok(()),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||||
@ -248,10 +254,11 @@ impl<'a> ser::SerializeMap for MapSerializer<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct StructSerializer<'a> {
|
pub struct StructSerializer<'a> {
|
||||||
|
txn: &'a mut heed::RwTxn,
|
||||||
schema: &'a Schema,
|
schema: &'a Schema,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
document_store: &'a mut RamDocumentStore,
|
document_store: DocumentsFields,
|
||||||
document_fields_counts: &'a mut HashMap<(DocumentId, SchemaAttr), u64>,
|
document_fields_counts: DocumentsFieldsCounts,
|
||||||
indexer: &'a mut RawIndexer,
|
indexer: &'a mut RawIndexer,
|
||||||
ranked_map: &'a mut RankedMap,
|
ranked_map: &'a mut RankedMap,
|
||||||
}
|
}
|
||||||
@ -268,16 +275,20 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
|
|||||||
where
|
where
|
||||||
T: ser::Serialize,
|
T: ser::Serialize,
|
||||||
{
|
{
|
||||||
serialize_value(
|
match self.schema.attribute(key) {
|
||||||
self.schema,
|
Some(attribute) => serialize_value(
|
||||||
self.document_id,
|
self.txn,
|
||||||
self.document_store,
|
attribute,
|
||||||
self.document_fields_counts,
|
self.schema.props(attribute),
|
||||||
self.indexer,
|
self.document_id,
|
||||||
self.ranked_map,
|
self.document_store,
|
||||||
key,
|
self.document_fields_counts,
|
||||||
value,
|
self.indexer,
|
||||||
)
|
self.ranked_map,
|
||||||
|
value,
|
||||||
|
),
|
||||||
|
None => Ok(()),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||||
@ -285,40 +296,42 @@ impl<'a> ser::SerializeStruct for StructSerializer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn serialize_value<T: ?Sized>(
|
pub fn serialize_value<T: ?Sized>(
|
||||||
schema: &Schema,
|
txn: &mut heed::RwTxn,
|
||||||
|
attribute: SchemaAttr,
|
||||||
|
props: SchemaProps,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
document_store: &mut RamDocumentStore,
|
document_store: DocumentsFields,
|
||||||
documents_fields_counts: &mut HashMap<(DocumentId, SchemaAttr), u64>,
|
documents_fields_counts: DocumentsFieldsCounts,
|
||||||
indexer: &mut RawIndexer,
|
indexer: &mut RawIndexer,
|
||||||
ranked_map: &mut RankedMap,
|
ranked_map: &mut RankedMap,
|
||||||
key: &str,
|
|
||||||
value: &T,
|
value: &T,
|
||||||
) -> Result<(), SerializerError>
|
) -> Result<(), SerializerError>
|
||||||
where
|
where
|
||||||
T: ser::Serialize,
|
T: ser::Serialize,
|
||||||
{
|
{
|
||||||
if let Some(attribute) = schema.attribute(key) {
|
let serialized = serde_json::to_vec(value)?;
|
||||||
let props = schema.props(attribute);
|
document_store.put_document_field(txn, document_id, attribute, &serialized)?;
|
||||||
|
|
||||||
let serialized = serde_json::to_vec(value)?;
|
if props.is_indexed() {
|
||||||
document_store.set_document_field(document_id, attribute, serialized);
|
let indexer = Indexer {
|
||||||
|
attribute,
|
||||||
if props.is_indexed() {
|
indexer,
|
||||||
let indexer = Indexer {
|
document_id,
|
||||||
attribute,
|
};
|
||||||
indexer,
|
if let Some(number_of_words) = value.serialize(indexer)? {
|
||||||
|
documents_fields_counts.put_document_field_count(
|
||||||
|
txn,
|
||||||
document_id,
|
document_id,
|
||||||
};
|
attribute,
|
||||||
if let Some(number_of_words) = value.serialize(indexer)? {
|
number_of_words as u64,
|
||||||
documents_fields_counts.insert((document_id, attribute), number_of_words as u64);
|
)?;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if props.is_ranked() {
|
if props.is_ranked() {
|
||||||
let number = value.serialize(ConvertToNumber)?;
|
let number = value.serialize(ConvertToNumber)?;
|
||||||
ranked_map.insert(document_id, attribute, number);
|
ranked_map.insert(document_id, attribute, number);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -121,7 +121,7 @@ pub struct AllDocumentsFieldsCountsIter<'txn> {
|
|||||||
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
|
iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'r> Iterator for AllDocumentsFieldsCountsIter<'r> {
|
impl Iterator for AllDocumentsFieldsCountsIter<'_> {
|
||||||
type Item = ZResult<(DocumentId, SchemaAttr, u64)>;
|
type Item = ZResult<(DocumentId, SchemaAttr, u64)>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
@ -23,6 +23,10 @@ impl PostingsLists {
|
|||||||
self.postings_lists.delete(writer, word)
|
self.postings_lists.delete(writer, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn clear(self, writer: &mut heed::RwTxn) -> ZResult<()> {
|
||||||
|
self.postings_lists.clear(writer)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn postings_list<'txn>(
|
pub fn postings_list<'txn>(
|
||||||
self,
|
self,
|
||||||
reader: &'txn heed::RoTxn,
|
reader: &'txn heed::RoTxn,
|
||||||
|
@ -5,7 +5,7 @@ use sdset::{duo::Union, SetOperation};
|
|||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::raw_indexer::RawIndexer;
|
use crate::raw_indexer::RawIndexer;
|
||||||
use crate::serde::{extract_document_id, RamDocumentStore, Serializer};
|
use crate::serde::{extract_document_id, serialize_value, Serializer};
|
||||||
use crate::store;
|
use crate::store;
|
||||||
use crate::update::{apply_documents_deletion, next_update_id, Update};
|
use crate::update::{apply_documents_deletion, next_update_id, Update};
|
||||||
use crate::{Error, MResult, RankedMap};
|
use crate::{Error, MResult, RankedMap};
|
||||||
@ -84,12 +84,9 @@ pub fn apply_documents_addition(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
mut ranked_map: RankedMap,
|
|
||||||
addition: Vec<serde_json::Value>,
|
addition: Vec<serde_json::Value>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let mut document_ids = HashSet::new();
|
let mut documents_ids = HashSet::new();
|
||||||
let mut document_store = RamDocumentStore::new();
|
|
||||||
let mut document_fields_counts = HashMap::new();
|
|
||||||
let mut indexer = RawIndexer::new();
|
let mut indexer = RawIndexer::new();
|
||||||
|
|
||||||
let schema = match main_store.schema(writer)? {
|
let schema = match main_store.schema(writer)? {
|
||||||
@ -99,20 +96,47 @@ pub fn apply_documents_addition(
|
|||||||
|
|
||||||
let identifier = schema.identifier_name();
|
let identifier = schema.identifier_name();
|
||||||
|
|
||||||
|
// 1. store documents ids for future deletion
|
||||||
|
for document in addition.iter() {
|
||||||
|
let document_id = match extract_document_id(identifier, &document)? {
|
||||||
|
Some(id) => id,
|
||||||
|
None => return Err(Error::MissingDocumentId),
|
||||||
|
};
|
||||||
|
|
||||||
|
if !documents_ids.insert(document_id) {
|
||||||
|
return Err(Error::DuplicateDocument);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. remove the documents posting lists
|
||||||
|
let number_of_inserted_documents = documents_ids.len();
|
||||||
|
apply_documents_deletion(
|
||||||
|
writer,
|
||||||
|
main_store,
|
||||||
|
documents_fields_store,
|
||||||
|
documents_fields_counts_store,
|
||||||
|
postings_lists_store,
|
||||||
|
docs_words_store,
|
||||||
|
documents_ids.into_iter().collect(),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let mut ranked_map = match main_store.ranked_map(writer)? {
|
||||||
|
Some(ranked_map) => ranked_map,
|
||||||
|
None => RankedMap::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// 3. index the documents fields in the stores
|
||||||
for document in addition {
|
for document in addition {
|
||||||
let document_id = match extract_document_id(identifier, &document)? {
|
let document_id = match extract_document_id(identifier, &document)? {
|
||||||
Some(id) => id,
|
Some(id) => id,
|
||||||
None => return Err(Error::MissingDocumentId),
|
None => return Err(Error::MissingDocumentId),
|
||||||
};
|
};
|
||||||
|
|
||||||
// 1. store the document id for future deletion
|
|
||||||
document_ids.insert(document_id);
|
|
||||||
|
|
||||||
// 2. index the document fields in ram stores
|
|
||||||
let serializer = Serializer {
|
let serializer = Serializer {
|
||||||
|
txn: writer,
|
||||||
schema: &schema,
|
schema: &schema,
|
||||||
document_store: &mut document_store,
|
document_store: documents_fields_store,
|
||||||
document_fields_counts: &mut document_fields_counts,
|
document_fields_counts: documents_fields_counts_store,
|
||||||
indexer: &mut indexer,
|
indexer: &mut indexer,
|
||||||
ranked_map: &mut ranked_map,
|
ranked_map: &mut ranked_map,
|
||||||
document_id,
|
document_id,
|
||||||
@ -121,29 +145,93 @@ pub fn apply_documents_addition(
|
|||||||
document.serialize(serializer)?;
|
document.serialize(serializer)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. remove the previous documents match indexes
|
write_documents_addition_index(
|
||||||
let documents_to_insert = document_ids.iter().cloned().collect();
|
|
||||||
apply_documents_deletion(
|
|
||||||
writer,
|
writer,
|
||||||
main_store,
|
main_store,
|
||||||
documents_fields_store,
|
|
||||||
documents_fields_counts_store,
|
|
||||||
postings_lists_store,
|
postings_lists_store,
|
||||||
docs_words_store,
|
docs_words_store,
|
||||||
ranked_map.clone(),
|
ranked_map,
|
||||||
documents_to_insert,
|
number_of_inserted_documents,
|
||||||
)?;
|
indexer,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// 2. insert new document attributes in the database
|
pub fn reindex_all_documents(
|
||||||
for ((id, attr), value) in document_store.into_inner() {
|
writer: &mut heed::RwTxn,
|
||||||
documents_fields_store.put_document_field(writer, id, attr, &value)?;
|
main_store: store::Main,
|
||||||
|
documents_fields_store: store::DocumentsFields,
|
||||||
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
|
postings_lists_store: store::PostingsLists,
|
||||||
|
docs_words_store: store::DocsWords,
|
||||||
|
) -> MResult<()> {
|
||||||
|
let schema = match main_store.schema(writer)? {
|
||||||
|
Some(schema) => schema,
|
||||||
|
None => return Err(Error::SchemaMissing),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut ranked_map = RankedMap::default();
|
||||||
|
|
||||||
|
// 1. retrieve all documents ids
|
||||||
|
let mut documents_ids_to_reindex = Vec::new();
|
||||||
|
for result in documents_fields_counts_store.documents_ids(writer)? {
|
||||||
|
let document_id = result?;
|
||||||
|
documents_ids_to_reindex.push(document_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. insert new document attributes counts
|
// 2. remove the documents posting lists
|
||||||
for ((id, attr), count) in document_fields_counts {
|
let number_of_inserted_documents = documents_ids_to_reindex.len();
|
||||||
documents_fields_counts_store.put_document_field_count(writer, id, attr, count)?;
|
main_store.put_words_fst(writer, &fst::Set::default())?;
|
||||||
|
main_store.put_ranked_map(writer, &ranked_map)?;
|
||||||
|
main_store.put_number_of_documents(writer, |_| 0)?;
|
||||||
|
postings_lists_store.clear(writer)?;
|
||||||
|
|
||||||
|
// 3. re-index one document by one document (otherwise we make the borrow checker unhappy)
|
||||||
|
let mut indexer = RawIndexer::new();
|
||||||
|
let mut ram_store = HashMap::new();
|
||||||
|
|
||||||
|
for document_id in documents_ids_to_reindex {
|
||||||
|
for result in documents_fields_store.document_fields(writer, document_id)? {
|
||||||
|
let (attr, bytes) = result?;
|
||||||
|
let value: serde_json::Value = serde_json::from_slice(bytes)?;
|
||||||
|
ram_store.insert((document_id, attr), value);
|
||||||
|
}
|
||||||
|
|
||||||
|
for ((docid, attr), value) in ram_store.drain() {
|
||||||
|
serialize_value(
|
||||||
|
writer,
|
||||||
|
attr,
|
||||||
|
schema.props(attr),
|
||||||
|
docid,
|
||||||
|
documents_fields_store,
|
||||||
|
documents_fields_counts_store,
|
||||||
|
&mut indexer,
|
||||||
|
&mut ranked_map,
|
||||||
|
&value,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 4. write the new index in the main store
|
||||||
|
write_documents_addition_index(
|
||||||
|
writer,
|
||||||
|
main_store,
|
||||||
|
postings_lists_store,
|
||||||
|
docs_words_store,
|
||||||
|
ranked_map,
|
||||||
|
number_of_inserted_documents,
|
||||||
|
indexer,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write_documents_addition_index(
|
||||||
|
writer: &mut heed::RwTxn,
|
||||||
|
main_store: store::Main,
|
||||||
|
postings_lists_store: store::PostingsLists,
|
||||||
|
docs_words_store: store::DocsWords,
|
||||||
|
ranked_map: RankedMap,
|
||||||
|
number_of_inserted_documents: usize,
|
||||||
|
indexer: RawIndexer,
|
||||||
|
) -> MResult<()> {
|
||||||
let indexed = indexer.build();
|
let indexed = indexer.build();
|
||||||
let mut delta_words_builder = SetBuilder::memory();
|
let mut delta_words_builder = SetBuilder::memory();
|
||||||
|
|
||||||
@ -186,9 +274,7 @@ pub fn apply_documents_addition(
|
|||||||
|
|
||||||
main_store.put_words_fst(writer, &words)?;
|
main_store.put_words_fst(writer, &words)?;
|
||||||
main_store.put_ranked_map(writer, &ranked_map)?;
|
main_store.put_ranked_map(writer, &ranked_map)?;
|
||||||
|
main_store.put_number_of_documents(writer, |old| old + number_of_inserted_documents as u64)?;
|
||||||
let inserted_documents_len = document_ids.len() as u64;
|
|
||||||
main_store.put_number_of_documents(writer, |old| old + inserted_documents_len)?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -88,7 +88,6 @@ pub fn apply_documents_deletion(
|
|||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
postings_lists_store: store::PostingsLists,
|
postings_lists_store: store::PostingsLists,
|
||||||
docs_words_store: store::DocsWords,
|
docs_words_store: store::DocsWords,
|
||||||
mut ranked_map: RankedMap,
|
|
||||||
deletion: Vec<DocumentId>,
|
deletion: Vec<DocumentId>,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
let idset = SetBuf::from_dirty(deletion);
|
let idset = SetBuf::from_dirty(deletion);
|
||||||
@ -98,6 +97,11 @@ pub fn apply_documents_deletion(
|
|||||||
None => return Err(Error::SchemaMissing),
|
None => return Err(Error::SchemaMissing),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mut ranked_map = match main_store.ranked_map(writer)? {
|
||||||
|
Some(ranked_map) => ranked_map,
|
||||||
|
None => RankedMap::default(),
|
||||||
|
};
|
||||||
|
|
||||||
// collect the ranked attributes according to the schema
|
// collect the ranked attributes according to the schema
|
||||||
let ranked_attrs: Vec<_> = schema
|
let ranked_attrs: Vec<_> = schema
|
||||||
.iter()
|
.iter()
|
||||||
@ -181,7 +185,6 @@ pub fn apply_documents_deletion(
|
|||||||
|
|
||||||
main_store.put_words_fst(writer, &words)?;
|
main_store.put_words_fst(writer, &words)?;
|
||||||
main_store.put_ranked_map(writer, &ranked_map)?;
|
main_store.put_ranked_map(writer, &ranked_map)?;
|
||||||
|
|
||||||
main_store.put_number_of_documents(writer, |old| old - deleted_documents_len)?;
|
main_store.put_number_of_documents(writer, |old| old - deleted_documents_len)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -20,7 +20,7 @@ use heed::Result as ZResult;
|
|||||||
use log::debug;
|
use log::debug;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::{store, DocumentId, MResult, RankedMap};
|
use crate::{store, DocumentId, MResult};
|
||||||
use meilidb_schema::Schema;
|
use meilidb_schema::Schema;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@ -113,7 +113,15 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
|||||||
let update_type = UpdateType::Schema {
|
let update_type = UpdateType::Schema {
|
||||||
schema: schema.clone(),
|
schema: schema.clone(),
|
||||||
};
|
};
|
||||||
let result = apply_schema_update(writer, index.main, &schema);
|
let result = apply_schema_update(
|
||||||
|
writer,
|
||||||
|
&schema,
|
||||||
|
index.main,
|
||||||
|
index.documents_fields,
|
||||||
|
index.documents_fields_counts,
|
||||||
|
index.postings_lists,
|
||||||
|
index.docs_words,
|
||||||
|
);
|
||||||
|
|
||||||
(update_type, result, start.elapsed())
|
(update_type, result, start.elapsed())
|
||||||
}
|
}
|
||||||
@ -128,11 +136,6 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
|||||||
Update::DocumentsAddition(documents) => {
|
Update::DocumentsAddition(documents) => {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let ranked_map = match index.main.ranked_map(writer)? {
|
|
||||||
Some(ranked_map) => ranked_map,
|
|
||||||
None => RankedMap::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let update_type = UpdateType::DocumentsAddition {
|
let update_type = UpdateType::DocumentsAddition {
|
||||||
number: documents.len(),
|
number: documents.len(),
|
||||||
};
|
};
|
||||||
@ -144,7 +147,6 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
|||||||
index.documents_fields_counts,
|
index.documents_fields_counts,
|
||||||
index.postings_lists,
|
index.postings_lists,
|
||||||
index.docs_words,
|
index.docs_words,
|
||||||
ranked_map,
|
|
||||||
documents,
|
documents,
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -153,11 +155,6 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
|||||||
Update::DocumentsDeletion(documents) => {
|
Update::DocumentsDeletion(documents) => {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let ranked_map = match index.main.ranked_map(writer)? {
|
|
||||||
Some(ranked_map) => ranked_map,
|
|
||||||
None => RankedMap::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let update_type = UpdateType::DocumentsDeletion {
|
let update_type = UpdateType::DocumentsDeletion {
|
||||||
number: documents.len(),
|
number: documents.len(),
|
||||||
};
|
};
|
||||||
@ -169,7 +166,6 @@ pub fn update_task(writer: &mut heed::RwTxn, index: store::Index) -> MResult<Opt
|
|||||||
index.documents_fields_counts,
|
index.documents_fields_counts,
|
||||||
index.postings_lists,
|
index.postings_lists,
|
||||||
index.docs_words,
|
index.docs_words,
|
||||||
ranked_map,
|
|
||||||
documents,
|
documents,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -1,19 +1,58 @@
|
|||||||
|
use meilidb_schema::{Diff, Schema};
|
||||||
|
|
||||||
|
use crate::update::documents_addition::reindex_all_documents;
|
||||||
use crate::update::{next_update_id, Update};
|
use crate::update::{next_update_id, Update};
|
||||||
use crate::{error::UnsupportedOperation, store, MResult};
|
use crate::{error::UnsupportedOperation, store, MResult};
|
||||||
use meilidb_schema::Schema;
|
|
||||||
|
|
||||||
pub fn apply_schema_update(
|
pub fn apply_schema_update(
|
||||||
writer: &mut heed::RwTxn,
|
writer: &mut heed::RwTxn,
|
||||||
main_store: store::Main,
|
|
||||||
new_schema: &Schema,
|
new_schema: &Schema,
|
||||||
|
main_store: store::Main,
|
||||||
|
documents_fields_store: store::DocumentsFields,
|
||||||
|
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||||
|
postings_lists_store: store::PostingsLists,
|
||||||
|
docs_words_store: store::DocsWords,
|
||||||
) -> MResult<()> {
|
) -> MResult<()> {
|
||||||
if main_store.schema(writer)?.is_some() {
|
use UnsupportedOperation::{
|
||||||
return Err(UnsupportedOperation::SchemaAlreadyExists.into());
|
CannotIntroduceNewSchemaAttribute, CannotRemoveSchemaAttribute,
|
||||||
|
CannotReorderSchemaAttribute, CannotUpdateSchemaIdentifier,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut need_full_reindexing = false;
|
||||||
|
|
||||||
|
if let Some(old_schema) = main_store.schema(writer)? {
|
||||||
|
for diff in meilidb_schema::diff(&old_schema, new_schema) {
|
||||||
|
match diff {
|
||||||
|
Diff::IdentChange { .. } => return Err(CannotUpdateSchemaIdentifier.into()),
|
||||||
|
Diff::AttrMove { .. } => return Err(CannotReorderSchemaAttribute.into()),
|
||||||
|
Diff::AttrPropsChange { old, new, .. } => {
|
||||||
|
if new.indexed != old.indexed {
|
||||||
|
need_full_reindexing = true;
|
||||||
|
}
|
||||||
|
if new.ranked != old.ranked {
|
||||||
|
need_full_reindexing = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Diff::NewAttr { .. } => return Err(CannotIntroduceNewSchemaAttribute.into()),
|
||||||
|
Diff::RemovedAttr { .. } => return Err(CannotRemoveSchemaAttribute.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
main_store
|
main_store.put_schema(writer, new_schema)?;
|
||||||
.put_schema(writer, new_schema)
|
|
||||||
.map_err(Into::into)
|
if need_full_reindexing {
|
||||||
|
reindex_all_documents(
|
||||||
|
writer,
|
||||||
|
main_store,
|
||||||
|
documents_fields_store,
|
||||||
|
documents_fields_counts_store,
|
||||||
|
postings_lists_store,
|
||||||
|
docs_words_store,
|
||||||
|
)?
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn push_schema_update(
|
pub fn push_schema_update(
|
||||||
|
Loading…
Reference in New Issue
Block a user