From d7b1b7a2a997567b34856d8092f5b4989db14b61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 21 May 2019 13:26:49 +0200 Subject: [PATCH] feat: Remove the documents from the ranked map on documents deletion --- .../src/database/documents_addition.rs | 6 ++-- .../src/database/documents_deletion.rs | 29 ++++++++++++++----- meilidb-data/src/database/index.rs | 3 +- meilidb-data/src/ranked_map.rs | 4 +++ meilidb-data/src/schema.rs | 8 +++++ 5 files changed, 39 insertions(+), 11 deletions(-) diff --git a/meilidb-data/src/database/documents_addition.rs b/meilidb-data/src/database/documents_addition.rs index a65334eb2..9f0794346 100644 --- a/meilidb-data/src/database/documents_addition.rs +++ b/meilidb-data/src/database/documents_addition.rs @@ -60,13 +60,13 @@ impl<'a> DocumentsAddition<'a> { pub fn finalize(self) -> Result<(), Error> { let lease_inner = self.inner.lease_inner(); - let main = &lease_inner.raw.main; - let words = &lease_inner.raw.words; let docs_words = &lease_inner.raw.docs_words; let documents = &lease_inner.raw.documents; + let main = &lease_inner.raw.main; + let words = &lease_inner.raw.words; // 1. remove the previous documents match indexes - let mut documents_deletion = DocumentsDeletion::new(self.inner); + let mut documents_deletion = DocumentsDeletion::new(self.inner, self.ranked_map.clone()); documents_deletion.extend(self.document_ids); documents_deletion.finalize()?; diff --git a/meilidb-data/src/database/documents_deletion.rs b/meilidb-data/src/database/documents_deletion.rs index 5f1424f3b..096429b61 100644 --- a/meilidb-data/src/database/documents_deletion.rs +++ b/meilidb-data/src/database/documents_deletion.rs @@ -1,9 +1,11 @@ use std::collections::{HashMap, BTreeSet}; use std::sync::Arc; -use sdset::{SetBuf, SetOperation, duo::DifferenceByKey}; use fst::{SetBuilder, Streamer}; use meilidb_core::DocumentId; +use sdset::{SetBuf, SetOperation, duo::DifferenceByKey}; + +use crate::RankedMap; use crate::serde::extract_document_id; use super::{Index, Error, InnerIndex}; @@ -11,11 +13,12 @@ use super::{Index, Error, InnerIndex}; pub struct DocumentsDeletion<'a> { inner: &'a Index, documents: Vec, + ranked_map: RankedMap, } impl<'a> DocumentsDeletion<'a> { - pub fn new(inner: &'a Index) -> DocumentsDeletion { - DocumentsDeletion { inner, documents: Vec::new() } + pub fn new(inner: &'a Index, ranked_map: RankedMap) -> DocumentsDeletion { + DocumentsDeletion { inner, documents: Vec::new(), ranked_map } } fn delete_document_by_id(&mut self, id: DocumentId) { @@ -40,10 +43,11 @@ impl<'a> DocumentsDeletion<'a> { pub fn finalize(mut self) -> Result<(), Error> { let lease_inner = self.inner.lease_inner(); - let main = &lease_inner.raw.main; let docs_words = &lease_inner.raw.docs_words; - let words = &lease_inner.raw.words; let documents = &lease_inner.raw.documents; + let main = &lease_inner.raw.main; + let schema = &lease_inner.schema; + let words = &lease_inner.raw.words; let idset = { self.documents.sort_unstable(); @@ -51,8 +55,20 @@ impl<'a> DocumentsDeletion<'a> { SetBuf::new_unchecked(self.documents) }; + // collect the ranked attributes according to the schema + let ranked_attrs: Vec<_> = schema.iter() + .filter_map(|(_, attr, prop)| { + if prop.is_ranked() { Some(attr) } else { None } + }) + .collect(); + let mut words_document_ids = HashMap::new(); for id in idset.into_vec() { + // remove all the ranked attributes from the ranked_map + for ranked_attr in &ranked_attrs { + self.ranked_map.remove(id, *ranked_attr); + } + if let Some(words) = docs_words.doc_words(id)? { let mut stream = words.stream(); while let Some(word) = stream.next() { @@ -105,8 +121,7 @@ impl<'a> DocumentsDeletion<'a> { }; main.set_words_set(&words)?; - - // TODO must update the ranked_map too! + main.set_ranked_map(&self.ranked_map)?; // update the "consistent" view of the Index let ranked_map = lease_inner.ranked_map.clone(); diff --git a/meilidb-data/src/database/index.rs b/meilidb-data/src/database/index.rs index 2b981f5f1..e75a02a9a 100644 --- a/meilidb-data/src/database/index.rs +++ b/meilidb-data/src/database/index.rs @@ -80,7 +80,8 @@ impl Index { } pub fn documents_deletion(&self) -> DocumentsDeletion { - DocumentsDeletion::new(self) + let ranked_map = self.0.lease().ranked_map.clone(); + DocumentsDeletion::new(self, ranked_map) } pub fn document( diff --git a/meilidb-data/src/ranked_map.rs b/meilidb-data/src/ranked_map.rs index aea7c8b20..7c10a0649 100644 --- a/meilidb-data/src/ranked_map.rs +++ b/meilidb-data/src/ranked_map.rs @@ -13,6 +13,10 @@ impl RankedMap { self.0.insert((document, attribute), number); } + pub fn remove(&mut self, document: DocumentId, attribute: SchemaAttr) { + self.0.remove(&(document, attribute)); + } + pub fn get(&self, document: DocumentId, attribute: SchemaAttr) -> Option { self.0.get(&(document, attribute)).cloned() } diff --git a/meilidb-data/src/schema.rs b/meilidb-data/src/schema.rs index bd1c853dc..011413ae4 100644 --- a/meilidb-data/src/schema.rs +++ b/meilidb-data/src/schema.rs @@ -179,6 +179,14 @@ impl Schema { let (name, _) = &self.inner.props[attr.0 as usize]; name } + + pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + self.inner.props.iter() + .map(move |(name, prop)| { + let attr = self.inner.attrs.get(name).unwrap(); + (name.as_str(), *attr, *prop) + }) + } } #[derive(Serialize, Deserialize)]