Merge pull request #152 from meilisearch/documents-deletion-updates-ranked-map

Remove the documents from the ranked map on documents deletion
This commit is contained in:
Clément Renault 2019-05-21 13:59:21 +02:00 committed by GitHub
commit b1fc3e5cec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 39 additions and 11 deletions

View File

@ -60,13 +60,13 @@ impl<'a> DocumentsAddition<'a> {
pub fn finalize(self) -> Result<(), Error> { pub fn finalize(self) -> Result<(), Error> {
let lease_inner = self.inner.lease_inner(); let lease_inner = self.inner.lease_inner();
let main = &lease_inner.raw.main;
let words = &lease_inner.raw.words;
let docs_words = &lease_inner.raw.docs_words; let docs_words = &lease_inner.raw.docs_words;
let documents = &lease_inner.raw.documents; let documents = &lease_inner.raw.documents;
let main = &lease_inner.raw.main;
let words = &lease_inner.raw.words;
// 1. remove the previous documents match indexes // 1. remove the previous documents match indexes
let mut documents_deletion = DocumentsDeletion::new(self.inner); let mut documents_deletion = DocumentsDeletion::new(self.inner, self.ranked_map.clone());
documents_deletion.extend(self.document_ids); documents_deletion.extend(self.document_ids);
documents_deletion.finalize()?; documents_deletion.finalize()?;

View File

@ -1,9 +1,11 @@
use std::collections::{HashMap, BTreeSet}; use std::collections::{HashMap, BTreeSet};
use std::sync::Arc; use std::sync::Arc;
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};
use fst::{SetBuilder, Streamer}; use fst::{SetBuilder, Streamer};
use meilidb_core::DocumentId; use meilidb_core::DocumentId;
use sdset::{SetBuf, SetOperation, duo::DifferenceByKey};
use crate::RankedMap;
use crate::serde::extract_document_id; use crate::serde::extract_document_id;
use super::{Index, Error, InnerIndex}; use super::{Index, Error, InnerIndex};
@ -11,11 +13,12 @@ use super::{Index, Error, InnerIndex};
pub struct DocumentsDeletion<'a> { pub struct DocumentsDeletion<'a> {
inner: &'a Index, inner: &'a Index,
documents: Vec<DocumentId>, documents: Vec<DocumentId>,
ranked_map: RankedMap,
} }
impl<'a> DocumentsDeletion<'a> { impl<'a> DocumentsDeletion<'a> {
pub fn new(inner: &'a Index) -> DocumentsDeletion { pub fn new(inner: &'a Index, ranked_map: RankedMap) -> DocumentsDeletion {
DocumentsDeletion { inner, documents: Vec::new() } DocumentsDeletion { inner, documents: Vec::new(), ranked_map }
} }
fn delete_document_by_id(&mut self, id: DocumentId) { fn delete_document_by_id(&mut self, id: DocumentId) {
@ -40,10 +43,11 @@ impl<'a> DocumentsDeletion<'a> {
pub fn finalize(mut self) -> Result<(), Error> { pub fn finalize(mut self) -> Result<(), Error> {
let lease_inner = self.inner.lease_inner(); let lease_inner = self.inner.lease_inner();
let main = &lease_inner.raw.main;
let docs_words = &lease_inner.raw.docs_words; let docs_words = &lease_inner.raw.docs_words;
let words = &lease_inner.raw.words;
let documents = &lease_inner.raw.documents; let documents = &lease_inner.raw.documents;
let main = &lease_inner.raw.main;
let schema = &lease_inner.schema;
let words = &lease_inner.raw.words;
let idset = { let idset = {
self.documents.sort_unstable(); self.documents.sort_unstable();
@ -51,8 +55,20 @@ impl<'a> DocumentsDeletion<'a> {
SetBuf::new_unchecked(self.documents) SetBuf::new_unchecked(self.documents)
}; };
// collect the ranked attributes according to the schema
let ranked_attrs: Vec<_> = schema.iter()
.filter_map(|(_, attr, prop)| {
if prop.is_ranked() { Some(attr) } else { None }
})
.collect();
let mut words_document_ids = HashMap::new(); let mut words_document_ids = HashMap::new();
for id in idset.into_vec() { for id in idset.into_vec() {
// remove all the ranked attributes from the ranked_map
for ranked_attr in &ranked_attrs {
self.ranked_map.remove(id, *ranked_attr);
}
if let Some(words) = docs_words.doc_words(id)? { if let Some(words) = docs_words.doc_words(id)? {
let mut stream = words.stream(); let mut stream = words.stream();
while let Some(word) = stream.next() { while let Some(word) = stream.next() {
@ -105,8 +121,7 @@ impl<'a> DocumentsDeletion<'a> {
}; };
main.set_words_set(&words)?; main.set_words_set(&words)?;
main.set_ranked_map(&self.ranked_map)?;
// TODO must update the ranked_map too!
// update the "consistent" view of the Index // update the "consistent" view of the Index
let ranked_map = lease_inner.ranked_map.clone(); let ranked_map = lease_inner.ranked_map.clone();

View File

@ -80,7 +80,8 @@ impl Index {
} }
pub fn documents_deletion(&self) -> DocumentsDeletion { pub fn documents_deletion(&self) -> DocumentsDeletion {
DocumentsDeletion::new(self) let ranked_map = self.0.lease().ranked_map.clone();
DocumentsDeletion::new(self, ranked_map)
} }
pub fn document<T>( pub fn document<T>(

View File

@ -13,6 +13,10 @@ impl RankedMap {
self.0.insert((document, attribute), number); self.0.insert((document, attribute), number);
} }
/// Removes the entry stored under the `(document, attribute)` key.
///
/// Whatever the underlying map's `remove` returns is discarded.
pub fn remove(&mut self, document: DocumentId, attribute: SchemaAttr) {
    let key = (document, attribute);
    self.0.remove(&key);
}
pub fn get(&self, document: DocumentId, attribute: SchemaAttr) -> Option<Number> { pub fn get(&self, document: DocumentId, attribute: SchemaAttr) -> Option<Number> {
self.0.get(&(document, attribute)).cloned() self.0.get(&(document, attribute)).cloned()
} }

View File

@ -179,6 +179,14 @@ impl Schema {
let (name, _) = &self.inner.props[attr.0 as usize]; let (name, _) = &self.inner.props[attr.0 as usize];
name name
} }
/// Iterates over the schema attributes, following the order of the `props` list.
///
/// Each item is a `(name, attribute, properties)` triple: the attribute name,
/// the `SchemaAttr` registered under that name in `attrs`, and a copy of its
/// `SchemaProps`.
///
/// # Panics
///
/// Panics while iterating if a name listed in `props` has no entry in `attrs`.
pub fn iter<'a>(&'a self) -> impl Iterator<Item=(&str, SchemaAttr, SchemaProps)> + 'a {
    let inner = &self.inner;
    inner.props.iter().map(move |entry| {
        let (name, props) = entry;
        // every name in `props` is expected to be registered in `attrs`
        let attr = inner.attrs.get(name).unwrap();
        (name.as_str(), *attr, *props)
    })
}
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]