mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Fix the facet string docids filterable deletion bug
This commit is contained in:
parent
0d1f83ba4b
commit
c084f7f731
@ -4,12 +4,15 @@ use std::collections::HashMap;
|
||||
use chrono::Utc;
|
||||
use fst::IntoStreamer;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::ClearDocuments;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::heed_codec::facet::FacetStringLevelZeroValueCodec;
|
||||
use crate::error::{InternalError, SerializationError, UserError};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetLevelValueU32Codec, FacetStringLevelZeroValueCodec, FacetStringZeroBoundsValueCodec,
|
||||
};
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::index::{db_name, main_key};
|
||||
use crate::{DocumentId, ExternalDocumentsIds, FieldId, Index, Result, SmallString32, BEU32};
|
||||
@ -451,26 +454,61 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove_docids_from_facet_field_id_string_docids<'a, C>(
|
||||
fn remove_docids_from_facet_field_id_string_docids<'a, C, D>(
|
||||
wtxn: &'a mut heed::RwTxn,
|
||||
db: &heed::Database<C, FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>>,
|
||||
db: &heed::Database<C, D>,
|
||||
to_remove: &RoaringBitmap,
|
||||
) -> heed::Result<()>
|
||||
where
|
||||
C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
|
||||
{
|
||||
let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
|
||||
) -> crate::Result<()> {
|
||||
let db_name = Some(crate::index::db_name::FACET_ID_STRING_DOCIDS);
|
||||
let mut iter = db.remap_types::<ByteSlice, ByteSlice>().iter_mut(wtxn)?;
|
||||
while let Some(result) = iter.next() {
|
||||
let (bytes, (original_value, mut docids)) = result?;
|
||||
let previous_len = docids.len();
|
||||
docids -= to_remove;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
} else if docids.len() != previous_len {
|
||||
let bytes = bytes.to_owned();
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.put_current(&bytes, &(original_value, docids))? };
|
||||
let (key, val) = result?;
|
||||
match FacetLevelValueU32Codec::bytes_decode(key) {
|
||||
Some(_) => {
|
||||
// If we are able to parse this key it means it is a facet string group
|
||||
// level key. We must then parse the value using the appropriate codec.
|
||||
let (group, mut docids) =
|
||||
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
|
||||
.ok_or_else(|| SerializationError::Decoding { db_name })?;
|
||||
|
||||
let previous_len = docids.len();
|
||||
docids -= to_remove;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
} else if docids.len() != previous_len {
|
||||
let key = key.to_owned();
|
||||
let val = &(group, docids);
|
||||
let value_bytes =
|
||||
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
|
||||
.ok_or_else(|| SerializationError::Encoding { db_name })?;
|
||||
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.put_current(&key, &value_bytes)? };
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// The key corresponds to a level zero facet string.
|
||||
let (original_value, mut docids) =
|
||||
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
|
||||
.ok_or_else(|| SerializationError::Decoding { db_name })?;
|
||||
|
||||
let previous_len = docids.len();
|
||||
docids -= to_remove;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
} else if docids.len() != previous_len {
|
||||
let key = key.to_owned();
|
||||
let val = &(original_value, docids);
|
||||
let value_bytes =
|
||||
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
|
||||
.ok_or_else(|| SerializationError::Encoding { db_name })?;
|
||||
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.put_current(&key, &value_bytes)? };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -505,10 +543,13 @@ where
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashset;
|
||||
|
||||
use super::*;
|
||||
use crate::update::{IndexDocuments, UpdateFormat};
|
||||
use crate::update::{IndexDocuments, Settings, UpdateFormat};
|
||||
use crate::FilterCondition;
|
||||
|
||||
#[test]
|
||||
fn delete_documents_with_numbers_as_primary_key() {
|
||||
@ -566,4 +607,55 @@ mod tests {
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn delete_documents_with_filterable_attributes() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_primary_key(S("docid"));
|
||||
builder.set_filterable_fields(hashset! { S("label") });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
|
||||
let content = &br#"[
|
||||
{"docid":"1_4","label":"sign"},
|
||||
{"docid":"1_5","label":"letter"},
|
||||
{"docid":"1_7","label":"abstract,cartoon,design,pattern"},
|
||||
{"docid":"1_36","label":"drawing,painting,pattern"},
|
||||
{"docid":"1_37","label":"art,drawing,outdoor"},
|
||||
{"docid":"1_38","label":"aquarium,art,drawing"},
|
||||
{"docid":"1_39","label":"abstract"},
|
||||
{"docid":"1_40","label":"cartoon"},
|
||||
{"docid":"1_41","label":"art,drawing"},
|
||||
{"docid":"1_42","label":"art,pattern"},
|
||||
{"docid":"1_43","label":"abstract,art,drawing,pattern"},
|
||||
{"docid":"1_44","label":"drawing"},
|
||||
{"docid":"1_45","label":"art"},
|
||||
{"docid":"1_46","label":"abstract,colorfulness,pattern"},
|
||||
{"docid":"1_47","label":"abstract,pattern"},
|
||||
{"docid":"1_52","label":"abstract,cartoon"},
|
||||
{"docid":"1_57","label":"abstract,drawing,pattern"},
|
||||
{"docid":"1_58","label":"abstract,art,cartoon"},
|
||||
{"docid":"1_68","label":"design"},
|
||||
{"docid":"1_69","label":"geometry"}
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
|
||||
// Delete not all of the documents but some of them.
|
||||
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
|
||||
builder.delete_external_id("1_4");
|
||||
builder.execute().unwrap();
|
||||
|
||||
let filter = FilterCondition::from_str(&wtxn, &index, "label = sign").unwrap();
|
||||
let results = index.search(&wtxn).filter(filter).execute().unwrap();
|
||||
assert!(results.documents_ids.is_empty());
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user