mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
WIP: reset documents in TypedChunk::Documents
This commit is contained in:
parent
cda6ca1ee6
commit
946c762d28
@ -35,7 +35,7 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
|||||||
use crate::error::{Error, InternalError, UserError};
|
use crate::error::{Error, InternalError, UserError};
|
||||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
|
DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
|
||||||
WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
||||||
};
|
};
|
||||||
use crate::{CboRoaringBitmapCodec, Index, Result};
|
use crate::{CboRoaringBitmapCodec, Index, Result};
|
||||||
@ -374,17 +374,6 @@ where
|
|||||||
drop(lmdb_writer_sx)
|
drop(lmdb_writer_sx)
|
||||||
});
|
});
|
||||||
|
|
||||||
// We delete the documents that this document addition replaces. This way we are
|
|
||||||
// able to simply insert all the documents even if they already exist in the database.
|
|
||||||
if !replaced_documents_ids.is_empty() {
|
|
||||||
let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
|
|
||||||
deletion_builder.strategy(self.config.deletion_strategy);
|
|
||||||
debug!("documents to delete {:?}", replaced_documents_ids);
|
|
||||||
deletion_builder.delete_documents(&replaced_documents_ids);
|
|
||||||
let deleted_documents_result = deletion_builder.execute_inner()?;
|
|
||||||
debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
|
|
||||||
}
|
|
||||||
|
|
||||||
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||||
let index_is_empty = index_documents_ids.is_empty();
|
let index_is_empty = index_documents_ids.is_empty();
|
||||||
let mut final_documents_ids = RoaringBitmap::new();
|
let mut final_documents_ids = RoaringBitmap::new();
|
||||||
@ -437,6 +426,7 @@ where
|
|||||||
otherwise => otherwise,
|
otherwise => otherwise,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// FIXME: return newly added as well as newly deleted documents
|
||||||
let (docids, is_merged_database) =
|
let (docids, is_merged_database) =
|
||||||
write_typed_chunk_into_index(typed_chunk, self.index, self.wtxn, index_is_empty)?;
|
write_typed_chunk_into_index(typed_chunk, self.index, self.wtxn, index_is_empty)?;
|
||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
@ -472,8 +462,9 @@ where
|
|||||||
let external_documents_ids = external_documents_ids.into_static();
|
let external_documents_ids = external_documents_ids.into_static();
|
||||||
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
|
||||||
|
|
||||||
|
// FIXME: remove `new_documents_ids` entirely and `replaced_documents_ids`
|
||||||
let all_documents_ids = index_documents_ids | new_documents_ids;
|
let all_documents_ids = index_documents_ids | new_documents_ids;
|
||||||
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
//self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
||||||
|
|
||||||
// TODO: reactivate prefix DB with diff-indexing
|
// TODO: reactivate prefix DB with diff-indexing
|
||||||
// self.execute_prefix_databases(
|
// self.execute_prefix_databases(
|
||||||
|
@ -118,22 +118,38 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let mut is_merged_database = false;
|
let mut is_merged_database = false;
|
||||||
match typed_chunk {
|
match typed_chunk {
|
||||||
TypedChunk::Documents(obkv_documents_iter) => {
|
TypedChunk::Documents(obkv_documents_iter) => {
|
||||||
|
let mut docids = index.documents_ids(wtxn)?;
|
||||||
|
|
||||||
let mut cursor = obkv_documents_iter.into_cursor()?;
|
let mut cursor = obkv_documents_iter.into_cursor()?;
|
||||||
while let Some((docid, reader)) = cursor.move_on_next()? {
|
while let Some((docid, reader)) = cursor.move_on_next()? {
|
||||||
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
||||||
let reader: KvReader<FieldId> = KvReader::new(reader);
|
let reader: KvReader<FieldId> = KvReader::new(reader);
|
||||||
|
let mut written = false;
|
||||||
for (field_id, value) in reader.iter() {
|
for (field_id, value) in reader.iter() {
|
||||||
let Some(value) = KvReaderDelAdd::new(value).get(DelAdd::Addition) else {
|
let Some(value) = KvReaderDelAdd::new(value).get(DelAdd::Addition) else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
// TODO: writer.is_empty
|
||||||
|
written = true;
|
||||||
writer.insert(field_id, value)?;
|
writer.insert(field_id, value)?;
|
||||||
}
|
}
|
||||||
index.documents.remap_types::<ByteSlice, ByteSlice>().put(
|
|
||||||
wtxn,
|
let db = index.documents.remap_data_type::<ByteSlice>();
|
||||||
docid,
|
let docid = docid.try_into().map(DocumentId::from_be_bytes).unwrap();
|
||||||
&writer.into_inner().unwrap(),
|
|
||||||
)?;
|
if written {
|
||||||
|
db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
|
||||||
|
docids.insert(docid);
|
||||||
|
} else {
|
||||||
|
db.delete(wtxn, &BEU32::new(docid))?;
|
||||||
|
// FIXME: unwrap
|
||||||
|
if !docids.remove(docid) {
|
||||||
|
panic!("Attempt to remove a document id that doesn't exist")
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
index.put_documents_ids(wtxn, &docids)?;
|
||||||
}
|
}
|
||||||
TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
|
TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
|
||||||
append_entries_into_database(
|
append_entries_into_database(
|
||||||
|
Loading…
Reference in New Issue
Block a user