mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-02-07 11:26:16 +08:00
Make sure we correctly mix different document operations
This commit is contained in:
parent
d018346f18
commit
8e6893ddbe
@ -54,7 +54,8 @@ pub(crate) enum Batch {
|
||||
|
||||
/// A single document-level step gathered from the tasks of a document-operation batch.
///
/// The `Uuid` payloads are the `content_file` values taken from the tasks; `Delete`
/// carries a copy of the task's `documents_ids`.
#[derive(Debug)]
|
||||
pub(crate) enum DocumentOperation {
|
||||
// NOTE(review): this listing interleaves pre- and post-change lines without +/- markers;
// `Add` looks like the variant that `Replace`/`Update` supersede — confirm against the real file.
Add(Uuid),
|
||||
/// Payload pushed for tasks whose method is `IndexDocumentsMethod::ReplaceDocuments`.
Replace(Uuid),
|
||||
/// Payload pushed for tasks whose method is `IndexDocumentsMethod::UpdateDocuments`.
Update(Uuid),
|
||||
/// Document ids cloned from a `KindWithContent::DocumentDeletion` task.
Delete(Vec<String>),
|
||||
}
|
||||
|
||||
@ -253,7 +254,7 @@ impl IndexScheduler {
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
BatchKind::DocumentOperation { method, operation_ids, .. } => {
|
||||
BatchKind::DocumentOperation { operation_ids, .. } => {
|
||||
let tasks = self.queue.get_existing_tasks_for_processing_batch(
|
||||
rtxn,
|
||||
current_batch,
|
||||
@ -275,9 +276,17 @@ impl IndexScheduler {
|
||||
|
||||
for task in tasks.iter() {
|
||||
match task.kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => {
|
||||
operations.push(DocumentOperation::Add(content_file));
|
||||
}
|
||||
KindWithContent::DocumentAdditionOrUpdate {
|
||||
content_file, method, ..
|
||||
} => match method {
|
||||
IndexDocumentsMethod::ReplaceDocuments => {
|
||||
operations.push(DocumentOperation::Replace(content_file))
|
||||
}
|
||||
IndexDocumentsMethod::UpdateDocuments => {
|
||||
operations.push(DocumentOperation::Update(content_file))
|
||||
}
|
||||
_ => unreachable!("Unknown document merging method"),
|
||||
},
|
||||
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
|
||||
operations.push(DocumentOperation::Delete(documents_ids.clone()));
|
||||
}
|
||||
@ -289,7 +298,6 @@ impl IndexScheduler {
|
||||
op: IndexOperation::DocumentOperation {
|
||||
index_uid,
|
||||
primary_key,
|
||||
method,
|
||||
operations,
|
||||
tasks,
|
||||
},
|
||||
|
@ -62,23 +62,21 @@ impl IndexScheduler {
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
IndexOperation::DocumentOperation {
|
||||
index_uid,
|
||||
primary_key,
|
||||
method,
|
||||
operations,
|
||||
mut tasks,
|
||||
} => {
|
||||
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
|
||||
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
|
||||
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
||||
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
|
||||
// to a fresh thread.
|
||||
let mut content_files = Vec::new();
|
||||
for operation in &operations {
|
||||
if let DocumentOperation::Add(content_uuid) = operation {
|
||||
let content_file = self.queue.file_store.get_update(*content_uuid)?;
|
||||
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
|
||||
content_files.push(mmap);
|
||||
match operation {
|
||||
DocumentOperation::Replace(content_uuid)
|
||||
| DocumentOperation::Update(content_uuid) => {
|
||||
let content_file = self.queue.file_store.get_update(*content_uuid)?;
|
||||
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
|
||||
content_files.push(mmap);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
@ -87,17 +85,23 @@ impl IndexScheduler {
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut content_files_iter = content_files.iter();
|
||||
let mut indexer = indexer::DocumentOperation::new(method);
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let embedders = index
|
||||
.embedding_configs(index_wtxn)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||
for operation in operations {
|
||||
match operation {
|
||||
DocumentOperation::Add(_content_uuid) => {
|
||||
DocumentOperation::Replace(_content_uuid) => {
|
||||
let mmap = content_files_iter.next().unwrap();
|
||||
indexer
|
||||
.add_documents(mmap)
|
||||
.replace_documents(mmap)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
}
|
||||
DocumentOperation::Update(_content_uuid) => {
|
||||
let mmap = content_files_iter.next().unwrap();
|
||||
indexer
|
||||
.update_documents(mmap)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
}
|
||||
DocumentOperation::Delete(document_ids) => {
|
||||
|
@ -23,6 +23,7 @@ use crate::update::new::{Deletion, Insertion, Update};
|
||||
use crate::update::{AvailableIds, IndexDocumentsMethod};
|
||||
use crate::{DocumentId, Error, FieldsIdsMap, Index, InternalError, Result, UserError};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct DocumentOperation<'pl> {
|
||||
operations: Vec<Payload<'pl>>,
|
||||
method: MergeMethod,
|
||||
|
Loading…
x
Reference in New Issue
Block a user