mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
Make sure we iterate over the payload documents in order
This commit is contained in:
parent
72c6a21a30
commit
8fd0afaaaa
@ -1318,7 +1318,7 @@ impl IndexScheduler {
|
|||||||
match operation {
|
match operation {
|
||||||
DocumentOperation::Add(_content_uuid) => {
|
DocumentOperation::Add(_content_uuid) => {
|
||||||
let mmap = content_files_iter.next().unwrap();
|
let mmap = content_files_iter.next().unwrap();
|
||||||
let stats = indexer.add_documents(&mmap)?;
|
let stats = indexer.add_documents(mmap)?;
|
||||||
// builder = builder.with_embedders(embedders.clone());
|
// builder = builder.with_embedders(embedders.clone());
|
||||||
|
|
||||||
let received_documents =
|
let received_documents =
|
||||||
|
@ -4,6 +4,7 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use heed::types::Bytes;
|
use heed::types::Bytes;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
use memmap2::Mmap;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||||
|
|
||||||
use super::super::document_change::DocumentChange;
|
use super::super::document_change::DocumentChange;
|
||||||
@ -50,9 +51,10 @@ impl<'pl> DocumentOperation<'pl> {
|
|||||||
|
|
||||||
/// TODO please give me a type
|
/// TODO please give me a type
|
||||||
/// The payload is expected to be in the grenad format
|
/// The payload is expected to be in the grenad format
|
||||||
pub fn add_documents(&mut self, payload: &'pl [u8]) -> Result<PayloadStats> {
|
pub fn add_documents(&mut self, payload: &'pl Mmap) -> Result<PayloadStats> {
|
||||||
let document_count = memchr::Memchr::new(b'\n', payload).count();
|
payload.advise(memmap2::Advice::Sequential)?;
|
||||||
self.operations.push(Payload::Addition(payload));
|
let document_count = memchr::Memchr::new(b'\n', &payload[..]).count();
|
||||||
|
self.operations.push(Payload::Addition(&payload[..]));
|
||||||
Ok(PayloadStats { bytes: payload.len() as u64, document_count })
|
Ok(PayloadStats { bytes: payload.len() as u64, document_count })
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,7 +183,18 @@ impl<'p, 'pl: 'p> DocumentChanges<'p> for DocumentOperation<'pl> {
|
|||||||
/// TODO is it the best way to provide FieldsIdsMap to the parallel iterator?
|
/// TODO is it the best way to provide FieldsIdsMap to the parallel iterator?
|
||||||
let fields_ids_map = fields_ids_map.clone();
|
let fields_ids_map = fields_ids_map.clone();
|
||||||
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
|
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
|
||||||
let docids_version_offsets: Vec<_> = docids_version_offsets.drain().collect();
|
let mut docids_version_offsets: Vec<_> = docids_version_offsets.drain().collect();
|
||||||
|
// Reorder the offsets to make sure we iterate on the file sequentially
|
||||||
|
docids_version_offsets.sort_unstable_by_key(|(_, (_, offsets))| {
|
||||||
|
offsets
|
||||||
|
.iter()
|
||||||
|
.rev()
|
||||||
|
.find_map(|ido| match ido {
|
||||||
|
InnerDocOp::Addition(add) => Some(add.content.as_ptr() as usize),
|
||||||
|
InnerDocOp::Deletion => None,
|
||||||
|
})
|
||||||
|
.unwrap_or(0)
|
||||||
|
});
|
||||||
|
|
||||||
Ok(docids_version_offsets
|
Ok(docids_version_offsets
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
|
Loading…
Reference in New Issue
Block a user