mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Bring the newly created word pair proximity docids
This commit is contained in:
parent
d28f18658e
commit
822f67e9ad
@ -16,11 +16,12 @@ use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
|
|||||||
|
|
||||||
pub use self::helpers::{
|
pub use self::helpers::{
|
||||||
create_sorter, create_writer, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
create_sorter, create_writer, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
|
||||||
sorter_into_lmdb_database, write_into_lmdb_database, writer_into_reader, MergeFn,
|
sorter_into_lmdb_database, write_into_lmdb_database, writer_into_reader, ClonableMmap, MergeFn,
|
||||||
};
|
};
|
||||||
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
|
||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
use crate::documents::DocumentBatchReader;
|
use crate::documents::DocumentBatchReader;
|
||||||
|
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
|
self, Facets, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
|
||||||
WordPrefixPairProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
|
WordPrefixPairProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
|
||||||
@ -282,6 +283,7 @@ where
|
|||||||
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||||
let index_is_empty = index_documents_ids.len() == 0;
|
let index_is_empty = index_documents_ids.len() == 0;
|
||||||
let mut final_documents_ids = RoaringBitmap::new();
|
let mut final_documents_ids = RoaringBitmap::new();
|
||||||
|
let mut word_pair_proximity_docids = Vec::new();
|
||||||
|
|
||||||
let mut databases_seen = 0;
|
let mut databases_seen = 0;
|
||||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
@ -289,9 +291,26 @@ where
|
|||||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||||
});
|
});
|
||||||
|
|
||||||
for typed_chunk in lmdb_writer_rx {
|
for result in lmdb_writer_rx {
|
||||||
|
let typed_chunk = match result? {
|
||||||
|
TypedChunk::WordPairProximityDocids(chunk) => {
|
||||||
|
// We extract and mmap our chunk file so that later processing steps can read it again.
|
||||||
|
let mut file = chunk.into_inner();
|
||||||
|
let mmap = unsafe { memmap2::Mmap::map(&file)? };
|
||||||
|
let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
|
||||||
|
let chunk = grenad::Reader::new(cursor_mmap)?;
|
||||||
|
word_pair_proximity_docids.push(chunk);
|
||||||
|
|
||||||
|
// We rewind the file and rebuild the typed chunk from it.
|
||||||
|
file.rewind()?;
|
||||||
|
let chunk = grenad::Reader::new(file)?;
|
||||||
|
TypedChunk::WordPairProximityDocids(chunk)
|
||||||
|
}
|
||||||
|
otherwise => otherwise,
|
||||||
|
};
|
||||||
|
|
||||||
let (docids, is_merged_database) =
|
let (docids, is_merged_database) =
|
||||||
write_typed_chunk_into_index(typed_chunk?, &self.index, self.wtxn, index_is_empty)?;
|
write_typed_chunk_into_index(typed_chunk, &self.index, self.wtxn, index_is_empty)?;
|
||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
final_documents_ids |= docids;
|
final_documents_ids |= docids;
|
||||||
let documents_seen_count = final_documents_ids.len();
|
let documents_seen_count = final_documents_ids.len();
|
||||||
@ -325,13 +344,19 @@ where
|
|||||||
let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
|
let all_documents_ids = index_documents_ids | new_documents_ids | replaced_documents_ids;
|
||||||
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
|
||||||
|
|
||||||
self.execute_prefix_databases()?;
|
self.execute_prefix_databases(word_pair_proximity_docids)?;
|
||||||
|
|
||||||
Ok(all_documents_ids.len())
|
Ok(all_documents_ids.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[logging_timer::time("IndexDocuments::{}")]
|
#[logging_timer::time("IndexDocuments::{}")]
|
||||||
pub fn execute_prefix_databases(self) -> Result<()> {
|
pub fn execute_prefix_databases(
|
||||||
|
self,
|
||||||
|
word_pair_proximity_docids: Vec<grenad::Reader<CursorClonableMmap>>,
|
||||||
|
) -> Result<()>
|
||||||
|
where
|
||||||
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
|
{
|
||||||
// Merged databases have already been indexed; we start from this count.
|
// Merged databases have already been indexed; we start from this count.
|
||||||
let mut databases_seen = MERGED_DATABASE_COUNT;
|
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||||
|
|
||||||
@ -392,7 +417,7 @@ where
|
|||||||
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
|
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
|
||||||
builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
|
builder.max_nb_chunks = self.indexer_config.max_nb_chunks;
|
||||||
builder.max_memory = self.indexer_config.max_memory;
|
builder.max_memory = self.indexer_config.max_memory;
|
||||||
builder.execute(&previous_words_prefixes_fst)?;
|
builder.execute(word_pair_proximity_docids, &previous_words_prefixes_fst)?;
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
@ -7,7 +7,8 @@ use log::debug;
|
|||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use crate::update::index_documents::{
|
use crate::update::index_documents::{
|
||||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, MergeFn, WriteMethod,
|
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_lmdb_database, CursorClonableMmap,
|
||||||
|
MergeFn, WriteMethod,
|
||||||
};
|
};
|
||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
|
|
||||||
@ -61,7 +62,11 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
||||||
pub fn execute<A: AsRef<[u8]>>(self, old_prefix_fst: &fst::Set<A>) -> Result<()> {
|
pub fn execute<A: AsRef<[u8]>>(
|
||||||
|
self,
|
||||||
|
new_word_pair_proximity_docids: Vec<grenad::Reader<CursorClonableMmap>>,
|
||||||
|
old_prefix_fst: &fst::Set<A>,
|
||||||
|
) -> Result<()> {
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
|
|
||||||
self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?;
|
self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?;
|
||||||
|
Loading…
Reference in New Issue
Block a user