mirror of https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 10:07:40 +08:00
Keep the caches in the AppendOnlyVec
This commit is contained in:
parent 0a8cb471df
commit dead7a56a3
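The diff below removes the per-thread `ItemsPool` state from the extractors: each rayon worker now pushes its cache (`CboCachedSorter` or `WordDocidsCachedSorters`) into an `AppendOnlyVec`, so the caches stay alive after the parallel loop and can all be drained in the merge phase. What follows is only a minimal sketch of that pattern, not the repository's code: the `AppendOnlyVec` here is a hypothetical `Mutex`-backed stand-in (the real one is lock-free), plain `Vec<u32>` values stand in for the caches, and scoped threads stand in for rayon.

```rust
use std::sync::Mutex;

// Hedged sketch only: a Mutex-backed stand-in for the repository's lock-free
// AppendOnlyVec. Elements are boxed so they never move, which makes it sound
// to hand back `&mut T` to the slot that was just pushed.
struct AppendOnlyVec<T>(Mutex<Vec<Box<T>>>);

impl<T> AppendOnlyVec<T> {
    fn new() -> Self {
        Self(Mutex::new(Vec::new()))
    }

    // Appends `value` and returns an exclusive handle to the stored element,
    // mirroring the `let cache = caches.push(...)` calls in the diff.
    fn push(&self, value: T) -> &mut T {
        let mut items = self.0.lock().unwrap();
        items.push(Box::new(value));
        let ptr: *mut T = &mut **items.last_mut().unwrap();
        // SAFETY: the Box never moves and each slot is handed out exactly once.
        unsafe { &mut *ptr }
    }

    // Consumes the vector and yields every cache, like the
    // `caches.into_iter()` calls in the merge phases below.
    fn into_iter(self) -> impl Iterator<Item = T> {
        self.0.into_inner().unwrap().into_iter().map(|boxed| *boxed)
    }
}

fn main() {
    let caches: AppendOnlyVec<Vec<u32>> = AppendOnlyVec::new();

    // Each worker registers its private cache once, then mutates it freely
    // while processing its share of the "documents" (plain numbers here).
    std::thread::scope(|scope| {
        for worker in 0..4u32 {
            let caches = &caches;
            scope.spawn(move || {
                let cache = caches.push(Vec::new());
                for doc in (0..100u32).filter(|doc| *doc % 4 == worker) {
                    cache.push(doc);
                }
            });
        }
    });

    // Merge phase: every per-worker cache is still alive and can be drained.
    let merged: usize = caches.into_iter().map(|cache| cache.len()).sum();
    assert_eq!(merged, 100);
}
```

The property the commit relies on is that `push` takes `&self` yet hands back exclusive access to the freshly stored element, which lets each worker keep mutating its own cache while the shared collection owns it until the merge.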
@@ -99,12 +99,6 @@ fn test_indices() {
     }
 }
 
-impl<T> Default for AppendOnlyVec<T> {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 impl<T> AppendOnlyVec<T> {
     const EMPTY: UnsafeCell<*mut T> = UnsafeCell::new(ptr::null_mut());
 
@@ -220,6 +214,12 @@ impl<T> AppendOnlyVec<T> {
     }
 }
 
+impl<T> Default for AppendOnlyVec<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl<T> Debug for AppendOnlyVec<T> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("AppendOnlyVec").field("len", &self.len()).finish()
@@ -12,6 +12,7 @@ use super::super::cache::CboCachedSorter;
 use super::facet_document::extract_document_facets;
 use super::FacetKind;
 use crate::facet::value_encoding::f64_into_bytes;
+use crate::update::new::append_only_vec::AppendOnlyVec;
 use crate::update::new::extract::DocidsExtractor;
 use crate::update::new::items_pool::ParallelIteratorExt;
 use crate::update::new::{DocumentChange, ItemsPool};
@@ -209,45 +210,40 @@ impl DocidsExtractor for FacetedDocidsExtractor {
         let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
         let attributes_to_extract: Vec<_> =
             attributes_to_extract.iter().map(|s| s.as_ref()).collect();
-
-        let context_pool = ItemsPool::new(|| {
-            Ok((
-                fields_ids_map.clone(),
-                Vec::new(),
-                CboCachedSorter::new(
-                    // TODO use a better value
-                    100.try_into().unwrap(),
-                    create_sorter(
-                        grenad::SortAlgorithm::Stable,
-                        MergeDeladdCboRoaringBitmaps,
-                        indexer.chunk_compression_type,
-                        indexer.chunk_compression_level,
-                        indexer.max_nb_chunks,
-                        max_memory,
-                    ),
-                ),
-            ))
-        });
+        let caches = AppendOnlyVec::new();
 
         {
             let span =
                 tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
             let _entered = span.enter();
             document_changes.into_par_iter().try_arc_for_each_try_init(
-                || index.read_txn().map_err(Error::from),
-                |rtxn, document_change| {
-                    context_pool.with(|(fields_ids_map, buffer, cached_sorter)| {
-                        Self::extract_document_change(
-                            rtxn,
-                            index,
-                            buffer,
-                            fields_ids_map,
-                            &attributes_to_extract,
-                            cached_sorter,
-                            document_change?,
-                        )
-                        .map_err(Arc::new)
-                    })
+                || {
+                    let rtxn = index.read_txn().map_err(Error::from)?;
+                    let cache = caches.push(CboCachedSorter::new(
+                        // TODO use a better value
+                        100.try_into().unwrap(),
+                        create_sorter(
+                            grenad::SortAlgorithm::Stable,
+                            MergeDeladdCboRoaringBitmaps,
+                            indexer.chunk_compression_type,
+                            indexer.chunk_compression_level,
+                            indexer.max_nb_chunks,
+                            max_memory,
+                        ),
+                    ));
+                    Ok((rtxn, fields_ids_map.clone(), Vec::new(), cache))
+                },
+                |(rtxn, fields_ids_map, buffer, cached_sorter), document_change| {
+                    Self::extract_document_change(
+                        rtxn,
+                        index,
+                        buffer,
+                        fields_ids_map,
+                        &attributes_to_extract,
+                        cached_sorter,
+                        document_change?,
+                    )
+                    .map_err(Arc::new)
                 },
             )?;
         }
@@ -257,14 +253,15 @@ impl DocidsExtractor for FacetedDocidsExtractor {
                 tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
             let _entered = span.enter();
-            let readers: Vec<_> = context_pool
-                .into_items()
+
+            let readers: Vec<_> = caches
+                .into_iter()
                 .par_bridge()
-                .map(|(_tokenizer, _fields_ids_map, cached_sorter)| {
+                .map(|cached_sorter| {
                     let sorter = cached_sorter.into_sorter()?;
                     sorter.into_reader_cursors()
                 })
                 .collect();
 
             for reader in readers {
                 builder.extend(reader?);
             }
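In the merge phase above (and again for the searchable extractors further down), the caches drained from the `AppendOnlyVec` are bridged back into rayon so each one is finalized in parallel. A hedged sketch of that shape, assuming `rayon` as a dependency, with `Vec<u32>` standing in for a cache and `len()` standing in for the real `into_sorter()?.into_reader_cursors()` step:

```rust
use rayon::iter::{ParallelBridge, ParallelIterator};

// Finalize every drained cache in parallel. `par_bridge()` does not preserve
// order, which is fine because the results are merged afterwards anyway.
fn finalize_all(caches: Vec<Vec<u32>>) -> Vec<usize> {
    caches
        .into_iter()
        .par_bridge()
        .map(|cache| cache.len()) // stand-in for into_sorter()?.into_reader_cursors()
        .collect()
}

fn main() {
    let readers = finalize_all(vec![vec![1, 2, 3], vec![4, 5]]);
    assert_eq!(readers.iter().sum::<usize>(), 5);
}
```

The design choice here is to drain sequentially (`into_iter()`) and fan the finalization work back out with `par_bridge()`, rather than keeping the caches tied to the rayon workers that produced them.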
@@ -8,6 +8,8 @@ use heed::RoTxn;
 use rayon::iter::IntoParallelIterator;
 
 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
+use super::SearchableExtractor;
+use crate::update::new::append_only_vec::AppendOnlyVec;
 use crate::update::new::extract::cache::CboCachedSorter;
 use crate::update::new::extract::perm_json_p::contained_in;
 use crate::update::new::items_pool::ParallelIteratorExt;
@@ -339,37 +341,33 @@ impl WordDocidsExtractors {
             max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
         };
 
-        let context_pool = ItemsPool::new(|| {
-            Ok((
-                &document_tokenizer,
-                fields_ids_map.clone(),
-                WordDocidsCachedSorters::new(
-                    indexer,
-                    max_memory,
-                    // TODO use a better value
-                    200_000.try_into().unwrap(),
-                ),
-            ))
-        });
+        let caches = AppendOnlyVec::new();
 
         {
             let span =
                 tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
             let _entered = span.enter();
             document_changes.into_par_iter().try_arc_for_each_try_init(
-                || index.read_txn().map_err(Error::from),
-                |rtxn, document_change| {
-                    context_pool.with(|(document_tokenizer, fields_ids_map, cached_sorter)| {
-                        Self::extract_document_change(
-                            rtxn,
-                            index,
-                            document_tokenizer,
-                            fields_ids_map,
-                            cached_sorter,
-                            document_change?,
-                        )
-                        .map_err(Arc::new)
-                    })
+                || {
+                    let rtxn = index.read_txn().map_err(Error::from)?;
+                    let cache = caches.push(WordDocidsCachedSorters::new(
+                        indexer,
+                        max_memory,
+                        // TODO use a better value
+                        200_000.try_into().unwrap(),
+                    ));
+                    Ok((rtxn, &document_tokenizer, fields_ids_map.clone(), cache))
+                },
+                |(rtxn, document_tokenizer, fields_ids_map, cached_sorter), document_change| {
+                    Self::extract_document_change(
+                        rtxn,
+                        index,
+                        document_tokenizer,
+                        fields_ids_map,
+                        cached_sorter,
+                        document_change?,
+                    )
+                    .map_err(Arc::new)
                 },
             )?;
         }
@@ -379,7 +377,7 @@ impl WordDocidsExtractors {
                 tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
             let _entered = span.enter();
             let mut builder = WordDocidsMergerBuilders::new();
-            for (_tokenizer, _fields_ids_map, cache) in context_pool.into_items() {
+            for cache in caches.into_iter() {
                 builder.add_sorters(cache)?;
             }
 
@@ -14,6 +14,7 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
 
 use super::cache::CboCachedSorter;
 use super::DocidsExtractor;
+use crate::update::new::append_only_vec::AppendOnlyVec;
 use crate::update::new::items_pool::ParallelIteratorExt;
 use crate::update::new::{DocumentChange, ItemsPool};
 use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps};
@@ -57,44 +58,39 @@ pub trait SearchableExtractor {
             localized_attributes_rules: &localized_attributes_rules,
             max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
         };
-
-        let context_pool = ItemsPool::new(|| {
-            Ok((
-                &document_tokenizer,
-                fields_ids_map.clone(),
-                CboCachedSorter::new(
-                    // TODO use a better value
-                    1_000_000.try_into().unwrap(),
-                    create_sorter(
-                        grenad::SortAlgorithm::Stable,
-                        MergeDeladdCboRoaringBitmaps,
-                        indexer.chunk_compression_type,
-                        indexer.chunk_compression_level,
-                        indexer.max_nb_chunks,
-                        max_memory,
-                    ),
-                ),
-            ))
-        });
+        let caches = AppendOnlyVec::new();
 
         {
             let span =
                 tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
             let _entered = span.enter();
             document_changes.into_par_iter().try_arc_for_each_try_init(
-                || index.read_txn().map_err(Error::from),
-                |rtxn, document_change| {
-                    context_pool.with(|(document_tokenizer, fields_ids_map, cached_sorter)| {
-                        Self::extract_document_change(
-                            rtxn,
-                            index,
-                            document_tokenizer,
-                            fields_ids_map,
-                            cached_sorter,
-                            document_change?,
-                        )
-                        .map_err(Arc::new)
-                    })
+                || {
+                    let rtxn = index.read_txn().map_err(Error::from)?;
+                    let cache = caches.push(CboCachedSorter::new(
+                        // TODO use a better value
+                        1_000_000.try_into().unwrap(),
+                        create_sorter(
+                            grenad::SortAlgorithm::Stable,
+                            MergeDeladdCboRoaringBitmaps,
+                            indexer.chunk_compression_type,
+                            indexer.chunk_compression_level,
+                            indexer.max_nb_chunks,
+                            max_memory,
+                        ),
+                    ));
+                    Ok((rtxn, &document_tokenizer, fields_ids_map.clone(), cache))
+                },
+                |(rtxn, document_tokenizer, fields_ids_map, cached_sorter), document_change| {
+                    Self::extract_document_change(
+                        rtxn,
+                        index,
+                        document_tokenizer,
+                        fields_ids_map,
+                        cached_sorter,
+                        document_change?,
+                    )
+                    .map_err(Arc::new)
                 },
             )?;
         }
@@ -104,14 +100,15 @@ pub trait SearchableExtractor {
                 tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
             let _entered = span.enter();
-            let readers: Vec<_> = context_pool
-                .into_items()
+
+            let readers: Vec<_> = caches
+                .into_iter()
                 .par_bridge()
-                .map(|(_tokenizer, _fields_ids_map, cached_sorter)| {
+                .map(|cached_sorter| {
                     let sorter = cached_sorter.into_sorter()?;
                     sorter.into_reader_cursors()
                 })
                 .collect();
 
             for reader in readers {
                 builder.extend(reader?);
             }