Mirror of https://github.com/meilisearch/meilisearch.git, synced 2024-11-25 19:45:05 +08:00
Reduce the number of caches created by using thread_local
This commit is contained in:
parent 58d96fbea3
commit c11b7e5c0f
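The pattern behind this change: `try_arc_for_each_try_init` is a Meilisearch helper which, judging from the diff, follows rayon's `try_for_each_init` convention, where the init closure can run once per work split rather than once per thread, so every run allocated a fresh cache. Routing the init through `thread_local::ThreadLocal::get_or_try` makes each worker thread build its cache once and reuse it, and a `RefCell` restores mutability through the shared `&T` that `get_or_try` returns. Below is a minimal, self-contained sketch of that pattern, not the Meilisearch code itself: `Cache` is a hypothetical stand-in for the `CboCachedSorter` / `WordDocidsCachedSorters` types touched below, and the rayon driver is simplified to `try_for_each`.

use std::cell::RefCell;

use rayon::prelude::*;
use thread_local::ThreadLocal;

// Hypothetical stand-in for the per-thread cache in the diff.
struct Cache {
    entries: Vec<u32>,
}

fn main() {
    // One Cache per worker thread rather than one per rayon split:
    // get_or_try runs the fallible init closure only on a thread's first call.
    let local: ThreadLocal<RefCell<Cache>> = ThreadLocal::new();

    (0u32..10_000)
        .into_par_iter()
        .try_for_each(|n| -> Result<(), String> {
            let cell = local.get_or_try(|| {
                // In the real code this opens an LMDB read transaction and
                // allocates a sorter; here we just allocate a Vec.
                Ok(RefCell::new(Cache { entries: Vec::new() }))
            })?;
            // RefCell gives mutability through the &T that ThreadLocal hands
            // back; sound because only the owning thread can reach this value.
            cell.borrow_mut().entries.push(n);
            Ok(())
        })
        .expect("extraction failed");

    // Every item was recorded in exactly one per-thread cache.
    let total: usize = local.into_iter().map(|c| c.into_inner().entries.len()).sum();
    assert_eq!(total, 10_000);
}

The diffs below apply exactly that wrapping to the extractor state, keeping the read-only `rtxn` outside the cell and moving the mutable pieces inside it.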
Cargo.lock (generated): 5 changes
@@ -3598,6 +3598,7 @@ dependencies = [
  "smartstring",
  "tempfile",
  "thiserror",
+ "thread_local",
  "tiktoken-rs",
  "time",
  "tokenizers",
@@ -5332,9 +5333,9 @@ dependencies = [

 [[package]]
 name = "thread_local"
-version = "1.1.7"
+version = "1.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
+checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
 dependencies = [
  "cfg-if",
  "once_cell",
|
Cargo.toml:

@@ -89,6 +89,7 @@ ureq = { version = "2.10.0", features = ["json"] }
 url = "2.5.2"
 rayon-par-bridge = "0.1.0"
 hashbrown = "0.14.5"
+thread_local = "1.1.8"

 [dev-dependencies]
 mimalloc = { version = "0.1.43", default-features = false }
|
faceted/extract_facets.rs:

@@ -1,3 +1,4 @@
+use std::cell::RefCell;
 use std::collections::HashSet;
 use std::fmt::Debug;
 use std::fs::File;
@@ -7,6 +8,7 @@ use grenad::{MergeFunction, Merger};
 use heed::RoTxn;
 use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
 use serde_json::Value;
+use thread_local::ThreadLocal;

 use super::super::cache::CboCachedSorter;
 use super::facet_document::extract_document_facets;
@@ -216,11 +218,13 @@ impl DocidsExtractor for FacetedDocidsExtractor {
         let span =
             tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
         let _entered = span.enter();
+        let local = ThreadLocal::new();
         document_changes.into_par_iter().try_arc_for_each_try_init(
             || {
+                local.get_or_try(|| {
                 let rtxn = index.read_txn().map_err(Error::from)?;
                 let cache = caches.push(CboCachedSorter::new(
                     // TODO use a better value
                     100.try_into().unwrap(),
                     create_sorter(
                         grenad::SortAlgorithm::Stable,
@@ -231,9 +235,11 @@ impl DocidsExtractor for FacetedDocidsExtractor {
                         max_memory,
                     ),
                 ));
-                Ok((rtxn, fields_ids_map.clone(), Vec::new(), cache))
+                Ok((rtxn, RefCell::new((fields_ids_map.clone(), Vec::new(), cache))))
+                })
             },
-            |(rtxn, fields_ids_map, buffer, cached_sorter), document_change| {
+            |(rtxn, rc), document_change| {
+                let (fields_ids_map, buffer, cached_sorter) = &mut *rc.borrow_mut();
                 Self::extract_document_change(
                     rtxn,
                     index,
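One subtlety in the hunk above: `ThreadLocal::get_or_try` returns a shared `&T`, so the parts of the per-thread state that must be mutated per document (the fields-ids map, the scratch buffer, the cached sorter) go into a `RefCell` and are borrowed back out inside the per-document closure, while the read-only `rtxn` stays outside the cell. A small sketch of that borrow pattern, with placeholder types in place of the real ones:

use std::cell::RefCell;

// Placeholder per-thread state: a scratch buffer and a counter standing in
// for the fields-ids map / cached sorter from the diff.
type State = (Vec<u8>, u32);

fn process_document(rc: &RefCell<State>) {
    // One RefMut borrow, destructured into a &mut per field, mirroring
    // `let (fields_ids_map, buffer, cached_sorter) = &mut *rc.borrow_mut();`
    let (buffer, counter) = &mut *rc.borrow_mut();
    buffer.clear();
    buffer.extend_from_slice(b"doc");
    *counter += 1;
} // the borrow ends here, so the next document can borrow again

fn main() {
    let state = RefCell::new((Vec::new(), 0));
    process_document(&state);
    process_document(&state);
    assert_eq!(state.borrow().1, 2);
}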
|
searchable/extract_word_docids.rs:

@@ -1,3 +1,4 @@
+use std::cell::RefCell;
 use std::collections::HashMap;
 use std::fs::File;
 use std::num::NonZero;
@@ -6,6 +7,7 @@ use std::sync::Arc;
 use grenad::{Merger, MergerBuilder};
 use heed::RoTxn;
 use rayon::iter::IntoParallelIterator;
+use thread_local::ThreadLocal;

 use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use super::SearchableExtractor;
@@ -347,18 +349,23 @@ impl WordDocidsExtractors {
         let span =
             tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
         let _entered = span.enter();
+        let local = ThreadLocal::new();
         document_changes.into_par_iter().try_arc_for_each_try_init(
             || {
+                local.get_or_try(|| {
                 let rtxn = index.read_txn().map_err(Error::from)?;
+                let fields_ids_map = fields_ids_map.clone();
                 let cache = caches.push(WordDocidsCachedSorters::new(
                     indexer,
                     max_memory,
                     // TODO use a better value
                     200_000.try_into().unwrap(),
                 ));
-                Ok((rtxn, &document_tokenizer, fields_ids_map.clone(), cache))
+                Ok((rtxn, &document_tokenizer, RefCell::new((fields_ids_map, cache))))
+                })
             },
-            |(rtxn, document_tokenizer, fields_ids_map, cached_sorter), document_change| {
+            |(rtxn, document_tokenizer, rc), document_change| {
+                let (fields_ids_map, cached_sorter) = &mut *rc.borrow_mut();
                 Self::extract_document_change(
                     rtxn,
                     index,
@@ -377,7 +384,9 @@ impl WordDocidsExtractors {
             tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
         let _entered = span.enter();
         let mut builder = WordDocidsMergerBuilders::new();
+        let mut count = 0;
         for cache in caches.into_iter() {
+            count += 1;
             builder.add_sorters(cache)?;
         }

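The merger_building hunk above walks a shared `caches` collection to feed every per-thread sorter into the merge builder. The `thread_local` crate offers an alternative hand-off for that step: consuming the `ThreadLocal` yields each thread's value exactly once. This is not what the commit does; it is only a sketch of the crate's API under that assumption:

use std::cell::RefCell;
use thread_local::ThreadLocal;

fn main() {
    let local: ThreadLocal<RefCell<Vec<u32>>> = ThreadLocal::new();
    local.get_or(|| RefCell::new(Vec::new())).borrow_mut().push(7);

    // Consuming the ThreadLocal yields each thread's value exactly once,
    // e.g. to feed every per-thread sorter into a merge builder.
    let merged: Vec<u32> = local.into_iter().flat_map(RefCell::into_inner).collect();
    assert_eq!(merged, vec![7]);
}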
|
searchable/mod.rs:

@@ -2,6 +2,7 @@ mod extract_word_docids;
 mod extract_word_pair_proximity_docids;
 mod tokenize_document;

+use std::cell::RefCell;
 use std::fs::File;
 use std::sync::Arc;

@@ -10,6 +11,7 @@ pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
 use grenad::Merger;
 use heed::RoTxn;
 use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
+use thread_local::ThreadLocal;
 use tokenize_document::{tokenizer_builder, DocumentTokenizer};

 use super::cache::CboCachedSorter;
@@ -64,11 +66,13 @@ pub trait SearchableExtractor {
         let span =
             tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
         let _entered = span.enter();
+        let local = ThreadLocal::new();
         document_changes.into_par_iter().try_arc_for_each_try_init(
             || {
+                local.get_or_try(|| {
                 let rtxn = index.read_txn().map_err(Error::from)?;
                 let cache = caches.push(CboCachedSorter::new(
                     // TODO use a better value
                     1_000_000.try_into().unwrap(),
                     create_sorter(
                         grenad::SortAlgorithm::Stable,
@@ -79,9 +83,15 @@ pub trait SearchableExtractor {
                         max_memory,
                     ),
                 ));
-                Ok((rtxn, &document_tokenizer, fields_ids_map.clone(), cache))
+                Ok((
+                    rtxn,
+                    &document_tokenizer,
+                    RefCell::new((fields_ids_map.clone(), cache)),
+                ))
+                })
             },
-            |(rtxn, document_tokenizer, fields_ids_map, cached_sorter), document_change| {
+            |(rtxn, document_tokenizer, rc), document_change| {
+                let (fields_ids_map, cached_sorter) = &mut *rc.borrow_mut();
                 Self::extract_document_change(
                     rtxn,
                     index,