mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Allow users to set the arc cache size when indexing
This commit is contained in:
parent
ac8353a64f
commit
cd7e64b2b3
@ -41,6 +41,11 @@ struct Opt {
|
|||||||
#[structopt(long = "db", parse(from_os_str))]
|
#[structopt(long = "db", parse(from_os_str))]
|
||||||
database: PathBuf,
|
database: PathBuf,
|
||||||
|
|
||||||
|
/// The number of words that can fit in the cache. The bigger this number is, the less
|
||||||
|
/// the indexer will touch the databases on disk, but the more memory it uses.
|
||||||
|
#[structopt(long, default_value = "100000")]
|
||||||
|
arc_cache_size: usize,
|
||||||
|
|
||||||
/// CSV file to index.
|
/// CSV file to index.
|
||||||
csv_file: PathBuf,
|
csv_file: PathBuf,
|
||||||
}
|
}
|
||||||
@ -82,13 +87,14 @@ fn index_csv<R: io::Read>(
|
|||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
mut rdr: csv::Reader<R>,
|
mut rdr: csv::Reader<R>,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
|
arc_cache_size: usize,
|
||||||
num_threads: usize,
|
num_threads: usize,
|
||||||
thread_index: usize,
|
thread_index: usize,
|
||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
{
|
{
|
||||||
eprintln!("Indexing into LMDB...");
|
eprintln!("Indexing into LMDB...");
|
||||||
|
|
||||||
let mut words_cache = ArcCache::<_, (RoaringBitmap, FastMap4<_, RoaringBitmap>)>::new(100_000);
|
let mut words_cache = ArcCache::<_, (RoaringBitmap, FastMap4<_, RoaringBitmap>)>::new(arc_cache_size);
|
||||||
|
|
||||||
// Write the headers into a Vec of bytes.
|
// Write the headers into a Vec of bytes.
|
||||||
let headers = rdr.headers()?;
|
let headers = rdr.headers()?;
|
||||||
@ -474,7 +480,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
let mut wtxn = env.write_txn()?;
|
let mut wtxn = env.write_txn()?;
|
||||||
let rdr = csv::Reader::from_path(&opt.csv_file)?;
|
let rdr = csv::Reader::from_path(&opt.csv_file)?;
|
||||||
index_csv(&mut wtxn, rdr, &index, num_threads, i)?;
|
index_csv(&mut wtxn, rdr, &index, opt.arc_cache_size, num_threads, i)?;
|
||||||
|
|
||||||
wtxn.commit()?;
|
wtxn.commit()?;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user