Move the heed env into the index itself to ease the usage of the library

This commit is contained in:
Clément Renault 2020-10-30 10:56:35 +01:00
parent b5d52b6b45
commit e63fdf2b22
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
6 changed files with 42 additions and 34 deletions

View File

@ -1,4 +1,5 @@
use std::borrow::Cow;
use std::path::Path;
use anyhow::Context;
use heed::types::*;
@ -20,6 +21,8 @@ pub const USERS_IDS_DOCUMENTS_IDS_KEY: &str = "users-ids-documents-ids";
#[derive(Clone)]
pub struct Index {
/// The LMDB environment which this index is associated with.
pub env: heed::Env,
/// Contains many different types (e.g. the fields ids map).
pub main: PolyDatabase,
/// A word and all the documents ids containing the word.
@ -33,14 +36,27 @@ pub struct Index {
}
impl Index {
pub fn new(env: &heed::Env) -> anyhow::Result<Index> {
Ok(Index {
main: env.create_poly_database(Some("main"))?,
word_docids: env.create_database(Some("word-docids"))?,
docid_word_positions: env.create_database(Some("docid-word-positions"))?,
word_pair_proximity_docids: env.create_database(Some("word-pair-proximity-docids"))?,
documents: env.create_database(Some("documents"))?,
})
/// Opens the LMDB environment located at `path` with the given `options` and
/// builds an `Index` that owns this environment together with its databases.
///
/// # Errors
///
/// Returns an error if the environment cannot be opened or if any of the
/// `create_poly_database`/`create_database` calls fails.
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> {
// Exactly five named databases are requested below, so the limit is set to 5.
// NOTE(review): presumably this discards any `max_dbs` the caller already set
// on `options` — confirm against the heed `EnvOpenOptions` documentation.
options.max_dbs(5);
let env = options.open(path)?;
let main = env.create_poly_database(Some("main"))?;
let word_docids = env.create_database(Some("word-docids"))?;
let docid_word_positions = env.create_database(Some("docid-word-positions"))?;
let word_pair_proximity_docids = env.create_database(Some("word-pair-proximity-docids"))?;
let documents = env.create_database(Some("documents"))?;
// The environment is stored in the index so callers no longer carry it separately.
Ok(Index { env, main, word_docids, docid_word_positions, word_pair_proximity_docids, documents })
}
/// Creates a write transaction on the environment owned by this index, to be
/// able to write into the index.
///
/// This delegates to `write_txn` on `self.env` and returns its result unchanged.
pub fn write_txn(&self) -> heed::Result<heed::RwTxn> {
self.env.write_txn()
}
/// Creates a read transaction on the environment owned by this index, to be
/// able to read the index.
///
/// This delegates to `read_txn` on `self.env` and returns its result unchanged.
pub fn read_txn(&self) -> heed::Result<heed::RoTxn> {
self.env.read_txn()
}
/// Writes the documents ids that correspond to the users-ids-documents-ids FST.

View File

@ -130,20 +130,18 @@ enum Command {
}
pub fn run(opt: Opt) -> anyhow::Result<()> {
let env = EnvOpenOptions::new()
.map_size(opt.database_size)
.max_dbs(10)
.open(&opt.database)?;
stderrlog::new()
.verbosity(opt.verbose)
.show_level(false)
.timestamp(stderrlog::Timestamp::Off)
.init()?;
let mut options = EnvOpenOptions::new();
options.map_size(opt.database_size);
// Open the LMDB database.
let index = Index::new(&env)?;
let rtxn = env.read_txn()?;
let index = Index::new(options, opt.database)?;
let rtxn = index.read_txn()?;
match opt.command {
MostCommonWords { limit } => most_common_words(&index, &rtxn, limit),

View File

@ -40,14 +40,12 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
.init()?;
std::fs::create_dir_all(&opt.database)?;
let env = EnvOpenOptions::new()
.map_size(opt.database_size)
.max_dbs(10)
.open(&opt.database)?;
let mut options = EnvOpenOptions::new();
options.map_size(opt.database_size);
// Open the LMDB database.
let index = Index::new(&env)?;
let rtxn = env.read_txn()?;
let index = Index::new(options, &opt.database)?;
let rtxn = index.read_txn()?;
let stdin = io::stdin();
let lines = match opt.query {

View File

@ -181,13 +181,11 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
.init()?;
create_dir_all(&opt.database)?;
let env = EnvOpenOptions::new()
.map_size(opt.database_size)
.max_dbs(10)
.open(&opt.database)?;
let mut options = EnvOpenOptions::new();
options.map_size(opt.database_size);
// Open the LMDB database.
let index = Index::new(&env)?;
let index = Index::new(options, &opt.database)?;
// Setup the LMDB based update database.
let mut update_store_options = EnvOpenOptions::new();
@ -198,7 +196,6 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
let (update_status_sender, _) = broadcast::channel(100);
let update_status_sender_cloned = update_status_sender.clone();
let env_cloned = env.clone();
let index_cloned = index.clone();
let indexer_opt_cloned = opt.indexer.clone();
let update_store = UpdateStore::open(
@ -226,7 +223,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
let result: anyhow::Result<()> = match meta {
UpdateMeta::DocumentsAddition => {
// We must use the write transaction of the update here.
let mut wtxn = env_cloned.write_txn()?;
let mut wtxn = index_cloned.write_txn()?;
let mut builder = update_builder.index_documents(&mut wtxn, &index_cloned);
let replace_documents = true;
@ -283,7 +280,6 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
// Expose an HTML page to debug the search in a browser
let db_name_cloned = db_name.clone();
let lmdb_path_cloned = lmdb_path.clone();
let env_cloned = env.clone();
let index_cloned = index.clone();
let dash_html_route = warp::filters::method::get()
.and(warp::filters::path::end())
@ -296,7 +292,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
.len() as usize;
// And the number of documents in the database.
let rtxn = env_cloned.clone().read_txn().unwrap();
let rtxn = index_cloned.clone().read_txn().unwrap();
let docs_count = index_cloned.clone().number_of_documents(&rtxn).unwrap() as usize;
IndexTemplate { db_name: db_name_cloned.clone(), db_size, docs_count }
@ -304,7 +300,6 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
let update_store_cloned = update_store.clone();
let lmdb_path_cloned = lmdb_path.clone();
let env_cloned = env.clone();
let index_cloned = index.clone();
let updates_list_or_html_route = warp::filters::method::get()
.and(warp::header("Accept"))
@ -335,7 +330,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
.len() as usize;
// And the number of documents in the database.
let rtxn = env_cloned.clone().read_txn().unwrap();
let rtxn = index_cloned.clone().read_txn().unwrap();
let docs_count = index_cloned.clone().number_of_documents(&rtxn).unwrap() as usize;
let template = UpdatesTemplate {
@ -418,14 +413,13 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
query: Option<String>,
}
let env_cloned = env.clone();
let disable_highlighting = opt.disable_highlighting;
let query_route = warp::filters::method::post()
.and(warp::path!("query"))
.and(warp::body::json())
.map(move |query: QueryBody| {
let before_search = Instant::now();
let rtxn = env_cloned.read_txn().unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
if let Some(query) = query.query {

View File

@ -13,6 +13,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
pub fn execute(self) -> anyhow::Result<usize> {
let Index {
env: _env,
main: _main,
word_docids,
docid_word_positions,

View File

@ -69,6 +69,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let id_field = fields_ids_map.id("id").expect(r#"the field "id" to be present"#);
let Index {
env: _env,
main: _main,
word_docids,
docid_word_positions,