diff --git a/Cargo.lock b/Cargo.lock
index 53546e3b1..447318d74 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1238,7 +1238,7 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
[[package]]
name = "oxidized-mtbl"
version = "0.1.0"
-source = "git+https://github.com/Kerollmops/oxidized-mtbl.git?rev=13294cc#13294ccd73c9d6f71645a3ed2852656f3c86d31d"
+source = "git+https://github.com/Kerollmops/oxidized-mtbl.git?rev=4ca66e5#4ca66e50115da760f602e878943af59f06c53af1"
dependencies = [
"byteorder",
"crc32c",
diff --git a/Cargo.toml b/Cargo.toml
index e86eac185..50683e7cb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,7 +22,7 @@ jemallocator = "0.3.2"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
memmap = "0.7.0"
once_cell = "1.4.0"
-oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "13294cc" }
+oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "4ca66e5" }
rayon = "1.3.1"
ringtail = "0.3.0"
roaring = { git = "https://github.com/Kerollmops/roaring-rs.git", branch = "mem-usage" }
diff --git a/src/bin/indexer.rs b/src/bin/indexer.rs
index 708c57b4c..0dd7ef074 100644
--- a/src/bin/indexer.rs
+++ b/src/bin/indexer.rs
@@ -579,13 +579,16 @@ fn main() -> anyhow::Result<()> {
builder.extend(docs_stores);
builder.build().write_into(&mut writer)?;
let file = writer.into_inner()?;
+
+ // Read back the documents MTBL database from the file.
let documents_mmap = unsafe { memmap::Mmap::map(&file)? };
+ let documents = Reader::new(documents_mmap)?;
debug!("We are writing the postings lists and documents into LMDB on disk...");
// We merge the postings lists into LMDB.
let mut wtxn = env.write_txn()?;
merge_into_lmdb(stores, |k, v| lmdb_writer(&mut wtxn, &index, k, v))?;
- index.put_documents(&mut wtxn, &documents_mmap)?;
+ index.put_documents(&mut wtxn, &documents)?;
let count = index.number_of_documents(&wtxn)?;
wtxn.commit()?;
diff --git a/src/heed_codec/mtbl_codec.rs b/src/heed_codec/mtbl_codec.rs
index c36960079..b4815da4f 100644
--- a/src/heed_codec/mtbl_codec.rs
+++ b/src/heed_codec/mtbl_codec.rs
@@ -1,9 +1,10 @@
use std::borrow::Cow;
+use std::marker::PhantomData;
use oxidized_mtbl::Reader;
-pub struct MtblCodec;
+pub struct MtblCodec<A>(PhantomData<A>);
-impl<'a> heed::BytesDecode<'a> for MtblCodec {
+impl<'a> heed::BytesDecode<'a> for MtblCodec<&'a [u8]> {
type DItem = Reader<&'a [u8]>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
@@ -11,10 +12,10 @@ impl<'a> heed::BytesDecode<'a> for MtblCodec {
}
}
-impl heed::BytesEncode<'_> for MtblCodec {
- type EItem = [u8];
+impl<'a, A: AsRef<[u8]> + 'a> heed::BytesEncode<'a> for MtblCodec<A> {
+ type EItem = Reader<A>;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
- Some(Cow::Borrowed(item))
+ Some(Cow::Borrowed(item.as_bytes()))
}
}
diff --git a/src/lib.rs b/src/lib.rs
index bcfc3d2bb..65b8f0534 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -12,6 +12,7 @@ use anyhow::{bail, Context};
use fxhash::{FxHasher32, FxHasher64};
use heed::types::*;
use heed::{PolyDatabase, Database};
+use oxidized_mtbl as omtbl;
pub use self::search::{Search, SearchResult};
pub use self::criterion::{Criterion, default_criteria};
@@ -90,7 +91,7 @@ impl Index {
iter: impl IntoIterator- ,
) -> anyhow::Result)>>
{
- match self.main.get::<_, Str, MtblCodec>(rtxn, DOCUMENTS_KEY)? {
+ match self.main.get::<_, Str, MtblCodec<&[u8]>>(rtxn, DOCUMENTS_KEY)? {
Some(documents) => {
iter.into_iter().map(|id| {
let key = id.to_be_bytes();
@@ -103,13 +104,13 @@ impl Index {
}
}
- pub fn put_documents(&self, wtxn: &mut heed::RwTxn, documents: &[u8]) -> anyhow::Result<()> {
- Ok(self.main.put::<_, Str, MtblCodec>(wtxn, DOCUMENTS_KEY, documents)?)
+ pub fn put_documents<A: AsRef<[u8]>>(&self, wtxn: &mut heed::RwTxn, documents: &omtbl::Reader<A>) -> anyhow::Result<()> {
+ Ok(self.main.put::<_, Str, MtblCodec<A>>(wtxn, DOCUMENTS_KEY, documents)?)
}
/// Returns the number of documents indexed in the database.
pub fn number_of_documents<'t>(&self, rtxn: &'t heed::RoTxn) -> anyhow::Result<usize> {
- match self.main.get::<_, Str, MtblCodec>(rtxn, DOCUMENTS_KEY)? {
+ match self.main.get::<_, Str, MtblCodec<&[u8]>>(rtxn, DOCUMENTS_KEY)? {
Some(documents) => Ok(documents.metadata().count_entries as usize),
None => return Ok(0),
}