283: Use the AlwaysFreePages flag when opening an index r=irevoire a=Kerollmops

We introduced a new flag in our fork of LMDB: the `AlwaysFreePages` flag forces LMDB to always free the single pages it uses before writing to disk, instead of keeping them in a linked list.

Declaring this flag reduces the memory footprint (leak) we observe after indexing a lot of documents.

Fixes #279.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2021-07-05 16:59:16 +00:00 committed by GitHub
commit cc54c41e30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 11 additions and 9 deletions

6
Cargo.lock generated
View File

@ -873,7 +873,7 @@ dependencies = [
[[package]] [[package]]
name = "heed" name = "heed"
version = "0.12.0" version = "0.12.0"
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326" source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"heed-traits", "heed-traits",
@ -891,12 +891,12 @@ dependencies = [
[[package]] [[package]]
name = "heed-traits" name = "heed-traits"
version = "0.7.0" version = "0.7.0"
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326" source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551"
[[package]] [[package]]
name = "heed-types" name = "heed-types"
version = "0.7.2" version = "0.7.2"
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326" source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551"
dependencies = [ dependencies = [
"bincode", "bincode",
"heed-traits", "heed-traits",

View File

@ -11,7 +11,7 @@ milli = { path = "../milli" }
jemallocator = "0.3.2" jemallocator = "0.3.2"
[dev-dependencies] [dev-dependencies]
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" } heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
criterion = { version = "0.3.4", features = ["html_reports"] } criterion = { version = "0.3.4", features = ["html_reports"] }
[build-dependencies] [build-dependencies]

View File

@ -7,7 +7,7 @@ edition = "2018"
[dependencies] [dependencies]
anyhow = "1.0.38" anyhow = "1.0.38"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" } heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
milli = { path = "../milli" } milli = { path = "../milli" }
stderrlog = "0.5.1" stderrlog = "0.5.1"
structopt = { version = "0.3.21", default-features = false } structopt = { version = "0.3.21", default-features = false }

View File

@ -10,7 +10,7 @@ anyhow = "1.0.38"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
crossbeam-channel = "0.5.0" crossbeam-channel = "0.5.0"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" } heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.3" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.3" }
memmap = "0.7.0" memmap = "0.7.0"
milli = { path = "../milli" } milli = { path = "../milli" }

View File

@ -8,7 +8,7 @@ edition = "2018"
anyhow = "1.0.38" anyhow = "1.0.38"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
csv = "1.1.5" csv = "1.1.5"
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" } heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
milli = { path = "../milli" } milli = { path = "../milli" }
roaring = "0.6.6" roaring = "0.6.6"
serde_json = "1.0.62" serde_json = "1.0.62"

View File

@ -14,7 +14,7 @@ flate2 = "1.0.20"
fst = "0.4.5" fst = "0.4.5"
fxhash = "0.2.1" fxhash = "0.2.1"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0", default-features = false, features = ["lmdb", "sync-read-txn"] } heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
human_format = "1.0.3" human_format = "1.0.3"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
linked-hash-map = "0.5.4" linked-hash-map = "0.5.4"

View File

@ -3,6 +3,7 @@ use std::collections::{HashMap, HashSet};
use std::path::Path; use std::path::Path;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use heed::flags::Flags;
use heed::types::*; use heed::types::*;
use heed::{Database, PolyDatabase, RoTxn, RwTxn}; use heed::{Database, PolyDatabase, RoTxn, RwTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -106,6 +107,7 @@ impl Index {
use db_name::*; use db_name::*;
options.max_dbs(14); options.max_dbs(14);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?; let env = options.open(path)?;
let main = env.create_poly_database(Some(MAIN))?; let main = env.create_poly_database(Some(MAIN))?;

View File

@ -7,7 +7,7 @@ edition = "2018"
[dependencies] [dependencies]
anyhow = "1.0.38" anyhow = "1.0.38"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" } heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.1" }
log = "0.4.14" log = "0.4.14"
milli = { path = "../milli" } milli = { path = "../milli" }
serde_json = "1.0.62" serde_json = "1.0.62"