Bump heed to use the git repo with v0.12.0

This commit is contained in:
Clément Renault 2021-06-28 18:26:20 +02:00
parent 0013236e5d
commit bdc5599b73
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
8 changed files with 106 additions and 38 deletions

65
Cargo.lock generated
View File

@ -131,7 +131,7 @@ dependencies = [
"convert_case", "convert_case",
"criterion", "criterion",
"flate2", "flate2",
"heed", "heed 0.10.6",
"jemallocator", "jemallocator",
"milli", "milli",
"reqwest", "reqwest",
@ -868,10 +868,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afcc6c911acaadad3ebe9f1ef1707d80bd71c92037566f47b6238a03b60adf1a" checksum = "afcc6c911acaadad3ebe9f1ef1707d80bd71c92037566f47b6238a03b60adf1a"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"heed-traits", "heed-traits 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"heed-types", "heed-types 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc", "libc",
"lmdb-rkv-sys", "lmdb-rkv-sys 0.11.0",
"once_cell",
"page_size",
"serde",
"synchronoise",
"url",
"zerocopy",
]
[[package]]
name = "heed"
version = "0.12.0"
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326"
dependencies = [
"byteorder",
"heed-traits 0.7.0 (git+https://github.com/Kerollmops/heed?tag=v0.12.0)",
"heed-types 0.7.2 (git+https://github.com/Kerollmops/heed?tag=v0.12.0)",
"libc",
"lmdb-rkv-sys 0.15.0",
"once_cell", "once_cell",
"page_size", "page_size",
"serde", "serde",
@ -886,6 +904,11 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c" checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c"
[[package]]
name = "heed-traits"
version = "0.7.0"
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326"
[[package]] [[package]]
name = "heed-types" name = "heed-types"
version = "0.7.2" version = "0.7.2"
@ -893,7 +916,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405" checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405"
dependencies = [ dependencies = [
"bincode", "bincode",
"heed-traits", "heed-traits 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde",
"serde_json",
"zerocopy",
]
[[package]]
name = "heed-types"
version = "0.7.2"
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.0#6c0b95793a805dc598f05c119494e6c069de0326"
dependencies = [
"bincode",
"heed-traits 0.7.0 (git+https://github.com/Kerollmops/heed?tag=v0.12.0)",
"serde", "serde",
"serde_json", "serde_json",
"zerocopy", "zerocopy",
@ -905,7 +940,7 @@ version = "0.6.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"byte-unit", "byte-unit",
"heed", "heed 0.10.6",
"jemallocator", "jemallocator",
"milli", "milli",
"stderrlog", "stderrlog",
@ -969,7 +1004,7 @@ dependencies = [
"funty", "funty",
"futures", "futures",
"grenad", "grenad",
"heed", "heed 0.12.0",
"jemallocator", "jemallocator",
"log", "log",
"maplit", "maplit",
@ -1103,7 +1138,7 @@ dependencies = [
"anyhow", "anyhow",
"byte-unit", "byte-unit",
"csv", "csv",
"heed", "heed 0.10.6",
"jemallocator", "jemallocator",
"milli", "milli",
"roaring", "roaring",
@ -1275,6 +1310,16 @@ dependencies = [
"pkg-config", "pkg-config",
] ]
[[package]]
name = "lmdb-rkv-sys"
version = "0.15.0"
source = "git+https://github.com/meilisearch/lmdb-rs#d0b50d02938ee84e4e4372697ea991fe2a4cae3b"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.14" version = "0.4.14"
@ -1389,7 +1434,7 @@ dependencies = [
"fst", "fst",
"fxhash", "fxhash",
"grenad", "grenad",
"heed", "heed 0.12.0",
"human_format", "human_format",
"itertools 0.10.0", "itertools 0.10.0",
"levenshtein_automata", "levenshtein_automata",
@ -2236,7 +2281,7 @@ version = "0.6.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"byte-unit", "byte-unit",
"heed", "heed 0.10.6",
"jemallocator", "jemallocator",
"log", "log",
"milli", "milli",

View File

@ -7,7 +7,7 @@ edition = "2018"
[dependencies] [dependencies]
anyhow = "1.0.38" anyhow = "1.0.38"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
heed = "0.10.6" heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" }
milli = { path = "../milli" } milli = { path = "../milli" }
stderrlog = "0.5.1" stderrlog = "0.5.1"
structopt = { version = "0.3.21", default-features = false } structopt = { version = "0.3.21", default-features = false }

View File

@ -10,7 +10,7 @@ anyhow = "1.0.38"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
crossbeam-channel = "0.5.0" crossbeam-channel = "0.5.0"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = "0.10.6" heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" }
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.3" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.3" }
memmap = "0.7.0" memmap = "0.7.0"
milli = { path = "../milli" } milli = { path = "../milli" }

View File

@ -8,7 +8,7 @@ edition = "2018"
anyhow = "1.0.38" anyhow = "1.0.38"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
csv = "1.1.5" csv = "1.1.5"
heed = "0.10.6" heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" }
milli = { path = "../milli" } milli = { path = "../milli" }
roaring = "0.6.6" roaring = "0.6.6"
serde_json = "1.0.62" serde_json = "1.0.62"

View File

@ -14,7 +14,7 @@ flate2 = "1.0.20"
fst = "0.4.5" fst = "0.4.5"
fxhash = "0.2.1" fxhash = "0.2.1"
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
heed = { version = "0.10.6", default-features = false, features = ["lmdb", "sync-read-txn"] } heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0", default-features = false, features = ["lmdb", "sync-read-txn"] }
human_format = "1.0.3" human_format = "1.0.3"
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
linked-hash-map = "0.5.4" linked-hash-map = "0.5.4"

View File

@ -132,7 +132,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
}; };
external_ids.push(external_id); external_ids.push(external_id);
} }
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} }
drop(iter); drop(iter);
@ -143,7 +144,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let ((_docid, word), _positions) = result?; let ((_docid, word), _positions) = result?;
// This boolean will indicate if we must remove this word from the words FST. // This boolean will indicate if we must remove this word from the words FST.
words.push((SmallString32::from(word), false)); words.push((SmallString32::from(word), false));
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} }
} }
@ -194,11 +196,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids.difference_with(&self.documents_ids);
if docids.is_empty() { if docids.is_empty() {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
*must_remove = true; *must_remove = true;
} else if docids.len() != previous_len { } else if docids.len() != previous_len {
let key = key.to_owned(); let key = key.to_owned();
iter.put_current(&key, &docids)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&key, &docids)? };
} }
} }
} }
@ -243,10 +247,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids.difference_with(&self.documents_ids);
if docids.is_empty() { if docids.is_empty() {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
prefixes_to_delete.insert(prefix)?; prefixes_to_delete.insert(prefix)?;
} else if docids.len() != previous_len { } else if docids.len() != previous_len {
iter.put_current(&prefix, &docids)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&prefix, &docids)? };
} }
} }
@ -281,10 +287,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids.difference_with(&self.documents_ids);
if docids.is_empty() { if docids.is_empty() {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} else if docids.len() != previous_len { } else if docids.len() != previous_len {
let key = key.to_owned(); let key = key.to_owned();
iter.put_current(&key, &docids)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&key, &docids)? };
} }
} }
@ -300,10 +308,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids.difference_with(&self.documents_ids);
if docids.is_empty() { if docids.is_empty() {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} else if docids.len() != previous_len { } else if docids.len() != previous_len {
let bytes = bytes.to_owned(); let bytes = bytes.to_owned();
iter.put_current(&bytes, &docids)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&bytes, &docids)? };
} }
} }
@ -317,10 +327,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids.difference_with(&self.documents_ids);
if docids.is_empty() { if docids.is_empty() {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} else if docids.len() != previous_len { } else if docids.len() != previous_len {
let bytes = bytes.to_owned(); let bytes = bytes.to_owned();
iter.put_current(&bytes, &docids)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&bytes, &docids)? };
} }
} }
@ -334,10 +346,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids.difference_with(&self.documents_ids);
if docids.is_empty() { if docids.is_empty() {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} else if docids.len() != previous_len { } else if docids.len() != previous_len {
let bytes = bytes.to_owned(); let bytes = bytes.to_owned();
iter.put_current(&bytes, &docids)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&bytes, &docids)? };
} }
} }
@ -349,9 +363,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(&self.documents_ids); docids.difference_with(&self.documents_ids);
if docids.is_empty() { if docids.is_empty() {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} else if docids.len() != previous_len { } else if docids.len() != previous_len {
iter.put_current(&key, &docids)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&key, &docids)? };
} }
} }
@ -420,7 +436,8 @@ where
while let Some(result) = iter.next() { while let Some(result) = iter.next() {
let (key, ()) = result?; let (key, ()) = result?;
if to_remove.contains(convert(key)) { if to_remove.contains(convert(key)) {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} }
} }
@ -441,10 +458,12 @@ where
let previous_len = docids.len(); let previous_len = docids.len();
docids.difference_with(to_remove); docids.difference_with(to_remove);
if docids.is_empty() { if docids.is_empty() {
iter.del_current()?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.del_current()? };
} else if docids.len() != previous_len { } else if docids.len() != previous_len {
let bytes = bytes.to_owned(); let bytes = bytes.to_owned();
iter.put_current(&bytes, &docids)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(&bytes, &docids)? };
} }
} }

View File

@ -144,7 +144,8 @@ where
WriteMethod::Append => { WriteMethod::Append => {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
while let Some((k, v)) = reader.next()? { while let Some((k, v)) = reader.next()? {
out_iter.append(k, v)?; // safety: we don't keep references from inside the LMDB database.
unsafe { out_iter.append(k, v)? };
} }
} }
WriteMethod::GetMergePut => { WriteMethod::GetMergePut => {
@ -154,7 +155,8 @@ where
Some((key, old_val)) if key == k => { Some((key, old_val)) if key == k => {
let vals = &[Cow::Borrowed(old_val), Cow::Borrowed(v)][..]; let vals = &[Cow::Borrowed(old_val), Cow::Borrowed(v)][..];
let val = merge(k, &vals)?; let val = merge(k, &vals)?;
iter.put_current(k, &val)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(k, &val)? };
} }
_ => { _ => {
drop(iter); drop(iter);
@ -203,7 +205,8 @@ where
WriteMethod::Append => { WriteMethod::Append => {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
while let Some((k, v)) = sorter.next()? { while let Some((k, v)) = sorter.next()? {
out_iter.append(k, v)?; // safety: we don't keep references from inside the LMDB database.
unsafe { out_iter.append(k, v)? };
} }
} }
WriteMethod::GetMergePut => { WriteMethod::GetMergePut => {
@ -216,7 +219,8 @@ where
// TODO just wrap this error? // TODO just wrap this error?
InternalError::IndexingMergingKeys { process: "get-put-merge" } InternalError::IndexingMergingKeys { process: "get-put-merge" }
})?; })?;
iter.put_current(k, &val)?; // safety: we don't keep references from inside the LMDB database.
unsafe { iter.put_current(k, &val)? };
} }
_ => { _ => {
drop(iter); drop(iter);

View File

@ -7,7 +7,7 @@ edition = "2018"
[dependencies] [dependencies]
anyhow = "1.0.38" anyhow = "1.0.38"
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
heed = "0.10.6" heed = { git = "https://github.com/Kerollmops/heed", tag = "v0.12.0" }
log = "0.4.14" log = "0.4.14"
milli = { path = "../milli" } milli = { path = "../milli" }
serde_json = "1.0.62" serde_json = "1.0.62"