Bump the roaring version to one that fixes a deserialization bug

This commit is contained in:
Kerollmops 2020-09-10 20:53:18 +02:00
parent d5e5baa20f
commit 3ded98e5fa
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 14 additions and 43 deletions

29
Cargo.lock generated
View File

@ -136,15 +136,6 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "bitpacking"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3744aff20a3437a99ebc0bb7733e9e60c7bf590478c9b897e95b38d57e5acb68"
dependencies = [
"crunchy",
]
[[package]]
name = "block-buffer"
version = "0.7.3"
@ -267,12 +258,6 @@ dependencies = [
"bitflags",
]
[[package]]
name = "cow-utils"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173"
[[package]]
name = "crc32c"
version = "0.4.0"
@ -370,12 +355,6 @@ dependencies = [
"lazy_static 1.4.0",
]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "csv"
version = "1.1.3"
@ -981,18 +960,14 @@ dependencies = [
"arc-cache",
"askama",
"askama_warp",
"bitpacking",
"bstr",
"byteorder",
"cow-utils",
"criterion",
"csv",
"flate2",
"fst",
"fxhash",
"heed",
"indexmap",
"itertools",
"jemallocator",
"levenshtein_automata",
"log 0.4.11",
@ -1649,9 +1624,9 @@ checksum = "21215c1b9d8f7832b433255bd9eea3e2779aa55b21b2f8e13aad62c74749b237"
[[package]]
name = "roaring"
version = "0.6.0"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d6c40b0f4a172f43c3dab852f6c05df5a643aebe7395dbeb598a2f5bb318c1e"
checksum = "99a260b0fb7df2095948f4a1d37afe5d1a08a2ccc7380f418cec049dc9560077"
dependencies = [
"byteorder",
]

View File

@ -8,10 +8,8 @@ default-run = "indexer"
[dependencies]
anyhow = "1.0.28"
arc-cache = { git = "https://github.com/Kerollmops/rust-arc-cache.git", rev = "56530f2" }
bitpacking = "0.8.2"
bstr = "0.2.13"
byteorder = "1.3.4"
cow-utils = "0.1.2"
csv = "1.1.3"
flate2 = "1.0.17"
fst = "0.4.3"
@ -25,7 +23,7 @@ once_cell = "1.4.0"
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "9bf47a7" }
rayon = "1.3.1"
ringtail = "0.3.0"
roaring = "0.6.0"
roaring = "0.6.1"
slice-group-by = "0.2.6"
smallstr = "0.2.0"
smallvec = "1.4.0"
@ -36,12 +34,6 @@ tempfile = "3.1.0"
log = "0.4.11"
stderrlog = "0.4.3"
# best proximity
indexmap = "1.5.1"
# to implement internally
itertools = "0.9.0"
# http server
askama = "0.10.1"
askama_warp = "0.10.0"

View File

@ -4,7 +4,7 @@ use std::cmp;
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::DFA;
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
use log::{debug, error};
use log::debug;
use once_cell::sync::Lazy;
use roaring::bitmap::{IntoIter, RoaringBitmap};
@ -155,11 +155,9 @@ impl<'a> Search<'a> {
let mut union_positions = RoaringBitmap::new();
for (word, (_distance, docids)) in words {
if docids.contains(candidate) {
match index.docid_word_positions.get(rtxn, &(candidate, word))? {
Some(positions) => union_positions.union_with(&positions),
None => error!("position missing for candidate {} and word {:?}", candidate, word),
}
if !docids.contains(candidate) { continue; }
if let Some(positions) = index.docid_word_positions.get(rtxn, &(candidate, word))? {
union_positions.union_with(&positions);
}
}
keywords.push(union_positions.into_iter());
@ -223,6 +221,13 @@ impl<'a> Search<'a> {
path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::<u32>()
}
// If there is only one word, there is no need to compute the best proximities.
if derived_words.len() == 1 {
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
let documents_ids = candidates.iter().take(limit).collect();
return Ok(SearchResult { found_words, documents_ids });
}
let mut paths = Vec::new();
for candidate in candidates {
let keywords = Self::fecth_keywords(rtxn, index, &derived_words, candidate)?;
@ -236,7 +241,6 @@ impl<'a> Search<'a> {
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
let documents_ids = documents.into_iter().map(|(_, id)| id).take(limit).collect();
Ok(SearchResult { found_words, documents_ids })
}
}