feat: Improve performance by using an FNV hasher

This commit is contained in:
Clément Renault 2018-08-23 21:32:31 +02:00
parent 0b02e31ce9
commit 0814418710
4 changed files with 12 additions and 3 deletions

7
Cargo.lock generated
View File

@ -65,6 +65,11 @@ name = "elapsed"
version = "0.1.2" version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fnv"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "fst" name = "fst"
version = "0.3.0" version = "0.3.0"
@ -219,6 +224,7 @@ name = "raptor"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"byteorder 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=always-match-clone)", "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=always-match-clone)",
"group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)", "group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)",
"levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)", "levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)",
@ -364,6 +370,7 @@ dependencies = [
"checksum cmake 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "704fbf3bb5149daab0afb255dbea24a1f08d2f4099cedb9baab6d470d4c5eefb" "checksum cmake 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "704fbf3bb5149daab0afb255dbea24a1f08d2f4099cedb9baab6d470d4c5eefb"
"checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" "checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
"checksum elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6f4e5af126dafd0741c2ad62d47f68b28602550102e5f0dd45c8a97fc8b49c29" "checksum elapsed 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6f4e5af126dafd0741c2ad62d47f68b28602550102e5f0dd45c8a97fc8b49c29"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=always-match-clone)" = "<none>" "checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=always-match-clone)" = "<none>"
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"

View File

@ -5,6 +5,7 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
[dependencies] [dependencies]
byteorder = "1.2" byteorder = "1.2"
fnv = "1.0"
[dependencies.fst] [dependencies.fst]
git = "https://github.com/Kerollmops/fst.git" git = "https://github.com/Kerollmops/fst.git"

View File

@ -1,4 +1,5 @@
extern crate fst; extern crate fst;
extern crate fnv;
extern crate group_by; extern crate group_by;
extern crate levenshtein_automata; extern crate levenshtein_automata;
extern crate byteorder; extern crate byteorder;

View File

@ -6,9 +6,9 @@ mod sum_of_words_position;
mod exact; mod exact;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::HashMap;
use std::{mem, vec}; use std::{mem, vec};
use fst; use fst;
use fnv::FnvHashMap;
use levenshtein::Levenshtein; use levenshtein::Levenshtein;
use metadata::{DocIndexes, OpWithStateBuilder, UnionWithState}; use metadata::{DocIndexes, OpWithStateBuilder, UnionWithState};
use {Match, DocumentId}; use {Match, DocumentId};
@ -59,7 +59,7 @@ impl Pool {
} }
// TODO remove the matches HashMap, not proud of it // TODO remove the matches HashMap, not proud of it
pub fn extend(&mut self, matches: &mut HashMap<DocumentId, Vec<Match>>) { pub fn extend(&mut self, matches: &mut FnvHashMap<DocumentId, Vec<Match>>) {
for doc in self.documents.iter_mut() { for doc in self.documents.iter_mut() {
if let Some(matches) = matches.remove(&doc.document_id) { if let Some(matches) = matches.remove(&doc.document_id) {
doc.matches.extend(matches); doc.matches.extend(matches);
@ -149,7 +149,7 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
type Item = Document; type Item = Document;
fn next(&'a mut self) -> Option<Self::Item> { fn next(&'a mut self) -> Option<Self::Item> {
let mut matches = HashMap::new(); let mut matches = FnvHashMap::default();
loop { loop {
// TODO remove that when NLL are here ! // TODO remove that when NLL are here !