From d0ee5f12a0dd1b3ef9d9c4b0cf61ba46d7fabbc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 16 Dec 2018 14:21:41 +0100 Subject: [PATCH] feat: Introduce the BufferedDistinctMap struct --- src/rank/distinct_map.rs | 64 +++++++++++++++++++++++++++++++-------- src/rank/query_builder.rs | 2 +- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/src/rank/distinct_map.rs b/src/rank/distinct_map.rs index b7a5edee3..81afe1cf0 100644 --- a/src/rank/distinct_map.rs +++ b/src/rank/distinct_map.rs @@ -17,10 +17,33 @@ impl DistinctMap { } } + pub fn len(&self) -> usize { + self.len + } +} + +pub struct BufferedDistinctMap<'a, K> { + internal: &'a mut DistinctMap, + inner: HashMap, + len: usize, +} + +impl<'a, K: Hash + Eq> BufferedDistinctMap<'a, K> { + pub fn new(internal: &'a mut DistinctMap) -> BufferedDistinctMap<'a, K> { + BufferedDistinctMap { + internal: internal, + inner: HashMap::new(), + len: 0, + } + } + pub fn register(&mut self, key: K) -> bool { - let seen = self.inner.entry(key).or_insert(0); - if *seen < self.limit { - *seen += 1; + let internal_seen = self.internal.inner.get(&key).unwrap_or(&0); + let inner_seen = self.inner.entry(key).or_insert(0); + let seen = *internal_seen + *inner_seen; + + if seen < self.internal.limit { + *inner_seen += 1; self.len += 1; true } else { @@ -33,8 +56,18 @@ impl DistinctMap { true } + pub fn transfert_to_internal(&mut self) { + for (k, v) in self.inner.drain() { + let value = self.internal.inner.entry(k).or_insert(0); + *value += v; + } + + self.internal.len += self.len; + self.len = 0; + } + pub fn len(&self) -> usize { - self.len + self.internal.len() + self.len } } @@ -45,22 +78,27 @@ mod tests { #[test] fn easy_distinct_map() { let mut map = DistinctMap::new(2); + let mut buffered = BufferedDistinctMap::new(&mut map); + for x in &[1, 1, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6] { - map.register(x); + buffered.register(x); } + buffered.transfert_to_internal(); assert_eq!(map.len(), 8); let mut map = DistinctMap::new(2); - assert_eq!(map.register(1), true); - assert_eq!(map.register(1), true); - assert_eq!(map.register(1), false); - assert_eq!(map.register(1), false); + let mut buffered = BufferedDistinctMap::new(&mut map); + assert_eq!(buffered.register(1), true); + assert_eq!(buffered.register(1), true); + assert_eq!(buffered.register(1), false); + assert_eq!(buffered.register(1), false); - assert_eq!(map.register(2), true); - assert_eq!(map.register(3), true); - assert_eq!(map.register(2), true); - assert_eq!(map.register(2), false); + assert_eq!(buffered.register(2), true); + assert_eq!(buffered.register(3), true); + assert_eq!(buffered.register(2), true); + assert_eq!(buffered.register(2), false); + buffered.transfert_to_internal(); assert_eq!(map.len(), 5); } } diff --git a/src/rank/query_builder.rs b/src/rank/query_builder.rs index 5dc2b8da7..4721cc5f2 100644 --- a/src/rank/query_builder.rs +++ b/src/rank/query_builder.rs @@ -10,7 +10,7 @@ use fst::Streamer; use rocksdb::DB; use crate::automaton::{self, DfaExt, AutomatonExt}; -use crate::rank::distinct_map::DistinctMap; +use crate::rank::distinct_map::{DistinctMap, BufferedDistinctMap}; use crate::rank::criterion::Criteria; use crate::database::DatabaseView; use crate::{Match, DocumentId};