Merge pull request #93 from Kerollmops/slice-group-by

Use the GroupBy and GroupByMut traits of the slice-group-by library
This commit is contained in:
Clément Renault 2019-01-30 17:52:27 +01:00 committed by GitHub
commit b0b3175641
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 17 additions and 21 deletions

View File

@ -19,6 +19,7 @@ sdset = "0.3"
serde = "1.0" serde = "1.0"
serde_derive = "1.0" serde_derive = "1.0"
serde_json = { version = "1.0", features = ["preserve_order"] } serde_json = { version = "1.0", features = ["preserve_order"] }
slice-group-by = "0.2"
unidecode = "0.3" unidecode = "0.3"
[dependencies.toml] [dependencies.toml]
@ -30,16 +31,12 @@ rev = "0372ba6"
git = "https://github.com/pingcap/rust-rocksdb.git" git = "https://github.com/pingcap/rust-rocksdb.git"
rev = "306e201" rev = "306e201"
[dependencies.group-by]
git = "https://github.com/Kerollmops/group-by.git"
rev = "5a113fe"
[features] [features]
default = ["simd"] default = ["simd"]
i128 = ["bincode/i128", "byteorder/i128"] i128 = ["bincode/i128", "byteorder/i128"]
portable = ["rocksdb/portable"] portable = ["rocksdb/portable"]
simd = ["rocksdb/sse"] simd = ["rocksdb/sse"]
nightly = ["hashbrown/nightly", "group-by/nightly"] nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
[dev-dependencies] [dev-dependencies]
csv = "1.0" csv = "1.0"

View File

@ -2,7 +2,7 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy; use slice_group_by::GroupBy;
use crate::rank::{match_query_index, Document}; use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
@ -16,7 +16,7 @@ fn contains_exact(matches: &&[Match]) -> bool {
#[inline] #[inline]
fn number_exact_matches(matches: &[Match]) -> usize { fn number_exact_matches(matches: &[Match]) -> usize {
GroupBy::new(matches, match_query_index).filter(contains_exact).count() matches.linear_group_by(match_query_index).filter(contains_exact).count()
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]

View File

@ -2,7 +2,7 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy; use slice_group_by::GroupBy;
use crate::rank::{match_query_index, Document}; use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
@ -11,7 +11,7 @@ use crate::Match;
#[inline] #[inline]
fn number_of_query_words(matches: &[Match]) -> usize { fn number_of_query_words(matches: &[Match]) -> usize {
GroupBy::new(matches, match_query_index).count() matches.linear_group_by(match_query_index).count()
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]

View File

@ -2,8 +2,7 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use slice_group_by::GroupBy;
use group_by::GroupBy;
use crate::rank::{match_query_index, Document}; use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
@ -17,7 +16,7 @@ fn sum_matches_typos(matches: &[Match]) -> isize {
// note that GroupBy will never return an empty group // note that GroupBy will never return an empty group
// so we can do this assumption safely // so we can do this assumption safely
for group in GroupBy::new(matches, match_query_index) { for group in matches.linear_group_by(match_query_index) {
sum_typos += unsafe { group.get_unchecked(0).distance as isize }; sum_typos += unsafe { group.get_unchecked(0).distance as isize };
number_words += 1; number_words += 1;
} }

View File

@ -2,7 +2,7 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy; use slice_group_by::GroupBy;
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::rank::{match_query_index, Document}; use crate::rank::{match_query_index, Document};
@ -13,7 +13,7 @@ use crate::Match;
fn sum_matches_attributes(matches: &[Match]) -> usize { fn sum_matches_attributes(matches: &[Match]) -> usize {
// note that GroupBy will never return an empty group // note that GroupBy will never return an empty group
// so we can do this assumption safely // so we can do this assumption safely
GroupBy::new(matches, match_query_index).map(|group| { matches.linear_group_by(match_query_index).map(|group| {
unsafe { group.get_unchecked(0).attribute.attribute() as usize } unsafe { group.get_unchecked(0).attribute.attribute() as usize }
}).sum() }).sum()
} }

View File

@ -2,7 +2,7 @@ use std::cmp::Ordering;
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy; use slice_group_by::GroupBy;
use crate::database::DatabaseView; use crate::database::DatabaseView;
use crate::rank::{match_query_index, Document}; use crate::rank::{match_query_index, Document};
@ -13,7 +13,7 @@ use crate::Match;
fn sum_matches_attribute_index(matches: &[Match]) -> usize { fn sum_matches_attribute_index(matches: &[Match]) -> usize {
// note that GroupBy will never return an empty group // note that GroupBy will never return an empty group
// so we can do this assumption safely // so we can do this assumption safely
GroupBy::new(matches, match_query_index).map(|group| { matches.linear_group_by(match_query_index).map(|group| {
unsafe { group.get_unchecked(0).attribute.word_index() as usize } unsafe { group.get_unchecked(0).attribute.word_index() as usize }
}).sum() }).sum()
} }

View File

@ -2,7 +2,7 @@ use std::cmp::{self, Ordering};
use std::ops::Deref; use std::ops::Deref;
use rocksdb::DB; use rocksdb::DB;
use group_by::GroupBy; use slice_group_by::GroupBy;
use crate::rank::{match_query_index, Document}; use crate::rank::{match_query_index, Document};
use crate::rank::criterion::Criterion; use crate::rank::criterion::Criterion;
@ -36,7 +36,7 @@ fn min_proximity(lhs: &[Match], rhs: &[Match]) -> u32 {
fn matches_proximity(matches: &[Match]) -> u32 { fn matches_proximity(matches: &[Match]) -> u32 {
let mut proximity = 0; let mut proximity = 0;
let mut iter = GroupBy::new(matches, match_query_index); let mut iter = matches.linear_group_by(match_query_index);
// iterate over groups by windows of size 2 // iterate over groups by windows of size 2
let mut last = iter.next(); let mut last = iter.next();

View File

@ -4,7 +4,7 @@ use std::error::Error;
use std::hash::Hash; use std::hash::Hash;
use std::rc::Rc; use std::rc::Rc;
use group_by::BinaryGroupByMut; use slice_group_by::GroupByMut;
use hashbrown::HashMap; use hashbrown::HashMap;
use fst::Streamer; use fst::Streamer;
use rocksdb::DB; use rocksdb::DB;
@ -164,7 +164,7 @@ where D: Deref<Target=DB>,
}); });
info!("criterion {} sort took {}", ci, elapsed); info!("criterion {} sort took {}", ci, elapsed);
for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) { for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) {
documents_seen += group.len(); documents_seen += group.len();
groups.push(group); groups.push(group);
@ -241,7 +241,7 @@ where D: Deref<Target=DB>,
group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view)); group.sort_unstable_by(|a, b| criterion.evaluate(a, b, view));
for group in BinaryGroupByMut::new(group, |a, b| criterion.eq(a, b, view)) { for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b, view)) {
// we must compute the real distinguished len of this sub-group // we must compute the real distinguished len of this sub-group
for document in group.iter() { for document in group.iter() {
let filter_accepted = match &self.inner.filter { let filter_accepted = match &self.inner.filter {