fix: Make the SumOfTypos criterion use a more clever algorithm

This commit is contained in:
Clément Renault 2019-02-12 15:36:45 +01:00
parent cf58cf86da
commit 85a1f126bf
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE

View File

@ -7,17 +7,20 @@ use crate::rank::RawDocument;
// NOTE(review): this span looks like a diff hunk rendered without +/- markers,
// so the pre-change (integer) and post-change (floating-point) statements
// appear interleaved. The "old"/"new" labels below are inferred from the types
// involved — confirm against the actual commit before relying on them.
/// Computes a typo-based score for the matches described by `query_index` and
/// `distance`.
///
/// `query_index` is split into runs of consecutive equal values and, for each
/// run, the entry of `distance` located at the run's starting position is
/// taken as that run's typo count.
/// (presumably each run is one query word and the two slices are parallel
/// arrays — TODO confirm against the caller.)
///
/// Two formulas are visible:
/// * old: `sum(typos) - number_of_runs`, computed in `isize`;
/// * new: `number_of_runs / (sum(log10(typos + 1)) + 1) * 1000`, computed in
///   `f32` and then cast to `isize`. With this formula fewer typos yield a
///   larger value.
#[inline]
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> isize {
// old: integer accumulators.
let mut sum_typos = 0;
let mut number_words = 0;
// new: floating-point accumulators (same names as the old ones).
let mut number_words = 0.0;
let mut sum_typos = 0.0;
// Position in `distance` of the first element of the current run.
let mut index = 0;
// presumably `linear_group_by(PartialEq::eq)` yields runs of consecutive
// equal elements — verify against the slice-grouping crate in use.
for group in query_index.linear_group_by(PartialEq::eq) {
// old: add the raw typo count and count the run.
sum_typos += distance[index] as isize;
number_words += 1;
// new: add log10(typo + 1), so a run with zero typos contributes 0 and each
// additional typo contributes less than the previous one.
let typo = distance[index] as f32;
sum_typos += (typo + 1.0).log10();
number_words += 1.0_f32;
// Jump to the start of the next run.
index += group.len();
}
// old: return value.
sum_typos - number_words
// new: the `+ 1.0` keeps the denominator non-zero when there are no typos;
// the result is scaled by 1000 before the float-to-integer `as` cast, which
// drops the fractional part.
let out = number_words / (sum_typos + 1.0);
(out * 1000.0) as isize
}
#[derive(Debug, Clone, Copy)]
@ -37,7 +40,7 @@ impl Criterion for SumOfTypos {
sum_matches_typos(query_index, distance)
};
lhs.cmp(&rhs)
lhs.cmp(&rhs).reverse()
}
}
@ -57,9 +60,9 @@ mod tests {
let query_index1 = &[0, 1];
let distance1 = &[1, 0];
let lhs = sum_matches_typos(query_index0, distance0);
let rhs = sum_matches_typos(query_index1, distance1);
assert_eq!(lhs.cmp(&rhs), Ordering::Less);
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchette"
@ -74,9 +77,9 @@ mod tests {
let query_index1 = &[0];
let distance1 = &[0];
let lhs = sum_matches_typos(query_index0, distance0);
let rhs = sum_matches_typos(query_index1, distance1);
assert_eq!(lhs.cmp(&rhs), Ordering::Less);
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
// typing: "bouton manchztte"
@ -91,8 +94,8 @@ mod tests {
let query_index1 = &[0];
let distance1 = &[0];
let lhs = sum_matches_typos(query_index0, distance0);
let rhs = sum_matches_typos(query_index1, distance1);
assert_eq!(lhs.cmp(&rhs), Ordering::Equal);
let doc0 = sum_matches_typos(query_index0, distance0);
let doc1 = sum_matches_typos(query_index1, distance1);
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
}
}