From 85a1f126bfe82c62ddb6afcc4c879ea4b9914346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 12 Feb 2019 15:36:45 +0100 Subject: [PATCH] fix: Make the SumOfTypos criterion use a more clever algorithm --- src/rank/criterion/sum_of_typos.rs | 33 ++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/rank/criterion/sum_of_typos.rs b/src/rank/criterion/sum_of_typos.rs index 5d98a42e7..0f7edbbd5 100644 --- a/src/rank/criterion/sum_of_typos.rs +++ b/src/rank/criterion/sum_of_typos.rs @@ -7,17 +7,20 @@ use crate::rank::RawDocument; #[inline] fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> isize { - let mut sum_typos = 0; - let mut number_words = 0; + let mut number_words = 0.0; + let mut sum_typos = 0.0; let mut index = 0; for group in query_index.linear_group_by(PartialEq::eq) { - sum_typos += distance[index] as isize; - number_words += 1; + let typo = distance[index] as f32; + sum_typos += (typo + 1.0).log10(); + number_words += 1.0_f32; index += group.len(); } - sum_typos - number_words + let out = number_words / (sum_typos + 1.0); + + (out * 1000.0) as isize } #[derive(Debug, Clone, Copy)] @@ -37,7 +40,7 @@ impl Criterion for SumOfTypos { sum_matches_typos(query_index, distance) }; - lhs.cmp(&rhs) + lhs.cmp(&rhs).reverse() } } @@ -57,9 +60,9 @@ mod tests { let query_index1 = &[0, 1]; let distance1 = &[1, 0]; - let lhs = sum_matches_typos(query_index0, distance0); - let rhs = sum_matches_typos(query_index1, distance1); - assert_eq!(lhs.cmp(&rhs), Ordering::Less); + let doc0 = sum_matches_typos(query_index0, distance0); + let doc1 = sum_matches_typos(query_index1, distance1); + assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less); } // typing: "bouton manchette" @@ -74,9 +77,9 @@ mod tests { let query_index1 = &[0]; let distance1 = &[0]; - let lhs = sum_matches_typos(query_index0, distance0); - let rhs = sum_matches_typos(query_index1, distance1); - assert_eq!(lhs.cmp(&rhs), Ordering::Less); + let doc0 = sum_matches_typos(query_index0, distance0); + let doc1 = sum_matches_typos(query_index1, distance1); + assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less); } // typing: "bouton manchztte" @@ -91,8 +94,8 @@ mod tests { let query_index1 = &[0]; let distance1 = &[0]; - let lhs = sum_matches_typos(query_index0, distance0); - let rhs = sum_matches_typos(query_index1, distance1); - assert_eq!(lhs.cmp(&rhs), Ordering::Equal); + let doc0 = sum_matches_typos(query_index0, distance0); + let doc1 = sum_matches_typos(query_index1, distance1); + assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less); } }