mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
fix: Make the SumOfTypos criterion use a more clever algorithm
This commit is contained in:
parent
cf58cf86da
commit
85a1f126bf
@ -7,17 +7,20 @@ use crate::rank::RawDocument;
|
|||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> isize {
|
fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> isize {
|
||||||
let mut sum_typos = 0;
|
let mut number_words = 0.0;
|
||||||
let mut number_words = 0;
|
let mut sum_typos = 0.0;
|
||||||
let mut index = 0;
|
let mut index = 0;
|
||||||
|
|
||||||
for group in query_index.linear_group_by(PartialEq::eq) {
|
for group in query_index.linear_group_by(PartialEq::eq) {
|
||||||
sum_typos += distance[index] as isize;
|
let typo = distance[index] as f32;
|
||||||
number_words += 1;
|
sum_typos += (typo + 1.0).log10();
|
||||||
|
number_words += 1.0_f32;
|
||||||
index += group.len();
|
index += group.len();
|
||||||
}
|
}
|
||||||
|
|
||||||
sum_typos - number_words
|
let out = number_words / (sum_typos + 1.0);
|
||||||
|
|
||||||
|
(out * 1000.0) as isize
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
@ -37,7 +40,7 @@ impl Criterion for SumOfTypos {
|
|||||||
sum_matches_typos(query_index, distance)
|
sum_matches_typos(query_index, distance)
|
||||||
};
|
};
|
||||||
|
|
||||||
lhs.cmp(&rhs)
|
lhs.cmp(&rhs).reverse()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,9 +60,9 @@ mod tests {
|
|||||||
let query_index1 = &[0, 1];
|
let query_index1 = &[0, 1];
|
||||||
let distance1 = &[1, 0];
|
let distance1 = &[1, 0];
|
||||||
|
|
||||||
let lhs = sum_matches_typos(query_index0, distance0);
|
let doc0 = sum_matches_typos(query_index0, distance0);
|
||||||
let rhs = sum_matches_typos(query_index1, distance1);
|
let doc1 = sum_matches_typos(query_index1, distance1);
|
||||||
assert_eq!(lhs.cmp(&rhs), Ordering::Less);
|
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
|
||||||
}
|
}
|
||||||
|
|
||||||
// typing: "bouton manchette"
|
// typing: "bouton manchette"
|
||||||
@ -74,9 +77,9 @@ mod tests {
|
|||||||
let query_index1 = &[0];
|
let query_index1 = &[0];
|
||||||
let distance1 = &[0];
|
let distance1 = &[0];
|
||||||
|
|
||||||
let lhs = sum_matches_typos(query_index0, distance0);
|
let doc0 = sum_matches_typos(query_index0, distance0);
|
||||||
let rhs = sum_matches_typos(query_index1, distance1);
|
let doc1 = sum_matches_typos(query_index1, distance1);
|
||||||
assert_eq!(lhs.cmp(&rhs), Ordering::Less);
|
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
|
||||||
}
|
}
|
||||||
|
|
||||||
// typing: "bouton manchztte"
|
// typing: "bouton manchztte"
|
||||||
@ -91,8 +94,8 @@ mod tests {
|
|||||||
let query_index1 = &[0];
|
let query_index1 = &[0];
|
||||||
let distance1 = &[0];
|
let distance1 = &[0];
|
||||||
|
|
||||||
let lhs = sum_matches_typos(query_index0, distance0);
|
let doc0 = sum_matches_typos(query_index0, distance0);
|
||||||
let rhs = sum_matches_typos(query_index1, distance1);
|
let doc1 = sum_matches_typos(query_index1, distance1);
|
||||||
assert_eq!(lhs.cmp(&rhs), Ordering::Equal);
|
assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user