Make the search system aware of typos in query words

This commit is contained in:
Clément Renault 2020-09-23 12:01:39 +02:00
parent b597a92487
commit 5178b3d59d
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -195,22 +195,39 @@ impl<'a> Search<'a> {
return Ok(SearchResult { found_words, documents_ids }); return Ok(SearchResult { found_words, documents_ids });
} }
/// Builds the list of `(left, right)` word pairs, taking `left` from the keys
/// of `w1` and `right` from the keys of `w2`, for which the two associated
/// bitmaps share at least one element (i.e. are not disjoint).
///
/// The `u8` stored next to each bitmap is ignored here (presumably a typo
/// count for the derived word; confirm against the caller).
///
/// The returned string slices borrow from the map keys. Pairs come out in
/// the maps' iteration order: every key of `w2` is visited for each key of `w1`.
fn words_pair_combinations<'a>(
    w1: &'a HashMap<String, (u8, RoaringBitmap)>,
    w2: &'a HashMap<String, (u8, RoaringBitmap)>,
) -> Vec<(&'a str, &'a str)> {
    w1.iter()
        .flat_map(move |(left, (_, left_docids))| {
            w2.iter()
                // Keep only the right-hand words whose bitmap overlaps the left one.
                .filter(move |(_, (_, right_docids))| !left_docids.is_disjoint(right_docids))
                .map(move |(right, _)| (left.as_str(), right.as_str()))
        })
        .collect()
}
let mut answer = RoaringBitmap::new(); let mut answer = RoaringBitmap::new();
for (i, words) in derived_words.windows(2).enumerate() { for (i, words) in derived_words.windows(2).enumerate() {
let w1: Vec<_> = words[0].0.keys().collect(); let pairs = words_pair_combinations(&words[0].0, &words[1].0);
let w2: Vec<_> = words[1].0.keys().collect(); eprintln!("found pairs {:?}", pairs);
let key = (w1[0].as_str(), w2[0].as_str(), 1); let mut pairs_union = RoaringBitmap::new();
match index.word_pair_proximity_docids.get(rtxn, &key)? { for (w1, w2) in pairs {
Some(docids) => if i == 0 { let key = (w1, w2, 1);
answer = docids; if let Some(docids) = index.word_pair_proximity_docids.get(rtxn, &key)? {
} else { pairs_union.union_with(&docids);
answer.intersect_with(&docids); }
}, }
None => {
answer = RoaringBitmap::new(); if i == 0 {
break; answer = pairs_union;
}, } else {
answer.intersect_with(&pairs_union);
} }
} }