mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 09:04:59 +08:00
Use the words pair proximities in the search algorithm
This commit is contained in:
parent
31224a8425
commit
1f6e00878d
@ -28,13 +28,7 @@ pub struct Search<'a> {
|
|||||||
|
|
||||||
impl<'a> Search<'a> {
|
impl<'a> Search<'a> {
|
||||||
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
|
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
|
||||||
Search {
|
Search { query: None, offset: 0, limit: 20, rtxn, index }
|
||||||
query: None,
|
|
||||||
offset: 0,
|
|
||||||
limit: 20,
|
|
||||||
rtxn,
|
|
||||||
index,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Search<'a> {
|
pub fn query(&mut self, query: impl Into<String>) -> &mut Search<'a> {
|
||||||
@ -85,8 +79,8 @@ impl<'a> Search<'a> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fetch the words from the given FST related to the
|
/// Fetch the words from the given FST related to the given DFAs along with
|
||||||
/// given DFAs along with the associated documents ids.
|
/// the associated documents ids.
|
||||||
fn fetch_words_docids(
|
fn fetch_words_docids(
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
@ -194,26 +188,36 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
|
|
||||||
// If there only is one word, no need to compute the best proximities.
|
// If there is only one query word, no need to compute the best proximities.
|
||||||
if derived_words.len() == 1 {
|
if derived_words.len() == 1 || candidates.is_empty() {
|
||||||
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
|
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
|
||||||
let documents_ids = candidates.iter().take(limit).collect();
|
let documents_ids = candidates.iter().take(limit).collect();
|
||||||
return Ok(SearchResult { found_words, documents_ids });
|
return Ok(SearchResult { found_words, documents_ids });
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut paths = Vec::new();
|
let mut answer = RoaringBitmap::new();
|
||||||
for candidate in candidates {
|
for (i, words) in derived_words.windows(2).enumerate() {
|
||||||
let keywords = Self::fecth_keywords(rtxn, index, &derived_words, candidate)?;
|
let w1: Vec<_> = words[0].0.keys().collect();
|
||||||
near_proximity(keywords, &mut paths, path_proximity);
|
let w2: Vec<_> = words[1].0.keys().collect();
|
||||||
if let Some((prox, _path)) = paths.first() {
|
|
||||||
documents.push((*prox, candidate));
|
let key = (w1[0].as_str(), w2[0].as_str(), 1);
|
||||||
|
match index.word_pair_proximity_docids.get(rtxn, &key)? {
|
||||||
|
Some(docids) => if i == 0 {
|
||||||
|
answer = docids;
|
||||||
|
} else {
|
||||||
|
answer.intersect_with(&docids);
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
answer = RoaringBitmap::new();
|
||||||
|
break;
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
documents.sort_unstable_by_key(|(prox, _)| *prox);
|
documents.push(answer);
|
||||||
|
|
||||||
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
|
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
|
||||||
let documents_ids = documents.into_iter().map(|(_, id)| id).take(limit).collect();
|
let documents_ids = documents.into_iter().flatten().take(limit).collect();
|
||||||
Ok(SearchResult { found_words, documents_ids })
|
Ok(SearchResult { found_words, documents_ids })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user