mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 09:04:59 +08:00
Compute the candidates but not by attribute
This commit is contained in:
parent
6b14b20369
commit
576dd011a1
49
src/lib.rs
49
src/lib.rs
@ -156,32 +156,31 @@ impl Index {
|
|||||||
positions.push(union_positions.iter().collect());
|
positions.push(union_positions.iter().collect());
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut words_attributes_docids = Vec::new();
|
// We compute the docids candiate for these words (and derived words).
|
||||||
|
// We do a union between all the docids of each of the words and derived words,
|
||||||
|
// we got N unions (where N is the number of query words), we then intersect them.
|
||||||
|
// TODO we must store the words documents ids to avoid these unions.
|
||||||
|
let mut candidates = RoaringBitmap::new();
|
||||||
let number_of_attributes = self.number_of_attributes(rtxn)?.map_or(0, |n| n as u32);
|
let number_of_attributes = self.number_of_attributes(rtxn)?.map_or(0, |n| n as u32);
|
||||||
|
for (i, derived_words) in words.iter().enumerate() {
|
||||||
for i in 0..number_of_attributes {
|
|
||||||
let mut intersect_docids: Option<RoaringBitmap> = None;
|
|
||||||
for derived_words in &words {
|
|
||||||
let mut union_docids = RoaringBitmap::new();
|
let mut union_docids = RoaringBitmap::new();
|
||||||
for (word, _) in derived_words {
|
for (word, _) in derived_words {
|
||||||
// generate the key with the attribute number.
|
for attr in 0..number_of_attributes {
|
||||||
let mut key = word.to_vec();
|
let mut key = word.to_vec();
|
||||||
key.extend_from_slice(&i.to_be_bytes());
|
key.extend_from_slice(&attr.to_be_bytes());
|
||||||
|
|
||||||
if let Some(right) = self.word_attribute_docids.get(rtxn, &key)? {
|
if let Some(right) = self.word_attribute_docids.get(rtxn, &key)? {
|
||||||
union_docids.union_with(&right);
|
union_docids.union_with(&right);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
match &mut intersect_docids {
|
|
||||||
Some(left) => left.intersect_with(&union_docids),
|
|
||||||
None => intersect_docids = Some(union_docids),
|
|
||||||
}
|
}
|
||||||
|
if i == 0 {
|
||||||
|
candidates = union_docids;
|
||||||
|
} else {
|
||||||
|
candidates.intersect_with(&union_docids);
|
||||||
}
|
}
|
||||||
words_attributes_docids.push(intersect_docids);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("The documents you must find for each attribute: {:?}", words_attributes_docids);
|
debug!("The candidates are {:?}", candidates);
|
||||||
|
|
||||||
debug!("Retrieving words positions took {:.02?}", before.elapsed());
|
debug!("Retrieving words positions took {:.02?}", before.elapsed());
|
||||||
|
|
||||||
// Returns the union of the same position for all the derived words.
|
// Returns the union of the same position for all the derived words.
|
||||||
@ -202,7 +201,7 @@ impl Index {
|
|||||||
let mut union_cache = HashMap::new();
|
let mut union_cache = HashMap::new();
|
||||||
let mut intersect_cache = HashMap::new();
|
let mut intersect_cache = HashMap::new();
|
||||||
// Returns `true` if there is documents in common between the two words and positions given.
|
// Returns `true` if there is documents in common between the two words and positions given.
|
||||||
let mut contains_documents = |(lword, lpos), (rword, rpos), union_cache: &mut HashMap<_, _>, words_attributes_docids: &[_]| {
|
let mut contains_documents = |(lword, lpos), (rword, rpos), union_cache: &mut HashMap<_, _>, candidates: &RoaringBitmap| {
|
||||||
let proximity = best_proximity::positions_proximity(lpos, rpos);
|
let proximity = best_proximity::positions_proximity(lpos, rpos);
|
||||||
|
|
||||||
if proximity == 0 { return false }
|
if proximity == 0 { return false }
|
||||||
@ -217,13 +216,9 @@ impl Index {
|
|||||||
let lunion_docids = union_cache.get(&(lword, lpos)).unwrap();
|
let lunion_docids = union_cache.get(&(lword, lpos)).unwrap();
|
||||||
let runion_docids = union_cache.get(&(rword, rpos)).unwrap();
|
let runion_docids = union_cache.get(&(rword, rpos)).unwrap();
|
||||||
|
|
||||||
if proximity <= 7 {
|
// We first check that the docids of these unions are part of the candidates.
|
||||||
let lattr = lpos / 1000;
|
if lunion_docids.is_disjoint(candidates) { return false }
|
||||||
if let Some(docids) = &words_attributes_docids[lattr as usize] {
|
if runion_docids.is_disjoint(candidates) { return false }
|
||||||
if lunion_docids.is_disjoint(&docids) { return false }
|
|
||||||
if runion_docids.is_disjoint(&docids) { return false }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
!lunion_docids.is_disjoint(&runion_docids)
|
!lunion_docids.is_disjoint(&runion_docids)
|
||||||
})
|
})
|
||||||
@ -231,7 +226,7 @@ impl Index {
|
|||||||
|
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
let mut iter = BestProximity::new(positions);
|
let mut iter = BestProximity::new(positions);
|
||||||
while let Some((proximity, mut positions)) = iter.next(|l, r| contains_documents(l, r, &mut union_cache, &words_attributes_docids)) {
|
while let Some((proximity, mut positions)) = iter.next(|l, r| contains_documents(l, r, &mut union_cache, &candidates)) {
|
||||||
positions.sort_unstable();
|
positions.sort_unstable();
|
||||||
|
|
||||||
let same_prox_before = Instant::now();
|
let same_prox_before = Instant::now();
|
||||||
@ -289,12 +284,8 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We achieve to find valid documents ids so we remove them from the candidate list.
|
// We achieve to find valid documents ids so we remove them from the candidates list.
|
||||||
for docids in &mut words_attributes_docids {
|
candidates.difference_with(&same_proximity_union);
|
||||||
if let Some(docids) = docids {
|
|
||||||
docids.difference_with(&same_proximity_union);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
documents.push(same_proximity_union);
|
documents.push(same_proximity_union);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user