Change encoding of word_pair_proximity DB to (proximity, word1, word2)

Same for word_prefix_pair_proximity
This commit is contained in:
Loïc Lecrenier 2022-09-14 13:54:12 +02:00 committed by Loïc Lecrenier
parent 19b2326f3d
commit bdeb47305e
6 changed files with 130 additions and 179 deletions

View File

@ -7,12 +7,11 @@ impl<'a> heed::BytesDecode<'a> for StrStrU8Codec {
type DItem = (&'a str, &'a str, u8); type DItem = (&'a str, &'a str, u8);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_last()?; let (n, bytes) = bytes.split_first()?;
let s1_end = bytes.iter().position(|b| *b == 0)?; let s1_end = bytes.iter().position(|b| *b == 0)?;
let (s1_bytes, rest) = bytes.split_at(s1_end); let (s1_bytes, rest) = bytes.split_at(s1_end);
let rest = &rest[1..]; let s2_bytes = &rest[1..];
let s1 = str::from_utf8(s1_bytes).ok()?; let s1 = str::from_utf8(s1_bytes).ok()?;
let (_, s2_bytes) = rest.split_last()?;
let s2 = str::from_utf8(s2_bytes).ok()?; let s2 = str::from_utf8(s2_bytes).ok()?;
Some((s1, s2, *n)) Some((s1, s2, *n))
} }
@ -22,12 +21,11 @@ impl<'a> heed::BytesEncode<'a> for StrStrU8Codec {
type EItem = (&'a str, &'a str, u8); type EItem = (&'a str, &'a str, u8);
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1 + 1); let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1.as_bytes()); bytes.extend_from_slice(s1.as_bytes());
bytes.push(0); bytes.push(0);
bytes.extend_from_slice(s2.as_bytes()); bytes.extend_from_slice(s2.as_bytes());
bytes.push(0);
bytes.push(*n);
Some(Cow::Owned(bytes)) Some(Cow::Owned(bytes))
} }
} }
@ -37,11 +35,10 @@ impl<'a> heed::BytesDecode<'a> for UncheckedStrStrU8Codec {
type DItem = (&'a [u8], &'a [u8], u8); type DItem = (&'a [u8], &'a [u8], u8);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_last()?; let (n, bytes) = bytes.split_first()?;
let s1_end = bytes.iter().position(|b| *b == 0)?; let s1_end = bytes.iter().position(|b| *b == 0)?;
let (s1_bytes, rest) = bytes.split_at(s1_end); let (s1_bytes, rest) = bytes.split_at(s1_end);
let rest = &rest[1..]; let s2_bytes = &rest[1..];
let (_, s2_bytes) = rest.split_last()?;
Some((s1_bytes, s2_bytes, *n)) Some((s1_bytes, s2_bytes, *n))
} }
} }
@ -50,12 +47,11 @@ impl<'a> heed::BytesEncode<'a> for UncheckedStrStrU8Codec {
type EItem = (&'a [u8], &'a [u8], u8); type EItem = (&'a [u8], &'a [u8], u8);
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> { fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1 + 1); let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1); bytes.extend_from_slice(s1);
bytes.push(0); bytes.push(0);
bytes.extend_from_slice(s2); bytes.extend_from_slice(s2);
bytes.push(0);
bytes.push(*n);
Some(Cow::Owned(bytes)) Some(Cow::Owned(bytes))
} }
} }

View File

@ -194,7 +194,7 @@ pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String {
(word1, prefix, proximity), (word1, prefix, proximity),
b, b,
)| { )| {
&format!("{word1:<16} {prefix:<4} {proximity:<2} {}", display_bitmap(&b)) &format!("{proximity:<2} {word1:<16} {prefix:<4} {}", display_bitmap(&b))
}); });
snap snap
} }

View File

@ -151,11 +151,10 @@ fn document_word_positions_into_sorter<'b>(
let mut key_buffer = Vec::new(); let mut key_buffer = Vec::new();
for ((w1, w2), prox) in word_pair_proximity { for ((w1, w2), prox) in word_pair_proximity {
key_buffer.clear(); key_buffer.clear();
key_buffer.push(prox as u8);
key_buffer.extend_from_slice(w1.as_bytes()); key_buffer.extend_from_slice(w1.as_bytes());
key_buffer.push(0); key_buffer.push(0);
key_buffer.extend_from_slice(w2.as_bytes()); key_buffer.extend_from_slice(w2.as_bytes());
key_buffer.push(0);
key_buffer.push(prox as u8);
word_pair_proximity_docids_sorter.insert(&key_buffer, &document_id.to_ne_bytes())?; word_pair_proximity_docids_sorter.insert(&key_buffer, &document_id.to_ne_bytes())?;
} }

View File

@ -1,46 +1,46 @@
--- ---
source: milli/src/update/word_prefix_pair_proximity_docids.rs source: milli/src/update/word_prefix_pair_proximity_docids.rs
--- ---
5 a 1 [101, ] 1 5 a [101, ]
5 a 2 [101, ] 1 amazing a [100, ]
5 b 4 [101, ] 1 an a [100, ]
5 be 4 [101, ] 1 and b [100, ]
am a 3 [101, ] 1 and be [100, ]
amazing a 1 [100, ] 1 at a [100, ]
amazing a 2 [100, ] 1 rings a [101, ]
amazing a 3 [100, ] 1 the b [101, ]
amazing b 2 [100, ] 1 the be [101, ]
amazing be 2 [100, ] 2 5 a [101, ]
an a 1 [100, ] 2 amazing a [100, ]
an a 2 [100, ] 2 amazing b [100, ]
an b 3 [100, ] 2 amazing be [100, ]
an be 3 [100, ] 2 an a [100, ]
and a 2 [100, ] 2 and a [100, ]
and a 3 [100, ] 2 at a [100, 101, ]
and a 4 [100, ] 2 beautiful a [100, ]
and b 1 [100, ] 2 bell a [101, ]
and be 1 [100, ] 2 house b [100, ]
at a 1 [100, ] 2 house be [100, ]
at a 2 [100, 101, ] 2 rings b [101, ]
at a 3 [100, ] 2 rings be [101, ]
at b 3 [101, ] 3 am a [101, ]
at b 4 [100, ] 3 amazing a [100, ]
at be 3 [101, ] 3 an b [100, ]
at be 4 [100, ] 3 an be [100, ]
beautiful a 2 [100, ] 3 and a [100, ]
beautiful a 3 [100, ] 3 at a [100, ]
beautiful a 4 [100, ] 3 at b [101, ]
bell a 2 [101, ] 3 at be [101, ]
bell a 4 [101, ] 3 beautiful a [100, ]
house a 3 [100, ] 3 house a [100, ]
house a 4 [100, ] 3 rings a [101, ]
house b 2 [100, ] 3 the a [101, ]
house be 2 [100, ] 4 5 b [101, ]
rings a 1 [101, ] 4 5 be [101, ]
rings a 3 [101, ] 4 and a [100, ]
rings b 2 [101, ] 4 at b [100, ]
rings be 2 [101, ] 4 at be [100, ]
the a 3 [101, ] 4 beautiful a [100, ]
the b 1 [101, ] 4 bell a [101, ]
the be 1 [101, ] 4 house a [100, ]

View File

@ -1,4 +1,4 @@
--- ---
source: milli/src/update/word_prefix_pair_proximity_docids.rs source: milli/src/update/word_prefix_pair_proximity_docids.rs
--- ---
5ed4bf83317b10962a55ade353427bdd fb88e49fd666886731b62baef8f44995

View File

@ -1,7 +1,7 @@
/*! /*!
## What is WordPrefixPairProximityDocids? ## What is WordPrefixPairProximityDocids?
The word-prefix-pair-proximity-docids database is a database whose keys are of The word-prefix-pair-proximity-docids database is a database whose keys are of
the form (`word`, `prefix`, `proximity`) and the values are roaring bitmaps of the form `(proximity, word, prefix)` and the values are roaring bitmaps of
the documents which contain `word` followed by another word starting with the documents which contain `word` followed by another word starting with
`prefix` at a distance of `proximity`. `prefix` at a distance of `proximity`.
@ -23,127 +23,100 @@ dog
Note that only prefixes which correspond to more than a certain number of Note that only prefixes which correspond to more than a certain number of
different words from the database are included in this list. different words from the database are included in this list.
* a sorted list of word pairs and the distance between them (i.e. proximity), * a sorted list of proximities and word pairs (the proximity is the distance between the two words),
* associated with a roaring bitmap, such as: associated with a roaring bitmap, such as:
```text ```text
good dog 3 -> docids1: [2, 5, 6] 1 good doggo -> docids1: [8]
good doggo 1 -> docids2: [8] 1 good door -> docids2: [7, 19, 20]
good dogma 1 -> docids3: [7, 19, 20] 1 good ghost -> docids3: [1]
good ghost 2 -> docids4: [1] 2 good dog -> docids4: [2, 5, 6]
horror cathedral 4 -> docids5: [1, 2] 2 horror cathedral -> docids5: [1, 2]
``` ```
I illustrate a simplified version of the algorithm to create the word-prefix I illustrate a simplified version of the algorithm to create the word-prefix
pair-proximity database below: pair-proximity database below:
1. **Outer loop:** First, we iterate over each word pair and its proximity: 1. **Outer loop:** First, we iterate over each proximity and word pair:
```text ```text
proximity: 1
word1 : good word1 : good
word2 : dog word2 : doggo
proximity: 3
``` ```
2. **Inner loop:** Then, we iterate over all the prefixes of `word2` that are 2. **Inner loop:** Then, we iterate over all the prefixes of `word2` that are
in the list of sorted prefixes. And we insert the key (`prefix`, `proximity`) in the list of sorted prefixes. And we insert the key `prefix`
and the value (`docids`) to a sorted map which we call the batch. For example, and the value (`docids`) to a sorted map which we call the batch. For example,
at the end of the first inner loop, we may have: at the end of the first inner loop, we may have:
```text ```text
Outer loop 1: Outer loop 1:
------------------------------ ------------------------------
proximity: 1
word1 : good word1 : good
word2 : dog word2 : doggo
proximity: 3
docids : docids1 docids : docids1
prefixes: [d, do, dog] prefixes: [d, do, dog]
batch: [ batch: [
(d, 3) -> [docids1] d -> [docids1]
(do, 3) -> [docids1] do -> [docids1]
(dog, 3) -> [docids1] dog -> [docids1]
] ]
``` ```
3. For illustration purpose, let's run through a second iteration of the outer loop: 3. For illustration purpose, let's run through a second iteration of the outer loop:
```text ```text
Outer loop 2: Outer loop 2:
------------------------------ ------------------------------
word1 : good
word2 : doggo
proximity: 1 proximity: 1
word1 : good
word2 : door
docids : docids2 docids : docids2
prefixes: [d, do, dog] prefixes: [d, do, doo]
batch: [ batch: [
(d, 1) -> [docids2] d -> [docids1, docids2]
(d, 3) -> [docids1] do -> [docids1, docids2]
(do, 1) -> [docids2] dog -> [docids1]
(do, 3) -> [docids1] doo -> [docids2]
(dog, 1) -> [docids2]
(dog, 3) -> [docids1]
]
```
Notice that the batch had to re-order some (`prefix`, `proximity`) keys: some
of the elements inserted in the second iteration of the outer loop appear
*before* elements from the first iteration.
4. And a third:
```text
Outer loop 3:
------------------------------
word1 : good
word2 : dogma
proximity: 1
docids : docids3
prefixes: [d, do, dog]
batch: [
(d, 1) -> [docids2, docids3]
(d, 3) -> [docids1]
(do, 1) -> [docids2, docids3]
(do, 3) -> [docids1]
(dog, 1) -> [docids2, docids3]
(dog, 3) -> [docids1]
] ]
``` ```
Notice that there were some conflicts which were resolved by merging the Notice that there were some conflicts which were resolved by merging the
conflicting values together. conflicting values together. Also, an additional prefix was added at the
end of the batch.
5. On the fourth iteration of the outer loop, we have: 4. On the third iteration of the outer loop, we have:
```text ```text
Outer loop 4: Outer loop 3:
------------------------------ ------------------------------
proximity: 1
word1 : good word1 : good
word2 : ghost word2 : ghost
proximity: 2
``` ```
Because `word2` begins with a different letter than the previous `word2`, Because `word2` begins with a different letter than the previous `word2`,
we know that: we know that all the prefixes of `word2` are greater than the prefixes of the previous `word2`.
1. All the prefixes of `word2` are greater than the prefixes of the previous word2
2. And therefore, every instance of (`word2`, `prefix`) will be greater than
any element in the batch.
Therefore, we know that we can insert every element from the batch into the Therefore, we know that we can insert every element from the batch into the
database before proceeding any further. This operation is called database before proceeding any further. This operation is called
flushing the batch. Flushing the batch should also be done whenever `word1` flushing the batch. Flushing the batch should also be done whenever:
is different than the previous `word1`. * `proximity` is different than the previous `proximity`.
* `word1` is different than the previous `word1`.
* `word2` starts with a different letter than the previous word2
6. **Flushing the batch:** to flush the batch, we look at the `word1` and 6. **Flushing the batch:** to flush the batch, we iterate over its elements:
iterate over the elements of the batch in sorted order:
```text ```text
Flushing Batch loop 1: Flushing Batch loop 1:
------------------------------ ------------------------------
word1 : good proximity : 1
word2 : d word1 : good
proximity: 1 prefix : d
docids : [docids2, docids3] docids : [docids1, docids2]
``` ```
We then merge the array of `docids` (of type `Vec<Vec<u8>>`) using We then merge the array of `docids` (of type `Vec<Vec<u8>>`) using
`merge_cbo_roaring_bitmap` in order to get a single byte vector representing a `merge_cbo_roaring_bitmap` in order to get a single byte vector representing a
roaring bitmap of all the document ids where `word1` is followed by `prefix` roaring bitmap of all the document ids where `word1` is followed by `prefix`
at a distance of `proximity`. at a distance of `proximity`.
Once we have done that, we insert (`word1`, `prefix`, `proximity`) -> `merged_docids` Once we have done that, we insert `(proximity, word1, prefix) -> merged_docids`
into the database. into the database.
7. That's it! ... except... 7. That's it! ... except...
@ -184,8 +157,8 @@ Note, also, that since we read data from the database when iterating over
`word_pairs_db`, we cannot insert the computed word-prefix-pair-proximity- `word_pairs_db`, we cannot insert the computed word-prefix-pair-proximity-
docids from the batch directly into the database (we would have a concurrent docids from the batch directly into the database (we would have a concurrent
reader and writer). Therefore, when calling the algorithm on reader and writer). Therefore, when calling the algorithm on
(`new_prefixes`, `word_pairs_db`), we insert the computed `(new_prefixes, word_pairs_db)`, we insert the computed
((`word`, `prefix`, `proximity`), `docids`) elements in an intermediary grenad `((proximity, word, prefix), docids)` elements in an intermediary grenad
Writer instead of the DB. At the end of the outer loop, we finally read from Writer instead of the DB. At the end of the outer loop, we finally read from
the grenad and insert its elements in the database. the grenad and insert its elements in the database.
@ -406,7 +379,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
while let Some(((word1, word2, proximity), data)) = next_word_pair_proximity(iter)? { while let Some(((word1, word2, proximity), data)) = next_word_pair_proximity(iter)? {
// skip this iteration if the proximity is over the threshold // stop iterating once the proximity is over the threshold: keys are sorted by proximity first, so every remaining key is over it too
if proximity > max_proximity { if proximity > max_proximity {
continue; break;
}; };
let word2_start_different_than_prev = word2[0] != prev_word2_start; let word2_start_different_than_prev = word2[0] != prev_word2_start;
// if there were no potential prefixes for the previous word2 based on its first letter, // if there were no potential prefixes for the previous word2 based on its first letter,
@ -416,16 +389,21 @@ fn execute_on_word_pairs_and_prefixes<I>(
continue; continue;
} }
// if word1 is different than the previous word1 OR if the start of word2 is different // if the proximity is different to the previous one, OR
// than the previous start of word2, then we'll need to flush the batch // if word1 is different than the previous word1, OR
// if the start of word2 is different than the previous start of word2,
// THEN we'll need to flush the batch
let prox_different_than_prev = proximity != batch.proximity;
let word1_different_than_prev = word1 != batch.word1; let word1_different_than_prev = word1 != batch.word1;
if word1_different_than_prev || word2_start_different_than_prev { if prox_different_than_prev || word1_different_than_prev || word2_start_different_than_prev
{
batch.flush(&mut merge_buffer, &mut insert)?; batch.flush(&mut merge_buffer, &mut insert)?;
// don't forget to reset the value of batch.word1 and prev_word2_start // don't forget to reset the value of batch.word1 and prev_word2_start
if word1_different_than_prev { if word1_different_than_prev {
prefix_search_start.0 = 0; prefix_search_start.0 = 0;
batch.word1.clear(); batch.word1.clear();
batch.word1.extend_from_slice(word1); batch.word1.extend_from_slice(word1);
batch.proximity = proximity;
} }
if word2_start_different_than_prev { if word2_start_different_than_prev {
// word2_start_different_than_prev == true // word2_start_different_than_prev == true
@ -437,74 +415,70 @@ fn execute_on_word_pairs_and_prefixes<I>(
if !empty_prefixes { if !empty_prefixes {
// All conditions are satisfied, we can now insert each new prefix of word2 into the batch // All conditions are satisfied, we can now insert each new prefix of word2 into the batch
prefix_buffer.clear();
prefixes.for_each_prefix_of( prefixes.for_each_prefix_of(
word2, word2,
&mut prefix_buffer, &mut prefix_buffer,
&prefix_search_start, &prefix_search_start,
|prefix_buffer| { |prefix_buffer| {
let prefix_len = prefix_buffer.len();
prefix_buffer.push(0);
prefix_buffer.push(proximity);
batch.insert(&prefix_buffer, data.to_vec()); batch.insert(&prefix_buffer, data.to_vec());
prefix_buffer.truncate(prefix_len);
}, },
); );
prefix_buffer.clear();
} }
} }
batch.flush(&mut merge_buffer, &mut insert)?; batch.flush(&mut merge_buffer, &mut insert)?;
Ok(()) Ok(())
} }
/** /**
A map structure whose keys are (prefix, proximity) and whose values are vectors of bitstrings (serialized roaring bitmaps). A map structure whose keys are prefixes and whose values are vectors of bitstrings (serialized roaring bitmaps).
The keys are sorted and conflicts are resolved by merging the vectors of bitstrings together. The keys are sorted and conflicts are resolved by merging the vectors of bitstrings together.
It is used to ensure that all ((word1, prefix, proximity), docids) are inserted into the database in sorted order and efficiently. It is used to ensure that all ((proximity, word1, prefix), docids) are inserted into the database in sorted order and efficiently.
The batch is flushed as often as possible, when we are sure that every (word1, prefix, proximity) key derived from its content The batch is flushed as often as possible, when we are sure that every (proximity, word1, prefix) key derived from its content
can be inserted into the database in sorted order. When it is flushed, it calls a user-provided closure with the following arguments: can be inserted into the database in sorted order. When it is flushed, it calls a user-provided closure with the following arguments:
- key : (word1, prefix, proximity) as bytes - key : (proximity, word1, prefix) as bytes
- value : merged roaring bitmaps from all values associated with (prefix, proximity) in the batch, serialised to bytes - value : merged roaring bitmaps from all values associated with prefix in the batch, serialised to bytes
*/ */
#[derive(Default)] #[derive(Default)]
struct PrefixAndProximityBatch { struct PrefixAndProximityBatch {
proximity: u8,
word1: Vec<u8>, word1: Vec<u8>,
batch: Vec<(Vec<u8>, Vec<Cow<'static, [u8]>>)>, batch: Vec<(Vec<u8>, Vec<Cow<'static, [u8]>>)>,
} }
impl PrefixAndProximityBatch { impl PrefixAndProximityBatch {
/// Insert the new key and value into the batch /// Insert the new key and value into the batch
///
/// The key must either exist in the batch or be greater than all existing keys
fn insert(&mut self, new_key: &[u8], new_value: Vec<u8>) { fn insert(&mut self, new_key: &[u8], new_value: Vec<u8>) {
match self.batch.binary_search_by_key(&new_key, |(k, _)| k.as_slice()) { match self.batch.iter_mut().find(|el| el.0 == new_key) {
Ok(position) => { Some((_prefix, docids)) => docids.push(Cow::Owned(new_value)),
self.batch[position].1.push(Cow::Owned(new_value)); None => self.batch.push((new_key.to_vec(), vec![Cow::Owned(new_value)])),
}
Err(position) => {
self.batch.insert(position, (new_key.to_vec(), vec![Cow::Owned(new_value)]));
}
} }
} }
/// Empties the batch, calling `insert` on each element. /// Empties the batch, calling `insert` on each element.
/// ///
/// The key given to `insert` is `(word1, prefix, proximity)` and the value is the associated merged roaring bitmap. /// The key given to `insert` is `(proximity, word1, prefix)` and the value is the associated merged roaring bitmap.
fn flush( fn flush(
&mut self, &mut self,
merge_buffer: &mut Vec<u8>, merge_buffer: &mut Vec<u8>,
insert: &mut impl for<'buffer> FnMut(&'buffer [u8], &'buffer [u8]) -> Result<()>, insert: &mut impl for<'buffer> FnMut(&'buffer [u8], &'buffer [u8]) -> Result<()>,
) -> Result<()> { ) -> Result<()> {
let PrefixAndProximityBatch { word1, batch } = self; let PrefixAndProximityBatch { proximity, word1, batch } = self;
if batch.is_empty() { if batch.is_empty() {
return Ok(()); return Ok(());
} }
merge_buffer.clear(); merge_buffer.clear();
let mut buffer = Vec::with_capacity(word1.len() + 1 + 6 + 1); let mut buffer = Vec::with_capacity(word1.len() + 1 + 6);
buffer.push(*proximity);
buffer.extend_from_slice(word1); buffer.extend_from_slice(word1);
buffer.push(0); buffer.push(0);
for (key, mergeable_data) in batch.drain(..) { for (key, mergeable_data) in batch.drain(..) {
buffer.truncate(word1.len() + 1); buffer.truncate(1 + word1.len() + 1);
buffer.extend_from_slice(key.as_slice()); buffer.extend_from_slice(key.as_slice());
let data = if mergeable_data.len() > 1 { let data = if mergeable_data.len() > 1 {
@ -884,51 +858,33 @@ mod tests {
CboRoaringBitmapCodec::serialize_into(&bitmap_ranges, &mut serialised_bitmap_ranges); CboRoaringBitmapCodec::serialize_into(&bitmap_ranges, &mut serialised_bitmap_ranges);
let word_pairs = [ let word_pairs = [
// 1, 3: (healthy arb 2) and (healthy arbre 2) with (bitmap123 | bitmap456)
(("healthy", "arbre", 2), &serialised_bitmap123),
// not inserted because 3 > max_proximity
(("healthy", "arbre", 3), &serialised_bitmap456),
// 0, 2: (healthy arb 1) and (healthy arbre 1) with (bitmap123)
(("healthy", "arbres", 1), &serialised_bitmap123), (("healthy", "arbres", 1), &serialised_bitmap123),
// 1, 3:
(("healthy", "arbres", 2), &serialised_bitmap456),
// not be inserted because 3 > max_proximity
(("healthy", "arbres", 3), &serialised_bitmap789),
// not inserted because no prefixes for boat
(("healthy", "boat", 1), &serialised_bitmap123), (("healthy", "boat", 1), &serialised_bitmap123),
// not inserted because no prefixes for ca
(("healthy", "ca", 1), &serialised_bitmap123), (("healthy", "ca", 1), &serialised_bitmap123),
// 4: (healthy cat 1) with (bitmap456 + bitmap123)
(("healthy", "cats", 1), &serialised_bitmap456), (("healthy", "cats", 1), &serialised_bitmap456),
// 5: (healthy cat 2) with (bitmap789 + bitmap_ranges)
(("healthy", "cats", 2), &serialised_bitmap789),
// 4 + 6: (healthy catto 1) with (bitmap123)
(("healthy", "cattos", 1), &serialised_bitmap123), (("healthy", "cattos", 1), &serialised_bitmap123),
// 5 + 7: (healthy catto 2) with (bitmap_ranges)
(("healthy", "cattos", 2), &serialised_bitmap_ranges),
// 8: (jittery cat 1) with (bitmap123 | bitmap456 | bitmap789 | bitmap_ranges)
(("jittery", "cat", 1), &serialised_bitmap123), (("jittery", "cat", 1), &serialised_bitmap123),
// 8:
(("jittery", "cata", 1), &serialised_bitmap456), (("jittery", "cata", 1), &serialised_bitmap456),
// 8:
(("jittery", "catb", 1), &serialised_bitmap789), (("jittery", "catb", 1), &serialised_bitmap789),
// 8:
(("jittery", "catc", 1), &serialised_bitmap_ranges), (("jittery", "catc", 1), &serialised_bitmap_ranges),
(("healthy", "arbre", 2), &serialised_bitmap123),
(("healthy", "arbres", 2), &serialised_bitmap456),
(("healthy", "cats", 2), &serialised_bitmap789),
(("healthy", "cattos", 2), &serialised_bitmap_ranges),
(("healthy", "arbre", 3), &serialised_bitmap456),
(("healthy", "arbres", 3), &serialised_bitmap789),
]; ];
let expected_result = [ let expected_result = [
// first batch:
(("healthy", "arb", 1), bitmap123.clone()), (("healthy", "arb", 1), bitmap123.clone()),
(("healthy", "arb", 2), &bitmap123 | &bitmap456),
(("healthy", "arbre", 1), bitmap123.clone()), (("healthy", "arbre", 1), bitmap123.clone()),
(("healthy", "arbre", 2), &bitmap123 | &bitmap456),
// second batch:
(("healthy", "cat", 1), &bitmap456 | &bitmap123), (("healthy", "cat", 1), &bitmap456 | &bitmap123),
(("healthy", "cat", 2), &bitmap789 | &bitmap_ranges),
(("healthy", "catto", 1), bitmap123.clone()), (("healthy", "catto", 1), bitmap123.clone()),
(("healthy", "catto", 2), bitmap_ranges.clone()),
// third batch
(("jittery", "cat", 1), (&bitmap123 | &bitmap456 | &bitmap789 | &bitmap_ranges)), (("jittery", "cat", 1), (&bitmap123 | &bitmap456 | &bitmap789 | &bitmap_ranges)),
(("healthy", "arb", 2), &bitmap123 | &bitmap456),
(("healthy", "arbre", 2), &bitmap123 | &bitmap456),
(("healthy", "cat", 2), &bitmap789 | &bitmap_ranges),
(("healthy", "catto", 2), bitmap_ranges.clone()),
]; ];
let mut result = vec![]; let mut result = vec![];