Return original string in facet distributions, work on facet tests

This commit is contained in:
Loïc Lecrenier 2022-09-07 17:56:38 +02:00 committed by Loïc Lecrenier
parent 27454e9828
commit fca4577e23
10 changed files with 350 additions and 213 deletions

View File

@ -140,13 +140,13 @@ impl<'a> FacetDistribution<'a> {
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id, field_id,
candidates, candidates,
|facet_key, nbr_docids| { |facet_key, nbr_docids, _| {
let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap(); let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap();
distribution.insert(facet_key.to_string(), nbr_docids); distribution.insert(facet_key.to_string(), nbr_docids);
if distribution.len() == self.max_values_per_facet { if distribution.len() == self.max_values_per_facet {
ControlFlow::Break(()) Ok(ControlFlow::Break(()))
} else { } else {
ControlFlow::Continue(()) Ok(ControlFlow::Continue(()))
} }
}, },
) )
@ -163,13 +163,22 @@ impl<'a> FacetDistribution<'a> {
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id, field_id,
candidates, candidates,
|facet_key, nbr_docids| { |facet_key, nbr_docids, any_docid| {
let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap(); let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
distribution.insert(facet_key.to_string(), nbr_docids);
let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
let original_string = self
.index
.field_id_docid_facet_strings
.get(self.rtxn, &key)?
.unwrap()
.to_owned();
distribution.insert(original_string, nbr_docids);
if distribution.len() == self.max_values_per_facet { if distribution.len() == self.max_values_per_facet {
ControlFlow::Break(()) Ok(ControlFlow::Break(()))
} else { } else {
ControlFlow::Continue(()) Ok(ControlFlow::Continue(()))
} }
}, },
) )
@ -186,7 +195,8 @@ impl<'a> FacetDistribution<'a> {
let db = self.index.facet_id_f64_docids; let db = self.index.facet_id_f64_docids;
let mut prefix = vec![]; let mut prefix = vec![];
prefix.extend_from_slice(&field_id.to_be_bytes()); prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(0); prefix.push(0); // read values from level 0 only
let iter = db let iter = db
.as_polymorph() .as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
@ -207,10 +217,15 @@ impl<'a> FacetDistribution<'a> {
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>(); .remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
// TODO: get the original value of the facet somewhere (in the documents DB?)
for result in iter { for result in iter {
let (key, value) = result?; let (key, value) = result?;
distribution.insert(key.left_bound.to_owned(), value.bitmap.len());
let docid = value.bitmap.iter().next().unwrap();
let key: (FieldId, _, &'a str) = (field_id, docid, key.left_bound);
let original_string =
self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned();
distribution.insert(original_string, value.bitmap.len());
if distribution.len() == self.max_values_per_facet { if distribution.len() == self.max_values_per_facet {
break; break;
} }
@ -304,3 +319,217 @@ impl fmt::Debug for FacetDistribution<'_> {
.finish() .finish()
} }
} }
#[cfg(test)]
mod tests {
use big_s::S;
use maplit::hashset;
use crate::{
documents::documents_batch_reader_from_objects, index::tests::TempIndex, milli_snap,
FacetDistribution,
};
#[test]
fn few_candidates_few_facet_values() {
    // Every query in this test has too few candidates for the code in
    // `facet_distribution_iter` to be used, so only the other code path is covered here.
    let mut index = TempIndex::new();
    index.index_documents_config.autogenerate_docids = true;

    index.update_settings(|s| s.set_filterable_fields(hashset! { S("colour") })).unwrap();

    // Three documents; the snapshots below show "Blue" and " blue" being counted
    // under a single facet value.
    let batch = documents!([
        { "colour": "Blue" },
        { "colour": " blue" },
        { "colour": "RED" }
    ]);
    index.add_documents(batch).unwrap();

    let rtxn = index.read_txn().unwrap();

    // No explicit candidates.
    let all_docs =
        FacetDistribution::new(&rtxn, &index).facets(std::iter::once("colour")).execute().unwrap();
    milli_snap!(format!("{all_docs:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);

    // Candidates 0, 1 and 2.
    let three_docs = FacetDistribution::new(&rtxn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..3).collect())
        .execute()
        .unwrap();
    milli_snap!(format!("{three_docs:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);

    // Candidates 1 and 2.
    let last_two_docs = FacetDistribution::new(&rtxn, &index)
        .facets(std::iter::once("colour"))
        .candidates((1..3).collect())
        .execute()
        .unwrap();
    // Returning "Blue" instead of " blue" would arguably be fine too: any
    // non-normalised spelling of the value should be acceptable, even one that
    // comes from a document outside the candidates.
    milli_snap!(format!("{last_two_docs:?}"), @r###"{"colour": {" blue": 1, "RED": 1}}"###);

    // Candidate 2 only.
    let last_doc = FacetDistribution::new(&rtxn, &index)
        .facets(std::iter::once("colour"))
        .candidates(std::iter::once(2).collect())
        .execute()
        .unwrap();
    milli_snap!(format!("{last_doc:?}"), @r###"{"colour": {"RED": 1}}"###);

    // NOTE(review): the snapshot below holds two values although
    // `max_values_per_facet(1)` is set. It pins the current output; confirm
    // whether the limit is meant to apply on this code path.
    let limited = FacetDistribution::new(&rtxn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..3).collect())
        .max_values_per_facet(1)
        .execute()
        .unwrap();
    milli_snap!(format!("{limited:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);
}
#[test]
fn many_candidates_few_facet_values() {
    // 10 000 documents spread over five spellings of two colours. The snapshots
    // below show the spellings being merged into the two values "Blue" and "Red".
    let mut index = TempIndex::new_with_map_size(4096 * 10_000);
    index.index_documents_config.autogenerate_docids = true;

    index
        .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
        .unwrap();

    let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"];

    // Document `i` gets `facet_values[i % 5]`.
    let documents: Vec<_> = facet_values
        .iter()
        .cycle()
        .take(10_000)
        .map(|colour| serde_json::json!({ "colour": colour }).as_object().unwrap().clone())
        .collect();

    let documents = documents_batch_reader_from_objects(documents);
    index.add_documents(documents).unwrap();

    let txn = index.read_txn().unwrap();

    // No explicit candidates.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);

    // No explicit candidates, at most one value per facet.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .max_values_per_facet(1)
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000}}"###);

    // Every document listed explicitly as a candidate.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..10_000).collect())
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);

    // First half of the documents.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..5_000).collect())
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);

    // First half of the documents, at most one value per facet.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..5_000).collect())
        .max_values_per_facet(1)
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000}}"###);
}
#[test]
fn many_candidates_many_facet_values() {
    // 10 000 documents spread over 1000 distinct facet values (the lowercase
    // hexadecimal renderings of 0..1000), i.e. ten documents per value.
    let mut index = TempIndex::new_with_map_size(4096 * 10_000);
    index.index_documents_config.autogenerate_docids = true;

    index
        .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
        .unwrap();

    let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();

    // Document `i` gets `facet_values[i % 1000]`.
    let documents: Vec<_> = facet_values
        .iter()
        .cycle()
        .take(10_000)
        .map(|colour| serde_json::json!({ "colour": colour }).as_object().unwrap().clone())
        .collect();

    let documents = documents_batch_reader_from_objects(documents);
    index.add_documents(documents).unwrap();

    let txn = index.read_txn().unwrap();

    // No explicit candidates.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), "no_candidates", @"ac9229ed5964d893af96a7076e2f8af5");

    // No explicit candidates, at most two values per facet.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .max_values_per_facet(2)
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), "no_candidates_with_max_2", @r###"{"colour": {"0": 10, "1": 10}}"###);

    // Every document listed explicitly; the expected hash is the same as for "no_candidates".
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..10_000).collect())
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_0_10_000", @"ac9229ed5964d893af96a7076e2f8af5");

    // First half of the documents.
    let map = FacetDistribution::new(&txn, &index)
        .facets(std::iter::once("colour"))
        .candidates((0..5_000).collect())
        .execute()
        .unwrap();
    milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992");
}
}

View File

@ -4,8 +4,9 @@ use heed::Result;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level}; use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{ use crate::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
DocumentId,
}; };
pub fn iterate_over_facet_distribution<'t, CB>( pub fn iterate_over_facet_distribution<'t, CB>(
@ -16,7 +17,7 @@ pub fn iterate_over_facet_distribution<'t, CB>(
callback: CB, callback: CB,
) -> Result<()> ) -> Result<()>
where where
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{ {
let mut fd = FacetDistribution { rtxn, db, field_id, callback }; let mut fd = FacetDistribution { rtxn, db, field_id, callback };
let highest_level = let highest_level =
@ -32,7 +33,7 @@ where
struct FacetDistribution<'t, CB> struct FacetDistribution<'t, CB>
where where
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{ {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
@ -42,7 +43,7 @@ where
impl<'t, CB> FacetDistribution<'t, CB> impl<'t, CB> FacetDistribution<'t, CB>
where where
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{ {
fn iterate_level_0( fn iterate_level_0(
&mut self, &mut self,
@ -62,7 +63,8 @@ where
} }
let docids_in_common = value.bitmap.intersection_len(candidates); let docids_in_common = value.bitmap.intersection_len(candidates);
if docids_in_common > 0 { if docids_in_common > 0 {
match (self.callback)(key.left_bound, docids_in_common) { let any_docid = value.bitmap.iter().next().unwrap();
match (self.callback)(key.left_bound, docids_in_common, any_docid)? {
ControlFlow::Continue(_) => {} ControlFlow::Continue(_) => {}
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())), ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
} }
@ -112,50 +114,14 @@ where
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::iterate_over_facet_distribution;
use crate::milli_snap;
use crate::search::facet::tests::get_random_looking_index;
use crate::{heed_codec::facet::OrderedF64Codec, search::facet::tests::get_simple_index};
use heed::BytesDecode;
use roaring::RoaringBitmap;
use std::ops::ControlFlow; use std::ops::ControlFlow;
use super::iterate_over_facet_distribution;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::milli_snap;
use crate::update::facet::tests::FacetIndex;
use heed::BytesDecode;
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, 0, &(i as f64), &bitmap);
}
txn.commit().unwrap();
index
}
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for (_i, key) in keys.into_iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
index.insert(&mut txn, 0, &(key as f64), &bitmap);
}
txn.commit().unwrap();
index
}
#[test]
fn random_looking_index_snap() {
let index = get_random_looking_index();
milli_snap!(format!("{index}"));
}
#[test] #[test]
fn filter_distribution_all() { fn filter_distribution_all() {
let indexes = [get_simple_index(), get_random_looking_index()]; let indexes = [get_simple_index(), get_random_looking_index()];
@ -163,11 +129,17 @@ mod tests {
let txn = index.env.read_txn().unwrap(); let txn = index.env.read_txn().unwrap();
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>(); let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
let mut results = String::new(); let mut results = String::new();
iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| { iterate_over_facet_distribution(
let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); &txn,
results.push_str(&format!("{facet}: {count}\n")); index.content,
ControlFlow::Continue(()) 0,
}) &candidates,
|facet, count, _| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
results.push_str(&format!("{facet}: {count}\n"));
Ok(ControlFlow::Continue(()))
},
)
.unwrap(); .unwrap();
milli_snap!(results, i); milli_snap!(results, i);
@ -182,17 +154,23 @@ mod tests {
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>(); let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
let mut results = String::new(); let mut results = String::new();
let mut nbr_facets = 0; let mut nbr_facets = 0;
iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| { iterate_over_facet_distribution(
let facet = OrderedF64Codec::bytes_decode(facet).unwrap(); &txn,
if nbr_facets == 100 { index.content,
return ControlFlow::Break(()); 0,
} else { &candidates,
nbr_facets += 1; |facet, count, _| {
results.push_str(&format!("{facet}: {count}\n")); let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
if nbr_facets == 100 {
return Ok(ControlFlow::Break(()));
} else {
nbr_facets += 1;
results.push_str(&format!("{facet}: {count}\n"));
ControlFlow::Continue(()) Ok(ControlFlow::Continue(()))
} }
}) },
)
.unwrap(); .unwrap();
milli_snap!(results, i); milli_snap!(results, i);

View File

@ -15,7 +15,8 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
field_id: u16, field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
) -> Result<RoaringBitmap> docids: &mut RoaringBitmap,
) -> Result<()>
where where
BoundCodec: for<'a> BytesEncode<'a>, BoundCodec: for<'a> BytesEncode<'a>,
for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized, for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized,
@ -45,16 +46,15 @@ where
Bound::Unbounded => Bound::Unbounded, Bound::Unbounded => Bound::Unbounded,
}; };
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(); let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let mut docids = RoaringBitmap::new(); let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
let highest_level = get_highest_level(rtxn, db, field_id)?; let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap(); let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?; f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
Ok(docids) Ok(())
} else { } else {
return Ok(RoaringBitmap::new()); return Ok(());
} }
} }
@ -255,45 +255,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::ops::Bound;
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use super::find_docids_of_facet_within_bounds; use super::find_docids_of_facet_within_bounds;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec}; use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use crate::update::facet::tests::FacetIndex; use roaring::RoaringBitmap;
use std::ops::Bound;
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, 0, &(i as f64), &bitmap);
}
txn.commit().unwrap();
index
}
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for (_i, key) in keys.into_iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
index.insert(&mut txn, 0, &(key as f64), &bitmap);
}
txn.commit().unwrap();
index
}
#[test] #[test]
fn random_looking_index_snap() { fn random_looking_index_snap() {
@ -310,12 +278,14 @@ mod tests {
let i = i as f64; let i = i as f64;
let start = Bound::Included(0.); let start = Bound::Included(0.);
let end = Bound::Included(i); let end = Bound::Included(i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
&mut docids,
) )
.unwrap(); .unwrap();
results.push_str(&format!("{}\n", display_bitmap(&docids))); results.push_str(&format!("{}\n", display_bitmap(&docids)));
@ -326,12 +296,14 @@ mod tests {
let i = i as f64; let i = i as f64;
let start = Bound::Excluded(0.); let start = Bound::Excluded(0.);
let end = Bound::Excluded(i); let end = Bound::Excluded(i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
&mut docids,
) )
.unwrap(); .unwrap();
results.push_str(&format!("{}\n", display_bitmap(&docids))); results.push_str(&format!("{}\n", display_bitmap(&docids)));
@ -352,12 +324,14 @@ mod tests {
let i = i as f64; let i = i as f64;
let start = Bound::Included(i); let start = Bound::Included(i);
let end = Bound::Included(255.); let end = Bound::Included(255.);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
&mut docids,
) )
.unwrap(); .unwrap();
results.push_str(&format!("{}\n", display_bitmap(&docids))); results.push_str(&format!("{}\n", display_bitmap(&docids)));
@ -371,12 +345,14 @@ mod tests {
let i = i as f64; let i = i as f64;
let start = Bound::Excluded(i); let start = Bound::Excluded(i);
let end = Bound::Excluded(255.); let end = Bound::Excluded(255.);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
&mut docids,
) )
.unwrap(); .unwrap();
results.push_str(&format!("{}\n", display_bitmap(&docids))); results.push_str(&format!("{}\n", display_bitmap(&docids)));
@ -399,12 +375,14 @@ mod tests {
let i = i as f64; let i = i as f64;
let start = Bound::Included(i); let start = Bound::Included(i);
let end = Bound::Included(255. - i); let end = Bound::Included(255. - i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
&mut docids,
) )
.unwrap(); .unwrap();
results.push_str(&format!("{}\n", display_bitmap(&docids))); results.push_str(&format!("{}\n", display_bitmap(&docids)));
@ -418,12 +396,14 @@ mod tests {
let i = i as f64; let i = i as f64;
let start = Bound::Excluded(i); let start = Bound::Excluded(i);
let end = Bound::Excluded(255. - i); let end = Bound::Excluded(255. - i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(), index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
&mut docids,
) )
.unwrap(); .unwrap();
results.push_str(&format!("{}\n", display_bitmap(&docids))); results.push_str(&format!("{}\n", display_bitmap(&docids)));

View File

@ -83,49 +83,12 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::facet_sort_ascending::ascending_facet_sort; use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use crate::update::facet::tests::FacetIndex; use roaring::RoaringBitmap;
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, 0, &(i as f64), &bitmap);
}
txn.commit().unwrap();
index
}
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for (_i, key) in keys.into_iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
index.insert(&mut txn, 0, &(key as f64), &bitmap);
}
txn.commit().unwrap();
index
}
#[test]
fn random_looking_index_snap() {
let index = get_random_looking_index();
milli_snap!(format!("{index}"));
}
#[test] #[test]
fn filter_sort() { fn filter_sort() {
let indexes = [get_simple_index(), get_random_looking_index()]; let indexes = [get_simple_index(), get_random_looking_index()];

View File

@ -116,49 +116,13 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use rand::{Rng, SeedableRng}; use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, OrderedF64Codec};
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use crate::update::facet::tests::FacetIndex; use roaring::RoaringBitmap;
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, 0, &(i as f64), &bitmap);
}
txn.commit().unwrap();
index
}
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for (_i, key) in keys.into_iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
index.insert(&mut txn, 0, &(key as f64), &bitmap);
}
txn.commit().unwrap();
index
}
#[test]
fn random_looking_index_snap() {
let index = get_random_looking_index();
milli_snap!(format!("{index}"));
}
#[test] #[test]
fn filter_sort_descending() { fn filter_sort_descending() {
let indexes = [get_simple_index(), get_random_looking_index()]; let indexes = [get_simple_index(), get_random_looking_index()];

View File

@ -278,11 +278,9 @@ impl<'a> Filter<'a> {
(Excluded(l), Included(r)) if l >= r => return Ok(()), (Excluded(l), Included(r)) if l >= r => return Ok(()),
(_, _) => (), (_, _) => (),
} }
let x = facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>( facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
rtxn, db, field_id, &left, &right, rtxn, db, field_id, &left, &right, output,
)?; )?;
// TODO: the facet range search should take a mutable roaring bitmap as argument
*output = x;
Ok(()) Ok(())
} }

View File

@ -74,3 +74,40 @@ pub(crate) fn get_highest_level<'t>(
}) })
.unwrap_or(0)) .unwrap_or(0))
} }
#[cfg(test)]
pub(crate) mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::{heed_codec::facet::OrderedF64Codec, update::facet::tests::FacetIndex};
pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, 0, &(i as f64), &bitmap);
}
txn.commit().unwrap();
index
}
/// Builds a test facet index for field id 0 from 128 pseudo-random keys drawn
/// from `0..256`.
///
/// The generator is seeded with a fixed all-zero seed rather than with entropy.
/// Each drawn key is inserted as an `f64` facet value associated with the two
/// document ids `key` and `key + 100`.
pub fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
    let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
    let mut txn = index.env.write_txn().unwrap();

    let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
    // Draw all the keys up front, before any insertion happens.
    let keys =
        std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();

    for key in keys {
        let mut bitmap = RoaringBitmap::new();
        bitmap.insert(key);
        bitmap.insert(key + 100);
        index.insert(&mut txn, 0, &(key as f64), &bitmap);
    }

    txn.commit().unwrap();
    index
}
}

View File

@ -1,4 +0,0 @@
---
source: milli/src/search/facet/facet_distribution_iter.rs
---
3256c76a7c1b768a013e78d5fa6e9ff9

View File

@ -1,4 +0,0 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
3256c76a7c1b768a013e78d5fa6e9ff9

View File

@ -1,4 +0,0 @@
---
source: milli/src/search/facet/facet_sort_descending.rs
---
3256c76a7c1b768a013e78d5fa6e9ff9