mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-27 04:25:06 +08:00
Return original string in facet distributions, work on facet tests
This commit is contained in:
parent
27454e9828
commit
fca4577e23
@ -140,13 +140,13 @@ impl<'a> FacetDistribution<'a> {
|
||||
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids| {
|
||||
|facet_key, nbr_docids, _| {
|
||||
let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap();
|
||||
distribution.insert(facet_key.to_string(), nbr_docids);
|
||||
if distribution.len() == self.max_values_per_facet {
|
||||
ControlFlow::Break(())
|
||||
Ok(ControlFlow::Break(()))
|
||||
} else {
|
||||
ControlFlow::Continue(())
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
},
|
||||
)
|
||||
@ -163,13 +163,22 @@ impl<'a> FacetDistribution<'a> {
|
||||
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids| {
|
||||
|facet_key, nbr_docids, any_docid| {
|
||||
let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
|
||||
distribution.insert(facet_key.to_string(), nbr_docids);
|
||||
|
||||
let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
|
||||
let original_string = self
|
||||
.index
|
||||
.field_id_docid_facet_strings
|
||||
.get(self.rtxn, &key)?
|
||||
.unwrap()
|
||||
.to_owned();
|
||||
|
||||
distribution.insert(original_string, nbr_docids);
|
||||
if distribution.len() == self.max_values_per_facet {
|
||||
ControlFlow::Break(())
|
||||
Ok(ControlFlow::Break(()))
|
||||
} else {
|
||||
ControlFlow::Continue(())
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
},
|
||||
)
|
||||
@ -186,7 +195,8 @@ impl<'a> FacetDistribution<'a> {
|
||||
let db = self.index.facet_id_f64_docids;
|
||||
let mut prefix = vec![];
|
||||
prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
prefix.push(0);
|
||||
prefix.push(0); // read values from level 0 only
|
||||
|
||||
let iter = db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||
@ -207,10 +217,15 @@ impl<'a> FacetDistribution<'a> {
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
|
||||
.remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
|
||||
|
||||
// TODO: get the original value of the facet somewhere (in the documents DB?)
|
||||
for result in iter {
|
||||
let (key, value) = result?;
|
||||
distribution.insert(key.left_bound.to_owned(), value.bitmap.len());
|
||||
|
||||
let docid = value.bitmap.iter().next().unwrap();
|
||||
let key: (FieldId, _, &'a str) = (field_id, docid, key.left_bound);
|
||||
let original_string =
|
||||
self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned();
|
||||
|
||||
distribution.insert(original_string, value.bitmap.len());
|
||||
if distribution.len() == self.max_values_per_facet {
|
||||
break;
|
||||
}
|
||||
@ -304,3 +319,217 @@ impl fmt::Debug for FacetDistribution<'_> {
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::{
|
||||
documents::documents_batch_reader_from_objects, index::tests::TempIndex, milli_snap,
|
||||
FacetDistribution,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn few_candidates_few_facet_values() {
|
||||
// All the tests here avoid using the code in `facet_distribution_iter` because there aren't
|
||||
// enough candidates.
|
||||
|
||||
let mut index = TempIndex::new();
|
||||
index.index_documents_config.autogenerate_docids = true;
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.unwrap();
|
||||
|
||||
let documents = documents!([
|
||||
{ "colour": "Blue" },
|
||||
{ "colour": " blue" },
|
||||
{ "colour": "RED" }
|
||||
]);
|
||||
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates([0, 1, 2].iter().copied().collect())
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates([1, 2].iter().copied().collect())
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
// I think it would be fine if " blue" was "Blue" instead.
|
||||
// We just need to get any non-normalised string I think, even if it's not in
|
||||
// the candidates
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {" blue": 1, "RED": 1}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates([2].iter().copied().collect())
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"RED": 1}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates([0, 1, 2].iter().copied().collect())
|
||||
.max_values_per_facet(1)
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn many_candidates_few_facet_values() {
|
||||
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
|
||||
index.index_documents_config.autogenerate_docids = true;
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.unwrap();
|
||||
|
||||
let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"];
|
||||
|
||||
let mut documents = vec![];
|
||||
for i in 0..10_000 {
|
||||
let document = serde_json::json!({
|
||||
"colour": facet_values[i % 5],
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.clone();
|
||||
documents.push(document);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.max_values_per_facet(1)
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates((0..10_000).into_iter().collect())
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates((0..5_000).into_iter().collect())
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates((0..5_000).into_iter().collect())
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates((0..5_000).into_iter().collect())
|
||||
.max_values_per_facet(1)
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000}}"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn many_candidates_many_facet_values() {
|
||||
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
|
||||
index.index_documents_config.autogenerate_docids = true;
|
||||
|
||||
index
|
||||
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
|
||||
.unwrap();
|
||||
|
||||
let facet_values = (0..1000).into_iter().map(|x| format!("{x:x}")).collect::<Vec<_>>();
|
||||
|
||||
let mut documents = vec![];
|
||||
for i in 0..10_000 {
|
||||
let document = serde_json::json!({
|
||||
"colour": facet_values[i % 1000],
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.clone();
|
||||
documents.push(document);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), "no_candidates", @"ac9229ed5964d893af96a7076e2f8af5");
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.max_values_per_facet(2)
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), "no_candidates_with_max_2", @r###"{"colour": {"0": 10, "1": 10}}"###);
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates((0..10_000).into_iter().collect())
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), "candidates_0_10_000", @"ac9229ed5964d893af96a7076e2f8af5");
|
||||
|
||||
let map = FacetDistribution::new(&txn, &index)
|
||||
.facets(std::iter::once("colour"))
|
||||
.candidates((0..5_000).into_iter().collect())
|
||||
.execute()
|
||||
.unwrap();
|
||||
|
||||
milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992");
|
||||
}
|
||||
}
|
||||
|
@ -4,8 +4,9 @@ use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
||||
use crate::{
|
||||
heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||
DocumentId,
|
||||
};
|
||||
|
||||
pub fn iterate_over_facet_distribution<'t, CB>(
|
||||
@ -16,7 +17,7 @@ pub fn iterate_over_facet_distribution<'t, CB>(
|
||||
callback: CB,
|
||||
) -> Result<()>
|
||||
where
|
||||
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
|
||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||
{
|
||||
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
|
||||
let highest_level =
|
||||
@ -32,7 +33,7 @@ where
|
||||
|
||||
struct FacetDistribution<'t, CB>
|
||||
where
|
||||
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
|
||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||
{
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
@ -42,7 +43,7 @@ where
|
||||
|
||||
impl<'t, CB> FacetDistribution<'t, CB>
|
||||
where
|
||||
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
|
||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||
{
|
||||
fn iterate_level_0(
|
||||
&mut self,
|
||||
@ -62,7 +63,8 @@ where
|
||||
}
|
||||
let docids_in_common = value.bitmap.intersection_len(candidates);
|
||||
if docids_in_common > 0 {
|
||||
match (self.callback)(key.left_bound, docids_in_common) {
|
||||
let any_docid = value.bitmap.iter().next().unwrap();
|
||||
match (self.callback)(key.left_bound, docids_in_common, any_docid)? {
|
||||
ControlFlow::Continue(_) => {}
|
||||
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
|
||||
}
|
||||
@ -112,50 +114,14 @@ where
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::iterate_over_facet_distribution;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::tests::get_random_looking_index;
|
||||
use crate::{heed_codec::facet::OrderedF64Codec, search::facet::tests::get_simple_index};
|
||||
use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use super::iterate_over_facet_distribution;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
use heed::BytesDecode;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
for i in 0..256u16 {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(i as u32);
|
||||
index.insert(&mut txn, 0, &(i as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
|
||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||
let keys =
|
||||
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
|
||||
|
||||
for (_i, key) in keys.into_iter().enumerate() {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(key);
|
||||
bitmap.insert(key + 100);
|
||||
index.insert(&mut txn, 0, &(key as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn random_looking_index_snap() {
|
||||
let index = get_random_looking_index();
|
||||
milli_snap!(format!("{index}"));
|
||||
}
|
||||
#[test]
|
||||
fn filter_distribution_all() {
|
||||
let indexes = [get_simple_index(), get_random_looking_index()];
|
||||
@ -163,11 +129,17 @@ mod tests {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| {
|
||||
iterate_over_facet_distribution(
|
||||
&txn,
|
||||
index.content,
|
||||
0,
|
||||
&candidates,
|
||||
|facet, count, _| {
|
||||
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
|
||||
results.push_str(&format!("{facet}: {count}\n"));
|
||||
ControlFlow::Continue(())
|
||||
})
|
||||
Ok(ControlFlow::Continue(()))
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
milli_snap!(results, i);
|
||||
|
||||
@ -182,17 +154,23 @@ mod tests {
|
||||
let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let mut nbr_facets = 0;
|
||||
iterate_over_facet_distribution(&txn, index.content, 0, &candidates, |facet, count| {
|
||||
iterate_over_facet_distribution(
|
||||
&txn,
|
||||
index.content,
|
||||
0,
|
||||
&candidates,
|
||||
|facet, count, _| {
|
||||
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
|
||||
if nbr_facets == 100 {
|
||||
return ControlFlow::Break(());
|
||||
return Ok(ControlFlow::Break(()));
|
||||
} else {
|
||||
nbr_facets += 1;
|
||||
results.push_str(&format!("{facet}: {count}\n"));
|
||||
|
||||
ControlFlow::Continue(())
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
})
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
milli_snap!(results, i);
|
||||
|
||||
|
@ -15,7 +15,8 @@ pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
||||
field_id: u16,
|
||||
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||
) -> Result<RoaringBitmap>
|
||||
docids: &mut RoaringBitmap,
|
||||
) -> Result<()>
|
||||
where
|
||||
BoundCodec: for<'a> BytesEncode<'a>,
|
||||
for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized,
|
||||
@ -45,16 +46,15 @@ where
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||
let mut docids = RoaringBitmap::new();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
|
||||
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
|
||||
Ok(docids)
|
||||
Ok(())
|
||||
} else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
@ -255,45 +255,13 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ops::Bound;
|
||||
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::find_docids_of_facet_within_bounds;
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
|
||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
for i in 0..256u16 {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(i as u32);
|
||||
index.insert(&mut txn, 0, &(i as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
|
||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||
let keys =
|
||||
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
|
||||
|
||||
for (_i, key) in keys.into_iter().enumerate() {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(key);
|
||||
bitmap.insert(key + 100);
|
||||
index.insert(&mut txn, 0, &(key as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
use roaring::RoaringBitmap;
|
||||
use std::ops::Bound;
|
||||
|
||||
#[test]
|
||||
fn random_looking_index_snap() {
|
||||
@ -310,12 +278,14 @@ mod tests {
|
||||
let i = i as f64;
|
||||
let start = Bound::Included(0.);
|
||||
let end = Bound::Included(i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!("{}\n", display_bitmap(&docids)));
|
||||
@ -326,12 +296,14 @@ mod tests {
|
||||
let i = i as f64;
|
||||
let start = Bound::Excluded(0.);
|
||||
let end = Bound::Excluded(i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!("{}\n", display_bitmap(&docids)));
|
||||
@ -352,12 +324,14 @@ mod tests {
|
||||
let i = i as f64;
|
||||
let start = Bound::Included(i);
|
||||
let end = Bound::Included(255.);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!("{}\n", display_bitmap(&docids)));
|
||||
@ -371,12 +345,14 @@ mod tests {
|
||||
let i = i as f64;
|
||||
let start = Bound::Excluded(i);
|
||||
let end = Bound::Excluded(255.);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!("{}\n", display_bitmap(&docids)));
|
||||
@ -399,12 +375,14 @@ mod tests {
|
||||
let i = i as f64;
|
||||
let start = Bound::Included(i);
|
||||
let end = Bound::Included(255. - i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!("{}\n", display_bitmap(&docids)));
|
||||
@ -418,12 +396,14 @@ mod tests {
|
||||
let i = i as f64;
|
||||
let start = Bound::Excluded(i);
|
||||
let end = Bound::Excluded(255. - i);
|
||||
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!("{}\n", display_bitmap(&docids)));
|
||||
|
@ -83,49 +83,12 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
for i in 0..256u16 {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(i as u32);
|
||||
index.insert(&mut txn, 0, &(i as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
|
||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||
let keys =
|
||||
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
|
||||
|
||||
for (_i, key) in keys.into_iter().enumerate() {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(key);
|
||||
bitmap.insert(key + 100);
|
||||
index.insert(&mut txn, 0, &(key as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn random_looking_index_snap() {
|
||||
let index = get_random_looking_index();
|
||||
milli_snap!(format!("{index}"));
|
||||
}
|
||||
#[test]
|
||||
fn filter_sort() {
|
||||
let indexes = [get_simple_index(), get_random_looking_index()];
|
||||
|
@ -116,49 +116,13 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
for i in 0..256u16 {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(i as u32);
|
||||
index.insert(&mut txn, 0, &(i as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
|
||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||
let keys =
|
||||
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
|
||||
|
||||
for (_i, key) in keys.into_iter().enumerate() {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(key);
|
||||
bitmap.insert(key + 100);
|
||||
index.insert(&mut txn, 0, &(key as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn random_looking_index_snap() {
|
||||
let index = get_random_looking_index();
|
||||
milli_snap!(format!("{index}"));
|
||||
}
|
||||
#[test]
|
||||
fn filter_sort_descending() {
|
||||
let indexes = [get_simple_index(), get_random_looking_index()];
|
||||
|
@ -278,11 +278,9 @@ impl<'a> Filter<'a> {
|
||||
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
||||
(_, _) => (),
|
||||
}
|
||||
let x = facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
rtxn, db, field_id, &left, &right,
|
||||
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
rtxn, db, field_id, &left, &right, output,
|
||||
)?;
|
||||
// TODO: the facet range search should take a mutable roaring bitmap as argument
|
||||
*output = x;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -74,3 +74,40 @@ pub(crate) fn get_highest_level<'t>(
|
||||
})
|
||||
.unwrap_or(0))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::{heed_codec::facet::OrderedF64Codec, update::facet::tests::FacetIndex};
|
||||
|
||||
pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
for i in 0..256u16 {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(i as u32);
|
||||
index.insert(&mut txn, 0, &(i as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
pub fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
|
||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||
let keys =
|
||||
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
|
||||
|
||||
for (_i, key) in keys.into_iter().enumerate() {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(key);
|
||||
bitmap.insert(key + 100);
|
||||
index.insert(&mut txn, 0, &(key as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +0,0 @@
|
||||
---
|
||||
source: milli/src/search/facet/facet_distribution_iter.rs
|
||||
---
|
||||
3256c76a7c1b768a013e78d5fa6e9ff9
|
@ -1,4 +0,0 @@
|
||||
---
|
||||
source: milli/src/search/facet/facet_sort_ascending.rs
|
||||
---
|
||||
3256c76a7c1b768a013e78d5fa6e9ff9
|
@ -1,4 +0,0 @@
|
||||
---
|
||||
source: milli/src/search/facet/facet_sort_descending.rs
|
||||
---
|
||||
3256c76a7c1b768a013e78d5fa6e9ff9
|
Loading…
Reference in New Issue
Block a user