Improve documentation of some facet-related algorithms

This commit is contained in:
Loïc Lecrenier 2022-09-08 08:47:40 +02:00 committed by Loïc Lecrenier
parent 985a94adfc
commit de52a9bf75
5 changed files with 57 additions and 17 deletions

View File

@ -9,9 +9,8 @@ use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec}; use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::facet_sort_ascending::ascending_facet_sort; use crate::search::facet::ascending_facet_sort;
use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::descending_facet_sort;
// use crate::search::facet::FacetStringIter;
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::{FieldId, Index, Result}; use crate::{FieldId, Index, Result};

View File

@ -1,14 +1,23 @@
use std::ops::ControlFlow;
use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level}; use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
}; };
use crate::DocumentId; use crate::DocumentId;
use heed::Result;
use roaring::RoaringBitmap;
use std::ops::ControlFlow;
/// Call the given closure on the facet distribution of the candidate documents.
///
/// The arguments to the closure are:
/// - the facet value, as a byte slice
/// - the number of documents among the candidates that contain this facet value
/// - the id of a document which contains the facet value. Note that this document
/// is not necessarily from the list of candidates; it is simply *any* document which
/// contains this facet value.
///
/// The return value of the closure is a `ControlFlow<()>` which indicates whether we should
/// keep iterating over the different facet values or stop.
pub fn iterate_over_facet_distribution<'t, CB>( pub fn iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,

View File

@ -6,6 +6,28 @@ use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
/// Return an iterator which iterates over the given candidate documents in
/// ascending order of their facet value for the given field id.
///
/// The documents returned by the iterator are grouped by the facet values that
/// determined their rank. For example, given the documents:
///
/// ```ignore
/// 0: { "colour": ["blue", "green"] }
/// 1: { "colour": ["blue", "red"] }
/// 2: { "colour": ["orange", "red"] }
/// 3: { "colour": ["green", "red"] }
/// 4: { "colour": ["blue", "orange", "red"] }
/// ```
/// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
/// over the following elements:
/// ```ignore
/// [0, 4] // corresponds to all the documents within the candidates that have the facet value "blue"
/// [3] // same for "green"
/// [2] // same for "orange"
/// END
/// ```
/// Note that once a document id is returned by the iterator, it is never returned again.
pub fn ascending_facet_sort<'t>( pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,

View File

@ -8,6 +8,9 @@ use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
}; };
/// See the documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
///
/// This function does the same thing, but in the opposite order.
pub fn descending_facet_sort<'t>( pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,

View File

@ -1,17 +1,19 @@
use heed::types::ByteSlice;
use heed::{BytesDecode, RoTxn};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
pub use self::filter::Filter; pub use self::filter::Filter;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
pub use facet_sort_ascending::ascending_facet_sort;
pub use facet_sort_descending::descending_facet_sort;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, RoTxn};
mod facet_distribution; mod facet_distribution;
mod facet_distribution_iter; mod facet_distribution_iter;
mod facet_range_search; mod facet_range_search;
pub mod facet_sort_ascending; mod facet_sort_ascending;
pub mod facet_sort_descending; mod facet_sort_descending;
mod filter; mod filter;
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>( pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
@ -23,8 +25,9 @@ where
let mut level0prefix = vec![]; let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes()); level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0); level0prefix.push(0);
let mut level0_iter_forward = let mut level0_iter_forward = db
db.as_polymorph().prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?; .as_polymorph()
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
if let Some(first) = level0_iter_forward.next() { if let Some(first) = level0_iter_forward.next() {
let (first_key, _) = first?; let (first_key, _) = first?;
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key) let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
@ -34,6 +37,8 @@ where
Ok(None) Ok(None)
} }
} }
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>( pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
@ -47,7 +52,7 @@ where
level0prefix.push(0); level0prefix.push(0);
let mut level0_iter_backward = db let mut level0_iter_backward = db
.as_polymorph() .as_polymorph()
.rev_prefix_iter::<_, ByteSlice, ByteSlice>(txn, level0prefix.as_slice())?; .rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?;
if let Some(last) = level0_iter_backward.next() { if let Some(last) = level0_iter_backward.next() {
let (last_key, _) = last?; let (last_key, _) = last?;
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key) let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
@ -57,6 +62,8 @@ where
Ok(None) Ok(None)
} }
} }
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>( pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>, txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
@ -65,7 +72,7 @@ pub(crate) fn get_highest_level<'t>(
let field_id_prefix = &field_id.to_be_bytes(); let field_id_prefix = &field_id.to_be_bytes();
Ok(db Ok(db
.as_polymorph() .as_polymorph()
.rev_prefix_iter::<_, ByteSlice, ByteSlice>(&txn, field_id_prefix)? .rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(&txn, field_id_prefix)?
.next() .next()
.map(|el| { .map(|el| {
let (key, _) = el.unwrap(); let (key, _) = el.unwrap();