Rely on FieldIdMapWithMetadata in facet search and filters

This commit is contained in:
ManyTheFish 2025-03-05 18:22:12 +01:00
parent 67f7470c83
commit b88aa9cc76
8 changed files with 128 additions and 105 deletions

View File

@ -138,12 +138,12 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)]
InvalidFilterExpression(&'static [&'static str], Value),
#[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` matched rule #{rule_index} in `filterableAttributes`", allowed_operators.join(", "))]
#[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), rule_index.map_or("did not match any rule".to_string(), |rule_index| format!("matched rule #{rule_index}")))]
FilterOperatorNotAllowed {
field: String,
allowed_operators: Vec<String>,
operator: String,
rule_index: usize,
rule_index: Option<usize>,
},
#[error("Attribute `{}` is not sortable. {}",
.field,

View File

@ -126,20 +126,35 @@ impl Metadata {
&self,
rules: &'rules [FilterableAttributesRule],
) -> Option<&'rules FilterableAttributesRule> {
self.filterable_attributes_with_rule_index(rules).map(|(_, rule)| rule)
}
pub fn filterable_attributes_with_rule_index<'rules>(
&self,
rules: &'rules [FilterableAttributesRule],
) -> Option<(usize, &'rules FilterableAttributesRule)> {
let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get();
// - 1: `filterable_attributes_rule_id` is NonZero
let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap();
Some(rule)
let rule_id = (filterable_attributes_rule_id - 1) as usize;
let rule = rules.get(rule_id).unwrap();
Some((rule_id, rule))
}
pub fn filterable_attributes_features(
&self,
rules: &[FilterableAttributesRule],
) -> FilterableAttributesFeatures {
self.filterable_attributes(rules)
.map(|rule| rule.features())
let (_, features) = self.filterable_attributes_features_with_rule_index(rules);
features
}
pub fn filterable_attributes_features_with_rule_index(
&self,
rules: &[FilterableAttributesRule],
) -> (Option<usize>, FilterableAttributesFeatures) {
self.filterable_attributes_with_rule_index(rules)
.map(|(rule_index, rule)| (Some(rule_index), rule.features()))
// if there is no filterable attributes rule, return no features
.unwrap_or_else(FilterableAttributesFeatures::no_features)
.unwrap_or_else(|| (None, FilterableAttributesFeatures::no_features()))
}
pub fn is_sortable(&self) -> bool {

View File

@ -6,7 +6,8 @@ use utoipa::ToSchema;
use crate::{
attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch},
constants::RESERVED_GEO_FIELD_NAME,
AttributePatterns, FieldsIdsMap,
fields_ids_map::metadata::FieldIdMapWithMetadata,
AttributePatterns,
};
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
@ -235,15 +236,14 @@ impl Default for FilterFeatures {
/// * `filter` - The filter function to apply to the filterable attributes rules.
pub fn filtered_matching_field_names<'fim>(
filterable_attributes: &[FilterableAttributesRule],
fields_ids_map: &'fim FieldsIdsMap,
fields_ids_map: &'fim FieldIdMapWithMetadata,
filter: &impl Fn(FilterableAttributesFeatures) -> bool,
) -> BTreeSet<&'fim str> {
let mut result = BTreeSet::new();
for (_, field_name) in fields_ids_map.iter() {
if let Some((_, features)) = matching_features(field_name, filterable_attributes) {
if filter(features) {
result.insert(field_name);
}
for (_, field_name, metadata) in fields_ids_map.iter() {
let features = metadata.filterable_attributes_features(filterable_attributes);
if filter(features) {
result.insert(field_name);
}
}
result

View File

@ -13,7 +13,7 @@ use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAM
use crate::database_stats::DatabaseStats;
use crate::documents::PrimaryKey;
use crate::error::{InternalError, UserError};
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::fields_ids_map::FieldsIdsMap;
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
@ -514,6 +514,16 @@ impl Index {
.unwrap_or_default())
}
/// Returns the fields ids map with metadata.
///
/// This structure is not yet stored in the index, and is generated on the fly.
pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result<FieldIdMapWithMetadata> {
Ok(FieldIdMapWithMetadata::new(
self.fields_ids_map(rtxn)?,
MetadataBuilder::from_index(self, rtxn)?,
))
}
/* fieldids weights map */
// This maps the fields ids to their weights.
// Their weights is defined by the ordering of the searchable attributes.

View File

@ -12,17 +12,15 @@ use serde_json::Value;
use super::facet_range_search;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{
filtered_matching_field_names, is_field_filterable, matching_features,
};
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap,
FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result,
SerializationError,
distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures,
FilterableAttributesRule, Index, InternalError, Result, SerializationError,
};
/// The maximum number of filters the filter AST can process.
@ -234,21 +232,32 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
let attribute = fid.value();
if !is_field_filterable(attribute, &filterable_attributes_rules) {
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute,
filterable_fields: filtered_matching_field_names(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_filterable(),
),
}))?;
if let Some((_, metadata)) = fields_ids_map.id_with_metadata(fid.value()) {
if metadata
.filterable_attributes_features(&filterable_attributes_rules)
.is_filterable()
{
continue;
}
} else if is_field_filterable(attribute, &filterable_attributes_rules) {
continue;
}
// If the field is not filterable, return an error
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute,
filterable_fields: filtered_matching_field_names(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_filterable(),
),
}))?;
}
self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None)
}
@ -259,7 +268,7 @@ impl<'a> Filter<'a> {
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
features: &FilterableAttributesFeatures,
rule_index: usize,
rule_index: Option<usize>,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
let strings_db = index.facet_id_string_docids;
@ -454,7 +463,7 @@ impl<'a> Filter<'a> {
&self,
rtxn: &heed::RoTxn<'_>,
index: &Index,
field_ids_map: &FieldsIdsMap,
field_ids_map: &FieldIdMapWithMetadata,
filterable_attribute_rules: &[FilterableAttributesRule],
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
@ -480,51 +489,33 @@ impl<'a> Filter<'a> {
}
}
}
FilterCondition::In { fid, els } => {
match matching_features(fid.value(), filterable_attribute_rules) {
Some((rule_index, features)) if features.is_filterable() => {
if let Some(fid) = field_ids_map.id(fid.value()) {
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| {
Self::evaluate_operator(
rtxn, index, fid, universe, &op, &features, rule_index,
)
})
.union()
} else {
Ok(RoaringBitmap::new())
}
}
_ => Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filtered_matching_field_names(
filterable_attribute_rules,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?,
}
}
FilterCondition::Condition { fid, op } => {
match matching_features(fid.value(), filterable_attribute_rules) {
Some((rule_index, features)) if features.is_filterable() => {
if let Some(fid) = field_ids_map.id(fid.value()) {
FilterCondition::In { fid, els } => match field_ids_map.id_with_metadata(fid.value()) {
Some((fid, metadata)) => {
let (rule_index, features) = metadata
.filterable_attributes_features_with_rule_index(filterable_attribute_rules);
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| {
Self::evaluate_operator(
rtxn, index, fid, universe, op, &features, rule_index,
rtxn, index, fid, universe, &op, &features, rule_index,
)
} else {
Ok(RoaringBitmap::new())
}
})
.union()
}
None => Ok(RoaringBitmap::new()),
},
FilterCondition::Condition { fid, op } => {
match field_ids_map.id_with_metadata(fid.value()) {
Some((fid, metadata)) => {
let (rule_index, features) = metadata
.filterable_attributes_features_with_rule_index(
filterable_attribute_rules,
);
Self::evaluate_operator(
rtxn, index, fid, universe, op, &features, rule_index,
)
}
_ => Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filtered_matching_field_names(
filterable_attribute_rules,
&field_ids_map,
&|features| features.is_filterable(),
),
}))?,
None => Ok(RoaringBitmap::new()),
}
}
FilterCondition::Or(subfilters) => subfilters
@ -764,7 +755,7 @@ fn generate_filter_error(
field_id: FieldId,
operator: &Condition<'_>,
features: &FilterableAttributesFeatures,
rule_index: usize,
rule_index: Option<usize>,
) -> Error {
match index.fields_ids_map(rtxn) {
Ok(fields_ids_map) => {

View File

@ -77,30 +77,37 @@ impl<'a> SearchForFacetValues<'a> {
let rtxn = self.search_query.rtxn;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
if !is_field_facet_searchable(&self.facet, &filterable_attributes_rules) {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let matching_field_names = filtered_matching_field_names(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_facet_searchable(),
);
let (valid_fields, hidden_fields) =
index.remove_hidden_fields(rtxn, matching_field_names)?;
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_fields,
hidden_fields,
let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?;
let fid = match fields_ids_map.id_with_metadata(&self.facet) {
Some((fid, metadata))
if metadata
.filterable_attributes_features(&filterable_attributes_rules)
.is_facet_searchable() =>
{
fid
}
.into());
}
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
// we return an empty list of results when the attribute has been
// set as filterable but no document contains this field (yet).
None => return Ok(Vec::new()),
None if is_field_facet_searchable(&self.facet, &filterable_attributes_rules) => {
return Ok(Vec::new());
}
// we return an error when the attribute is not facet searchable
_otherwise => {
let matching_field_names = filtered_matching_field_names(
&filterable_attributes_rules,
&fields_ids_map,
&|features| features.is_facet_searchable(),
);
let (valid_fields, hidden_fields) =
index.remove_hidden_fields(rtxn, matching_field_names)?;
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_fields,
hidden_fields,
}
.into());
}
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {

View File

@ -192,7 +192,7 @@ impl<'a> Search<'a> {
// check if the distinct field is in the filterable fields
if !is_field_filterable(distinct, &filterable_fields) {
// if not, remove the hidden fields from the filterable fields to generate the error message
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
let fields_ids_map = ctx.index.fields_ids_map_with_metadata(ctx.txn)?;
let matching_field_names = filtered_matching_field_names(
&filterable_fields,
&fields_ids_map,

View File

@ -1256,7 +1256,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
@ -1479,7 +1479,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
@ -1507,7 +1507,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
@ -1745,7 +1745,7 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
@ -1856,7 +1856,7 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()
@ -1925,7 +1925,7 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap();
let facets =
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
features.is_filterable()