Introduce an empty FilterCondition variant to support unknown fields

This commit is contained in:
Kerollmops 2021-07-27 16:24:21 +02:00
parent b12738cfe9
commit dc2b63abdf
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 71 additions and 52 deletions

View File

@ -6,6 +6,7 @@ use std::str::FromStr;
use either::Either; use either::Either;
use heed::types::DecodeIgnore; use heed::types::DecodeIgnore;
use itertools::Itertools;
use log::debug; use log::debug;
use pest::error::{Error as PestError, ErrorVariant}; use pest::error::{Error as PestError, ErrorVariant};
use pest::iterators::{Pair, Pairs}; use pest::iterators::{Pair, Pairs};
@ -54,6 +55,7 @@ pub enum FilterCondition {
Operator(FieldId, Operator), Operator(FieldId, Operator),
Or(Box<Self>, Box<Self>), Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>), And(Box<Self>, Box<Self>),
Empty,
} }
impl FilterCondition { impl FilterCondition {
@ -108,7 +110,7 @@ impl FilterCondition {
expression: &str, expression: &str,
) -> Result<FilterCondition> { ) -> Result<FilterCondition> {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_fields = index.filterable_fields_ids(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?;
let lexed = let lexed =
FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?; FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?;
FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
@ -116,7 +118,7 @@ impl FilterCondition {
fn from_pairs( fn from_pairs(
fim: &FieldsIdsMap, fim: &FieldsIdsMap,
ff: &HashSet<FieldId>, ff: &HashSet<String>,
expression: Pairs<Rule>, expression: Pairs<Rule>,
) -> Result<Self> { ) -> Result<Self> {
PREC_CLIMBER.climb( PREC_CLIMBER.climb(
@ -150,17 +152,22 @@ impl FilterCondition {
}, },
Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())), Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())),
And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())), And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())),
Empty => Empty,
} }
} }
fn between( fn between(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<String>,
item: Pair<Rule>, item: Pair<Rule>,
) -> Result<FilterCondition> { ) -> Result<FilterCondition> {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items) let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?; .map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};
let (lresult, _) = pest_parse(items.next().unwrap()); let (lresult, _) = pest_parse(items.next().unwrap());
let (rresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap());
@ -173,12 +180,16 @@ impl FilterCondition {
fn equal( fn equal(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<String>,
item: Pair<Rule>, item: Pair<Rule>,
) -> Result<FilterCondition> { ) -> Result<FilterCondition> {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items) let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?; .map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, svalue) = pest_parse(value); let (result, svalue) = pest_parse(value);
@ -189,12 +200,16 @@ impl FilterCondition {
fn greater_than( fn greater_than(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<String>,
item: Pair<Rule>, item: Pair<Rule>,
) -> Result<FilterCondition> { ) -> Result<FilterCondition> {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items) let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?; .map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
@ -205,12 +220,16 @@ impl FilterCondition {
fn greater_than_or_equal( fn greater_than_or_equal(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<String>,
item: Pair<Rule>, item: Pair<Rule>,
) -> Result<FilterCondition> { ) -> Result<FilterCondition> {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items) let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?; .map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
@ -221,12 +240,16 @@ impl FilterCondition {
fn lower_than( fn lower_than(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<String>,
item: Pair<Rule>, item: Pair<Rule>,
) -> Result<FilterCondition> { ) -> Result<FilterCondition> {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items) let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?; .map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
@ -237,12 +260,16 @@ impl FilterCondition {
fn lower_than_or_equal( fn lower_than_or_equal(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<String>,
item: Pair<Rule>, item: Pair<Rule>,
) -> Result<FilterCondition> { ) -> Result<FilterCondition> {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items) let fid = match field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::InvalidFilterAttribute)?; .map_err(UserError::InvalidFilterAttribute)?
{
Some(fid) => fid,
None => return Ok(Empty),
};
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
@ -461,57 +488,41 @@ impl FilterCondition {
let rhs = rhs.evaluate(rtxn, index)?; let rhs = rhs.evaluate(rtxn, index)?;
Ok(lhs & rhs) Ok(lhs & rhs)
} }
Empty => Ok(RoaringBitmap::new()),
} }
} }
} }
/// Retrieve the field id base on the pest value, returns an error is /// Retrieve the field id base on the pest value.
/// the field does not exist or is not filterable. ///
/// Returns an error if the given value is not filterable.
///
/// Returns Ok(None) if the given value is filterable, but is not yet ascociated to a field_id.
/// ///
/// The pest pair is simply a string associated with a span, a location to highlight in /// The pest pair is simply a string associated with a span, a location to highlight in
/// the error message. /// the error message.
fn field_id( fn field_id(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<String>,
items: &mut Pairs<Rule>, items: &mut Pairs<Rule>,
) -> StdResult<FieldId, PestError<Rule>> { ) -> StdResult<Option<FieldId>, PestError<Rule>> {
// lexing ensures that we at least have a key // lexing ensures that we at least have a key
let key = items.next().unwrap(); let key = items.next().unwrap();
let field_id = match fields_ids_map.id(key.as_str()) { if !filterable_fields.contains(key.as_str()) {
Some(field_id) => field_id,
None => {
return Err(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `{}` not found, available attributes are: {}",
key.as_str(),
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
),
},
key.as_span(),
))
}
};
if !filterable_fields.contains(&field_id) {
return Err(PestError::new_from_span( return Err(PestError::new_from_span(
ErrorVariant::CustomError { ErrorVariant::CustomError {
message: format!( message: format!(
"attribute `{}` is not filterable, available filterable attributes are: {}", "attribute `{}` is not filterable, available filterable attributes are: {}",
key.as_str(), key.as_str(),
filterable_fields filterable_fields.iter().join(", "),
.iter()
.flat_map(|id| { fields_ids_map.name(*id) })
.collect::<Vec<_>>()
.join(", "),
), ),
}, },
key.as_span(), key.as_span(),
)); ));
} }
Ok(field_id) Ok(fields_ids_map.id(key.as_str()))
} }
/// Tries to parse the pest pair into the type `T` specified, always returns /// Tries to parse the pest pair into the type `T` specified, always returns
@ -552,6 +563,9 @@ mod tests {
// Set the filterable fields to be the channel. // Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut map = index.fields_ids_map(&wtxn).unwrap();
map.insert("channel");
index.put_fields_ids_map(&mut wtxn, &map).unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_filterable_fields(hashset! { S("channel") }); builder.set_filterable_fields(hashset! { S("channel") });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
@ -581,6 +595,9 @@ mod tests {
// Set the filterable fields to be the channel. // Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut map = index.fields_ids_map(&wtxn).unwrap();
map.insert("timestamp");
index.put_fields_ids_map(&mut wtxn, &map).unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_filterable_fields(hashset! { "timestamp".into() }); builder.set_filterable_fields(hashset! { "timestamp".into() });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();

View File

@ -391,6 +391,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
documents_file, documents_file,
} = output; } = output;
// The fields_ids_map is put back to the store now so the rest of the transaction sees an
// up to date field map.
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
// We delete the documents that this document addition replaces. This way we are // We delete the documents that this document addition replaces. This way we are
// able to simply insert all the documents even if they already exist in the database. // able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() { if !replaced_documents_ids.is_empty() {
@ -596,9 +600,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
debug!("Writing using the write method: {:?}", write_method); debug!("Writing using the write method: {:?}", write_method);
// We write the fields ids map into the main database
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
// We write the field distribution into the main database // We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?; self.index.put_field_distribution(self.wtxn, &field_distribution)?;

View File

@ -674,7 +674,8 @@ mod tests {
let count = index let count = index
.facet_id_f64_docids .facet_id_f64_docids
.remap_key_type::<ByteSlice>() .remap_key_type::<ByteSlice>()
.prefix_iter(&rtxn, &[0, 0]) // The faceted field id is 2u16
.prefix_iter(&rtxn, &[0, 2, 0])
.unwrap() .unwrap()
.count(); .count();
assert_eq!(count, 3); assert_eq!(count, 3);
@ -700,7 +701,7 @@ mod tests {
let count = index let count = index
.facet_id_f64_docids .facet_id_f64_docids
.remap_key_type::<ByteSlice>() .remap_key_type::<ByteSlice>()
.prefix_iter(&rtxn, &[0, 0]) .prefix_iter(&rtxn, &[0, 2, 0])
.unwrap() .unwrap()
.count(); .count();
assert_eq!(count, 4); assert_eq!(count, 4);