From 258c3dd5637249d31568b83e6f8b490a41c56903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 20 Jun 2022 18:46:57 +0200 Subject: [PATCH 01/22] Make AND+OR filters n-ary (store a vector of subfilters instead of 2) NOTE: The token_at_depth is method is a bit useless now, as the only cases where there would be a toke at depth 1000 are the cases where the parser already stack-overflowed earlier. Example: (((((... (x=1) ...))))) --- filter-parser/src/lib.rs | 117 +++++++++++++++++++------------ http-ui/src/main.rs | 7 +- milli/src/search/facet/filter.rs | 104 +++++++++++---------------- 3 files changed, 118 insertions(+), 110 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 01be432d7..8da3da35f 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -113,8 +113,8 @@ impl<'a> From> for Token<'a> { #[derive(Debug, Clone, PartialEq, Eq)] pub enum FilterCondition<'a> { Condition { fid: Token<'a>, op: Condition<'a> }, - Or(Box, Box), - And(Box, Box), + Or(Vec), + And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, } @@ -124,13 +124,23 @@ impl<'a> FilterCondition<'a> { pub fn token_at_depth(&self, depth: usize) -> Option<&Token> { match self { FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid), - FilterCondition::Or(left, right) => { + FilterCondition::Or(subfilters) => { let depth = depth.saturating_sub(1); - right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) + for f in subfilters.iter() { + if let Some(t) = f.token_at_depth(depth) { + return Some(t); + } + } + None } - FilterCondition::And(left, right) => { + FilterCondition::And(subfilters) => { let depth = depth.saturating_sub(1); - right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) + for f in subfilters.iter() { + if let Some(t) = f.token_at_depth(depth) { + return Some(t); + } + } + None } FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point), FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point), @@ -144,13 +154,13 @@ impl<'a> FilterCondition<'a> { match self { Condition { fid, op } => match op.negate() { (op, None) => Condition { fid, op }, - (a, Some(b)) => Or( + (a, Some(b)) => Or(vec![ Condition { fid: fid.clone(), op: a }.into(), Condition { fid, op: b }.into(), - ), + ]), }, - Or(a, b) => And(a.negate().into(), b.negate().into()), - And(a, b) => Or(a.negate().into(), b.negate().into()), + Or(subfilters) => And(subfilters.into_iter().map(|x| x.negate().into()).collect()), + And(subfilters) => Or(subfilters.into_iter().map(|x| x.negate().into()).collect()), GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, } @@ -172,26 +182,36 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) /// or = and ("OR" WS+ and)* fn parse_or(input: Span) -> IResult { - let (input, lhs) = parse_and(input)?; + let (input, first_filter) = parse_and(input)?; // if we found a `OR` then we MUST find something next - let (input, ors) = many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?; + let (input, mut ors) = + many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); - Ok((input, expr)) + let filter = if ors.is_empty() { + first_filter + } else { + ors.insert(0, first_filter); + FilterCondition::Or(ors) + }; + + Ok((input, filter)) } /// and = not ("AND" not)* fn parse_and(input: Span) -> IResult { - let (input, lhs) = parse_not(input)?; + let (input, first_filter) = parse_not(input)?; // if we found a `AND` then we MUST find something next - let (input, ors) = + let (input, mut ands) = many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); - Ok((input, expr)) + + let filter = if ands.is_empty() { + first_filter + } else { + ands.insert(0, first_filter); + FilterCondition::And(ands) + }; + + Ok((input, filter)) } /// not = ("NOT" WS+ not) | primary @@ -477,7 +497,7 @@ pub mod tests { ( "NOT subscribers 100 TO 1000", Fc::Or( - Fc::Condition { + vec![Fc::Condition { fid: rtok("NOT ", "subscribers"), op: Condition::LowerThan(rtok("NOT subscribers ", "100")), } @@ -486,7 +506,7 @@ pub mod tests { fid: rtok("NOT ", "subscribers"), op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), } - .into(), + .into()], ), ), ( @@ -506,7 +526,7 @@ pub mod tests { // test simple `or` and `and` ( "channel = ponce AND 'dog race' != 'bernese mountain'", - Fc::And( + Fc::And(vec![ Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")), @@ -520,11 +540,11 @@ pub mod tests { )), } .into(), - ), + ]), ), ( "channel = ponce OR 'dog race' != 'bernese mountain'", - Fc::Or( + Fc::Or(vec![ Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")), @@ -538,12 +558,12 @@ pub mod tests { )), } .into(), - ), + ]), ), ( "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", - Fc::Or( - Fc::And( + Fc::Or(vec![ + Fc::And(vec![ Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")), @@ -557,7 +577,7 @@ pub mod tests { )), } .into(), - ) + ]) .into(), Fc::Condition { fid: rtok( @@ -570,30 +590,30 @@ pub mod tests { )), } .into(), - ), + ]), ), // test parenthesis ( "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", - Fc::And( + Fc::And(vec![ Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(), - Fc::Or( - Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), - Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), - ).into()), + Fc::Or(vec![ + Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), + Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),] + ).into()]), ), ( "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", - Fc::And( - Fc::Or( - Fc::And( + Fc::And(vec![ + Fc::Or(vec![ + Fc::And(vec![ Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(), Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), - ).into(), + ]).into(), Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), - ).into(), + ]).into(), Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() - ) + ]) ) ]; @@ -657,6 +677,15 @@ pub mod tests { #[test] fn depth() { let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap(); - assert!(filter.token_at_depth(5).is_some()); + assert!(filter.token_at_depth(1).is_some()); + assert!(filter.token_at_depth(2).is_none()); + + let filter = FilterCondition::parse("(account_ids=1 OR (account_ids=2 AND account_ids=3) OR (account_ids=4 AND account_ids=5) OR account_ids=6)").unwrap().unwrap(); + assert!(filter.token_at_depth(2).is_some()); + assert!(filter.token_at_depth(3).is_none()); + + let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 AND account_ids=3 OR account_ids=4 AND account_ids=5 OR account_ids=6").unwrap().unwrap(); + assert!(filter.token_at_depth(2).is_some()); + assert!(filter.token_at_depth(3).is_none()); } } diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 83fce9a9c..da5595cc0 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -745,10 +745,9 @@ async fn main() -> anyhow::Result<()> { }; let condition = match (filters, facet_filters) { - (Some(filters), Some(facet_filters)) => Some(FilterCondition::And( - Box::new(filters.into()), - Box::new(facet_filters.into()), - )), + (Some(filters), Some(facet_filters)) => { + Some(FilterCondition::And(vec![filters.into(), facet_filters.into()])) + } (Some(condition), None) | (None, Some(condition)) => Some(condition.into()), _otherwise => None, }; diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 90aab826a..d14b33f80 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -89,52 +89,44 @@ impl<'a> Filter<'a> { I: IntoIterator>, J: IntoIterator, { - let mut ands: Option = None; + let mut ands = vec![]; for either in array { match either { Either::Left(array) => { - let mut ors = None; + let mut ors = vec![]; for rule in array { if let Some(filter) = Self::from_str(rule.as_ref())? { - let condition = filter.condition; - ors = match ors.take() { - Some(ors) => { - Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) - } - None => Some(condition), - }; + ors.push(filter.condition); } } - if let Some(rule) = ors { - ands = match ands.take() { - Some(ands) => { - Some(FilterCondition::And(Box::new(ands), Box::new(rule))) - } - None => Some(rule), - }; + if ors.len() > 1 { + ands.push(FilterCondition::Or(ors)); + } else if ors.len() == 1 { + ands.push(ors[0].clone()); } } Either::Right(rule) => { if let Some(filter) = Self::from_str(rule.as_ref())? { - let condition = filter.condition; - ands = match ands.take() { - Some(ands) => { - Some(FilterCondition::And(Box::new(ands), Box::new(condition))) - } - None => Some(condition), - }; + ands.push(filter.condition); } } } } + let and = if ands.is_empty() { + return Ok(None); + } else if ands.len() == 1 { + ands[0].clone() + } else { + FilterCondition::And(ands) + }; - if let Some(token) = ands.as_ref().and_then(|fc| fc.token_at_depth(MAX_FILTER_DEPTH)) { + if let Some(token) = and.token_at_depth(MAX_FILTER_DEPTH) { return Err(token.as_external_error(FilterError::TooDeep).into()); } - Ok(ands.map(|ands| Self { condition: ands })) + Ok(Some(Self { condition: and })) } pub fn from_str(expression: &'a str) -> Result> { @@ -397,38 +389,28 @@ impl<'a> Filter<'a> { } } } - FilterCondition::Or(lhs, rhs) => { - let lhs = Self::inner_evaluate( - &(lhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - let rhs = Self::inner_evaluate( - &(rhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - Ok(lhs | rhs) - } - FilterCondition::And(lhs, rhs) => { - let lhs = Self::inner_evaluate( - &(lhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - if lhs.is_empty() { - return Ok(lhs); + FilterCondition::Or(subfilters) => { + let mut bitmap = RoaringBitmap::new(); + for f in subfilters { + bitmap |= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; + } + Ok(bitmap) + } + FilterCondition::And(subfilters) => { + let mut subfilters_iter = subfilters.iter(); + if let Some(first_subfilter) = subfilters_iter.next() { + let mut bitmap = + Self::inner_evaluate(&(first_subfilter.clone()).into(), rtxn, index, filterable_fields)?; + for f in subfilters_iter { + if bitmap.is_empty() { + return Ok(bitmap); + } + bitmap &= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; + } + Ok(bitmap) + } else { + Ok(RoaringBitmap::new()) } - let rhs = Self::inner_evaluate( - &(rhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - Ok(lhs & rhs) } FilterCondition::GeoLowerThan { point, radius } => { if filterable_fields.contains("_geo") { @@ -732,12 +714,10 @@ mod tests { } } - let error = Filter::from_str(&filter_string).unwrap_err(); - assert!( - error.to_string().starts_with("Too many filter conditions"), - "{}", - error.to_string() - ); + // Note: the filter used to be rejected for being too deep, but that is + // no longer the case + let filter = Filter::from_str(&filter_string).unwrap(); + assert!(filter.is_some()); } #[test] From 01675771d5e82b36dbee2bb8f9e61a33b0349f3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 14 Jun 2022 15:08:40 +0200 Subject: [PATCH 02/22] Reimplement `!=` filter to select all docids not selected by `=` --- milli/src/search/facet/filter.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index d14b33f80..371cf975e 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -309,11 +309,12 @@ impl<'a> Filter<'a> { return Ok(string_docids | number_docids); } Condition::NotEqual(val) => { - let all_numbers_ids = index.number_faceted_documents_ids(rtxn, field_id)?; - let all_strings_ids = index.string_faceted_documents_ids(rtxn, field_id)?; let operator = Condition::Equal(val.clone()); - let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?; - return Ok((all_numbers_ids | all_strings_ids) - docids); + let docids = Self::evaluate_operator( + rtxn, index, field_id, &operator, + )?; + let all_ids = index.documents_ids(rtxn)?; + return Ok(all_ids - docids); } }; From 44744d9e67c0cf1dafd2ed712b8094d4b2b21ebb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 14 Jun 2022 15:15:05 +0200 Subject: [PATCH 03/22] Implement the simplified NOT operator --- filter-parser/src/lib.rs | 23 ++++------------------- milli/src/search/facet/filter.rs | 10 ++++++++++ 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 8da3da35f..49eba1c61 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -112,6 +112,7 @@ impl<'a> From> for Token<'a> { #[derive(Debug, Clone, PartialEq, Eq)] pub enum FilterCondition<'a> { + Not(Box), Condition { fid: Token<'a>, op: Condition<'a> }, Or(Vec), And(Vec), @@ -148,24 +149,6 @@ impl<'a> FilterCondition<'a> { } } - pub fn negate(self) -> FilterCondition<'a> { - use FilterCondition::*; - - match self { - Condition { fid, op } => match op.negate() { - (op, None) => Condition { fid, op }, - (a, Some(b)) => Or(vec![ - Condition { fid: fid.clone(), op: a }.into(), - Condition { fid, op: b }.into(), - ]), - }, - Or(subfilters) => And(subfilters.into_iter().map(|x| x.negate().into()).collect()), - And(subfilters) => Or(subfilters.into_iter().map(|x| x.negate().into()).collect()), - GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, - GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, - } - } - pub fn parse(input: &'a str) -> Result, Error> { if input.trim().is_empty() { return Ok(None); @@ -219,7 +202,9 @@ fn parse_and(input: Span) -> IResult { /// If we parse a `NOT` we MUST parse something behind. fn parse_not(input: Span) -> IResult { alt(( - map(preceded(ws(tuple((tag("NOT"), multispace1))), cut(parse_not)), |e| e.negate()), + map(preceded(ws(tuple((tag("NOT"), multispace1))), cut(parse_not)), |e| { + FilterCondition::Not(Box::new(e)) + }), parse_primary, ))(input) } diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 371cf975e..23917e4aa 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -360,6 +360,16 @@ impl<'a> Filter<'a> { filterable_fields: &HashSet, ) -> Result { match &self.condition { + FilterCondition::Not(f) => { + let all_ids = index.documents_ids(rtxn)?; + let selected = Self::inner_evaluate( + &(f.as_ref().clone()).into(), + rtxn, + index, + filterable_fields, + )?; + return Ok(all_ids - selected); + } FilterCondition::Condition { fid, op } => { if crate::is_faceted(fid.value(), filterable_fields) { let field_ids_map = index.fields_ids_map(rtxn)?; From cc7415bb3185918117f5b60562ffc04704d42994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 14 Jun 2022 15:28:34 +0200 Subject: [PATCH 04/22] Simplify FilterCondition code, made possible by the new NOT operator --- filter-parser/src/condition.rs | 25 +++++-------------------- filter-parser/src/lib.rs | 24 +++++++++++------------- milli/src/search/facet/filter.rs | 19 ------------------- 3 files changed, 16 insertions(+), 52 deletions(-) diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index cbf73b96a..e967bd074 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -21,30 +21,12 @@ pub enum Condition<'a> { Equal(Token<'a>), NotEqual(Token<'a>), Exists, - NotExists, LowerThan(Token<'a>), LowerThanOrEqual(Token<'a>), Between { from: Token<'a>, to: Token<'a> }, } -impl<'a> Condition<'a> { - /// This method can return two operations in case it must express - /// an OR operation for the between case (i.e. `TO`). - pub fn negate(self) -> (Self, Option) { - match self { - GreaterThan(n) => (LowerThanOrEqual(n), None), - GreaterThanOrEqual(n) => (LowerThan(n), None), - Equal(s) => (NotEqual(s), None), - NotEqual(s) => (Equal(s), None), - Exists => (NotExists, None), - NotExists => (Exists, None), - LowerThan(n) => (GreaterThanOrEqual(n), None), - LowerThanOrEqual(n) => (GreaterThan(n), None), - Between { from, to } => (LowerThan(from), Some(GreaterThan(to))), - } - } -} -/// condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value +/// condition = value ("==" | ">" ...) value pub fn parse_condition(input: Span) -> IResult { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?; @@ -73,7 +55,10 @@ pub fn parse_not_exists(input: Span) -> IResult { let (input, key) = parse_value(input)?; let (input, _) = tuple((tag("NOT"), multispace1, tag("EXISTS")))(input)?; - Ok((input, FilterCondition::Condition { fid: key.into(), op: NotExists })) + Ok(( + input, + FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key.into(), op: Exists })), + )) } /// to = value value "TO" WS+ value diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 49eba1c61..9b33f6d24 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -117,7 +117,6 @@ pub enum FilterCondition<'a> { Or(Vec), And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, - GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, } impl<'a> FilterCondition<'a> { @@ -144,7 +143,6 @@ impl<'a> FilterCondition<'a> { None } FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point), - FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point), _ => None, } } @@ -443,17 +441,17 @@ pub mod tests { ), ( "NOT subscribers EXISTS", - Fc::Condition { + Fc::Not(Box::new(Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Condition::NotExists, - }, + op: Condition::Exists, + })), ), ( "subscribers NOT EXISTS", - Fc::Condition { + Fc::Not(Box::new(Fc::Condition { fid: rtok("", "subscribers"), - op: Condition::NotExists, - }, + op: Condition::Exists, + })), ), ( "NOT subscribers NOT EXISTS", @@ -464,10 +462,10 @@ pub mod tests { ), ( "subscribers NOT EXISTS", - Fc::Condition { + Fc::Not(Box::new(Fc::Condition { fid: rtok("", "subscribers"), - op: Condition::NotExists, - }, + op: Condition::Exists, + })), ), ( "subscribers 100 TO 1000", @@ -503,10 +501,10 @@ pub mod tests { ), ( "NOT _geoRadius(12, 13, 14)", - Fc::GeoGreaterThan { + Fc::Not(Box::new(Fc::GeoLowerThan { point: [rtok("NOT _geoRadius(", "12"), rtok("NOT _geoRadius(12, ", "13")], radius: rtok("NOT _geoRadius(12, 13, ", "14"), - }, + })), ), // test simple `or` and `and` ( diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 23917e4aa..ac3215dea 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -276,14 +276,6 @@ impl<'a> Filter<'a> { let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; return Ok(exist); } - Condition::NotExists => { - let all_ids = index.documents_ids(rtxn)?; - - let exist = Self::evaluate_operator(rtxn, index, field_id, &Condition::Exists)?; - - let notexist = all_ids - exist; - return Ok(notexist); - } Condition::Equal(val) => { let (_original_value, string_docids) = strings_db .get(rtxn, &(field_id, &val.value().to_lowercase()))? @@ -460,17 +452,6 @@ impl<'a> Filter<'a> { }))?; } } - FilterCondition::GeoGreaterThan { point, radius } => { - let result = Self::inner_evaluate( - &FilterCondition::GeoLowerThan { point: point.clone(), radius: radius.clone() } - .into(), - rtxn, - index, - filterable_fields, - )?; - let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; - Ok(geo_faceted_doc_ids - result) - } } } } From 90a304cb074def76a41dd0d3da264fc9ba479633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 15 Jun 2022 09:48:56 +0200 Subject: [PATCH 05/22] Fix tests after simplification of NOT filter --- filter-parser/src/lib.rs | 53 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 9b33f6d24..c5eeb84a9 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -371,10 +371,10 @@ pub mod tests { ), ( "NOT channel = ponce", - Fc::Condition { + Fc::Not(Box::new(Fc::Condition { fid: rtok("NOT ", "channel"), - op: Condition::NotEqual(rtok("NOT channel = ", "ponce")), - }, + op: Condition::Equal(rtok("NOT channel = ", "ponce")), + })), ), ( "subscribers < 1000", @@ -406,31 +406,31 @@ pub mod tests { ), ( "NOT subscribers < 1000", - Fc::Condition { + Fc::Not(Box::new(Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")), - }, + op: Condition::LowerThan(rtok("NOT subscribers < ", "1000")), + })), ), ( "NOT subscribers > 1000", - Fc::Condition { + Fc::Not(Box::new(Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")), - }, + op: Condition::GreaterThan(rtok("NOT subscribers > ", "1000")), + })), ), ( "NOT subscribers <= 1000", - Fc::Condition { + Fc::Not(Box::new(Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")), - }, + op: Condition::LowerThanOrEqual(rtok("NOT subscribers <= ", "1000")), + })), ), ( "NOT subscribers >= 1000", - Fc::Condition { + Fc::Not(Box::new(Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")), - }, + op: Condition::GreaterThanOrEqual(rtok("NOT subscribers >= ", "1000")), + })), ), ( "subscribers EXISTS", @@ -455,10 +455,10 @@ pub mod tests { ), ( "NOT subscribers NOT EXISTS", - Fc::Condition { + Fc::Not(Box::new(Fc::Not(Box::new(Fc::Condition { fid: rtok("NOT ", "subscribers"), op: Condition::Exists, - }, + })))), ), ( "subscribers NOT EXISTS", @@ -479,18 +479,13 @@ pub mod tests { ), ( "NOT subscribers 100 TO 1000", - Fc::Or( - vec![Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::LowerThan(rtok("NOT subscribers ", "100")), - } - .into(), - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), - } - .into()], - ), + Fc::Not(Box::new(Fc::Condition { + fid: rtok("NOT ", "subscribers"), + op: Condition::Between { + from: rtok("NOT subscribers ", "100"), + to: rtok("NOT subscribers 100 TO ", "1000"), + }, + })), ), ( "_geoRadius(12, 13, 14)", From ca97cb0eda3fb18f20928e6526389d2a09be07ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 30 May 2022 13:58:11 +0200 Subject: [PATCH 06/22] Implement the IN filter operator --- filter-parser/src/lib.rs | 80 ++++++++++++++++++++++++++++++-- milli/src/search/facet/filter.rs | 26 +++++++++++ 2 files changed, 103 insertions(+), 3 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index c5eeb84a9..bfb02d63c 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -6,12 +6,14 @@ //! or = and ("OR" WS+ and)* //! and = not ("AND" WS+ not)* //! not = ("NOT" WS+ not) | primary -//! primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to +//! primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | in | condition | exists | not_exists | to +//! in = value "IN" WS* "[" value_list "]" //! condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value //! exists = value "EXISTS" //! not_exists = value "NOT" WS+ "EXISTS" //! to = value value "TO" WS+ value //! value = WS* ( word | singleQuoted | doubleQuoted) WS+ +//! value_list = (value ("," value)* ","?)? //! singleQuoted = "'" .* all but quotes "'" //! doubleQuoted = "\"" .* all but double quotes "\"" //! word = (alphanumeric | _ | - | .)+ @@ -51,7 +53,7 @@ pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0, multispace1}; -use nom::combinator::{cut, eof, map}; +use nom::combinator::{cut, eof, map, opt}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, terminated, tuple}; @@ -114,6 +116,7 @@ impl<'a> From> for Token<'a> { pub enum FilterCondition<'a> { Not(Box), Condition { fid: Token<'a>, op: Condition<'a> }, + In { fid: Token<'a>, els: Vec> }, Or(Vec), And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, @@ -161,7 +164,36 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) delimited(multispace0, inner, multispace0) } -/// or = and ("OR" WS+ and)* + +/// value_list = (value ("," value)* ","?)? +fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { + let (input, first_value) = opt(parse_value)(input)?; + if let Some(first_value) = first_value { + let value_list_el_parser = preceded(ws(tag(",")), parse_value); + + let (input, mut values) = many0(value_list_el_parser)(input)?; + let (input, _) = opt(ws(tag(",")))(input)?; + values.insert(0, first_value); + + Ok((input, values)) + } else { + Ok((input, vec![])) + } +} + +/// in = value "IN" "[" value_list "]" +fn parse_in(input: Span) -> IResult { + let (input, value) = parse_value(input)?; + let (input, _) = ws(tag("IN"))(input)?; + + let mut els_parser = delimited(tag("["), parse_value_list, tag("]")); + + let (input, content) = els_parser(input)?; + let filter = FilterCondition::In { fid: value, els: content }; + Ok((input, filter)) +} + +/// or = and ("OR" and) fn parse_or(input: Span) -> IResult { let (input, first_filter) = parse_and(input)?; // if we found a `OR` then we MUST find something next @@ -257,6 +289,7 @@ fn parse_primary(input: Span) -> IResult { }), ), parse_geo_radius, + parse_in, parse_condition, parse_exists, parse_not_exists, @@ -297,6 +330,47 @@ pub mod tests { let test_case = [ // simple test + ( + "colour IN[]", + Fc::In { + fid: rtok("", "colour"), + els: vec![] + } + ), + ( + "colour IN[green]", + Fc::In { + fid: rtok("", "colour"), + els: vec![rtok("colour IN[", "green")] + } + ), + ( + "colour IN[green,]", + Fc::In { + fid: rtok("", "colour"), + els: vec![rtok("colour IN[", "green")] + } + ), + ( + "colour IN[green,blue]", + Fc::In { + fid: rtok("", "colour"), + els: vec![ + rtok("colour IN[", "green"), + rtok("colour IN[green, ", "blue"), + ] + } + ), + ( + " colour IN [ green , blue , ]", + Fc::In { + fid: rtok(" ", "colour"), + els: vec![ + rtok("colour IN [ ", "green"), + rtok("colour IN [ green , ", "blue"), + ] + } + ), ( "channel = Ponce", Fc::Condition { diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index ac3215dea..25ffe1842 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -362,6 +362,32 @@ impl<'a> Filter<'a> { )?; return Ok(all_ids - selected); } + FilterCondition::In { fid, els } => { + // TODO: this could be optimised + let filterable_fields = index.filterable_fields(rtxn)?; + + if crate::is_faceted(fid.value(), &filterable_fields) { + let field_ids_map = index.fields_ids_map(rtxn)?; + + if let Some(fid) = field_ids_map.id(fid.value()) { + let mut bitmap = RoaringBitmap::new(); + + for el in els { + let op = Condition::Equal(el.clone()); + let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?; + bitmap |= el_bitmap; + } + Ok(bitmap) + } else { + Ok(RoaringBitmap::new()) + } + } else { + return Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields, + }))?; + } + } FilterCondition::Condition { fid, op } => { if crate::is_faceted(fid.value(), filterable_fields) { let field_ids_map = index.fields_ids_map(rtxn)?; From 2fd20fadfc12a11b153823d6bc7355ed51786665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 15 Jun 2022 09:58:47 +0200 Subject: [PATCH 07/22] Implement the NOT IN syntax for negated IN filter --- filter-parser/src/lib.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index bfb02d63c..4b78b86b8 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -192,6 +192,19 @@ fn parse_in(input: Span) -> IResult { let filter = FilterCondition::In { fid: value, els: content }; Ok((input, filter)) } +/// in = value "NOT" WS* "IN" "[" value_list "]" +fn parse_not_in(input: Span) -> IResult { + let (input, value) = parse_value(input)?; + let (input, _) = tag("NOT")(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = ws(tag("IN"))(input)?; + + let mut els_parser = delimited(tag("["), parse_value_list, tag("]")); + + let (input, content) = els_parser(input)?; + let filter = FilterCondition::Not(Box::new(FilterCondition::In { fid: value, els: content })); + Ok((input, filter)) +} /// or = and ("OR" and) fn parse_or(input: Span) -> IResult { @@ -290,6 +303,7 @@ fn parse_primary(input: Span) -> IResult { ), parse_geo_radius, parse_in, + parse_not_in, parse_condition, parse_exists, parse_not_exists, @@ -361,6 +375,16 @@ pub mod tests { ] } ), + ( + "colour NOT IN[green,blue]", + Fc::Not(Box::new(Fc::In { + fid: rtok("", "colour"), + els: vec![ + rtok("colour NOT IN[", "green"), + rtok("colour NOT IN[green, ", "blue"), + ] + })) + ), ( " colour IN [ green , blue , ]", Fc::In { From 4ecfb95d0ccdeb0aad0fb7ed8cf22c1bc2e48d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 15 Jun 2022 10:13:34 +0200 Subject: [PATCH 08/22] Improve syntax errors for `IN` filter --- filter-parser/src/error.rs | 19 ++++++++++++-- filter-parser/src/lib.rs | 52 +++++++++++++++++++++++++++++++++++--- filter-parser/src/value.rs | 22 +++++++++++++--- 3 files changed, 83 insertions(+), 10 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index a3720f7bf..0d2959126 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -57,6 +57,10 @@ pub enum ErrorKind<'a> { ExpectedEof, ExpectedValue, MalformedValue, + InOpeningBracket, + InClosingBracket, + InExpectedValue, + ReservedKeyword(String), MissingClosingDelimiter(char), Char(char), InternalError(error::ErrorKind), @@ -109,12 +113,11 @@ impl<'a> ParseError> for Error<'a> { impl<'a> Display for Error<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let input = self.context.fragment(); - // When printing our error message we want to escape all `\n` to be sure we keep our format with the // first line being the diagnostic and the second line being the incriminated filter. let escaped_input = input.escape_debug(); - match self.kind { + match &self.kind { ErrorKind::ExpectedValue if input.trim().is_empty() => { writeln!(f, "Was expecting a value but instead got nothing.")? } @@ -145,6 +148,18 @@ impl<'a> Display for Error<'a> { ErrorKind::MisusedGeo => { writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? } + ErrorKind::ReservedKeyword(word) => { + writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")? + } + ErrorKind::InOpeningBracket => { + writeln!(f, "Expected `[` after `IN` keyword.")? + } + ErrorKind::InClosingBracket => { + writeln!(f, "Expected matching `]` after the list of field names given to `IN[`")? + } + ErrorKind::InExpectedValue => { + writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`")? + } ErrorKind::Char(c) => { panic!("Tried to display a char error with `{}`", c) } diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 4b78b86b8..12edd56c8 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -167,6 +167,12 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) /// value_list = (value ("," value)* ","?)? fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { + + // TODO: here, I should return a failure with a clear explanation whenever possible + // for example: + // * expected the name of a field, but got `AND` + // * expected closing square bracket, but got `AND` + let (input, first_value) = opt(parse_value)(input)?; if let Some(first_value) = first_value { let value_list_el_parser = preceded(ws(tag(",")), parse_value); @@ -186,9 +192,22 @@ fn parse_in(input: Span) -> IResult { let (input, value) = parse_value(input)?; let (input, _) = ws(tag("IN"))(input)?; - let mut els_parser = delimited(tag("["), parse_value_list, tag("]")); + // everything after `IN` can be a failure + let (input, _) = cut_with_err(tag("["), |_| { + Error::new_from_kind(input, ErrorKind::InOpeningBracket) + })(input)?; + + let (input, content) = cut(parse_value_list)(input)?; + + // everything after `IN` can be a failure + let (input, _) = cut_with_err(tag("]"), |_| { + if eof::<_, ()>(input).is_ok() { + Error::new_from_kind(input, ErrorKind::InClosingBracket) + } else { + Error::new_from_kind(input, ErrorKind::InExpectedValue) + } + })(input)?; - let (input, content) = els_parser(input)?; let filter = FilterCondition::In { fid: value, els: content }; Ok((input, filter)) } @@ -199,9 +218,19 @@ fn parse_not_in(input: Span) -> IResult { let (input, _) = multispace1(input)?; let (input, _) = ws(tag("IN"))(input)?; - let mut els_parser = delimited(tag("["), parse_value_list, tag("]")); - let (input, content) = els_parser(input)?; + // everything after `IN` can be a failure + let (input, _) = cut_with_err(tag("["), |_| { + Error::new_from_kind(input, ErrorKind::InOpeningBracket) + })(input)?; + + let (input, content) = cut(parse_value_list)(input)?; + + // everything after `IN` can be a failure + let (input, _) = cut_with_err(tag("]"), |_| { + Error::new_from_kind(input, ErrorKind::InClosingBracket) + })(input)?; + let filter = FilterCondition::Not(Box::new(FilterCondition::In { fid: value, els: content })); Ok((input, filter)) } @@ -313,6 +342,9 @@ fn parse_primary(input: Span) -> IResult { ))(input) // if the inner parsers did not match enough information to return an accurate error .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary))) + + // TODO: if the filter starts with a reserved keyword that is not NOT, then we should return the reserved keyword error + // TODO: if the filter is x = reserved, idem } /// expression = or @@ -344,6 +376,13 @@ pub mod tests { let test_case = [ // simple test + ( + "x = AND", + Fc::Not(Box::new(Fc::Not(Box::new(Fc::In { + fid: rtok("NOT NOT", "colour"), + els: vec![] + })))) + ), ( "colour IN[]", Fc::In { @@ -734,6 +773,11 @@ pub mod tests { ("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."), ("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), ("channel = ponce AND'dog' != 'bernese mountain'", "Found unexpected characters at the end of the filter: `AND\\'dog\\' != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), + ("colour IN blue, green]", "Expected `[` after `IN` keyword."), + ("colour IN [blue, green, 'blue' > 2]", "Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`"), + ("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"), + ("colour IN [blue, green", "Expected matching `]` after the list of field names given to `IN[`"), + ("colour IN ['blue, green", "Expression `\\'blue, green` is missing the following closing delimiter: `'`."), ]; for (input, expected) in test_case { diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 22da6a0df..8a7e8f586 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -71,9 +71,17 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { _ => (), } - // word = (alphanumeric | _ | - | .)+ + // word = (alphanumeric | _ | - | .)+ except for reserved keywords let word = |input: Span<'a>| -> IResult> { - take_while1(is_value_component)(input).map(|(s, t)| (s, t.into())) + let (input, word): (_, Token<'a>) = + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; + if is_keyword(word.value()) { + return Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::ReservedKeyword(word.value().to_owned()), + ))); + } + Ok((input, word)) }; // this parser is only used when an error is encountered and it parse the @@ -85,7 +93,7 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { // when we create the errors from the output of the alt we have spaces everywhere let error_word = take_till::<_, _, Error>(is_syntax_component); - terminated( + let (input, value) = terminated( alt(( delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))), delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))), @@ -107,7 +115,9 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { failure } }) - }) + })?; + + Ok((input, value)) } fn is_value_component(c: char) -> bool { @@ -118,6 +128,10 @@ fn is_syntax_component(c: char) -> bool { c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c) } +fn is_keyword(s: &str) -> bool { + matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius") +} + #[cfg(test)] pub mod test { use nom::Finish; From d10d78d520d6d92ddbfca9800e7215bc1625f2a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 4 Jul 2022 10:27:04 +0200 Subject: [PATCH 09/22] Add integration tests for the IN filter --- milli/tests/assets/test_set.ndjson | 26 +++++++++++++------------- milli/tests/search/filters.rs | 4 +++- milli/tests/search/mod.rs | 12 +++++++++++- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/milli/tests/assets/test_set.ndjson b/milli/tests/assets/test_set.ndjson index 427daca8c..2e77f9faf 100644 --- a/milli/tests/assets/test_set.ndjson +++ b/milli/tests/assets/test_set.ndjson @@ -1,17 +1,17 @@ -{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":"", "opt1": [null]} -{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":"", "opt1": []} -{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":"", "opt1": null} -{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":"", "opt1": 4} -{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":"", "opt1": "E"} -{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":"", "opt1": ["F"]} +{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":"", "opt1": [null], "tag_in": 1} +{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":"", "opt1": [], "tag_in": 2} +{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":"", "opt1": null, "tag_in": 3} +{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":"", "opt1": 4, "tag_in": "four"} +{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":"", "opt1": "E", "tag_in": "five"} +{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":"", "opt1": ["F"], "tag_in": null} {"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"geo_rank":34692,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","_geo": { "lat": 50.78776041427129, "lng": 2.661201766290338 },"":"", "opt1": [7]} -{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":"", "opt1": ["H", 8]} -{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"geo_rank":740667,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","_geo": { "lat": 43.973998070351065, "lng": 3.4661837318345032 },"":""} -{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":"", "opt1": {}} -{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"hallo creation system","description":"in few word hallo was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":"", "opt1": [{"opt2": 11}] } -{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":"", "opt1": {"opt2": [12]}} +{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":"", "opt1": ["H", 8], "tag_in": 8} +{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"geo_rank":740667,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","_geo": { "lat": 43.973998070351065, "lng": 3.4661837318345032 },"":"", "tag_in": "nine"} +{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":"", "opt1": {}, "tag_in": 10} +{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"hallo creation system","description":"in few word hallo was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":"", "opt1": [{"opt2": 11}] , "tag_in": "eleven"} +{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":"", "opt1": {"opt2": [12]}, "tag_in": 12} {"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"geo_rank":739203,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","_geo": { "lat": 43.99150729038736, "lng": 3.606143957295055 },"":"", "opt1": [13, [{"opt2": null}]]} -{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":"", "opt1": {"a": 1, "opt2": {"opt3": 14}} } -{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":"", "opt1": [[[[]]]] } +{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":"", "opt1": {"a": 1, "opt2": {"opt3": 14}}} +{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":"", "opt1": [[[[]]]]} {"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":9422437,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","_geo": { "lat": 35.06462306367058, "lng": 135.8338440354251 },"":"", "opt1.opt2": 16} {"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":9339230,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","_geo": { "lat": 34.39548365683149, "lng": 132.4535960928883 },"":""} diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs index 1700a1478..5451a9076 100644 --- a/milli/tests/search/filters.rs +++ b/milli/tests/search/filters.rs @@ -85,4 +85,6 @@ test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]); test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]); test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]); -test_filter!(exists_filter_2, vec![Right("opt1.opt2 EXISTS")]); +test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]); +test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]); +test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]); diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 728a4eb0b..0e1d43d2a 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -44,7 +44,8 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("asc_desc_rank"), S("_geo"), S("opt1"), - S("opt1.opt2") + S("opt1.opt2"), + S("tag_in") }); builder.set_sortable_fields(hashset! { S("tag"), @@ -205,6 +206,15 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option { } else if let Some(opt1) = &document.opt1 { id = contains_key_rec(opt1, "opt2").then(|| document.id.clone()); } + } else if matches!( + filter, + "tag_in IN[1, 2, 3, four, five]" | "NOT tag_in NOT IN[1, 2, 3, four, five]" + ) { + id = matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E") + .then(|| document.id.clone()); + } else if matches!(filter, "tag_in NOT IN[1, 2, 3, four, five]") { + id = (!matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E")) + .then(|| document.id.clone()); } id } From 196f79115a4367ef57fc96ca2cc28c22be10fbe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 18 Jul 2022 17:09:52 +0200 Subject: [PATCH 10/22] Run cargo fmt --- filter-parser/src/lib.rs | 70 ++++++++++++++++---------------- milli/src/search/facet/filter.rs | 22 ++++++---- 2 files changed, 50 insertions(+), 42 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 12edd56c8..31ca2919a 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -164,10 +164,8 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) delimited(multispace0, inner, multispace0) } - /// value_list = (value ("," value)* ","?)? fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { - // TODO: here, I should return a failure with a clear explanation whenever possible // for example: // * expected the name of a field, but got `AND` @@ -193,12 +191,13 @@ fn parse_in(input: Span) -> IResult { let (input, _) = ws(tag("IN"))(input)?; // everything after `IN` can be a failure - let (input, _) = cut_with_err(tag("["), |_| { - Error::new_from_kind(input, ErrorKind::InOpeningBracket) - })(input)?; + let (input, _) = + cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))( + input, + )?; let (input, content) = cut(parse_value_list)(input)?; - + // everything after `IN` can be a failure let (input, _) = cut_with_err(tag("]"), |_| { if eof::<_, ()>(input).is_ok() { @@ -218,18 +217,19 @@ fn parse_not_in(input: Span) -> IResult { let (input, _) = multispace1(input)?; let (input, _) = ws(tag("IN"))(input)?; - // everything after `IN` can be a failure - let (input, _) = cut_with_err(tag("["), |_| { - Error::new_from_kind(input, ErrorKind::InOpeningBracket) - })(input)?; + let (input, _) = + cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))( + input, + )?; let (input, content) = cut(parse_value_list)(input)?; - + // everything after `IN` can be a failure - let (input, _) = cut_with_err(tag("]"), |_| { - Error::new_from_kind(input, ErrorKind::InClosingBracket) - })(input)?; + let (input, _) = + cut_with_err(tag("]"), |_| Error::new_from_kind(input, ErrorKind::InClosingBracket))( + input, + )?; let filter = FilterCondition::Not(Box::new(FilterCondition::In { fid: value, els: content })); Ok((input, filter)) @@ -378,60 +378,60 @@ pub mod tests { // simple test ( "x = AND", - Fc::Not(Box::new(Fc::Not(Box::new(Fc::In { - fid: rtok("NOT NOT", "colour"), - els: vec![] + Fc::Not(Box::new(Fc::Not(Box::new(Fc::In { + fid: rtok("NOT NOT", "colour"), + els: vec![] })))) ), ( "colour IN[]", - Fc::In { - fid: rtok("", "colour"), - els: vec![] + Fc::In { + fid: rtok("", "colour"), + els: vec![] } ), ( "colour IN[green]", - Fc::In { - fid: rtok("", "colour"), - els: vec![rtok("colour IN[", "green")] + Fc::In { + fid: rtok("", "colour"), + els: vec![rtok("colour IN[", "green")] } ), ( "colour IN[green,]", - Fc::In { - fid: rtok("", "colour"), - els: vec![rtok("colour IN[", "green")] + Fc::In { + fid: rtok("", "colour"), + els: vec![rtok("colour IN[", "green")] } ), ( "colour IN[green,blue]", - Fc::In { - fid: rtok("", "colour"), + Fc::In { + fid: rtok("", "colour"), els: vec![ rtok("colour IN[", "green"), rtok("colour IN[green, ", "blue"), - ] + ] } ), ( "colour NOT IN[green,blue]", - Fc::Not(Box::new(Fc::In { - fid: rtok("", "colour"), + Fc::Not(Box::new(Fc::In { + fid: rtok("", "colour"), els: vec![ rtok("colour NOT IN[", "green"), rtok("colour NOT IN[green, ", "blue"), - ] + ] })) ), ( " colour IN [ green , blue , ]", - Fc::In { - fid: rtok(" ", "colour"), + Fc::In { + fid: rtok(" ", "colour"), els: vec![ rtok("colour IN [ ", "green"), rtok("colour IN [ green , ", "blue"), - ] + ] } ), ( diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 25ffe1842..487676f4a 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -302,9 +302,7 @@ impl<'a> Filter<'a> { } Condition::NotEqual(val) => { let operator = Condition::Equal(val.clone()); - let docids = Self::evaluate_operator( - rtxn, index, field_id, &operator, - )?; + let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?; let all_ids = index.documents_ids(rtxn)?; return Ok(all_ids - docids); } @@ -421,20 +419,30 @@ impl<'a> Filter<'a> { FilterCondition::Or(subfilters) => { let mut bitmap = RoaringBitmap::new(); for f in subfilters { - bitmap |= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; + bitmap |= + Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; } Ok(bitmap) } FilterCondition::And(subfilters) => { let mut subfilters_iter = subfilters.iter(); if let Some(first_subfilter) = subfilters_iter.next() { - let mut bitmap = - Self::inner_evaluate(&(first_subfilter.clone()).into(), rtxn, index, filterable_fields)?; + let mut bitmap = Self::inner_evaluate( + &(first_subfilter.clone()).into(), + rtxn, + index, + filterable_fields, + )?; for f in subfilters_iter { if bitmap.is_empty() { return Ok(bitmap); } - bitmap &= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; + bitmap &= Self::inner_evaluate( + &(f.clone()).into(), + rtxn, + index, + filterable_fields, + )?; } Ok(bitmap) } else { From b09a8f1b915911631e94925cf0420b569a6db577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 17 Aug 2022 16:06:29 +0200 Subject: [PATCH 11/22] Filters: add explicit error message when using a keyword as value --- filter-parser/src/error.rs | 29 +++++++++++++++++------- filter-parser/src/lib.rs | 45 ++++++++++++++++++++++++-------------- filter-parser/src/value.rs | 14 ++++++++++-- 3 files changed, 62 insertions(+), 26 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index 0d2959126..ce9470ff8 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -48,6 +48,12 @@ pub struct Error<'a> { kind: ErrorKind<'a>, } +#[derive(Debug)] +pub enum ExpectedValueKind { + ReservedKeyword, + Other, +} + #[derive(Debug)] pub enum ErrorKind<'a> { ReservedGeo(&'a str), @@ -55,11 +61,11 @@ pub enum ErrorKind<'a> { MisusedGeo, InvalidPrimary, ExpectedEof, - ExpectedValue, + ExpectedValue(ExpectedValueKind), MalformedValue, InOpeningBracket, InClosingBracket, - InExpectedValue, + InExpectedValue(ExpectedValueKind), ReservedKeyword(String), MissingClosingDelimiter(char), Char(char), @@ -118,18 +124,22 @@ impl<'a> Display for Error<'a> { let escaped_input = input.escape_debug(); match &self.kind { - ErrorKind::ExpectedValue if input.trim().is_empty() => { + ErrorKind::ExpectedValue(_) if input.trim().is_empty() => { writeln!(f, "Was expecting a value but instead got nothing.")? } + ErrorKind::ExpectedValue(ExpectedValueKind::ReservedKeyword) => { + writeln!(f, "Was expecting a value but instead got `{escaped_input}`, which is a reserved keyword. To use `{escaped_input}` as a field name or a value, surround it by quotes.")? + } + ErrorKind::ExpectedValue(ExpectedValueKind::Other) => { + writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)? + } ErrorKind::MalformedValue => { writeln!(f, "Malformed value: `{}`.", escaped_input)? } ErrorKind::MissingClosingDelimiter(c) => { writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? } - ErrorKind::ExpectedValue => { - writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)? - } + ErrorKind::InvalidPrimary if input.trim().is_empty() => { writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing.")? } @@ -157,8 +167,11 @@ impl<'a> Display for Error<'a> { ErrorKind::InClosingBracket => { writeln!(f, "Expected matching `]` after the list of field names given to `IN[`")? } - ErrorKind::InExpectedValue => { - writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`")? + ErrorKind::InExpectedValue(ExpectedValueKind::ReservedKeyword) => { + writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`, which is a keyword. To use `{escaped_input}` as a field name or a value, surround it by quotes.")? + } + ErrorKind::InExpectedValue(ExpectedValueKind::Other) => { + writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`.")? } ErrorKind::Char(c) => { panic!("Tried to display a char error with `{}`", c) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 31ca2919a..d0c2d8531 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -48,7 +48,7 @@ use std::str::FromStr; pub use condition::{parse_condition, parse_to, Condition}; use condition::{parse_exists, parse_not_exists}; -use error::{cut_with_err, NomErrorExt}; +use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; @@ -166,11 +166,6 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) /// value_list = (value ("," value)* ","?)? fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { - // TODO: here, I should return a failure with a clear explanation whenever possible - // for example: - // * expected the name of a field, but got `AND` - // * expected closing square bracket, but got `AND` - let (input, first_value) = opt(parse_value)(input)?; if let Some(first_value) = first_value { let value_list_el_parser = preceded(ws(tag(",")), parse_value); @@ -203,7 +198,14 @@ fn parse_in(input: Span) -> IResult { if eof::<_, ()>(input).is_ok() { Error::new_from_kind(input, ErrorKind::InClosingBracket) } else { - Error::new_from_kind(input, ErrorKind::InExpectedValue) + let expected_value_kind = match parse_value(input) { + Err(nom::Err::Error(e)) => match e.kind() { + ErrorKind::ReservedKeyword(_) => ExpectedValueKind::ReservedKeyword, + _ => ExpectedValueKind::Other, + }, + _ => ExpectedValueKind::Other, + }; + Error::new_from_kind(input, ErrorKind::InExpectedValue(expected_value_kind)) } })(input)?; @@ -319,6 +321,21 @@ fn parse_geo_point(input: Span) -> IResult { Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))) } +fn parse_error_reserved_keyword(input: Span) -> IResult { + match parse_condition(input) { + Ok(result) => Ok(result), + Err(nom::Err::Error(inner) | nom::Err::Failure(inner)) => match inner.kind() { + ErrorKind::ExpectedValue(ExpectedValueKind::ReservedKeyword) => { + return Err(nom::Err::Failure(inner)); + } + _ => return Err(nom::Err::Error(inner)), + }, + Err(e) => { + return Err(e); + } + } +} + /// primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to fn parse_primary(input: Span) -> IResult { alt(( @@ -339,6 +356,7 @@ fn parse_primary(input: Span) -> IResult { parse_to, // the next lines are only for error handling and are written at the end to have the less possible performance impact parse_geo_point, + parse_error_reserved_keyword, ))(input) // if the inner parsers did not match enough information to return an accurate error .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary))) @@ -376,13 +394,6 @@ pub mod tests { let test_case = [ // simple test - ( - "x = AND", - Fc::Not(Box::new(Fc::Not(Box::new(Fc::In { - fid: rtok("NOT NOT", "colour"), - els: vec![] - })))) - ), ( "colour IN[]", Fc::In { @@ -756,8 +767,8 @@ pub mod tests { ("channel = ", "Was expecting a value but instead got nothing."), ("channel = 🐻", "Was expecting a value but instead got `🐻`."), ("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."), - ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `OR`."), - ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `AND`."), + ("'OR'", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\\'OR\\'`."), + ("OR", "Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes."), ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."), ("channel = Ponce OR", "Found unexpected characters at the end of the filter: `OR`. You probably forgot an `OR` or an `AND` rule."), ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), @@ -778,6 +789,8 @@ pub mod tests { ("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"), ("colour IN [blue, green", "Expected matching `]` after the list of field names given to `IN[`"), ("colour IN ['blue, green", "Expression `\\'blue, green` is missing the following closing delimiter: `'`."), + ("x = EXISTS", "Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes."), + ("AND = 8", "Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes."), ]; for (input, expected) in test_case { diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 8a7e8f586..bfa3c2730 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -5,7 +5,7 @@ use nom::combinator::cut; use nom::sequence::{delimited, terminated}; use nom::{InputIter, InputLength, InputTake, Slice}; -use crate::error::NomErrorExt; +use crate::error::{ExpectedValueKind, NomErrorExt}; use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; /// This function goes through all characters in the [Span] if it finds any escaped character (`\`). @@ -103,7 +103,17 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { )(input) // if we found nothing in the alt it means the user specified something that was not recognized as a value .map_err(|e: nom::Err| { - e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue)) + e.map_err(|error| { + let expected_value_kind = if matches!(error.kind(), ErrorKind::ReservedKeyword(_)) { + ExpectedValueKind::ReservedKeyword + } else { + ExpectedValueKind::Other + }; + Error::new_from_kind( + error_word(input).unwrap().1, + ErrorKind::ExpectedValue(expected_value_kind), + ) + }) }) .map_err(|e| { e.map_fail(|failure| { From 238a7be58d03d3b646e4b49d2457cd3c6c60c02f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 17 Aug 2022 16:53:40 +0200 Subject: [PATCH 12/22] Fix filter parser handling of keywords and surrounding spaces Now the following fragments are allowed: AND(field = AND'field' = AND"field" = --- filter-parser/src/lib.rs | 29 ++++++++++++------------ filter-parser/src/value.rs | 46 +++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index d0c2d8531..09aa252e1 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -52,7 +52,7 @@ use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; -use nom::character::complete::{char, multispace0, multispace1}; +use nom::character::complete::{char, multispace0}; use nom::combinator::{cut, eof, map, opt}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; @@ -60,6 +60,7 @@ use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::Finish; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; +use value::word_exact; pub type Span<'a> = LocatedSpan<&'a str, &'a str>; @@ -183,7 +184,7 @@ fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { /// in = value "IN" "[" value_list "]" fn parse_in(input: Span) -> IResult { let (input, value) = parse_value(input)?; - let (input, _) = ws(tag("IN"))(input)?; + let (input, _) = ws(word_exact("IN"))(input)?; // everything after `IN` can be a failure let (input, _) = @@ -215,9 +216,8 @@ fn parse_in(input: Span) -> IResult { /// in = value "NOT" WS* "IN" "[" value_list "]" fn parse_not_in(input: Span) -> IResult { let (input, value) = parse_value(input)?; - let (input, _) = tag("NOT")(input)?; - let (input, _) = multispace1(input)?; - let (input, _) = ws(tag("IN"))(input)?; + let (input, _) = word_exact("NOT")(input)?; + let (input, _) = ws(word_exact("IN"))(input)?; // everything after `IN` can be a failure let (input, _) = @@ -241,8 +241,7 @@ fn parse_not_in(input: Span) -> IResult { fn parse_or(input: Span) -> IResult { let (input, first_filter) = parse_and(input)?; // if we found a `OR` then we MUST find something next - let (input, mut ors) = - many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?; + let (input, mut ors) = many0(preceded(ws(word_exact("OR")), cut(parse_and)))(input)?; let filter = if ors.is_empty() { first_filter @@ -258,8 +257,7 @@ fn parse_or(input: Span) -> IResult { fn parse_and(input: Span) -> IResult { let (input, first_filter) = parse_not(input)?; // if we found a `AND` then we MUST find something next - let (input, mut ands) = - many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?; + let (input, mut ands) = many0(preceded(ws(word_exact("AND")), cut(parse_not)))(input)?; let filter = if ands.is_empty() { first_filter @@ -276,9 +274,7 @@ fn parse_and(input: Span) -> IResult { /// If we parse a `NOT` we MUST parse something behind. fn parse_not(input: Span) -> IResult { alt(( - map(preceded(ws(tuple((tag("NOT"), multispace1))), cut(parse_not)), |e| { - FilterCondition::Not(Box::new(e)) - }), + map(preceded(ws(word_exact("NOT")), cut(parse_not)), |e| FilterCondition::Not(Box::new(e))), parse_primary, ))(input) } @@ -288,7 +284,7 @@ fn parse_not(input: Span) -> IResult { fn parse_geo_radius(input: Span) -> IResult { // we want to allow space BEFORE the _geoRadius but not after let parsed = preceded( - tuple((multispace0, tag("_geoRadius"))), + tuple((multispace0, word_exact("_geoRadius"))), // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), )(input) @@ -741,6 +737,10 @@ pub mod tests { Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() ]) ) + // ( + // ("channel = ponce AND'dog' != 'bernese mountain'", ), + // ("channel = ponce AND('dog' != 'bernese mountain')", ), + // ) ]; for (input, expected) in test_case { @@ -770,7 +770,7 @@ pub mod tests { ("'OR'", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\\'OR\\'`."), ("OR", "Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes."), ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."), - ("channel = Ponce OR", "Found unexpected characters at the end of the filter: `OR`. You probably forgot an `OR` or an `AND` rule."), + ("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing."), ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), @@ -783,7 +783,6 @@ pub mod tests { ("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."), ("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."), ("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), - ("channel = ponce AND'dog' != 'bernese mountain'", "Found unexpected characters at the end of the filter: `AND\\'dog\\' != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), ("colour IN blue, green]", "Expected `[` after `IN` keyword."), ("colour IN [blue, green, 'blue' > 2]", "Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`"), ("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"), diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index bfa3c2730..90dc44604 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -3,7 +3,7 @@ use nom::bytes::complete::{take_till, take_while, take_while1}; use nom::character::complete::{char, multispace0}; use nom::combinator::cut; use nom::sequence::{delimited, terminated}; -use nom::{InputIter, InputLength, InputTake, Slice}; +use nom::{error, InputIter, InputLength, InputTake, Slice}; use crate::error::{ExpectedValueKind, NomErrorExt}; use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; @@ -48,6 +48,35 @@ fn quoted_by(quote: char, input: Span) -> IResult { )) } +// word = (alphanumeric | _ | - | .)+ except for reserved keywords +pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult> { + let (input, word): (_, Token<'a>) = + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; + if is_keyword(word.value()) { + return Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::ReservedKeyword(word.value().to_owned()), + ))); + } + Ok((input, word)) +} + +// word = {tag} +pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> { + move |input| { + let (input, word): (_, Token<'a>) = + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; + if word.value() == tag { + Ok((input, word)) + } else { + Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::InternalError(nom::error::ErrorKind::Tag), + ))) + } + } +} + /// value = WS* ( word | singleQuoted | doubleQuoted) WS+ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { // to get better diagnostic message we are going to strip the left whitespaces from the input right now @@ -71,19 +100,6 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { _ => (), } - // word = (alphanumeric | _ | - | .)+ except for reserved keywords - let word = |input: Span<'a>| -> IResult> { - let (input, word): (_, Token<'a>) = - take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; - if is_keyword(word.value()) { - return Err(nom::Err::Error(Error::new_from_kind( - input, - ErrorKind::ReservedKeyword(word.value().to_owned()), - ))); - } - Ok((input, word)) - }; - // this parser is only used when an error is encountered and it parse the // largest string possible that do not contain any “language” syntax. // If we try to parse `name = 🦀 AND language = rust` we want to return an @@ -97,7 +113,7 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { alt(( delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))), delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))), - word, + word_not_keyword, )), multispace0, )(input) From 497f9817a227b20567af95996dc99fdd0839ca81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 17 Aug 2022 17:25:31 +0200 Subject: [PATCH 13/22] Use snapshot testing for the filter parser --- filter-parser/Cargo.toml | 3 + filter-parser/src/lib.rs | 666 ++++++++++++++----------------------- filter-parser/src/value.rs | 2 +- 3 files changed, 259 insertions(+), 412 deletions(-) diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index 8f61796b3..21676f960 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -8,3 +8,6 @@ publish = false [dependencies] nom = "7.1.0" nom_locate = "4.0.0" + +[dev-dependencies] +insta = "1.18.2" diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 09aa252e1..b898c264c 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -43,9 +43,6 @@ mod condition; mod error; mod value; -use std::fmt::Debug; -use std::str::FromStr; - pub use condition::{parse_condition, parse_to, Condition}; use condition::{parse_exists, parse_not_exists}; use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; @@ -59,6 +56,8 @@ use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::Finish; use nom_locate::LocatedSpan; +use std::fmt::Debug; +use std::str::FromStr; pub(crate) use value::parse_value; use value::word_exact; @@ -388,422 +387,211 @@ pub mod tests { fn parse() { use FilterCondition as Fc; - let test_case = [ - // simple test - ( - "colour IN[]", - Fc::In { - fid: rtok("", "colour"), - els: vec![] - } - ), - ( - "colour IN[green]", - Fc::In { - fid: rtok("", "colour"), - els: vec![rtok("colour IN[", "green")] - } - ), - ( - "colour IN[green,]", - Fc::In { - fid: rtok("", "colour"), - els: vec![rtok("colour IN[", "green")] - } - ), - ( - "colour IN[green,blue]", - Fc::In { - fid: rtok("", "colour"), - els: vec![ - rtok("colour IN[", "green"), - rtok("colour IN[green, ", "blue"), - ] - } - ), - ( - "colour NOT IN[green,blue]", - Fc::Not(Box::new(Fc::In { - fid: rtok("", "colour"), - els: vec![ - rtok("colour NOT IN[", "green"), - rtok("colour NOT IN[green, ", "blue"), - ] - })) - ), - ( - " colour IN [ green , blue , ]", - Fc::In { - fid: rtok(" ", "colour"), - els: vec![ - rtok("colour IN [ ", "green"), - rtok("colour IN [ green , ", "blue"), - ] - } - ), - ( - "channel = Ponce", - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = ", "Ponce")), - }, - ), - ( - "subscribers = 12", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::Equal(rtok("subscribers = ", "12")), - }, - ), - // test all the quotes and simple quotes - ( - "channel = 'Mister Mv'", - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = '", "Mister Mv")), - }, - ), - ( - "channel = \"Mister Mv\"", - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = \"", "Mister Mv")), - }, - ), - ( - "'dog race' = Borzoi", - Fc::Condition { - fid: rtok("'", "dog race"), - op: Condition::Equal(rtok("'dog race' = ", "Borzoi")), - }, - ), - ( - "\"dog race\" = Chusky", - Fc::Condition { - fid: rtok("\"", "dog race"), - op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")), - }, - ), - ( - "\"dog race\" = \"Bernese Mountain\"", - Fc::Condition { - fid: rtok("\"", "dog race"), - op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), - }, - ), - ( - "'dog race' = 'Bernese Mountain'", - Fc::Condition { - fid: rtok("'", "dog race"), - op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")), - }, - ), - ( - "\"dog race\" = 'Bernese Mountain'", - Fc::Condition { - fid: rtok("\"", "dog race"), - op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), - }, - ), - // test all the operators - ( - "channel != ponce", - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::NotEqual(rtok("channel != ", "ponce")), - }, - ), - ( - "NOT channel = ponce", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("NOT ", "channel"), - op: Condition::Equal(rtok("NOT channel = ", "ponce")), - })), - ), - ( - "subscribers < 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::LowerThan(rtok("subscribers < ", "1000")), - }, - ), - ( - "subscribers > 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::GreaterThan(rtok("subscribers > ", "1000")), - }, - ), - ( - "subscribers <= 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")), - }, - ), - ( - "subscribers >= 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")), - }, - ), - ( - "NOT subscribers < 1000", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::LowerThan(rtok("NOT subscribers < ", "1000")), - })), - ), - ( - "NOT subscribers > 1000", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::GreaterThan(rtok("NOT subscribers > ", "1000")), - })), - ), - ( - "NOT subscribers <= 1000", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::LowerThanOrEqual(rtok("NOT subscribers <= ", "1000")), - })), - ), - ( - "NOT subscribers >= 1000", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::GreaterThanOrEqual(rtok("NOT subscribers >= ", "1000")), - })), - ), - ( - "subscribers EXISTS", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::Exists, - }, - ), - ( - "NOT subscribers EXISTS", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::Exists, - })), - ), - ( - "subscribers NOT EXISTS", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::Exists, - })), - ), - ( - "NOT subscribers NOT EXISTS", - Fc::Not(Box::new(Fc::Not(Box::new(Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::Exists, - })))), - ), - ( - "subscribers NOT EXISTS", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::Exists, - })), - ), - ( - "subscribers 100 TO 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::Between { - from: rtok("subscribers ", "100"), - to: rtok("subscribers 100 TO ", "1000"), - }, - }, - ), - ( - "NOT subscribers 100 TO 1000", - Fc::Not(Box::new(Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::Between { - from: rtok("NOT subscribers ", "100"), - to: rtok("NOT subscribers 100 TO ", "1000"), - }, - })), - ), - ( - "_geoRadius(12, 13, 14)", - Fc::GeoLowerThan { - point: [rtok("_geoRadius(", "12"), rtok("_geoRadius(12, ", "13")], - radius: rtok("_geoRadius(12, 13, ", "14"), - }, - ), - ( - "NOT _geoRadius(12, 13, 14)", - Fc::Not(Box::new(Fc::GeoLowerThan { - point: [rtok("NOT _geoRadius(", "12"), rtok("NOT _geoRadius(12, ", "13")], - radius: rtok("NOT _geoRadius(12, 13, ", "14"), - })), - ), - // test simple `or` and `and` - ( - "channel = ponce AND 'dog race' != 'bernese mountain'", - Fc::And(vec![ - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = ", "ponce")), - } - .into(), - Fc::Condition { - fid: rtok("channel = ponce AND '", "dog race"), - op: Condition::NotEqual(rtok( - "channel = ponce AND 'dog race' != '", - "bernese mountain", - )), - } - .into(), - ]), - ), - ( - "channel = ponce OR 'dog race' != 'bernese mountain'", - Fc::Or(vec![ - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = ", "ponce")), - } - .into(), - Fc::Condition { - fid: rtok("channel = ponce OR '", "dog race"), - op: Condition::NotEqual(rtok( - "channel = ponce OR 'dog race' != '", - "bernese mountain", - )), - } - .into(), - ]), - ), - ( - "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", - Fc::Or(vec![ - Fc::And(vec![ - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = ", "ponce")), - } - .into(), - Fc::Condition { - fid: rtok("channel = ponce AND '", "dog race"), - op: Condition::NotEqual(rtok( - "channel = ponce AND 'dog race' != '", - "bernese mountain", - )), - } - .into(), - ]) - .into(), - Fc::Condition { - fid: rtok( - "channel = ponce AND 'dog race' != 'bernese mountain' OR ", - "subscribers", - ), - op: Condition::GreaterThan(rtok( - "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", - "1000", - )), - } - .into(), - ]), - ), - // test parenthesis - ( - "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", - Fc::And(vec![ - Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(), - Fc::Or(vec![ - Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), - Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),] - ).into()]), - ), - ( - "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", - Fc::And(vec![ - Fc::Or(vec![ - Fc::And(vec![ - Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(), - Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), - ]).into(), - Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), - ]).into(), - Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() - ]) - ) - // ( - // ("channel = ponce AND'dog' != 'bernese mountain'", ), - // ("channel = ponce AND('dog' != 'bernese mountain')", ), - // ) - ]; - - for (input, expected) in test_case { - let result = Fc::parse(input); - - assert!( - result.is_ok(), - "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", - expected, - result.unwrap_err() - ); - let filter = result.unwrap(); - assert_eq!(filter, Some(expected), "Filter `{}` failed.", input); + macro_rules! p { + ($s:literal) => { + Fc::parse($s).unwrap().unwrap() + }; } + + // Test equal + insta::assert_display_snapshot!(p!("channel = Ponce"), @"{channel} = {Ponce}"); + insta::assert_display_snapshot!(p!("subscribers = 12"), @"{subscribers} = {12}"); + insta::assert_display_snapshot!(p!("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}"); + insta::assert_display_snapshot!(p!("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}"); + insta::assert_display_snapshot!(p!("'dog race' = Borzoi"), @"{dog race} = {Borzoi}"); + insta::assert_display_snapshot!(p!("\"dog race\" = Chusky"), @"{dog race} = {Chusky}"); + insta::assert_display_snapshot!(p!("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}"); + insta::assert_display_snapshot!(p!("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); + insta::assert_display_snapshot!(p!("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); + + // Test IN + insta::assert_display_snapshot!(p!("colour IN[]"), @"{colour} IN[]"); + insta::assert_display_snapshot!(p!("colour IN[green]"), @"{colour} IN[{green}, ]"); + insta::assert_display_snapshot!(p!("colour IN[green,]"), @"{colour} IN[{green}, ]"); + insta::assert_display_snapshot!(p!("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])"); + insta::assert_display_snapshot!(p!(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]"); + + // Test conditions + insta::assert_display_snapshot!(p!("channel != ponce"), @"{channel} != {ponce}"); + insta::assert_display_snapshot!(p!("NOT channel = ponce"), @"NOT ({channel} = {ponce})"); + insta::assert_display_snapshot!(p!("subscribers < 1000"), @"{subscribers} < {1000}"); + insta::assert_display_snapshot!(p!("subscribers > 1000"), @"{subscribers} > {1000}"); + insta::assert_display_snapshot!(p!("subscribers <= 1000"), @"{subscribers} <= {1000}"); + insta::assert_display_snapshot!(p!("subscribers >= 1000"), @"{subscribers} >= {1000}"); + insta::assert_display_snapshot!(p!("subscribers <= 1000"), @"{subscribers} <= {1000}"); + insta::assert_display_snapshot!(p!("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}"); + + // Test NOT + EXISTS + insta::assert_display_snapshot!(p!("subscribers EXISTS"), @"{subscribers} EXISTS"); + insta::assert_display_snapshot!(p!("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})"); + insta::assert_display_snapshot!(p!("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p!("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p!("NOT subscribers NOT EXISTS"), @"NOT (NOT ({subscribers} EXISTS))"); + insta::assert_display_snapshot!(p!("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p!("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})"); + + // Test geo radius + insta::assert_display_snapshot!(p!("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); + insta::assert_display_snapshot!(p!("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); + + // Test OR + AND + insta::assert_display_snapshot!(p!("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); + insta::assert_display_snapshot!(p!("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); + insta::assert_display_snapshot!(p!("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]"); + insta::assert_display_snapshot!( + p!("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"), + @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]" + ); + + // test parentheses + insta::assert_display_snapshot!(p!("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]"); + insta::assert_display_snapshot!(p!("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]"); } #[test] fn error() { use FilterCondition as Fc; - let test_case = [ - // simple test - ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."), - ("channel = ", "Was expecting a value but instead got nothing."), - ("channel = 🐻", "Was expecting a value but instead got `🐻`."), - ("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."), - ("'OR'", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\\'OR\\'`."), - ("OR", "Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes."), - ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."), - ("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing."), - ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), - ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), - ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), - ("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), - ("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."), - ("channel = 'ponce", "Expression `\\'ponce` is missing the following closing delimiter: `'`."), - ("channel = \"ponce", "Expression `\\\"ponce` is missing the following closing delimiter: `\"`."), - ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."), - ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."), - ("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."), - ("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."), - ("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), - ("colour IN blue, green]", "Expected `[` after `IN` keyword."), - ("colour IN [blue, green, 'blue' > 2]", "Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`"), - ("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"), - ("colour IN [blue, green", "Expected matching `]` after the list of field names given to `IN[`"), - ("colour IN ['blue, green", "Expression `\\'blue, green` is missing the following closing delimiter: `'`."), - ("x = EXISTS", "Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes."), - ("AND = 8", "Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes."), - ]; - - for (input, expected) in test_case { - let result = Fc::parse(input); - - assert!( - result.is_err(), - "Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`", - input, - result.unwrap() - ); - let filter = result.unwrap_err().to_string(); - assert!(filter.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, filter); + macro_rules! p { + ($s:literal) => { + Fc::parse($s).unwrap_err().to_string() + }; } + + insta::assert_display_snapshot!(p!("channel = Ponce = 12"), @r###" + Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule. + 17:21 channel = Ponce = 12 + "###); + + insta::assert_display_snapshot!(p!("channel = "), @r###" + Was expecting a value but instead got nothing. + 14:14 channel = + "###); + + insta::assert_display_snapshot!(p!("channel = 🐻"), @r###" + Was expecting a value but instead got `🐻`. + 11:12 channel = 🐻 + "###); + + insta::assert_display_snapshot!(p!("channel = 🐻 AND followers < 100"), @r###" + Was expecting a value but instead got `🐻`. + 11:12 channel = 🐻 AND followers < 100 + "###); + + insta::assert_display_snapshot!(p!("'OR'"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\'OR\'`. + 1:5 'OR' + "###); + + insta::assert_display_snapshot!(p!("OR"), @r###" + Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. + 1:3 OR + "###); + + insta::assert_display_snapshot!(p!("channel Ponce"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`. + 1:14 channel Ponce + "###); + + insta::assert_display_snapshot!(p!("channel = Ponce OR"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing. + 19:19 channel = Ponce OR + "###); + + insta::assert_display_snapshot!(p!("_geoRadius"), @r###" + The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`. + 1:11 _geoRadius + "###); + + insta::assert_display_snapshot!(p!("_geoRadius = 12"), @r###" + The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`. + 1:16 _geoRadius = 12 + "###); + + insta::assert_display_snapshot!(p!("_geoPoint(12, 13, 14)"), @r###" + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. + 1:22 _geoPoint(12, 13, 14) + "###); + + insta::assert_display_snapshot!(p!("position <= _geoPoint(12, 13, 14)"), @r###" + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. + 13:34 position <= _geoPoint(12, 13, 14) + "###); + + insta::assert_display_snapshot!(p!("position <= _geoRadius(12, 13, 14)"), @r###" + The `_geoRadius` filter is an operation and can't be used as a value. + 13:35 position <= _geoRadius(12, 13, 14) + "###); + + insta::assert_display_snapshot!(p!("channel = 'ponce"), @r###" + Expression `\'ponce` is missing the following closing delimiter: `'`. + 11:17 channel = 'ponce + "###); + + insta::assert_display_snapshot!(p!("channel = \"ponce"), @r###" + Expression `\"ponce` is missing the following closing delimiter: `"`. + 11:17 channel = "ponce + "###); + + insta::assert_display_snapshot!(p!("channel = mv OR (followers >= 1000"), @r###" + Expression `(followers >= 1000` is missing the following closing delimiter: `)`. + 17:35 channel = mv OR (followers >= 1000 + "###); + + insta::assert_display_snapshot!(p!("channel = mv OR followers >= 1000)"), @r###" + Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule. + 34:35 channel = mv OR followers >= 1000) + "###); + + insta::assert_display_snapshot!(p!("colour NOT EXIST"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`. + 1:17 colour NOT EXIST + "###); + + insta::assert_display_snapshot!(p!("subscribers 100 TO1000"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`. + 1:23 subscribers 100 TO1000 + "###); + + insta::assert_display_snapshot!(p!("channel = ponce ORdog != 'bernese mountain'"), @r###" + Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule. + 17:44 channel = ponce ORdog != 'bernese mountain' + "###); + + insta::assert_display_snapshot!(p!("colour IN blue, green]"), @r###" + Expected `[` after `IN` keyword. + 11:23 colour IN blue, green] + "###); + + insta::assert_display_snapshot!(p!("colour IN [blue, green, 'blue' > 2]"), @r###" + Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`. + 32:36 colour IN [blue, green, 'blue' > 2] + "###); + + insta::assert_display_snapshot!(p!("colour IN [blue, green, AND]"), @r###" + Expected only comma-separated field names inside `IN[..]` but instead found `AND]`. + 25:29 colour IN [blue, green, AND] + "###); + + insta::assert_display_snapshot!(p!("colour IN [blue, green"), @r###" + Expected matching `]` after the list of field names given to `IN[` + 23:23 colour IN [blue, green + "###); + + insta::assert_display_snapshot!(p!("colour IN ['blue, green"), @r###" + Expression `\'blue, green` is missing the following closing delimiter: `'`. + 12:24 colour IN ['blue, green + "###); + + insta::assert_display_snapshot!(p!("x = EXISTS"), @r###" + Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes. + 5:11 x = EXISTS + "###); + + insta::assert_display_snapshot!(p!("AND = 8"), @r###" + Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes. + 1:4 AND = 8 + "###); } #[test] @@ -821,3 +609,59 @@ pub mod tests { assert!(filter.token_at_depth(3).is_none()); } } + +impl<'a> std::fmt::Display for FilterCondition<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FilterCondition::Not(filter) => { + write!(f, "NOT ({filter})") + } + FilterCondition::Condition { fid, op } => { + write!(f, "{fid} {op}") + } + FilterCondition::In { fid, els } => { + write!(f, "{fid} IN[")?; + for el in els { + write!(f, "{el}, ")?; + } + write!(f, "]") + } + FilterCondition::Or(els) => { + write!(f, "OR[")?; + for el in els { + write!(f, "{el}, ")?; + } + write!(f, "]") + } + FilterCondition::And(els) => { + write!(f, "AND[")?; + for el in els { + write!(f, "{el}, ")?; + } + write!(f, "]") + } + FilterCondition::GeoLowerThan { point, radius } => { + write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius) + } + } + } +} +impl<'a> std::fmt::Display for Condition<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Condition::GreaterThan(token) => write!(f, "> {token}"), + Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"), + Condition::Equal(token) => write!(f, "= {token}"), + Condition::NotEqual(token) => write!(f, "!= {token}"), + Condition::Exists => write!(f, "EXISTS"), + Condition::LowerThan(token) => write!(f, "< {token}"), + Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), + Condition::Between { from, to } => write!(f, "{from} TO {to}"), + } + } +} +impl<'a> std::fmt::Display for Token<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{{{}}}", self.value()) + } +} diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 90dc44604..d015018c1 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -3,7 +3,7 @@ use nom::bytes::complete::{take_till, take_while, take_while1}; use nom::character::complete::{char, multispace0}; use nom::combinator::cut; use nom::sequence::{delimited, terminated}; -use nom::{error, InputIter, InputLength, InputTake, Slice}; +use nom::{InputIter, InputLength, InputTake, Slice}; use crate::error::{ExpectedValueKind, NomErrorExt}; use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; From b030efdc832ec23df63ad3020bdea758d3357469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 10:58:04 +0200 Subject: [PATCH 14/22] Fix parsing of IN[] filter followed by whitespace + factorise its impl --- filter-parser/src/lib.rs | 47 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index b898c264c..6dc176283 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -180,9 +180,8 @@ fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { } } -/// in = value "IN" "[" value_list "]" -fn parse_in(input: Span) -> IResult { - let (input, value) = parse_value(input)?; +/// "IN" "[" value_list "]" +fn parse_in_body(input: Span) -> IResult> { let (input, _) = ws(word_exact("IN"))(input)?; // everything after `IN` can be a failure @@ -194,7 +193,7 @@ fn parse_in(input: Span) -> IResult { let (input, content) = cut(parse_value_list)(input)?; // everything after `IN` can be a failure - let (input, _) = cut_with_err(tag("]"), |_| { + let (input, _) = cut_with_err(ws(tag("]")), |_| { if eof::<_, ()>(input).is_ok() { Error::new_from_kind(input, ErrorKind::InClosingBracket) } else { @@ -209,28 +208,23 @@ fn parse_in(input: Span) -> IResult { } })(input)?; + Ok((input, content)) +} + +/// in = value "IN" "[" value_list "]" +fn parse_in(input: Span) -> IResult { + let (input, value) = parse_value(input)?; + let (input, content) = parse_in_body(input)?; + let filter = FilterCondition::In { fid: value, els: content }; Ok((input, filter)) } + /// in = value "NOT" WS* "IN" "[" value_list "]" fn parse_not_in(input: Span) -> IResult { let (input, value) = parse_value(input)?; let (input, _) = word_exact("NOT")(input)?; - let (input, _) = ws(word_exact("IN"))(input)?; - - // everything after `IN` can be a failure - let (input, _) = - cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))( - input, - )?; - - let (input, content) = cut(parse_value_list)(input)?; - - // everything after `IN` can be a failure - let (input, _) = - cut_with_err(tag("]"), |_| Error::new_from_kind(input, ErrorKind::InClosingBracket))( - input, - )?; + let (input, content) = parse_in_body(input)?; let filter = FilterCondition::Not(Box::new(FilterCondition::In { fid: value, els: content })); Ok((input, filter)) @@ -355,9 +349,6 @@ fn parse_primary(input: Span) -> IResult { ))(input) // if the inner parsers did not match enough information to return an accurate error .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary))) - - // TODO: if the filter starts with a reserved keyword that is not NOT, then we should return the reserved keyword error - // TODO: if the filter is x = reserved, idem } /// expression = or @@ -411,6 +402,18 @@ pub mod tests { insta::assert_display_snapshot!(p!("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])"); insta::assert_display_snapshot!(p!(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]"); + // Test IN + OR/AND/() + insta::assert_display_snapshot!(p!(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); + insta::assert_display_snapshot!(p!("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]"); + insta::assert_display_snapshot!(p!("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]"); + + // Test whitespace start/end + insta::assert_display_snapshot!(p!(" colour = green "), @"{colour} = {green}"); + insta::assert_display_snapshot!(p!(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]"); + insta::assert_display_snapshot!(p!(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); + insta::assert_display_snapshot!(p!(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])"); + insta::assert_display_snapshot!(p!(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]"); + // Test conditions insta::assert_display_snapshot!(p!("channel != ponce"), @"{channel} != {ponce}"); insta::assert_display_snapshot!(p!("NOT channel = ponce"), @"NOT ({channel} = {ponce})"); From 98f0da6b383bf60cde4a82fc9132cac3f05dbdbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 10:58:24 +0200 Subject: [PATCH 15/22] Simplify representation of nested NOT filters --- filter-parser/src/lib.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 6dc176283..aac8d7d35 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -267,7 +267,10 @@ fn parse_and(input: Span) -> IResult { /// If we parse a `NOT` we MUST parse something behind. fn parse_not(input: Span) -> IResult { alt(( - map(preceded(ws(word_exact("NOT")), cut(parse_not)), |e| FilterCondition::Not(Box::new(e))), + map(preceded(ws(word_exact("NOT")), cut(parse_not)), |e| match e { + FilterCondition::Not(e) => *e, + _ => FilterCondition::Not(Box::new(e)), + }), parse_primary, ))(input) } @@ -429,10 +432,14 @@ pub mod tests { insta::assert_display_snapshot!(p!("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})"); insta::assert_display_snapshot!(p!("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); insta::assert_display_snapshot!(p!("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); - insta::assert_display_snapshot!(p!("NOT subscribers NOT EXISTS"), @"NOT (NOT ({subscribers} EXISTS))"); + insta::assert_display_snapshot!(p!("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS"); insta::assert_display_snapshot!(p!("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); insta::assert_display_snapshot!(p!("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})"); + // Test nested NOT + insta::assert_display_snapshot!(p!("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); + insta::assert_display_snapshot!(p!("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); + // Test geo radius insta::assert_display_snapshot!(p!("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); insta::assert_display_snapshot!(p!("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); From c51dcad51b1d225f24ef5f519c1c94daa7088c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 10:59:21 +0200 Subject: [PATCH 16/22] Don't recompute filterable fields in evaluation of IN[] filter --- milli/src/search/facet/filter.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 487676f4a..2f6fb5b00 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -361,10 +361,7 @@ impl<'a> Filter<'a> { return Ok(all_ids - selected); } FilterCondition::In { fid, els } => { - // TODO: this could be optimised - let filterable_fields = index.filterable_fields(rtxn)?; - - if crate::is_faceted(fid.value(), &filterable_fields) { + if crate::is_faceted(fid.value(), filterable_fields) { let field_ids_map = index.fields_ids_map(rtxn)?; if let Some(fid) = field_ids_map.id(fid.value()) { @@ -382,7 +379,7 @@ impl<'a> Filter<'a> { } else { return Err(fid.as_external_error(FilterError::AttributeNotFilterable { attribute: fid.value(), - filterable_fields, + filterable_fields: filterable_fields.clone(), }))?; } } From 9af69c151be63327116208ae7cfc7385c729464c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 11:27:39 +0200 Subject: [PATCH 17/22] Limit the maximum depth of filters This should have no impact on the user but is there to safeguard meilisearch against malicious inputs. --- filter-parser/src/error.rs | 5 +++ filter-parser/src/lib.rs | 79 +++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index ce9470ff8..05fc3a276 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -70,6 +70,7 @@ pub enum ErrorKind<'a> { MissingClosingDelimiter(char), Char(char), InternalError(error::ErrorKind), + DepthLimitReached, External(String), } @@ -176,6 +177,10 @@ impl<'a> Display for Error<'a> { ErrorKind::Char(c) => { panic!("Tried to display a char error with `{}`", c) } + ErrorKind::DepthLimitReached => writeln!( + f, + "The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions." + )?, ErrorKind::InternalError(kind) => writeln!( f, "Encountered an internal `{:?}` error while parsing your filter. Please fill an issue", kind diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index aac8d7d35..b8d3272c8 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -65,6 +65,8 @@ pub type Span<'a> = LocatedSpan<&'a str, &'a str>; type IResult<'a, Ret> = nom::IResult, Ret, Error<'a>>; +const MAX_FILTER_DEPTH: usize = 200; + #[derive(Debug, Clone, Eq)] pub struct Token<'a> { /// The token in the original input, it should be used when possible. @@ -231,10 +233,14 @@ fn parse_not_in(input: Span) -> IResult { } /// or = and ("OR" and) -fn parse_or(input: Span) -> IResult { - let (input, first_filter) = parse_and(input)?; +fn parse_or(input: Span, depth: usize) -> IResult { + if depth > MAX_FILTER_DEPTH { + return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached))); + } + let (input, first_filter) = parse_and(input, depth + 1)?; // if we found a `OR` then we MUST find something next - let (input, mut ors) = many0(preceded(ws(word_exact("OR")), cut(parse_and)))(input)?; + let (input, mut ors) = + many0(preceded(ws(word_exact("OR")), cut(|input| parse_and(input, depth + 1))))(input)?; let filter = if ors.is_empty() { first_filter @@ -247,10 +253,14 @@ fn parse_or(input: Span) -> IResult { } /// and = not ("AND" not)* -fn parse_and(input: Span) -> IResult { - let (input, first_filter) = parse_not(input)?; +fn parse_and(input: Span, depth: usize) -> IResult { + if depth > MAX_FILTER_DEPTH { + return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached))); + } + let (input, first_filter) = parse_not(input, depth + 1)?; // if we found a `AND` then we MUST find something next - let (input, mut ands) = many0(preceded(ws(word_exact("AND")), cut(parse_not)))(input)?; + let (input, mut ands) = + many0(preceded(ws(word_exact("AND")), cut(|input| parse_not(input, depth + 1))))(input)?; let filter = if ands.is_empty() { first_filter @@ -265,13 +275,19 @@ fn parse_and(input: Span) -> IResult { /// not = ("NOT" WS+ not) | primary /// We can have multiple consecutive not, eg: `NOT NOT channel = mv`. /// If we parse a `NOT` we MUST parse something behind. -fn parse_not(input: Span) -> IResult { +fn parse_not(input: Span, depth: usize) -> IResult { + if depth > MAX_FILTER_DEPTH { + return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached))); + } alt(( - map(preceded(ws(word_exact("NOT")), cut(parse_not)), |e| match e { - FilterCondition::Not(e) => *e, - _ => FilterCondition::Not(Box::new(e)), - }), - parse_primary, + map( + preceded(ws(word_exact("NOT")), cut(|input| parse_not(input, depth + 1))), + |e| match e { + FilterCondition::Not(e) => *e, + _ => FilterCondition::Not(Box::new(e)), + }, + ), + |input| parse_primary(input, depth + 1), ))(input) } @@ -329,12 +345,15 @@ fn parse_error_reserved_keyword(input: Span) -> IResult { } /// primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to -fn parse_primary(input: Span) -> IResult { +fn parse_primary(input: Span, depth: usize) -> IResult { + if depth > MAX_FILTER_DEPTH { + return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached))); + } alt(( // if we find a first parenthesis, then we must parse an expression and find the closing parenthesis delimited( ws(char('(')), - cut(parse_expression), + cut(|input| parse_expression(input, depth + 1)), cut_with_err(ws(char(')')), |c| { Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())) }), @@ -355,13 +374,13 @@ fn parse_primary(input: Span) -> IResult { } /// expression = or -pub fn parse_expression(input: Span) -> IResult { - parse_or(input) +pub fn parse_expression(input: Span, depth: usize) -> IResult { + parse_or(input, depth) } /// filter = expression EOF pub fn parse_filter(input: Span) -> IResult { - terminated(parse_expression, eof)(input) + terminated(|input| parse_expression(input, 0), eof)(input) } #[cfg(test)] @@ -453,9 +472,20 @@ pub mod tests { @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]" ); - // test parentheses + // Test parentheses insta::assert_display_snapshot!(p!("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]"); insta::assert_display_snapshot!(p!("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]"); + + // Test recursion + // This is the most that is allowed + insta::assert_display_snapshot!( + p!("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"), + @"{x} = {1}" + ); + insta::assert_display_snapshot!( + p!("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), + @"NOT ({x} = {1})" + ); } #[test] @@ -602,6 +632,19 @@ pub mod tests { Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes. 1:4 AND = 8 "###); + + insta::assert_display_snapshot!(p!("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###" + The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions. + 51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))) + "###); + + insta::assert_display_snapshot!( + p!("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), + @r###" + The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions. + 797:802 NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1 + "### + ); } #[test] From 5d74ebd5e56c411b46be9e737de664dbb94ff6b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 11:36:38 +0200 Subject: [PATCH 18/22] Cargo fmt --- filter-parser/src/error.rs | 1 - filter-parser/src/lib.rs | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index 05fc3a276..d5d36bd8e 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -140,7 +140,6 @@ impl<'a> Display for Error<'a> { ErrorKind::MissingClosingDelimiter(c) => { writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? } - ErrorKind::InvalidPrimary if input.trim().is_empty() => { writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing.")? } diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index b8d3272c8..014b76bc5 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -43,6 +43,9 @@ mod condition; mod error; mod value; +use std::fmt::Debug; +use std::str::FromStr; + pub use condition::{parse_condition, parse_to, Condition}; use condition::{parse_exists, parse_not_exists}; use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; @@ -56,8 +59,6 @@ use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::Finish; use nom_locate::LocatedSpan; -use std::fmt::Debug; -use std::str::FromStr; pub(crate) use value::parse_value; use value::word_exact; @@ -182,7 +183,7 @@ fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { } } -/// "IN" "[" value_list "]" +/// "IN" WS* "[" value_list "]" fn parse_in_body(input: Span) -> IResult> { let (input, _) = ws(word_exact("IN"))(input)?; From dd34dbaca5dae0241d205766d63796bbb23876f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 11:55:01 +0200 Subject: [PATCH 19/22] Add more filter parser tests --- filter-parser/src/lib.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 014b76bc5..36307446f 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -487,6 +487,9 @@ pub mod tests { p!("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), @"NOT ({x} = {1})" ); + + // Confusing keywords + insta::assert_display_snapshot!(p!(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]"); } #[test] @@ -646,6 +649,11 @@ pub mod tests { 797:802 NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1 "### ); + + insta::assert_display_snapshot!(p!(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###" + Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. + 5:7 NOT OR EXISTS AND EXISTS NOT EXISTS + "###); } #[test] From 8a271223a9996ebf1593a1d29dcec85a7c12d5fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 13:03:55 +0200 Subject: [PATCH 20/22] Change a macro_rules to a function in filter parser --- filter-parser/src/lib.rs | 170 +++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 87 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 36307446f..ddb218759 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -401,90 +401,88 @@ pub mod tests { fn parse() { use FilterCondition as Fc; - macro_rules! p { - ($s:literal) => { - Fc::parse($s).unwrap().unwrap() - }; + fn p(s: &str) -> impl std::fmt::Display { + Fc::parse(s).unwrap().unwrap() } // Test equal - insta::assert_display_snapshot!(p!("channel = Ponce"), @"{channel} = {Ponce}"); - insta::assert_display_snapshot!(p!("subscribers = 12"), @"{subscribers} = {12}"); - insta::assert_display_snapshot!(p!("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}"); - insta::assert_display_snapshot!(p!("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}"); - insta::assert_display_snapshot!(p!("'dog race' = Borzoi"), @"{dog race} = {Borzoi}"); - insta::assert_display_snapshot!(p!("\"dog race\" = Chusky"), @"{dog race} = {Chusky}"); - insta::assert_display_snapshot!(p!("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}"); - insta::assert_display_snapshot!(p!("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); - insta::assert_display_snapshot!(p!("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); + insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}"); + insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}"); + insta::assert_display_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}"); + insta::assert_display_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}"); + insta::assert_display_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}"); + insta::assert_display_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}"); + insta::assert_display_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}"); + insta::assert_display_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); + insta::assert_display_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); // Test IN - insta::assert_display_snapshot!(p!("colour IN[]"), @"{colour} IN[]"); - insta::assert_display_snapshot!(p!("colour IN[green]"), @"{colour} IN[{green}, ]"); - insta::assert_display_snapshot!(p!("colour IN[green,]"), @"{colour} IN[{green}, ]"); - insta::assert_display_snapshot!(p!("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])"); - insta::assert_display_snapshot!(p!(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]"); + insta::assert_display_snapshot!(p("colour IN[]"), @"{colour} IN[]"); + insta::assert_display_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]"); + insta::assert_display_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]"); + insta::assert_display_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])"); + insta::assert_display_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]"); // Test IN + OR/AND/() - insta::assert_display_snapshot!(p!(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); - insta::assert_display_snapshot!(p!("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]"); - insta::assert_display_snapshot!(p!("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]"); + insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); + insta::assert_display_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]"); + insta::assert_display_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]"); // Test whitespace start/end - insta::assert_display_snapshot!(p!(" colour = green "), @"{colour} = {green}"); - insta::assert_display_snapshot!(p!(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]"); - insta::assert_display_snapshot!(p!(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); - insta::assert_display_snapshot!(p!(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])"); - insta::assert_display_snapshot!(p!(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]"); + insta::assert_display_snapshot!(p(" colour = green "), @"{colour} = {green}"); + insta::assert_display_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]"); + insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); + insta::assert_display_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])"); + insta::assert_display_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]"); // Test conditions - insta::assert_display_snapshot!(p!("channel != ponce"), @"{channel} != {ponce}"); - insta::assert_display_snapshot!(p!("NOT channel = ponce"), @"NOT ({channel} = {ponce})"); - insta::assert_display_snapshot!(p!("subscribers < 1000"), @"{subscribers} < {1000}"); - insta::assert_display_snapshot!(p!("subscribers > 1000"), @"{subscribers} > {1000}"); - insta::assert_display_snapshot!(p!("subscribers <= 1000"), @"{subscribers} <= {1000}"); - insta::assert_display_snapshot!(p!("subscribers >= 1000"), @"{subscribers} >= {1000}"); - insta::assert_display_snapshot!(p!("subscribers <= 1000"), @"{subscribers} <= {1000}"); - insta::assert_display_snapshot!(p!("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}"); + insta::assert_display_snapshot!(p("channel != ponce"), @"{channel} != {ponce}"); + insta::assert_display_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})"); + insta::assert_display_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}"); + insta::assert_display_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}"); + insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}"); + insta::assert_display_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}"); + insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}"); + insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}"); // Test NOT + EXISTS - insta::assert_display_snapshot!(p!("subscribers EXISTS"), @"{subscribers} EXISTS"); - insta::assert_display_snapshot!(p!("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})"); - insta::assert_display_snapshot!(p!("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); - insta::assert_display_snapshot!(p!("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); - insta::assert_display_snapshot!(p!("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS"); - insta::assert_display_snapshot!(p!("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); - insta::assert_display_snapshot!(p!("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})"); + insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS"); + insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})"); + insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS"); + insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})"); // Test nested NOT - insta::assert_display_snapshot!(p!("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); - insta::assert_display_snapshot!(p!("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); + insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); + insta::assert_display_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); // Test geo radius - insta::assert_display_snapshot!(p!("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); - insta::assert_display_snapshot!(p!("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); + insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); + insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); // Test OR + AND - insta::assert_display_snapshot!(p!("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); - insta::assert_display_snapshot!(p!("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); - insta::assert_display_snapshot!(p!("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]"); + insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); + insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); + insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]"); insta::assert_display_snapshot!( - p!("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"), + p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]" ); // Test parentheses - insta::assert_display_snapshot!(p!("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]"); - insta::assert_display_snapshot!(p!("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]"); + insta::assert_display_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]"); + insta::assert_display_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]"); // Test recursion // This is the most that is allowed insta::assert_display_snapshot!( - p!("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"), + p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"), @"{x} = {1}" ); insta::assert_display_snapshot!( - p!("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), + p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), @"NOT ({x} = {1})" ); @@ -496,161 +494,159 @@ pub mod tests { fn error() { use FilterCondition as Fc; - macro_rules! p { - ($s:literal) => { - Fc::parse($s).unwrap_err().to_string() - }; + fn p(s: &str) -> impl std::fmt::Display { + Fc::parse(s).unwrap_err().to_string() } - insta::assert_display_snapshot!(p!("channel = Ponce = 12"), @r###" + insta::assert_display_snapshot!(p("channel = Ponce = 12"), @r###" Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule. 17:21 channel = Ponce = 12 "###); - insta::assert_display_snapshot!(p!("channel = "), @r###" + insta::assert_display_snapshot!(p("channel = "), @r###" Was expecting a value but instead got nothing. 14:14 channel = "###); - insta::assert_display_snapshot!(p!("channel = 🐻"), @r###" + insta::assert_display_snapshot!(p("channel = 🐻"), @r###" Was expecting a value but instead got `🐻`. 11:12 channel = 🐻 "###); - insta::assert_display_snapshot!(p!("channel = 🐻 AND followers < 100"), @r###" + insta::assert_display_snapshot!(p("channel = 🐻 AND followers < 100"), @r###" Was expecting a value but instead got `🐻`. 11:12 channel = 🐻 AND followers < 100 "###); - insta::assert_display_snapshot!(p!("'OR'"), @r###" + insta::assert_display_snapshot!(p("'OR'"), @r###" Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\'OR\'`. 1:5 'OR' "###); - insta::assert_display_snapshot!(p!("OR"), @r###" + insta::assert_display_snapshot!(p("OR"), @r###" Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. 1:3 OR "###); - insta::assert_display_snapshot!(p!("channel Ponce"), @r###" + insta::assert_display_snapshot!(p("channel Ponce"), @r###" Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`. 1:14 channel Ponce "###); - insta::assert_display_snapshot!(p!("channel = Ponce OR"), @r###" + insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing. 19:19 channel = Ponce OR "###); - insta::assert_display_snapshot!(p!("_geoRadius"), @r###" + insta::assert_display_snapshot!(p("_geoRadius"), @r###" The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`. 1:11 _geoRadius "###); - insta::assert_display_snapshot!(p!("_geoRadius = 12"), @r###" + insta::assert_display_snapshot!(p("_geoRadius = 12"), @r###" The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`. 1:16 _geoRadius = 12 "###); - insta::assert_display_snapshot!(p!("_geoPoint(12, 13, 14)"), @r###" + insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###" `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. 1:22 _geoPoint(12, 13, 14) "###); - insta::assert_display_snapshot!(p!("position <= _geoPoint(12, 13, 14)"), @r###" + insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###" `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. 13:34 position <= _geoPoint(12, 13, 14) "###); - insta::assert_display_snapshot!(p!("position <= _geoRadius(12, 13, 14)"), @r###" + insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###" The `_geoRadius` filter is an operation and can't be used as a value. 13:35 position <= _geoRadius(12, 13, 14) "###); - insta::assert_display_snapshot!(p!("channel = 'ponce"), @r###" + insta::assert_display_snapshot!(p("channel = 'ponce"), @r###" Expression `\'ponce` is missing the following closing delimiter: `'`. 11:17 channel = 'ponce "###); - insta::assert_display_snapshot!(p!("channel = \"ponce"), @r###" + insta::assert_display_snapshot!(p("channel = \"ponce"), @r###" Expression `\"ponce` is missing the following closing delimiter: `"`. 11:17 channel = "ponce "###); - insta::assert_display_snapshot!(p!("channel = mv OR (followers >= 1000"), @r###" + insta::assert_display_snapshot!(p("channel = mv OR (followers >= 1000"), @r###" Expression `(followers >= 1000` is missing the following closing delimiter: `)`. 17:35 channel = mv OR (followers >= 1000 "###); - insta::assert_display_snapshot!(p!("channel = mv OR followers >= 1000)"), @r###" + insta::assert_display_snapshot!(p("channel = mv OR followers >= 1000)"), @r###" Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule. 34:35 channel = mv OR followers >= 1000) "###); - insta::assert_display_snapshot!(p!("colour NOT EXIST"), @r###" + insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); - insta::assert_display_snapshot!(p!("subscribers 100 TO1000"), @r###" + insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); - insta::assert_display_snapshot!(p!("channel = ponce ORdog != 'bernese mountain'"), @r###" + insta::assert_display_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###" Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule. 17:44 channel = ponce ORdog != 'bernese mountain' "###); - insta::assert_display_snapshot!(p!("colour IN blue, green]"), @r###" + insta::assert_display_snapshot!(p("colour IN blue, green]"), @r###" Expected `[` after `IN` keyword. 11:23 colour IN blue, green] "###); - insta::assert_display_snapshot!(p!("colour IN [blue, green, 'blue' > 2]"), @r###" + insta::assert_display_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###" Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`. 32:36 colour IN [blue, green, 'blue' > 2] "###); - insta::assert_display_snapshot!(p!("colour IN [blue, green, AND]"), @r###" + insta::assert_display_snapshot!(p("colour IN [blue, green, AND]"), @r###" Expected only comma-separated field names inside `IN[..]` but instead found `AND]`. 25:29 colour IN [blue, green, AND] "###); - insta::assert_display_snapshot!(p!("colour IN [blue, green"), @r###" + insta::assert_display_snapshot!(p("colour IN [blue, green"), @r###" Expected matching `]` after the list of field names given to `IN[` 23:23 colour IN [blue, green "###); - insta::assert_display_snapshot!(p!("colour IN ['blue, green"), @r###" + insta::assert_display_snapshot!(p("colour IN ['blue, green"), @r###" Expression `\'blue, green` is missing the following closing delimiter: `'`. 12:24 colour IN ['blue, green "###); - insta::assert_display_snapshot!(p!("x = EXISTS"), @r###" + insta::assert_display_snapshot!(p("x = EXISTS"), @r###" Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes. 5:11 x = EXISTS "###); - insta::assert_display_snapshot!(p!("AND = 8"), @r###" + insta::assert_display_snapshot!(p("AND = 8"), @r###" Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes. 1:4 AND = 8 "###); - insta::assert_display_snapshot!(p!("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###" + insta::assert_display_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###" The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions. 51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))) "###); insta::assert_display_snapshot!( - p!("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), + p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), @r###" The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions. 797:802 NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1 "### ); - insta::assert_display_snapshot!(p!(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###" + insta::assert_display_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###" Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. 5:7 NOT OR EXISTS AND EXISTS NOT EXISTS "###); From 9b6602cba2edd365ca1afa8786f8ce723f9f873d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 13:06:57 +0200 Subject: [PATCH 21/22] Avoid cloning FilterCondition in filter array parsing --- milli/src/search/facet/filter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 2f6fb5b00..7241dab2b 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -104,7 +104,7 @@ impl<'a> Filter<'a> { if ors.len() > 1 { ands.push(FilterCondition::Or(ors)); } else if ors.len() == 1 { - ands.push(ors[0].clone()); + ands.push(ors.pop().unwrap()); } } Either::Right(rule) => { @@ -117,7 +117,7 @@ impl<'a> Filter<'a> { let and = if ands.is_empty() { return Ok(None); } else if ands.len() == 1 { - ands[0].clone() + ands.pop().unwrap() } else { FilterCondition::And(ands) }; From c7a86b56efddda4b647be6fce10fef9c8322e140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 18 Aug 2022 13:16:56 +0200 Subject: [PATCH 22/22] Fix filter parser compilation error --- filter-parser/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index ddb218759..33025e6e9 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -401,7 +401,7 @@ pub mod tests { fn parse() { use FilterCondition as Fc; - fn p(s: &str) -> impl std::fmt::Display { + fn p<'a>(s: &'a str) -> impl std::fmt::Display + 'a { Fc::parse(s).unwrap().unwrap() } @@ -487,14 +487,14 @@ pub mod tests { ); // Confusing keywords - insta::assert_display_snapshot!(p!(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]"); + insta::assert_display_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]"); } #[test] fn error() { use FilterCondition as Fc; - fn p(s: &str) -> impl std::fmt::Display { + fn p<'a>(s: &'a str) -> impl std::fmt::Display + 'a { Fc::parse(s).unwrap_err().to_string() }