From 238a7be58d03d3b646e4b49d2457cd3c6c60c02f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Wed, 17 Aug 2022 16:53:40 +0200 Subject: [PATCH] Fix filter parser handling of keywords and surrounding spaces Now the following fragments are allowed: AND(field = AND'field' = AND"field" = --- filter-parser/src/lib.rs | 29 ++++++++++++------------ filter-parser/src/value.rs | 46 +++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index d0c2d8531..09aa252e1 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -52,7 +52,7 @@ use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; -use nom::character::complete::{char, multispace0, multispace1}; +use nom::character::complete::{char, multispace0}; use nom::combinator::{cut, eof, map, opt}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; @@ -60,6 +60,7 @@ use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::Finish; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; +use value::word_exact; pub type Span<'a> = LocatedSpan<&'a str, &'a str>; @@ -183,7 +184,7 @@ fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { /// in = value "IN" "[" value_list "]" fn parse_in(input: Span) -> IResult { let (input, value) = parse_value(input)?; - let (input, _) = ws(tag("IN"))(input)?; + let (input, _) = ws(word_exact("IN"))(input)?; // everything after `IN` can be a failure let (input, _) = @@ -215,9 +216,8 @@ fn parse_in(input: Span) -> IResult { /// in = value "NOT" WS* "IN" "[" value_list "]" fn parse_not_in(input: Span) -> IResult { let (input, value) = parse_value(input)?; - let (input, _) = tag("NOT")(input)?; - let (input, _) = multispace1(input)?; - let (input, _) = ws(tag("IN"))(input)?; + let (input, _) = word_exact("NOT")(input)?; + let (input, _) = ws(word_exact("IN"))(input)?; // everything after `IN` can be a failure let (input, _) = @@ -241,8 +241,7 @@ fn parse_not_in(input: Span) -> IResult { fn parse_or(input: Span) -> IResult { let (input, first_filter) = parse_and(input)?; // if we found a `OR` then we MUST find something next - let (input, mut ors) = - many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?; + let (input, mut ors) = many0(preceded(ws(word_exact("OR")), cut(parse_and)))(input)?; let filter = if ors.is_empty() { first_filter @@ -258,8 +257,7 @@ fn parse_or(input: Span) -> IResult { fn parse_and(input: Span) -> IResult { let (input, first_filter) = parse_not(input)?; // if we found a `AND` then we MUST find something next - let (input, mut ands) = - many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?; + let (input, mut ands) = many0(preceded(ws(word_exact("AND")), cut(parse_not)))(input)?; let filter = if ands.is_empty() { first_filter @@ -276,9 +274,7 @@ fn parse_and(input: Span) -> IResult { /// If we parse a `NOT` we MUST parse something behind. fn parse_not(input: Span) -> IResult { alt(( - map(preceded(ws(tuple((tag("NOT"), multispace1))), cut(parse_not)), |e| { - FilterCondition::Not(Box::new(e)) - }), + map(preceded(ws(word_exact("NOT")), cut(parse_not)), |e| FilterCondition::Not(Box::new(e))), parse_primary, ))(input) } @@ -288,7 +284,7 @@ fn parse_not(input: Span) -> IResult { fn parse_geo_radius(input: Span) -> IResult { // we want to allow space BEFORE the _geoRadius but not after let parsed = preceded( - tuple((multispace0, tag("_geoRadius"))), + tuple((multispace0, word_exact("_geoRadius"))), // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), )(input) @@ -741,6 +737,10 @@ pub mod tests { Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() ]) ) + // ( + // ("channel = ponce AND'dog' != 'bernese mountain'", ), + // ("channel = ponce AND('dog' != 'bernese mountain')", ), + // ) ]; for (input, expected) in test_case { @@ -770,7 +770,7 @@ pub mod tests { ("'OR'", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\\'OR\\'`."), ("OR", "Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes."), ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."), - ("channel = Ponce OR", "Found unexpected characters at the end of the filter: `OR`. You probably forgot an `OR` or an `AND` rule."), + ("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing."), ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), @@ -783,7 +783,6 @@ pub mod tests { ("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."), ("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."), ("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), - ("channel = ponce AND'dog' != 'bernese mountain'", "Found unexpected characters at the end of the filter: `AND\\'dog\\' != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), ("colour IN blue, green]", "Expected `[` after `IN` keyword."), ("colour IN [blue, green, 'blue' > 2]", "Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`"), ("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"), diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index bfa3c2730..90dc44604 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -3,7 +3,7 @@ use nom::bytes::complete::{take_till, take_while, take_while1}; use nom::character::complete::{char, multispace0}; use nom::combinator::cut; use nom::sequence::{delimited, terminated}; -use nom::{InputIter, InputLength, InputTake, Slice}; +use nom::{error, InputIter, InputLength, InputTake, Slice}; use crate::error::{ExpectedValueKind, NomErrorExt}; use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; @@ -48,6 +48,35 @@ fn quoted_by(quote: char, input: Span) -> IResult { )) } +// word = (alphanumeric | _ | - | .)+ except for reserved keywords +pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult> { + let (input, word): (_, Token<'a>) = + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; + if is_keyword(word.value()) { + return Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::ReservedKeyword(word.value().to_owned()), + ))); + } + Ok((input, word)) +} + +// word = {tag} +pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> { + move |input| { + let (input, word): (_, Token<'a>) = + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; + if word.value() == tag { + Ok((input, word)) + } else { + Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::InternalError(nom::error::ErrorKind::Tag), + ))) + } + } +} + /// value = WS* ( word | singleQuoted | doubleQuoted) WS+ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { // to get better diagnostic message we are going to strip the left whitespaces from the input right now @@ -71,19 +100,6 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { _ => (), } - // word = (alphanumeric | _ | - | .)+ except for reserved keywords - let word = |input: Span<'a>| -> IResult> { - let (input, word): (_, Token<'a>) = - take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; - if is_keyword(word.value()) { - return Err(nom::Err::Error(Error::new_from_kind( - input, - ErrorKind::ReservedKeyword(word.value().to_owned()), - ))); - } - Ok((input, word)) - }; - // this parser is only used when an error is encountered and it parse the // largest string possible that do not contain any “language” syntax. // If we try to parse `name = 🦀 AND language = rust` we want to return an @@ -97,7 +113,7 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { alt(( delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))), delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))), - word, + word_not_keyword, )), multispace0, )(input)