Make filter parser more strict regarding spacing around operators

OR, AND, NOT, TO must now be followed by spaces
This commit is contained in:
Loïc Lecrenier 2022-06-16 09:12:37 +02:00
parent c17d616250
commit ea0642c32d
3 changed files with 32 additions and 26 deletions

View File

@ -44,8 +44,7 @@ impl<'a> Condition<'a> {
} }
} }
} }
/// condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
/// condition = value ("==" | ">" ...) value
pub fn parse_condition(input: Span) -> IResult<FilterCondition> { pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?; let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?;
@ -69,7 +68,7 @@ pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
Ok((input, FilterCondition::Condition { fid: key.into(), op: Exists })) Ok((input, FilterCondition::Condition { fid: key.into(), op: Exists }))
} }
/// exist = value "NOT" WS* "EXISTS" /// exist = value "NOT" WS+ "EXISTS"
pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> { pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
let (input, key) = parse_value(input)?; let (input, key) = parse_value(input)?;
@ -77,10 +76,10 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
Ok((input, FilterCondition::Condition { fid: key.into(), op: NotExists })) Ok((input, FilterCondition::Condition { fid: key.into(), op: NotExists }))
} }
/// to = value value TO value /// to = value value "TO" WS+ value
pub fn parse_to(input: Span) -> IResult<FilterCondition> { pub fn parse_to(input: Span) -> IResult<FilterCondition> {
let (input, (key, from, _, to)) = let (input, (key, from, _, _, to)) =
tuple((parse_value, parse_value, tag("TO"), cut(parse_value)))(input)?; tuple((parse_value, parse_value, tag("TO"), multispace1, cut(parse_value)))(input)?;
Ok((input, FilterCondition::Condition { fid: key, op: Between { from, to } })) Ok((input, FilterCondition::Condition { fid: key, op: Between { from, to } }))
} }

View File

@ -3,19 +3,19 @@
//! ```text //! ```text
//! filter = expression EOF //! filter = expression EOF
//! expression = or //! expression = or
//! or = and ("OR" and) //! or = and ("OR" WS+ and)*
//! and = not ("AND" not)* //! and = not ("AND" WS+ not)*
//! not = ("NOT" not) | primary //! not = ("NOT" WS+ not) | primary
//! primary = (WS* "(" expression ")" WS*) | geoRadius | condition | exists | not_exists | to //! primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to
//! condition = value ("==" | ">" ...) value //! condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
//! exists = value "EXISTS" //! exists = value "EXISTS"
//! not_exists = value "NOT" WS* "EXISTS" //! not_exists = value "NOT" WS+ "EXISTS"
//! to = value value "TO" value //! to = value value "TO" WS+ value
//! value = WS* ( word | singleQuoted | doubleQuoted) WS* //! value = WS* ( word | singleQuoted | doubleQuoted) WS+
//! singleQuoted = "'" .* all but quotes "'" //! singleQuoted = "'" .* all but quotes "'"
//! doubleQuoted = "\"" .* all but double quotes "\"" //! doubleQuoted = "\"" .* all but double quotes "\""
//! word = (alphanumeric | _ | - | .)+ //! word = (alphanumeric | _ | - | .)+
//! geoRadius = WS* "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")" //! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
//! ``` //! ```
//! //!
//! Other BNF grammar used to handle some specific errors: //! Other BNF grammar used to handle some specific errors:
@ -50,7 +50,7 @@ use error::{cut_with_err, NomErrorExt};
pub use error::{Error, ErrorKind}; pub use error::{Error, ErrorKind};
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::character::complete::{char, multispace0}; use nom::character::complete::{char, multispace0, multispace1};
use nom::combinator::{cut, eof, map}; use nom::combinator::{cut, eof, map};
use nom::multi::{many0, separated_list1}; use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float; use nom::number::complete::recognize_float;
@ -170,11 +170,11 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>)
delimited(multispace0, inner, multispace0) delimited(multispace0, inner, multispace0)
} }
/// or = and ("OR" and)* /// or = and ("OR" WS+ and)*
fn parse_or(input: Span) -> IResult<FilterCondition> { fn parse_or(input: Span) -> IResult<FilterCondition> {
let (input, lhs) = parse_and(input)?; let (input, lhs) = parse_and(input)?;
// if we found a `OR` then we MUST find something next // if we found a `OR` then we MUST find something next
let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?; let (input, ors) = many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?;
let expr = ors let expr = ors
.into_iter() .into_iter()
@ -186,24 +186,28 @@ fn parse_or(input: Span) -> IResult<FilterCondition> {
fn parse_and(input: Span) -> IResult<FilterCondition> { fn parse_and(input: Span) -> IResult<FilterCondition> {
let (input, lhs) = parse_not(input)?; let (input, lhs) = parse_not(input)?;
// if we found a `AND` then we MUST find something next // if we found a `AND` then we MUST find something next
let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?; let (input, ors) =
many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?;
let expr = ors let expr = ors
.into_iter() .into_iter()
.fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch)));
Ok((input, expr)) Ok((input, expr))
} }
/// not = ("NOT" not) | primary /// not = ("NOT" WS+ not) | primary
/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`. /// We can have multiple consecutive not, eg: `NOT NOT channel = mv`.
/// If we parse a `NOT` we MUST parse something behind. /// If we parse a `NOT` we MUST parse something behind.
fn parse_not(input: Span) -> IResult<FilterCondition> { fn parse_not(input: Span) -> IResult<FilterCondition> {
alt((map(preceded(tag("NOT"), cut(parse_not)), |e| e.negate()), parse_primary))(input) alt((
map(preceded(ws(tuple((tag("NOT"), multispace1))), cut(parse_not)), |e| e.negate()),
parse_primary,
))(input)
} }
/// geoRadius = WS* "_geoRadius(float WS* "," WS* float WS* "," WS* float) /// geoRadius = WS* "_geoRadius(float WS* "," WS* float WS* "," WS* float)
/// If we parse `_geoRadius` we MUST parse the rest of the expression. /// If we parse `_geoRadius` we MUST parse the rest of the expression.
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> { fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoRadius but not after // we want to allow space BEFORE the _geoRadius but not after
let parsed = preceded( let parsed = preceded(
tuple((multispace0, tag("_geoRadius"))), tuple((multispace0, tag("_geoRadius"))),
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
@ -238,7 +242,7 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))) Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
} }
/// primary = (WS* "(" expression ")" WS*) | geoRadius | condition | to /// primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to
fn parse_primary(input: Span) -> IResult<FilterCondition> { fn parse_primary(input: Span) -> IResult<FilterCondition> {
alt(( alt((
// if we find a first parenthesis, then we must parse an expression and find the closing parenthesis // if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
@ -620,7 +624,7 @@ pub mod tests {
("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `OR`."), ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `OR`."),
("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `AND`."), ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `AND`."),
("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."), ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."),
("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing."), ("channel = Ponce OR", "Found unexpected characters at the end of the filter: `OR`. You probably forgot an `OR` or an `AND` rule."),
("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
@ -631,6 +635,9 @@ pub mod tests {
("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."), ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."),
("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."), ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."),
("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."), ("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."),
("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."),
("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."),
("channel = ponce AND'dog' != 'bernese mountain'", "Found unexpected characters at the end of the filter: `AND\\'dog\\' != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."),
]; ];
for (input, expected) in test_case { for (input, expected) in test_case {

View File

@ -48,7 +48,7 @@ fn quoted_by(quote: char, input: Span) -> IResult<Token> {
)) ))
} }
/// value = WS* ( word | singleQuoted | doubleQuoted) WS* /// value = WS* ( word | singleQuoted | doubleQuoted) WS+
pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> { pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
// to get better diagnostic message we are going to strip the left whitespaces from the input right now // to get better diagnostic message we are going to strip the left whitespaces from the input right now
let (input, _) = take_while(char::is_whitespace)(input)?; let (input, _) = take_while(char::is_whitespace)(input)?;