Improve parser for NOT EXISTS filter

Allow multiple spaces between NOT and EXISTS
This commit is contained in:
Loïc Lecrenier 2022-06-15 09:14:19 +02:00
parent 0388b2d463
commit a5c9162250
3 changed files with 36 additions and 19 deletions

View File

@ -7,11 +7,12 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::multispace1;
use nom::combinator::cut;
use nom::sequence::{terminated, tuple};
use Condition::*;
use crate::{parse_value, FilterCondition, IResult, Span, Token};
use crate::{parse_value, ws, FilterCondition, IResult, Span, Token};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Condition<'a> {
@ -62,16 +63,17 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
Ok((input, condition))
}
/// exist = value NOT EXISTS
/// exist = value "EXISTS"
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
Ok((input, FilterCondition::Condition { fid: key.into(), op: Exists }))
}
/// exist = value NOT EXISTS
/// exist = value "NOT" WS* "EXISTS"
pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
let (input, key) = terminated(parse_value, tag("NOT EXISTS"))(input)?;
let (input, key) = parse_value(input)?;
let (input, _) = tuple((tag("NOT"), multispace1, tag("EXISTS")))(input)?;
Ok((input, FilterCondition::Condition { fid: key.into(), op: NotExists }))
}

View File

@ -1,21 +1,21 @@
//! BNF grammar:
//!
//! ```text
//! filter = expression ~ EOF
//! filter = expression EOF
//! expression = or
//! or = and ("OR" and)
//! and = not ("AND" not)*
//! not = ("NOT" not) | primary
//! primary = (WS* "(" expression ")" WS*) | geoRadius | condition | exists | not_exists | to
//! condition = value ("==" | ">" ...) value
//! exists = value EXISTS
//! not_exists = value NOT EXISTS
//! to = value value TO value
//! value = WS* ( word | singleQuoted | doubleQuoted) ~ WS*
//! exists = value "EXISTS"
//! not_exists = value "NOT" WS* "EXISTS"
//! to = value value "TO" value
//! value = WS* ( word | singleQuoted | doubleQuoted) WS*
//! singleQuoted = "'" .* all but quotes "'"
//! doubleQuoted = "\"" .* all but double quotes "\""
//! word = (alphanumeric | _ | - | .)+
//! geoRadius = WS* ~ "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
//! geoRadius = WS* "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
//! ```
//!
//! Other BNF grammar used to handle some specific errors:
@ -31,7 +31,7 @@
//! field < 12 AND _geoPoint(1, 2)
//! ```
//!
//! - If a user try to use a geoRadius as a value we must throw an error.
//! - If a user try to use a geoRadius as a value we must throw an error.
//! ```text
//! field = _geoRadius(12, 13, 14)
//! ```
@ -170,7 +170,7 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>)
delimited(multispace0, inner, multispace0)
}
/// or = and (~ "OR" ~ and)
/// or = and ("OR" and)
fn parse_or(input: Span) -> IResult<FilterCondition> {
let (input, lhs) = parse_and(input)?;
// if we found a `OR` then we MUST find something next
@ -182,7 +182,7 @@ fn parse_or(input: Span) -> IResult<FilterCondition> {
Ok((input, expr))
}
/// and = not (~ "AND" not)*
/// and = not ("AND" not)*
fn parse_and(input: Span) -> IResult<FilterCondition> {
let (input, lhs) = parse_not(input)?;
// if we found a `AND` then we MUST find something next
@ -193,14 +193,14 @@ fn parse_and(input: Span) -> IResult<FilterCondition> {
Ok((input, expr))
}
/// not = ("NOT" ~ not) | primary
/// not = ("NOT" not) | primary
/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`.
/// If we parse a `NOT` we MUST parse something behind.
fn parse_not(input: Span) -> IResult<FilterCondition> {
alt((map(preceded(tag("NOT"), cut(parse_not)), |e| e.negate()), parse_primary))(input)
}
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
/// geoRadius = WS* "_geoRadius(float "," float "," float)
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoRadius but not after
@ -224,7 +224,7 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
Ok((input, res))
}
/// geoPoint = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float)
/// geoPoint = WS* "_geoPoint(float "," float "," float)
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoPoint but not after
tuple((
@ -238,7 +238,7 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
}
/// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
/// primary = (WS* "(" expression ")" WS*) | geoRadius | condition | to
fn parse_primary(input: Span) -> IResult<FilterCondition> {
alt((
// if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
@ -266,7 +266,7 @@ pub fn parse_expression(input: Span) -> IResult<FilterCondition> {
parse_or(input)
}
/// filter = expression ~ EOF
/// filter = expression EOF
pub fn parse_filter(input: Span) -> IResult<FilterCondition> {
terminated(parse_expression, eof)(input)
}
@ -446,6 +446,20 @@ pub mod tests {
op: Condition::NotExists,
},
),
(
"NOT subscribers NOT EXISTS",
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Condition::Exists,
},
),
(
"subscribers NOT EXISTS",
Fc::Condition {
fid: rtok("", "subscribers"),
op: Condition::NotExists,
},
),
(
"subscribers 100 TO 1000",
Fc::Condition {
@ -616,6 +630,7 @@ pub mod tests {
("channel = \"ponce", "Expression `\\\"ponce` is missing the following closing delimiter: `\"`."),
("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."),
("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."),
("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."),
];
for (input, expected) in test_case {

View File

@ -48,7 +48,7 @@ fn quoted_by(quote: char, input: Span) -> IResult<Token> {
))
}
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
/// value = WS* ( word | singleQuoted | doubleQuoted) WS*
pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
let (input, _) = take_while(char::is_whitespace)(input)?;