diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index a3720f7bf..0d2959126 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -57,6 +57,10 @@ pub enum ErrorKind<'a> { ExpectedEof, ExpectedValue, MalformedValue, + InOpeningBracket, + InClosingBracket, + InExpectedValue, + ReservedKeyword(String), MissingClosingDelimiter(char), Char(char), InternalError(error::ErrorKind), @@ -109,12 +113,11 @@ impl<'a> ParseError> for Error<'a> { impl<'a> Display for Error<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let input = self.context.fragment(); - // When printing our error message we want to escape all `\n` to be sure we keep our format with the // first line being the diagnostic and the second line being the incriminated filter. let escaped_input = input.escape_debug(); - match self.kind { + match &self.kind { ErrorKind::ExpectedValue if input.trim().is_empty() => { writeln!(f, "Was expecting a value but instead got nothing.")? } @@ -145,6 +148,18 @@ impl<'a> Display for Error<'a> { ErrorKind::MisusedGeo => { writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? } + ErrorKind::ReservedKeyword(word) => { + writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")? + } + ErrorKind::InOpeningBracket => { + writeln!(f, "Expected `[` after `IN` keyword.")? + } + ErrorKind::InClosingBracket => { + writeln!(f, "Expected matching `]` after the list of field names given to `IN[`")? + } + ErrorKind::InExpectedValue => { + writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`")? + } ErrorKind::Char(c) => { panic!("Tried to display a char error with `{}`", c) } diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 4b78b86b8..12edd56c8 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -167,6 +167,12 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) /// value_list = (value ("," value)* ","?)? fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { + + // TODO: here, I should return a failure with a clear explanation whenever possible + // for example: + // * expected the name of a field, but got `AND` + // * expected closing square bracket, but got `AND` + let (input, first_value) = opt(parse_value)(input)?; if let Some(first_value) = first_value { let value_list_el_parser = preceded(ws(tag(",")), parse_value); @@ -186,9 +192,22 @@ fn parse_in(input: Span) -> IResult { let (input, value) = parse_value(input)?; let (input, _) = ws(tag("IN"))(input)?; - let mut els_parser = delimited(tag("["), parse_value_list, tag("]")); + // everything after `IN` can be a failure + let (input, _) = cut_with_err(tag("["), |_| { + Error::new_from_kind(input, ErrorKind::InOpeningBracket) + })(input)?; + + let (input, content) = cut(parse_value_list)(input)?; + + // everything after `IN` can be a failure + let (input, _) = cut_with_err(tag("]"), |_| { + if eof::<_, ()>(input).is_ok() { + Error::new_from_kind(input, ErrorKind::InClosingBracket) + } else { + Error::new_from_kind(input, ErrorKind::InExpectedValue) + } + })(input)?; - let (input, content) = els_parser(input)?; let filter = FilterCondition::In { fid: value, els: content }; Ok((input, filter)) } @@ -199,9 +218,19 @@ fn parse_not_in(input: Span) -> IResult { let (input, _) = multispace1(input)?; let (input, _) = ws(tag("IN"))(input)?; - let mut els_parser = delimited(tag("["), parse_value_list, tag("]")); - let (input, content) = els_parser(input)?; + // everything after `IN` can be a failure + let (input, _) = cut_with_err(tag("["), |_| { + Error::new_from_kind(input, ErrorKind::InOpeningBracket) + })(input)?; + + let (input, content) = cut(parse_value_list)(input)?; + + // everything after `IN` can be a failure + let (input, _) = cut_with_err(tag("]"), |_| { + Error::new_from_kind(input, ErrorKind::InClosingBracket) + })(input)?; + let filter = FilterCondition::Not(Box::new(FilterCondition::In { fid: value, els: content })); Ok((input, filter)) } @@ -313,6 +342,9 @@ fn parse_primary(input: Span) -> IResult { ))(input) // if the inner parsers did not match enough information to return an accurate error .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary))) + + // TODO: if the filter starts with a reserved keyword that is not NOT, then we should return the reserved keyword error + // TODO: if the filter is x = reserved, idem } /// expression = or @@ -344,6 +376,13 @@ pub mod tests { let test_case = [ // simple test + ( + "x = AND", + Fc::Not(Box::new(Fc::Not(Box::new(Fc::In { + fid: rtok("NOT NOT", "colour"), + els: vec![] + })))) + ), ( "colour IN[]", Fc::In { @@ -734,6 +773,11 @@ pub mod tests { ("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."), ("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), ("channel = ponce AND'dog' != 'bernese mountain'", "Found unexpected characters at the end of the filter: `AND\\'dog\\' != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), + ("colour IN blue, green]", "Expected `[` after `IN` keyword."), + ("colour IN [blue, green, 'blue' > 2]", "Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`"), + ("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"), + ("colour IN [blue, green", "Expected matching `]` after the list of field names given to `IN[`"), + ("colour IN ['blue, green", "Expression `\\'blue, green` is missing the following closing delimiter: `'`."), ]; for (input, expected) in test_case { diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 22da6a0df..8a7e8f586 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -71,9 +71,17 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { _ => (), } - // word = (alphanumeric | _ | - | .)+ + // word = (alphanumeric | _ | - | .)+ except for reserved keywords let word = |input: Span<'a>| -> IResult> { - take_while1(is_value_component)(input).map(|(s, t)| (s, t.into())) + let (input, word): (_, Token<'a>) = + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; + if is_keyword(word.value()) { + return Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::ReservedKeyword(word.value().to_owned()), + ))); + } + Ok((input, word)) }; // this parser is only used when an error is encountered and it parse the @@ -85,7 +93,7 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { // when we create the errors from the output of the alt we have spaces everywhere let error_word = take_till::<_, _, Error>(is_syntax_component); - terminated( + let (input, value) = terminated( alt(( delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))), delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))), @@ -107,7 +115,9 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { failure } }) - }) + })?; + + Ok((input, value)) } fn is_value_component(c: char) -> bool { @@ -118,6 +128,10 @@ fn is_syntax_component(c: char) -> bool { c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c) } +fn is_keyword(s: &str) -> bool { + matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius") +} + #[cfg(test)] pub mod test { use nom::Finish;