update the filter parser and some code for the fuzzer

This commit is contained in:
Tamo 2021-11-04 14:22:35 +01:00
parent 5d3af5f273
commit 54aec7ac5f
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
50 changed files with 406 additions and 51 deletions

3
filter_parser/fuzz/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
target
corpus
artifacts

View File

@ -0,0 +1,25 @@
[package]
name = "filter_parser-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
[dependencies.filter_parser]
path = ".."
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[[bin]]
name = "parse"
path = "fuzz_targets/parse.rs"
test = false
doc = false

View File

@ -0,0 +1 @@
channel = Ponce

View File

@ -0,0 +1 @@
channel != ponce

View File

@ -0,0 +1 @@
NOT channel = ponce

View File

@ -0,0 +1 @@
subscribers < 1000

View File

@ -0,0 +1 @@
subscribers > 1000

View File

@ -0,0 +1 @@
subscribers <= 1000

View File

@ -0,0 +1 @@
subscribers >= 1000

View File

@ -0,0 +1 @@
NOT subscribers < 1000

View File

@ -0,0 +1 @@
NOT subscribers > 1000

View File

@ -0,0 +1 @@
NOT subscribers <= 1000

View File

@ -0,0 +1 @@
NOT subscribers >= 1000

View File

@ -0,0 +1 @@
subscribers = 12

View File

@ -0,0 +1 @@
subscribers 100 TO 1000

View File

@ -0,0 +1 @@
NOT subscribers 100 TO 1000

View File

@ -0,0 +1 @@
_geoRadius(12, 13, 14)

View File

@ -0,0 +1 @@
NOT _geoRadius(12, 13, 14)

View File

@ -0,0 +1 @@
channel = ponce AND 'dog race' != 'bernese mountain'

View File

@ -0,0 +1 @@
channel = ponce OR 'dog race' != 'bernese mountain'

View File

@ -0,0 +1 @@
channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000

View File

@ -0,0 +1 @@
channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )

View File

@ -0,0 +1 @@
(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)

View File

@ -0,0 +1 @@
channel = Ponce = 12

View File

@ -0,0 +1 @@
channel = 'Mister Mv'

View File

@ -0,0 +1 @@
channel =

View File

@ -0,0 +1 @@
channel = 🐻

View File

@ -0,0 +1 @@
OR

View File

@ -0,0 +1 @@
AND

View File

@ -0,0 +1 @@
channel Ponce

View File

@ -0,0 +1 @@
channel = Ponce OR

View File

@ -0,0 +1 @@
_geoRadius

View File

@ -0,0 +1 @@
_geoRadius = 12

View File

@ -0,0 +1 @@
_geoPoint(12, 13, 14)

View File

@ -0,0 +1 @@
position <= _geoPoint(12, 13, 14)

View File

@ -0,0 +1 @@
channel = "Mister Mv"

View File

@ -0,0 +1 @@
position <= _geoRadius(12, 13, 14)

View File

@ -0,0 +1 @@
channel = 'ponce

View File

@ -0,0 +1 @@
channel = "ponce

View File

@ -0,0 +1 @@
channel = mv OR (followers >= 1000

View File

@ -0,0 +1 @@
'dog race' = Borzoi

View File

@ -0,0 +1 @@
"dog race" = Chusky

View File

@ -0,0 +1 @@
"dog race" = "Bernese Mountain"

View File

@ -0,0 +1 @@
'dog race' = 'Bernese Mountain'

View File

@ -0,0 +1 @@
"dog race" = 'Bernese Mountain'

View File

@ -0,0 +1,13 @@
#![no_main]
use filter_parser::FilterCondition;
use libfuzzer_sys::fuzz_target;
fuzz_target!(|data: &[u8]| {
    // Fuzzing the parser overflows the stack very easily, which does not happen with a
    // normal build, so only valid UTF-8 inputs shorter than 500 bytes are handed to the
    // parser. Everything else is silently ignored.
    match std::str::from_utf8(data) {
        Ok(filter) if filter.len() < 500 => {
            // Only panics and crashes matter here; the parse result itself is irrelevant.
            let _ = FilterCondition::parse(filter);
        }
        _ => {}
    }
});

195
filter_parser/src/error.rs Normal file
View File

@ -0,0 +1,195 @@
use std::fmt::Display;
use nom::{Parser, error::{self, ParseError}};
use crate::{IResult, Span};
/// Extra helpers on top of [`nom::Err`] that make it possible to rewrite a recoverable
/// error and an unrecoverable failure independently of each other.
pub trait ExtendNomError<E> {
    /// Returns `true` when the value is a `nom::Err::Failure`.
    fn is_failure(&self) -> bool;

    /// Applies `op` to the payload of a recoverable `Error`.
    /// A `Failure` is returned untouched.
    fn map_err<O>(self, op: O) -> nom::Err<E>
    where
        O: FnOnce(E) -> E;

    /// Applies `op` to the payload of an unrecoverable `Failure`.
    /// An `Error` is returned untouched.
    fn map_fail<O>(self, op: O) -> nom::Err<E>
    where
        O: FnOnce(E) -> E;
}
impl<E> ExtendNomError<E> for nom::Err<E> {
    /// Returns `true` only for the `Failure` variant.
    fn is_failure(&self) -> bool {
        match self {
            Self::Failure(_) => true,
            _ => false,
        }
    }

    /// Rewrites the payload of an `Error`; `Failure` and `Incomplete` pass through unchanged.
    fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        match self {
            Self::Error(inner) => Self::Error(op(inner)),
            untouched => untouched,
        }
    }

    /// Rewrites the payload of a `Failure`; `Error` and `Incomplete` pass through unchanged.
    fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        match self {
            Self::Failure(inner) => Self::Failure(op(inner)),
            untouched => untouched,
        }
    }
}
/// cut a parser and map the error
pub fn cut_with_err<'a, O>(mut parser: impl FnMut(Span<'a>) -> IResult<O>, mut with: impl FnMut(Error<'a>) -> Error<'a>) -> impl FnMut(Span<'a>) -> IResult<O> {
move |input| match parser.parse(input) {
Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))),
rest => rest,
}
}
/// A filter parsing error: what went wrong (`kind`) and where it was detected (`context`).
///
/// The `Display` implementation renders a message chosen by `kind`, followed by the line,
/// the UTF-8 column and the `extra` data carried by `context`.
#[derive(Debug)]
pub struct Error<'a> {
/// The span of the input at which the error was detected.
context: Span<'a>,
/// The category of the error; it selects the message shown to the user.
kind: ErrorKind<'a>,
}
/// The categories of error the filter parser can report.
/// Each variant selects the message rendered when an `Error` is displayed.
#[derive(Debug)]
pub enum ErrorKind<'a> {
/// A reserved geo keyword was used as a filter expression; holds the keyword's name.
ReservedGeo(&'a str),
/// A `_geoRadius` filter was not written as `_geoRadius(latitude, longitude, radius)`.
Geo,
/// A `_geoRadius` filter was used where a value was expected.
MisusedGeo,
/// An operation (`=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius`) was expected.
InvalidPrimary,
/// A reserved keyword was encountered.
ReservedKeyword,
/// Unexpected characters were found at the end of the filter (built from nom's `Eof` kind).
ExpectedEof,
/// A value was expected but nothing usable was found.
ExpectedValue,
/// An expression is missing its closing delimiter; holds that delimiter.
MissingClosingDelimiter(char),
/// Unexpected input was found (built from nom's `Tag` kind with an empty list).
/// NOTE(review): the list presumably holds the inputs that were expected — confirm.
UnexpectedInput(Vec<&'a str>),
/// A free-form context label.
/// NOTE(review): no code visible in this module constructs this variant — confirm it is still needed.
Context(&'a str),
/// A nom `char` parser failed on the held character (built by `from_char`).
/// This is an intermediate kind that callers convert, through `Error::char`, into a
/// user-facing kind before the error is shown.
Char(char),
/// An internal parser error that is not expected to reach the user.
Unreachable,
}
impl<'a> Error<'a> {
pub fn kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self {
Self { context, kind }
}
pub fn char(self) -> char {
match self.kind {
ErrorKind::Char(c) => c,
_ => panic!("Internal filter parser error"),
}
}
}
impl<'a> ParseError<Span<'a>> for Error<'a> {
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
let kind = match kind {
error::ErrorKind::Eof => ErrorKind::ExpectedEof,
error::ErrorKind::Tag => ErrorKind::UnexpectedInput(Vec::new()),
error::ErrorKind::MapRes => todo!(),
error::ErrorKind::MapOpt => todo!(),
error::ErrorKind::Alt => todo!(),
error::ErrorKind::IsNot => todo!(),
error::ErrorKind::IsA => todo!(),
error::ErrorKind::SeparatedList => todo!(),
error::ErrorKind::SeparatedNonEmptyList => todo!(),
error::ErrorKind::Many0 => todo!(),
error::ErrorKind::Many1 => todo!(),
error::ErrorKind::ManyTill => todo!(),
error::ErrorKind::Count => todo!(),
error::ErrorKind::TakeUntil => todo!(),
error::ErrorKind::LengthValue => todo!(),
error::ErrorKind::TagClosure => todo!(),
error::ErrorKind::Alpha => todo!(),
error::ErrorKind::Digit => todo!(),
error::ErrorKind::HexDigit => todo!(),
error::ErrorKind::OctDigit => todo!(),
error::ErrorKind::AlphaNumeric => todo!(),
error::ErrorKind::Space => todo!(),
error::ErrorKind::MultiSpace => todo!(),
error::ErrorKind::LengthValueFn => todo!(),
error::ErrorKind::Switch => todo!(),
error::ErrorKind::TagBits => todo!(),
error::ErrorKind::OneOf => todo!(),
error::ErrorKind::NoneOf => todo!(),
error::ErrorKind::Char => todo!(),
error::ErrorKind::CrLf => todo!(),
error::ErrorKind::RegexpMatch => todo!(),
error::ErrorKind::RegexpMatches => todo!(),
error::ErrorKind::RegexpFind => todo!(),
error::ErrorKind::RegexpCapture => todo!(),
error::ErrorKind::RegexpCaptures => todo!(),
error::ErrorKind::TakeWhile1 => ErrorKind::Unreachable,
error::ErrorKind::Complete => todo!(),
error::ErrorKind::Fix => todo!(),
error::ErrorKind::Escaped => todo!(),
error::ErrorKind::EscapedTransform => todo!(),
error::ErrorKind::NonEmpty => todo!(),
error::ErrorKind::ManyMN => todo!(),
error::ErrorKind::Not => todo!(),
error::ErrorKind::Permutation => todo!(),
error::ErrorKind::Verify => todo!(),
error::ErrorKind::TakeTill1 => todo!(),
error::ErrorKind::TakeWhileMN => todo!(),
error::ErrorKind::TooLarge => todo!(),
error::ErrorKind::Many0Count => todo!(),
error::ErrorKind::Many1Count => todo!(),
error::ErrorKind::Float => todo!(),
error::ErrorKind::Satisfy => todo!(),
error::ErrorKind::Fail => todo!(),
};
Self { context: input, kind }
}
fn append(_input: Span<'a>, _kind: error::ErrorKind, other: Self) -> Self {
other
}
fn from_char(input: Span<'a>, c: char) -> Self {
Self { context: input, kind: ErrorKind::Char(c) }
}
}
/// Renders the user-facing message of the error followed by its location.
///
/// # Panics
///
/// Formatting an `ErrorKind::Char` panics: this kind is internal to the parser and must be
/// converted into a user-facing kind before the error is displayed.
impl<'a> Display for Error<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // the part of the filter located at the error
        let input = self.context.fragment();

        match self.kind {
            // the guarded arms must stay before their unguarded counterpart
            ErrorKind::ExpectedValue if input.trim().is_empty() => {
                writeln!(f, "Was expecting a value but instead got nothing.")?
            }
            ErrorKind::MissingClosingDelimiter(c) => {
                writeln!(f, "Expression `{}` is missing the following closing delemiter: `{}`.", input, c)?
            }
            ErrorKind::ExpectedValue => {
                writeln!(f, "Was expecting a value but instead got `{}`.", input)?
            }
            ErrorKind::InvalidPrimary if input.trim().is_empty() => {
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing.")?
            }
            ErrorKind::InvalidPrimary => {
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", input)?
            }
            ErrorKind::ExpectedEof => {
                writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", input)?
            }
            ErrorKind::Geo => {
                writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
            }
            ErrorKind::ReservedGeo(name) => {
                writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name)?
            }
            ErrorKind::MisusedGeo => {
                writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
            }
            ErrorKind::Char(c) => {
                // deliberate: a `Char` error reaching this point is a bug in the parser
                panic!("Tried to display a char error with `{}`", c)
            }
            ErrorKind::ReservedKeyword => writeln!(f, "reserved keyword")?,
            ErrorKind::UnexpectedInput(ref v) => writeln!(f, "Unexpected input found `{}`, vec: `{:?}`", input, v)?,
            // print the context label instead of panicking on an unfinished `todo!()`
            ErrorKind::Context(context) => writeln!(f, "{}", context)?,
            ErrorKind::Unreachable => writeln!(
                f,
                "Encountered an internal error while parsing your filter. Please fill an issue"
            )?,
        }

        // location of the error: `line:column in <extra data of the span>`
        write!(
            f,
            "{}:{} in `{}`.",
            self.context.location_line(),
            self.context.get_utf8_column(),
            self.context.extra,
        )
    }
}

View File

@ -20,6 +20,20 @@
//! ```text //! ```text
//! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")" //! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")"
//! ``` //! ```
//!
//! Specific errors:
//! ================
//! - If a user tries to use a geoPoint, as a primary OR as a value, we must throw an error.
//! ```text
//! field = _geoPoint(12, 13, 14)
//! field < 12 AND _geoPoint(1, 2)
//! ```
//!
//! - If a user tries to use a geoRadius as a value, we must throw an error.
//! ```text
//! field = _geoRadius(12, 13, 14)
//! ```
//!
mod condition; mod condition;
mod error; mod error;
@ -28,12 +42,12 @@ mod value;
use std::fmt::Debug; use std::fmt::Debug;
pub use condition::{parse_condition, parse_to, Condition}; pub use condition::{parse_condition, parse_to, Condition};
use error::{cut_with_err, ExtendNomError};
pub use error::{Error, ErrorKind}; pub use error::{Error, ErrorKind};
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::character::complete::{char, multispace0}; use nom::character::complete::{char, multispace0};
use nom::combinator::{cut, eof, map}; use nom::combinator::{cut, eof, map};
use nom::error::{ContextError, ParseError};
use nom::multi::{many0, separated_list1}; use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float; use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::sequence::{delimited, preceded, terminated, tuple};
@ -102,14 +116,15 @@ impl<'a> FilterCondition<'a> {
} }
} }
// remove OPTIONAL whitespaces before AND after the the provided parser /// remove OPTIONAL whitespaces before AND after the the provided parser.
fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> { fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> {
delimited(multispace0, inner, multispace0) delimited(multispace0, inner, multispace0)
} }
/// and = not (~ "AND" not)* /// or = and (~ "OR" ~ and)
fn parse_or(input: Span) -> IResult<FilterCondition> { fn parse_or(input: Span) -> IResult<FilterCondition> {
let (input, lhs) = parse_and(input)?; let (input, lhs) = parse_and(input)?;
// if we found a `OR` then we MUST find something next
let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?; let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?;
let expr = ors let expr = ors
@ -118,8 +133,10 @@ fn parse_or(input: Span) -> IResult<FilterCondition> {
Ok((input, expr)) Ok((input, expr))
} }
/// and = not (~ "AND" not)*
fn parse_and(input: Span) -> IResult<FilterCondition> { fn parse_and(input: Span) -> IResult<FilterCondition> {
let (input, lhs) = parse_not(input)?; let (input, lhs) = parse_not(input)?;
// if we found a `AND` then we MUST find something next
let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?; let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?;
let expr = ors let expr = ors
.into_iter() .into_iter()
@ -128,28 +145,29 @@ fn parse_and(input: Span) -> IResult<FilterCondition> {
} }
/// not = ("NOT" | "!") not | primary /// not = ("NOT" | "!") not | primary
/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`.
/// If we parse a `NOT` or `!` we MUST parse something behind.
fn parse_not(input: Span) -> IResult<FilterCondition> { fn parse_not(input: Span) -> IResult<FilterCondition> {
alt(( alt((map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), parse_primary))(
map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), input,
cut(parse_primary), )
))(input)
} }
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> { fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`";
// we want to forbid space BEFORE the _geoRadius but not after // we want to forbid space BEFORE the _geoRadius but not after
let parsed = preceded::<_, _, _, _, _, _>( let parsed = preceded(
tuple((multispace0, tag("_geoRadius"))), tuple((multispace0, tag("_geoRadius"))),
// if we were able to parse `_geoRadius` and can't parse the rest of the input we returns a failure
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
)(input); )(input)
.map_err(|e| e.map(|_| Error::kind(input, ErrorKind::Geo)));
let (input, args): (Span, Vec<Span>) = parsed?; let (input, args) = parsed?;
if args.len() != 3 { if args.len() != 3 {
let e = Error::from_char(input, '('); return Err(nom::Err::Failure(Error::kind(input, ErrorKind::Geo)));
return Err(nom::Err::Failure(Error::add_context(input, err_msg_args_incomplete, e)));
} }
let res = FilterCondition::GeoLowerThan { let res = FilterCondition::GeoLowerThan {
@ -159,14 +177,39 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
Ok((input, res)) Ok((input, res))
} }
/// geoPoint = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float)
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoPoint but not after
tuple((
multispace0,
tag("_geoPoint"),
// if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next.
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
))(input)
.map_err(|e| e.map(|_| Error::kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
// if we succeeded we still returns a Failure because geoPoints are not allowed
Err(nom::Err::Failure(Error::kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
}
/// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to /// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
fn parse_primary(input: Span) -> IResult<FilterCondition> { fn parse_primary(input: Span) -> IResult<FilterCondition> {
alt(( alt((
delimited(ws(char('(')), cut(parse_expression), cut(ws(char(')')))), // if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
delimited(
ws(char('(')),
cut(parse_expression),
cut_with_err(ws(char(')')), |c| {
Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
}),
),
|c| parse_geo_radius(c), |c| parse_geo_radius(c),
|c| parse_condition(c), |c| parse_condition(c),
|c| parse_to(c), |c| parse_to(c),
// the next lines are only for error handling and are written at the end to have the less possible performance impact
|c| parse_geo_point(c),
))(input) ))(input)
// if the inner parsers did not match enough information to return an accurate error
.map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::InvalidPrimary)))
} }
/// expression = or /// expression = or
@ -484,18 +527,24 @@ pub mod tests {
fn error() { fn error() {
use FilterCondition as Fc; use FilterCondition as Fc;
let result = Fc::parse("test = truc OR truc");
assert!(result.is_err());
let test_case = [ let test_case = [
// simple test // simple test
("channel = Ponce = 12", "An error occured"), ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."),
("OR", "An error occured"), ("channel = ", "Was expecting a value but instead got nothing."),
("AND", "An error occured"), ("channel = 🐻", "Was expecting a value but instead got `🐻`."),
("channel = Ponce OR", "An error occured"), ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."),
("_geoRadius = 12", "An error occured"), ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."),
("_geoPoint(12, 13, 14)", "An error occured"), ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."),
("_geo = _geoRadius(12, 13, 14)", "An error occured"), ("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing."),
("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."),
("channel = 'ponce", "Expression `'ponce` is missing the following closing delemiter: `'`."),
("channel = \"ponce", "Expression `\"ponce` is missing the following closing delemiter: `\"`."),
("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delemiter: `)`."),
("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."),
]; ];
for (input, expected) in test_case { for (input, expected) in test_case {
@ -503,24 +552,12 @@ pub mod tests {
assert!( assert!(
result.is_err(), result.is_err(),
"Filter `{:?}` wasn't supposed to be parsed but it did with the following result: `{:?}`", "Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`",
expected, input,
result.unwrap() result.unwrap()
); );
let filter = result.unwrap_err().to_string(); let filter = result.unwrap_err().to_string();
assert_eq!(filter, expected, "Filter `{:?}` was supposed to return the following error: `{}`, but instead returned `{}`.", input, filter, expected); assert!(filter.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, filter);
} }
} }
/*
#[test]
fn bidule() {
use FilterCondition as Fc;
let result = Fc::parse::<crate::Error<Span>>("test = truc OR truc");
dbg!(result);
assert!(false);
}
*/
} }

11
filter_parser/src/main.rs Normal file
View File

@ -0,0 +1,11 @@
/// Parses the filter given as the first command line argument and reports whether it is
/// valid, printing the parser's error message when it is not.
fn main() {
    let input = std::env::args().nth(1).expect("You must provide a filter to test");
    println!("Trying to execute the following filter:\n{}\n\n", input);

    match filter_parser::FilterCondition::parse(&input) {
        Ok(_) => println!("✅ Valid filter"),
        Err(e) => println!("{}", e),
    }
}

View File

@ -1,12 +1,29 @@
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::{take_till, take_while1}; use nom::bytes::complete::{take_till, take_while, take_while1};
use nom::character::complete::char; use nom::character::complete::{char, multispace0};
use nom::sequence::delimited; use nom::combinator::cut;
use nom::sequence::{delimited, terminated};
use crate::{ws, Error, IResult, Span, Token}; use crate::error::ExtendNomError;
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
pub fn parse_value(input: Span) -> IResult<Token> { pub fn parse_value(input: Span) -> IResult<Token> {
// before anything we want to check if the user is misusing a geo expression
let err = parse_geo_point(input).unwrap_err();
if err.is_failure() {
return Err(err);
}
match parse_geo_radius(input) {
Ok(_) => return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo))),
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
// But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
Err(e) if e.is_failure() => {
return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo)))
}
_ => (),
}
// singleQuoted = "'" .* all but quotes "'" // singleQuoted = "'" .* all but quotes "'"
let simple_quoted = |input| take_till(|c: char| c == '\'')(input); let simple_quoted = |input| take_till(|c: char| c == '\'')(input);
// doubleQuoted = "\"" (word | spaces)* "\"" // doubleQuoted = "\"" (word | spaces)* "\""
@ -14,13 +31,23 @@ pub fn parse_value(input: Span) -> IResult<Token> {
// word = (alphanumeric | _ | - | .)+ // word = (alphanumeric | _ | - | .)+
let word = |input| take_while1(is_key_component)(input); let word = |input| take_while1(is_key_component)(input);
ws(alt(( // we want to remove the space before entering the alt because if we don't,
delimited(char('\''), simple_quoted, char('\'')), // when we create the errors from the output of the alt we have spaces everywhere
delimited(char('"'), double_quoted, char('"')), let (input, _) = take_while(char::is_whitespace)(input)?;
word,
)))(input) terminated(
alt((
delimited(char('\''), simple_quoted, cut(char('\''))),
delimited(char('"'), double_quoted, cut(char('"'))),
word,
)),
multispace0,
)(input)
.map(|(s, t)| (s, t.into())) .map(|(s, t)| (s, t.into()))
.map_err(|e| e.map(|_| Error::expected_value(input))) // if we found nothing in the alt it means the user did not input any value
.map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::ExpectedValue)))
// if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote
.map_err(|e| e.map_fail(|c| Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char()))))
} }
fn is_key_component(c: char) -> bool { fn is_key_component(c: char) -> bool {
@ -38,12 +65,13 @@ pub mod tests {
("channel", rtok("", "channel")), ("channel", rtok("", "channel")),
(".private", rtok("", ".private")), (".private", rtok("", ".private")),
("I-love-kebab", rtok("", "I-love-kebab")), ("I-love-kebab", rtok("", "I-love-kebab")),
("but_snakes_are_also_good", rtok("", "but_snakes_are_also_good")), ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
("parens(", rtok("", "parens")), ("parens(", rtok("", "parens")),
("parens)", rtok("", "parens")), ("parens)", rtok("", "parens")),
("not!", rtok("", "not")), ("not!", rtok("", "not")),
(" channel", rtok(" ", "channel")), (" channel", rtok(" ", "channel")),
("channel ", rtok("", "channel")), ("channel ", rtok("", "channel")),
(" channel ", rtok(" ", "channel")),
("'channel'", rtok("'", "channel")), ("'channel'", rtok("'", "channel")),
("\"channel\"", rtok("\"", "channel")), ("\"channel\"", rtok("\"", "channel")),
("'cha)nnel'", rtok("'", "cha)nnel")), ("'cha)nnel'", rtok("'", "cha)nnel")),