From 54aec7ac5f541b0b5a160e3a790a4688613f0d8b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 14:22:35 +0100 Subject: [PATCH] update the filter parser and some code for the fuzzer --- filter_parser/fuzz/.gitignore | 3 + filter_parser/fuzz/Cargo.toml | 25 +++ filter_parser/fuzz/corpus/parse/test_1 | 1 + filter_parser/fuzz/corpus/parse/test_10 | 1 + filter_parser/fuzz/corpus/parse/test_11 | 1 + filter_parser/fuzz/corpus/parse/test_12 | 1 + filter_parser/fuzz/corpus/parse/test_13 | 1 + filter_parser/fuzz/corpus/parse/test_14 | 1 + filter_parser/fuzz/corpus/parse/test_15 | 1 + filter_parser/fuzz/corpus/parse/test_16 | 1 + filter_parser/fuzz/corpus/parse/test_17 | 1 + filter_parser/fuzz/corpus/parse/test_18 | 1 + filter_parser/fuzz/corpus/parse/test_19 | 1 + filter_parser/fuzz/corpus/parse/test_2 | 1 + filter_parser/fuzz/corpus/parse/test_20 | 1 + filter_parser/fuzz/corpus/parse/test_21 | 1 + filter_parser/fuzz/corpus/parse/test_22 | 1 + filter_parser/fuzz/corpus/parse/test_23 | 1 + filter_parser/fuzz/corpus/parse/test_24 | 1 + filter_parser/fuzz/corpus/parse/test_25 | 1 + filter_parser/fuzz/corpus/parse/test_26 | 1 + filter_parser/fuzz/corpus/parse/test_27 | 1 + filter_parser/fuzz/corpus/parse/test_28 | 1 + filter_parser/fuzz/corpus/parse/test_29 | 1 + filter_parser/fuzz/corpus/parse/test_3 | 1 + filter_parser/fuzz/corpus/parse/test_30 | 1 + filter_parser/fuzz/corpus/parse/test_31 | 1 + filter_parser/fuzz/corpus/parse/test_32 | 1 + filter_parser/fuzz/corpus/parse/test_33 | 1 + filter_parser/fuzz/corpus/parse/test_34 | 1 + filter_parser/fuzz/corpus/parse/test_35 | 1 + filter_parser/fuzz/corpus/parse/test_36 | 1 + filter_parser/fuzz/corpus/parse/test_37 | 1 + filter_parser/fuzz/corpus/parse/test_38 | 1 + filter_parser/fuzz/corpus/parse/test_39 | 1 + filter_parser/fuzz/corpus/parse/test_4 | 1 + filter_parser/fuzz/corpus/parse/test_40 | 1 + filter_parser/fuzz/corpus/parse/test_41 | 1 + filter_parser/fuzz/corpus/parse/test_42 | 1 + 
filter_parser/fuzz/corpus/parse/test_43 | 1 + filter_parser/fuzz/corpus/parse/test_5 | 1 + filter_parser/fuzz/corpus/parse/test_6 | 1 + filter_parser/fuzz/corpus/parse/test_7 | 1 + filter_parser/fuzz/corpus/parse/test_8 | 1 + filter_parser/fuzz/corpus/parse/test_9 | 1 + filter_parser/fuzz/fuzz_targets/parse.rs | 13 ++ filter_parser/src/error.rs | 195 +++++++++++++++++++++++ filter_parser/src/lib.rs | 117 +++++++++----- filter_parser/src/main.rs | 11 ++ filter_parser/src/value.rs | 50 ++++-- 50 files changed, 406 insertions(+), 51 deletions(-) create mode 100644 filter_parser/fuzz/.gitignore create mode 100644 filter_parser/fuzz/Cargo.toml create mode 100644 filter_parser/fuzz/corpus/parse/test_1 create mode 100644 filter_parser/fuzz/corpus/parse/test_10 create mode 100644 filter_parser/fuzz/corpus/parse/test_11 create mode 100644 filter_parser/fuzz/corpus/parse/test_12 create mode 100644 filter_parser/fuzz/corpus/parse/test_13 create mode 100644 filter_parser/fuzz/corpus/parse/test_14 create mode 100644 filter_parser/fuzz/corpus/parse/test_15 create mode 100644 filter_parser/fuzz/corpus/parse/test_16 create mode 100644 filter_parser/fuzz/corpus/parse/test_17 create mode 100644 filter_parser/fuzz/corpus/parse/test_18 create mode 100644 filter_parser/fuzz/corpus/parse/test_19 create mode 100644 filter_parser/fuzz/corpus/parse/test_2 create mode 100644 filter_parser/fuzz/corpus/parse/test_20 create mode 100644 filter_parser/fuzz/corpus/parse/test_21 create mode 100644 filter_parser/fuzz/corpus/parse/test_22 create mode 100644 filter_parser/fuzz/corpus/parse/test_23 create mode 100644 filter_parser/fuzz/corpus/parse/test_24 create mode 100644 filter_parser/fuzz/corpus/parse/test_25 create mode 100644 filter_parser/fuzz/corpus/parse/test_26 create mode 100644 filter_parser/fuzz/corpus/parse/test_27 create mode 100644 filter_parser/fuzz/corpus/parse/test_28 create mode 100644 filter_parser/fuzz/corpus/parse/test_29 create mode 100644 
filter_parser/fuzz/corpus/parse/test_3 create mode 100644 filter_parser/fuzz/corpus/parse/test_30 create mode 100644 filter_parser/fuzz/corpus/parse/test_31 create mode 100644 filter_parser/fuzz/corpus/parse/test_32 create mode 100644 filter_parser/fuzz/corpus/parse/test_33 create mode 100644 filter_parser/fuzz/corpus/parse/test_34 create mode 100644 filter_parser/fuzz/corpus/parse/test_35 create mode 100644 filter_parser/fuzz/corpus/parse/test_36 create mode 100644 filter_parser/fuzz/corpus/parse/test_37 create mode 100644 filter_parser/fuzz/corpus/parse/test_38 create mode 100644 filter_parser/fuzz/corpus/parse/test_39 create mode 100644 filter_parser/fuzz/corpus/parse/test_4 create mode 100644 filter_parser/fuzz/corpus/parse/test_40 create mode 100644 filter_parser/fuzz/corpus/parse/test_41 create mode 100644 filter_parser/fuzz/corpus/parse/test_42 create mode 100644 filter_parser/fuzz/corpus/parse/test_43 create mode 100644 filter_parser/fuzz/corpus/parse/test_5 create mode 100644 filter_parser/fuzz/corpus/parse/test_6 create mode 100644 filter_parser/fuzz/corpus/parse/test_7 create mode 100644 filter_parser/fuzz/corpus/parse/test_8 create mode 100644 filter_parser/fuzz/corpus/parse/test_9 create mode 100644 filter_parser/fuzz/fuzz_targets/parse.rs create mode 100644 filter_parser/src/error.rs create mode 100644 filter_parser/src/main.rs diff --git a/filter_parser/fuzz/.gitignore b/filter_parser/fuzz/.gitignore new file mode 100644 index 000000000..a0925114d --- /dev/null +++ b/filter_parser/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/filter_parser/fuzz/Cargo.toml b/filter_parser/fuzz/Cargo.toml new file mode 100644 index 000000000..33e604e73 --- /dev/null +++ b/filter_parser/fuzz/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "filter_parser-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + 
+[dependencies.filter_parser] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "parse" +path = "fuzz_targets/parse.rs" +test = false +doc = false diff --git a/filter_parser/fuzz/corpus/parse/test_1 b/filter_parser/fuzz/corpus/parse/test_1 new file mode 100644 index 000000000..2523a328e --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_1 @@ -0,0 +1 @@ +channel = Ponce diff --git a/filter_parser/fuzz/corpus/parse/test_10 b/filter_parser/fuzz/corpus/parse/test_10 new file mode 100644 index 000000000..d0e9f1e51 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_10 @@ -0,0 +1 @@ +channel != ponce diff --git a/filter_parser/fuzz/corpus/parse/test_11 b/filter_parser/fuzz/corpus/parse/test_11 new file mode 100644 index 000000000..ca3db9223 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_11 @@ -0,0 +1 @@ +NOT channel = ponce diff --git a/filter_parser/fuzz/corpus/parse/test_12 b/filter_parser/fuzz/corpus/parse/test_12 new file mode 100644 index 000000000..325f848c1 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_12 @@ -0,0 +1 @@ +subscribers < 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_13 b/filter_parser/fuzz/corpus/parse/test_13 new file mode 100644 index 000000000..ca7b96f30 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_13 @@ -0,0 +1 @@ +subscribers > 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_14 b/filter_parser/fuzz/corpus/parse/test_14 new file mode 100644 index 000000000..f72f48bdb --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_14 @@ -0,0 +1 @@ +subscribers <= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_15 b/filter_parser/fuzz/corpus/parse/test_15 new file mode 100644 index 000000000..75073fc74 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_15 @@ -0,0 +1 @@ +subscribers >= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_16 b/filter_parser/fuzz/corpus/parse/test_16 new file mode 100644 index 
000000000..bdd39241b --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_16 @@ -0,0 +1 @@ +NOT subscribers < 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_17 b/filter_parser/fuzz/corpus/parse/test_17 new file mode 100644 index 000000000..4487643e4 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_17 @@ -0,0 +1 @@ +NOT subscribers > 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_18 b/filter_parser/fuzz/corpus/parse/test_18 new file mode 100644 index 000000000..150604012 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_18 @@ -0,0 +1 @@ +NOT subscribers <= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_19 b/filter_parser/fuzz/corpus/parse/test_19 new file mode 100644 index 000000000..11bc15103 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_19 @@ -0,0 +1 @@ +NOT subscribers >= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_2 b/filter_parser/fuzz/corpus/parse/test_2 new file mode 100644 index 000000000..8ac19cad4 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_2 @@ -0,0 +1 @@ +subscribers = 12 diff --git a/filter_parser/fuzz/corpus/parse/test_20 b/filter_parser/fuzz/corpus/parse/test_20 new file mode 100644 index 000000000..f52ad8ff2 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_20 @@ -0,0 +1 @@ +subscribers 100 TO 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_21 b/filter_parser/fuzz/corpus/parse/test_21 new file mode 100644 index 000000000..e86e6b89d --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_21 @@ -0,0 +1 @@ +NOT subscribers 100 TO 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_22 b/filter_parser/fuzz/corpus/parse/test_22 new file mode 100644 index 000000000..8ceeb6c1a --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_22 @@ -0,0 +1 @@ +_geoRadius(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_23 b/filter_parser/fuzz/corpus/parse/test_23 new file mode 100644 index 000000000..614effb98 --- /dev/null +++ 
b/filter_parser/fuzz/corpus/parse/test_23 @@ -0,0 +1 @@ +NOT _geoRadius(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_24 b/filter_parser/fuzz/corpus/parse/test_24 new file mode 100644 index 000000000..2b8b39279 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_24 @@ -0,0 +1 @@ +channel = ponce AND 'dog race' != 'bernese mountain' diff --git a/filter_parser/fuzz/corpus/parse/test_25 b/filter_parser/fuzz/corpus/parse/test_25 new file mode 100644 index 000000000..8f6fef74a --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_25 @@ -0,0 +1 @@ +channel = ponce OR 'dog race' != 'bernese mountain' diff --git a/filter_parser/fuzz/corpus/parse/test_26 b/filter_parser/fuzz/corpus/parse/test_26 new file mode 100644 index 000000000..5134b354d --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_26 @@ -0,0 +1 @@ +channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_27 b/filter_parser/fuzz/corpus/parse/test_27 new file mode 100644 index 000000000..b63559b9f --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_27 @@ -0,0 +1 @@ +channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 ) diff --git a/filter_parser/fuzz/corpus/parse/test_28 b/filter_parser/fuzz/corpus/parse/test_28 new file mode 100644 index 000000000..5bc97fb2b --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_28 @@ -0,0 +1 @@ +(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_29 b/filter_parser/fuzz/corpus/parse/test_29 new file mode 100644 index 000000000..7713618bb --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_29 @@ -0,0 +1 @@ +channel = Ponce = 12 diff --git a/filter_parser/fuzz/corpus/parse/test_3 b/filter_parser/fuzz/corpus/parse/test_3 new file mode 100644 index 000000000..2533e8fcf --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_3 @@ -0,0 +1 @@ +channel 
= 'Mister Mv' diff --git a/filter_parser/fuzz/corpus/parse/test_30 b/filter_parser/fuzz/corpus/parse/test_30 new file mode 100644 index 000000000..c35941150 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_30 @@ -0,0 +1 @@ +channel = diff --git a/filter_parser/fuzz/corpus/parse/test_31 b/filter_parser/fuzz/corpus/parse/test_31 new file mode 100644 index 000000000..f7982669f --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_31 @@ -0,0 +1 @@ +channel = 🐻 diff --git a/filter_parser/fuzz/corpus/parse/test_32 b/filter_parser/fuzz/corpus/parse/test_32 new file mode 100644 index 000000000..c4a102dc8 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_32 @@ -0,0 +1 @@ +OR diff --git a/filter_parser/fuzz/corpus/parse/test_33 b/filter_parser/fuzz/corpus/parse/test_33 new file mode 100644 index 000000000..eb80eb4e6 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_33 @@ -0,0 +1 @@ +AND diff --git a/filter_parser/fuzz/corpus/parse/test_34 b/filter_parser/fuzz/corpus/parse/test_34 new file mode 100644 index 000000000..60fc05e7f --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_34 @@ -0,0 +1 @@ +channel Ponce diff --git a/filter_parser/fuzz/corpus/parse/test_35 b/filter_parser/fuzz/corpus/parse/test_35 new file mode 100644 index 000000000..4a868f1d8 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_35 @@ -0,0 +1 @@ +channel = Ponce OR diff --git a/filter_parser/fuzz/corpus/parse/test_36 b/filter_parser/fuzz/corpus/parse/test_36 new file mode 100644 index 000000000..d7a0abac7 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_36 @@ -0,0 +1 @@ +_geoRadius diff --git a/filter_parser/fuzz/corpus/parse/test_37 b/filter_parser/fuzz/corpus/parse/test_37 new file mode 100644 index 000000000..44b5105b6 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_37 @@ -0,0 +1 @@ +_geoRadius = 12 diff --git a/filter_parser/fuzz/corpus/parse/test_38 b/filter_parser/fuzz/corpus/parse/test_38 new file mode 100644 index 000000000..ab45b973f --- 
/dev/null +++ b/filter_parser/fuzz/corpus/parse/test_38 @@ -0,0 +1 @@ +_geoPoint(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_39 b/filter_parser/fuzz/corpus/parse/test_39 new file mode 100644 index 000000000..283095326 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_39 @@ -0,0 +1 @@ +position <= _geoPoint(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_4 b/filter_parser/fuzz/corpus/parse/test_4 new file mode 100644 index 000000000..9c2716e79 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_4 @@ -0,0 +1 @@ +channel = "Mister Mv" diff --git a/filter_parser/fuzz/corpus/parse/test_40 b/filter_parser/fuzz/corpus/parse/test_40 new file mode 100644 index 000000000..c4c038c15 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_40 @@ -0,0 +1 @@ +position <= _geoRadius(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_41 b/filter_parser/fuzz/corpus/parse/test_41 new file mode 100644 index 000000000..6952aa87e --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_41 @@ -0,0 +1 @@ +channel = 'ponce diff --git a/filter_parser/fuzz/corpus/parse/test_42 b/filter_parser/fuzz/corpus/parse/test_42 new file mode 100644 index 000000000..485d8da96 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_42 @@ -0,0 +1 @@ +channel = "ponce diff --git a/filter_parser/fuzz/corpus/parse/test_43 b/filter_parser/fuzz/corpus/parse/test_43 new file mode 100644 index 000000000..728c8aa22 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_43 @@ -0,0 +1 @@ +channel = mv OR (followers >= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_5 b/filter_parser/fuzz/corpus/parse/test_5 new file mode 100644 index 000000000..89f5ec8ee --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_5 @@ -0,0 +1 @@ +'dog race' = Borzoi diff --git a/filter_parser/fuzz/corpus/parse/test_6 b/filter_parser/fuzz/corpus/parse/test_6 new file mode 100644 index 000000000..be3e203cb --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_6 @@ 
-0,0 +1 @@ +"dog race" = Chusky diff --git a/filter_parser/fuzz/corpus/parse/test_7 b/filter_parser/fuzz/corpus/parse/test_7 new file mode 100644 index 000000000..eb77a2875 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_7 @@ -0,0 +1 @@ +"dog race" = "Bernese Mountain" diff --git a/filter_parser/fuzz/corpus/parse/test_8 b/filter_parser/fuzz/corpus/parse/test_8 new file mode 100644 index 000000000..a25477648 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_8 @@ -0,0 +1 @@ +'dog race' = 'Bernese Mountain' diff --git a/filter_parser/fuzz/corpus/parse/test_9 b/filter_parser/fuzz/corpus/parse/test_9 new file mode 100644 index 000000000..c347e68f5 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_9 @@ -0,0 +1 @@ +"dog race" = 'Bernese Mountain' diff --git a/filter_parser/fuzz/fuzz_targets/parse.rs b/filter_parser/fuzz/fuzz_targets/parse.rs new file mode 100644 index 000000000..99d4a03a6 --- /dev/null +++ b/filter_parser/fuzz/fuzz_targets/parse.rs @@ -0,0 +1,13 @@ +#![no_main] +use filter_parser::FilterCondition; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + if let Ok(s) = std::str::from_utf8(data) { + // When we are fuzzing the parser we can get stack overflow really easily. + // But since this doesn't happens with a normal build we are just going to limit the fuzzer to 500 characters. 
+ if s.len() < 500 { + let _ = FilterCondition::parse(s); + } + } +}); diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs new file mode 100644 index 000000000..b4155bb51 --- /dev/null +++ b/filter_parser/src/error.rs @@ -0,0 +1,195 @@ +use std::fmt::Display; + +use nom::{Parser, error::{self, ParseError}}; + +use crate::{IResult, Span}; + +pub trait ExtendNomError<E> { + fn is_failure(&self) -> bool; + fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>; + fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>; +} + +impl<E> ExtendNomError<E> for nom::Err<E> { + fn is_failure(&self) -> bool { + matches!(self, Self::Failure(_)) + } + + fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> { + match self { + e @ Self::Failure(_) => e, + e => e.map(|e| op(e)), + } + } + + fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> { + match self { + e @ Self::Error(_) => e, + e => e.map(|e| op(e)), + } + } +} + +/// cut a parser and map the error +pub fn cut_with_err<'a, O>(mut parser: impl FnMut(Span<'a>) -> IResult<O>, mut with: impl FnMut(Error<'a>) -> Error<'a>) -> impl FnMut(Span<'a>) -> IResult<O> { + move |input| match parser.parse(input) { + Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))), + rest => rest, + } +} + +#[derive(Debug)] +pub struct Error<'a> { + context: Span<'a>, + kind: ErrorKind<'a>, +} + +#[derive(Debug)] +pub enum ErrorKind<'a> { + ReservedGeo(&'a str), + Geo, + MisusedGeo, + InvalidPrimary, + ReservedKeyword, + ExpectedEof, + ExpectedValue, + MissingClosingDelimiter(char), + UnexpectedInput(Vec<&'a str>), + Context(&'a str), + Char(char), + Unreachable, +} + +impl<'a> Error<'a> { + pub fn kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self { + Self { context, kind } + } + pub fn char(self) -> char { + match self.kind { + ErrorKind::Char(c) => c, + _ => panic!("Internal filter parser error"), + } + } +} + +impl<'a> ParseError<Span<'a>> for Error<'a> { + fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self { + let kind = match kind { + error::ErrorKind::Eof => ErrorKind::ExpectedEof, 
error::ErrorKind::Tag => ErrorKind::UnexpectedInput(Vec::new()), + error::ErrorKind::MapRes => todo!(), + error::ErrorKind::MapOpt => todo!(), + error::ErrorKind::Alt => todo!(), + error::ErrorKind::IsNot => todo!(), + error::ErrorKind::IsA => todo!(), + error::ErrorKind::SeparatedList => todo!(), + error::ErrorKind::SeparatedNonEmptyList => todo!(), + error::ErrorKind::Many0 => todo!(), + error::ErrorKind::Many1 => todo!(), + error::ErrorKind::ManyTill => todo!(), + error::ErrorKind::Count => todo!(), + error::ErrorKind::TakeUntil => todo!(), + error::ErrorKind::LengthValue => todo!(), + error::ErrorKind::TagClosure => todo!(), + error::ErrorKind::Alpha => todo!(), + error::ErrorKind::Digit => todo!(), + error::ErrorKind::HexDigit => todo!(), + error::ErrorKind::OctDigit => todo!(), + error::ErrorKind::AlphaNumeric => todo!(), + error::ErrorKind::Space => todo!(), + error::ErrorKind::MultiSpace => todo!(), + error::ErrorKind::LengthValueFn => todo!(), + error::ErrorKind::Switch => todo!(), + error::ErrorKind::TagBits => todo!(), + error::ErrorKind::OneOf => todo!(), + error::ErrorKind::NoneOf => todo!(), + error::ErrorKind::Char => todo!(), + error::ErrorKind::CrLf => todo!(), + error::ErrorKind::RegexpMatch => todo!(), + error::ErrorKind::RegexpMatches => todo!(), + error::ErrorKind::RegexpFind => todo!(), + error::ErrorKind::RegexpCapture => todo!(), + error::ErrorKind::RegexpCaptures => todo!(), + error::ErrorKind::TakeWhile1 => ErrorKind::Unreachable, + error::ErrorKind::Complete => todo!(), + error::ErrorKind::Fix => todo!(), + error::ErrorKind::Escaped => todo!(), + error::ErrorKind::EscapedTransform => todo!(), + error::ErrorKind::NonEmpty => todo!(), + error::ErrorKind::ManyMN => todo!(), + error::ErrorKind::Not => todo!(), + error::ErrorKind::Permutation => todo!(), + error::ErrorKind::Verify => todo!(), + error::ErrorKind::TakeTill1 => todo!(), + error::ErrorKind::TakeWhileMN => todo!(), + error::ErrorKind::TooLarge => todo!(), + 
error::ErrorKind::Many0Count => todo!(), + error::ErrorKind::Many1Count => todo!(), + error::ErrorKind::Float => todo!(), + error::ErrorKind::Satisfy => todo!(), + error::ErrorKind::Fail => todo!(), + }; + Self { context: input, kind } + } + + fn append(_input: Span<'a>, _kind: error::ErrorKind, other: Self) -> Self { + other + } + + fn from_char(input: Span<'a>, c: char) -> Self { + Self { context: input, kind: ErrorKind::Char(c) } + } +} + +impl<'a> Display for Error<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let input = self.context.fragment(); + + match self.kind { + ErrorKind::ExpectedValue if input.trim().is_empty() => { + writeln!(f, "Was expecting a value but instead got nothing.")? + } + ErrorKind::MissingClosingDelimiter(c) => { + writeln!(f, "Expression `{}` is missing the following closing delemiter: `{}`.", input, c)? + } + ErrorKind::ExpectedValue => { + writeln!(f, "Was expecting a value but instead got `{}`.", input)? + } + ErrorKind::InvalidPrimary if input.trim().is_empty() => { + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing.")? + } + ErrorKind::InvalidPrimary => { + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", input)? + } + ErrorKind::ExpectedEof => { + writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", input)? + } + ErrorKind::Geo => { + writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")? + } + ErrorKind::ReservedGeo(name) => { + writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name)? + } + ErrorKind::MisusedGeo => { + writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? 
+ } + ErrorKind::Char(c) => { + panic!("Tried to display a char error with `{}`", c) + } + ErrorKind::ReservedKeyword => writeln!(f, "reserved keyword")?, + ErrorKind::UnexpectedInput(ref v) => writeln!(f, "Unexpected input found `{}`, vec: `{:?}`", input, v)?, + ErrorKind::Context(_) => todo!(), + ErrorKind::Unreachable => writeln!( + f, + "Encountered an internal error while parsing your filter. Please fill an issue" + )?, + } + write!( + f, + "{}:{} in `{}`.", + self.context.location_line(), + self.context.get_utf8_column(), + self.context.extra, + ) + } +} diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 86c6cd79c..cb9a13f58 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -20,6 +20,20 @@ //! ```text //! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")" //! ``` +//! +//! Specific errors: +//! ================ +//! - If a user tries to use a geoPoint, as a primary OR as a value we must throw an error. +//! ```text +//! field = _geoPoint(12, 13, 14) +//! field < 12 AND _geoPoint(1, 2) +//! ``` +//! +//! - If a user tries to use a geoRadius as a value we must throw an error. +//! ```text +//! field = _geoRadius(12, 13, 14) +//! ``` +//! mod condition; mod error; @@ -28,12 +42,12 @@ mod value; use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; +use error::{cut_with_err, ExtendNomError}; pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0}; use nom::combinator::{cut, eof, map}; -use nom::error::{ContextError, ParseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, terminated, tuple}; @@ -102,14 +116,15 @@ impl<'a> FilterCondition<'a> { } } -// remove OPTIONAL whitespaces before AND after the the provided parser +/// remove OPTIONAL whitespaces before AND after the provided parser. 
fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> { delimited(multispace0, inner, multispace0) } -/// and = not (~ "AND" not)* +/// or = and (~ "OR" ~ and)* fn parse_or(input: Span) -> IResult<FilterCondition> { let (input, lhs) = parse_and(input)?; + // if we found an `OR` then we MUST find something next let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?; let expr = ors @@ -118,8 +133,10 @@ fn parse_or(input: Span) -> IResult<FilterCondition> { Ok((input, expr)) } +/// and = not (~ "AND" not)* fn parse_and(input: Span) -> IResult<FilterCondition> { let (input, lhs) = parse_not(input)?; + // if we found an `AND` then we MUST find something next let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?; let expr = ors .into_iter() @@ -128,28 +145,29 @@ fn parse_and(input: Span) -> IResult<FilterCondition> { } /// not = ("NOT" | "!") not | primary +/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`. +/// If we parse a `NOT` or `!` we MUST parse something behind. fn parse_not(input: Span) -> IResult<FilterCondition> { - alt(( - map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), - cut(parse_primary), - ))(input) + alt((map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), parse_primary))( + input, + ) } /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) +/// If we parse `_geoRadius` we MUST parse the rest of the expression. fn parse_geo_radius(input: Span) -> IResult<FilterCondition> { - let err_msg_args_incomplete = "_geoRadius. 
The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; - // we want to forbid space BEFORE the _geoRadius but not after - let parsed = preceded::<_, _, _, _, _, _>( + let parsed = preceded( tuple((multispace0, tag("_geoRadius"))), + // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), - )(input); + )(input) + .map_err(|e| e.map(|_| Error::kind(input, ErrorKind::Geo))); - let (input, args): (Span, Vec<Span>) = parsed?; + let (input, args) = parsed?; if args.len() != 3 { - let e = Error::from_char(input, '('); - return Err(nom::Err::Failure(Error::add_context(input, err_msg_args_incomplete, e))); + return Err(nom::Err::Failure(Error::kind(input, ErrorKind::Geo))); } let res = FilterCondition::GeoLowerThan { @@ -159,14 +177,39 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> { Ok((input, res)) } +/// geoPoint = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float) +fn parse_geo_point(input: Span) -> IResult<FilterCondition> { + // we want to forbid space BEFORE the _geoPoint but not after + tuple(( + multispace0, + tag("_geoPoint"), + // if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next. 
+ cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), + ))(input) + .map_err(|e| e.map(|_| Error::kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?; + // if we succeeded we still return a Failure because geoPoints are not allowed + Err(nom::Err::Failure(Error::kind(input, ErrorKind::ReservedGeo("_geoPoint")))) +} + /// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to fn parse_primary(input: Span) -> IResult<FilterCondition> { alt(( - delimited(ws(char('(')), cut(parse_expression), cut(ws(char(')')))), + // if we find a first parenthesis, then we must parse an expression and find the closing parenthesis + delimited( + ws(char('(')), + cut(parse_expression), + cut_with_err(ws(char(')')), |c| { + Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char())) + }), + ), |c| parse_geo_radius(c), |c| parse_condition(c), |c| parse_to(c), + // the next lines are only for error handling and are written at the end to have the least possible performance impact + |c| parse_geo_point(c), ))(input) + // if the inner parsers did not match enough information to return an accurate error + .map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::InvalidPrimary))) } /// expression = or @@ -484,18 +527,24 @@ pub mod tests { fn error() { use FilterCondition as Fc; - let result = Fc::parse("test = truc OR truc"); - assert!(result.is_err()); - let test_case = [ // simple test - ("channel = Ponce = 12", "An error occured"), - ("OR", "An error occured"), - ("AND", "An error occured"), - ("channel = Ponce OR", "An error occured"), - ("_geoRadius = 12", "An error occured"), - ("_geoPoint(12, 13, 14)", "An error occured"), - ("_geo = _geoRadius(12, 13, 14)", "An error occured"), + ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. 
You probably forgot an `OR` or an `AND` rule."), + ("channel = ", "Was expecting a value but instead got nothing."), + ("channel = 🐻", "Was expecting a value but instead got `🐻`."), + ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."), + ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."), + ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."), + ("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing."), + ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), + ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), + ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), + ("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), + ("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."), + ("channel = 'ponce", "Expression `'ponce` is missing the following closing delemiter: `'`."), + ("channel = \"ponce", "Expression `\"ponce` is missing the following closing delemiter: `\"`."), + ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delemiter: `)`."), + ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. 
You probably forgot an `OR` or an `AND` rule."), ]; for (input, expected) in test_case { @@ -503,24 +552,12 @@ pub mod tests { assert!( result.is_err(), - "Filter `{:?}` wasn't supposed to be parsed but it did with the following result: `{:?}`", - expected, + "Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`", + input, result.unwrap() ); let filter = result.unwrap_err().to_string(); - assert_eq!(filter, expected, "Filter `{:?}` was supposed to return the following error: `{}`, but instead returned `{}`.", input, filter, expected); + assert!(filter.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, filter); } } - - /* - #[test] - fn bidule() { - use FilterCondition as Fc; - - let result = Fc::parse::>("test = truc OR truc"); - dbg!(result); - - assert!(false); - } - */ } diff --git a/filter_parser/src/main.rs b/filter_parser/src/main.rs new file mode 100644 index 000000000..4158a2063 --- /dev/null +++ b/filter_parser/src/main.rs @@ -0,0 +1,11 @@ +fn main() { + let input = std::env::args().nth(1).expect("You must provide a filter to test"); + + println!("Trying to execute the following filter:\n{}\n\n", input); + + if let Err(e) = filter_parser::FilterCondition::parse(&input) { + println!("{}", e.to_string()); + } else { + println!("✅ Valid filter"); + } +} diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 7c708aa73..5f4677a2e 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -1,12 +1,29 @@ use nom::branch::alt; -use nom::bytes::complete::{take_till, take_while1}; -use nom::character::complete::char; -use nom::sequence::delimited; +use nom::bytes::complete::{take_till, take_while, take_while1}; +use nom::character::complete::{char, multispace0}; +use nom::combinator::cut; +use nom::sequence::{delimited, terminated}; -use crate::{ws, Error, IResult, Span, Token}; +use crate::error::ExtendNomError; 
+use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* pub fn parse_value(input: Span) -> IResult { + // before anything we want to check if the user is misusing a geo expression + let err = parse_geo_point(input).unwrap_err(); + if err.is_failure() { + return Err(err); + } + match parse_geo_radius(input) { + Ok(_) => return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo))), + // if we encountered a failure it means the user badly wrote a _geoRadius filter. + // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. + Err(e) if e.is_failure() => { + return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo))) + } + _ => (), + } + // singleQuoted = "'" .* all but quotes "'" let simple_quoted = |input| take_till(|c: char| c == '\'')(input); // doubleQuoted = "\"" (word | spaces)* "\"" @@ -14,13 +31,23 @@ pub fn parse_value(input: Span) -> IResult { // word = (alphanumeric | _ | - | .)+ let word = |input| take_while1(is_key_component)(input); - ws(alt(( - delimited(char('\''), simple_quoted, char('\'')), - delimited(char('"'), double_quoted, char('"')), - word, - )))(input) + // we want to remove the space before entering the alt because if we don't, + // when we create the errors from the output of the alt we have spaces everywhere + let (input, _) = take_while(char::is_whitespace)(input)?; + + terminated( + alt(( + delimited(char('\''), simple_quoted, cut(char('\''))), + delimited(char('"'), double_quoted, cut(char('"'))), + word, + )), + multispace0, + )(input) .map(|(s, t)| (s, t.into())) - .map_err(|e| e.map(|_| Error::expected_value(input))) + // if we found nothing in the alt it means the user did not input any value + .map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::ExpectedValue))) + // if we found encountered a failure it means the user really tried to input a value, 
but had an unmatched quote + .map_err(|e| e.map_fail(|c| Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char())))) } fn is_key_component(c: char) -> bool { @@ -38,12 +65,13 @@ pub mod tests { ("channel", rtok("", "channel")), (".private", rtok("", ".private")), ("I-love-kebab", rtok("", "I-love-kebab")), - ("but_snakes_are_also_good", rtok("", "but_snakes_are_also_good")), + ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")), ("parens(", rtok("", "parens")), ("parens)", rtok("", "parens")), ("not!", rtok("", "not")), (" channel", rtok(" ", "channel")), ("channel ", rtok("", "channel")), + (" channel ", rtok(" ", "channel")), ("'channel'", rtok("'", "channel")), ("\"channel\"", rtok("\"", "channel")), ("'cha)nnel'", rtok("'", "cha)nnel")),