mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
update the filter parser and some code for the fuzzer
This commit is contained in:
parent
5d3af5f273
commit
54aec7ac5f
3
filter_parser/fuzz/.gitignore
vendored
Normal file
3
filter_parser/fuzz/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
target
|
||||||
|
corpus
|
||||||
|
artifacts
|
25
filter_parser/fuzz/Cargo.toml
Normal file
25
filter_parser/fuzz/Cargo.toml
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
[package]
|
||||||
|
name = "filter_parser-fuzz"
|
||||||
|
version = "0.0.0"
|
||||||
|
authors = ["Automatically generated"]
|
||||||
|
publish = false
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[package.metadata]
|
||||||
|
cargo-fuzz = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
libfuzzer-sys = "0.4"
|
||||||
|
|
||||||
|
[dependencies.filter_parser]
|
||||||
|
path = ".."
|
||||||
|
|
||||||
|
# Prevent this from interfering with workspaces
|
||||||
|
[workspace]
|
||||||
|
members = ["."]
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "parse"
|
||||||
|
path = "fuzz_targets/parse.rs"
|
||||||
|
test = false
|
||||||
|
doc = false
|
1
filter_parser/fuzz/corpus/parse/test_1
Normal file
1
filter_parser/fuzz/corpus/parse/test_1
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = Ponce
|
1
filter_parser/fuzz/corpus/parse/test_10
Normal file
1
filter_parser/fuzz/corpus/parse/test_10
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel != ponce
|
1
filter_parser/fuzz/corpus/parse/test_11
Normal file
1
filter_parser/fuzz/corpus/parse/test_11
Normal file
@ -0,0 +1 @@
|
|||||||
|
NOT channel = ponce
|
1
filter_parser/fuzz/corpus/parse/test_12
Normal file
1
filter_parser/fuzz/corpus/parse/test_12
Normal file
@ -0,0 +1 @@
|
|||||||
|
subscribers < 1000
|
1
filter_parser/fuzz/corpus/parse/test_13
Normal file
1
filter_parser/fuzz/corpus/parse/test_13
Normal file
@ -0,0 +1 @@
|
|||||||
|
subscribers > 1000
|
1
filter_parser/fuzz/corpus/parse/test_14
Normal file
1
filter_parser/fuzz/corpus/parse/test_14
Normal file
@ -0,0 +1 @@
|
|||||||
|
subscribers <= 1000
|
1
filter_parser/fuzz/corpus/parse/test_15
Normal file
1
filter_parser/fuzz/corpus/parse/test_15
Normal file
@ -0,0 +1 @@
|
|||||||
|
subscribers >= 1000
|
1
filter_parser/fuzz/corpus/parse/test_16
Normal file
1
filter_parser/fuzz/corpus/parse/test_16
Normal file
@ -0,0 +1 @@
|
|||||||
|
NOT subscribers < 1000
|
1
filter_parser/fuzz/corpus/parse/test_17
Normal file
1
filter_parser/fuzz/corpus/parse/test_17
Normal file
@ -0,0 +1 @@
|
|||||||
|
NOT subscribers > 1000
|
1
filter_parser/fuzz/corpus/parse/test_18
Normal file
1
filter_parser/fuzz/corpus/parse/test_18
Normal file
@ -0,0 +1 @@
|
|||||||
|
NOT subscribers <= 1000
|
1
filter_parser/fuzz/corpus/parse/test_19
Normal file
1
filter_parser/fuzz/corpus/parse/test_19
Normal file
@ -0,0 +1 @@
|
|||||||
|
NOT subscribers >= 1000
|
1
filter_parser/fuzz/corpus/parse/test_2
Normal file
1
filter_parser/fuzz/corpus/parse/test_2
Normal file
@ -0,0 +1 @@
|
|||||||
|
subscribers = 12
|
1
filter_parser/fuzz/corpus/parse/test_20
Normal file
1
filter_parser/fuzz/corpus/parse/test_20
Normal file
@ -0,0 +1 @@
|
|||||||
|
subscribers 100 TO 1000
|
1
filter_parser/fuzz/corpus/parse/test_21
Normal file
1
filter_parser/fuzz/corpus/parse/test_21
Normal file
@ -0,0 +1 @@
|
|||||||
|
NOT subscribers 100 TO 1000
|
1
filter_parser/fuzz/corpus/parse/test_22
Normal file
1
filter_parser/fuzz/corpus/parse/test_22
Normal file
@ -0,0 +1 @@
|
|||||||
|
_geoRadius(12, 13, 14)
|
1
filter_parser/fuzz/corpus/parse/test_23
Normal file
1
filter_parser/fuzz/corpus/parse/test_23
Normal file
@ -0,0 +1 @@
|
|||||||
|
NOT _geoRadius(12, 13, 14)
|
1
filter_parser/fuzz/corpus/parse/test_24
Normal file
1
filter_parser/fuzz/corpus/parse/test_24
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = ponce AND 'dog race' != 'bernese mountain'
|
1
filter_parser/fuzz/corpus/parse/test_25
Normal file
1
filter_parser/fuzz/corpus/parse/test_25
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = ponce OR 'dog race' != 'bernese mountain'
|
1
filter_parser/fuzz/corpus/parse/test_26
Normal file
1
filter_parser/fuzz/corpus/parse/test_26
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000
|
1
filter_parser/fuzz/corpus/parse/test_27
Normal file
1
filter_parser/fuzz/corpus/parse/test_27
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )
|
1
filter_parser/fuzz/corpus/parse/test_28
Normal file
1
filter_parser/fuzz/corpus/parse/test_28
Normal file
@ -0,0 +1 @@
|
|||||||
|
(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)
|
1
filter_parser/fuzz/corpus/parse/test_29
Normal file
1
filter_parser/fuzz/corpus/parse/test_29
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = Ponce = 12
|
1
filter_parser/fuzz/corpus/parse/test_3
Normal file
1
filter_parser/fuzz/corpus/parse/test_3
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = 'Mister Mv'
|
1
filter_parser/fuzz/corpus/parse/test_30
Normal file
1
filter_parser/fuzz/corpus/parse/test_30
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel =
|
1
filter_parser/fuzz/corpus/parse/test_31
Normal file
1
filter_parser/fuzz/corpus/parse/test_31
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = 🐻
|
1
filter_parser/fuzz/corpus/parse/test_32
Normal file
1
filter_parser/fuzz/corpus/parse/test_32
Normal file
@ -0,0 +1 @@
|
|||||||
|
OR
|
1
filter_parser/fuzz/corpus/parse/test_33
Normal file
1
filter_parser/fuzz/corpus/parse/test_33
Normal file
@ -0,0 +1 @@
|
|||||||
|
AND
|
1
filter_parser/fuzz/corpus/parse/test_34
Normal file
1
filter_parser/fuzz/corpus/parse/test_34
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel Ponce
|
1
filter_parser/fuzz/corpus/parse/test_35
Normal file
1
filter_parser/fuzz/corpus/parse/test_35
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = Ponce OR
|
1
filter_parser/fuzz/corpus/parse/test_36
Normal file
1
filter_parser/fuzz/corpus/parse/test_36
Normal file
@ -0,0 +1 @@
|
|||||||
|
_geoRadius
|
1
filter_parser/fuzz/corpus/parse/test_37
Normal file
1
filter_parser/fuzz/corpus/parse/test_37
Normal file
@ -0,0 +1 @@
|
|||||||
|
_geoRadius = 12
|
1
filter_parser/fuzz/corpus/parse/test_38
Normal file
1
filter_parser/fuzz/corpus/parse/test_38
Normal file
@ -0,0 +1 @@
|
|||||||
|
_geoPoint(12, 13, 14)
|
1
filter_parser/fuzz/corpus/parse/test_39
Normal file
1
filter_parser/fuzz/corpus/parse/test_39
Normal file
@ -0,0 +1 @@
|
|||||||
|
position <= _geoPoint(12, 13, 14)
|
1
filter_parser/fuzz/corpus/parse/test_4
Normal file
1
filter_parser/fuzz/corpus/parse/test_4
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = "Mister Mv"
|
1
filter_parser/fuzz/corpus/parse/test_40
Normal file
1
filter_parser/fuzz/corpus/parse/test_40
Normal file
@ -0,0 +1 @@
|
|||||||
|
position <= _geoRadius(12, 13, 14)
|
1
filter_parser/fuzz/corpus/parse/test_41
Normal file
1
filter_parser/fuzz/corpus/parse/test_41
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = 'ponce
|
1
filter_parser/fuzz/corpus/parse/test_42
Normal file
1
filter_parser/fuzz/corpus/parse/test_42
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = "ponce
|
1
filter_parser/fuzz/corpus/parse/test_43
Normal file
1
filter_parser/fuzz/corpus/parse/test_43
Normal file
@ -0,0 +1 @@
|
|||||||
|
channel = mv OR (followers >= 1000
|
1
filter_parser/fuzz/corpus/parse/test_5
Normal file
1
filter_parser/fuzz/corpus/parse/test_5
Normal file
@ -0,0 +1 @@
|
|||||||
|
'dog race' = Borzoi
|
1
filter_parser/fuzz/corpus/parse/test_6
Normal file
1
filter_parser/fuzz/corpus/parse/test_6
Normal file
@ -0,0 +1 @@
|
|||||||
|
"dog race" = Chusky
|
1
filter_parser/fuzz/corpus/parse/test_7
Normal file
1
filter_parser/fuzz/corpus/parse/test_7
Normal file
@ -0,0 +1 @@
|
|||||||
|
"dog race" = "Bernese Mountain"
|
1
filter_parser/fuzz/corpus/parse/test_8
Normal file
1
filter_parser/fuzz/corpus/parse/test_8
Normal file
@ -0,0 +1 @@
|
|||||||
|
'dog race' = 'Bernese Mountain'
|
1
filter_parser/fuzz/corpus/parse/test_9
Normal file
1
filter_parser/fuzz/corpus/parse/test_9
Normal file
@ -0,0 +1 @@
|
|||||||
|
"dog race" = 'Bernese Mountain'
|
13
filter_parser/fuzz/fuzz_targets/parse.rs
Normal file
13
filter_parser/fuzz/fuzz_targets/parse.rs
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#![no_main]
|
||||||
|
use filter_parser::FilterCondition;
|
||||||
|
use libfuzzer_sys::fuzz_target;
|
||||||
|
|
||||||
|
// Fuzzing entry point: hands the raw fuzzer input to the filter parser and throws the result
// away, since only crashes are of interest here.
fuzz_target!(|data: &[u8]| {
    // The parser only accepts `&str`, so inputs that are not valid UTF-8 are dropped.
    // The parser can overflow the stack really easily while fuzzing. This does not happen with
    // a normal build, so only inputs shorter than 500 bytes are given to the parser.
    let candidate = std::str::from_utf8(data).ok().filter(|s| s.len() < 500);

    if let Some(filter) = candidate {
        let _ = FilterCondition::parse(filter);
    }
});
|
195
filter_parser/src/error.rs
Normal file
195
filter_parser/src/error.rs
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
use nom::{Parser, error::{self, ParseError}};
|
||||||
|
|
||||||
|
use crate::{IResult, Span};
|
||||||
|
|
||||||
|
pub trait ExtendNomError<E> {
|
||||||
|
fn is_failure(&self) -> bool;
|
||||||
|
fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
|
||||||
|
fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<E> ExtendNomError<E> for nom::Err<E> {
    /// Returns `true` when `self` is the `Failure` variant.
    fn is_failure(&self) -> bool {
        match self {
            Self::Failure(_) => true,
            _ => false,
        }
    }

    /// Leaves a `Failure` untouched and maps every other variant through `op`.
    fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        if matches!(self, Self::Failure(_)) {
            self
        } else {
            self.map(op)
        }
    }

    /// Leaves an `Error` untouched and maps every other variant through `op`.
    fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        if matches!(self, Self::Error(_)) {
            self
        } else {
            self.map(op)
        }
    }
}
|
||||||
|
|
||||||
|
/// cut a parser and map the error
|
||||||
|
pub fn cut_with_err<'a, O>(mut parser: impl FnMut(Span<'a>) -> IResult<O>, mut with: impl FnMut(Error<'a>) -> Error<'a>) -> impl FnMut(Span<'a>) -> IResult<O> {
|
||||||
|
move |input| match parser.parse(input) {
|
||||||
|
Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))),
|
||||||
|
rest => rest,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Error<'a> {
|
||||||
|
context: Span<'a>,
|
||||||
|
kind: ErrorKind<'a>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Every category of error the filter parser can report.
#[derive(Debug)]
pub enum ErrorKind<'a> {
    /// A reserved geo keyword (its name is carried by the variant) was used as a filter
    /// expression.
    ReservedGeo(&'a str),
    /// A `_geoRadius` filter was not written with its three arguments.
    Geo,
    /// A `_geoRadius` filter was used where a value was expected.
    MisusedGeo,
    /// An operation was expected but something else, or nothing, was found.
    InvalidPrimary,
    /// A reserved keyword was used.
    ReservedKeyword,
    /// Unexpected characters were found at the end of the filter.
    ExpectedEof,
    /// A value was expected but something else, or nothing, was found.
    ExpectedValue,
    /// An expression is missing the closing delimiter carried by the variant.
    MissingClosingDelimiter(char),
    /// Unexpected input was found.
    // NOTE(review): the meaning of the list is not visible here, presumably the expected
    // inputs; confirm against the call sites.
    UnexpectedInput(Vec<&'a str>),
    /// An error carrying a context string.
    // NOTE(review): usage is not visible in this file; confirm.
    Context(&'a str),
    /// Low-level error created from a single character by the `nom` integration.
    Char(char),
    /// Internal parser error that is not expected to be shown to the user.
    Unreachable,
}
|
||||||
|
|
||||||
|
impl<'a> Error<'a> {
|
||||||
|
pub fn kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self {
|
||||||
|
Self { context, kind }
|
||||||
|
}
|
||||||
|
pub fn char(self) -> char {
|
||||||
|
match self.kind {
|
||||||
|
ErrorKind::Char(c) => c,
|
||||||
|
_ => panic!("Internal filter parser error"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ParseError<Span<'a>> for Error<'a> {
|
||||||
|
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
|
||||||
|
let kind = match kind {
|
||||||
|
error::ErrorKind::Eof => ErrorKind::ExpectedEof,
|
||||||
|
error::ErrorKind::Tag => ErrorKind::UnexpectedInput(Vec::new()),
|
||||||
|
error::ErrorKind::MapRes => todo!(),
|
||||||
|
error::ErrorKind::MapOpt => todo!(),
|
||||||
|
error::ErrorKind::Alt => todo!(),
|
||||||
|
error::ErrorKind::IsNot => todo!(),
|
||||||
|
error::ErrorKind::IsA => todo!(),
|
||||||
|
error::ErrorKind::SeparatedList => todo!(),
|
||||||
|
error::ErrorKind::SeparatedNonEmptyList => todo!(),
|
||||||
|
error::ErrorKind::Many0 => todo!(),
|
||||||
|
error::ErrorKind::Many1 => todo!(),
|
||||||
|
error::ErrorKind::ManyTill => todo!(),
|
||||||
|
error::ErrorKind::Count => todo!(),
|
||||||
|
error::ErrorKind::TakeUntil => todo!(),
|
||||||
|
error::ErrorKind::LengthValue => todo!(),
|
||||||
|
error::ErrorKind::TagClosure => todo!(),
|
||||||
|
error::ErrorKind::Alpha => todo!(),
|
||||||
|
error::ErrorKind::Digit => todo!(),
|
||||||
|
error::ErrorKind::HexDigit => todo!(),
|
||||||
|
error::ErrorKind::OctDigit => todo!(),
|
||||||
|
error::ErrorKind::AlphaNumeric => todo!(),
|
||||||
|
error::ErrorKind::Space => todo!(),
|
||||||
|
error::ErrorKind::MultiSpace => todo!(),
|
||||||
|
error::ErrorKind::LengthValueFn => todo!(),
|
||||||
|
error::ErrorKind::Switch => todo!(),
|
||||||
|
error::ErrorKind::TagBits => todo!(),
|
||||||
|
error::ErrorKind::OneOf => todo!(),
|
||||||
|
error::ErrorKind::NoneOf => todo!(),
|
||||||
|
error::ErrorKind::Char => todo!(),
|
||||||
|
error::ErrorKind::CrLf => todo!(),
|
||||||
|
error::ErrorKind::RegexpMatch => todo!(),
|
||||||
|
error::ErrorKind::RegexpMatches => todo!(),
|
||||||
|
error::ErrorKind::RegexpFind => todo!(),
|
||||||
|
error::ErrorKind::RegexpCapture => todo!(),
|
||||||
|
error::ErrorKind::RegexpCaptures => todo!(),
|
||||||
|
error::ErrorKind::TakeWhile1 => ErrorKind::Unreachable,
|
||||||
|
error::ErrorKind::Complete => todo!(),
|
||||||
|
error::ErrorKind::Fix => todo!(),
|
||||||
|
error::ErrorKind::Escaped => todo!(),
|
||||||
|
error::ErrorKind::EscapedTransform => todo!(),
|
||||||
|
error::ErrorKind::NonEmpty => todo!(),
|
||||||
|
error::ErrorKind::ManyMN => todo!(),
|
||||||
|
error::ErrorKind::Not => todo!(),
|
||||||
|
error::ErrorKind::Permutation => todo!(),
|
||||||
|
error::ErrorKind::Verify => todo!(),
|
||||||
|
error::ErrorKind::TakeTill1 => todo!(),
|
||||||
|
error::ErrorKind::TakeWhileMN => todo!(),
|
||||||
|
error::ErrorKind::TooLarge => todo!(),
|
||||||
|
error::ErrorKind::Many0Count => todo!(),
|
||||||
|
error::ErrorKind::Many1Count => todo!(),
|
||||||
|
error::ErrorKind::Float => todo!(),
|
||||||
|
error::ErrorKind::Satisfy => todo!(),
|
||||||
|
error::ErrorKind::Fail => todo!(),
|
||||||
|
};
|
||||||
|
Self { context: input, kind }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn append(_input: Span<'a>, _kind: error::ErrorKind, other: Self) -> Self {
|
||||||
|
other
|
||||||
|
}
|
||||||
|
|
||||||
|
fn from_char(input: Span<'a>, c: char) -> Self {
|
||||||
|
Self { context: input, kind: ErrorKind::Char(c) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Display for Error<'a> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let input = self.context.fragment();
|
||||||
|
|
||||||
|
match self.kind {
|
||||||
|
ErrorKind::ExpectedValue if input.trim().is_empty() => {
|
||||||
|
writeln!(f, "Was expecting a value but instead got nothing.")?
|
||||||
|
}
|
||||||
|
ErrorKind::MissingClosingDelimiter(c) => {
|
||||||
|
writeln!(f, "Expression `{}` is missing the following closing delemiter: `{}`.", input, c)?
|
||||||
|
}
|
||||||
|
ErrorKind::ExpectedValue => {
|
||||||
|
writeln!(f, "Was expecting a value but instead got `{}`.", input)?
|
||||||
|
}
|
||||||
|
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
|
||||||
|
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing.")?
|
||||||
|
}
|
||||||
|
ErrorKind::InvalidPrimary => {
|
||||||
|
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", input)?
|
||||||
|
}
|
||||||
|
ErrorKind::ExpectedEof => {
|
||||||
|
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", input)?
|
||||||
|
}
|
||||||
|
ErrorKind::Geo => {
|
||||||
|
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
|
||||||
|
}
|
||||||
|
ErrorKind::ReservedGeo(name) => {
|
||||||
|
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name)?
|
||||||
|
}
|
||||||
|
ErrorKind::MisusedGeo => {
|
||||||
|
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
|
||||||
|
}
|
||||||
|
ErrorKind::Char(c) => {
|
||||||
|
panic!("Tried to display a char error with `{}`", c)
|
||||||
|
}
|
||||||
|
ErrorKind::ReservedKeyword => writeln!(f, "reserved keyword")?,
|
||||||
|
ErrorKind::UnexpectedInput(ref v) => writeln!(f, "Unexpected input found `{}`, vec: `{:?}`", input, v)?,
|
||||||
|
ErrorKind::Context(_) => todo!(),
|
||||||
|
ErrorKind::Unreachable => writeln!(
|
||||||
|
f,
|
||||||
|
"Encountered an internal error while parsing your filter. Please fill an issue"
|
||||||
|
)?,
|
||||||
|
}
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}:{} in `{}`.",
|
||||||
|
self.context.location_line(),
|
||||||
|
self.context.get_utf8_column(),
|
||||||
|
self.context.extra,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
@ -20,6 +20,20 @@
|
|||||||
//! ```text
|
//! ```text
|
||||||
//! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")"
|
//! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")"
|
||||||
//! ```
|
//! ```
|
||||||
|
//!
|
||||||
|
//! Specific errors:
|
||||||
|
//! ================
|
||||||
|
//! - If a user tries to use a geoPoint, as a primary OR as a value, we must throw an error.
|
||||||
|
//! ```text
|
||||||
|
//! field = _geoPoint(12, 13, 14)
|
||||||
|
//! field < 12 AND _geoPoint(1, 2)
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! - If a user tries to use a geoRadius as a value, we must throw an error.
|
||||||
|
//! ```text
|
||||||
|
//! field = _geoRadius(12, 13, 14)
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
|
||||||
mod condition;
|
mod condition;
|
||||||
mod error;
|
mod error;
|
||||||
@ -28,12 +42,12 @@ mod value;
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
|
||||||
pub use condition::{parse_condition, parse_to, Condition};
|
pub use condition::{parse_condition, parse_to, Condition};
|
||||||
|
use error::{cut_with_err, ExtendNomError};
|
||||||
pub use error::{Error, ErrorKind};
|
pub use error::{Error, ErrorKind};
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::tag;
|
use nom::bytes::complete::tag;
|
||||||
use nom::character::complete::{char, multispace0};
|
use nom::character::complete::{char, multispace0};
|
||||||
use nom::combinator::{cut, eof, map};
|
use nom::combinator::{cut, eof, map};
|
||||||
use nom::error::{ContextError, ParseError};
|
|
||||||
use nom::multi::{many0, separated_list1};
|
use nom::multi::{many0, separated_list1};
|
||||||
use nom::number::complete::recognize_float;
|
use nom::number::complete::recognize_float;
|
||||||
use nom::sequence::{delimited, preceded, terminated, tuple};
|
use nom::sequence::{delimited, preceded, terminated, tuple};
|
||||||
@ -102,14 +116,15 @@ impl<'a> FilterCondition<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove OPTIONAL whitespaces before AND after the the provided parser
|
/// remove OPTIONAL whitespaces before AND after the provided parser.
|
||||||
fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> {
|
fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> {
|
||||||
delimited(multispace0, inner, multispace0)
|
delimited(multispace0, inner, multispace0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// and = not (~ "AND" not)*
|
/// or = and (~ "OR" ~ and)
|
||||||
fn parse_or(input: Span) -> IResult<FilterCondition> {
|
fn parse_or(input: Span) -> IResult<FilterCondition> {
|
||||||
let (input, lhs) = parse_and(input)?;
|
let (input, lhs) = parse_and(input)?;
|
||||||
|
// if we found a `OR` then we MUST find something next
|
||||||
let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?;
|
let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?;
|
||||||
|
|
||||||
let expr = ors
|
let expr = ors
|
||||||
@ -118,8 +133,10 @@ fn parse_or(input: Span) -> IResult<FilterCondition> {
|
|||||||
Ok((input, expr))
|
Ok((input, expr))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// and = not (~ "AND" not)*
|
||||||
fn parse_and(input: Span) -> IResult<FilterCondition> {
|
fn parse_and(input: Span) -> IResult<FilterCondition> {
|
||||||
let (input, lhs) = parse_not(input)?;
|
let (input, lhs) = parse_not(input)?;
|
||||||
|
// if we found a `AND` then we MUST find something next
|
||||||
let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?;
|
let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?;
|
||||||
let expr = ors
|
let expr = ors
|
||||||
.into_iter()
|
.into_iter()
|
||||||
@ -128,28 +145,29 @@ fn parse_and(input: Span) -> IResult<FilterCondition> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// not = ("NOT" | "!") not | primary
|
/// not = ("NOT" | "!") not | primary
|
||||||
|
/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`.
|
||||||
|
/// If we parse a `NOT` or `!` we MUST parse something behind.
|
||||||
fn parse_not(input: Span) -> IResult<FilterCondition> {
|
fn parse_not(input: Span) -> IResult<FilterCondition> {
|
||||||
alt((
|
alt((map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), parse_primary))(
|
||||||
map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()),
|
input,
|
||||||
cut(parse_primary),
|
)
|
||||||
))(input)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
|
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
|
||||||
|
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
|
||||||
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
||||||
let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`";
|
|
||||||
|
|
||||||
// we want to forbid space BEFORE the _geoRadius but not after
|
// we want to forbid space BEFORE the _geoRadius but not after
|
||||||
let parsed = preceded::<_, _, _, _, _, _>(
|
let parsed = preceded(
|
||||||
tuple((multispace0, tag("_geoRadius"))),
|
tuple((multispace0, tag("_geoRadius"))),
|
||||||
|
// if we were able to parse `_geoRadius` but can't parse the rest of the input, we return a failure
|
||||||
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
|
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
|
||||||
)(input);
|
)(input)
|
||||||
|
.map_err(|e| e.map(|_| Error::kind(input, ErrorKind::Geo)));
|
||||||
|
|
||||||
let (input, args): (Span, Vec<Span>) = parsed?;
|
let (input, args) = parsed?;
|
||||||
|
|
||||||
if args.len() != 3 {
|
if args.len() != 3 {
|
||||||
let e = Error::from_char(input, '(');
|
return Err(nom::Err::Failure(Error::kind(input, ErrorKind::Geo)));
|
||||||
return Err(nom::Err::Failure(Error::add_context(input, err_msg_args_incomplete, e)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let res = FilterCondition::GeoLowerThan {
|
let res = FilterCondition::GeoLowerThan {
|
||||||
@ -159,14 +177,39 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
|||||||
Ok((input, res))
|
Ok((input, res))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// geoPoint = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float)
|
||||||
|
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
|
||||||
|
// we want to forbid space BEFORE the _geoPoint but not after
|
||||||
|
tuple((
|
||||||
|
multispace0,
|
||||||
|
tag("_geoPoint"),
|
||||||
|
// if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next.
|
||||||
|
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
|
||||||
|
))(input)
|
||||||
|
.map_err(|e| e.map(|_| Error::kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
|
||||||
|
// if we succeeded we still returns a Failure because geoPoints are not allowed
|
||||||
|
Err(nom::Err::Failure(Error::kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
|
||||||
|
}
|
||||||
|
|
||||||
/// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
|
/// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
|
||||||
fn parse_primary(input: Span) -> IResult<FilterCondition> {
|
fn parse_primary(input: Span) -> IResult<FilterCondition> {
|
||||||
alt((
|
alt((
|
||||||
delimited(ws(char('(')), cut(parse_expression), cut(ws(char(')')))),
|
// if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
|
||||||
|
delimited(
|
||||||
|
ws(char('(')),
|
||||||
|
cut(parse_expression),
|
||||||
|
cut_with_err(ws(char(')')), |c| {
|
||||||
|
Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
|
||||||
|
}),
|
||||||
|
),
|
||||||
|c| parse_geo_radius(c),
|
|c| parse_geo_radius(c),
|
||||||
|c| parse_condition(c),
|
|c| parse_condition(c),
|
||||||
|c| parse_to(c),
|
|c| parse_to(c),
|
||||||
|
// the next lines are only for error handling and are written at the end to have the least possible performance impact
|
||||||
|
|c| parse_geo_point(c),
|
||||||
))(input)
|
))(input)
|
||||||
|
// if the inner parsers did not match enough information to return an accurate error
|
||||||
|
.map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::InvalidPrimary)))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// expression = or
|
/// expression = or
|
||||||
@ -484,18 +527,24 @@ pub mod tests {
|
|||||||
fn error() {
|
fn error() {
|
||||||
use FilterCondition as Fc;
|
use FilterCondition as Fc;
|
||||||
|
|
||||||
let result = Fc::parse("test = truc OR truc");
|
|
||||||
assert!(result.is_err());
|
|
||||||
|
|
||||||
let test_case = [
|
let test_case = [
|
||||||
// simple test
|
// simple test
|
||||||
("channel = Ponce = 12", "An error occured"),
|
("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."),
|
||||||
("OR", "An error occured"),
|
("channel = ", "Was expecting a value but instead got nothing."),
|
||||||
("AND", "An error occured"),
|
("channel = 🐻", "Was expecting a value but instead got `🐻`."),
|
||||||
("channel = Ponce OR", "An error occured"),
|
("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."),
|
||||||
("_geoRadius = 12", "An error occured"),
|
("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."),
|
||||||
("_geoPoint(12, 13, 14)", "An error occured"),
|
("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."),
|
||||||
("_geo = _geoRadius(12, 13, 14)", "An error occured"),
|
("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing."),
|
||||||
|
("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
|
||||||
|
("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
|
||||||
|
("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
|
||||||
|
("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
|
||||||
|
("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."),
|
||||||
|
("channel = 'ponce", "Expression `'ponce` is missing the following closing delemiter: `'`."),
|
||||||
|
("channel = \"ponce", "Expression `\"ponce` is missing the following closing delemiter: `\"`."),
|
||||||
|
("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delemiter: `)`."),
|
||||||
|
("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in test_case {
|
for (input, expected) in test_case {
|
||||||
@ -503,24 +552,12 @@ pub mod tests {
|
|||||||
|
|
||||||
assert!(
|
assert!(
|
||||||
result.is_err(),
|
result.is_err(),
|
||||||
"Filter `{:?}` wasn't supposed to be parsed but it did with the following result: `{:?}`",
|
"Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`",
|
||||||
expected,
|
input,
|
||||||
result.unwrap()
|
result.unwrap()
|
||||||
);
|
);
|
||||||
let filter = result.unwrap_err().to_string();
|
let filter = result.unwrap_err().to_string();
|
||||||
assert_eq!(filter, expected, "Filter `{:?}` was supposed to return the following error: `{}`, but instead returned `{}`.", input, filter, expected);
|
assert!(filter.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, filter);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
#[test]
|
|
||||||
fn bidule() {
|
|
||||||
use FilterCondition as Fc;
|
|
||||||
|
|
||||||
let result = Fc::parse::<crate::Error<Span>>("test = truc OR truc");
|
|
||||||
dbg!(result);
|
|
||||||
|
|
||||||
assert!(false);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
11
filter_parser/src/main.rs
Normal file
11
filter_parser/src/main.rs
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
fn main() {
|
||||||
|
let input = std::env::args().nth(1).expect("You must provide a filter to test");
|
||||||
|
|
||||||
|
println!("Trying to execute the following filter:\n{}\n\n", input);
|
||||||
|
|
||||||
|
if let Err(e) = filter_parser::FilterCondition::parse(&input) {
|
||||||
|
println!("{}", e.to_string());
|
||||||
|
} else {
|
||||||
|
println!("✅ Valid filter");
|
||||||
|
}
|
||||||
|
}
|
@ -1,12 +1,29 @@
|
|||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::{take_till, take_while1};
|
use nom::bytes::complete::{take_till, take_while, take_while1};
|
||||||
use nom::character::complete::char;
|
use nom::character::complete::{char, multispace0};
|
||||||
use nom::sequence::delimited;
|
use nom::combinator::cut;
|
||||||
|
use nom::sequence::{delimited, terminated};
|
||||||
|
|
||||||
use crate::{ws, Error, IResult, Span, Token};
|
use crate::error::ExtendNomError;
|
||||||
|
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
||||||
|
|
||||||
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
||||||
pub fn parse_value(input: Span) -> IResult<Token> {
|
pub fn parse_value(input: Span) -> IResult<Token> {
|
||||||
|
// before anything we want to check if the user is misusing a geo expression
|
||||||
|
let err = parse_geo_point(input).unwrap_err();
|
||||||
|
if err.is_failure() {
|
||||||
|
return Err(err);
|
||||||
|
}
|
||||||
|
match parse_geo_radius(input) {
|
||||||
|
Ok(_) => return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo))),
|
||||||
|
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
|
||||||
|
// But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
|
||||||
|
Err(e) if e.is_failure() => {
|
||||||
|
return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo)))
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
// singleQuoted = "'" .* all but quotes "'"
|
// singleQuoted = "'" .* all but quotes "'"
|
||||||
let simple_quoted = |input| take_till(|c: char| c == '\'')(input);
|
let simple_quoted = |input| take_till(|c: char| c == '\'')(input);
|
||||||
// doubleQuoted = "\"" (word | spaces)* "\""
|
// doubleQuoted = "\"" (word | spaces)* "\""
|
||||||
@ -14,13 +31,23 @@ pub fn parse_value(input: Span) -> IResult<Token> {
|
|||||||
// word = (alphanumeric | _ | - | .)+
|
// word = (alphanumeric | _ | - | .)+
|
||||||
let word = |input| take_while1(is_key_component)(input);
|
let word = |input| take_while1(is_key_component)(input);
|
||||||
|
|
||||||
ws(alt((
|
// we want to remove the space before entering the alt because if we don't,
|
||||||
delimited(char('\''), simple_quoted, char('\'')),
|
// when we create the errors from the output of the alt we have spaces everywhere
|
||||||
delimited(char('"'), double_quoted, char('"')),
|
let (input, _) = take_while(char::is_whitespace)(input)?;
|
||||||
word,
|
|
||||||
)))(input)
|
terminated(
|
||||||
|
alt((
|
||||||
|
delimited(char('\''), simple_quoted, cut(char('\''))),
|
||||||
|
delimited(char('"'), double_quoted, cut(char('"'))),
|
||||||
|
word,
|
||||||
|
)),
|
||||||
|
multispace0,
|
||||||
|
)(input)
|
||||||
.map(|(s, t)| (s, t.into()))
|
.map(|(s, t)| (s, t.into()))
|
||||||
.map_err(|e| e.map(|_| Error::expected_value(input)))
|
// if we found nothing in the alt it means the user did not input any value
|
||||||
|
.map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::ExpectedValue)))
|
||||||
|
// if we encountered a failure it means the user really tried to input a value, but had an unmatched quote
|
||||||
|
.map_err(|e| e.map_fail(|c| Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char()))))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_key_component(c: char) -> bool {
|
fn is_key_component(c: char) -> bool {
|
||||||
@ -38,12 +65,13 @@ pub mod tests {
|
|||||||
("channel", rtok("", "channel")),
|
("channel", rtok("", "channel")),
|
||||||
(".private", rtok("", ".private")),
|
(".private", rtok("", ".private")),
|
||||||
("I-love-kebab", rtok("", "I-love-kebab")),
|
("I-love-kebab", rtok("", "I-love-kebab")),
|
||||||
("but_snakes_are_also_good", rtok("", "but_snakes_are_also_good")),
|
("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
|
||||||
("parens(", rtok("", "parens")),
|
("parens(", rtok("", "parens")),
|
||||||
("parens)", rtok("", "parens")),
|
("parens)", rtok("", "parens")),
|
||||||
("not!", rtok("", "not")),
|
("not!", rtok("", "not")),
|
||||||
(" channel", rtok(" ", "channel")),
|
(" channel", rtok(" ", "channel")),
|
||||||
("channel ", rtok("", "channel")),
|
("channel ", rtok("", "channel")),
|
||||||
|
(" channel ", rtok(" ", "channel")),
|
||||||
("'channel'", rtok("'", "channel")),
|
("'channel'", rtok("'", "channel")),
|
||||||
("\"channel\"", rtok("\"", "channel")),
|
("\"channel\"", rtok("\"", "channel")),
|
||||||
("'cha)nnel'", rtok("'", "cha)nnel")),
|
("'cha)nnel'", rtok("'", "cha)nnel")),
|
||||||
|
Loading…
Reference in New Issue
Block a user