rewrite the parser logic

This commit is contained in:
Tamo 2021-10-21 16:45:42 +02:00
parent e1d81342cf
commit 6c15f50899
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69

View File

@ -1,9 +1,26 @@
//! BNF grammar:
//!
//! ```text
//! expression = or
//! or = and (~ "OR" ~ and)
//! and = not (~ "AND" not)*
//! not = ("NOT" | "!") not | primary
//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius
//! to = value value TO value
//! condition = value ("==" | ">" ...) value
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
//! singleQuoted = "'" .* all but quotes "'"
//! doubleQuoted = "\"" (word | spaces)* "\""
//! word = (alphanumeric | _ | - | .)+
//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
//! ```
use std::collections::HashSet; use std::collections::HashSet;
use std::fmt::Debug; use std::fmt::Debug;
use std::result::Result as StdResult; use std::result::Result as StdResult;
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::{tag, take_till, take_till1, take_while1}; use nom::bytes::complete::{tag, take_till, take_while1};
use nom::character::complete::{char, multispace0}; use nom::character::complete::{char, multispace0};
use nom::combinator::map; use nom::combinator::map;
use nom::error::{ContextError, ErrorKind, VerboseError}; use nom::error::{ContextError, ErrorKind, VerboseError};
@ -60,12 +77,14 @@ pub struct ParseContext<'a> {
} }
impl<'a> ParseContext<'a> { impl<'a> ParseContext<'a> {
/// and = not (~ "AND" not)*
fn parse_or<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> fn parse_or<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
let (input, lhs) = self.parse_and(input)?; let (input, lhs) = self.parse_and(input)?;
let (input, ors) = many0(preceded(self.ws(tag("OR")), |c| Self::parse_or(self, c)))(input)?; let (input, ors) =
many0(preceded(self.ws(tag("OR")), |c| Self::parse_and(self, c)))(input)?;
let expr = ors let expr = ors
.into_iter() .into_iter()
@ -78,49 +97,40 @@ impl<'a> ParseContext<'a> {
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
let (input, lhs) = self.parse_not(input)?; let (input, lhs) = self.parse_not(input)?;
let (input, ors) = let (input, ors) = many0(preceded(self.ws(tag("AND")), |c| self.parse_not(c)))(input)?;
many0(preceded(self.ws(tag("AND")), |c| Self::parse_and(self, c)))(input)?;
let expr = ors let expr = ors
.into_iter() .into_iter()
.fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch)));
Ok((input, expr)) Ok((input, expr))
} }
/// not = ("NOT" | "!") not | primary
fn parse_not<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> fn parse_not<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
alt(( alt((
map( map(preceded(alt((tag("!"), tag("NOT"))), |c| self.parse_not(c)), |e| e.negate()),
preceded(alt((self.ws(tag("!")), self.ws(tag("NOT")))), |c| { |c| self.parse_primary(c),
Self::parse_condition_expression(self, c)
}),
|e| e.negate(),
),
|c| Self::parse_condition_expression(self, c),
))(input) ))(input)
} }
fn ws<F, O, E>(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> fn ws<F, O, E>(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
where where
F: Fn(&'a str) -> IResult<&'a str, O, E>, F: FnMut(&'a str) -> IResult<&'a str, O, E>,
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
delimited(multispace0, inner, multispace0) delimited(multispace0, inner, multispace0)
} }
fn parse_simple_condition<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> /// condition = value ("==" | ">" ...) value
fn parse_condition<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_value(c))))( let (input, (key, op, value)) =
input, tuple((|c| self.parse_value(c), operator, |c| self.parse_value(c)))(input)?;
);
let (input, (key, op, value)) = match k {
Ok(o) => o,
Err(e) => return Err(e),
};
let fid = self.parse_fid(input, key)?; let fid = self.parse_fid(input, key)?;
let r: StdResult<f64, nom::Err<VerboseError<&str>>> = self.parse_numeric(value); let r: StdResult<f64, nom::Err<VerboseError<&str>>> = self.parse_numeric(value);
@ -137,7 +147,17 @@ impl<'a> ParseContext<'a> {
); );
Ok((input, k)) Ok((input, k))
} }
">" | "<" | "<=" | ">=" => self.parse_numeric_unary_condition(op, fid, value), ">" | "<" | "<=" | ">=" => {
let numeric: f64 = self.parse_numeric(value)?;
let k = match op {
">" => FilterCondition::Operator(fid, GreaterThan(numeric)),
"<" => FilterCondition::Operator(fid, LowerThan(numeric)),
"<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)),
">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)),
_ => unreachable!(),
};
Ok((input, k))
}
_ => unreachable!(), _ => unreachable!(),
} }
} }
@ -156,26 +176,6 @@ impl<'a> ParseContext<'a> {
} }
} }
fn parse_numeric_unary_condition<E>(
&'a self,
input: &'a str,
fid: u16,
value: &'a str,
) -> IResult<&'a str, FilterCondition, E>
where
E: FilterParserError<'a>,
{
let numeric: f64 = self.parse_numeric(value)?;
let k = match input {
">" => FilterCondition::Operator(fid, GreaterThan(numeric)),
"<" => FilterCondition::Operator(fid, LowerThan(numeric)),
"<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)),
">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)),
_ => unreachable!(),
};
Ok((input, k))
}
fn parse_fid<E>(&'a self, input: &'a str, key: &'a str) -> StdResult<FieldId, nom::Err<E>> fn parse_fid<E>(&'a self, input: &'a str, key: &'a str) -> StdResult<FieldId, nom::Err<E>>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
@ -193,12 +193,13 @@ impl<'a> ParseContext<'a> {
} }
} }
fn parse_range_condition<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> /// to = value value TO value
fn parse_to<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
let (input, (key, from, _, to)) = tuple(( let (input, (key, from, _, to)) = tuple((
self.ws(|c| self.parse_key(c)), self.ws(|c| self.parse_value(c)),
self.ws(|c| self.parse_value(c)), self.ws(|c| self.parse_value(c)),
tag("TO"), tag("TO"),
self.ws(|c| self.parse_value(c)), self.ws(|c| self.parse_value(c)),
@ -212,6 +213,7 @@ impl<'a> ParseContext<'a> {
Ok((input, res)) Ok((input, res))
} }
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
fn parse_geo_radius<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> fn parse_geo_radius<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
@ -224,7 +226,8 @@ impl<'a> ParseContext<'a> {
"_geoRadius. Longitude must be contained between -180 and 180 degrees."; "_geoRadius. Longitude must be contained between -180 and 180 degrees.";
let parsed = preceded::<_, _, _, E, _, _>( let parsed = preceded::<_, _, _, E, _, _>(
tag("_geoRadius"), // TODO: forbid spaces between _geoRadius and parenthesis
self.ws(tag("_geoRadius")),
delimited( delimited(
char('('), char('('),
separated_list1(tag(","), self.ws(|c| recognize_float(c))), separated_list1(tag(","), self.ws(|c| recognize_float(c))),
@ -275,54 +278,35 @@ impl<'a> ParseContext<'a> {
Ok((input, res)) Ok((input, res))
} }
fn parse_condition<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> /// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius
where fn parse_primary<E>(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E>
E: FilterParserError<'a>,
{
let l1 = |c| self.parse_simple_condition(c);
let l2 = |c| self.parse_range_condition(c);
let l3 = |c| self.parse_geo_radius(c);
alt((l1, l2, l3))(input)
}
fn parse_condition_expression<E>(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
alt(( alt((
delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), delimited(self.ws(char('(')), |c| self.parse_expression(c), self.ws(char(')'))),
|c| Self::parse_condition(self, c), |c| self.parse_condition(c),
))(input) |c| self.parse_to(c),
} |c| self.parse_geo_radius(c),
fn parse_key<E>(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E>
where
E: FilterParserError<'a>,
{
let key = |input| take_while1(Self::is_key_component)(input);
let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input);
let quoted_key = |input| take_till(|c: char| c == '"')(input);
alt((
delimited(char('\''), simple_quoted_key, char('\'')),
delimited(char('"'), quoted_key, char('"')),
key,
))(input) ))(input)
} }
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
fn parse_value<E>(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> fn parse_value<E>(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
let key = // singleQuoted = "'" .* all but quotes "'"
|input| take_till1(|c: char| c.is_ascii_whitespace() || c == '(' || c == ')')(input);
let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input);
// doubleQuoted = "\"" (word | spaces)* "\""
let quoted_key = |input| take_till(|c: char| c == '"')(input); let quoted_key = |input| take_till(|c: char| c == '"')(input);
// word = (alphanumeric | _ | - | .)+
let word = |input| take_while1(Self::is_key_component)(input);
alt(( alt((
delimited(char('\''), simple_quoted_key, char('\'')), self.ws(delimited(char('\''), simple_quoted_key, char('\''))),
delimited(char('"'), quoted_key, char('"')), self.ws(delimited(char('"'), quoted_key, char('"'))),
key, self.ws(word),
))(input) ))(input)
} }
@ -330,11 +314,12 @@ impl<'a> ParseContext<'a> {
c.is_alphanumeric() || ['_', '-', '.'].contains(&c) c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
} }
/// expression = or
pub fn parse_expression<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> pub fn parse_expression<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
alt((|input| self.parse_or(input), |input| self.parse_and(input)))(input) self.parse_or(input)
} }
} }
@ -499,7 +484,19 @@ mod tests {
), ),
), ),
// test parenthesis // test parenthesis
/* (
Fc::from_str(
&rtxn,
&index,
"channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
),
Fc::And(
Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))),
Box::new(Fc::Or(
Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))),
Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))),
))),
),
( (
Fc::from_str( Fc::from_str(
&rtxn, &rtxn,
@ -516,7 +513,6 @@ mod tests {
)), )),
Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)))) Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.))))
), ),
*/
]; ];
for (result, expected) in test_case { for (result, expected) in test_case {