From 6c15f50899ba37f624aa521e0940c1bb8c86b5ba Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 21 Oct 2021 16:45:42 +0200 Subject: [PATCH] rewrite the parser logic --- milli/src/search/facet/filter_parser.rs | 156 ++++++++++++------------ 1 file changed, 76 insertions(+), 80 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 3454d91a4..9440a44ca 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -1,9 +1,26 @@ +//! BNF grammar: +//! +//! ```text +//! expression = or +//! or = and (~ "OR" ~ and)* +//! and = not (~ "AND" not)* +//! not = ("NOT" | "!") not | primary +//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius +//! to = value value TO value +//! condition = value ("==" | ">" ...) value +//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* +//! singleQuoted = "'" .* all but quotes "'" +//! doubleQuoted = "\"" (word | spaces)* "\"" +//! word = (alphanumeric | _ | - | .)+ +//! geoRadius = WS* ~ "_geoRadius(" float ~ "," ~ float ~ "," float ")" +//! 
``` + use std::collections::HashSet; use std::fmt::Debug; use std::result::Result as StdResult; use nom::branch::alt; -use nom::bytes::complete::{tag, take_till, take_till1, take_while1}; +use nom::bytes::complete::{tag, take_till, take_while1}; use nom::character::complete::{char, multispace0}; use nom::combinator::map; use nom::error::{ContextError, ErrorKind, VerboseError}; @@ -60,12 +77,14 @@ pub struct ParseContext<'a> { } impl<'a> ParseContext<'a> { + /// or = and (~ "OR" ~ and)* fn parse_or(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { let (input, lhs) = self.parse_and(input)?; - let (input, ors) = many0(preceded(self.ws(tag("OR")), |c| Self::parse_or(self, c)))(input)?; + let (input, ors) = + many0(preceded(self.ws(tag("OR")), |c| Self::parse_and(self, c)))(input)?; let expr = ors .into_iter() @@ -78,49 +97,40 @@ impl<'a> ParseContext<'a> { E: FilterParserError<'a>, { let (input, lhs) = self.parse_not(input)?; - let (input, ors) = - many0(preceded(self.ws(tag("AND")), |c| Self::parse_and(self, c)))(input)?; + let (input, ors) = many0(preceded(self.ws(tag("AND")), |c| self.parse_not(c)))(input)?; let expr = ors .into_iter() .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); Ok((input, expr)) } + /// not = ("NOT" | "!") not | primary fn parse_not(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { alt(( - map( - preceded(alt((self.ws(tag("!")), self.ws(tag("NOT")))), |c| { - Self::parse_condition_expression(self, c) - }), - |e| e.negate(), - ), - |c| Self::parse_condition_expression(self, c), + map(preceded(alt((tag("!"), tag("NOT"))), |c| self.parse_not(c)), |e| e.negate()), + |c| self.parse_primary(c), ))(input) } fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> where - F: Fn(&'a str) -> IResult<&'a str, O, E>, + F: FnMut(&'a str) -> IResult<&'a str, O, E>, E: FilterParserError<'a>, { 
delimited(multispace0, inner, multispace0) } - fn parse_simple_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + /// condition = value ("==" | ">" ...) value + fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); - let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_value(c))))( - input, - ); - let (input, (key, op, value)) = match k { - Ok(o) => o, - Err(e) => return Err(e), - }; + let (input, (key, op, value)) = + tuple((|c| self.parse_value(c), operator, |c| self.parse_value(c)))(input)?; let fid = self.parse_fid(input, key)?; let r: StdResult>> = self.parse_numeric(value); @@ -137,7 +147,17 @@ impl<'a> ParseContext<'a> { ); Ok((input, k)) } - ">" | "<" | "<=" | ">=" => self.parse_numeric_unary_condition(op, fid, value), + ">" | "<" | "<=" | ">=" => { + let numeric: f64 = self.parse_numeric(value)?; + let k = match op { + ">" => FilterCondition::Operator(fid, GreaterThan(numeric)), + "<" => FilterCondition::Operator(fid, LowerThan(numeric)), + "<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)), + ">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)), + _ => unreachable!(), + }; + Ok((input, k)) + } _ => unreachable!(), } } @@ -156,26 +176,6 @@ impl<'a> ParseContext<'a> { } } - fn parse_numeric_unary_condition( - &'a self, - input: &'a str, - fid: u16, - value: &'a str, - ) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let numeric: f64 = self.parse_numeric(value)?; - let k = match input { - ">" => FilterCondition::Operator(fid, GreaterThan(numeric)), - "<" => FilterCondition::Operator(fid, LowerThan(numeric)), - "<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)), - ">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)), - _ => unreachable!(), - }; - Ok((input, k)) - } 
- fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> where E: FilterParserError<'a>, @@ -193,12 +193,13 @@ impl<'a> ParseContext<'a> { } } - fn parse_range_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + /// to = value value TO value + fn parse_to(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { let (input, (key, from, _, to)) = tuple(( - self.ws(|c| self.parse_key(c)), + self.ws(|c| self.parse_value(c)), self.ws(|c| self.parse_value(c)), tag("TO"), self.ws(|c| self.parse_value(c)), @@ -212,6 +213,7 @@ impl<'a> ParseContext<'a> { Ok((input, res)) } + /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) fn parse_geo_radius(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, @@ -224,7 +226,8 @@ impl<'a> ParseContext<'a> { "_geoRadius. Longitude must be contained between -180 and 180 degrees."; let parsed = preceded::<_, _, _, E, _, _>( - tag("_geoRadius"), + // TODO: forbid spaces between _geoRadius and parenthesis + self.ws(tag("_geoRadius")), delimited( char('('), separated_list1(tag(","), self.ws(|c| recognize_float(c))), @@ -275,54 +278,35 @@ impl<'a> ParseContext<'a> { Ok((input, res)) } - fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let l1 = |c| self.parse_simple_condition(c); - let l2 = |c| self.parse_range_condition(c); - let l3 = |c| self.parse_geo_radius(c); - alt((l1, l2, l3))(input) - } - - fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> + /// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius + fn parse_primary(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> where E: FilterParserError<'a>, { alt(( - delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), - |c| Self::parse_condition(self, c), - ))(input) - } - 
- fn parse_key(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> - where - E: FilterParserError<'a>, - { - let key = |input| take_while1(Self::is_key_component)(input); - let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); - let quoted_key = |input| take_till(|c: char| c == '"')(input); - - alt(( - delimited(char('\''), simple_quoted_key, char('\'')), - delimited(char('"'), quoted_key, char('"')), - key, + delimited(self.ws(char('(')), |c| self.parse_expression(c), self.ws(char(')'))), + |c| self.parse_condition(c), + |c| self.parse_to(c), + |c| self.parse_geo_radius(c), ))(input) } + /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> where E: FilterParserError<'a>, { - let key = - |input| take_till1(|c: char| c.is_ascii_whitespace() || c == '(' || c == ')')(input); + // singleQuoted = "'" .* all but quotes "'" let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); + // doubleQuoted = "\"" (word | spaces)* "\"" let quoted_key = |input| take_till(|c: char| c == '"')(input); + // word = (alphanumeric | _ | - | .)+ + let word = |input| take_while1(Self::is_key_component)(input); alt(( - delimited(char('\''), simple_quoted_key, char('\'')), - delimited(char('"'), quoted_key, char('"')), - key, + self.ws(delimited(char('\''), simple_quoted_key, char('\''))), + self.ws(delimited(char('"'), quoted_key, char('"'))), + self.ws(word), ))(input) } @@ -330,11 +314,12 @@ impl<'a> ParseContext<'a> { c.is_alphanumeric() || ['_', '-', '.'].contains(&c) } + /// expression = or pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { - alt((|input| self.parse_or(input), |input| self.parse_and(input)))(input) + self.parse_or(input) } } @@ -499,7 +484,19 @@ mod tests { ), ), // test parenthesis - /* + ( + Fc::from_str( + &rtxn, + &index, + "channel = ponce AND ( 'dog race' != 'bernese mountain' 
OR subscribers > 1000 )", + ), + Fc::And( + Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), + Box::new(Fc::Or( + Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), + Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), + ))), + ), ( Fc::from_str( &rtxn, @@ -516,7 +513,6 @@ mod tests { )), Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)))) ), - */ ]; for (result, expected) in test_case {