From 661bc21af51f30e564685e3cdff9261ea0680884 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 20 Oct 2021 17:27:12 +0200 Subject: [PATCH 01/58] Fix the filter parser And add a bunch of tests on the filter::from_array --- milli/src/search/facet/filter_parser.rs | 174 +++++++++++++++++++----- 1 file changed, 141 insertions(+), 33 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 4d8a54987..cfa3cdae0 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -3,17 +3,19 @@ use std::fmt::Debug; use std::result::Result as StdResult; use nom::branch::alt; -use nom::bytes::complete::{tag, take_while1}; +use nom::bytes::complete::{tag, take_till, take_till1, take_while1}; use nom::character::complete::{char, multispace0}; use nom::combinator::map; use nom::error::{ContextError, ErrorKind, VerboseError}; use nom::multi::{many0, separated_list1}; +use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, tuple}; use nom::IResult; use self::Operator::*; use super::FilterCondition; use crate::{FieldId, FieldsIdsMap}; + #[derive(Debug, Clone, PartialEq)] pub enum Operator { GreaterThan(f64), @@ -111,28 +113,33 @@ impl<'a> ParseContext<'a> { where E: FilterParserError<'a>, { - let operator = alt((tag("<="), tag(">="), tag(">"), tag("="), tag("<"), tag("!="))); - let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_key(c))))( + let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); + let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_value(c))))( input, ); let (input, (key, op, value)) = match k { Ok(o) => o, - Err(e) => { - return Err(e); - } + Err(e) => return Err(e), }; let fid = self.parse_fid(input, key)?; let r: StdResult>> = self.parse_numeric(value); - let k = match op { - "=" => FilterCondition::Operator(fid, Equal(r.ok(), value.to_string().to_lowercase())), - "!=" => { - FilterCondition::Operator(fid, NotEqual(r.ok(), value.to_string().to_lowercase())) + match op { + "=" => { + let k = + FilterCondition::Operator(fid, Equal(r.ok(), value.to_string().to_lowercase())); + Ok((input, k)) } - ">" | "<" | "<=" | ">=" => return self.parse_numeric_unary_condition(op, fid, value), + "!=" => { + let k = FilterCondition::Operator( + fid, + NotEqual(r.ok(), value.to_string().to_lowercase()), + ); + Ok((input, k)) + } + ">" | "<" | "<=" | ">=" => self.parse_numeric_unary_condition(op, fid, value), _ => unreachable!(), - }; - Ok((input, k)) + } } fn parse_numeric(&'a self, input: &'a str) -> StdResult> @@ -142,12 +149,10 @@ impl<'a> ParseContext<'a> { { match input.parse::() { Ok(n) => Ok(n), - Err(_) => { - return match input.chars().nth(0) { - Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), - None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), - }; - } + Err(_) => match input.chars().nth(0) { + Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), + None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), + }, } } @@ -194,9 +199,9 @@ impl<'a> ParseContext<'a> { { let (input, (key, from, _, to)) = tuple(( self.ws(|c| self.parse_key(c)), - self.ws(|c| self.parse_key(c)), + self.ws(|c| self.parse_value(c)), tag("TO"), - self.ws(|c| self.parse_key(c)), + self.ws(|c| self.parse_value(c)), ))(input)?; let fid = self.parse_fid(input, key)?; @@ -211,22 +216,23 @@ impl<'a> ParseContext<'a> { where E: FilterParserError<'a>, { - let err_msg_args_incomplete= "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; + let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; let err_msg_latitude_invalid = "_geoRadius. Latitude must be contained between -90 and 90 degrees."; let err_msg_longitude_invalid = "_geoRadius. Longitude must be contained between -180 and 180 degrees."; - let (input, args): (&str, Vec<&str>) = match preceded( + let parsed = preceded::<_, _, _, E, _, _>( tag("_geoRadius"), delimited( char('('), - separated_list1(tag(","), self.ws(|c| self.parse_value::(c))), + separated_list1(tag(","), self.ws(|c| recognize_float(c))), char(')'), ), - )(input) - { + )(input); + + let (input, args): (&str, Vec<&str>) = match parsed { Ok(e) => e, Err(_e) => { return Err(nom::Err::Failure(E::add_context( @@ -293,15 +299,30 @@ impl<'a> ParseContext<'a> { E: FilterParserError<'a>, { let key = |input| take_while1(Self::is_key_component)(input); - alt((key, delimited(char('"'), key, char('"'))))(input) + let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); + let quoted_key = |input| take_till(|c: char| c == '"')(input); + + alt(( + delimited(char('\''), simple_quoted_key, char('\'')), + delimited(char('"'), quoted_key, char('"')), + key, + ))(input) } fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> where E: FilterParserError<'a>, { - let key = |input| take_while1(Self::is_key_component)(input); - alt((key, delimited(char('"'), key, char('"'))))(input) + let key = + |input| take_till1(|c: char| c.is_ascii_whitespace() || c == '(' || c == ')')(input); + let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); + let quoted_key = |input| take_till(|c: char| c == '"')(input); + + alt(( + delimited(char('\''), simple_quoted_key, char('\'')), + delimited(char('"'), quoted_key, char('"')), + key, + ))(input) } fn is_key_component(c: char) -> bool { @@ -312,7 +333,7 @@ impl<'a> ParseContext<'a> { where E: FilterParserError<'a>, { - self.parse_or(input) + alt((|input| self.parse_or(input), |input| self.parse_and(input)))(input) } } @@ -481,6 +502,90 @@ mod tests { builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); + // Simple array with Left + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = mv"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); + assert_eq!(condition, expected); + + // Simple array with Right + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = mv")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); + assert_eq!(condition, expected); + + // Array with Left and escaped quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = \"Mister Mv\""])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); + assert_eq!(condition, expected); + + // Array with Right and escaped quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = \"Mister Mv\"")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); + assert_eq!(condition, expected); + + // Array with Left and escaped simple quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = 'Mister Mv'"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); + assert_eq!(condition, expected); + + // Array with Right and escaped simple quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = 'Mister Mv'")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); + assert_eq!(condition, expected); + + // Simple with parenthesis + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["(channel = mv)"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "(channel = mv)").unwrap(); + assert_eq!(condition, expected); + // Test that the facet condition is correctly generated. let rtxn = index.read_txn().unwrap(); let condition = FilterCondition::from_array( @@ -501,6 +606,7 @@ mod tests { .unwrap(); assert_eq!(condition, expected); } + #[test] fn geo_radius() { let path = tempfile::tempdir().unwrap(); @@ -591,9 +697,11 @@ mod tests { let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-100, 150, 10)"); assert!(result.is_err()); let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Latitude must be contained between -90 and 90 degrees.")); + assert!( + error.to_string().contains("Latitude must be contained between -90 and 90 degrees."), + "{}", + error.to_string() + ); // georadius have a bad latitude let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90.0000001, 150, 10)"); From 36281a653f5052c677ad518fb680fc8ff044350c Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 21 Oct 2021 12:40:11 +0200 Subject: [PATCH 02/58] write all the simple tests --- milli/src/search/facet/filter_parser.rs | 121 +++++++++++++++++++++--- 1 file changed, 110 insertions(+), 11 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index cfa3cdae0..bd5aaf976 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -359,25 +359,124 @@ mod tests { let mut wtxn = index.write_txn().unwrap(); let mut map = index.fields_ids_map(&wtxn).unwrap(); map.insert("channel"); + map.insert("dog race"); + map.insert("subscribers"); index.put_fields_ids_map(&mut wtxn, &map).unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields(hashset! { S("channel") }); + builder.set_filterable_fields(hashset! { S("channel"), S("dog race"), S("subscribers") }); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); - // Test that the facet condition is correctly generated. let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap(); - let expected = FilterCondition::Operator(0, Operator::Equal(None, S("ponce"))); - assert_eq!(condition, expected); - let condition = FilterCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); - let expected = FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce"))); - assert_eq!(condition, expected); + use FilterCondition as Fc; + let test_case = [ + // simple test + ( + Fc::from_str(&rtxn, &index, "channel = Ponce"), + Fc::Operator(0, Operator::Equal(None, S("ponce"))), + ), + // test all the quotes and simple quotes + ( + Fc::from_str(&rtxn, &index, "channel = 'Mister Mv'"), + Fc::Operator(0, Operator::Equal(None, S("mister mv"))), + ), + ( + Fc::from_str(&rtxn, &index, "channel = \"Mister Mv\""), + Fc::Operator(0, Operator::Equal(None, S("mister mv"))), + ), + ( + Fc::from_str(&rtxn, &index, "'dog race' = Borzoi"), + Fc::Operator(1, Operator::Equal(None, S("borzoi"))), + ), + ( + Fc::from_str(&rtxn, &index, "\"dog race\" = Chusky"), + Fc::Operator(1, Operator::Equal(None, S("chusky"))), + ), + ( + Fc::from_str(&rtxn, &index, "\"dog race\" = \"Bernese Mountain\""), + Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), + ), + ( + Fc::from_str(&rtxn, &index, "'dog race' = 'Bernese Mountain'"), + Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), + ), + ( + Fc::from_str(&rtxn, &index, "\"dog race\" = 'Bernese Mountain'"), + Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), + ), + // test all the operators + ( + Fc::from_str(&rtxn, &index, "channel != ponce"), + Fc::Operator(0, Operator::NotEqual(None, S("ponce"))), + ), + ( + Fc::from_str(&rtxn, &index, "NOT channel = ponce"), + Fc::Operator(0, Operator::NotEqual(None, S("ponce"))), + ), + ( + Fc::from_str(&rtxn, &index, "subscribers < 1000"), + Fc::Operator(2, Operator::LowerThan(1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "subscribers > 1000"), + Fc::Operator(2, Operator::GreaterThan(1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "subscribers <= 1000"), + Fc::Operator(2, Operator::LowerThanOrEqual(1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "subscribers >= 1000"), + Fc::Operator(2, Operator::GreaterThanOrEqual(1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "NOT subscribers < 1000"), + Fc::Operator(2, Operator::GreaterThanOrEqual(1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "NOT subscribers > 1000"), + Fc::Operator(2, Operator::LowerThanOrEqual(1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "NOT subscribers <= 1000"), + Fc::Operator(2, Operator::GreaterThan(1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "NOT subscribers >= 1000"), + Fc::Operator(2, Operator::LowerThan(1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "subscribers 100 TO 1000"), + Fc::Operator(2, Operator::Between(100., 1000.)), + ), + ( + Fc::from_str(&rtxn, &index, "NOT subscribers 100 TO 1000"), + Fc::Or( + Box::new(Fc::Operator(2, Operator::LowerThan(100.))), + Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), + ), + ), + ( + Fc::from_str(&rtxn, &index, "_geoRadius(12, 13, 14)"), + Fc::Operator(2, Operator::GeoLowerThan([12., 13.], 14.)), + ), + ( + Fc::from_str(&rtxn, &index, "NOT _geoRadius(12, 13, 14)"), + Fc::Operator(2, Operator::GeoGreaterThan([12., 13.], 14.)), + ), + ]; - let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); - let expected = FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce"))); - assert_eq!(condition, expected); + for (result, expected) in test_case { + assert!( + result.is_ok(), + "Filter {:?} was supposed to be parsed but failed with the following error: `{}`", + expected, + result.unwrap_err() + ); + let filter = result.unwrap(); + assert_eq!(filter, expected,); + } } #[test] From 423baac08b1921594643eafcf5d4db8d679c84df Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 21 Oct 2021 12:45:40 +0200 Subject: [PATCH 03/58] fix the tests --- milli/src/search/facet/filter_parser.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index bd5aaf976..1ba4962f8 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -253,6 +253,7 @@ impl<'a> ParseContext<'a> { let fid = match self.fields_ids_map.id("_geo") { Some(fid) => fid, + // TODO send an error None => return Ok((input, FilterCondition::Empty)), }; @@ -361,9 +362,12 @@ mod tests { map.insert("channel"); map.insert("dog race"); map.insert("subscribers"); + map.insert("_geo"); index.put_fields_ids_map(&mut wtxn, &map).unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields(hashset! { S("channel"), S("dog race"), S("subscribers") }); + builder.set_filterable_fields( + hashset! { S("channel"), S("dog race"), S("subscribers"), S("_geo") }, + ); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -459,11 +463,11 @@ mod tests { ), ( Fc::from_str(&rtxn, &index, "_geoRadius(12, 13, 14)"), - Fc::Operator(2, Operator::GeoLowerThan([12., 13.], 14.)), + Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)), ), ( Fc::from_str(&rtxn, &index, "NOT _geoRadius(12, 13, 14)"), - Fc::Operator(2, Operator::GeoGreaterThan([12., 13.], 14.)), + Fc::Operator(3, Operator::GeoGreaterThan([12., 13.], 14.)), ), ]; From e1d81342cf39894c1c99c5e0e042752d1d909355 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 21 Oct 2021 13:01:25 +0200 Subject: [PATCH 04/58] add test on the or and and operator --- milli/src/search/facet/filter_parser.rs | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 1ba4962f8..3454d91a4 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -469,6 +469,54 @@ mod tests { Fc::from_str(&rtxn, &index, "NOT _geoRadius(12, 13, 14)"), Fc::Operator(3, Operator::GeoGreaterThan([12., 13.], 14.)), ), + // test simple `or` and `and` + ( + Fc::from_str(&rtxn, &index, "channel = ponce AND 'dog race' != 'bernese mountain'"), + Fc::And( + Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), + Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), + ), + ), + ( + Fc::from_str(&rtxn, &index, "channel = ponce OR 'dog race' != 'bernese mountain'"), + Fc::Or( + Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), + Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), + ), + ), + ( + Fc::from_str( + &rtxn, + &index, + "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", + ), + Fc::Or( + Box::new(Fc::And( + Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), + Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), + )), + Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), + ), + ), + // test parenthesis + /* + ( + Fc::from_str( + &rtxn, + &index, + "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", + ), + Fc::And( + Box::new(Fc::Or( + Box::new(Fc::And( + Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), + Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), + )), + Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), + )), + Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)))) + ), + */ ]; for (result, expected) in test_case { From 6c15f50899ba37f624aa521e0940c1bb8c86b5ba Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 21 Oct 2021 16:45:42 +0200 Subject: [PATCH 05/58] rewrite the parser logic --- milli/src/search/facet/filter_parser.rs | 156 ++++++++++++------------ 1 file changed, 76 insertions(+), 80 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 3454d91a4..9440a44ca 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -1,9 +1,26 @@ +//! BNF grammar: +//! +//! ```text +//! expression = or +//! or = and (~ "OR" ~ and) +//! and = not (~ "AND" not)* +//! not = ("NOT" | "!") not | primary +//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius +//! to = value value TO value +//! condition = value ("==" | ">" ...) value +//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* +//! singleQuoted = "'" .* all but quotes "'" +//! doubleQuoted = "\"" (word | spaces)* "\"" +//! word = (alphanumeric | _ | - | .)+ +//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) +//! ``` + use std::collections::HashSet; use std::fmt::Debug; use std::result::Result as StdResult; use nom::branch::alt; -use nom::bytes::complete::{tag, take_till, take_till1, take_while1}; +use nom::bytes::complete::{tag, take_till, take_while1}; use nom::character::complete::{char, multispace0}; use nom::combinator::map; use nom::error::{ContextError, ErrorKind, VerboseError}; @@ -60,12 +77,14 @@ pub struct ParseContext<'a> { } impl<'a> ParseContext<'a> { + /// and = not (~ "AND" not)* fn parse_or(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { let (input, lhs) = self.parse_and(input)?; - let (input, ors) = many0(preceded(self.ws(tag("OR")), |c| Self::parse_or(self, c)))(input)?; + let (input, ors) = + many0(preceded(self.ws(tag("OR")), |c| Self::parse_and(self, c)))(input)?; let expr = ors .into_iter() @@ -78,49 +97,40 @@ impl<'a> ParseContext<'a> { E: FilterParserError<'a>, { let (input, lhs) = self.parse_not(input)?; - let (input, ors) = - many0(preceded(self.ws(tag("AND")), |c| Self::parse_and(self, c)))(input)?; + let (input, ors) = many0(preceded(self.ws(tag("AND")), |c| self.parse_not(c)))(input)?; let expr = ors .into_iter() .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); Ok((input, expr)) } + /// not = ("NOT" | "!") not | primary fn parse_not(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { alt(( - map( - preceded(alt((self.ws(tag("!")), self.ws(tag("NOT")))), |c| { - Self::parse_condition_expression(self, c) - }), - |e| e.negate(), - ), - |c| Self::parse_condition_expression(self, c), + map(preceded(alt((tag("!"), tag("NOT"))), |c| self.parse_not(c)), |e| e.negate()), + |c| self.parse_primary(c), ))(input) } fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> where - F: Fn(&'a str) -> IResult<&'a str, O, E>, + F: FnMut(&'a str) -> IResult<&'a str, O, E>, E: FilterParserError<'a>, { delimited(multispace0, inner, multispace0) } - fn parse_simple_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + /// condition = value ("==" | ">" ...) value + fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); - let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_value(c))))( - input, - ); - let (input, (key, op, value)) = match k { - Ok(o) => o, - Err(e) => return Err(e), - }; + let (input, (key, op, value)) = + tuple((|c| self.parse_value(c), operator, |c| self.parse_value(c)))(input)?; let fid = self.parse_fid(input, key)?; let r: StdResult>> = self.parse_numeric(value); @@ -137,7 +147,17 @@ impl<'a> ParseContext<'a> { ); Ok((input, k)) } - ">" | "<" | "<=" | ">=" => self.parse_numeric_unary_condition(op, fid, value), + ">" | "<" | "<=" | ">=" => { + let numeric: f64 = self.parse_numeric(value)?; + let k = match op { + ">" => FilterCondition::Operator(fid, GreaterThan(numeric)), + "<" => FilterCondition::Operator(fid, LowerThan(numeric)), + "<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)), + ">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)), + _ => unreachable!(), + }; + Ok((input, k)) + } _ => unreachable!(), } } @@ -156,26 +176,6 @@ impl<'a> ParseContext<'a> { } } - fn parse_numeric_unary_condition( - &'a self, - input: &'a str, - fid: u16, - value: &'a str, - ) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let numeric: f64 = self.parse_numeric(value)?; - let k = match input { - ">" => FilterCondition::Operator(fid, GreaterThan(numeric)), - "<" => FilterCondition::Operator(fid, LowerThan(numeric)), - "<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)), - ">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)), - _ => unreachable!(), - }; - Ok((input, k)) - } - fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> where E: FilterParserError<'a>, @@ -193,12 +193,13 @@ impl<'a> ParseContext<'a> { } } - fn parse_range_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + /// to = value value TO value + fn parse_to(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { let (input, (key, from, _, to)) = tuple(( - self.ws(|c| self.parse_key(c)), + self.ws(|c| self.parse_value(c)), self.ws(|c| self.parse_value(c)), tag("TO"), self.ws(|c| self.parse_value(c)), @@ -212,6 +213,7 @@ impl<'a> ParseContext<'a> { Ok((input, res)) } + /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) fn parse_geo_radius(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, @@ -224,7 +226,8 @@ impl<'a> ParseContext<'a> { "_geoRadius. Longitude must be contained between -180 and 180 degrees."; let parsed = preceded::<_, _, _, E, _, _>( - tag("_geoRadius"), + // TODO: forbid spaces between _geoRadius and parenthesis + self.ws(tag("_geoRadius")), delimited( char('('), separated_list1(tag(","), self.ws(|c| recognize_float(c))), @@ -275,54 +278,35 @@ impl<'a> ParseContext<'a> { Ok((input, res)) } - fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let l1 = |c| self.parse_simple_condition(c); - let l2 = |c| self.parse_range_condition(c); - let l3 = |c| self.parse_geo_radius(c); - alt((l1, l2, l3))(input) - } - - fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> + /// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius + fn parse_primary(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> where E: FilterParserError<'a>, { alt(( - delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), - |c| Self::parse_condition(self, c), - ))(input) - } - - fn parse_key(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> - where - E: FilterParserError<'a>, - { - let key = |input| take_while1(Self::is_key_component)(input); - let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); - let quoted_key = |input| take_till(|c: char| c == '"')(input); - - alt(( - delimited(char('\''), simple_quoted_key, char('\'')), - delimited(char('"'), quoted_key, char('"')), - key, + delimited(self.ws(char('(')), |c| self.parse_expression(c), self.ws(char(')'))), + |c| self.parse_condition(c), + |c| self.parse_to(c), + |c| self.parse_geo_radius(c), ))(input) } + /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> where E: FilterParserError<'a>, { - let key = - |input| take_till1(|c: char| c.is_ascii_whitespace() || c == '(' || c == ')')(input); + // singleQuoted = "'" .* all but quotes "'" let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); + // doubleQuoted = "\"" (word | spaces)* "\"" let quoted_key = |input| take_till(|c: char| c == '"')(input); + // word = (alphanumeric | _ | - | .)+ + let word = |input| take_while1(Self::is_key_component)(input); alt(( - delimited(char('\''), simple_quoted_key, char('\'')), - delimited(char('"'), quoted_key, char('"')), - key, + self.ws(delimited(char('\''), simple_quoted_key, char('\''))), + self.ws(delimited(char('"'), quoted_key, char('"'))), + self.ws(word), ))(input) } @@ -330,11 +314,12 @@ impl<'a> ParseContext<'a> { c.is_alphanumeric() || ['_', '-', '.'].contains(&c) } + /// expression = or pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: FilterParserError<'a>, { - alt((|input| self.parse_or(input), |input| self.parse_and(input)))(input) + self.parse_or(input) } } @@ -499,7 +484,19 @@ mod tests { ), ), // test parenthesis - /* + ( + Fc::from_str( + &rtxn, + &index, + "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", + ), + Fc::And( + Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), + Box::new(Fc::Or( + Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), + Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), + ))), + ), ( Fc::from_str( &rtxn, @@ -516,7 +513,6 @@ mod tests { )), Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)))) ), - */ ]; for (result, expected) in test_case { From c634d43ac5f9bec5162b2158291045566b12338c Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 21 Oct 2021 17:10:27 +0200 Subject: [PATCH 06/58] add a simple test on the filters with an integer --- milli/src/search/facet/filter_parser.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 9440a44ca..47189841a 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -365,6 +365,10 @@ mod tests { Fc::from_str(&rtxn, &index, "channel = Ponce"), Fc::Operator(0, Operator::Equal(None, S("ponce"))), ), + ( + Fc::from_str(&rtxn, &index, "subscribers = 12"), + Fc::Operator(2, Operator::Equal(Some(12.), S("12"))), + ), // test all the quotes and simple quotes ( Fc::from_str(&rtxn, &index, "channel = 'Mister Mv'"), From 7e5c5c4d2750aea594dca8f0f597f13b60b6db1f Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 01:15:42 +0200 Subject: [PATCH 07/58] start a new rewrite of the filter parser --- Cargo.toml | 2 +- filter_parser/Cargo.toml | 15 + filter_parser/src/lib.rs | 623 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 639 insertions(+), 1 deletion(-) create mode 100644 filter_parser/Cargo.toml create mode 100644 filter_parser/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index b78989f50..5d2d47713 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["milli", "http-ui", "benchmarks", "infos", "helpers", "cli"] +members = ["milli", "filter_parser", "http-ui", "benchmarks", "infos", "helpers", "cli"] default-members = ["milli"] [profile.dev] diff --git a/filter_parser/Cargo.toml b/filter_parser/Cargo.toml new file mode 100644 index 000000000..d8a522b1b --- /dev/null +++ b/filter_parser/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "filter_parser" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +nom = "7.0.0" +nom_locate = "4.0.0" + +[dev-dependencies] +big_s = "1.0.2" +maplit = "1.0.2" +rand = "0.8.3" diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs new file mode 100644 index 000000000..6e6f5a1e6 --- /dev/null +++ b/filter_parser/src/lib.rs @@ -0,0 +1,623 @@ +#![allow(unused_imports)] +//! BNF grammar: +//! +//! ```text +//! expression = or +//! or = and (~ "OR" ~ and) +//! and = not (~ "AND" not)* +//! not = ("NOT" | "!") not | primary +//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius +//! to = value value TO value +//! condition = value ("==" | ">" ...) value +//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* +//! singleQuoted = "'" .* all but quotes "'" +//! doubleQuoted = "\"" (word | spaces)* "\"" +//! word = (alphanumeric | _ | - | .)+ +//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) +//! ``` + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FilterCondition<'a> { + Operator { fid: Token<'a>, op: Operator<'a> }, + Or(Box, Box), + And(Box, Box), + GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, + GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, + Empty, +} + +impl<'a> FilterCondition<'a> { + pub fn negate(self) -> FilterCondition<'a> { + use FilterCondition::*; + + match self { + Operator { fid, op } => match op.negate() { + (op, None) => Operator { fid, op }, + (a, Some(b)) => { + Or(Operator { fid: fid.clone(), op: a }.into(), Operator { fid, op: b }.into()) + } + }, + Or(a, b) => And(a.negate().into(), b.negate().into()), + And(a, b) => Or(a.negate().into(), b.negate().into()), + Empty => Empty, + GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, + GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, + } + } + + pub fn parse(input: &'a str) -> IResult { + let span = Span::new(input); + parse_expression(span) + } +} + +use std::collections::HashSet; +use std::fmt::Debug; +use std::result::Result as StdResult; + +use nom::branch::alt; +use nom::bytes::complete::{tag, take_till, take_while1}; +use nom::character::complete::{char, multispace0}; +use nom::combinator::map; +use nom::error::{ContextError, ErrorKind, ParseError, VerboseError}; +use nom::multi::{many0, separated_list1}; +use nom::number::complete::recognize_float; +use nom::sequence::{delimited, preceded, tuple}; +use nom::IResult; +use nom_locate::LocatedSpan; + +use self::Operator::*; + +pub enum FilterError { + AttributeNotFilterable(String), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Token<'a> { + pub position: Span<'a>, + pub inner: &'a str, +} + +impl<'a> Token<'a> { + pub fn new(position: Span<'a>) -> Self { + Self { position, inner: &position } + } +} + +impl<'a> From> for Token<'a> { + fn from(span: Span<'a>) -> Self { + Self { inner: &span, position: span } + } +} + +type Span<'a> = LocatedSpan<&'a str>; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Operator<'a> { + GreaterThan(Token<'a>), + GreaterThanOrEqual(Token<'a>), + Equal(Token<'a>), + NotEqual(Token<'a>), + LowerThan(Token<'a>), + LowerThanOrEqual(Token<'a>), + Between { from: Token<'a>, to: Token<'a> }, +} + +impl<'a> Operator<'a> { + /// This method can return two operations in case it must express + /// an OR operation for the between case (i.e. `TO`). + pub fn negate(self) -> (Self, Option) { + match self { + GreaterThan(n) => (LowerThanOrEqual(n), None), + GreaterThanOrEqual(n) => (LowerThan(n), None), + Equal(s) => (NotEqual(s), None), + NotEqual(s) => (Equal(s), None), + LowerThan(n) => (GreaterThanOrEqual(n), None), + LowerThanOrEqual(n) => (GreaterThan(n), None), + Between { from, to } => (LowerThan(from), Some(GreaterThan(to))), + } + } +} + +pub trait FilterParserError<'a>: + nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug +{ +} + +impl<'a> FilterParserError<'a> for VerboseError<&'a str> {} + +/// and = not (~ "AND" not)* +fn parse_or(input: Span) -> IResult { + let (input, lhs) = parse_and(input)?; + let (input, ors) = many0(preceded(ws(tag("OR")), |c| parse_and(c)))(input)?; + + let expr = ors + .into_iter() + .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); + Ok((input, expr)) +} + +fn parse_and(input: Span) -> IResult { + let (input, lhs) = parse_not(input)?; + let (input, ors) = many0(preceded(ws(tag("AND")), |c| parse_not(c)))(input)?; + let expr = ors + .into_iter() + .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); + Ok((input, expr)) +} + +/// not = ("NOT" | "!") not | primary +fn parse_not(input: Span) -> IResult { + alt((map(preceded(alt((tag("!"), tag("NOT"))), |c| parse_not(c)), |e| e.negate()), |c| { + parse_primary(c) + }))(input) +} + +fn ws<'a, O>( + inner: impl FnMut(Span<'a>) -> IResult, +) -> impl FnMut(Span<'a>) -> IResult { + delimited(multispace0, inner, multispace0) +} + +/// condition = value ("==" | ">" ...) value +fn parse_condition(input: Span) -> IResult { + let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); + let (input, (key, op, value)) = + tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?; + + let fid = key.into(); + + // TODO + match *op.fragment() { + "=" => { + let k = FilterCondition::Operator { fid, op: Equal(value.into()) }; + Ok((input, k)) + } + "!=" => { + let k = FilterCondition::Operator { fid, op: NotEqual(value.into()) }; + Ok((input, k)) + } + ">" | "<" | "<=" | ">=" => { + let k = match *op.fragment() { + ">" => FilterCondition::Operator { fid, op: GreaterThan(value.into()) }, + "<" => FilterCondition::Operator { fid, op: LowerThan(value.into()) }, + "<=" => FilterCondition::Operator { fid, op: LowerThanOrEqual(value.into()) }, + ">=" => FilterCondition::Operator { fid, op: GreaterThanOrEqual(value.into()) }, + _ => unreachable!(), + }; + Ok((input, k)) + } + _ => unreachable!(), + } +} + +/// to = value value TO value +fn parse_to(input: Span) -> IResult { + let (input, (key, from, _, to)) = + tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( + input, + )?; + + Ok(( + input, + FilterCondition::Operator { + fid: key.into(), + op: Between { from: from.into(), to: to.into() }, + }, + )) +} + +/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) +fn parse_geo_radius(input: Span) -> IResult { + let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; + /* + TODO + let err_msg_latitude_invalid = + "_geoRadius. Latitude must be contained between -90 and 90 degrees."; + + let err_msg_longitude_invalid = + "_geoRadius. Longitude must be contained between -180 and 180 degrees."; + */ + + let parsed = preceded::<_, _, _, _, _, _>( + // TODO: forbid spaces between _geoRadius and parenthesis + ws(tag("_geoRadius")), + delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')')), + )(input); + + let (input, args): (Span, Vec) = match parsed { + Ok(e) => e, + Err(_e) => { + return Err(nom::Err::Failure(nom::error::Error::add_context( + input, + err_msg_args_incomplete, + nom::error::Error::from_char(input, '('), + ))); + } + }; + + if args.len() != 3 { + let e = nom::error::Error::from_char(input, '('); + return Err(nom::Err::Failure(nom::error::Error::add_context( + input, + err_msg_args_incomplete, + e, + ))); + } + + let res = FilterCondition::GeoLowerThan { + point: [args[0].into(), args[1].into()], + radius: args[2].into(), + }; + Ok((input, res)) +} + +/// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius +fn parse_primary(input: Span) -> IResult { + alt(( + delimited(ws(char('(')), |c| parse_expression(c), ws(char(')'))), + |c| parse_condition(c), + |c| parse_to(c), + |c| parse_geo_radius(c), + ))(input) +} + +/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* +fn parse_value(input: Span) -> IResult { + // singleQuoted = "'" .* all but quotes "'" + let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); + // doubleQuoted = "\"" (word | spaces)* "\"" + let quoted_key = |input| take_till(|c: char| c == '"')(input); + // word = (alphanumeric | _ | - | .)+ + let word = |input| take_while1(is_key_component)(input); + + alt(( + ws(delimited(char('\''), simple_quoted_key, char('\''))), + ws(delimited(char('"'), quoted_key, char('"'))), + ws(word), + ))(input) +} + +fn is_key_component(c: char) -> bool { + c.is_alphanumeric() || ['_', '-', '.'].contains(&c) +} + +/// expression = or +pub fn parse_expression(input: Span) -> IResult { + parse_or(input) +} + +#[cfg(test)] +mod tests { + use big_s::S; + use maplit::hashset; + + use super::*; + + /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element + fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> { + // if the string is empty we still need to return 1 for the line number + let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count()); + let offset = before.chars().count(); + unsafe { Span::new_from_raw_offset(offset, lines as u32, value, ()) }.into() + } + + #[test] + fn parse() { + use FilterCondition as Fc; + + // new_from_raw_offset is unsafe + let test_case = [ + // simple test + ( + "channel = Ponce", + Fc::Operator { + fid: rtok("", "channel"), + op: Operator::Equal(rtok("channel = ", "Ponce")), + }, + ), + ( + "subscribers = 12", + Fc::Operator { + fid: rtok("", "subscribers"), + op: Operator::Equal(rtok("subscribers = ", "12")), + }, + ), + // test all the quotes and simple quotes + ( + "channel = 'Mister Mv'", + Fc::Operator { + fid: rtok("", "channel"), + op: Operator::Equal(rtok("channel = '", "Mister Mv")), + }, + ), + ( + "channel = \"Mister Mv\"", + Fc::Operator { + fid: rtok("", "channel"), + op: Operator::Equal(rtok("channel = \"", "Mister Mv")), + }, + ), + ( + "'dog race' = Borzoi", + Fc::Operator { + fid: rtok("'", "dog race"), + op: Operator::Equal(rtok("'dog race' = ", "Borzoi")), + }, + ), + ( + "\"dog race\" = Chusky", + Fc::Operator { + fid: rtok("\"", "dog race"), + op: Operator::Equal(rtok("\"dog race\" = ", "Chusky")), + }, + ), + ( + "\"dog race\" = \"Bernese Mountain\"", + Fc::Operator { + fid: rtok("\"", "dog race"), + op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), + }, + ), + ( + "'dog race' = 'Bernese Mountain'", + Fc::Operator { + fid: rtok("'", "dog race"), + op: Operator::Equal(rtok("'dog race' = '", "Bernese Mountain")), + }, + ), + ( + "\"dog race\" = 'Bernese Mountain'", + Fc::Operator { + fid: rtok("\"", "dog race"), + op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), + }, + ), + // test all the operators + ( + "channel != ponce", + Fc::Operator { + fid: rtok("", "channel"), + op: Operator::NotEqual(rtok("channel != ", "ponce")), + }, + ), + ( + "NOT channel = ponce", + Fc::Operator { + fid: rtok("NOT ", "channel"), + op: Operator::NotEqual(rtok("NOT channel = ", "ponce")), + }, + ), + ( + "subscribers < 1000", + Fc::Operator { + fid: rtok("", "subscribers"), + op: Operator::LowerThan(rtok("subscribers < ", "1000")), + }, + ), + ( + "subscribers > 1000", + Fc::Operator { + fid: rtok("", "subscribers"), + op: Operator::GreaterThan(rtok("subscribers > ", "1000")), + }, + ), + ( + "subscribers <= 1000", + Fc::Operator { + fid: rtok("", "subscribers"), + op: Operator::LowerThanOrEqual(rtok("subscribers <= ", "1000")), + }, + ), + ( + "subscribers >= 1000", + Fc::Operator { + fid: rtok("", "subscribers"), + op: Operator::GreaterThanOrEqual(rtok("subscribers >= ", "1000")), + }, + ), + ( + "NOT subscribers < 1000", + Fc::Operator { + fid: rtok("NOT ", "subscribers"), + op: Operator::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")), + }, + ), + ( + "NOT subscribers > 1000", + Fc::Operator { + fid: rtok("NOT ", "subscribers"), + op: Operator::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")), + }, + ), + ( + "NOT subscribers <= 1000", + Fc::Operator { + fid: rtok("NOT ", "subscribers"), + op: Operator::GreaterThan(rtok("NOT subscribers <= ", "1000")), + }, + ), + ( + "NOT subscribers >= 1000", + Fc::Operator { + fid: rtok("NOT ", "subscribers"), + op: Operator::LowerThan(rtok("NOT subscribers >= ", "1000")), + }, + ), + ( + "subscribers 100 TO 1000", + Fc::Operator { + fid: rtok("", "subscribers"), + op: Operator::Between { + from: rtok("subscribers ", "100"), + to: rtok("subscribers 100 TO ", "1000"), + }, + }, + ), + ( + "NOT subscribers 100 TO 1000", + Fc::Or( + Fc::Operator { + fid: rtok("NOT ", "subscribers"), + op: Operator::LowerThan(rtok("NOT subscribers ", "100")), + } + .into(), + Fc::Operator { + fid: rtok("NOT ", "subscribers"), + op: Operator::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), + } + .into(), + ), + ), + ( + "_geoRadius(12, 13, 14)", + Fc::GeoLowerThan { + point: [rtok("_geoRadius(", "12"), rtok("_geoRadius(12, ", "13")], + radius: rtok("_geoRadius(12, 13, ", "14"), + }, + ), + ( + "NOT _geoRadius(12, 13, 14)", + Fc::GeoGreaterThan { + point: [rtok("NOT _geoRadius(", "12"), rtok("NOT _geoRadius(12, ", "13")], + radius: rtok("NOT _geoRadius(12, 13, ", "14"), + }, + ), + // test simple `or` and `and` + ( + "channel = ponce AND 'dog race' != 'bernese mountain'", + Fc::And( + Fc::Operator { + fid: rtok("", "channel"), + op: Operator::Equal(rtok("channel = ", "ponce")), + } + .into(), + Fc::Operator { + fid: rtok("channel = ponce AND '", "dog race"), + op: Operator::NotEqual(rtok( + "channel = ponce AND 'dog race' != '", + "bernese mountain", + )), + } + .into(), + ), + ), + ( + "channel = ponce OR 'dog race' != 'bernese mountain'", + Fc::Or( + Fc::Operator { + fid: rtok("", "channel"), + op: Operator::Equal(rtok("channel = ", "ponce")), + } + .into(), + Fc::Operator { + fid: rtok("channel = ponce OR '", "dog race"), + op: Operator::NotEqual(rtok( + "channel = ponce OR 'dog race' != '", + "bernese mountain", + )), + } + .into(), + ), + ), + ( + "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", + Fc::Or( + Fc::And( + Fc::Operator { + fid: rtok("", "channel"), + op: Operator::Equal(rtok("channel = ", "ponce")), + } + .into(), + Fc::Operator { + fid: rtok("channel = ponce AND '", "dog race"), + op: Operator::NotEqual(rtok( + "channel = ponce AND 'dog race' != '", + "bernese mountain", + )), + } + .into(), + ) + .into(), + Fc::Operator { + fid: rtok( + "channel = ponce AND 'dog race' != 'bernese mountain' OR ", + "subscribers", + ), + op: Operator::GreaterThan(rtok( + "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", + "1000", + )), + } + .into(), + ), + ), + // test parenthesis + ( + "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", + Fc::And( + Fc::Operator { fid: rtok("", "channel"), op: Operator::Equal(rtok("channel = ", "ponce")) }.into(), + Fc::Or( + Fc::Operator { fid: rtok("channel = ponce AND ( '", "dog race"), op: Operator::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), + Fc::Operator { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), + ).into()), + ), + ( + "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", + Fc::And( + Fc::Or( + Fc::And( + Fc::Operator { fid: rtok("(", "channel"), op: Operator::Equal(rtok("(channel = ", "ponce")) }.into(), + Fc::Operator { fid: rtok("(channel = ponce AND '", "dog race"), op: Operator::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), + ).into(), + Fc::Operator { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), + ).into(), + Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() + ) + ) + ]; + + for (input, expected) in test_case { + let result = Fc::parse(input); + + assert!( + result.is_ok(), + "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", + expected, + result.unwrap_err() + ); + let filter = result.unwrap().1; + assert_eq!(filter, expected, "Filter `{}` failed.", input); + } + } + + #[test] + fn name() { + use FilterCondition as Fc; + + // new_from_raw_offset is unsafe + let test_case = [ + // simple test + ( + "channel=Ponce", + Fc::Operator { + fid: rtok("", "channel"), + op: Operator::Equal(rtok("channel = ", "Ponce")), + }, + ), + ]; + + for (input, expected) in test_case { + let result = Fc::parse(input); + + assert!( + result.is_ok(), + "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", + expected, + result.unwrap_err() + ); + let filter = result.unwrap().1; + assert_eq!(filter, expected, "Filter `{}` failed.", input); + } + } +} From 01dedde1c9e1e2195d4b72252cf8153fdb743aa9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 01:59:38 +0200 Subject: [PATCH 08/58] update some names and move some parser out of the lib.rs --- filter_parser/Cargo.toml | 5 - filter_parser/src/condition.rs | 94 ++++++ filter_parser/src/lib.rs | 355 +++++++-------------- filter_parser/src/value.rs | 71 +++++ milli/Cargo.toml | 1 + milli/src/search/facet/filter_condition.rs | 22 +- milli/src/search/facet/filter_parser.rs | 52 +-- milli/src/search/mod.rs | 3 +- 8 files changed, 318 insertions(+), 285 deletions(-) create mode 100644 filter_parser/src/condition.rs create mode 100644 filter_parser/src/value.rs diff --git a/filter_parser/Cargo.toml b/filter_parser/Cargo.toml index d8a522b1b..80767d5c4 100644 --- a/filter_parser/Cargo.toml +++ b/filter_parser/Cargo.toml @@ -8,8 +8,3 @@ edition = "2021" [dependencies] nom = "7.0.0" nom_locate = "4.0.0" - -[dev-dependencies] -big_s = "1.0.2" -maplit = "1.0.2" -rand = "0.8.3" diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs new file mode 100644 index 000000000..5a1bb62be --- /dev/null +++ b/filter_parser/src/condition.rs @@ -0,0 +1,94 @@ +//! BNF grammar: +//! +//! ```text +//! condition = value ("==" | ">" ...) value +//! to = value value TO value +//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* +//! singleQuoted = "'" .* all but quotes "'" +//! doubleQuoted = "\"" (word | spaces)* "\"" +//! word = (alphanumeric | _ | - | .)+ +//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) +//! ``` + +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::sequence::tuple; +use nom::IResult; +use Condition::*; + +use crate::{parse_value, ws, FilterCondition, Span, Token}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Condition<'a> { + GreaterThan(Token<'a>), + GreaterThanOrEqual(Token<'a>), + Equal(Token<'a>), + NotEqual(Token<'a>), + LowerThan(Token<'a>), + LowerThanOrEqual(Token<'a>), + Between { from: Token<'a>, to: Token<'a> }, +} + +impl<'a> Condition<'a> { + /// This method can return two operations in case it must express + /// an OR operation for the between case (i.e. `TO`). + pub fn negate(self) -> (Self, Option) { + match self { + GreaterThan(n) => (LowerThanOrEqual(n), None), + GreaterThanOrEqual(n) => (LowerThan(n), None), + Equal(s) => (NotEqual(s), None), + NotEqual(s) => (Equal(s), None), + LowerThan(n) => (GreaterThanOrEqual(n), None), + LowerThanOrEqual(n) => (GreaterThan(n), None), + Between { from, to } => (LowerThan(from), Some(GreaterThan(to))), + } + } +} + +/// condition = value ("==" | ">" ...) value +pub fn parse_condition(input: Span) -> IResult { + let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); + let (input, (key, op, value)) = + tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?; + + let fid = key; + + // TODO + match *op.fragment() { + "=" => { + let k = FilterCondition::Condition { fid, op: Equal(value) }; + Ok((input, k)) + } + "!=" => { + let k = FilterCondition::Condition { fid, op: NotEqual(value) }; + Ok((input, k)) + } + ">" | "<" | "<=" | ">=" => { + let k = match *op.fragment() { + ">" => FilterCondition::Condition { fid, op: GreaterThan(value) }, + "<" => FilterCondition::Condition { fid, op: LowerThan(value) }, + "<=" => FilterCondition::Condition { fid, op: LowerThanOrEqual(value) }, + ">=" => FilterCondition::Condition { fid, op: GreaterThanOrEqual(value) }, + _ => unreachable!(), + }; + Ok((input, k)) + } + _ => unreachable!(), + } +} + +/// to = value value TO value +pub fn parse_to(input: Span) -> IResult { + let (input, (key, from, _, to)) = + tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( + input, + )?; + + Ok(( + input, + FilterCondition::Condition { + fid: key.into(), + op: Between { from: from.into(), to: to.into() }, + }, + )) +} diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 6e6f5a1e6..096a9e26e 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -1,4 +1,3 @@ -#![allow(unused_imports)] //! BNF grammar: //! //! ```text @@ -7,8 +6,8 @@ //! and = not (~ "AND" not)* //! not = ("NOT" | "!") not | primary //! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius -//! to = value value TO value //! condition = value ("==" | ">" ...) value +//! to = value value TO value //! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* //! singleQuoted = "'" .* all but quotes "'" //! doubleQuoted = "\"" (word | spaces)* "\"" @@ -16,61 +15,24 @@ //! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) //! ``` -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum FilterCondition<'a> { - Operator { fid: Token<'a>, op: Operator<'a> }, - Or(Box, Box), - And(Box, Box), - GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, - GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, - Empty, -} - -impl<'a> FilterCondition<'a> { - pub fn negate(self) -> FilterCondition<'a> { - use FilterCondition::*; - - match self { - Operator { fid, op } => match op.negate() { - (op, None) => Operator { fid, op }, - (a, Some(b)) => { - Or(Operator { fid: fid.clone(), op: a }.into(), Operator { fid, op: b }.into()) - } - }, - Or(a, b) => And(a.negate().into(), b.negate().into()), - And(a, b) => Or(a.negate().into(), b.negate().into()), - Empty => Empty, - GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, - GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, - } - } - - pub fn parse(input: &'a str) -> IResult { - let span = Span::new(input); - parse_expression(span) - } -} - -use std::collections::HashSet; +mod condition; +mod value; use std::fmt::Debug; -use std::result::Result as StdResult; +pub use condition::{parse_condition, parse_to, Condition}; use nom::branch::alt; -use nom::bytes::complete::{tag, take_till, take_while1}; +use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0}; use nom::combinator::map; -use nom::error::{ContextError, ErrorKind, ParseError, VerboseError}; +use nom::error::{ContextError, ParseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; -use nom::sequence::{delimited, preceded, tuple}; +use nom::sequence::{delimited, preceded}; use nom::IResult; use nom_locate::LocatedSpan; +pub(crate) use value::parse_value; -use self::Operator::*; - -pub enum FilterError { - AttributeNotFilterable(String), -} +type Span<'a> = LocatedSpan<&'a str>; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Token<'a> { @@ -90,42 +52,49 @@ impl<'a> From> for Token<'a> { } } -type Span<'a> = LocatedSpan<&'a str>; - #[derive(Debug, Clone, PartialEq, Eq)] -pub enum Operator<'a> { - GreaterThan(Token<'a>), - GreaterThanOrEqual(Token<'a>), - Equal(Token<'a>), - NotEqual(Token<'a>), - LowerThan(Token<'a>), - LowerThanOrEqual(Token<'a>), - Between { from: Token<'a>, to: Token<'a> }, +pub enum FilterCondition<'a> { + Condition { fid: Token<'a>, op: Condition<'a> }, + Or(Box, Box), + And(Box, Box), + GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, + GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, + Empty, } -impl<'a> Operator<'a> { - /// This method can return two operations in case it must express - /// an OR operation for the between case (i.e. `TO`). - pub fn negate(self) -> (Self, Option) { +impl<'a> FilterCondition<'a> { + pub fn negate(self) -> FilterCondition<'a> { + use FilterCondition::*; + match self { - GreaterThan(n) => (LowerThanOrEqual(n), None), - GreaterThanOrEqual(n) => (LowerThan(n), None), - Equal(s) => (NotEqual(s), None), - NotEqual(s) => (Equal(s), None), - LowerThan(n) => (GreaterThanOrEqual(n), None), - LowerThanOrEqual(n) => (GreaterThan(n), None), - Between { from, to } => (LowerThan(from), Some(GreaterThan(to))), + Condition { fid, op } => match op.negate() { + (op, None) => Condition { fid, op }, + (a, Some(b)) => Or( + Condition { fid: fid.clone(), op: a }.into(), + Condition { fid, op: b }.into(), + ), + }, + Or(a, b) => And(a.negate().into(), b.negate().into()), + And(a, b) => Or(a.negate().into(), b.negate().into()), + Empty => Empty, + GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, + GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, } } + + pub fn parse(input: &'a str) -> IResult { + let span = Span::new(input); + parse_expression(span) + } } -pub trait FilterParserError<'a>: - nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug -{ +// remove OPTIONAL whitespaces before AND after the the provided parser +fn ws<'a, O>( + inner: impl FnMut(Span<'a>) -> IResult, +) -> impl FnMut(Span<'a>) -> IResult { + delimited(multispace0, inner, multispace0) } -impl<'a> FilterParserError<'a> for VerboseError<&'a str> {} - /// and = not (~ "AND" not)* fn parse_or(input: Span) -> IResult { let (input, lhs) = parse_and(input)?; @@ -153,60 +122,6 @@ fn parse_not(input: Span) -> IResult { }))(input) } -fn ws<'a, O>( - inner: impl FnMut(Span<'a>) -> IResult, -) -> impl FnMut(Span<'a>) -> IResult { - delimited(multispace0, inner, multispace0) -} - -/// condition = value ("==" | ">" ...) value -fn parse_condition(input: Span) -> IResult { - let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); - let (input, (key, op, value)) = - tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?; - - let fid = key.into(); - - // TODO - match *op.fragment() { - "=" => { - let k = FilterCondition::Operator { fid, op: Equal(value.into()) }; - Ok((input, k)) - } - "!=" => { - let k = FilterCondition::Operator { fid, op: NotEqual(value.into()) }; - Ok((input, k)) - } - ">" | "<" | "<=" | ">=" => { - let k = match *op.fragment() { - ">" => FilterCondition::Operator { fid, op: GreaterThan(value.into()) }, - "<" => FilterCondition::Operator { fid, op: LowerThan(value.into()) }, - "<=" => FilterCondition::Operator { fid, op: LowerThanOrEqual(value.into()) }, - ">=" => FilterCondition::Operator { fid, op: GreaterThanOrEqual(value.into()) }, - _ => unreachable!(), - }; - Ok((input, k)) - } - _ => unreachable!(), - } -} - -/// to = value value TO value -fn parse_to(input: Span) -> IResult { - let (input, (key, from, _, to)) = - tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( - input, - )?; - - Ok(( - input, - FilterCondition::Operator { - fid: key.into(), - op: Between { from: from.into(), to: to.into() }, - }, - )) -} - /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) fn parse_geo_radius(input: Span) -> IResult { let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; @@ -262,40 +177,17 @@ fn parse_primary(input: Span) -> IResult { ))(input) } -/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -fn parse_value(input: Span) -> IResult { - // singleQuoted = "'" .* all but quotes "'" - let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); - // doubleQuoted = "\"" (word | spaces)* "\"" - let quoted_key = |input| take_till(|c: char| c == '"')(input); - // word = (alphanumeric | _ | - | .)+ - let word = |input| take_while1(is_key_component)(input); - - alt(( - ws(delimited(char('\''), simple_quoted_key, char('\''))), - ws(delimited(char('"'), quoted_key, char('"'))), - ws(word), - ))(input) -} - -fn is_key_component(c: char) -> bool { - c.is_alphanumeric() || ['_', '-', '.'].contains(&c) -} - /// expression = or pub fn parse_expression(input: Span) -> IResult { parse_or(input) } #[cfg(test)] -mod tests { - use big_s::S; - use maplit::hashset; - +pub mod tests { use super::*; /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element - fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> { + pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> { // if the string is empty we still need to return 1 for the line number let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count()); let offset = before.chars().count(); @@ -306,149 +198,148 @@ mod tests { fn parse() { use FilterCondition as Fc; - // new_from_raw_offset is unsafe let test_case = [ // simple test ( "channel = Ponce", - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "Ponce")), + op: Condition::Equal(rtok("channel = ", "Ponce")), }, ), ( "subscribers = 12", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::Equal(rtok("subscribers = ", "12")), + op: Condition::Equal(rtok("subscribers = ", "12")), }, ), // test all the quotes and simple quotes ( "channel = 'Mister Mv'", - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = '", "Mister Mv")), + op: Condition::Equal(rtok("channel = '", "Mister Mv")), }, ), ( "channel = \"Mister Mv\"", - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = \"", "Mister Mv")), + op: Condition::Equal(rtok("channel = \"", "Mister Mv")), }, ), ( "'dog race' = Borzoi", - Fc::Operator { + Fc::Condition { fid: rtok("'", "dog race"), - op: Operator::Equal(rtok("'dog race' = ", "Borzoi")), + op: Condition::Equal(rtok("'dog race' = ", "Borzoi")), }, ), ( "\"dog race\" = Chusky", - Fc::Operator { + Fc::Condition { fid: rtok("\"", "dog race"), - op: Operator::Equal(rtok("\"dog race\" = ", "Chusky")), + op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")), }, ), ( "\"dog race\" = \"Bernese Mountain\"", - Fc::Operator { + Fc::Condition { fid: rtok("\"", "dog race"), - op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), + op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), }, ), ( "'dog race' = 'Bernese Mountain'", - Fc::Operator { + Fc::Condition { fid: rtok("'", "dog race"), - op: Operator::Equal(rtok("'dog race' = '", "Bernese Mountain")), + op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")), }, ), ( "\"dog race\" = 'Bernese Mountain'", - Fc::Operator { + Fc::Condition { fid: rtok("\"", "dog race"), - op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), + op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), }, ), // test all the operators ( "channel != ponce", - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::NotEqual(rtok("channel != ", "ponce")), + op: Condition::NotEqual(rtok("channel != ", "ponce")), }, ), ( "NOT channel = ponce", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "channel"), - op: Operator::NotEqual(rtok("NOT channel = ", "ponce")), + op: Condition::NotEqual(rtok("NOT channel = ", "ponce")), }, ), ( "subscribers < 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::LowerThan(rtok("subscribers < ", "1000")), + op: Condition::LowerThan(rtok("subscribers < ", "1000")), }, ), ( "subscribers > 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::GreaterThan(rtok("subscribers > ", "1000")), + op: Condition::GreaterThan(rtok("subscribers > ", "1000")), }, ), ( "subscribers <= 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::LowerThanOrEqual(rtok("subscribers <= ", "1000")), + op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")), }, ), ( "subscribers >= 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::GreaterThanOrEqual(rtok("subscribers >= ", "1000")), + op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")), }, ), ( "NOT subscribers < 1000", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")), + op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")), }, ), ( "NOT subscribers > 1000", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")), + op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")), }, ), ( "NOT subscribers <= 1000", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::GreaterThan(rtok("NOT subscribers <= ", "1000")), + op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")), }, ), ( "NOT subscribers >= 1000", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::LowerThan(rtok("NOT subscribers >= ", "1000")), + op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")), }, ), ( "subscribers 100 TO 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::Between { + op: Condition::Between { from: rtok("subscribers ", "100"), to: rtok("subscribers 100 TO ", "1000"), }, @@ -457,14 +348,14 @@ mod tests { ( "NOT subscribers 100 TO 1000", Fc::Or( - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::LowerThan(rtok("NOT subscribers ", "100")), + op: Condition::LowerThan(rtok("NOT subscribers ", "100")), } .into(), - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), + op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), } .into(), ), @@ -487,14 +378,14 @@ mod tests { ( "channel = ponce AND 'dog race' != 'bernese mountain'", Fc::And( - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "ponce")), + op: Condition::Equal(rtok("channel = ", "ponce")), } .into(), - Fc::Operator { + Fc::Condition { fid: rtok("channel = ponce AND '", "dog race"), - op: Operator::NotEqual(rtok( + op: Condition::NotEqual(rtok( "channel = ponce AND 'dog race' != '", "bernese mountain", )), @@ -505,14 +396,14 @@ mod tests { ( "channel = ponce OR 'dog race' != 'bernese mountain'", Fc::Or( - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "ponce")), + op: Condition::Equal(rtok("channel = ", "ponce")), } .into(), - Fc::Operator { + Fc::Condition { fid: rtok("channel = ponce OR '", "dog race"), - op: Operator::NotEqual(rtok( + op: Condition::NotEqual(rtok( "channel = ponce OR 'dog race' != '", "bernese mountain", )), @@ -524,14 +415,14 @@ mod tests { "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", Fc::Or( Fc::And( - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "ponce")), + op: Condition::Equal(rtok("channel = ", "ponce")), } .into(), - Fc::Operator { + Fc::Condition { fid: rtok("channel = ponce AND '", "dog race"), - op: Operator::NotEqual(rtok( + op: Condition::NotEqual(rtok( "channel = ponce AND 'dog race' != '", "bernese mountain", )), @@ -539,12 +430,12 @@ mod tests { .into(), ) .into(), - Fc::Operator { + Fc::Condition { fid: rtok( "channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers", ), - op: Operator::GreaterThan(rtok( + op: Condition::GreaterThan(rtok( "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000", )), @@ -556,10 +447,10 @@ mod tests { ( "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", Fc::And( - Fc::Operator { fid: rtok("", "channel"), op: Operator::Equal(rtok("channel = ", "ponce")) }.into(), + Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(), Fc::Or( - Fc::Operator { fid: rtok("channel = ponce AND ( '", "dog race"), op: Operator::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), - Fc::Operator { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), + Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), + Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), ).into()), ), ( @@ -567,10 +458,10 @@ mod tests { Fc::And( Fc::Or( Fc::And( - Fc::Operator { fid: rtok("(", "channel"), op: Operator::Equal(rtok("(channel = ", "ponce")) }.into(), - Fc::Operator { fid: rtok("(channel = ponce AND '", "dog race"), op: Operator::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), + Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(), + Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), ).into(), - Fc::Operator { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), + Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), ).into(), Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() ) @@ -590,34 +481,4 @@ mod tests { assert_eq!(filter, expected, "Filter `{}` failed.", input); } } - - #[test] - fn name() { - use FilterCondition as Fc; - - // new_from_raw_offset is unsafe - let test_case = [ - // simple test - ( - "channel=Ponce", - Fc::Operator { - fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "Ponce")), - }, - ), - ]; - - for (input, expected) in test_case { - let result = Fc::parse(input); - - assert!( - result.is_ok(), - "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", - expected, - result.unwrap_err() - ); - let filter = result.unwrap().1; - assert_eq!(filter, expected, "Filter `{}` failed.", input); - } - } } diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs new file mode 100644 index 000000000..c36becf7e --- /dev/null +++ b/filter_parser/src/value.rs @@ -0,0 +1,71 @@ +use nom::branch::alt; +use nom::bytes::complete::{take_till, take_while1}; +use nom::character::complete::char; +use nom::sequence::delimited; +use nom::IResult; + +use crate::{ws, Span, Token}; + +/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* +pub fn parse_value(input: Span) -> IResult { + // singleQuoted = "'" .* all but quotes "'" + let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); + // doubleQuoted = "\"" (word | spaces)* "\"" + let quoted_key = |input| take_till(|c: char| c == '"')(input); + // word = (alphanumeric | _ | - | .)+ + let word = |input| take_while1(is_key_component)(input); + + alt(( + ws(delimited(char('\''), simple_quoted_key, char('\''))), + ws(delimited(char('"'), quoted_key, char('"'))), + ws(word), + ))(input) + .map(|(s, t)| (s, t.into())) +} + +fn is_key_component(c: char) -> bool { + c.is_alphanumeric() || ['_', '-', '.'].contains(&c) +} + +#[cfg(test)] +pub mod tests { + use super::*; + use crate::tests::rtok; + + #[test] + fn name() { + let test_case = [ + ("channel", rtok("", "channel")), + (".private", rtok("", ".private")), + ("I-love-kebab", rtok("", "I-love-kebab")), + ("but_snakes_are_also_good", rtok("", "but_snakes_are_also_good")), + ("parens(", rtok("", "parens")), + ("parens)", rtok("", "parens")), + ("not!", rtok("", "not")), + (" channel", rtok(" ", "channel")), + ("channel ", rtok("", "channel")), + ("'channel'", rtok("'", "channel")), + ("\"channel\"", rtok("\"", "channel")), + ("'cha)nnel'", rtok("'", "cha)nnel")), + ("'cha\"nnel'", rtok("'", "cha\"nnel")), + ("\"cha'nnel\"", rtok("\"", "cha'nnel")), + ("\" some spaces \"", rtok("\"", " some spaces ")), + ("\"cha'nnel\"", rtok("'", "cha'nnel")), + ("\"cha'nnel\"", rtok("'", "cha'nnel")), + ]; + + for (input, expected) in test_case { + let input = Span::new(input); + let result = parse_value(input); + + assert!( + result.is_ok(), + "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", + expected, + result.unwrap_err() + ); + let value = result.unwrap().1; + assert_eq!(value, expected, "Filter `{}` failed.", input); + } + } +} diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 594cc60e0..63fd0d984 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -40,6 +40,7 @@ uuid = { version = "0.8.2", features = ["v4"] } # facet filter parser nom = "7.0.0" +nom_locate = "4.0.0" # documents words self-join itertools = "0.10.0" diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 4fedeee69..5c57adb88 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -16,20 +16,20 @@ use crate::heed_codec::facet::{ }; use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; -#[derive(Debug, Clone, PartialEq)] -pub enum FilterCondition { - Operator(FieldId, Operator), +#[derive(Debug, Clone)] +pub enum FilterCondition<'a> { + Operator(FieldId, Operator<'a>), Or(Box, Box), And(Box, Box), Empty, } -impl FilterCondition { - pub fn from_array( +impl<'a> FilterCondition<'a> { + pub fn from_array( rtxn: &heed::RoTxn, index: &Index, array: I, - ) -> Result> + ) -> Result>> where I: IntoIterator>, J: IntoIterator, @@ -73,8 +73,8 @@ impl FilterCondition { pub fn from_str( rtxn: &heed::RoTxn, index: &Index, - expression: &str, - ) -> Result { + expression: &'a str, + ) -> Result> { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?; let ctx = @@ -93,7 +93,7 @@ impl FilterCondition { } } } - pub fn negate(self) -> FilterCondition { + pub fn negate(self) -> FilterCondition<'a> { match self { Operator(fid, op) => match op.negate() { (op, None) => Operator(fid, op), @@ -106,7 +106,7 @@ impl FilterCondition { } } -impl FilterCondition { +impl<'a> FilterCondition<'a> { /// Aggregates the documents ids that are part of the specified range automatically /// going deeper through the levels. fn explore_facet_number_levels( @@ -221,7 +221,7 @@ impl FilterCondition { numbers_db: heed::Database, strings_db: heed::Database, field_id: FieldId, - operator: &Operator, + operator: &Operator<'a>, ) -> Result { // Make sure we always bound the ranges with the field id and the level, // as the facets values are all in the same database and prefixed by the diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 47189841a..c25d523aa 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -28,25 +28,38 @@ use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, tuple}; use nom::IResult; +use nom_locate::LocatedSpan; use self::Operator::*; use super::FilterCondition; use crate::{FieldId, FieldsIdsMap}; -#[derive(Debug, Clone, PartialEq)] -pub enum Operator { - GreaterThan(f64), - GreaterThanOrEqual(f64), - Equal(Option, String), - NotEqual(Option, String), - LowerThan(f64), - LowerThanOrEqual(f64), - Between(f64, f64), - GeoLowerThan([f64; 2], f64), - GeoGreaterThan([f64; 2], f64), +pub enum FilterError { + AttributeNotFilterable(String), } -impl Operator { +#[derive(Debug, Clone, PartialEq, Eq)] +struct Token<'a> { + pub position: Span<'a>, + pub inner: &'a str, +} + +type Span<'a> = LocatedSpan<&'a str>; + +#[derive(Debug, Clone)] +pub enum Operator<'a> { + GreaterThan(Token<'a>), + GreaterThanOrEqual(Token<'a>), + Equal(Option>, Token<'a>), + NotEqual(Option>, Token<'a>), + LowerThan(Token<'a>), + LowerThanOrEqual(Token<'a>), + Between(Token<'a>, Token<'a>), + GeoLowerThan([Token<'a>; 2], Token<'a>), + GeoGreaterThan([Token<'a>; 2], Token<'a>), +} + +impl<'a> Operator<'a> { /// This method can return two operations in case it must express /// an OR operation for the between case (i.e. `TO`). pub fn negate(self) -> (Self, Option) { @@ -180,16 +193,13 @@ impl<'a> ParseContext<'a> { where E: FilterParserError<'a>, { - let error = match input.chars().nth(0) { - Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), - None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), - }; - if !self.filterable_fields.contains(key) { - return error; - } match self.fields_ids_map.id(key) { - Some(fid) => Ok(fid), - None => error, + Some(fid) if self.filterable_fields.contains(key) => Ok(fid), + _ => Err(nom::Err::Failure(E::add_context( + input, + "Attribute is not filterable", + E::from_char(input, 'T'), + ))), } } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 9b76ca851..8cd7f1a34 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -34,7 +34,8 @@ mod query_tree; pub struct Search<'a> { query: Option, - filter: Option, + // this should be linked to the String in the query + filter: Option>, offset: usize, limit: usize, sort_criteria: Option>, From c27870e76511c1f109b31ccb2ccafb5dc07dcfc3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 14:33:18 +0200 Subject: [PATCH 09/58] integrate a first version without any error handling --- filter_parser/src/condition.rs | 8 +- filter_parser/src/lib.rs | 60 +- filter_parser/src/value.rs | 7 +- milli/Cargo.toml | 2 +- milli/src/lib.rs | 2 +- milli/src/search/facet/filter_condition.rs | 547 +++++++++++-- milli/src/search/facet/filter_parser.rs | 891 --------------------- milli/src/search/facet/mod.rs | 3 +- milli/src/search/mod.rs | 6 +- 9 files changed, 507 insertions(+), 1019 deletions(-) delete mode 100644 milli/src/search/facet/filter_parser.rs diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs index 5a1bb62be..75ee8c6f7 100644 --- a/filter_parser/src/condition.rs +++ b/filter_parser/src/condition.rs @@ -12,6 +12,7 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::error::ParseError; use nom::sequence::tuple; use nom::IResult; use Condition::*; @@ -46,14 +47,15 @@ impl<'a> Condition<'a> { } /// condition = value ("==" | ">" ...) value -pub fn parse_condition(input: Span) -> IResult { +pub fn parse_condition<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, FilterCondition, E> { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); let (input, (key, op, value)) = tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?; let fid = key; - // TODO match *op.fragment() { "=" => { let k = FilterCondition::Condition { fid, op: Equal(value) }; @@ -78,7 +80,7 @@ pub fn parse_condition(input: Span) -> IResult { } /// to = value value TO value -pub fn parse_to(input: Span) -> IResult { +pub fn parse_to<'a, E: ParseError>>(input: Span<'a>) -> IResult { let (input, (key, from, _, to)) = tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( input, diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 096a9e26e..bb826872f 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -27,12 +27,12 @@ use nom::combinator::map; use nom::error::{ContextError, ParseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; -use nom::sequence::{delimited, preceded}; +use nom::sequence::{delimited, preceded, tuple}; use nom::IResult; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; -type Span<'a> = LocatedSpan<&'a str>; +pub type Span<'a> = LocatedSpan<&'a str>; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Token<'a> { @@ -82,21 +82,22 @@ impl<'a> FilterCondition<'a> { } } - pub fn parse(input: &'a str) -> IResult { + pub fn parse>>(input: &'a str) -> Result { let span = Span::new(input); - parse_expression(span) + // handle error + Ok(parse_expression::<'a, E>(span).map(|(_rem, output)| output).ok().unwrap()) } } // remove OPTIONAL whitespaces before AND after the the provided parser -fn ws<'a, O>( - inner: impl FnMut(Span<'a>) -> IResult, -) -> impl FnMut(Span<'a>) -> IResult { +fn ws<'a, O, E: ParseError>>( + inner: impl FnMut(Span<'a>) -> IResult, +) -> impl FnMut(Span<'a>) -> IResult { delimited(multispace0, inner, multispace0) } /// and = not (~ "AND" not)* -fn parse_or(input: Span) -> IResult { +fn parse_or<'a, E: ParseError>>(input: Span<'a>) -> IResult { let (input, lhs) = parse_and(input)?; let (input, ors) = many0(preceded(ws(tag("OR")), |c| parse_and(c)))(input)?; @@ -106,7 +107,7 @@ fn parse_or(input: Span) -> IResult { Ok((input, expr)) } -fn parse_and(input: Span) -> IResult { +fn parse_and<'a, E: ParseError>>(input: Span<'a>) -> IResult { let (input, lhs) = parse_not(input)?; let (input, ors) = many0(preceded(ws(tag("AND")), |c| parse_not(c)))(input)?; let expr = ors @@ -116,15 +117,17 @@ fn parse_and(input: Span) -> IResult { } /// not = ("NOT" | "!") not | primary -fn parse_not(input: Span) -> IResult { +fn parse_not<'a, E: ParseError>>(input: Span<'a>) -> IResult { alt((map(preceded(alt((tag("!"), tag("NOT"))), |c| parse_not(c)), |e| e.negate()), |c| { parse_primary(c) }))(input) } /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) -fn parse_geo_radius(input: Span) -> IResult { - let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; +fn parse_geo_radius<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, FilterCondition, E> { + // let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; /* TODO let err_msg_latitude_invalid = @@ -134,30 +137,25 @@ fn parse_geo_radius(input: Span) -> IResult { "_geoRadius. Longitude must be contained between -180 and 180 degrees."; */ + // we want to forbid space BEFORE the _geoRadius but not after let parsed = preceded::<_, _, _, _, _, _>( - // TODO: forbid spaces between _geoRadius and parenthesis - ws(tag("_geoRadius")), + tuple((multispace0, tag("_geoRadius"))), delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')')), )(input); - let (input, args): (Span, Vec) = match parsed { - Ok(e) => e, - Err(_e) => { - return Err(nom::Err::Failure(nom::error::Error::add_context( - input, - err_msg_args_incomplete, - nom::error::Error::from_char(input, '('), - ))); - } - }; + let (input, args): (Span, Vec) = parsed?; if args.len() != 3 { + // TODO + panic!("todo"); + /* let e = nom::error::Error::from_char(input, '('); return Err(nom::Err::Failure(nom::error::Error::add_context( input, err_msg_args_incomplete, e, ))); + */ } let res = FilterCondition::GeoLowerThan { @@ -168,7 +166,9 @@ fn parse_geo_radius(input: Span) -> IResult { } /// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius -fn parse_primary(input: Span) -> IResult { +fn parse_primary<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult { alt(( delimited(ws(char('(')), |c| parse_expression(c), ws(char(')'))), |c| parse_condition(c), @@ -178,12 +178,16 @@ fn parse_primary(input: Span) -> IResult { } /// expression = or -pub fn parse_expression(input: Span) -> IResult { +pub fn parse_expression<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult { parse_or(input) } #[cfg(test)] pub mod tests { + use nom::error::Error; + use super::*; /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element @@ -469,7 +473,7 @@ pub mod tests { ]; for (input, expected) in test_case { - let result = Fc::parse(input); + let result = Fc::parse::>(input); assert!( result.is_ok(), @@ -477,7 +481,7 @@ pub mod tests { expected, result.unwrap_err() ); - let filter = result.unwrap().1; + let filter = result.unwrap(); assert_eq!(filter, expected, "Filter `{}` failed.", input); } } diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index c36becf7e..1497aaddd 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -1,13 +1,14 @@ use nom::branch::alt; use nom::bytes::complete::{take_till, take_while1}; use nom::character::complete::char; +use nom::error::ParseError; use nom::sequence::delimited; use nom::IResult; use crate::{ws, Span, Token}; /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -pub fn parse_value(input: Span) -> IResult { +pub fn parse_value<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { // singleQuoted = "'" .* all but quotes "'" let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); // doubleQuoted = "\"" (word | spaces)* "\"" @@ -29,6 +30,8 @@ fn is_key_component(c: char) -> bool { #[cfg(test)] pub mod tests { + use nom::error::Error; + use super::*; use crate::tests::rtok; @@ -56,7 +59,7 @@ pub mod tests { for (input, expected) in test_case { let input = Span::new(input); - let result = parse_value(input); + let result = parse_value::>(input); assert!( result.is_ok(), diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 63fd0d984..3fc53492f 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -39,8 +39,8 @@ tempfile = "3.2.0" uuid = { version = "0.8.2", features = ["v4"] } # facet filter parser +filter_parser = { path = "../filter_parser" } nom = "7.0.0" -nom_locate = "4.0.0" # documents words self-join itertools = "0.10.0" diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 6fe5947f5..27453bf36 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -34,7 +34,7 @@ pub use self::heed_codec::{ RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, }; pub use self::index::Index; -pub use self::search::{FacetDistribution, FilterCondition, MatchingWords, Search, SearchResult}; +pub use self::search::{FacetDistribution, Filter, MatchingWords, Search, SearchResult}; pub type Result = std::result::Result; diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 5c57adb88..50caf4eac 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -2,13 +2,12 @@ use std::fmt::Debug; use std::ops::Bound::{self, Excluded, Included}; use either::Either; +use filter_parser::{Condition, FilterCondition, Span, Token}; use heed::types::DecodeIgnore; use log::debug; use nom::error::{convert_error, VerboseError}; use roaring::RoaringBitmap; -use self::FilterCondition::*; -use super::filter_parser::{Operator, ParseContext}; use super::FacetNumberRange; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ @@ -17,24 +16,19 @@ use crate::heed_codec::facet::{ use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; #[derive(Debug, Clone)] -pub enum FilterCondition<'a> { - Operator(FieldId, Operator<'a>), - Or(Box, Box), - And(Box, Box), - Empty, +pub struct Filter<'a> { + condition: FilterCondition<'a>, } -impl<'a> FilterCondition<'a> { - pub fn from_array( +impl<'a> Filter<'a> { + pub fn from_array( rtxn: &heed::RoTxn, index: &Index, array: I, ) -> Result>> where - I: IntoIterator>, - J: IntoIterator, - A: AsRef, - B: AsRef, + I: IntoIterator>, + J: IntoIterator, { let mut ands: Option = None; @@ -43,24 +37,32 @@ impl<'a> FilterCondition<'a> { Either::Left(array) => { let mut ors = None; for rule in array { - let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; + let condition = + FilterCondition::parse::>(rule.as_ref()).unwrap(); ors = match ors.take() { - Some(ors) => Some(Or(Box::new(ors), Box::new(condition))), + Some(ors) => { + Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) + } None => Some(condition), }; } if let Some(rule) = ors { ands = match ands.take() { - Some(ands) => Some(And(Box::new(ands), Box::new(rule))), + Some(ands) => { + Some(FilterCondition::And(Box::new(ands), Box::new(rule))) + } None => Some(rule), }; } } Either::Right(rule) => { - let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; + let condition = + FilterCondition::parse::>(rule.as_ref()).unwrap(); ands = match ands.take() { - Some(ands) => Some(And(Box::new(ands), Box::new(condition))), + Some(ands) => { + Some(FilterCondition::And(Box::new(ands), Box::new(condition))) + } None => Some(condition), }; } @@ -70,17 +72,14 @@ impl<'a> FilterCondition<'a> { Ok(ands) } - pub fn from_str( - rtxn: &heed::RoTxn, - index: &Index, - expression: &'a str, - ) -> Result> { + pub fn from_str(rtxn: &heed::RoTxn, index: &Index, expression: &'a str) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?; - let ctx = - ParseContext { fields_ids_map: &fields_ids_map, filterable_fields: &filterable_fields }; - match ctx.parse_expression::>(expression) { - Ok((_, fc)) => Ok(fc), + // TODO TAMO + let condition = FilterCondition::parse::>(expression).ok().unwrap(); + /* + let condition = match FilterCondition::parse::>(expression) { + Ok(fc) => Ok(fc), Err(e) => { let ve = match e { nom::Err::Error(x) => x, @@ -88,25 +87,16 @@ impl<'a> FilterCondition<'a> { _ => unreachable!(), }; Err(Error::UserError(UserError::InvalidFilter { - input: convert_error(expression, ve).to_string(), + input: convert_error(Span::new(expression), ve).to_string(), })) } - } - } - pub fn negate(self) -> FilterCondition<'a> { - match self { - Operator(fid, op) => match op.negate() { - (op, None) => Operator(fid, op), - (a, Some(b)) => Or(Box::new(Operator(fid, a)), Box::new(Operator(fid, b))), - }, - Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())), - And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())), - Empty => Empty, - } + }; + */ + Ok(Self { condition }) } } -impl<'a> FilterCondition<'a> { +impl<'a> Filter<'a> { /// Aggregates the documents ids that are part of the specified range automatically /// going deeper through the levels. fn explore_facet_number_levels( @@ -221,20 +211,33 @@ impl<'a> FilterCondition<'a> { numbers_db: heed::Database, strings_db: heed::Database, field_id: FieldId, - operator: &Operator<'a>, + operator: &Condition<'a>, ) -> Result { // Make sure we always bound the ranges with the field id and the level, // as the facets values are all in the same database and prefixed by the // field id and the level. + // TODO TAMO: return good error when we can't parse a span let (left, right) = match operator { - Operator::GreaterThan(val) => (Excluded(*val), Included(f64::MAX)), - Operator::GreaterThanOrEqual(val) => (Included(*val), Included(f64::MAX)), - Operator::Equal(number, string) => { + Condition::GreaterThan(val) => { + (Excluded(val.inner.parse::().unwrap()), Included(f64::MAX)) + } + Condition::GreaterThanOrEqual(val) => { + (Included(val.inner.parse::().unwrap()), Included(f64::MAX)) + } + Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.inner.parse().unwrap())), + Condition::LowerThanOrEqual(val) => { + (Included(f64::MIN), Included(val.inner.parse().unwrap())) + } + Condition::Between { from, to } => { + (Included(from.inner.parse::().unwrap()), Included(to.inner.parse().unwrap())) + } + Condition::Equal(val) => { let (_original_value, string_docids) = - strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default(); + strings_db.get(rtxn, &(field_id, val.inner))?.unwrap_or_default(); + let number = val.inner.parse::().ok(); let number_docids = match number { Some(n) => { - let n = Included(*n); + let n = Included(n); let mut output = RoaringBitmap::new(); Self::explore_facet_number_levels( rtxn, @@ -251,50 +254,49 @@ impl<'a> FilterCondition<'a> { }; return Ok(string_docids | number_docids); } - Operator::NotEqual(number, string) => { + Condition::NotEqual(val) => { + let number = val.inner.parse::().ok(); let all_numbers_ids = if number.is_some() { index.number_faceted_documents_ids(rtxn, field_id)? } else { RoaringBitmap::new() }; let all_strings_ids = index.string_faceted_documents_ids(rtxn, field_id)?; - let operator = Operator::Equal(*number, string.clone()); + let operator = Condition::Equal(val.clone()); let docids = Self::evaluate_operator( rtxn, index, numbers_db, strings_db, field_id, &operator, )?; return Ok((all_numbers_ids | all_strings_ids) - docids); - } - Operator::LowerThan(val) => (Included(f64::MIN), Excluded(*val)), - Operator::LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)), - Operator::Between(left, right) => (Included(*left), Included(*right)), - Operator::GeoLowerThan(base_point, distance) => { - let rtree = match index.geo_rtree(rtxn)? { - Some(rtree) => rtree, - None => return Ok(RoaringBitmap::new()), - }; + } /* + Condition::GeoLowerThan(base_point, distance) => { + let rtree = match index.geo_rtree(rtxn)? { + Some(rtree) => rtree, + None => return Ok(RoaringBitmap::new()), + }; - let result = rtree - .nearest_neighbor_iter(base_point) - .take_while(|point| { - distance_between_two_points(base_point, point.geom()) < *distance - }) - .map(|point| point.data) - .collect(); + let result = rtree + .nearest_neighbor_iter(base_point) + .take_while(|point| { + distance_between_two_points(base_point, point.geom()) < *distance + }) + .map(|point| point.data) + .collect(); - return Ok(result); - } - Operator::GeoGreaterThan(point, distance) => { - let result = Self::evaluate_operator( - rtxn, - index, - numbers_db, - strings_db, - field_id, - &Operator::GeoLowerThan(point.clone(), *distance), - )?; - let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; - return Ok(geo_faceted_doc_ids - result); - } + return Ok(result); + } + Condition::GeoGreaterThan(point, distance) => { + let result = Self::evaluate_operator( + rtxn, + index, + numbers_db, + strings_db, + field_id, + &Condition::GeoLowerThan(point.clone(), *distance), + )?; + let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; + return Ok(geo_faceted_doc_ids - result); + } + */ }; // Ask for the biggest value that can exist for this specific field, if it exists @@ -326,21 +328,390 @@ impl<'a> FilterCondition<'a> { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; - match self { - Operator(fid, op) => { - Self::evaluate_operator(rtxn, index, numbers_db, strings_db, *fid, op) + match &self.condition { + FilterCondition::Condition { fid, op } => { + // TODO: parse fid + let _ = fid; + let fid = 42; + Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) } - Or(lhs, rhs) => { - let lhs = lhs.evaluate(rtxn, index)?; - let rhs = rhs.evaluate(rtxn, index)?; + FilterCondition::Or(lhs, rhs) => { + let lhs = Self::evaluate(&(lhs.as_ref().clone()).into(), rtxn, index)?; + let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?; Ok(lhs | rhs) } - And(lhs, rhs) => { - let lhs = lhs.evaluate(rtxn, index)?; - let rhs = rhs.evaluate(rtxn, index)?; + FilterCondition::And(lhs, rhs) => { + let lhs = Self::evaluate(&(lhs.as_ref().clone()).into(), rtxn, index)?; + let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?; Ok(lhs & rhs) } Empty => Ok(RoaringBitmap::new()), } } } + +impl<'a> From> for Filter<'a> { + fn from(fc: FilterCondition<'a>) -> Self { + Self { condition: fc } + } +} + +#[cfg(test)] +mod tests { + use big_s::S; + use either::Either; + use heed::EnvOpenOptions; + use maplit::hashset; + + use super::*; + use crate::update::Settings; + use crate::Index; + + #[test] + fn number() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut map = index.fields_ids_map(&wtxn).unwrap(); + map.insert("timestamp"); + index.put_fields_ids_map(&mut wtxn, &map).unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_filterable_fields(hashset! { "timestamp".into() }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); + let expected = FilterCondition::Operator(0, Between(22.0, 44.0)); + assert_eq!(condition, expected); + + let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, LowerThan(22.0))), + Box::new(FilterCondition::Operator(0, GreaterThan(44.0))), + ); + assert_eq!(condition, expected); + } + + #[test] + fn compare() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp"), S("id")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") ,S("id")}); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); + let expected = FilterCondition::Operator(0, LowerThan(20.0)); + assert_eq!(condition, expected); + + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "id < 200").unwrap(); + let expected = FilterCondition::Operator(2, LowerThan(200.0)); + assert_eq!(condition, expected); + } + + #[test] + fn parentheses() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", + ) + .unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), + Box::new(FilterCondition::And( + Box::new(FilterCondition::Operator(1, Between(22.0, 44.0))), + Box::new(FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce")))), + )), + ); + assert_eq!(condition, expected); + + let condition = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", + ) + .unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), + Box::new(FilterCondition::Or( + Box::new(FilterCondition::Or( + Box::new(FilterCondition::Operator(1, LowerThan(22.0))), + Box::new(FilterCondition::Operator(1, GreaterThan(44.0))), + )), + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("ponce")))), + )), + ); + assert_eq!(condition, expected); + } + + #[test] + fn from_array() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Simple array with Left + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = mv"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); + assert_eq!(condition, expected); + + // Simple array with Right + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = mv")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); + assert_eq!(condition, expected); + + // Array with Left and escaped quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = \"Mister Mv\""])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); + assert_eq!(condition, expected); + + // Array with Right and escaped quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = \"Mister Mv\"")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); + assert_eq!(condition, expected); + + // Array with Left and escaped simple quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = 'Mister Mv'"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); + assert_eq!(condition, expected); + + // Array with Right and escaped simple quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = 'Mister Mv'")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); + assert_eq!(condition, expected); + + // Simple with parenthesis + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["(channel = mv)"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "(channel = mv)").unwrap(); + assert_eq!(condition, expected); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array( + &rtxn, + &index, + vec![ + Either::Right("channel = gotaga"), + Either::Left(vec!["timestamp = 44", "channel != ponce"]), + ], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga AND (timestamp = 44 OR channel != ponce)", + ) + .unwrap(); + assert_eq!(condition, expected); + } + + #[test] + fn geo_radius() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + // basic test + let condition = + FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap(); + let expected = FilterCondition::Operator(0, GeoLowerThan([12., 13.0005], 2000.)); + assert_eq!(condition, expected); + + // test the negation of the GeoLowerThan + let condition = + FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap(); + let expected = FilterCondition::Operator(0, GeoGreaterThan([50., 18.], 2000.500)); + assert_eq!(condition, expected); + + // composition of multiple operations + let condition = FilterCondition::from_str( + &rtxn, + &index, + "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", + ) + .unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::And( + Box::new(FilterCondition::Operator(0, GeoGreaterThan([1., 2.], 300.))), + Box::new(FilterCondition::Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), + )), + Box::new(FilterCondition::Operator(1, LowerThanOrEqual(10.))), + ); + assert_eq!(condition, expected); + } + + #[test] + fn geo_radius_error() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + + // georadius don't have any parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius don't have any parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius don't have enough parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius have too many parameters + let result = + FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-100, 150, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!( + error.to_string().contains("Latitude must be contained between -90 and 90 degrees."), + "{}", + error.to_string() + ); + + // georadius have a bad latitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90.0000001, 150, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Latitude must be contained between -90 and 90 degrees.")); + + // georadius have a bad longitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 250, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Longitude must be contained between -180 and 180 degrees.")); + + // georadius have a bad longitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 180.000001, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Longitude must be contained between -180 and 180 degrees.")); + } +} diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs deleted file mode 100644 index c25d523aa..000000000 --- a/milli/src/search/facet/filter_parser.rs +++ /dev/null @@ -1,891 +0,0 @@ -//! BNF grammar: -//! -//! ```text -//! expression = or -//! or = and (~ "OR" ~ and) -//! and = not (~ "AND" not)* -//! not = ("NOT" | "!") not | primary -//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius -//! to = value value TO value -//! condition = value ("==" | ">" ...) value -//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -//! singleQuoted = "'" .* all but quotes "'" -//! doubleQuoted = "\"" (word | spaces)* "\"" -//! word = (alphanumeric | _ | - | .)+ -//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) -//! ``` - -use std::collections::HashSet; -use std::fmt::Debug; -use std::result::Result as StdResult; - -use nom::branch::alt; -use nom::bytes::complete::{tag, take_till, take_while1}; -use nom::character::complete::{char, multispace0}; -use nom::combinator::map; -use nom::error::{ContextError, ErrorKind, VerboseError}; -use nom::multi::{many0, separated_list1}; -use nom::number::complete::recognize_float; -use nom::sequence::{delimited, preceded, tuple}; -use nom::IResult; -use nom_locate::LocatedSpan; - -use self::Operator::*; -use super::FilterCondition; -use crate::{FieldId, FieldsIdsMap}; - -pub enum FilterError { - AttributeNotFilterable(String), -} - -#[derive(Debug, Clone, PartialEq, Eq)] -struct Token<'a> { - pub position: Span<'a>, - pub inner: &'a str, -} - -type Span<'a> = LocatedSpan<&'a str>; - -#[derive(Debug, Clone)] -pub enum Operator<'a> { - GreaterThan(Token<'a>), - GreaterThanOrEqual(Token<'a>), - Equal(Option>, Token<'a>), - NotEqual(Option>, Token<'a>), - LowerThan(Token<'a>), - LowerThanOrEqual(Token<'a>), - Between(Token<'a>, Token<'a>), - GeoLowerThan([Token<'a>; 2], Token<'a>), - GeoGreaterThan([Token<'a>; 2], Token<'a>), -} - -impl<'a> Operator<'a> { - /// This method can return two operations in case it must express - /// an OR operation for the between case (i.e. `TO`). - pub fn negate(self) -> (Self, Option) { - match self { - GreaterThan(n) => (LowerThanOrEqual(n), None), - GreaterThanOrEqual(n) => (LowerThan(n), None), - Equal(n, s) => (NotEqual(n, s), None), - NotEqual(n, s) => (Equal(n, s), None), - LowerThan(n) => (GreaterThanOrEqual(n), None), - LowerThanOrEqual(n) => (GreaterThan(n), None), - Between(n, m) => (LowerThan(n), Some(GreaterThan(m))), - GeoLowerThan(point, distance) => (GeoGreaterThan(point, distance), None), - GeoGreaterThan(point, distance) => (GeoLowerThan(point, distance), None), - } - } -} - -pub trait FilterParserError<'a>: - nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug -{ -} - -impl<'a> FilterParserError<'a> for VerboseError<&'a str> {} - -pub struct ParseContext<'a> { - pub fields_ids_map: &'a FieldsIdsMap, - pub filterable_fields: &'a HashSet, -} - -impl<'a> ParseContext<'a> { - /// and = not (~ "AND" not)* - fn parse_or(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let (input, lhs) = self.parse_and(input)?; - let (input, ors) = - many0(preceded(self.ws(tag("OR")), |c| Self::parse_and(self, c)))(input)?; - - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); - Ok((input, expr)) - } - - fn parse_and(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let (input, lhs) = self.parse_not(input)?; - let (input, ors) = many0(preceded(self.ws(tag("AND")), |c| self.parse_not(c)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); - Ok((input, expr)) - } - - /// not = ("NOT" | "!") not | primary - fn parse_not(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - alt(( - map(preceded(alt((tag("!"), tag("NOT"))), |c| self.parse_not(c)), |e| e.negate()), - |c| self.parse_primary(c), - ))(input) - } - - fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> - where - F: FnMut(&'a str) -> IResult<&'a str, O, E>, - E: FilterParserError<'a>, - { - delimited(multispace0, inner, multispace0) - } - - /// condition = value ("==" | ">" ...) value - fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); - let (input, (key, op, value)) = - tuple((|c| self.parse_value(c), operator, |c| self.parse_value(c)))(input)?; - - let fid = self.parse_fid(input, key)?; - let r: StdResult>> = self.parse_numeric(value); - match op { - "=" => { - let k = - FilterCondition::Operator(fid, Equal(r.ok(), value.to_string().to_lowercase())); - Ok((input, k)) - } - "!=" => { - let k = FilterCondition::Operator( - fid, - NotEqual(r.ok(), value.to_string().to_lowercase()), - ); - Ok((input, k)) - } - ">" | "<" | "<=" | ">=" => { - let numeric: f64 = self.parse_numeric(value)?; - let k = match op { - ">" => FilterCondition::Operator(fid, GreaterThan(numeric)), - "<" => FilterCondition::Operator(fid, LowerThan(numeric)), - "<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)), - ">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)), - _ => unreachable!(), - }; - Ok((input, k)) - } - _ => unreachable!(), - } - } - - fn parse_numeric(&'a self, input: &'a str) -> StdResult> - where - E: FilterParserError<'a>, - T: std::str::FromStr, - { - match input.parse::() { - Ok(n) => Ok(n), - Err(_) => match input.chars().nth(0) { - Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), - None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), - }, - } - } - - fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> - where - E: FilterParserError<'a>, - { - match self.fields_ids_map.id(key) { - Some(fid) if self.filterable_fields.contains(key) => Ok(fid), - _ => Err(nom::Err::Failure(E::add_context( - input, - "Attribute is not filterable", - E::from_char(input, 'T'), - ))), - } - } - - /// to = value value TO value - fn parse_to(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let (input, (key, from, _, to)) = tuple(( - self.ws(|c| self.parse_value(c)), - self.ws(|c| self.parse_value(c)), - tag("TO"), - self.ws(|c| self.parse_value(c)), - ))(input)?; - - let fid = self.parse_fid(input, key)?; - let numeric_from: f64 = self.parse_numeric(from)?; - let numeric_to: f64 = self.parse_numeric(to)?; - let res = FilterCondition::Operator(fid, Between(numeric_from, numeric_to)); - - Ok((input, res)) - } - - /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) - fn parse_geo_radius(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; - let err_msg_latitude_invalid = - "_geoRadius. Latitude must be contained between -90 and 90 degrees."; - - let err_msg_longitude_invalid = - "_geoRadius. Longitude must be contained between -180 and 180 degrees."; - - let parsed = preceded::<_, _, _, E, _, _>( - // TODO: forbid spaces between _geoRadius and parenthesis - self.ws(tag("_geoRadius")), - delimited( - char('('), - separated_list1(tag(","), self.ws(|c| recognize_float(c))), - char(')'), - ), - )(input); - - let (input, args): (&str, Vec<&str>) = match parsed { - Ok(e) => e, - Err(_e) => { - return Err(nom::Err::Failure(E::add_context( - input, - err_msg_args_incomplete, - E::from_char(input, '('), - ))); - } - }; - - if args.len() != 3 { - let e = E::from_char(input, '('); - return Err(nom::Err::Failure(E::add_context(input, err_msg_args_incomplete, e))); - } - let lat = self.parse_numeric(args[0])?; - let lng = self.parse_numeric(args[1])?; - let dis = self.parse_numeric(args[2])?; - - let fid = match self.fields_ids_map.id("_geo") { - Some(fid) => fid, - // TODO send an error - None => return Ok((input, FilterCondition::Empty)), - }; - - if !(-90.0..=90.0).contains(&lat) { - return Err(nom::Err::Failure(E::add_context( - input, - err_msg_latitude_invalid, - E::from_char(input, '('), - ))); - } else if !(-180.0..=180.0).contains(&lng) { - return Err(nom::Err::Failure(E::add_context( - input, - err_msg_longitude_invalid, - E::from_char(input, '('), - ))); - } - - let res = FilterCondition::Operator(fid, GeoLowerThan([lat, lng], dis)); - Ok((input, res)) - } - - /// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius - fn parse_primary(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - alt(( - delimited(self.ws(char('(')), |c| self.parse_expression(c), self.ws(char(')'))), - |c| self.parse_condition(c), - |c| self.parse_to(c), - |c| self.parse_geo_radius(c), - ))(input) - } - - /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* - fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> - where - E: FilterParserError<'a>, - { - // singleQuoted = "'" .* all but quotes "'" - let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); - // doubleQuoted = "\"" (word | spaces)* "\"" - let quoted_key = |input| take_till(|c: char| c == '"')(input); - // word = (alphanumeric | _ | - | .)+ - let word = |input| take_while1(Self::is_key_component)(input); - - alt(( - self.ws(delimited(char('\''), simple_quoted_key, char('\''))), - self.ws(delimited(char('"'), quoted_key, char('"'))), - self.ws(word), - ))(input) - } - - fn is_key_component(c: char) -> bool { - c.is_alphanumeric() || ['_', '-', '.'].contains(&c) - } - - /// expression = or - pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - self.parse_or(input) - } -} - -#[cfg(test)] -mod tests { - use big_s::S; - use either::Either; - use heed::EnvOpenOptions; - use maplit::hashset; - - use super::*; - use crate::update::Settings; - use crate::Index; - - #[test] - fn string() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut map = index.fields_ids_map(&wtxn).unwrap(); - map.insert("channel"); - map.insert("dog race"); - map.insert("subscribers"); - map.insert("_geo"); - index.put_fields_ids_map(&mut wtxn, &map).unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields( - hashset! { S("channel"), S("dog race"), S("subscribers"), S("_geo") }, - ); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - - use FilterCondition as Fc; - let test_case = [ - // simple test - ( - Fc::from_str(&rtxn, &index, "channel = Ponce"), - Fc::Operator(0, Operator::Equal(None, S("ponce"))), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers = 12"), - Fc::Operator(2, Operator::Equal(Some(12.), S("12"))), - ), - // test all the quotes and simple quotes - ( - Fc::from_str(&rtxn, &index, "channel = 'Mister Mv'"), - Fc::Operator(0, Operator::Equal(None, S("mister mv"))), - ), - ( - Fc::from_str(&rtxn, &index, "channel = \"Mister Mv\""), - Fc::Operator(0, Operator::Equal(None, S("mister mv"))), - ), - ( - Fc::from_str(&rtxn, &index, "'dog race' = Borzoi"), - Fc::Operator(1, Operator::Equal(None, S("borzoi"))), - ), - ( - Fc::from_str(&rtxn, &index, "\"dog race\" = Chusky"), - Fc::Operator(1, Operator::Equal(None, S("chusky"))), - ), - ( - Fc::from_str(&rtxn, &index, "\"dog race\" = \"Bernese Mountain\""), - Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), - ), - ( - Fc::from_str(&rtxn, &index, "'dog race' = 'Bernese Mountain'"), - Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), - ), - ( - Fc::from_str(&rtxn, &index, "\"dog race\" = 'Bernese Mountain'"), - Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), - ), - // test all the operators - ( - Fc::from_str(&rtxn, &index, "channel != ponce"), - Fc::Operator(0, Operator::NotEqual(None, S("ponce"))), - ), - ( - Fc::from_str(&rtxn, &index, "NOT channel = ponce"), - Fc::Operator(0, Operator::NotEqual(None, S("ponce"))), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers < 1000"), - Fc::Operator(2, Operator::LowerThan(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers > 1000"), - Fc::Operator(2, Operator::GreaterThan(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers <= 1000"), - Fc::Operator(2, Operator::LowerThanOrEqual(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers >= 1000"), - Fc::Operator(2, Operator::GreaterThanOrEqual(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers < 1000"), - Fc::Operator(2, Operator::GreaterThanOrEqual(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers > 1000"), - Fc::Operator(2, Operator::LowerThanOrEqual(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers <= 1000"), - Fc::Operator(2, Operator::GreaterThan(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers >= 1000"), - Fc::Operator(2, Operator::LowerThan(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers 100 TO 1000"), - Fc::Operator(2, Operator::Between(100., 1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers 100 TO 1000"), - Fc::Or( - Box::new(Fc::Operator(2, Operator::LowerThan(100.))), - Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), - ), - ), - ( - Fc::from_str(&rtxn, &index, "_geoRadius(12, 13, 14)"), - Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT _geoRadius(12, 13, 14)"), - Fc::Operator(3, Operator::GeoGreaterThan([12., 13.], 14.)), - ), - // test simple `or` and `and` - ( - Fc::from_str(&rtxn, &index, "channel = ponce AND 'dog race' != 'bernese mountain'"), - Fc::And( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - ), - ), - ( - Fc::from_str(&rtxn, &index, "channel = ponce OR 'dog race' != 'bernese mountain'"), - Fc::Or( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - ), - ), - ( - Fc::from_str( - &rtxn, - &index, - "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", - ), - Fc::Or( - Box::new(Fc::And( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - )), - Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), - ), - ), - // test parenthesis - ( - Fc::from_str( - &rtxn, - &index, - "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", - ), - Fc::And( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Or( - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), - ))), - ), - ( - Fc::from_str( - &rtxn, - &index, - "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", - ), - Fc::And( - Box::new(Fc::Or( - Box::new(Fc::And( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - )), - Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), - )), - Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)))) - ), - ]; - - for (result, expected) in test_case { - assert!( - result.is_ok(), - "Filter {:?} was supposed to be parsed but failed with the following error: `{}`", - expected, - result.unwrap_err() - ); - let filter = result.unwrap(); - assert_eq!(filter, expected,); - } - } - - #[test] - fn number() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut map = index.fields_ids_map(&wtxn).unwrap(); - map.insert("timestamp"); - index.put_fields_ids_map(&mut wtxn, &map).unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields(hashset! { "timestamp".into() }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); - let expected = FilterCondition::Operator(0, Between(22.0, 44.0)); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, LowerThan(22.0))), - Box::new(FilterCondition::Operator(0, GreaterThan(44.0))), - ); - assert_eq!(condition, expected); - } - - #[test] - fn compare() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp"), S("id")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") ,S("id")}); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); - let expected = FilterCondition::Operator(0, LowerThan(20.0)); - assert_eq!(condition, expected); - - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "id < 200").unwrap(); - let expected = FilterCondition::Operator(2, LowerThan(200.0)); - assert_eq!(condition, expected); - } - - #[test] - fn parentheses() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), - Box::new(FilterCondition::And( - Box::new(FilterCondition::Operator(1, Between(22.0, 44.0))), - Box::new(FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce")))), - )), - ); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), - Box::new(FilterCondition::Or( - Box::new(FilterCondition::Or( - Box::new(FilterCondition::Operator(1, LowerThan(22.0))), - Box::new(FilterCondition::Operator(1, GreaterThan(44.0))), - )), - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("ponce")))), - )), - ); - assert_eq!(condition, expected); - } - - #[test] - fn from_array() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Simple array with Left - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = mv"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); - assert_eq!(condition, expected); - - // Simple array with Right - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = mv")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); - assert_eq!(condition, expected); - - // Array with Left and escaped quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = \"Mister Mv\""])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); - assert_eq!(condition, expected); - - // Array with Right and escaped quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = \"Mister Mv\"")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); - assert_eq!(condition, expected); - - // Array with Left and escaped simple quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = 'Mister Mv'"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); - assert_eq!(condition, expected); - - // Array with Right and escaped simple quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = 'Mister Mv'")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); - assert_eq!(condition, expected); - - // Simple with parenthesis - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["(channel = mv)"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "(channel = mv)").unwrap(); - assert_eq!(condition, expected); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array( - &rtxn, - &index, - vec![ - Either::Right("channel = gotaga"), - Either::Left(vec!["timestamp = 44", "channel != ponce"]), - ], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga AND (timestamp = 44 OR channel != ponce)", - ) - .unwrap(); - assert_eq!(condition, expected); - } - - #[test] - fn geo_radius() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - // basic test - let condition = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap(); - let expected = FilterCondition::Operator(0, GeoLowerThan([12., 13.0005], 2000.)); - assert_eq!(condition, expected); - - // test the negation of the GeoLowerThan - let condition = - FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap(); - let expected = FilterCondition::Operator(0, GeoGreaterThan([50., 18.], 2000.500)); - assert_eq!(condition, expected); - - // composition of multiple operations - let condition = FilterCondition::from_str( - &rtxn, - &index, - "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::And( - Box::new(FilterCondition::Operator(0, GeoGreaterThan([1., 2.], 300.))), - Box::new(FilterCondition::Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), - )), - Box::new(FilterCondition::Operator(1, LowerThanOrEqual(10.))), - ); - assert_eq!(condition, expected); - } - - #[test] - fn geo_radius_error() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - - // georadius don't have any parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius don't have any parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius don't have enough parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius have too many parameters - let result = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-100, 150, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!( - error.to_string().contains("Latitude must be contained between -90 and 90 degrees."), - "{}", - error.to_string() - ); - - // georadius have a bad latitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90.0000001, 150, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Latitude must be contained between -90 and 90 degrees.")); - - // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 250, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Longitude must be contained between -180 and 180 degrees.")); - - // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 180.000001, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Longitude must be contained between -180 and 180 degrees.")); - } -} diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 3efa0262f..d6f276fbb 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,10 +1,9 @@ pub use self::facet_distribution::FacetDistribution; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; -pub use self::filter_condition::FilterCondition; +pub use self::filter_condition::Filter; mod facet_distribution; mod facet_number; mod facet_string; mod filter_condition; -mod filter_parser; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 8cd7f1a34..a31ead1ec 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,7 +14,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition}; +pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; use crate::error::UserError; @@ -35,7 +35,7 @@ mod query_tree; pub struct Search<'a> { query: Option, // this should be linked to the String in the query - filter: Option>, + filter: Option>, offset: usize, limit: usize, sort_criteria: Option>, @@ -97,7 +97,7 @@ impl<'a> Search<'a> { self } - pub fn filter(&mut self, condition: FilterCondition) -> &mut Search<'a> { + pub fn filter(&mut self, condition: Filter<'a>) -> &mut Search<'a> { self.filter = Some(condition); self } From d6ba84ea99721a919cedc0e6a44ecbc992e4a983 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 15:09:56 +0200 Subject: [PATCH 10/58] re introduce the special error type to be able to add context to the errors --- filter_parser/src/condition.rs | 7 ++-- filter_parser/src/lib.rs | 59 ++++++++++++---------------------- filter_parser/src/value.rs | 5 ++- 3 files changed, 25 insertions(+), 46 deletions(-) diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs index 75ee8c6f7..b8d0e1efc 100644 --- a/filter_parser/src/condition.rs +++ b/filter_parser/src/condition.rs @@ -12,12 +12,11 @@ use nom::branch::alt; use nom::bytes::complete::tag; -use nom::error::ParseError; use nom::sequence::tuple; use nom::IResult; use Condition::*; -use crate::{parse_value, ws, FilterCondition, Span, Token}; +use crate::{parse_value, ws, FPError, FilterCondition, Span, Token}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Condition<'a> { @@ -47,7 +46,7 @@ impl<'a> Condition<'a> { } /// condition = value ("==" | ">" ...) value -pub fn parse_condition<'a, E: ParseError>>( +pub fn parse_condition<'a, E: FPError<'a>>( input: Span<'a>, ) -> IResult, FilterCondition, E> { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); @@ -80,7 +79,7 @@ pub fn parse_condition<'a, E: ParseError>>( } /// to = value value TO value -pub fn parse_to<'a, E: ParseError>>(input: Span<'a>) -> IResult { +pub fn parse_to<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { let (input, (key, from, _, to)) = tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( input, diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index bb826872f..007817655 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -24,16 +24,22 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0}; use nom::combinator::map; -use nom::error::{ContextError, ParseError}; +use nom::error::{ContextError, Error, VerboseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, tuple}; -use nom::IResult; +use nom::{Finish, IResult}; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; pub type Span<'a> = LocatedSpan<&'a str>; +pub trait FilterParserError<'a>: nom::error::ParseError> + ContextError> {} +impl<'a> FilterParserError<'a> for VerboseError> {} +impl<'a> FilterParserError<'a> for Error> {} + +use FilterParserError as FPError; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct Token<'a> { pub position: Span<'a>, @@ -82,22 +88,21 @@ impl<'a> FilterCondition<'a> { } } - pub fn parse>>(input: &'a str) -> Result { + pub fn parse>(input: &'a str) -> Result { let span = Span::new(input); - // handle error - Ok(parse_expression::<'a, E>(span).map(|(_rem, output)| output).ok().unwrap()) + parse_expression::<'a, E>(span).finish().map(|(_rem, output)| output) } } // remove OPTIONAL whitespaces before AND after the the provided parser -fn ws<'a, O, E: ParseError>>( +fn ws<'a, O, E: FPError<'a>>( inner: impl FnMut(Span<'a>) -> IResult, ) -> impl FnMut(Span<'a>) -> IResult { delimited(multispace0, inner, multispace0) } /// and = not (~ "AND" not)* -fn parse_or<'a, E: ParseError>>(input: Span<'a>) -> IResult { +fn parse_or<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { let (input, lhs) = parse_and(input)?; let (input, ors) = many0(preceded(ws(tag("OR")), |c| parse_and(c)))(input)?; @@ -107,7 +112,7 @@ fn parse_or<'a, E: ParseError>>(input: Span<'a>) -> IResult>>(input: Span<'a>) -> IResult { +fn parse_and<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { let (input, lhs) = parse_not(input)?; let (input, ors) = many0(preceded(ws(tag("AND")), |c| parse_not(c)))(input)?; let expr = ors @@ -117,25 +122,15 @@ fn parse_and<'a, E: ParseError>>(input: Span<'a>) -> IResult>>(input: Span<'a>) -> IResult { +fn parse_not<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { alt((map(preceded(alt((tag("!"), tag("NOT"))), |c| parse_not(c)), |e| e.negate()), |c| { parse_primary(c) }))(input) } /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) -fn parse_geo_radius<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult, FilterCondition, E> { - // let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; - /* - TODO - let err_msg_latitude_invalid = - "_geoRadius. Latitude must be contained between -90 and 90 degrees."; - - let err_msg_longitude_invalid = - "_geoRadius. Longitude must be contained between -180 and 180 degrees."; - */ +fn parse_geo_radius<'a, E: FPError<'a>>(input: Span<'a>) -> IResult, FilterCondition, E> { + let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; // we want to forbid space BEFORE the _geoRadius but not after let parsed = preceded::<_, _, _, _, _, _>( @@ -146,16 +141,8 @@ fn parse_geo_radius<'a, E: ParseError>>( let (input, args): (Span, Vec) = parsed?; if args.len() != 3 { - // TODO - panic!("todo"); - /* - let e = nom::error::Error::from_char(input, '('); - return Err(nom::Err::Failure(nom::error::Error::add_context( - input, - err_msg_args_incomplete, - e, - ))); - */ + let e = E::from_char(input, '('); + return Err(nom::Err::Failure(E::add_context(input, err_msg_args_incomplete, e))); } let res = FilterCondition::GeoLowerThan { @@ -166,9 +153,7 @@ fn parse_geo_radius<'a, E: ParseError>>( } /// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius -fn parse_primary<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult { +fn parse_primary<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { alt(( delimited(ws(char('(')), |c| parse_expression(c), ws(char(')'))), |c| parse_condition(c), @@ -178,16 +163,12 @@ fn parse_primary<'a, E: ParseError>>( } /// expression = or -pub fn parse_expression<'a, E: ParseError>>( - input: Span<'a>, -) -> IResult { +pub fn parse_expression<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { parse_or(input) } #[cfg(test)] pub mod tests { - use nom::error::Error; - use super::*; /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 1497aaddd..5b3a8dfd1 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -1,14 +1,13 @@ use nom::branch::alt; use nom::bytes::complete::{take_till, take_while1}; use nom::character::complete::char; -use nom::error::ParseError; use nom::sequence::delimited; use nom::IResult; -use crate::{ws, Span, Token}; +use crate::{ws, FPError, Span, Token}; /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -pub fn parse_value<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { +pub fn parse_value<'a, E: FPError<'a>>(input: Span<'a>) -> IResult, Token, E> { // singleQuoted = "'" .* all but quotes "'" let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); // doubleQuoted = "\"" (word | spaces)* "\"" From efb2f8b3254213bf919084ff6f42d922a3f7a68a Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 16:38:35 +0200 Subject: [PATCH 11/58] convert the errors --- filter_parser/Cargo.toml | 1 + filter_parser/src/lib.rs | 4 +- milli/Cargo.toml | 1 + milli/src/search/facet/filter_condition.rs | 53 +++++++++------------- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/filter_parser/Cargo.toml b/filter_parser/Cargo.toml index 80767d5c4..2bdb3316a 100644 --- a/filter_parser/Cargo.toml +++ b/filter_parser/Cargo.toml @@ -8,3 +8,4 @@ edition = "2021" [dependencies] nom = "7.0.0" nom_locate = "4.0.0" +nom-greedyerror = "0.4.0" diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 007817655..7153c5361 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -24,17 +24,19 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0}; use nom::combinator::map; -use nom::error::{ContextError, Error, VerboseError}; +use nom::error::{ContextError, Error, ErrorKind, VerboseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, tuple}; use nom::{Finish, IResult}; +use nom_greedyerror::GreedyError; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; pub type Span<'a> = LocatedSpan<&'a str>; pub trait FilterParserError<'a>: nom::error::ParseError> + ContextError> {} +impl<'a> FilterParserError<'a> for GreedyError, ErrorKind> {} impl<'a> FilterParserError<'a> for VerboseError> {} impl<'a> FilterParserError<'a> for Error> {} diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 3fc53492f..1aaeed008 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -41,6 +41,7 @@ uuid = { version = "0.8.2", features = ["v4"] } # facet filter parser filter_parser = { path = "../filter_parser" } nom = "7.0.0" +nom-greedyerror = "0.4.0" # documents words self-join itertools = "0.10.0" diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 50caf4eac..fca35ff4d 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -2,10 +2,11 @@ use std::fmt::Debug; use std::ops::Bound::{self, Excluded, Included}; use either::Either; -use filter_parser::{Condition, FilterCondition, Span, Token}; +use filter_parser::{Condition, FilterCondition, FilterParserError, Span, Token}; use heed::types::DecodeIgnore; use log::debug; -use nom::error::{convert_error, VerboseError}; +use nom::error::{ErrorKind, VerboseError}; +use nom_greedyerror::{convert_error, GreedyError}; use roaring::RoaringBitmap; use super::FacetNumberRange; @@ -20,12 +21,14 @@ pub struct Filter<'a> { condition: FilterCondition<'a>, } +impl<'a> From>> for Error { + fn from(nom_error: VerboseError>) -> Self { + UserError::InvalidFilter { input: nom_error.to_string() }.into() + } +} + impl<'a> Filter<'a> { - pub fn from_array( - rtxn: &heed::RoTxn, - index: &Index, - array: I, - ) -> Result>> + pub fn from_array(array: I) -> Result> where I: IntoIterator>, J: IntoIterator, @@ -37,8 +40,7 @@ impl<'a> Filter<'a> { Either::Left(array) => { let mut ors = None; for rule in array { - let condition = - FilterCondition::parse::>(rule.as_ref()).unwrap(); + let condition = Self::from_str(rule.as_ref())?.condition; ors = match ors.take() { Some(ors) => { Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) @@ -57,8 +59,7 @@ impl<'a> Filter<'a> { } } Either::Right(rule) => { - let condition = - FilterCondition::parse::>(rule.as_ref()).unwrap(); + let condition = Self::from_str(rule.as_ref())?.condition; ands = match ands.take() { Some(ands) => { Some(FilterCondition::And(Box::new(ands), Box::new(condition))) @@ -69,29 +70,16 @@ impl<'a> Filter<'a> { } } - Ok(ands) + Ok(ands.map(|ands| Self { condition: ands })) } - pub fn from_str(rtxn: &heed::RoTxn, index: &Index, expression: &'a str) -> Result { - let fields_ids_map = index.fields_ids_map(rtxn)?; - let filterable_fields = index.filterable_fields(rtxn)?; - // TODO TAMO - let condition = FilterCondition::parse::>(expression).ok().unwrap(); - /* - let condition = match FilterCondition::parse::>(expression) { + pub fn from_str(expression: &'a str) -> Result { + let condition = match FilterCondition::parse::>(expression) { Ok(fc) => Ok(fc), - Err(e) => { - let ve = match e { - nom::Err::Error(x) => x, - nom::Err::Failure(x) => x, - _ => unreachable!(), - }; - Err(Error::UserError(UserError::InvalidFilter { - input: convert_error(Span::new(expression), ve).to_string(), - })) - } - }; - */ + Err(e) => Err(Error::UserError(UserError::InvalidFilter { + input: convert_error(Span::new(expression), e).to_string(), + })), + }?; Ok(Self { condition }) } } @@ -345,7 +333,8 @@ impl<'a> Filter<'a> { let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?; Ok(lhs & rhs) } - Empty => Ok(RoaringBitmap::new()), + FilterCondition::Empty => Ok(RoaringBitmap::new()), + _ => panic!("do the geosearch"), } } } From 6c9165b6a8161544c76c1a3d63867a72e5983115 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 16:52:13 +0200 Subject: [PATCH 12/58] provide a helper to parse the token but to not handle the errors --- milli/src/search/facet/filter_condition.rs | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index fca35ff4d..2ba5a023e 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -1,5 +1,6 @@ use std::fmt::Debug; use std::ops::Bound::{self, Excluded, Included}; +use std::str::FromStr; use either::Either; use filter_parser::{Condition, FilterCondition, FilterParserError, Span, Token}; @@ -27,6 +28,10 @@ impl<'a> From>> for Error { } } +fn parse(tok: &Token) -> Result { + Ok(tok.inner.parse().ok().unwrap()) +} + impl<'a> Filter<'a> { pub fn from_array(array: I) -> Result> where @@ -206,19 +211,11 @@ impl<'a> Filter<'a> { // field id and the level. // TODO TAMO: return good error when we can't parse a span let (left, right) = match operator { - Condition::GreaterThan(val) => { - (Excluded(val.inner.parse::().unwrap()), Included(f64::MAX)) - } - Condition::GreaterThanOrEqual(val) => { - (Included(val.inner.parse::().unwrap()), Included(f64::MAX)) - } - Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.inner.parse().unwrap())), - Condition::LowerThanOrEqual(val) => { - (Included(f64::MIN), Included(val.inner.parse().unwrap())) - } - Condition::Between { from, to } => { - (Included(from.inner.parse::().unwrap()), Included(to.inner.parse().unwrap())) - } + Condition::GreaterThan(val) => (Excluded(parse(val)?), Included(f64::MAX)), + Condition::GreaterThanOrEqual(val) => (Included(parse(val)?), Included(f64::MAX)), + Condition::LowerThan(val) => (Included(f64::MIN), Excluded(parse(val)?)), + Condition::LowerThanOrEqual(val) => (Included(f64::MIN), Included(parse(val)?)), + Condition::Between { from, to } => (Included(parse(from)?), Included(parse(to)?)), Condition::Equal(val) => { let (_original_value, string_docids) = strings_db.get(rtxn, &(field_id, val.inner))?.unwrap_or_default(); @@ -334,6 +331,7 @@ impl<'a> Filter<'a> { Ok(lhs & rhs) } FilterCondition::Empty => Ok(RoaringBitmap::new()), + // TODO: TAMO _ => panic!("do the geosearch"), } } From e25ca9776fbf8e9bead564c8bd9803f7eafd2a7e Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 17:23:22 +0200 Subject: [PATCH 13/58] start updating the exposed function to makes other modules happy --- cli/src/main.rs | 2 +- http-ui/src/main.rs | 7 ++++--- milli/src/facet/mod.rs | 2 ++ milli/src/lib.rs | 4 +++- milli/src/search/facet/filter_condition.rs | 17 +++++++++++------ milli/src/search/facet/mod.rs | 2 ++ milli/src/search/mod.rs | 2 +- 7 files changed, 24 insertions(+), 12 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index b84ff3243..cae4d081f 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -262,7 +262,7 @@ impl Search { } if let Some(ref filter) = self.filter { - let condition = milli::FilterCondition::from_str(&txn, &index, filter)?; + let condition = milli::Filter::from_str(filter)?; search.filter(condition); } diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index d27c6d5bb..e3f8f0317 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -24,7 +24,8 @@ use milli::documents::DocumentBatchReader; use milli::update::UpdateIndexingStep::*; use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder}; use milli::{ - obkv_to_json, CompressionType, FilterCondition, Index, MatchingWords, SearchResult, SortError, + obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, Index, MatchingWords, + SearchResult, SortError, }; use once_cell::sync::OnceCell; use rayon::ThreadPool; @@ -739,7 +740,7 @@ async fn main() -> anyhow::Result<()> { let filters = match query.filters { Some(condition) if !condition.trim().is_empty() => { - Some(FilterCondition::from_str(&rtxn, &index, &condition).unwrap()) + Some(MilliFilter::from_str(&condition).unwrap()) } _otherwise => None, }; @@ -747,7 +748,7 @@ async fn main() -> anyhow::Result<()> { let facet_filters = match query.facet_filters { Some(array) => { let eithers = array.into_iter().map(Into::into); - FilterCondition::from_array(&rtxn, &index, eithers).unwrap() + MilliFilter::from_array(eithers).unwrap() } _otherwise => None, }; diff --git a/milli/src/facet/mod.rs b/milli/src/facet/mod.rs index 274d2588d..aaa7a65ce 100644 --- a/milli/src/facet/mod.rs +++ b/milli/src/facet/mod.rs @@ -2,5 +2,7 @@ mod facet_type; mod facet_value; pub mod value_encoding; +pub use filter_parser::{Condition, FilterCondition, FilterParserError, Span, Token}; + pub use self::facet_type::FacetType; pub use self::facet_value::FacetValue; diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 27453bf36..e2ecb060c 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -34,7 +34,9 @@ pub use self::heed_codec::{ RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, }; pub use self::index::Index; -pub use self::search::{FacetDistribution, Filter, MatchingWords, Search, SearchResult}; +pub use self::search::{ + Condition, FacetDistribution, Filter, FilterCondition, MatchingWords, Search, SearchResult, +}; pub type Result = std::result::Result; diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 2ba5a023e..29be3edf4 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -3,7 +3,7 @@ use std::ops::Bound::{self, Excluded, Included}; use std::str::FromStr; use either::Either; -use filter_parser::{Condition, FilterCondition, FilterParserError, Span, Token}; +pub use filter_parser::{Condition, FilterCondition, FilterParserError, Span, Token}; use heed::types::DecodeIgnore; use log::debug; use nom::error::{ErrorKind, VerboseError}; @@ -209,7 +209,7 @@ impl<'a> Filter<'a> { // Make sure we always bound the ranges with the field id and the level, // as the facets values are all in the same database and prefixed by the // field id and the level. - // TODO TAMO: return good error when we can't parse a span + let (left, right) = match operator { Condition::GreaterThan(val) => (Excluded(parse(val)?), Included(f64::MAX)), Condition::GreaterThanOrEqual(val) => (Included(parse(val)?), Included(f64::MAX)), @@ -315,10 +315,15 @@ impl<'a> Filter<'a> { match &self.condition { FilterCondition::Condition { fid, op } => { - // TODO: parse fid - let _ = fid; - let fid = 42; - Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) + let filterable_fields = index.fields_ids_map(rtxn)?; + if let Some(fid) = filterable_fields.id(fid.inner) { + Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) + } else { + // TODO TAMO: update the error message + return Err(UserError::InvalidFilter { + input: format!("Bad filter, available filters are {:?}", filterable_fields), + })?; + } } FilterCondition::Or(lhs, rhs) => { let lhs = Self::evaluate(&(lhs.as_ref().clone()).into(), rtxn, index)?; diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index d6f276fbb..c0b692de7 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,3 +1,5 @@ +pub use filter_parser::{Condition, FilterCondition}; + pub use self::facet_distribution::FacetDistribution; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index a31ead1ec..f52dd06f0 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,7 +14,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; +pub use self::facet::{Condition, FacetDistribution, FacetNumberIter, Filter, FilterCondition}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; use crate::error::UserError; From 4e113bbf1b61acd480c858087fdfeb6f7be2143c Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 17:49:08 +0200 Subject: [PATCH 14/58] handle the case of empty input --- filter_parser/src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 7153c5361..4623f9387 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -91,6 +91,9 @@ impl<'a> FilterCondition<'a> { } pub fn parse>(input: &'a str) -> Result { + if input.trim().is_empty() { + return Ok(Self::Empty); + } let span = Span::new(input); parse_expression::<'a, E>(span).finish().map(|(_rem, output)| output) } From 7cd9109e2fbea5e032c40cc33201cf9e3a15c130 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 17:50:15 +0200 Subject: [PATCH 15/58] lowercase value extracted from Token --- milli/src/search/facet/filter_condition.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 29be3edf4..01132dce0 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -217,8 +217,9 @@ impl<'a> Filter<'a> { Condition::LowerThanOrEqual(val) => (Included(f64::MIN), Included(parse(val)?)), Condition::Between { from, to } => (Included(parse(from)?), Included(parse(to)?)), Condition::Equal(val) => { - let (_original_value, string_docids) = - strings_db.get(rtxn, &(field_id, val.inner))?.unwrap_or_default(); + let (_original_value, string_docids) = strings_db + .get(rtxn, &(field_id, &val.inner.to_lowercase()))? + .unwrap_or_default(); let number = val.inner.parse::().ok(); let number_docids = match number { Some(n) => { @@ -316,7 +317,7 @@ impl<'a> Filter<'a> { match &self.condition { FilterCondition::Condition { fid, op } => { let filterable_fields = index.fields_ids_map(rtxn)?; - if let Some(fid) = filterable_fields.id(fid.inner) { + if let Some(fid) = filterable_fields.id(&fid.inner.to_lowercase()) { Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) } else { // TODO TAMO: update the error message From 3942b3732f21ff97409355d85b6dfb80dbff85d4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 18:03:39 +0200 Subject: [PATCH 16/58] re-implement the geosearch --- milli/src/search/facet/filter_condition.rs | 61 ++++++++++------------ 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 01132dce0..fb7ce2ec7 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -253,36 +253,7 @@ impl<'a> Filter<'a> { rtxn, index, numbers_db, strings_db, field_id, &operator, )?; return Ok((all_numbers_ids | all_strings_ids) - docids); - } /* - Condition::GeoLowerThan(base_point, distance) => { - let rtree = match index.geo_rtree(rtxn)? { - Some(rtree) => rtree, - None => return Ok(RoaringBitmap::new()), - }; - - let result = rtree - .nearest_neighbor_iter(base_point) - .take_while(|point| { - distance_between_two_points(base_point, point.geom()) < *distance - }) - .map(|point| point.data) - .collect(); - - return Ok(result); - } - Condition::GeoGreaterThan(point, distance) => { - let result = Self::evaluate_operator( - rtxn, - index, - numbers_db, - strings_db, - field_id, - &Condition::GeoLowerThan(point.clone(), *distance), - )?; - let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; - return Ok(geo_faceted_doc_ids - result); - } - */ + } }; // Ask for the biggest value that can exist for this specific field, if it exists @@ -337,8 +308,34 @@ impl<'a> Filter<'a> { Ok(lhs & rhs) } FilterCondition::Empty => Ok(RoaringBitmap::new()), - // TODO: TAMO - _ => panic!("do the geosearch"), + FilterCondition::GeoLowerThan { point, radius } => { + let base_point = [parse(&point[0])?, parse(&point[1])?]; + let radius = parse(&radius)?; + let rtree = match index.geo_rtree(rtxn)? { + Some(rtree) => rtree, + None => return Ok(RoaringBitmap::new()), + }; + + let result = rtree + .nearest_neighbor_iter(&base_point) + .take_while(|point| { + distance_between_two_points(&base_point, point.geom()) < radius + }) + .map(|point| point.data) + .collect(); + + return Ok(result); + } + FilterCondition::GeoGreaterThan { point, radius } => { + let result = Self::evaluate( + &FilterCondition::GeoLowerThan { point: point.clone(), radius: radius.clone() } + .into(), + rtxn, + index, + )?; + let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; + return Ok(geo_faceted_doc_ids - result); + } } } } From c8d03046bfec58b02a50ac310550c3312b8804c0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 18:08:18 +0200 Subject: [PATCH 17/58] add a check on the fid in the geosearch --- milli/src/search/facet/filter_condition.rs | 40 ++++++++++++++-------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index fb7ce2ec7..6f9c4849e 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -309,22 +309,32 @@ impl<'a> Filter<'a> { } FilterCondition::Empty => Ok(RoaringBitmap::new()), FilterCondition::GeoLowerThan { point, radius } => { - let base_point = [parse(&point[0])?, parse(&point[1])?]; - let radius = parse(&radius)?; - let rtree = match index.geo_rtree(rtxn)? { - Some(rtree) => rtree, - None => return Ok(RoaringBitmap::new()), - }; + let filterable_fields = index.fields_ids_map(rtxn)?; + if filterable_fields.id("_geo").is_some() { + let base_point = [parse(&point[0])?, parse(&point[1])?]; + // TODO TAMO: ensure lat is between -90 and 90 + // TODO TAMO: ensure lng is between -180 and 180 + let radius = parse(&radius)?; + let rtree = match index.geo_rtree(rtxn)? { + Some(rtree) => rtree, + None => return Ok(RoaringBitmap::new()), + }; - let result = rtree - .nearest_neighbor_iter(&base_point) - .take_while(|point| { - distance_between_two_points(&base_point, point.geom()) < radius - }) - .map(|point| point.data) - .collect(); + let result = rtree + .nearest_neighbor_iter(&base_point) + .take_while(|point| { + distance_between_two_points(&base_point, point.geom()) < radius + }) + .map(|point| point.data) + .collect(); - return Ok(result); + Ok(result) + } else { + // TODO TAMO: update the error message + return Err(UserError::InvalidFilter { + input: format!("You tried to use _geo in a filter, you probably wanted to use _geoRadius"), + })?; + } } FilterCondition::GeoGreaterThan { point, radius } => { let result = Self::evaluate( @@ -334,7 +344,7 @@ impl<'a> Filter<'a> { index, )?; let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; - return Ok(geo_faceted_doc_ids - result); + Ok(geo_faceted_doc_ids - result) } } } From 1327807caad1df55a28b68f1dfd9d8699d9c6426 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 19:00:33 +0200 Subject: [PATCH 18/58] add some error messages --- milli/src/search/facet/filter_condition.rs | 39 +++++++++++++++++++--- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 6f9c4849e..42b3fc52d 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -29,7 +29,18 @@ impl<'a> From>> for Error { } fn parse(tok: &Token) -> Result { - Ok(tok.inner.parse().ok().unwrap()) + match tok.inner.parse::() { + Ok(t) => Ok(t), + Err(_e) => Err(UserError::InvalidFilter { + input: format!( + "Could not parse `{}` at line {} and offset {}", + tok.inner, + tok.position.location_line(), + tok.position.get_column() + ), + } + .into()), + } } impl<'a> Filter<'a> { @@ -291,10 +302,28 @@ impl<'a> Filter<'a> { if let Some(fid) = filterable_fields.id(&fid.inner.to_lowercase()) { Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) } else { - // TODO TAMO: update the error message - return Err(UserError::InvalidFilter { - input: format!("Bad filter, available filters are {:?}", filterable_fields), - })?; + match fid.inner { + // TODO update the error messages according to the spec + "_geo" => { + return Err(UserError::InvalidFilter { input: format!("Tried to use _geo in a filter, you probably wanted to use _geoRadius(latitude, longitude, radius)") })?; + } + "_geoDistance" => { + return Err(UserError::InvalidFilter { + input: format!("Reserved field _geoDistance"), + })?; + } + fid if fid.starts_with("_geoPoint(") => { + return Err(UserError::InvalidFilter { input: format!("_geoPoint only available in sort. You wanted to use _geoRadius") })?; + } + fid => { + return Err(UserError::InvalidFilter { + input: format!( + "Bad filter {}, available filters are {:?}", + fid, filterable_fields + ), + })?; + } + } } } FilterCondition::Or(lhs, rhs) => { From 76a2adb7c38b1ca15b7f5868de8dbe360a58f281 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 2 Nov 2021 17:35:17 +0100 Subject: [PATCH 19/58] re-enable the tests in the parser and start the creation of an error type --- filter_parser/src/condition.rs | 15 +-- filter_parser/src/lib.rs | 103 ++++++++++++++++----- filter_parser/src/value.rs | 2 +- milli/src/search/facet/filter_condition.rs | 2 +- 4 files changed, 86 insertions(+), 36 deletions(-) diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs index b8d0e1efc..c7a9a85a0 100644 --- a/filter_parser/src/condition.rs +++ b/filter_parser/src/condition.rs @@ -3,20 +3,16 @@ //! ```text //! condition = value ("==" | ">" ...) value //! to = value value TO value -//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -//! singleQuoted = "'" .* all but quotes "'" -//! doubleQuoted = "\"" (word | spaces)* "\"" -//! word = (alphanumeric | _ | - | .)+ -//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) //! ``` use nom::branch::alt; use nom::bytes::complete::tag; +use nom::combinator::cut; use nom::sequence::tuple; use nom::IResult; use Condition::*; -use crate::{parse_value, ws, FPError, FilterCondition, Span, Token}; +use crate::{parse_value, FPError, FilterCondition, Span, Token}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Condition<'a> { @@ -50,8 +46,7 @@ pub fn parse_condition<'a, E: FPError<'a>>( input: Span<'a>, ) -> IResult, FilterCondition, E> { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); - let (input, (key, op, value)) = - tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?; + let (input, (key, op, value)) = tuple((|c| parse_value(c), operator, cut(parse_value)))(input)?; let fid = key; @@ -81,9 +76,7 @@ pub fn parse_condition<'a, E: FPError<'a>>( /// to = value value TO value pub fn parse_to<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { let (input, (key, from, _, to)) = - tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( - input, - )?; + tuple((|c| parse_value(c), |c| parse_value(c), tag("TO"), cut(parse_value)))(input)?; Ok(( input, diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 4623f9387..5b8107b82 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -1,44 +1,50 @@ //! BNF grammar: //! //! ```text +//! filter = expression ~ EOF //! expression = or //! or = and (~ "OR" ~ and) //! and = not (~ "AND" not)* //! not = ("NOT" | "!") not | primary -//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius +//! primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to //! condition = value ("==" | ">" ...) value //! to = value value TO value //! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* //! singleQuoted = "'" .* all but quotes "'" //! doubleQuoted = "\"" (word | spaces)* "\"" //! word = (alphanumeric | _ | - | .)+ -//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) +//! geoRadius = WS* ~ "_geoRadius(" ~ float ~ "," ~ float ~ "," float ~ ")" +//! ``` +//! +//! Other BNF grammar used to handle some specific errors: +//! ```text +//! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")" //! ``` mod condition; +mod error; mod value; + use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; +pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0}; -use nom::combinator::map; -use nom::error::{ContextError, Error, ErrorKind, VerboseError}; +use nom::combinator::{cut, eof, map}; +use nom::error::{ContextError, ParseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; -use nom::sequence::{delimited, preceded, tuple}; +use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::{Finish, IResult}; -use nom_greedyerror::GreedyError; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; -pub type Span<'a> = LocatedSpan<&'a str>; +pub type Span<'a> = LocatedSpan<&'a str, &'a str>; -pub trait FilterParserError<'a>: nom::error::ParseError> + ContextError> {} -impl<'a> FilterParserError<'a> for GreedyError, ErrorKind> {} -impl<'a> FilterParserError<'a> for VerboseError> {} -impl<'a> FilterParserError<'a> for Error> {} +pub trait FilterParserError<'a>: ParseError> + ContextError> {} +impl<'a, T> FilterParserError<'a> for T where T: ParseError> + ContextError> {} use FilterParserError as FPError; @@ -94,8 +100,8 @@ impl<'a> FilterCondition<'a> { if input.trim().is_empty() { return Ok(Self::Empty); } - let span = Span::new(input); - parse_expression::<'a, E>(span).finish().map(|(_rem, output)| output) + let span = Span::new_extra(input, input); + parse_filter::<'a, E>(span).finish().map(|(_rem, output)| output) } } @@ -109,7 +115,7 @@ fn ws<'a, O, E: FPError<'a>>( /// and = not (~ "AND" not)* fn parse_or<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { let (input, lhs) = parse_and(input)?; - let (input, ors) = many0(preceded(ws(tag("OR")), |c| parse_and(c)))(input)?; + let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?; let expr = ors .into_iter() @@ -119,7 +125,7 @@ fn parse_or<'a, E: FPError<'a>>(input: Span<'a>) -> IResult>(input: Span<'a>) -> IResult { let (input, lhs) = parse_not(input)?; - let (input, ors) = many0(preceded(ws(tag("AND")), |c| parse_not(c)))(input)?; + let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?; let expr = ors .into_iter() .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); @@ -128,9 +134,10 @@ fn parse_and<'a, E: FPError<'a>>(input: Span<'a>) -> IResult>(input: Span<'a>) -> IResult { - alt((map(preceded(alt((tag("!"), tag("NOT"))), |c| parse_not(c)), |e| e.negate()), |c| { - parse_primary(c) - }))(input) + alt(( + map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), + cut(parse_primary), + ))(input) } /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) @@ -140,7 +147,7 @@ fn parse_geo_radius<'a, E: FPError<'a>>(input: Span<'a>) -> IResult, Fi // we want to forbid space BEFORE the _geoRadius but not after let parsed = preceded::<_, _, _, _, _, _>( tuple((multispace0, tag("_geoRadius"))), - delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')')), + cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), )(input); let (input, args): (Span, Vec) = parsed?; @@ -157,13 +164,13 @@ fn parse_geo_radius<'a, E: FPError<'a>>(input: Span<'a>) -> IResult, Fi Ok((input, res)) } -/// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius +/// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to fn parse_primary<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { alt(( - delimited(ws(char('(')), |c| parse_expression(c), ws(char(')'))), + delimited(ws(char('(')), cut(parse_expression), cut(ws(char(')')))), + |c| parse_geo_radius(c), |c| parse_condition(c), |c| parse_to(c), - |c| parse_geo_radius(c), ))(input) } @@ -172,6 +179,11 @@ pub fn parse_expression<'a, E: FPError<'a>>(input: Span<'a>) -> IResult>(input: Span<'a>) -> IResult { + terminated(parse_expression, eof)(input) +} + #[cfg(test)] pub mod tests { use super::*; @@ -181,7 +193,8 @@ pub mod tests { // if the string is empty we still need to return 1 for the line number let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count()); let offset = before.chars().count(); - unsafe { Span::new_from_raw_offset(offset, lines as u32, value, ()) }.into() + // the extra field is not checked in the tests so we can set it to nothing + unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into() } #[test] @@ -471,4 +484,48 @@ pub mod tests { assert_eq!(filter, expected, "Filter `{}` failed.", input); } } + + #[test] + fn error() { + use FilterCondition as Fc; + + let result = Fc::parse::>("test = truc OR truc"); + assert!(result.is_err()); + + let test_case = [ + // simple test + ("OR", "An error occured"), + ("AND", "An error occured"), + ("channel = Ponce OR", "An error occured"), + ("channel = Ponce = 12", "An error occured"), + ("_geoRadius = 12", "An error occured"), + ("_geoPoint(12, 13, 14)", "An error occured"), + ("_geo = _geoRadius(12, 13, 14)", "An error occured"), + ]; + + for (input, expected) in test_case { + let result = Fc::parse::>(input); + + assert!( + result.is_err(), + "Filter `{:?}` wasn't supposed to be parsed but it did with the following result: `{:?}`", + expected, + result.unwrap() + ); + let filter = result.unwrap_err().to_string(); + assert_eq!(filter, expected, "Filter `{:?}` was supposed to return the following error: `{}`, but instead returned `{}`.", input, filter, expected); + } + } + + /* + #[test] + fn bidule() { + use FilterCondition as Fc; + + let result = Fc::parse::>("test = truc OR truc"); + dbg!(result); + + assert!(false); + } + */ } diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 5b3a8dfd1..55c9aec23 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -57,7 +57,7 @@ pub mod tests { ]; for (input, expected) in test_case { - let input = Span::new(input); + let input = Span::new_extra(input, input); let result = parse_value::>(input); assert!( diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 42b3fc52d..b61cd451b 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -93,7 +93,7 @@ impl<'a> Filter<'a> { let condition = match FilterCondition::parse::>(expression) { Ok(fc) => Ok(fc), Err(e) => Err(Error::UserError(UserError::InvalidFilter { - input: convert_error(Span::new(expression), e).to_string(), + input: convert_error(Span::new_extra(expression, expression), e).to_string(), })), }?; Ok(Self { condition }) From 5d3af5f2732f878fa3cfa758d5deff8f01a47d2f Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 2 Nov 2021 20:27:07 +0100 Subject: [PATCH 20/58] remove all genericity in favor of my custom error type --- filter_parser/src/condition.rs | 9 +++----- filter_parser/src/lib.rs | 41 +++++++++++++++------------------- filter_parser/src/value.rs | 24 +++++++++----------- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs index c7a9a85a0..faacceb72 100644 --- a/filter_parser/src/condition.rs +++ b/filter_parser/src/condition.rs @@ -9,10 +9,9 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::combinator::cut; use nom::sequence::tuple; -use nom::IResult; use Condition::*; -use crate::{parse_value, FPError, FilterCondition, Span, Token}; +use crate::{parse_value, FilterCondition, IResult, Span, Token}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Condition<'a> { @@ -42,9 +41,7 @@ impl<'a> Condition<'a> { } /// condition = value ("==" | ">" ...) value -pub fn parse_condition<'a, E: FPError<'a>>( - input: Span<'a>, -) -> IResult, FilterCondition, E> { +pub fn parse_condition(input: Span) -> IResult { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); let (input, (key, op, value)) = tuple((|c| parse_value(c), operator, cut(parse_value)))(input)?; @@ -74,7 +71,7 @@ pub fn parse_condition<'a, E: FPError<'a>>( } /// to = value value TO value -pub fn parse_to<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { +pub fn parse_to(input: Span) -> IResult { let (input, (key, from, _, to)) = tuple((|c| parse_value(c), |c| parse_value(c), tag("TO"), cut(parse_value)))(input)?; diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 5b8107b82..86c6cd79c 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -37,16 +37,13 @@ use nom::error::{ContextError, ParseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, terminated, tuple}; -use nom::{Finish, IResult}; +use nom::Finish; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; pub type Span<'a> = LocatedSpan<&'a str, &'a str>; -pub trait FilterParserError<'a>: ParseError> + ContextError> {} -impl<'a, T> FilterParserError<'a> for T where T: ParseError> + ContextError> {} - -use FilterParserError as FPError; +type IResult<'a, Ret> = nom::IResult, Ret, Error<'a>>; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Token<'a> { @@ -96,24 +93,22 @@ impl<'a> FilterCondition<'a> { } } - pub fn parse>(input: &'a str) -> Result { + pub fn parse(input: &'a str) -> Result { if input.trim().is_empty() { return Ok(Self::Empty); } let span = Span::new_extra(input, input); - parse_filter::<'a, E>(span).finish().map(|(_rem, output)| output) + parse_filter(span).finish().map(|(_rem, output)| output) } } // remove OPTIONAL whitespaces before AND after the the provided parser -fn ws<'a, O, E: FPError<'a>>( - inner: impl FnMut(Span<'a>) -> IResult, -) -> impl FnMut(Span<'a>) -> IResult { +fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) -> IResult { delimited(multispace0, inner, multispace0) } /// and = not (~ "AND" not)* -fn parse_or<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { +fn parse_or(input: Span) -> IResult { let (input, lhs) = parse_and(input)?; let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?; @@ -123,7 +118,7 @@ fn parse_or<'a, E: FPError<'a>>(input: Span<'a>) -> IResult>(input: Span<'a>) -> IResult { +fn parse_and(input: Span) -> IResult { let (input, lhs) = parse_not(input)?; let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?; let expr = ors @@ -133,7 +128,7 @@ fn parse_and<'a, E: FPError<'a>>(input: Span<'a>) -> IResult>(input: Span<'a>) -> IResult { +fn parse_not(input: Span) -> IResult { alt(( map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), cut(parse_primary), @@ -141,7 +136,7 @@ fn parse_not<'a, E: FPError<'a>>(input: Span<'a>) -> IResult>(input: Span<'a>) -> IResult, FilterCondition, E> { +fn parse_geo_radius(input: Span) -> IResult { let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; // we want to forbid space BEFORE the _geoRadius but not after @@ -153,8 +148,8 @@ fn parse_geo_radius<'a, E: FPError<'a>>(input: Span<'a>) -> IResult, Fi let (input, args): (Span, Vec) = parsed?; if args.len() != 3 { - let e = E::from_char(input, '('); - return Err(nom::Err::Failure(E::add_context(input, err_msg_args_incomplete, e))); + let e = Error::from_char(input, '('); + return Err(nom::Err::Failure(Error::add_context(input, err_msg_args_incomplete, e))); } let res = FilterCondition::GeoLowerThan { @@ -165,7 +160,7 @@ fn parse_geo_radius<'a, E: FPError<'a>>(input: Span<'a>) -> IResult, Fi } /// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to -fn parse_primary<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { +fn parse_primary(input: Span) -> IResult { alt(( delimited(ws(char('(')), cut(parse_expression), cut(ws(char(')')))), |c| parse_geo_radius(c), @@ -175,12 +170,12 @@ fn parse_primary<'a, E: FPError<'a>>(input: Span<'a>) -> IResult>(input: Span<'a>) -> IResult { +pub fn parse_expression(input: Span) -> IResult { parse_or(input) } /// filter = expression ~ EOF -pub fn parse_filter<'a, E: FPError<'a>>(input: Span<'a>) -> IResult { +pub fn parse_filter(input: Span) -> IResult { terminated(parse_expression, eof)(input) } @@ -472,7 +467,7 @@ pub mod tests { ]; for (input, expected) in test_case { - let result = Fc::parse::>(input); + let result = Fc::parse(input); assert!( result.is_ok(), @@ -489,22 +484,22 @@ pub mod tests { fn error() { use FilterCondition as Fc; - let result = Fc::parse::>("test = truc OR truc"); + let result = Fc::parse("test = truc OR truc"); assert!(result.is_err()); let test_case = [ // simple test + ("channel = Ponce = 12", "An error occured"), ("OR", "An error occured"), ("AND", "An error occured"), ("channel = Ponce OR", "An error occured"), - ("channel = Ponce = 12", "An error occured"), ("_geoRadius = 12", "An error occured"), ("_geoPoint(12, 13, 14)", "An error occured"), ("_geo = _geoRadius(12, 13, 14)", "An error occured"), ]; for (input, expected) in test_case { - let result = Fc::parse::>(input); + let result = Fc::parse(input); assert!( result.is_err(), diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 55c9aec23..7c708aa73 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -2,25 +2,25 @@ use nom::branch::alt; use nom::bytes::complete::{take_till, take_while1}; use nom::character::complete::char; use nom::sequence::delimited; -use nom::IResult; -use crate::{ws, FPError, Span, Token}; +use crate::{ws, Error, IResult, Span, Token}; /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -pub fn parse_value<'a, E: FPError<'a>>(input: Span<'a>) -> IResult, Token, E> { +pub fn parse_value(input: Span) -> IResult { // singleQuoted = "'" .* all but quotes "'" - let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); + let simple_quoted = |input| take_till(|c: char| c == '\'')(input); // doubleQuoted = "\"" (word | spaces)* "\"" - let quoted_key = |input| take_till(|c: char| c == '"')(input); + let double_quoted = |input| take_till(|c: char| c == '"')(input); // word = (alphanumeric | _ | - | .)+ let word = |input| take_while1(is_key_component)(input); - alt(( - ws(delimited(char('\''), simple_quoted_key, char('\''))), - ws(delimited(char('"'), quoted_key, char('"'))), - ws(word), - ))(input) + ws(alt(( + delimited(char('\''), simple_quoted, char('\'')), + delimited(char('"'), double_quoted, char('"')), + word, + )))(input) .map(|(s, t)| (s, t.into())) + .map_err(|e| e.map(|_| Error::expected_value(input))) } fn is_key_component(c: char) -> bool { @@ -29,8 +29,6 @@ fn is_key_component(c: char) -> bool { #[cfg(test)] pub mod tests { - use nom::error::Error; - use super::*; use crate::tests::rtok; @@ -58,7 +56,7 @@ pub mod tests { for (input, expected) in test_case { let input = Span::new_extra(input, input); - let result = parse_value::>(input); + let result = parse_value(input); assert!( result.is_ok(), From 54aec7ac5f541b0b5a160e3a790a4688613f0d8b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 14:22:35 +0100 Subject: [PATCH 21/58] update the filter parser and some code for the fuzzer --- filter_parser/fuzz/.gitignore | 3 + filter_parser/fuzz/Cargo.toml | 25 +++ filter_parser/fuzz/corpus/parse/test_1 | 1 + filter_parser/fuzz/corpus/parse/test_10 | 1 + filter_parser/fuzz/corpus/parse/test_11 | 1 + filter_parser/fuzz/corpus/parse/test_12 | 1 + filter_parser/fuzz/corpus/parse/test_13 | 1 + filter_parser/fuzz/corpus/parse/test_14 | 1 + filter_parser/fuzz/corpus/parse/test_15 | 1 + filter_parser/fuzz/corpus/parse/test_16 | 1 + filter_parser/fuzz/corpus/parse/test_17 | 1 + filter_parser/fuzz/corpus/parse/test_18 | 1 + filter_parser/fuzz/corpus/parse/test_19 | 1 + filter_parser/fuzz/corpus/parse/test_2 | 1 + filter_parser/fuzz/corpus/parse/test_20 | 1 + filter_parser/fuzz/corpus/parse/test_21 | 1 + filter_parser/fuzz/corpus/parse/test_22 | 1 + filter_parser/fuzz/corpus/parse/test_23 | 1 + filter_parser/fuzz/corpus/parse/test_24 | 1 + filter_parser/fuzz/corpus/parse/test_25 | 1 + filter_parser/fuzz/corpus/parse/test_26 | 1 + filter_parser/fuzz/corpus/parse/test_27 | 1 + filter_parser/fuzz/corpus/parse/test_28 | 1 + filter_parser/fuzz/corpus/parse/test_29 | 1 + filter_parser/fuzz/corpus/parse/test_3 | 1 + filter_parser/fuzz/corpus/parse/test_30 | 1 + filter_parser/fuzz/corpus/parse/test_31 | 1 + filter_parser/fuzz/corpus/parse/test_32 | 1 + filter_parser/fuzz/corpus/parse/test_33 | 1 + filter_parser/fuzz/corpus/parse/test_34 | 1 + filter_parser/fuzz/corpus/parse/test_35 | 1 + filter_parser/fuzz/corpus/parse/test_36 | 1 + filter_parser/fuzz/corpus/parse/test_37 | 1 + filter_parser/fuzz/corpus/parse/test_38 | 1 + filter_parser/fuzz/corpus/parse/test_39 | 1 + filter_parser/fuzz/corpus/parse/test_4 | 1 + filter_parser/fuzz/corpus/parse/test_40 | 1 + filter_parser/fuzz/corpus/parse/test_41 | 1 + filter_parser/fuzz/corpus/parse/test_42 | 1 + filter_parser/fuzz/corpus/parse/test_43 | 1 + filter_parser/fuzz/corpus/parse/test_5 | 1 + filter_parser/fuzz/corpus/parse/test_6 | 1 + filter_parser/fuzz/corpus/parse/test_7 | 1 + filter_parser/fuzz/corpus/parse/test_8 | 1 + filter_parser/fuzz/corpus/parse/test_9 | 1 + filter_parser/fuzz/fuzz_targets/parse.rs | 13 ++ filter_parser/src/error.rs | 195 +++++++++++++++++++++++ filter_parser/src/lib.rs | 117 +++++++++----- filter_parser/src/main.rs | 11 ++ filter_parser/src/value.rs | 50 ++++-- 50 files changed, 406 insertions(+), 51 deletions(-) create mode 100644 filter_parser/fuzz/.gitignore create mode 100644 filter_parser/fuzz/Cargo.toml create mode 100644 filter_parser/fuzz/corpus/parse/test_1 create mode 100644 filter_parser/fuzz/corpus/parse/test_10 create mode 100644 filter_parser/fuzz/corpus/parse/test_11 create mode 100644 filter_parser/fuzz/corpus/parse/test_12 create mode 100644 filter_parser/fuzz/corpus/parse/test_13 create mode 100644 filter_parser/fuzz/corpus/parse/test_14 create mode 100644 filter_parser/fuzz/corpus/parse/test_15 create mode 100644 filter_parser/fuzz/corpus/parse/test_16 create mode 100644 filter_parser/fuzz/corpus/parse/test_17 create mode 100644 filter_parser/fuzz/corpus/parse/test_18 create mode 100644 filter_parser/fuzz/corpus/parse/test_19 create mode 100644 filter_parser/fuzz/corpus/parse/test_2 create mode 100644 filter_parser/fuzz/corpus/parse/test_20 create mode 100644 filter_parser/fuzz/corpus/parse/test_21 create mode 100644 filter_parser/fuzz/corpus/parse/test_22 create mode 100644 filter_parser/fuzz/corpus/parse/test_23 create mode 100644 filter_parser/fuzz/corpus/parse/test_24 create mode 100644 filter_parser/fuzz/corpus/parse/test_25 create mode 100644 filter_parser/fuzz/corpus/parse/test_26 create mode 100644 filter_parser/fuzz/corpus/parse/test_27 create mode 100644 filter_parser/fuzz/corpus/parse/test_28 create mode 100644 filter_parser/fuzz/corpus/parse/test_29 create mode 100644 filter_parser/fuzz/corpus/parse/test_3 create mode 100644 filter_parser/fuzz/corpus/parse/test_30 create mode 100644 filter_parser/fuzz/corpus/parse/test_31 create mode 100644 filter_parser/fuzz/corpus/parse/test_32 create mode 100644 filter_parser/fuzz/corpus/parse/test_33 create mode 100644 filter_parser/fuzz/corpus/parse/test_34 create mode 100644 filter_parser/fuzz/corpus/parse/test_35 create mode 100644 filter_parser/fuzz/corpus/parse/test_36 create mode 100644 filter_parser/fuzz/corpus/parse/test_37 create mode 100644 filter_parser/fuzz/corpus/parse/test_38 create mode 100644 filter_parser/fuzz/corpus/parse/test_39 create mode 100644 filter_parser/fuzz/corpus/parse/test_4 create mode 100644 filter_parser/fuzz/corpus/parse/test_40 create mode 100644 filter_parser/fuzz/corpus/parse/test_41 create mode 100644 filter_parser/fuzz/corpus/parse/test_42 create mode 100644 filter_parser/fuzz/corpus/parse/test_43 create mode 100644 filter_parser/fuzz/corpus/parse/test_5 create mode 100644 filter_parser/fuzz/corpus/parse/test_6 create mode 100644 filter_parser/fuzz/corpus/parse/test_7 create mode 100644 filter_parser/fuzz/corpus/parse/test_8 create mode 100644 filter_parser/fuzz/corpus/parse/test_9 create mode 100644 filter_parser/fuzz/fuzz_targets/parse.rs create mode 100644 filter_parser/src/error.rs create mode 100644 filter_parser/src/main.rs diff --git a/filter_parser/fuzz/.gitignore b/filter_parser/fuzz/.gitignore new file mode 100644 index 000000000..a0925114d --- /dev/null +++ b/filter_parser/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/filter_parser/fuzz/Cargo.toml b/filter_parser/fuzz/Cargo.toml new file mode 100644 index 000000000..33e604e73 --- /dev/null +++ b/filter_parser/fuzz/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "filter_parser-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.filter_parser] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "parse" +path = "fuzz_targets/parse.rs" +test = false +doc = false diff --git a/filter_parser/fuzz/corpus/parse/test_1 b/filter_parser/fuzz/corpus/parse/test_1 new file mode 100644 index 000000000..2523a328e --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_1 @@ -0,0 +1 @@ +channel = Ponce diff --git a/filter_parser/fuzz/corpus/parse/test_10 b/filter_parser/fuzz/corpus/parse/test_10 new file mode 100644 index 000000000..d0e9f1e51 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_10 @@ -0,0 +1 @@ +channel != ponce diff --git a/filter_parser/fuzz/corpus/parse/test_11 b/filter_parser/fuzz/corpus/parse/test_11 new file mode 100644 index 000000000..ca3db9223 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_11 @@ -0,0 +1 @@ +NOT channel = ponce diff --git a/filter_parser/fuzz/corpus/parse/test_12 b/filter_parser/fuzz/corpus/parse/test_12 new file mode 100644 index 000000000..325f848c1 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_12 @@ -0,0 +1 @@ +subscribers < 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_13 b/filter_parser/fuzz/corpus/parse/test_13 new file mode 100644 index 000000000..ca7b96f30 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_13 @@ -0,0 +1 @@ +subscribers > 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_14 b/filter_parser/fuzz/corpus/parse/test_14 new file mode 100644 index 000000000..f72f48bdb --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_14 @@ -0,0 +1 @@ +subscribers <= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_15 b/filter_parser/fuzz/corpus/parse/test_15 new file mode 100644 index 000000000..75073fc74 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_15 @@ -0,0 +1 @@ +subscribers >= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_16 b/filter_parser/fuzz/corpus/parse/test_16 new file mode 100644 index 000000000..bdd39241b --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_16 @@ -0,0 +1 @@ +NOT subscribers < 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_17 b/filter_parser/fuzz/corpus/parse/test_17 new file mode 100644 index 000000000..4487643e4 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_17 @@ -0,0 +1 @@ +NOT subscribers > 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_18 b/filter_parser/fuzz/corpus/parse/test_18 new file mode 100644 index 000000000..150604012 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_18 @@ -0,0 +1 @@ +NOT subscribers <= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_19 b/filter_parser/fuzz/corpus/parse/test_19 new file mode 100644 index 000000000..11bc15103 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_19 @@ -0,0 +1 @@ +NOT subscribers >= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_2 b/filter_parser/fuzz/corpus/parse/test_2 new file mode 100644 index 000000000..8ac19cad4 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_2 @@ -0,0 +1 @@ +subscribers = 12 diff --git a/filter_parser/fuzz/corpus/parse/test_20 b/filter_parser/fuzz/corpus/parse/test_20 new file mode 100644 index 000000000..f52ad8ff2 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_20 @@ -0,0 +1 @@ +subscribers 100 TO 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_21 b/filter_parser/fuzz/corpus/parse/test_21 new file mode 100644 index 000000000..e86e6b89d --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_21 @@ -0,0 +1 @@ +NOT subscribers 100 TO 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_22 b/filter_parser/fuzz/corpus/parse/test_22 new file mode 100644 index 000000000..8ceeb6c1a --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_22 @@ -0,0 +1 @@ +_geoRadius(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_23 b/filter_parser/fuzz/corpus/parse/test_23 new file mode 100644 index 000000000..614effb98 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_23 @@ -0,0 +1 @@ +NOT _geoRadius(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_24 b/filter_parser/fuzz/corpus/parse/test_24 new file mode 100644 index 000000000..2b8b39279 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_24 @@ -0,0 +1 @@ +channel = ponce AND 'dog race' != 'bernese mountain' diff --git a/filter_parser/fuzz/corpus/parse/test_25 b/filter_parser/fuzz/corpus/parse/test_25 new file mode 100644 index 000000000..8f6fef74a --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_25 @@ -0,0 +1 @@ +channel = ponce OR 'dog race' != 'bernese mountain' diff --git a/filter_parser/fuzz/corpus/parse/test_26 b/filter_parser/fuzz/corpus/parse/test_26 new file mode 100644 index 000000000..5134b354d --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_26 @@ -0,0 +1 @@ +channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_27 b/filter_parser/fuzz/corpus/parse/test_27 new file mode 100644 index 000000000..b63559b9f --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_27 @@ -0,0 +1 @@ +channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 ) diff --git a/filter_parser/fuzz/corpus/parse/test_28 b/filter_parser/fuzz/corpus/parse/test_28 new file mode 100644 index 000000000..5bc97fb2b --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_28 @@ -0,0 +1 @@ +(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_29 b/filter_parser/fuzz/corpus/parse/test_29 new file mode 100644 index 000000000..7713618bb --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_29 @@ -0,0 +1 @@ +channel = Ponce = 12 diff --git a/filter_parser/fuzz/corpus/parse/test_3 b/filter_parser/fuzz/corpus/parse/test_3 new file mode 100644 index 000000000..2533e8fcf --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_3 @@ -0,0 +1 @@ +channel = 'Mister Mv' diff --git a/filter_parser/fuzz/corpus/parse/test_30 b/filter_parser/fuzz/corpus/parse/test_30 new file mode 100644 index 000000000..c35941150 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_30 @@ -0,0 +1 @@ +channel = diff --git a/filter_parser/fuzz/corpus/parse/test_31 b/filter_parser/fuzz/corpus/parse/test_31 new file mode 100644 index 000000000..f7982669f --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_31 @@ -0,0 +1 @@ +channel = 🐻 diff --git a/filter_parser/fuzz/corpus/parse/test_32 b/filter_parser/fuzz/corpus/parse/test_32 new file mode 100644 index 000000000..c4a102dc8 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_32 @@ -0,0 +1 @@ +OR diff --git a/filter_parser/fuzz/corpus/parse/test_33 b/filter_parser/fuzz/corpus/parse/test_33 new file mode 100644 index 000000000..eb80eb4e6 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_33 @@ -0,0 +1 @@ +AND diff --git a/filter_parser/fuzz/corpus/parse/test_34 b/filter_parser/fuzz/corpus/parse/test_34 new file mode 100644 index 000000000..60fc05e7f --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_34 @@ -0,0 +1 @@ +channel Ponce diff --git a/filter_parser/fuzz/corpus/parse/test_35 b/filter_parser/fuzz/corpus/parse/test_35 new file mode 100644 index 000000000..4a868f1d8 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_35 @@ -0,0 +1 @@ +channel = Ponce OR diff --git a/filter_parser/fuzz/corpus/parse/test_36 b/filter_parser/fuzz/corpus/parse/test_36 new file mode 100644 index 000000000..d7a0abac7 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_36 @@ -0,0 +1 @@ +_geoRadius diff --git a/filter_parser/fuzz/corpus/parse/test_37 b/filter_parser/fuzz/corpus/parse/test_37 new file mode 100644 index 000000000..44b5105b6 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_37 @@ -0,0 +1 @@ +_geoRadius = 12 diff --git a/filter_parser/fuzz/corpus/parse/test_38 b/filter_parser/fuzz/corpus/parse/test_38 new file mode 100644 index 000000000..ab45b973f --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_38 @@ -0,0 +1 @@ +_geoPoint(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_39 b/filter_parser/fuzz/corpus/parse/test_39 new file mode 100644 index 000000000..283095326 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_39 @@ -0,0 +1 @@ +position <= _geoPoint(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_4 b/filter_parser/fuzz/corpus/parse/test_4 new file mode 100644 index 000000000..9c2716e79 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_4 @@ -0,0 +1 @@ +channel = "Mister Mv" diff --git a/filter_parser/fuzz/corpus/parse/test_40 b/filter_parser/fuzz/corpus/parse/test_40 new file mode 100644 index 000000000..c4c038c15 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_40 @@ -0,0 +1 @@ +position <= _geoRadius(12, 13, 14) diff --git a/filter_parser/fuzz/corpus/parse/test_41 b/filter_parser/fuzz/corpus/parse/test_41 new file mode 100644 index 000000000..6952aa87e --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_41 @@ -0,0 +1 @@ +channel = 'ponce diff --git a/filter_parser/fuzz/corpus/parse/test_42 b/filter_parser/fuzz/corpus/parse/test_42 new file mode 100644 index 000000000..485d8da96 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_42 @@ -0,0 +1 @@ +channel = "ponce diff --git a/filter_parser/fuzz/corpus/parse/test_43 b/filter_parser/fuzz/corpus/parse/test_43 new file mode 100644 index 000000000..728c8aa22 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_43 @@ -0,0 +1 @@ +channel = mv OR (followers >= 1000 diff --git a/filter_parser/fuzz/corpus/parse/test_5 b/filter_parser/fuzz/corpus/parse/test_5 new file mode 100644 index 000000000..89f5ec8ee --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_5 @@ -0,0 +1 @@ +'dog race' = Borzoi diff --git a/filter_parser/fuzz/corpus/parse/test_6 b/filter_parser/fuzz/corpus/parse/test_6 new file mode 100644 index 000000000..be3e203cb --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_6 @@ -0,0 +1 @@ +"dog race" = Chusky diff --git a/filter_parser/fuzz/corpus/parse/test_7 b/filter_parser/fuzz/corpus/parse/test_7 new file mode 100644 index 000000000..eb77a2875 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_7 @@ -0,0 +1 @@ +"dog race" = "Bernese Mountain" diff --git a/filter_parser/fuzz/corpus/parse/test_8 b/filter_parser/fuzz/corpus/parse/test_8 new file mode 100644 index 000000000..a25477648 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_8 @@ -0,0 +1 @@ +'dog race' = 'Bernese Mountain' diff --git a/filter_parser/fuzz/corpus/parse/test_9 b/filter_parser/fuzz/corpus/parse/test_9 new file mode 100644 index 000000000..c347e68f5 --- /dev/null +++ b/filter_parser/fuzz/corpus/parse/test_9 @@ -0,0 +1 @@ +"dog race" = 'Bernese Mountain' diff --git a/filter_parser/fuzz/fuzz_targets/parse.rs b/filter_parser/fuzz/fuzz_targets/parse.rs new file mode 100644 index 000000000..99d4a03a6 --- /dev/null +++ b/filter_parser/fuzz/fuzz_targets/parse.rs @@ -0,0 +1,13 @@ +#![no_main] +use filter_parser::FilterCondition; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + if let Ok(s) = std::str::from_utf8(data) { + // When we are fuzzing the parser we can get stack overflow really easily. + // But since this doesn't happens with a normal build we are just going to limit the fuzzer to 500 characters. + if s.len() < 500 { + let _ = FilterCondition::parse(s); + } + } +}); diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs new file mode 100644 index 000000000..b4155bb51 --- /dev/null +++ b/filter_parser/src/error.rs @@ -0,0 +1,195 @@ +use std::fmt::Display; + +use nom::{Parser, error::{self, ParseError}}; + +use crate::{IResult, Span}; + +pub trait ExtendNomError { + fn is_failure(&self) -> bool; + fn map_err E>(self, op: O) -> nom::Err; + fn map_fail E>(self, op: O) -> nom::Err; +} + +impl ExtendNomError for nom::Err { + fn is_failure(&self) -> bool { + matches!(self, Self::Failure(_)) + } + + fn map_err E>(self, op: O) -> nom::Err { + match self { + e @ Self::Failure(_) => e, + e => e.map(|e| op(e)), + } + } + + fn map_fail E>(self, op: O) -> nom::Err { + match self { + e @ Self::Error(_) => e, + e => e.map(|e| op(e)), + } + } +} + +/// cut a parser and map the error +pub fn cut_with_err<'a, O>(mut parser: impl FnMut(Span<'a>) -> IResult, mut with: impl FnMut(Error<'a>) -> Error<'a>) -> impl FnMut(Span<'a>) -> IResult { + move |input| match parser.parse(input) { + Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))), + rest => rest, + } +} + +#[derive(Debug)] +pub struct Error<'a> { + context: Span<'a>, + kind: ErrorKind<'a>, +} + +#[derive(Debug)] +pub enum ErrorKind<'a> { + ReservedGeo(&'a str), + Geo, + MisusedGeo, + InvalidPrimary, + ReservedKeyword, + ExpectedEof, + ExpectedValue, + MissingClosingDelimiter(char), + UnexpectedInput(Vec<&'a str>), + Context(&'a str), + Char(char), + Unreachable, +} + +impl<'a> Error<'a> { + pub fn kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self { + Self { context, kind } + } + pub fn char(self) -> char { + match self.kind { + ErrorKind::Char(c) => c, + _ => panic!("Internal filter parser error"), + } + } +} + +impl<'a> ParseError> for Error<'a> { + fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self { + let kind = match kind { + error::ErrorKind::Eof => ErrorKind::ExpectedEof, + error::ErrorKind::Tag => ErrorKind::UnexpectedInput(Vec::new()), + error::ErrorKind::MapRes => todo!(), + error::ErrorKind::MapOpt => todo!(), + error::ErrorKind::Alt => todo!(), + error::ErrorKind::IsNot => todo!(), + error::ErrorKind::IsA => todo!(), + error::ErrorKind::SeparatedList => todo!(), + error::ErrorKind::SeparatedNonEmptyList => todo!(), + error::ErrorKind::Many0 => todo!(), + error::ErrorKind::Many1 => todo!(), + error::ErrorKind::ManyTill => todo!(), + error::ErrorKind::Count => todo!(), + error::ErrorKind::TakeUntil => todo!(), + error::ErrorKind::LengthValue => todo!(), + error::ErrorKind::TagClosure => todo!(), + error::ErrorKind::Alpha => todo!(), + error::ErrorKind::Digit => todo!(), + error::ErrorKind::HexDigit => todo!(), + error::ErrorKind::OctDigit => todo!(), + error::ErrorKind::AlphaNumeric => todo!(), + error::ErrorKind::Space => todo!(), + error::ErrorKind::MultiSpace => todo!(), + error::ErrorKind::LengthValueFn => todo!(), + error::ErrorKind::Switch => todo!(), + error::ErrorKind::TagBits => todo!(), + error::ErrorKind::OneOf => todo!(), + error::ErrorKind::NoneOf => todo!(), + error::ErrorKind::Char => todo!(), + error::ErrorKind::CrLf => todo!(), + error::ErrorKind::RegexpMatch => todo!(), + error::ErrorKind::RegexpMatches => todo!(), + error::ErrorKind::RegexpFind => todo!(), + error::ErrorKind::RegexpCapture => todo!(), + error::ErrorKind::RegexpCaptures => todo!(), + error::ErrorKind::TakeWhile1 => ErrorKind::Unreachable, + error::ErrorKind::Complete => todo!(), + error::ErrorKind::Fix => todo!(), + error::ErrorKind::Escaped => todo!(), + error::ErrorKind::EscapedTransform => todo!(), + error::ErrorKind::NonEmpty => todo!(), + error::ErrorKind::ManyMN => todo!(), + error::ErrorKind::Not => todo!(), + error::ErrorKind::Permutation => todo!(), + error::ErrorKind::Verify => todo!(), + error::ErrorKind::TakeTill1 => todo!(), + error::ErrorKind::TakeWhileMN => todo!(), + error::ErrorKind::TooLarge => todo!(), + error::ErrorKind::Many0Count => todo!(), + error::ErrorKind::Many1Count => todo!(), + error::ErrorKind::Float => todo!(), + error::ErrorKind::Satisfy => todo!(), + error::ErrorKind::Fail => todo!(), + }; + Self { context: input, kind } + } + + fn append(_input: Span<'a>, _kind: error::ErrorKind, other: Self) -> Self { + other + } + + fn from_char(input: Span<'a>, c: char) -> Self { + Self { context: input, kind: ErrorKind::Char(c) } + } +} + +impl<'a> Display for Error<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let input = self.context.fragment(); + + match self.kind { + ErrorKind::ExpectedValue if input.trim().is_empty() => { + writeln!(f, "Was expecting a value but instead got nothing.")? + } + ErrorKind::MissingClosingDelimiter(c) => { + writeln!(f, "Expression `{}` is missing the following closing delemiter: `{}`.", input, c)? + } + ErrorKind::ExpectedValue => { + writeln!(f, "Was expecting a value but instead got `{}`.", input)? + } + ErrorKind::InvalidPrimary if input.trim().is_empty() => { + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing.")? + } + ErrorKind::InvalidPrimary => { + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", input)? + } + ErrorKind::ExpectedEof => { + writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", input)? + } + ErrorKind::Geo => { + writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")? + } + ErrorKind::ReservedGeo(name) => { + writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name)? + } + ErrorKind::MisusedGeo => { + writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? + } + ErrorKind::Char(c) => { + panic!("Tried to display a char error with `{}`", c) + } + ErrorKind::ReservedKeyword => writeln!(f, "reserved keyword")?, + ErrorKind::UnexpectedInput(ref v) => writeln!(f, "Unexpected input found `{}`, vec: `{:?}`", input, v)?, + ErrorKind::Context(_) => todo!(), + ErrorKind::Unreachable => writeln!( + f, + "Encountered an internal error while parsing your filter. Please fill an issue" + )?, + } + write!( + f, + "{}:{} in `{}`.", + self.context.location_line(), + self.context.get_utf8_column(), + self.context.extra, + ) + } +} diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 86c6cd79c..cb9a13f58 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -20,6 +20,20 @@ //! ```text //! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")" //! ``` +//! +//! Specific errors: +//! ================ +//! - If a user try to use a geoPoint, as a primary OR as a value we must throw an error. +//! ```text +//! field = _geoPoint(12, 13, 14) +//! field < 12 AND _geoPoint(1, 2) +//! ``` +//! +//! - If a user try to use a geoRadius as a value we must throw an error. +//! ```text +//! field = _geoRadius(12, 13, 14) +//! ``` +//! mod condition; mod error; @@ -28,12 +42,12 @@ mod value; use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; +use error::{cut_with_err, ExtendNomError}; pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0}; use nom::combinator::{cut, eof, map}; -use nom::error::{ContextError, ParseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, terminated, tuple}; @@ -102,14 +116,15 @@ impl<'a> FilterCondition<'a> { } } -// remove OPTIONAL whitespaces before AND after the the provided parser +/// remove OPTIONAL whitespaces before AND after the the provided parser. fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) -> IResult { delimited(multispace0, inner, multispace0) } -/// and = not (~ "AND" not)* +/// or = and (~ "OR" ~ and) fn parse_or(input: Span) -> IResult { let (input, lhs) = parse_and(input)?; + // if we found a `OR` then we MUST find something next let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?; let expr = ors @@ -118,8 +133,10 @@ fn parse_or(input: Span) -> IResult { Ok((input, expr)) } +/// and = not (~ "AND" not)* fn parse_and(input: Span) -> IResult { let (input, lhs) = parse_not(input)?; + // if we found a `AND` then we MUST find something next let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?; let expr = ors .into_iter() @@ -128,28 +145,29 @@ fn parse_and(input: Span) -> IResult { } /// not = ("NOT" | "!") not | primary +/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`. +/// If we parse a `NOT` or `!` we MUST parse something behind. fn parse_not(input: Span) -> IResult { - alt(( - map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), - cut(parse_primary), - ))(input) + alt((map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), parse_primary))( + input, + ) } /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) +/// If we parse `_geoRadius` we MUST parse the rest of the expression. fn parse_geo_radius(input: Span) -> IResult { - let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; - // we want to forbid space BEFORE the _geoRadius but not after - let parsed = preceded::<_, _, _, _, _, _>( + let parsed = preceded( tuple((multispace0, tag("_geoRadius"))), + // if we were able to parse `_geoRadius` and can't parse the rest of the input we returns a failure cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), - )(input); + )(input) + .map_err(|e| e.map(|_| Error::kind(input, ErrorKind::Geo))); - let (input, args): (Span, Vec) = parsed?; + let (input, args) = parsed?; if args.len() != 3 { - let e = Error::from_char(input, '('); - return Err(nom::Err::Failure(Error::add_context(input, err_msg_args_incomplete, e))); + return Err(nom::Err::Failure(Error::kind(input, ErrorKind::Geo))); } let res = FilterCondition::GeoLowerThan { @@ -159,14 +177,39 @@ fn parse_geo_radius(input: Span) -> IResult { Ok((input, res)) } +/// geoPoint = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float) +fn parse_geo_point(input: Span) -> IResult { + // we want to forbid space BEFORE the _geoPoint but not after + tuple(( + multispace0, + tag("_geoPoint"), + // if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next. + cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), + ))(input) + .map_err(|e| e.map(|_| Error::kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?; + // if we succeeded we still returns a Failure because geoPoints are not allowed + Err(nom::Err::Failure(Error::kind(input, ErrorKind::ReservedGeo("_geoPoint")))) +} + /// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to fn parse_primary(input: Span) -> IResult { alt(( - delimited(ws(char('(')), cut(parse_expression), cut(ws(char(')')))), + // if we find a first parenthesis, then we must parse an expression and find the closing parenthesis + delimited( + ws(char('(')), + cut(parse_expression), + cut_with_err(ws(char(')')), |c| { + Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char())) + }), + ), |c| parse_geo_radius(c), |c| parse_condition(c), |c| parse_to(c), + // the next lines are only for error handling and are written at the end to have the less possible performance impact + |c| parse_geo_point(c), ))(input) + // if the inner parsers did not match enough information to return an accurate error + .map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::InvalidPrimary))) } /// expression = or @@ -484,18 +527,24 @@ pub mod tests { fn error() { use FilterCondition as Fc; - let result = Fc::parse("test = truc OR truc"); - assert!(result.is_err()); - let test_case = [ // simple test - ("channel = Ponce = 12", "An error occured"), - ("OR", "An error occured"), - ("AND", "An error occured"), - ("channel = Ponce OR", "An error occured"), - ("_geoRadius = 12", "An error occured"), - ("_geoPoint(12, 13, 14)", "An error occured"), - ("_geo = _geoRadius(12, 13, 14)", "An error occured"), + ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."), + ("channel = ", "Was expecting a value but instead got nothing."), + ("channel = 🐻", "Was expecting a value but instead got `🐻`."), + ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."), + ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."), + ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."), + ("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing."), + ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), + ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), + ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), + ("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), + ("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."), + ("channel = 'ponce", "Expression `'ponce` is missing the following closing delemiter: `'`."), + ("channel = \"ponce", "Expression `\"ponce` is missing the following closing delemiter: `\"`."), + ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delemiter: `)`."), + ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."), ]; for (input, expected) in test_case { @@ -503,24 +552,12 @@ pub mod tests { assert!( result.is_err(), - "Filter `{:?}` wasn't supposed to be parsed but it did with the following result: `{:?}`", - expected, + "Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`", + input, result.unwrap() ); let filter = result.unwrap_err().to_string(); - assert_eq!(filter, expected, "Filter `{:?}` was supposed to return the following error: `{}`, but instead returned `{}`.", input, filter, expected); + assert!(filter.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, filter); } } - - /* - #[test] - fn bidule() { - use FilterCondition as Fc; - - let result = Fc::parse::>("test = truc OR truc"); - dbg!(result); - - assert!(false); - } - */ } diff --git a/filter_parser/src/main.rs b/filter_parser/src/main.rs new file mode 100644 index 000000000..4158a2063 --- /dev/null +++ b/filter_parser/src/main.rs @@ -0,0 +1,11 @@ +fn main() { + let input = std::env::args().nth(1).expect("You must provide a filter to test"); + + println!("Trying to execute the following filter:\n{}\n\n", input); + + if let Err(e) = filter_parser::FilterCondition::parse(&input) { + println!("{}", e.to_string()); + } else { + println!("✅ Valid filter"); + } +} diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 7c708aa73..5f4677a2e 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -1,12 +1,29 @@ use nom::branch::alt; -use nom::bytes::complete::{take_till, take_while1}; -use nom::character::complete::char; -use nom::sequence::delimited; +use nom::bytes::complete::{take_till, take_while, take_while1}; +use nom::character::complete::{char, multispace0}; +use nom::combinator::cut; +use nom::sequence::{delimited, terminated}; -use crate::{ws, Error, IResult, Span, Token}; +use crate::error::ExtendNomError; +use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* pub fn parse_value(input: Span) -> IResult { + // before anything we want to check if the user is misusing a geo expression + let err = parse_geo_point(input).unwrap_err(); + if err.is_failure() { + return Err(err); + } + match parse_geo_radius(input) { + Ok(_) => return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo))), + // if we encountered a failure it means the user badly wrote a _geoRadius filter. + // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. + Err(e) if e.is_failure() => { + return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo))) + } + _ => (), + } + // singleQuoted = "'" .* all but quotes "'" let simple_quoted = |input| take_till(|c: char| c == '\'')(input); // doubleQuoted = "\"" (word | spaces)* "\"" @@ -14,13 +31,23 @@ pub fn parse_value(input: Span) -> IResult { // word = (alphanumeric | _ | - | .)+ let word = |input| take_while1(is_key_component)(input); - ws(alt(( - delimited(char('\''), simple_quoted, char('\'')), - delimited(char('"'), double_quoted, char('"')), - word, - )))(input) + // we want to remove the space before entering the alt because if we don't, + // when we create the errors from the output of the alt we have spaces everywhere + let (input, _) = take_while(char::is_whitespace)(input)?; + + terminated( + alt(( + delimited(char('\''), simple_quoted, cut(char('\''))), + delimited(char('"'), double_quoted, cut(char('"'))), + word, + )), + multispace0, + )(input) .map(|(s, t)| (s, t.into())) - .map_err(|e| e.map(|_| Error::expected_value(input))) + // if we found nothing in the alt it means the user did not input any value + .map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::ExpectedValue))) + // if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote + .map_err(|e| e.map_fail(|c| Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char())))) } fn is_key_component(c: char) -> bool { @@ -38,12 +65,13 @@ pub mod tests { ("channel", rtok("", "channel")), (".private", rtok("", ".private")), ("I-love-kebab", rtok("", "I-love-kebab")), - ("but_snakes_are_also_good", rtok("", "but_snakes_are_also_good")), + ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")), ("parens(", rtok("", "parens")), ("parens)", rtok("", "parens")), ("not!", rtok("", "not")), (" channel", rtok(" ", "channel")), ("channel ", rtok("", "channel")), + (" channel ", rtok(" ", "channel")), ("'channel'", rtok("'", "channel")), ("\"channel\"", rtok("\"", "channel")), ("'cha)nnel'", rtok("'", "cha)nnel")), From b165c77fa79ded0f9ef8ebbee444e4a0dcc429bb Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 14:39:02 +0100 Subject: [PATCH 22/58] add a smol README --- filter_parser/README.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 filter_parser/README.md diff --git a/filter_parser/README.md b/filter_parser/README.md new file mode 100644 index 000000000..3ba9d8f23 --- /dev/null +++ b/filter_parser/README.md @@ -0,0 +1,35 @@ +# Filter parser + +This workspace is dedicated to the parsing of the MeiliSearch filters. + +Most of the code and explanation are in the [src/lib.rs]. Especially, the BNF of the filters at the top of this file. + +The parser use [nom](https://docs.rs/nom/) to do most of its work and [nom-locate](https://docs.rs/nom_locate/) to keep track of what we were doing when we encountered an error. + +## Cli +A simple main is provided to quick-test if a filter can be parsed or not without bringing milli. +It takes one argument and try to parse it. +``` +cargo run -- 'field = value' # success +cargo run -- 'field = "doggo' # error => missing closing delimiter " +``` + +## Fuzz +The workspace have been fuzzed with [cargo-fuzz](https://rust-fuzz.github.io/book/cargo-fuzz.html). + +### Setup +You'll need rust-nightly to execute the fuzzer. + +``` +cargo install cargo-fuzz +``` + +### Run +``` +cargo fuzz run parse +``` + +## What to do if you find a bug in the parser + +- Write a test at the end of the [src/lib.rs] to ensure it never happens again. +- Add a file in [fuzz/corpus/parse/] with your filter to help the fuzzer finding new bug. From d0fe9dea6177746b77633553be606cdcae601216 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 14:43:36 +0100 Subject: [PATCH 23/58] update the readme --- filter_parser/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/filter_parser/README.md b/filter_parser/README.md index 3ba9d8f23..44ffdada3 100644 --- a/filter_parser/README.md +++ b/filter_parser/README.md @@ -2,7 +2,7 @@ This workspace is dedicated to the parsing of the MeiliSearch filters. -Most of the code and explanation are in the [src/lib.rs]. Especially, the BNF of the filters at the top of this file. +Most of the code and explanation are in the [`lib.rs`](./src/lib.rs). Especially, the BNF of the filters at the top of this file. The parser use [nom](https://docs.rs/nom/) to do most of its work and [nom-locate](https://docs.rs/nom_locate/) to keep track of what we were doing when we encountered an error. @@ -31,5 +31,6 @@ cargo fuzz run parse ## What to do if you find a bug in the parser -- Write a test at the end of the [src/lib.rs] to ensure it never happens again. -- Add a file in [fuzz/corpus/parse/] with your filter to help the fuzzer finding new bug. +- Write a test at the end of the [`lib.rs`](./src/lib.rs) to ensure it never happens again. +- Add a file in [the corpus directory](./fuzz/corpus/parse/) with your filter to help the fuzzer finding new bug. Since this directory is going to be heavily polluted by the execution of the fuzzer it's in the gitignore and you'll need to force push your new test. + Since this directory is going to be heavily polluted by the execution of the fuzzer it's in the gitignore and you'll need to force add your new test. From b1a0110a47faf8362ee66513d121e45e1c28957d Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 14:48:39 +0100 Subject: [PATCH 24/58] update the main --- filter_parser/src/main.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/filter_parser/src/main.rs b/filter_parser/src/main.rs index 4158a2063..a3e4cab28 100644 --- a/filter_parser/src/main.rs +++ b/filter_parser/src/main.rs @@ -1,11 +1,16 @@ fn main() { let input = std::env::args().nth(1).expect("You must provide a filter to test"); - println!("Trying to execute the following filter:\n{}\n\n", input); + println!("Trying to execute the following filter:\n{}\n", input); - if let Err(e) = filter_parser::FilterCondition::parse(&input) { - println!("{}", e.to_string()); - } else { - println!("✅ Valid filter"); + match filter_parser::FilterCondition::parse(&input) { + Ok(filter) => { + println!("✅ Valid filter"); + println!("{:#?}", filter); + } + Err(e) => { + println!("❎ Invalid filter"); + println!("{}", e.to_string()); + } } } From a58bc5bebbef23550b46ba456c2db3271f6cb2f9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 15:02:36 +0100 Subject: [PATCH 25/58] update milli with the new parser_filter --- milli/Cargo.toml | 3 --- milli/src/facet/mod.rs | 2 -- milli/src/lib.rs | 5 ++--- milli/src/search/facet/filter_condition.rs | 16 +++------------- milli/src/search/facet/mod.rs | 2 -- milli/src/search/mod.rs | 2 +- 6 files changed, 6 insertions(+), 24 deletions(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 1aaeed008..6913178b0 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -38,10 +38,7 @@ smallvec = "1.6.1" tempfile = "3.2.0" uuid = { version = "0.8.2", features = ["v4"] } -# facet filter parser filter_parser = { path = "../filter_parser" } -nom = "7.0.0" -nom-greedyerror = "0.4.0" # documents words self-join itertools = "0.10.0" diff --git a/milli/src/facet/mod.rs b/milli/src/facet/mod.rs index aaa7a65ce..274d2588d 100644 --- a/milli/src/facet/mod.rs +++ b/milli/src/facet/mod.rs @@ -2,7 +2,5 @@ mod facet_type; mod facet_value; pub mod value_encoding; -pub use filter_parser::{Condition, FilterCondition, FilterParserError, Span, Token}; - pub use self::facet_type::FacetType; pub use self::facet_value::FacetValue; diff --git a/milli/src/lib.rs b/milli/src/lib.rs index e2ecb060c..044d74ec1 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -17,6 +17,7 @@ use std::collections::{BTreeMap, HashMap}; use std::convert::{TryFrom, TryInto}; use std::hash::BuildHasherDefault; +pub use filter_parser::{Condition, FilterCondition}; use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; use serde_json::{Map, Value}; @@ -34,9 +35,7 @@ pub use self::heed_codec::{ RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, }; pub use self::index::Index; -pub use self::search::{ - Condition, FacetDistribution, Filter, FilterCondition, MatchingWords, Search, SearchResult, -}; +pub use self::search::{FacetDistribution, Filter, MatchingWords, Search, SearchResult}; pub type Result = std::result::Result; diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index b61cd451b..bb342fa27 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -3,11 +3,9 @@ use std::ops::Bound::{self, Excluded, Included}; use std::str::FromStr; use either::Either; -pub use filter_parser::{Condition, FilterCondition, FilterParserError, Span, Token}; +pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; use heed::types::DecodeIgnore; use log::debug; -use nom::error::{ErrorKind, VerboseError}; -use nom_greedyerror::{convert_error, GreedyError}; use roaring::RoaringBitmap; use super::FacetNumberRange; @@ -22,12 +20,6 @@ pub struct Filter<'a> { condition: FilterCondition<'a>, } -impl<'a> From>> for Error { - fn from(nom_error: VerboseError>) -> Self { - UserError::InvalidFilter { input: nom_error.to_string() }.into() - } -} - fn parse(tok: &Token) -> Result { match tok.inner.parse::() { Ok(t) => Ok(t), @@ -90,11 +82,9 @@ impl<'a> Filter<'a> { } pub fn from_str(expression: &'a str) -> Result { - let condition = match FilterCondition::parse::>(expression) { + let condition = match FilterCondition::parse(expression) { Ok(fc) => Ok(fc), - Err(e) => Err(Error::UserError(UserError::InvalidFilter { - input: convert_error(Span::new_extra(expression, expression), e).to_string(), - })), + Err(e) => Err(Error::UserError(UserError::InvalidFilter { input: e.to_string() })), }?; Ok(Self { condition }) } diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index c0b692de7..d6f276fbb 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,5 +1,3 @@ -pub use filter_parser::{Condition, FilterCondition}; - pub use self::facet_distribution::FacetDistribution; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index f52dd06f0..a31ead1ec 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,7 +14,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{Condition, FacetDistribution, FacetNumberIter, Filter, FilterCondition}; +pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; use crate::error::UserError; From 07a5ffb04c3e3e241d7bf4b9f0e1d6c5bf5ddeb4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 15:52:22 +0100 Subject: [PATCH 26/58] update http-ui --- http-ui/src/main.rs | 24 ++++++++++++++-------- milli/src/search/facet/filter_condition.rs | 6 ++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index e3f8f0317..e84c94e50 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -738,31 +738,37 @@ async fn main() -> anyhow::Result<()> { search.query(query); } - let filters = match query.filters { + let filters = match query.filters.as_ref() { Some(condition) if !condition.trim().is_empty() => { - Some(MilliFilter::from_str(&condition).unwrap()) + Some(MilliFilter::from_str(condition).unwrap()) } _otherwise => None, }; - let facet_filters = match query.facet_filters { + let facet_filters = match query.facet_filters.as_ref() { Some(array) => { - let eithers = array.into_iter().map(Into::into); + let eithers = array.iter().map(|either| match either { + UntaggedEither::Left(l) => { + Either::Left(l.iter().map(|s| s.as_str()).collect::>()) + } + UntaggedEither::Right(r) => Either::Right(r.as_str()), + }); MilliFilter::from_array(eithers).unwrap() } _otherwise => None, }; let condition = match (filters, facet_filters) { - (Some(filters), Some(facet_filters)) => { - Some(FilterCondition::And(Box::new(filters), Box::new(facet_filters))) - } - (Some(condition), None) | (None, Some(condition)) => Some(condition), + (Some(filters), Some(facet_filters)) => Some(FilterCondition::And( + Box::new(filters.into()), + Box::new(facet_filters.into()), + )), + (Some(condition), None) | (None, Some(condition)) => Some(condition.into()), _otherwise => None, }; if let Some(condition) = condition { - search.filter(condition); + search.filter(condition.into()); } if let Some(limit) = query.limit { diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index bb342fa27..d0c32c8f4 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -35,6 +35,12 @@ fn parse(tok: &Token) -> Result { } } +impl<'a> From> for FilterCondition<'a> { + fn from(f: Filter<'a>) -> Self { + f.condition + } +} + impl<'a> Filter<'a> { pub fn from_array(array: I) -> Result> where From 72a90712037607688a04b56254089056a89de602 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 16:03:52 +0100 Subject: [PATCH 27/58] fix typo --- filter_parser/src/error.rs | 2 +- filter_parser/src/lib.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index b4155bb51..f92200882 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -150,7 +150,7 @@ impl<'a> Display for Error<'a> { writeln!(f, "Was expecting a value but instead got nothing.")? } ErrorKind::MissingClosingDelimiter(c) => { - writeln!(f, "Expression `{}` is missing the following closing delemiter: `{}`.", input, c)? + writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", input, c)? } ErrorKind::ExpectedValue => { writeln!(f, "Was expecting a value but instead got `{}`.", input)? diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index cb9a13f58..e6f8a75d1 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -541,9 +541,9 @@ pub mod tests { ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), ("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), ("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."), - ("channel = 'ponce", "Expression `'ponce` is missing the following closing delemiter: `'`."), - ("channel = \"ponce", "Expression `\"ponce` is missing the following closing delemiter: `\"`."), - ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delemiter: `)`."), + ("channel = 'ponce", "Expression `'ponce` is missing the following closing delimiter: `'`."), + ("channel = \"ponce", "Expression `\"ponce` is missing the following closing delimiter: `\"`."), + ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."), ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."), ]; From 3e5550c910068df31b9b5adc42c603938a49eddd Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 16:12:17 +0100 Subject: [PATCH 28/58] clean the errors --- filter_parser/src/error.rs | 87 ++++++++------------------------------ 1 file changed, 18 insertions(+), 69 deletions(-) diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index f92200882..fbfbbe30b 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -1,6 +1,7 @@ use std::fmt::Display; -use nom::{Parser, error::{self, ParseError}}; +use nom::error::{self, ParseError}; +use nom::Parser; use crate::{IResult, Span}; @@ -31,11 +32,14 @@ impl ExtendNomError for nom::Err { } /// cut a parser and map the error -pub fn cut_with_err<'a, O>(mut parser: impl FnMut(Span<'a>) -> IResult, mut with: impl FnMut(Error<'a>) -> Error<'a>) -> impl FnMut(Span<'a>) -> IResult { - move |input| match parser.parse(input) { - Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))), - rest => rest, - } +pub fn cut_with_err<'a, O>( + mut parser: impl FnMut(Span<'a>) -> IResult, + mut with: impl FnMut(Error<'a>) -> Error<'a>, +) -> impl FnMut(Span<'a>) -> IResult { + move |input| match parser.parse(input) { + Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))), + rest => rest, + } } #[derive(Debug)] @@ -50,14 +54,12 @@ pub enum ErrorKind<'a> { Geo, MisusedGeo, InvalidPrimary, - ReservedKeyword, ExpectedEof, ExpectedValue, MissingClosingDelimiter(char), - UnexpectedInput(Vec<&'a str>), - Context(&'a str), Char(char), - Unreachable, + InternalError(error::ErrorKind), + External(String), } impl<'a> Error<'a> { @@ -68,66 +70,15 @@ impl<'a> Error<'a> { match self.kind { ErrorKind::Char(c) => c, _ => panic!("Internal filter parser error"), - } } + } } impl<'a> ParseError> for Error<'a> { fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self { let kind = match kind { error::ErrorKind::Eof => ErrorKind::ExpectedEof, - error::ErrorKind::Tag => ErrorKind::UnexpectedInput(Vec::new()), - error::ErrorKind::MapRes => todo!(), - error::ErrorKind::MapOpt => todo!(), - error::ErrorKind::Alt => todo!(), - error::ErrorKind::IsNot => todo!(), - error::ErrorKind::IsA => todo!(), - error::ErrorKind::SeparatedList => todo!(), - error::ErrorKind::SeparatedNonEmptyList => todo!(), - error::ErrorKind::Many0 => todo!(), - error::ErrorKind::Many1 => todo!(), - error::ErrorKind::ManyTill => todo!(), - error::ErrorKind::Count => todo!(), - error::ErrorKind::TakeUntil => todo!(), - error::ErrorKind::LengthValue => todo!(), - error::ErrorKind::TagClosure => todo!(), - error::ErrorKind::Alpha => todo!(), - error::ErrorKind::Digit => todo!(), - error::ErrorKind::HexDigit => todo!(), - error::ErrorKind::OctDigit => todo!(), - error::ErrorKind::AlphaNumeric => todo!(), - error::ErrorKind::Space => todo!(), - error::ErrorKind::MultiSpace => todo!(), - error::ErrorKind::LengthValueFn => todo!(), - error::ErrorKind::Switch => todo!(), - error::ErrorKind::TagBits => todo!(), - error::ErrorKind::OneOf => todo!(), - error::ErrorKind::NoneOf => todo!(), - error::ErrorKind::Char => todo!(), - error::ErrorKind::CrLf => todo!(), - error::ErrorKind::RegexpMatch => todo!(), - error::ErrorKind::RegexpMatches => todo!(), - error::ErrorKind::RegexpFind => todo!(), - error::ErrorKind::RegexpCapture => todo!(), - error::ErrorKind::RegexpCaptures => todo!(), - error::ErrorKind::TakeWhile1 => ErrorKind::Unreachable, - error::ErrorKind::Complete => todo!(), - error::ErrorKind::Fix => todo!(), - error::ErrorKind::Escaped => todo!(), - error::ErrorKind::EscapedTransform => todo!(), - error::ErrorKind::NonEmpty => todo!(), - error::ErrorKind::ManyMN => todo!(), - error::ErrorKind::Not => todo!(), - error::ErrorKind::Permutation => todo!(), - error::ErrorKind::Verify => todo!(), - error::ErrorKind::TakeTill1 => todo!(), - error::ErrorKind::TakeWhileMN => todo!(), - error::ErrorKind::TooLarge => todo!(), - error::ErrorKind::Many0Count => todo!(), - error::ErrorKind::Many1Count => todo!(), - error::ErrorKind::Float => todo!(), - error::ErrorKind::Satisfy => todo!(), - error::ErrorKind::Fail => todo!(), + kind => ErrorKind::InternalError(kind), }; Self { context: input, kind } } @@ -149,7 +100,7 @@ impl<'a> Display for Error<'a> { ErrorKind::ExpectedValue if input.trim().is_empty() => { writeln!(f, "Was expecting a value but instead got nothing.")? } - ErrorKind::MissingClosingDelimiter(c) => { + ErrorKind::MissingClosingDelimiter(c) => { writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", input, c)? } ErrorKind::ExpectedValue => { @@ -176,13 +127,11 @@ impl<'a> Display for Error<'a> { ErrorKind::Char(c) => { panic!("Tried to display a char error with `{}`", c) } - ErrorKind::ReservedKeyword => writeln!(f, "reserved keyword")?, - ErrorKind::UnexpectedInput(ref v) => writeln!(f, "Unexpected input found `{}`, vec: `{:?}`", input, v)?, - ErrorKind::Context(_) => todo!(), - ErrorKind::Unreachable => writeln!( + ErrorKind::InternalError(kind) => writeln!( f, - "Encountered an internal error while parsing your filter. Please fill an issue" + "Encountered an internal `{:?}` error while parsing your filter. Please fill an issue", kind )?, + ErrorKind::External(ref error) => writeln!(f, "{}", error)?, } write!( f, From 7328ffb0340a7f092b251f345be1c6339dcb7431 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 16:20:53 +0100 Subject: [PATCH 29/58] stop panicking in case of internal error --- filter_parser/fuzz/fuzz_targets/parse.rs | 11 ++++++++--- filter_parser/src/error.rs | 7 ++++++- filter_parser/src/lib.rs | 12 ++++++------ filter_parser/src/value.rs | 10 ++++++---- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/filter_parser/fuzz/fuzz_targets/parse.rs b/filter_parser/fuzz/fuzz_targets/parse.rs index 99d4a03a6..6d0069c15 100644 --- a/filter_parser/fuzz/fuzz_targets/parse.rs +++ b/filter_parser/fuzz/fuzz_targets/parse.rs @@ -1,13 +1,18 @@ #![no_main] -use filter_parser::FilterCondition; +use filter_parser::{ErrorKind, FilterCondition}; use libfuzzer_sys::fuzz_target; fuzz_target!(|data: &[u8]| { if let Ok(s) = std::str::from_utf8(data) { - // When we are fuzzing the parser we can get stack overflow really easily. + // When we are fuzzing the parser we can get a stack overflow very easily. // But since this doesn't happens with a normal build we are just going to limit the fuzzer to 500 characters. if s.len() < 500 { - let _ = FilterCondition::parse(s); + match FilterCondition::parse(s) { + Err(e) if matches!(e.kind(), ErrorKind::InternalError(_)) => { + panic!("Found an internal error: `{:?}`", e) + } + _ => (), + } } } }); diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index fbfbbe30b..a0ea2efac 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -63,9 +63,14 @@ pub enum ErrorKind<'a> { } impl<'a> Error<'a> { - pub fn kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self { + pub fn kind(&self) -> &ErrorKind<'a> { + &self.kind + } + + pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self { Self { context, kind } } + pub fn char(self) -> char { match self.kind { ErrorKind::Char(c) => c, diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index e6f8a75d1..9335ef185 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -162,12 +162,12 @@ fn parse_geo_radius(input: Span) -> IResult { // if we were able to parse `_geoRadius` and can't parse the rest of the input we returns a failure cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), )(input) - .map_err(|e| e.map(|_| Error::kind(input, ErrorKind::Geo))); + .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo))); let (input, args) = parsed?; if args.len() != 3 { - return Err(nom::Err::Failure(Error::kind(input, ErrorKind::Geo))); + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo))); } let res = FilterCondition::GeoLowerThan { @@ -186,9 +186,9 @@ fn parse_geo_point(input: Span) -> IResult { // if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next. cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), ))(input) - .map_err(|e| e.map(|_| Error::kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?; + .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?; // if we succeeded we still returns a Failure because geoPoints are not allowed - Err(nom::Err::Failure(Error::kind(input, ErrorKind::ReservedGeo("_geoPoint")))) + Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))) } /// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to @@ -199,7 +199,7 @@ fn parse_primary(input: Span) -> IResult { ws(char('(')), cut(parse_expression), cut_with_err(ws(char(')')), |c| { - Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char())) + Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())) }), ), |c| parse_geo_radius(c), @@ -209,7 +209,7 @@ fn parse_primary(input: Span) -> IResult { |c| parse_geo_point(c), ))(input) // if the inner parsers did not match enough information to return an accurate error - .map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::InvalidPrimary))) + .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary))) } /// expression = or diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 5f4677a2e..79fc00acd 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -15,11 +15,11 @@ pub fn parse_value(input: Span) -> IResult { return Err(err); } match parse_geo_radius(input) { - Ok(_) => return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo))), + Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))), // if we encountered a failure it means the user badly wrote a _geoRadius filter. // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. Err(e) if e.is_failure() => { - return Err(nom::Err::Failure(Error::kind(input, ErrorKind::MisusedGeo))) + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))) } _ => (), } @@ -45,9 +45,11 @@ pub fn parse_value(input: Span) -> IResult { )(input) .map(|(s, t)| (s, t.into())) // if we found nothing in the alt it means the user did not input any value - .map_err(|e| e.map_err(|_| Error::kind(input, ErrorKind::ExpectedValue))) + .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::ExpectedValue))) // if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote - .map_err(|e| e.map_fail(|c| Error::kind(input, ErrorKind::MissingClosingDelimiter(c.char())))) + .map_err(|e| { + e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))) + }) } fn is_key_component(c: char) -> bool { From 8234f9fdf3886bebcfc1dfeef239c1bcbc442d1c Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 17:24:55 +0100 Subject: [PATCH 30/58] recreate most filter error except for the geosearch --- filter_parser/src/error.rs | 4 + filter_parser/src/lib.rs | 4 + milli/src/error.rs | 4 +- milli/src/search/facet/filter_condition.rs | 93 +++++++++++++++------- 4 files changed, 73 insertions(+), 32 deletions(-) diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index a0ea2efac..a1bbac47a 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -71,6 +71,10 @@ impl<'a> Error<'a> { Self { context, kind } } + pub fn new_from_external(context: Span<'a>, error: impl std::error::Error) -> Self { + Self::new_from_kind(context, ErrorKind::External(error.to_string())) + } + pub fn char(self) -> char { match self.kind { ErrorKind::Char(c) => c, diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 9335ef185..31aa973ab 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -69,6 +69,10 @@ impl<'a> Token<'a> { pub fn new(position: Span<'a>) -> Self { Self { position, inner: &position } } + + pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> { + Error::new_from_external(self.position, error) + } } impl<'a> From> for Token<'a> { diff --git a/milli/src/error.rs b/milli/src/error.rs index c0ce101c8..3d744da5c 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -59,7 +59,7 @@ pub enum UserError { InvalidDocumentId { document_id: Value }, InvalidFacetsDistribution { invalid_facets_name: HashSet }, InvalidGeoField { document_id: Value, object: Value }, - InvalidFilter { input: String }, + InvalidFilter(String), InvalidSortName { name: String }, InvalidSortableAttribute { field: String, valid_fields: HashSet }, SortRankingRuleMissing, @@ -207,7 +207,7 @@ impl StdError for InternalError {} impl fmt::Display for UserError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Self::InvalidFilter { input } => write!(f, "parser error {}", input), + Self::InvalidFilter(input) => write!(f, "{}", input), Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), Self::CriterionError(error) => write!(f, "{}", error), Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index d0c32c8f4..13622a134 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -1,4 +1,4 @@ -use std::fmt::Debug; +use std::fmt::{Debug, Display}; use std::ops::Bound::{self, Excluded, Included}; use std::str::FromStr; @@ -20,18 +20,50 @@ pub struct Filter<'a> { condition: FilterCondition<'a>, } -fn parse(tok: &Token) -> Result { +#[derive(Debug)] +enum FilterError<'a> { + AttributeNotFilterable { attribute: &'a str, filterable: String }, + BadGeo(&'a str), + Reserved(&'a str), +} +impl<'a> std::error::Error for FilterError<'a> {} + +impl<'a> Display for FilterError<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::AttributeNotFilterable { attribute, filterable } => write!( + f, + "Attribute `{}` is not filterable. Available filterable attributes are: `{}`.", + attribute, + filterable, + ), + Self::Reserved(keyword) => write!( + f, + "`{}` is a reserved keyword and thus can't be used as a filter expression.", + keyword + ), + Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.", keyword), + } + } +} + +impl<'a> From> for Error { + fn from(error: FPError<'a>) -> Self { + Self::UserError(UserError::InvalidFilter(error.to_string())) + } +} + +fn parse(tok: &Token) -> Result +where + T: FromStr, + T::Err: std::error::Error, +{ match tok.inner.parse::() { Ok(t) => Ok(t), - Err(_e) => Err(UserError::InvalidFilter { - input: format!( - "Could not parse `{}` at line {} and offset {}", - tok.inner, - tok.position.location_line(), - tok.position.get_column() - ), + Err(e) => { + Err(UserError::InvalidFilter(FPError::new_from_external(tok.position, e).to_string()) + .into()) } - .into()), } } @@ -90,7 +122,7 @@ impl<'a> Filter<'a> { pub fn from_str(expression: &'a str) -> Result { let condition = match FilterCondition::parse(expression) { Ok(fc) => Ok(fc), - Err(e) => Err(Error::UserError(UserError::InvalidFilter { input: e.to_string() })), + Err(e) => Err(Error::UserError(UserError::InvalidFilter(e.to_string()))), }?; Ok(Self { condition }) } @@ -299,25 +331,26 @@ impl<'a> Filter<'a> { Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) } else { match fid.inner { - // TODO update the error messages according to the spec - "_geo" => { - return Err(UserError::InvalidFilter { input: format!("Tried to use _geo in a filter, you probably wanted to use _geoRadius(latitude, longitude, radius)") })?; + attribute @ "_geo" => { + return Err(fid.as_external_error(FilterError::BadGeo(attribute)))?; } - "_geoDistance" => { - return Err(UserError::InvalidFilter { - input: format!("Reserved field _geoDistance"), - })?; + attribute if attribute.starts_with("_geoPoint(") => { + return Err(fid.as_external_error(FilterError::BadGeo("_geoPoint")))?; } - fid if fid.starts_with("_geoPoint(") => { - return Err(UserError::InvalidFilter { input: format!("_geoPoint only available in sort. You wanted to use _geoRadius") })?; + attribute @ "_geoDistance" => { + return Err(fid.as_external_error(FilterError::Reserved(attribute)))?; } - fid => { - return Err(UserError::InvalidFilter { - input: format!( - "Bad filter {}, available filters are {:?}", - fid, filterable_fields - ), - })?; + attribute => { + return Err(fid.as_external_error( + FilterError::AttributeNotFilterable { + attribute, + filterable: filterable_fields + .iter() + .map(|(_, s)| s) + .collect::>() + .join(" "), + }, + ))?; } } } @@ -356,9 +389,9 @@ impl<'a> Filter<'a> { Ok(result) } else { // TODO TAMO: update the error message - return Err(UserError::InvalidFilter { - input: format!("You tried to use _geo in a filter, you probably wanted to use _geoRadius"), - })?; + return Err(UserError::InvalidFilter(format!( + "You tried to use _geo in a filter, you probably wanted to use _geoRadius" + )))?; } } FilterCondition::GeoGreaterThan { point, radius } => { From 76d961cc7720bfb5e0dc3ca321948dea2e8b73b9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 4 Nov 2021 17:42:06 +0100 Subject: [PATCH 31/58] implements the last errors --- milli/src/search/facet/filter_condition.rs | 30 +++++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 13622a134..83873285f 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -24,6 +24,8 @@ pub struct Filter<'a> { enum FilterError<'a> { AttributeNotFilterable { attribute: &'a str, filterable: String }, BadGeo(&'a str), + BadGeoLat(f64), + BadGeoLng(f64), Reserved(&'a str), } impl<'a> std::error::Error for FilterError<'a> {} @@ -43,6 +45,8 @@ impl<'a> Display for FilterError<'a> { keyword ), Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.", keyword), + Self::BadGeoLat(lat) => write!(f, "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ", lat), + Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Latitude must be contained between -180 and 180 degrees. ", lng), } } } @@ -369,9 +373,17 @@ impl<'a> Filter<'a> { FilterCondition::GeoLowerThan { point, radius } => { let filterable_fields = index.fields_ids_map(rtxn)?; if filterable_fields.id("_geo").is_some() { - let base_point = [parse(&point[0])?, parse(&point[1])?]; - // TODO TAMO: ensure lat is between -90 and 90 - // TODO TAMO: ensure lng is between -180 and 180 + let base_point: [f64; 2] = [parse(&point[0])?, parse(&point[1])?]; + if !(-90.0..=90.0).contains(&base_point[0]) { + return Err( + point[0].as_external_error(FilterError::BadGeoLat(base_point[0])) + )?; + } + if !(-180.0..=180.0).contains(&base_point[1]) { + return Err( + point[1].as_external_error(FilterError::BadGeoLng(base_point[1])) + )?; + } let radius = parse(&radius)?; let rtree = match index.geo_rtree(rtxn)? { Some(rtree) => rtree, @@ -388,10 +400,14 @@ impl<'a> Filter<'a> { Ok(result) } else { - // TODO TAMO: update the error message - return Err(UserError::InvalidFilter(format!( - "You tried to use _geo in a filter, you probably wanted to use _geoRadius" - )))?; + return Err(point[0].as_external_error(FilterError::AttributeNotFilterable { + attribute: "_geo", + filterable: filterable_fields + .iter() + .map(|(_, s)| s) + .collect::>() + .join(" "), + }))?; } } FilterCondition::GeoGreaterThan { point, radius } => { From 27a6a26b4be13936f52172d31a9da4407dbc24d8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 5 Nov 2021 10:46:54 +0100 Subject: [PATCH 32/58] makes the parse function part of the filter_parser --- filter_parser/src/lib.rs | 9 +++++++ milli/src/search/facet/filter_condition.rs | 29 ++++++---------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 31aa973ab..d09744196 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -40,6 +40,7 @@ mod error; mod value; use std::fmt::Debug; +use std::str::FromStr; pub use condition::{parse_condition, parse_to, Condition}; use error::{cut_with_err, ExtendNomError}; @@ -73,6 +74,14 @@ impl<'a> Token<'a> { pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> { Error::new_from_external(self.position, error) } + + pub fn parse(&self) -> Result + where + T: FromStr, + T::Err: std::error::Error, + { + self.inner.parse().map_err(|e| self.as_external_error(e)) + } } impl<'a> From> for Token<'a> { diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 83873285f..164e9aed5 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -1,6 +1,5 @@ use std::fmt::{Debug, Display}; use std::ops::Bound::{self, Excluded, Included}; -use std::str::FromStr; use either::Either; pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; @@ -57,20 +56,6 @@ impl<'a> From> for Error { } } -fn parse(tok: &Token) -> Result -where - T: FromStr, - T::Err: std::error::Error, -{ - match tok.inner.parse::() { - Ok(t) => Ok(t), - Err(e) => { - Err(UserError::InvalidFilter(FPError::new_from_external(tok.position, e).to_string()) - .into()) - } - } -} - impl<'a> From> for FilterCondition<'a> { fn from(f: Filter<'a>) -> Self { f.condition @@ -254,11 +239,11 @@ impl<'a> Filter<'a> { // field id and the level. let (left, right) = match operator { - Condition::GreaterThan(val) => (Excluded(parse(val)?), Included(f64::MAX)), - Condition::GreaterThanOrEqual(val) => (Included(parse(val)?), Included(f64::MAX)), - Condition::LowerThan(val) => (Included(f64::MIN), Excluded(parse(val)?)), - Condition::LowerThanOrEqual(val) => (Included(f64::MIN), Included(parse(val)?)), - Condition::Between { from, to } => (Included(parse(from)?), Included(parse(to)?)), + Condition::GreaterThan(val) => (Excluded(val.parse()?), Included(f64::MAX)), + Condition::GreaterThanOrEqual(val) => (Included(val.parse()?), Included(f64::MAX)), + Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse()?)), + Condition::LowerThanOrEqual(val) => (Included(f64::MIN), Included(val.parse()?)), + Condition::Between { from, to } => (Included(from.parse()?), Included(to.parse()?)), Condition::Equal(val) => { let (_original_value, string_docids) = strings_db .get(rtxn, &(field_id, &val.inner.to_lowercase()))? @@ -373,7 +358,7 @@ impl<'a> Filter<'a> { FilterCondition::GeoLowerThan { point, radius } => { let filterable_fields = index.fields_ids_map(rtxn)?; if filterable_fields.id("_geo").is_some() { - let base_point: [f64; 2] = [parse(&point[0])?, parse(&point[1])?]; + let base_point: [f64; 2] = [point[0].parse()?, point[1].parse()?]; if !(-90.0..=90.0).contains(&base_point[0]) { return Err( point[0].as_external_error(FilterError::BadGeoLat(base_point[0])) @@ -384,7 +369,7 @@ impl<'a> Filter<'a> { point[1].as_external_error(FilterError::BadGeoLng(base_point[1])) )?; } - let radius = parse(&radius)?; + let radius = radius.parse()?; let rtree = match index.geo_rtree(rtxn)? { Some(rtree) => rtree, None => return Ok(RoaringBitmap::new()), From 070ec9bd97c48cf55519463d8faa4ca8f4cb6f26 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 5 Nov 2021 17:45:20 +0100 Subject: [PATCH 33/58] small update on the README --- filter_parser/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/filter_parser/README.md b/filter_parser/README.md index 44ffdada3..0999b4340 100644 --- a/filter_parser/README.md +++ b/filter_parser/README.md @@ -25,8 +25,9 @@ cargo install cargo-fuzz ``` ### Run +When the filter parser is executed by the fuzzer it's triggering a stackoverflow really fast. We can avoid this problem by limiting the `max_len` of [libfuzzer](https://llvm.org/docs/LibFuzzer.html) at 500 characters. ``` -cargo fuzz run parse +cargo fuzz run parse -- -max_len=500 ``` ## What to do if you find a bug in the parser From b249989befa31a9baf0e340f20706f065f79f129 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sat, 6 Nov 2021 01:32:12 +0100 Subject: [PATCH 34/58] fix most of the tests --- milli/src/search/facet/filter_condition.rs | 351 ++++----------------- milli/src/update/delete_documents.rs | 4 +- milli/src/update/settings.rs | 5 +- milli/tests/search/filters.rs | 8 +- 4 files changed, 66 insertions(+), 302 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 164e9aed5..acab64171 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -14,7 +14,7 @@ use crate::heed_codec::facet::{ }; use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Filter<'a> { condition: FilterCondition<'a>, } @@ -45,7 +45,7 @@ impl<'a> Display for FilterError<'a> { ), Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.", keyword), Self::BadGeoLat(lat) => write!(f, "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ", lat), - Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Latitude must be contained between -180 and 180 degrees. ", lng), + Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ", lng), } } } @@ -426,275 +426,64 @@ mod tests { use crate::update::Settings; use crate::Index; - #[test] - fn number() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut map = index.fields_ids_map(&wtxn).unwrap(); - map.insert("timestamp"); - index.put_fields_ids_map(&mut wtxn, &map).unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields(hashset! { "timestamp".into() }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); - let expected = FilterCondition::Operator(0, Between(22.0, 44.0)); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, LowerThan(22.0))), - Box::new(FilterCondition::Operator(0, GreaterThan(44.0))), - ); - assert_eq!(condition, expected); - } - - #[test] - fn compare() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp"), S("id")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") ,S("id")}); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); - let expected = FilterCondition::Operator(0, LowerThan(20.0)); - assert_eq!(condition, expected); - - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "id < 200").unwrap(); - let expected = FilterCondition::Operator(2, LowerThan(200.0)); - assert_eq!(condition, expected); - } - - #[test] - fn parentheses() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), - Box::new(FilterCondition::And( - Box::new(FilterCondition::Operator(1, Between(22.0, 44.0))), - Box::new(FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce")))), - )), - ); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), - Box::new(FilterCondition::Or( - Box::new(FilterCondition::Or( - Box::new(FilterCondition::Operator(1, LowerThan(22.0))), - Box::new(FilterCondition::Operator(1, GreaterThan(44.0))), - )), - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("ponce")))), - )), - ); - assert_eq!(condition, expected); - } - #[test] fn from_array() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - // Simple array with Left - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = mv"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); + let condition = Filter::from_array(vec![Either::Left(["channel = mv"])]).unwrap().unwrap(); + let expected = Filter::from_str("channel = mv").unwrap(); assert_eq!(condition, expected); // Simple array with Right - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = mv")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); + let condition = Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = mv")]) + .unwrap() + .unwrap(); + let expected = Filter::from_str("channel = mv").unwrap(); assert_eq!(condition, expected); // Array with Left and escaped quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = \"Mister Mv\""])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); + let condition = + Filter::from_array(vec![Either::Left(["channel = \"Mister Mv\""])]).unwrap().unwrap(); + let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap(); assert_eq!(condition, expected); // Array with Right and escaped quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = \"Mister Mv\"")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); + let condition = + Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = \"Mister Mv\"")]) + .unwrap() + .unwrap(); + let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap(); assert_eq!(condition, expected); // Array with Left and escaped simple quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = 'Mister Mv'"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); + let condition = + Filter::from_array(vec![Either::Left(["channel = 'Mister Mv'"])]).unwrap().unwrap(); + let expected = Filter::from_str("channel = 'Mister Mv'").unwrap(); assert_eq!(condition, expected); // Array with Right and escaped simple quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = 'Mister Mv'")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); + let condition = + Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = 'Mister Mv'")]) + .unwrap() + .unwrap(); + let expected = Filter::from_str("channel = 'Mister Mv'").unwrap(); assert_eq!(condition, expected); // Simple with parenthesis - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["(channel = mv)"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "(channel = mv)").unwrap(); + let condition = + Filter::from_array(vec![Either::Left(["(channel = mv)"])]).unwrap().unwrap(); + let expected = Filter::from_str("(channel = mv)").unwrap(); assert_eq!(condition, expected); // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array( - &rtxn, - &index, - vec![ - Either::Right("channel = gotaga"), - Either::Left(vec!["timestamp = 44", "channel != ponce"]), - ], - ) + let condition = Filter::from_array(vec![ + Either::Right("channel = gotaga"), + Either::Left(vec!["timestamp = 44", "channel != ponce"]), + ]) .unwrap() .unwrap(); - let expected = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga AND (timestamp = 44 OR channel != ponce)", - ) - .unwrap(); - assert_eq!(condition, expected); - } - - #[test] - fn geo_radius() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - // basic test - let condition = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap(); - let expected = FilterCondition::Operator(0, GeoLowerThan([12., 13.0005], 2000.)); - assert_eq!(condition, expected); - - // test the negation of the GeoLowerThan - let condition = - FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap(); - let expected = FilterCondition::Operator(0, GeoGreaterThan([50., 18.], 2000.500)); - assert_eq!(condition, expected); - - // composition of multiple operations - let condition = FilterCondition::from_str( - &rtxn, - &index, - "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::And( - Box::new(FilterCondition::Operator(0, GeoGreaterThan([1., 2.], 300.))), - Box::new(FilterCondition::Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), - )), - Box::new(FilterCondition::Operator(1, LowerThanOrEqual(10.))), - ); + let expected = + Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)").unwrap(); + println!("\nExpecting: {:#?}\nGot: {:#?}\n", expected, condition); assert_eq!(condition, expected); } @@ -715,62 +504,40 @@ mod tests { let rtxn = index.read_txn().unwrap(); - // georadius don't have any parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius don't have any parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius don't have enough parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius have too many parameters - let result = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-100, 150, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); + // georadius have a bad latitude + let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!( - error.to_string().contains("Latitude must be contained between -90 and 90 degrees."), + error.to_string().starts_with( + "Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees." + ), "{}", error.to_string() ); // georadius have a bad latitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90.0000001, 150, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Latitude must be contained between -90 and 90 degrees.")); + let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees." + )); // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 250, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Longitude must be contained between -180 and 180 degrees.")); + let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!( + error.to_string().contains( + "Bad longitude `250`. Longitude must be contained between -180 and 180 degrees." + ), + "{}", + error.to_string(), + ); // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 180.000001, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Longitude must be contained between -180 and 180 degrees.")); + let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." + )); } } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 207aed63c..e1a658218 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -567,7 +567,7 @@ mod tests { use super::*; use crate::update::{IndexDocuments, Settings}; - use crate::FilterCondition; + use crate::Filter; #[test] fn delete_documents_with_numbers_as_primary_key() { @@ -667,7 +667,7 @@ mod tests { builder.delete_external_id("1_4"); builder.execute().unwrap(); - let filter = FilterCondition::from_str(&wtxn, &index, "label = sign").unwrap(); + let filter = Filter::from_str("label = sign").unwrap(); let results = index.search(&wtxn).filter(filter).execute().unwrap(); assert!(results.documents_ids.is_empty()); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index dee63c726..f25bceb7b 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -524,7 +524,7 @@ mod tests { use super::*; use crate::error::Error; use crate::update::IndexDocuments; - use crate::{Criterion, FilterCondition, SearchResult}; + use crate::{Criterion, Filter, SearchResult}; #[test] fn set_and_reset_searchable_fields() { @@ -1066,7 +1066,8 @@ mod tests { wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); - FilterCondition::from_str(&rtxn, &index, "toto = 32").unwrap_err(); + let filter = Filter::from_str("toto = 32").unwrap(); + let _ = filter.evaluate(&rtxn, &index).unwrap_err(); } #[test] diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs index d992a8e95..99063f9f6 100644 --- a/milli/tests/search/filters.rs +++ b/milli/tests/search/filters.rs @@ -1,5 +1,5 @@ use either::{Either, Left, Right}; -use milli::{Criterion, FilterCondition, Search, SearchResult}; +use milli::{Criterion, Filter, Search, SearchResult}; use Criterion::*; use crate::search::{self, EXTERNAL_DOCUMENTS_IDS}; @@ -13,11 +13,7 @@ macro_rules! test_filter { let rtxn = index.read_txn().unwrap(); let filter_conditions = - FilterCondition::from_array::, &str>>, _, _, _>( - &rtxn, &index, $filter, - ) - .unwrap() - .unwrap(); + Filter::from_array::, &str>>, _>($filter).unwrap().unwrap(); let mut search = Search::new(&rtxn, &index); search.query(search::TEST_QUERY); From 075d9c97c079143ce2e8ccfe810f9402a35c2623 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sat, 6 Nov 2021 16:02:27 +0100 Subject: [PATCH 35/58] re-implement the equality between tokens to only compare the inner value --- filter_parser/src/lib.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index d09744196..a1d66819f 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -60,12 +60,18 @@ pub type Span<'a> = LocatedSpan<&'a str, &'a str>; type IResult<'a, Ret> = nom::IResult, Ret, Error<'a>>; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Eq)] pub struct Token<'a> { pub position: Span<'a>, pub inner: &'a str, } +impl<'a> PartialEq for Token<'a> { + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner + } +} + impl<'a> Token<'a> { pub fn new(position: Span<'a>) -> Self { Self { position, inner: &position } From 5c01e9bf7cdbcc221011bec721e6ae685ca83017 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sat, 6 Nov 2021 16:03:49 +0100 Subject: [PATCH 36/58] fix the benchmarks --- benchmarks/benches/utils.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs index 24f5d5343..00bd4e72a 100644 --- a/benchmarks/benches/utils.rs +++ b/benchmarks/benches/utils.rs @@ -9,7 +9,7 @@ use criterion::BenchmarkId; use heed::EnvOpenOptions; use milli::documents::DocumentBatchReader; use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder}; -use milli::{FilterCondition, Index}; +use milli::{Filter, Index}; use serde_json::{Map, Value}; pub struct Conf<'a> { @@ -117,7 +117,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { let mut search = index.search(&rtxn); search.query(query).optional_words(conf.optional_words); if let Some(filter) = conf.filter { - let filter = FilterCondition::from_str(&rtxn, &index, filter).unwrap(); + let filter = Filter::from_str(filter).unwrap(); search.filter(filter); } if let Some(sort) = &conf.sort { From e5af3ac65c183a3e7aa7d4eca6b07f3ee4d537c6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sat, 6 Nov 2021 16:37:55 +0100 Subject: [PATCH 37/58] rename the filter_condition.rs to filter.rs --- milli/src/search/facet/{filter_condition.rs => filter.rs} | 0 milli/src/search/facet/mod.rs | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename milli/src/search/facet/{filter_condition.rs => filter.rs} (100%) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter.rs similarity index 100% rename from milli/src/search/facet/filter_condition.rs rename to milli/src/search/facet/filter.rs diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index d6f276fbb..c8f91352b 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,9 +1,9 @@ pub use self::facet_distribution::FacetDistribution; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; -pub use self::filter_condition::Filter; +pub use self::filter::Filter; mod facet_distribution; mod facet_number; mod facet_string; -mod filter_condition; +mod filter; From 7483c7513a6804184016fcc82117d4bcf0f1ff59 Mon Sep 17 00:00:00 2001 From: Tamo Date: Sun, 7 Nov 2021 01:52:19 +0100 Subject: [PATCH 38/58] fix the filterable fields --- milli/src/search/facet/filter.rs | 70 ++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index acab64171..a26c41736 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -26,6 +26,7 @@ enum FilterError<'a> { BadGeoLat(f64), BadGeoLng(f64), Reserved(&'a str), + InternalError, } impl<'a> std::error::Error for FilterError<'a> {} @@ -46,6 +47,7 @@ impl<'a> Display for FilterError<'a> { Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.", keyword), Self::BadGeoLat(lat) => write!(f, "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ", lat), Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ", lng), + Self::InternalError => write!(f, "Internal error while executing this filter."), } } } @@ -315,9 +317,14 @@ impl<'a> Filter<'a> { match &self.condition { FilterCondition::Condition { fid, op } => { - let filterable_fields = index.fields_ids_map(rtxn)?; - if let Some(fid) = filterable_fields.id(&fid.inner.to_lowercase()) { - Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) + let filterable_fields = index.filterable_fields(rtxn)?; + if filterable_fields.contains(&fid.inner.to_lowercase()) { + let field_ids_map = index.fields_ids_map(rtxn)?; + if let Some(fid) = field_ids_map.id(fid.inner) { + Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) + } else { + return Err(fid.as_external_error(FilterError::InternalError))?; + } } else { match fid.inner { attribute @ "_geo" => { @@ -334,8 +341,7 @@ impl<'a> Filter<'a> { FilterError::AttributeNotFilterable { attribute, filterable: filterable_fields - .iter() - .map(|(_, s)| s) + .into_iter() .collect::>() .join(" "), }, @@ -356,8 +362,8 @@ impl<'a> Filter<'a> { } FilterCondition::Empty => Ok(RoaringBitmap::new()), FilterCondition::GeoLowerThan { point, radius } => { - let filterable_fields = index.fields_ids_map(rtxn)?; - if filterable_fields.id("_geo").is_some() { + let filterable_fields = index.filterable_fields(rtxn)?; + if filterable_fields.contains("_geo") { let base_point: [f64; 2] = [point[0].parse()?, point[1].parse()?]; if !(-90.0..=90.0).contains(&base_point[0]) { return Err( @@ -387,11 +393,7 @@ impl<'a> Filter<'a> { } else { return Err(point[0].as_external_error(FilterError::AttributeNotFilterable { attribute: "_geo", - filterable: filterable_fields - .iter() - .map(|(_, s)| s) - .collect::>() - .join(" "), + filterable: filterable_fields.into_iter().collect::>().join(" "), }))?; } } @@ -487,6 +489,50 @@ mod tests { assert_eq!(condition, expected); } + #[test] + fn not_filterable() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let rtxn = index.read_txn().unwrap(); + let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().starts_with( + "Attribute `_geo` is not filterable. Available filterable attributes are: ``." + )); + + let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().starts_with( + "Attribute `dog` is not filterable. Available filterable attributes are: ``." + )); + drop(rtxn); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("title")]); + builder.set_filterable_fields(hashset! { S("title") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + + let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().starts_with( + "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." + )); + + let filter = Filter::from_str("name = 12").unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().starts_with( + "Attribute `name` is not filterable. Available filterable attributes are: `title`." + )); + } + #[test] fn geo_radius_error() { let path = tempfile::tempdir().unwrap(); From 959ca66125c9f70c20e6f54b44c49ff51a3014ff Mon Sep 17 00:00:00 2001 From: Irevoire Date: Mon, 8 Nov 2021 15:30:26 +0100 Subject: [PATCH 39/58] improve the error diagnostic when parsing values --- filter_parser/src/error.rs | 4 +++ filter_parser/src/lib.rs | 1 + filter_parser/src/value.rs | 59 ++++++++++++++++++++++++++++++++------ 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index a1bbac47a..d52b17200 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -67,6 +67,10 @@ impl<'a> Error<'a> { &self.kind } + pub fn context(&self) -> &Span<'a> { + &self.context + } + pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self { Self { context, kind } } diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index a1d66819f..7db80888b 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -551,6 +551,7 @@ pub mod tests { ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."), ("channel = ", "Was expecting a value but instead got nothing."), ("channel = 🐻", "Was expecting a value but instead got `🐻`."), + ("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."), ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."), ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."), ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."), diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 79fc00acd..4c769fe5f 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -9,7 +9,10 @@ use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* pub fn parse_value(input: Span) -> IResult { - // before anything we want to check if the user is misusing a geo expression + // to get better diagnostic message we are going to strip the left whitespaces from the input right now + let (input, _) = take_while(char::is_whitespace)(input)?; + + // then, we want to check if the user is misusing a geo expression let err = parse_geo_point(input).unwrap_err(); if err.is_failure() { return Err(err); @@ -29,23 +32,30 @@ pub fn parse_value(input: Span) -> IResult { // doubleQuoted = "\"" (word | spaces)* "\"" let double_quoted = |input| take_till(|c: char| c == '"')(input); // word = (alphanumeric | _ | - | .)+ - let word = |input| take_while1(is_key_component)(input); + let word = take_while1(is_key_component); + // this parser is only used when an error is encountered and it parse the + // largest string possible that do not contain any “language” syntax. + // If we try to parse `name = 🦀 AND language = rust` we want to return an + // error saying we could not parse `🦀`. Not that no value were found or that + // we could note parse `🦀 AND language = rust`. // we want to remove the space before entering the alt because if we don't, // when we create the errors from the output of the alt we have spaces everywhere - let (input, _) = take_while(char::is_whitespace)(input)?; + let error_word = take_till::<_, _, Error>(is_syntax_component); terminated( alt(( - delimited(char('\''), simple_quoted, cut(char('\''))), - delimited(char('"'), double_quoted, cut(char('"'))), + delimited(char('\''), cut(simple_quoted), cut(char('\''))), + delimited(char('"'), cut(double_quoted), cut(char('"'))), word, )), multispace0, )(input) .map(|(s, t)| (s, t.into())) - // if we found nothing in the alt it means the user did not input any value - .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::ExpectedValue))) + // if we found nothing in the alt it means the user specified something that was not recognized as a value + .map_err(|e: nom::Err| { + e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue)) + }) // if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote .map_err(|e| { e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))) @@ -56,8 +66,14 @@ fn is_key_component(c: char) -> bool { c.is_alphanumeric() || ['_', '-', '.'].contains(&c) } +fn is_syntax_component(c: char) -> bool { + c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c) +} + #[cfg(test)] -pub mod tests { +pub mod test { + use nom::Finish; + use super::*; use crate::tests::rtok; @@ -82,6 +98,7 @@ pub mod tests { ("\" some spaces \"", rtok("\"", " some spaces ")), ("\"cha'nnel\"", rtok("'", "cha'nnel")), ("\"cha'nnel\"", rtok("'", "cha'nnel")), + ("I'm tamo", rtok("'m tamo", "I")), ]; for (input, expected) in test_case { @@ -98,4 +115,30 @@ pub mod tests { assert_eq!(value, expected, "Filter `{}` failed.", input); } } + + #[test] + fn diagnostic() { + let test_case = [ + ("🦀", "🦀"), + (" 🦀", "🦀"), + ("🦀 AND crab = truc", "🦀"), + ("🦀_in_name", "🦀_in_name"), + (" (name = ...", ""), + ]; + + for (input, expected) in test_case { + let input = Span::new_extra(input, input); + let result = parse_value(input); + + assert!( + result.is_err(), + "Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`", + expected, + result.unwrap() + ); + // get the inner string referenced in the error + let value = *result.finish().unwrap_err().context().fragment(); + assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value); + } + } } From 21d115dcbbc5e20d68b3483ece1f5089b9ad5618 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Mon, 8 Nov 2021 17:53:41 +0100 Subject: [PATCH 40/58] remove greedy-error --- filter_parser/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/filter_parser/Cargo.toml b/filter_parser/Cargo.toml index 2bdb3316a..80767d5c4 100644 --- a/filter_parser/Cargo.toml +++ b/filter_parser/Cargo.toml @@ -8,4 +8,3 @@ edition = "2021" [dependencies] nom = "7.0.0" nom_locate = "4.0.0" -nom-greedyerror = "0.4.0" From 15bd14297efbcf5ae901ab0e73ad6201473485fa Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Nov 2021 00:45:46 +0100 Subject: [PATCH 41/58] Remove useless closure Co-authored-by: marin --- filter_parser/src/condition.rs | 5 ++--- filter_parser/src/value.rs | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs index faacceb72..b58a9f9f9 100644 --- a/filter_parser/src/condition.rs +++ b/filter_parser/src/condition.rs @@ -43,9 +43,8 @@ impl<'a> Condition<'a> { /// condition = value ("==" | ">" ...) value pub fn parse_condition(input: Span) -> IResult { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); - let (input, (key, op, value)) = tuple((|c| parse_value(c), operator, cut(parse_value)))(input)?; + let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?; - let fid = key; match *op.fragment() { "=" => { @@ -73,7 +72,7 @@ pub fn parse_condition(input: Span) -> IResult { /// to = value value TO value pub fn parse_to(input: Span) -> IResult { let (input, (key, from, _, to)) = - tuple((|c| parse_value(c), |c| parse_value(c), tag("TO"), cut(parse_value)))(input)?; + tuple((parse_value, parse_value, tag("TO"), cut(parse_value)))(input)?; Ok(( input, diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 4c769fe5f..d82eda008 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -28,9 +28,9 @@ pub fn parse_value(input: Span) -> IResult { } // singleQuoted = "'" .* all but quotes "'" - let simple_quoted = |input| take_till(|c: char| c == '\'')(input); + let simple_quoted = take_till(|c: char| c == '\''); // doubleQuoted = "\"" (word | spaces)* "\"" - let double_quoted = |input| take_till(|c: char| c == '"')(input); + let double_quoted = take_till(|c: char| c == '"'); // word = (alphanumeric | _ | - | .)+ let word = take_while1(is_key_component); From ef0d5a824093d2365ee635e061980e446b65e4d6 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 00:49:13 +0100 Subject: [PATCH 42/58] flatten a match --- filter_parser/src/condition.rs | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs index b58a9f9f9..cff2f2fdd 100644 --- a/filter_parser/src/condition.rs +++ b/filter_parser/src/condition.rs @@ -45,28 +45,17 @@ pub fn parse_condition(input: Span) -> IResult { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?; - - match *op.fragment() { - "=" => { - let k = FilterCondition::Condition { fid, op: Equal(value) }; - Ok((input, k)) - } - "!=" => { - let k = FilterCondition::Condition { fid, op: NotEqual(value) }; - Ok((input, k)) - } - ">" | "<" | "<=" | ">=" => { - let k = match *op.fragment() { - ">" => FilterCondition::Condition { fid, op: GreaterThan(value) }, - "<" => FilterCondition::Condition { fid, op: LowerThan(value) }, - "<=" => FilterCondition::Condition { fid, op: LowerThanOrEqual(value) }, - ">=" => FilterCondition::Condition { fid, op: GreaterThanOrEqual(value) }, - _ => unreachable!(), - }; - Ok((input, k)) - } + let condition = match *op.fragment() { + "=" => FilterCondition::Condition { fid, op: Equal(value) }, + "!=" => FilterCondition::Condition { fid, op: NotEqual(value) }, + ">" => FilterCondition::Condition { fid, op: GreaterThan(value) }, + "<" => FilterCondition::Condition { fid, op: LowerThan(value) }, + "<=" => FilterCondition::Condition { fid, op: LowerThanOrEqual(value) }, + ">=" => FilterCondition::Condition { fid, op: GreaterThanOrEqual(value) }, _ => unreachable!(), - } + }; + + Ok((input, condition)) } /// to = value value TO value From ea52aff6dc5e0630d2cf0681186b59ffe8eb533d Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Nov 2021 00:50:15 +0100 Subject: [PATCH 43/58] Rename the ExtendNomError trait to NomErrorExt Co-authored-by: marin --- filter_parser/src/error.rs | 4 ++-- filter_parser/src/lib.rs | 2 +- filter_parser/src/value.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index d52b17200..b162fb554 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -5,13 +5,13 @@ use nom::Parser; use crate::{IResult, Span}; -pub trait ExtendNomError { +pub trait NomErrorExt { fn is_failure(&self) -> bool; fn map_err E>(self, op: O) -> nom::Err; fn map_fail E>(self, op: O) -> nom::Err; } -impl ExtendNomError for nom::Err { +impl NomErrorExt for nom::Err { fn is_failure(&self) -> bool { matches!(self, Self::Failure(_)) } diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 7db80888b..c4091fa86 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -43,7 +43,7 @@ use std::fmt::Debug; use std::str::FromStr; pub use condition::{parse_condition, parse_to, Condition}; -use error::{cut_with_err, ExtendNomError}; +use error::{cut_with_err, NomErrorExt}; pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index d82eda008..6f7952ebd 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -4,7 +4,7 @@ use nom::character::complete::{char, multispace0}; use nom::combinator::cut; use nom::sequence::{delimited, terminated}; -use crate::error::ExtendNomError; +use crate::error::NomErrorExt; use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* From 6515838d35ad510af9ade79245278044db96c331 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 00:57:46 +0100 Subject: [PATCH 44/58] improve the readability of the _geoPoint thingy in the value --- filter_parser/src/value.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 6f7952ebd..b716dab66 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -13,9 +13,12 @@ pub fn parse_value(input: Span) -> IResult { let (input, _) = take_while(char::is_whitespace)(input)?; // then, we want to check if the user is misusing a geo expression - let err = parse_geo_point(input).unwrap_err(); - if err.is_failure() { - return Err(err); + // This expression can’t finish without error. + // We want to return an error in case of failure. + if let Err(err) = parse_geo_point(input) { + if err.is_failure() { + return Err(err); + } } match parse_geo_radius(input) { Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))), From 9c36e497d9bbfd9da2a8a6cbe5776128da413683 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Nov 2021 00:58:23 +0100 Subject: [PATCH 45/58] Rename the key_component into a value_component Co-authored-by: marin --- filter_parser/src/value.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index b716dab66..b9d929ab0 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -35,7 +35,7 @@ pub fn parse_value(input: Span) -> IResult { // doubleQuoted = "\"" (word | spaces)* "\"" let double_quoted = take_till(|c: char| c == '"'); // word = (alphanumeric | _ | - | .)+ - let word = take_while1(is_key_component); + let word = take_while1(is_value_component); // this parser is only used when an error is encountered and it parse the // largest string possible that do not contain any “language” syntax. @@ -65,7 +65,7 @@ pub fn parse_value(input: Span) -> IResult { }) } -fn is_key_component(c: char) -> bool { +fn is_value_component(c: char) -> bool { c.is_alphanumeric() || ['_', '-', '.'].contains(&c) } From bc9daf90410ca09abdb84e0fdc03b74c68d35ce4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Nov 2021 01:00:42 +0100 Subject: [PATCH 46/58] update the bnf Co-authored-by: marin --- filter_parser/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index c4091fa86..40e1fb3d4 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -11,7 +11,7 @@ //! to = value value TO value //! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* //! singleQuoted = "'" .* all but quotes "'" -//! doubleQuoted = "\"" (word | spaces)* "\"" +//! doubleQuoted = "\"" .* all but double quotes "\"" //! word = (alphanumeric | _ | - | .)+ //! geoRadius = WS* ~ "_geoRadius(" ~ float ~ "," ~ float ~ "," float ~ ")" //! ``` From cf98bf37d0994239f059a595fff60762f496f776 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Nov 2021 01:03:02 +0100 Subject: [PATCH 47/58] Simplify some closure Co-authored-by: marin --- filter_parser/src/lib.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 40e1fb3d4..71e04af03 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -179,7 +179,7 @@ fn parse_geo_radius(input: Span) -> IResult { let parsed = preceded( tuple((multispace0, tag("_geoRadius"))), // if we were able to parse `_geoRadius` and can't parse the rest of the input we returns a failure - cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), + cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), )(input) .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo))); @@ -221,11 +221,11 @@ fn parse_primary(input: Span) -> IResult { Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())) }), ), - |c| parse_geo_radius(c), - |c| parse_condition(c), - |c| parse_to(c), + parse_geo_radius, + parse_condition, + parse_to, // the next lines are only for error handling and are written at the end to have the less possible performance impact - |c| parse_geo_point(c), + parse_geo_point, ))(input) // if the inner parsers did not match enough information to return an accurate error .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary))) From 18eb4b9c51a2979b9170b1eb7c48c4a0742133ac Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 01:04:50 +0100 Subject: [PATCH 48/58] fix spaces in the bnf --- filter_parser/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 71e04af03..be9ed9370 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -13,7 +13,7 @@ //! singleQuoted = "'" .* all but quotes "'" //! doubleQuoted = "\"" .* all but double quotes "\"" //! word = (alphanumeric | _ | - | .)+ -//! geoRadius = WS* ~ "_geoRadius(" ~ float ~ "," ~ float ~ "," float ~ ")" +//! geoRadius = WS* ~ "_geoRadius(" ~ WS* ~ float ~ WS* ~ "," ~ WS* ~ float ~ WS* ~ "," float ~ WS* ~ ")" //! ``` //! //! Other BNF grammar used to handle some specific errors: From 2c6d08c5197c48c6dec72e0960adb0ff34f43049 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Nov 2021 01:06:03 +0100 Subject: [PATCH 49/58] Simplify the tokens to only wrap one span and no inner value Co-authored-by: marin --- filter_parser/src/lib.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index be9ed9370..014a008b1 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -61,24 +61,21 @@ pub type Span<'a> = LocatedSpan<&'a str, &'a str>; type IResult<'a, Ret> = nom::IResult, Ret, Error<'a>>; #[derive(Debug, Clone, Eq)] -pub struct Token<'a> { - pub position: Span<'a>, - pub inner: &'a str, -} +pub struct Token<'a>(Span<'a>); impl<'a> PartialEq for Token<'a> { fn eq(&self, other: &Self) -> bool { - self.inner == other.inner + self.0.fragment() == other.0.fragment() } } impl<'a> Token<'a> { pub fn new(position: Span<'a>) -> Self { - Self { position, inner: &position } + Self(position) } pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> { - Error::new_from_external(self.position, error) + Error::new_from_external(self.0, error) } pub fn parse(&self) -> Result @@ -86,13 +83,13 @@ impl<'a> Token<'a> { T: FromStr, T::Err: std::error::Error, { - self.inner.parse().map_err(|e| self.as_external_error(e)) + self.0.parse().map_err(|e| self.as_external_error(e)) } } impl<'a> From> for Token<'a> { fn from(span: Span<'a>) -> Self { - Self { inner: &span, position: span } + Self(span) } } From 9b24f83456d02954d0b5fcd6ab081cf61738cda3 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 10:27:29 +0100 Subject: [PATCH 50/58] in case of error return a range of chars position instead of one line and column --- filter_parser/src/error.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index b162fb554..4580cde4f 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -146,12 +146,8 @@ impl<'a> Display for Error<'a> { )?, ErrorKind::External(ref error) => writeln!(f, "{}", error)?, } - write!( - f, - "{}:{} in `{}`.", - self.context.location_line(), - self.context.get_utf8_column(), - self.context.extra, - ) + let base_column = self.context.get_utf8_column(); + let size = self.context.fragment().chars().count(); + write!(f, "{}:{} in `{}`.", base_column, base_column + size, self.context.extra,) } } From a211a9cdcdb309a4e3ef7eb03d14a7b877cc40bf Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 11:19:30 +0100 Subject: [PATCH 51/58] update the error format so it can be easily parsed by someone else --- filter_parser/src/error.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index 4580cde4f..401b8d7f3 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -109,30 +109,34 @@ impl<'a> Display for Error<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let input = self.context.fragment(); + // When printing our error message we want to escape all `\n` to be sure we keep our format with the + // first line being the diagnostic and the second line being the incriminated filter. + let escaped_input = input.escape_debug(); + match self.kind { ErrorKind::ExpectedValue if input.trim().is_empty() => { writeln!(f, "Was expecting a value but instead got nothing.")? } ErrorKind::MissingClosingDelimiter(c) => { - writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", input, c)? + writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? } ErrorKind::ExpectedValue => { - writeln!(f, "Was expecting a value but instead got `{}`.", input)? + writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)? } ErrorKind::InvalidPrimary if input.trim().is_empty() => { writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing.")? } ErrorKind::InvalidPrimary => { - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", input)? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", escaped_input)? } ErrorKind::ExpectedEof => { - writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", input)? + writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? } ErrorKind::Geo => { writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")? } ErrorKind::ReservedGeo(name) => { - writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name)? + writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name.escape_debug())? } ErrorKind::MisusedGeo => { writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? @@ -148,6 +152,7 @@ impl<'a> Display for Error<'a> { } let base_column = self.context.get_utf8_column(); let size = self.context.fragment().chars().count(); - write!(f, "{}:{} in `{}`.", base_column, base_column + size, self.context.extra,) + + write!(f, "{}:{} {}", base_column, base_column + size, self.context.extra) } } From 0ea0146e048e09ba2c3f4690c1c1697534e6cf69 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 11:34:10 +0100 Subject: [PATCH 52/58] implement deref &str on the tokens --- filter_parser/src/lib.rs | 9 +++++++++ milli/src/search/facet/filter.rs | 16 ++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 014a008b1..6276023a9 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -40,6 +40,7 @@ mod error; mod value; use std::fmt::Debug; +use std::ops::Deref; use std::str::FromStr; pub use condition::{parse_condition, parse_to, Condition}; @@ -63,6 +64,14 @@ type IResult<'a, Ret> = nom::IResult, Ret, Error<'a>>; #[derive(Debug, Clone, Eq)] pub struct Token<'a>(Span<'a>); +impl<'a> Deref for Token<'a> { + type Target = &'a str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + impl<'a> PartialEq for Token<'a> { fn eq(&self, other: &Self) -> bool { self.0.fragment() == other.0.fragment() diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index a26c41736..ec2c0b3eb 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -1,5 +1,6 @@ use std::fmt::{Debug, Display}; use std::ops::Bound::{self, Excluded, Included}; +use std::ops::Deref; use either::Either; pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; @@ -247,10 +248,9 @@ impl<'a> Filter<'a> { Condition::LowerThanOrEqual(val) => (Included(f64::MIN), Included(val.parse()?)), Condition::Between { from, to } => (Included(from.parse()?), Included(to.parse()?)), Condition::Equal(val) => { - let (_original_value, string_docids) = strings_db - .get(rtxn, &(field_id, &val.inner.to_lowercase()))? - .unwrap_or_default(); - let number = val.inner.parse::().ok(); + let (_original_value, string_docids) = + strings_db.get(rtxn, &(field_id, &val.to_lowercase()))?.unwrap_or_default(); + let number = val.parse::().ok(); let number_docids = match number { Some(n) => { let n = Included(n); @@ -271,7 +271,7 @@ impl<'a> Filter<'a> { return Ok(string_docids | number_docids); } Condition::NotEqual(val) => { - let number = val.inner.parse::().ok(); + let number = val.parse::().ok(); let all_numbers_ids = if number.is_some() { index.number_faceted_documents_ids(rtxn, field_id)? } else { @@ -318,15 +318,15 @@ impl<'a> Filter<'a> { match &self.condition { FilterCondition::Condition { fid, op } => { let filterable_fields = index.filterable_fields(rtxn)?; - if filterable_fields.contains(&fid.inner.to_lowercase()) { + if filterable_fields.contains(&fid.to_lowercase()) { let field_ids_map = index.fields_ids_map(rtxn)?; - if let Some(fid) = field_ids_map.id(fid.inner) { + if let Some(fid) = field_ids_map.id(&fid) { Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) } else { return Err(fid.as_external_error(FilterError::InternalError))?; } } else { - match fid.inner { + match *fid.deref() { attribute @ "_geo" => { return Err(fid.as_external_error(FilterError::BadGeo(attribute)))?; } From f28600031d6edc3be6ab1709f45d14e9c07b5ccb Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Nov 2021 16:16:28 +0100 Subject: [PATCH 53/58] Rename the filter_parser crate into filter-parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- Cargo.toml | 2 +- {filter_parser => filter-parser}/Cargo.toml | 2 +- {filter_parser => filter-parser}/README.md | 0 {filter_parser => filter-parser}/fuzz/Cargo.toml | 4 ++-- {filter_parser => filter-parser}/fuzz/corpus/parse/test_1 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_10 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_11 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_12 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_13 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_14 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_15 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_16 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_17 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_18 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_19 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_2 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_20 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_21 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_22 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_23 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_24 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_25 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_26 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_27 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_28 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_29 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_3 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_30 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_31 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_32 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_33 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_34 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_35 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_36 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_37 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_38 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_39 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_4 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_40 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_41 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_42 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_43 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_5 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_6 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_7 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_8 | 0 {filter_parser => filter-parser}/fuzz/corpus/parse/test_9 | 0 {filter_parser => filter-parser}/fuzz/fuzz_targets/parse.rs | 0 {filter_parser => filter-parser}/src/condition.rs | 0 {filter_parser => filter-parser}/src/error.rs | 0 {filter_parser => filter-parser}/src/lib.rs | 0 {filter_parser => filter-parser}/src/main.rs | 0 {filter_parser => filter-parser}/src/value.rs | 0 filter_parser/fuzz/.gitignore | 3 --- milli/Cargo.toml | 2 +- 55 files changed, 5 insertions(+), 8 deletions(-) rename {filter_parser => filter-parser}/Cargo.toml (89%) rename {filter_parser => filter-parser}/README.md (100%) rename {filter_parser => filter-parser}/fuzz/Cargo.toml (85%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_1 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_10 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_11 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_12 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_13 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_14 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_15 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_16 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_17 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_18 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_19 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_2 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_20 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_21 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_22 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_23 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_24 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_25 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_26 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_27 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_28 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_29 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_3 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_30 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_31 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_32 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_33 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_34 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_35 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_36 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_37 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_38 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_39 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_4 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_40 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_41 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_42 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_43 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_5 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_6 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_7 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_8 (100%) rename {filter_parser => filter-parser}/fuzz/corpus/parse/test_9 (100%) rename {filter_parser => filter-parser}/fuzz/fuzz_targets/parse.rs (100%) rename {filter_parser => filter-parser}/src/condition.rs (100%) rename {filter_parser => filter-parser}/src/error.rs (100%) rename {filter_parser => filter-parser}/src/lib.rs (100%) rename {filter_parser => filter-parser}/src/main.rs (100%) rename {filter_parser => filter-parser}/src/value.rs (100%) delete mode 100644 filter_parser/fuzz/.gitignore diff --git a/Cargo.toml b/Cargo.toml index 5d2d47713..6b3e12f07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["milli", "filter_parser", "http-ui", "benchmarks", "infos", "helpers", "cli"] +members = ["milli", "filter-parser", "http-ui", "benchmarks", "infos", "helpers", "cli"] default-members = ["milli"] [profile.dev] diff --git a/filter_parser/Cargo.toml b/filter-parser/Cargo.toml similarity index 89% rename from filter_parser/Cargo.toml rename to filter-parser/Cargo.toml index 80767d5c4..ee44bcb7f 100644 --- a/filter_parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "filter_parser" +name = "filter-parser" version = "0.1.0" edition = "2021" diff --git a/filter_parser/README.md b/filter-parser/README.md similarity index 100% rename from filter_parser/README.md rename to filter-parser/README.md diff --git a/filter_parser/fuzz/Cargo.toml b/filter-parser/fuzz/Cargo.toml similarity index 85% rename from filter_parser/fuzz/Cargo.toml rename to filter-parser/fuzz/Cargo.toml index 33e604e73..246276f2c 100644 --- a/filter_parser/fuzz/Cargo.toml +++ b/filter-parser/fuzz/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "filter_parser-fuzz" +name = "filter-parser-fuzz" version = "0.0.0" authors = ["Automatically generated"] publish = false @@ -11,7 +11,7 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4" -[dependencies.filter_parser] +[dependencies.filter-parser] path = ".." # Prevent this from interfering with workspaces diff --git a/filter_parser/fuzz/corpus/parse/test_1 b/filter-parser/fuzz/corpus/parse/test_1 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_1 rename to filter-parser/fuzz/corpus/parse/test_1 diff --git a/filter_parser/fuzz/corpus/parse/test_10 b/filter-parser/fuzz/corpus/parse/test_10 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_10 rename to filter-parser/fuzz/corpus/parse/test_10 diff --git a/filter_parser/fuzz/corpus/parse/test_11 b/filter-parser/fuzz/corpus/parse/test_11 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_11 rename to filter-parser/fuzz/corpus/parse/test_11 diff --git a/filter_parser/fuzz/corpus/parse/test_12 b/filter-parser/fuzz/corpus/parse/test_12 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_12 rename to filter-parser/fuzz/corpus/parse/test_12 diff --git a/filter_parser/fuzz/corpus/parse/test_13 b/filter-parser/fuzz/corpus/parse/test_13 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_13 rename to filter-parser/fuzz/corpus/parse/test_13 diff --git a/filter_parser/fuzz/corpus/parse/test_14 b/filter-parser/fuzz/corpus/parse/test_14 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_14 rename to filter-parser/fuzz/corpus/parse/test_14 diff --git a/filter_parser/fuzz/corpus/parse/test_15 b/filter-parser/fuzz/corpus/parse/test_15 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_15 rename to filter-parser/fuzz/corpus/parse/test_15 diff --git a/filter_parser/fuzz/corpus/parse/test_16 b/filter-parser/fuzz/corpus/parse/test_16 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_16 rename to filter-parser/fuzz/corpus/parse/test_16 diff --git a/filter_parser/fuzz/corpus/parse/test_17 b/filter-parser/fuzz/corpus/parse/test_17 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_17 rename to filter-parser/fuzz/corpus/parse/test_17 diff --git a/filter_parser/fuzz/corpus/parse/test_18 b/filter-parser/fuzz/corpus/parse/test_18 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_18 rename to filter-parser/fuzz/corpus/parse/test_18 diff --git a/filter_parser/fuzz/corpus/parse/test_19 b/filter-parser/fuzz/corpus/parse/test_19 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_19 rename to filter-parser/fuzz/corpus/parse/test_19 diff --git a/filter_parser/fuzz/corpus/parse/test_2 b/filter-parser/fuzz/corpus/parse/test_2 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_2 rename to filter-parser/fuzz/corpus/parse/test_2 diff --git a/filter_parser/fuzz/corpus/parse/test_20 b/filter-parser/fuzz/corpus/parse/test_20 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_20 rename to filter-parser/fuzz/corpus/parse/test_20 diff --git a/filter_parser/fuzz/corpus/parse/test_21 b/filter-parser/fuzz/corpus/parse/test_21 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_21 rename to filter-parser/fuzz/corpus/parse/test_21 diff --git a/filter_parser/fuzz/corpus/parse/test_22 b/filter-parser/fuzz/corpus/parse/test_22 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_22 rename to filter-parser/fuzz/corpus/parse/test_22 diff --git a/filter_parser/fuzz/corpus/parse/test_23 b/filter-parser/fuzz/corpus/parse/test_23 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_23 rename to filter-parser/fuzz/corpus/parse/test_23 diff --git a/filter_parser/fuzz/corpus/parse/test_24 b/filter-parser/fuzz/corpus/parse/test_24 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_24 rename to filter-parser/fuzz/corpus/parse/test_24 diff --git a/filter_parser/fuzz/corpus/parse/test_25 b/filter-parser/fuzz/corpus/parse/test_25 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_25 rename to filter-parser/fuzz/corpus/parse/test_25 diff --git a/filter_parser/fuzz/corpus/parse/test_26 b/filter-parser/fuzz/corpus/parse/test_26 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_26 rename to filter-parser/fuzz/corpus/parse/test_26 diff --git a/filter_parser/fuzz/corpus/parse/test_27 b/filter-parser/fuzz/corpus/parse/test_27 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_27 rename to filter-parser/fuzz/corpus/parse/test_27 diff --git a/filter_parser/fuzz/corpus/parse/test_28 b/filter-parser/fuzz/corpus/parse/test_28 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_28 rename to filter-parser/fuzz/corpus/parse/test_28 diff --git a/filter_parser/fuzz/corpus/parse/test_29 b/filter-parser/fuzz/corpus/parse/test_29 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_29 rename to filter-parser/fuzz/corpus/parse/test_29 diff --git a/filter_parser/fuzz/corpus/parse/test_3 b/filter-parser/fuzz/corpus/parse/test_3 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_3 rename to filter-parser/fuzz/corpus/parse/test_3 diff --git a/filter_parser/fuzz/corpus/parse/test_30 b/filter-parser/fuzz/corpus/parse/test_30 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_30 rename to filter-parser/fuzz/corpus/parse/test_30 diff --git a/filter_parser/fuzz/corpus/parse/test_31 b/filter-parser/fuzz/corpus/parse/test_31 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_31 rename to filter-parser/fuzz/corpus/parse/test_31 diff --git a/filter_parser/fuzz/corpus/parse/test_32 b/filter-parser/fuzz/corpus/parse/test_32 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_32 rename to filter-parser/fuzz/corpus/parse/test_32 diff --git a/filter_parser/fuzz/corpus/parse/test_33 b/filter-parser/fuzz/corpus/parse/test_33 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_33 rename to filter-parser/fuzz/corpus/parse/test_33 diff --git a/filter_parser/fuzz/corpus/parse/test_34 b/filter-parser/fuzz/corpus/parse/test_34 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_34 rename to filter-parser/fuzz/corpus/parse/test_34 diff --git a/filter_parser/fuzz/corpus/parse/test_35 b/filter-parser/fuzz/corpus/parse/test_35 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_35 rename to filter-parser/fuzz/corpus/parse/test_35 diff --git a/filter_parser/fuzz/corpus/parse/test_36 b/filter-parser/fuzz/corpus/parse/test_36 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_36 rename to filter-parser/fuzz/corpus/parse/test_36 diff --git a/filter_parser/fuzz/corpus/parse/test_37 b/filter-parser/fuzz/corpus/parse/test_37 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_37 rename to filter-parser/fuzz/corpus/parse/test_37 diff --git a/filter_parser/fuzz/corpus/parse/test_38 b/filter-parser/fuzz/corpus/parse/test_38 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_38 rename to filter-parser/fuzz/corpus/parse/test_38 diff --git a/filter_parser/fuzz/corpus/parse/test_39 b/filter-parser/fuzz/corpus/parse/test_39 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_39 rename to filter-parser/fuzz/corpus/parse/test_39 diff --git a/filter_parser/fuzz/corpus/parse/test_4 b/filter-parser/fuzz/corpus/parse/test_4 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_4 rename to filter-parser/fuzz/corpus/parse/test_4 diff --git a/filter_parser/fuzz/corpus/parse/test_40 b/filter-parser/fuzz/corpus/parse/test_40 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_40 rename to filter-parser/fuzz/corpus/parse/test_40 diff --git a/filter_parser/fuzz/corpus/parse/test_41 b/filter-parser/fuzz/corpus/parse/test_41 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_41 rename to filter-parser/fuzz/corpus/parse/test_41 diff --git a/filter_parser/fuzz/corpus/parse/test_42 b/filter-parser/fuzz/corpus/parse/test_42 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_42 rename to filter-parser/fuzz/corpus/parse/test_42 diff --git a/filter_parser/fuzz/corpus/parse/test_43 b/filter-parser/fuzz/corpus/parse/test_43 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_43 rename to filter-parser/fuzz/corpus/parse/test_43 diff --git a/filter_parser/fuzz/corpus/parse/test_5 b/filter-parser/fuzz/corpus/parse/test_5 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_5 rename to filter-parser/fuzz/corpus/parse/test_5 diff --git a/filter_parser/fuzz/corpus/parse/test_6 b/filter-parser/fuzz/corpus/parse/test_6 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_6 rename to filter-parser/fuzz/corpus/parse/test_6 diff --git a/filter_parser/fuzz/corpus/parse/test_7 b/filter-parser/fuzz/corpus/parse/test_7 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_7 rename to filter-parser/fuzz/corpus/parse/test_7 diff --git a/filter_parser/fuzz/corpus/parse/test_8 b/filter-parser/fuzz/corpus/parse/test_8 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_8 rename to filter-parser/fuzz/corpus/parse/test_8 diff --git a/filter_parser/fuzz/corpus/parse/test_9 b/filter-parser/fuzz/corpus/parse/test_9 similarity index 100% rename from filter_parser/fuzz/corpus/parse/test_9 rename to filter-parser/fuzz/corpus/parse/test_9 diff --git a/filter_parser/fuzz/fuzz_targets/parse.rs b/filter-parser/fuzz/fuzz_targets/parse.rs similarity index 100% rename from filter_parser/fuzz/fuzz_targets/parse.rs rename to filter-parser/fuzz/fuzz_targets/parse.rs diff --git a/filter_parser/src/condition.rs b/filter-parser/src/condition.rs similarity index 100% rename from filter_parser/src/condition.rs rename to filter-parser/src/condition.rs diff --git a/filter_parser/src/error.rs b/filter-parser/src/error.rs similarity index 100% rename from filter_parser/src/error.rs rename to filter-parser/src/error.rs diff --git a/filter_parser/src/lib.rs b/filter-parser/src/lib.rs similarity index 100% rename from filter_parser/src/lib.rs rename to filter-parser/src/lib.rs diff --git a/filter_parser/src/main.rs b/filter-parser/src/main.rs similarity index 100% rename from filter_parser/src/main.rs rename to filter-parser/src/main.rs diff --git a/filter_parser/src/value.rs b/filter-parser/src/value.rs similarity index 100% rename from filter_parser/src/value.rs rename to filter-parser/src/value.rs diff --git a/filter_parser/fuzz/.gitignore b/filter_parser/fuzz/.gitignore deleted file mode 100644 index a0925114d..000000000 --- a/filter_parser/fuzz/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -target -corpus -artifacts diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 36e63916c..90bd1f926 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -38,7 +38,7 @@ smallvec = "1.6.1" tempfile = "3.2.0" uuid = { version = "0.8.2", features = ["v4"] } -filter_parser = { path = "../filter_parser" } +filter-parser = { path = "../filter-parser" } # documents words self-join itertools = "0.10.0" From 99197387af040700b7360a48a25628d3ad7eaf28 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 16:25:53 +0100 Subject: [PATCH 54/58] fix the test with the new escaped format --- filter-parser/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 6276023a9..073057b76 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -567,8 +567,8 @@ pub mod tests { ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), ("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), ("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."), - ("channel = 'ponce", "Expression `'ponce` is missing the following closing delimiter: `'`."), - ("channel = \"ponce", "Expression `\"ponce` is missing the following closing delimiter: `\"`."), + ("channel = 'ponce", "Expression `\\'ponce` is missing the following closing delimiter: `'`."), + ("channel = \"ponce", "Expression `\\\"ponce` is missing the following closing delimiter: `\"`."), ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."), ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."), ]; From 73df873f44adeb2d483c975e9ecbb3791e40de3f Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 16:40:05 +0100 Subject: [PATCH 55/58] fix typos --- filter-parser/README.md | 3 +-- filter-parser/src/lib.rs | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/filter-parser/README.md b/filter-parser/README.md index 0999b4340..dfbc03d07 100644 --- a/filter-parser/README.md +++ b/filter-parser/README.md @@ -33,5 +33,4 @@ cargo fuzz run parse -- -max_len=500 ## What to do if you find a bug in the parser - Write a test at the end of the [`lib.rs`](./src/lib.rs) to ensure it never happens again. -- Add a file in [the corpus directory](./fuzz/corpus/parse/) with your filter to help the fuzzer finding new bug. Since this directory is going to be heavily polluted by the execution of the fuzzer it's in the gitignore and you'll need to force push your new test. - Since this directory is going to be heavily polluted by the execution of the fuzzer it's in the gitignore and you'll need to force add your new test. +- Add a file in [the corpus directory](./fuzz/corpus/parse/) with your filter to help the fuzzer find new bugs. Since this directory is going to be heavily polluted by the execution of the fuzzer it's in the gitignore and you'll need to force push your new test. diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 073057b76..3e34e4d96 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -141,7 +141,7 @@ impl<'a> FilterCondition<'a> { } } -/// remove OPTIONAL whitespaces before AND after the the provided parser. +/// remove OPTIONAL whitespaces before AND after the provided parser. fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) -> IResult { delimited(multispace0, inner, multispace0) } @@ -184,7 +184,7 @@ fn parse_geo_radius(input: Span) -> IResult { // we want to forbid space BEFORE the _geoRadius but not after let parsed = preceded( tuple((multispace0, tag("_geoRadius"))), - // if we were able to parse `_geoRadius` and can't parse the rest of the input we returns a failure + // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), )(input) .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo))); @@ -212,7 +212,7 @@ fn parse_geo_point(input: Span) -> IResult { cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), ))(input) .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?; - // if we succeeded we still returns a Failure because geoPoints are not allowed + // if we succeeded we still return a `Failure` because geoPoints are not allowed Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))) } From 519d6b2bf3081ace12a8cbf9ac369831b2df7fc6 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 16:47:54 +0100 Subject: [PATCH 56/58] remove the `!` syntax for the not --- filter-parser/src/lib.rs | 10 ++++------ filter-parser/src/value.rs | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 3e34e4d96..ed36b1bf4 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -5,7 +5,7 @@ //! expression = or //! or = and (~ "OR" ~ and) //! and = not (~ "AND" not)* -//! not = ("NOT" | "!") not | primary +//! not = ("NOT" ~ not) | primary //! primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to //! condition = value ("==" | ">" ...) value //! to = value value TO value @@ -169,13 +169,11 @@ fn parse_and(input: Span) -> IResult { Ok((input, expr)) } -/// not = ("NOT" | "!") not | primary +/// not = ("NOT" ~ not) | primary /// We can have multiple consecutive not, eg: `NOT NOT channel = mv`. -/// If we parse a `NOT` or `!` we MUST parse something behind. +/// If we parse a `NOT` we MUST parse something behind. fn parse_not(input: Span) -> IResult { - alt((map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), parse_primary))( - input, - ) + alt((map(preceded(tag("NOT"), cut(parse_not)), |e| e.negate()), parse_primary))(input) } /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index b9d929ab0..936305837 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -70,7 +70,7 @@ fn is_value_component(c: char) -> bool { } fn is_syntax_component(c: char) -> bool { - c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c) + c.is_whitespace() || ['(', ')', '=', '<', '>'].contains(&c) } #[cfg(test)] From bff48681d2ef798e2df555bc0e91e7c4bcca1184 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 9 Nov 2021 17:05:36 +0100 Subject: [PATCH 57/58] Re-order the operator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- filter-parser/src/condition.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index cff2f2fdd..abd549534 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -46,12 +46,12 @@ pub fn parse_condition(input: Span) -> IResult { let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?; let condition = match *op.fragment() { - "=" => FilterCondition::Condition { fid, op: Equal(value) }, - "!=" => FilterCondition::Condition { fid, op: NotEqual(value) }, - ">" => FilterCondition::Condition { fid, op: GreaterThan(value) }, - "<" => FilterCondition::Condition { fid, op: LowerThan(value) }, "<=" => FilterCondition::Condition { fid, op: LowerThanOrEqual(value) }, ">=" => FilterCondition::Condition { fid, op: GreaterThanOrEqual(value) }, + "!=" => FilterCondition::Condition { fid, op: NotEqual(value) }, + "<" => FilterCondition::Condition { fid, op: LowerThan(value) }, + ">" => FilterCondition::Condition { fid, op: GreaterThan(value) }, + "=" => FilterCondition::Condition { fid, op: Equal(value) }, _ => unreachable!(), }; From 7c3017734a15421e2b061e5ce1bafac60fa5e9ea Mon Sep 17 00:00:00 2001 From: Irevoire Date: Tue, 9 Nov 2021 17:08:04 +0100 Subject: [PATCH 58/58] re-ignore the ! symbol when generating a good error message --- filter-parser/src/value.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 936305837..b9d929ab0 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -70,7 +70,7 @@ fn is_value_component(c: char) -> bool { } fn is_syntax_component(c: char) -> bool { - c.is_whitespace() || ['(', ')', '=', '<', '>'].contains(&c) + c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c) } #[cfg(test)]