diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index 8f61796b3..21676f960 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -8,3 +8,6 @@ publish = false [dependencies] nom = "7.1.0" nom_locate = "4.0.0" + +[dev-dependencies] +insta = "1.18.2" diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index cbf73b96a..e967bd074 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -21,30 +21,12 @@ pub enum Condition<'a> { Equal(Token<'a>), NotEqual(Token<'a>), Exists, - NotExists, LowerThan(Token<'a>), LowerThanOrEqual(Token<'a>), Between { from: Token<'a>, to: Token<'a> }, } -impl<'a> Condition<'a> { - /// This method can return two operations in case it must express - /// an OR operation for the between case (i.e. `TO`). - pub fn negate(self) -> (Self, Option) { - match self { - GreaterThan(n) => (LowerThanOrEqual(n), None), - GreaterThanOrEqual(n) => (LowerThan(n), None), - Equal(s) => (NotEqual(s), None), - NotEqual(s) => (Equal(s), None), - Exists => (NotExists, None), - NotExists => (Exists, None), - LowerThan(n) => (GreaterThanOrEqual(n), None), - LowerThanOrEqual(n) => (GreaterThan(n), None), - Between { from, to } => (LowerThan(from), Some(GreaterThan(to))), - } - } -} -/// condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value +/// condition = value ("==" | ">" ...) value pub fn parse_condition(input: Span) -> IResult { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?; @@ -73,7 +55,10 @@ pub fn parse_not_exists(input: Span) -> IResult { let (input, key) = parse_value(input)?; let (input, _) = tuple((tag("NOT"), multispace1, tag("EXISTS")))(input)?; - Ok((input, FilterCondition::Condition { fid: key.into(), op: NotExists })) + Ok(( + input, + FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key.into(), op: Exists })), + )) } /// to = value value "TO" WS+ value diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index a3720f7bf..d5d36bd8e 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -48,6 +48,12 @@ pub struct Error<'a> { kind: ErrorKind<'a>, } +#[derive(Debug)] +pub enum ExpectedValueKind { + ReservedKeyword, + Other, +} + #[derive(Debug)] pub enum ErrorKind<'a> { ReservedGeo(&'a str), @@ -55,11 +61,16 @@ pub enum ErrorKind<'a> { MisusedGeo, InvalidPrimary, ExpectedEof, - ExpectedValue, + ExpectedValue(ExpectedValueKind), MalformedValue, + InOpeningBracket, + InClosingBracket, + InExpectedValue(ExpectedValueKind), + ReservedKeyword(String), MissingClosingDelimiter(char), Char(char), InternalError(error::ErrorKind), + DepthLimitReached, External(String), } @@ -109,24 +120,26 @@ impl<'a> ParseError> for Error<'a> { impl<'a> Display for Error<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let input = self.context.fragment(); - // When printing our error message we want to escape all `\n` to be sure we keep our format with the // first line being the diagnostic and the second line being the incriminated filter. let escaped_input = input.escape_debug(); - match self.kind { - ErrorKind::ExpectedValue if input.trim().is_empty() => { + match &self.kind { + ErrorKind::ExpectedValue(_) if input.trim().is_empty() => { writeln!(f, "Was expecting a value but instead got nothing.")? } + ErrorKind::ExpectedValue(ExpectedValueKind::ReservedKeyword) => { + writeln!(f, "Was expecting a value but instead got `{escaped_input}`, which is a reserved keyword. To use `{escaped_input}` as a field name or a value, surround it by quotes.")? + } + ErrorKind::ExpectedValue(ExpectedValueKind::Other) => { + writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)? + } ErrorKind::MalformedValue => { writeln!(f, "Malformed value: `{}`.", escaped_input)? } ErrorKind::MissingClosingDelimiter(c) => { writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? } - ErrorKind::ExpectedValue => { - writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)? - } ErrorKind::InvalidPrimary if input.trim().is_empty() => { writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing.")? } @@ -145,9 +158,28 @@ impl<'a> Display for Error<'a> { ErrorKind::MisusedGeo => { writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? } + ErrorKind::ReservedKeyword(word) => { + writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")? + } + ErrorKind::InOpeningBracket => { + writeln!(f, "Expected `[` after `IN` keyword.")? + } + ErrorKind::InClosingBracket => { + writeln!(f, "Expected matching `]` after the list of field names given to `IN[`")? + } + ErrorKind::InExpectedValue(ExpectedValueKind::ReservedKeyword) => { + writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`, which is a keyword. To use `{escaped_input}` as a field name or a value, surround it by quotes.")? + } + ErrorKind::InExpectedValue(ExpectedValueKind::Other) => { + writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`.")? + } ErrorKind::Char(c) => { panic!("Tried to display a char error with `{}`", c) } + ErrorKind::DepthLimitReached => writeln!( + f, + "The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions." + )?, ErrorKind::InternalError(kind) => writeln!( f, "Encountered an internal `{:?}` error while parsing your filter. Please fill an issue", kind diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 01be432d7..33025e6e9 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -6,12 +6,14 @@ //! or = and ("OR" WS+ and)* //! and = not ("AND" WS+ not)* //! not = ("NOT" WS+ not) | primary -//! primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to +//! primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | in | condition | exists | not_exists | to +//! in = value "IN" WS* "[" value_list "]" //! condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value //! exists = value "EXISTS" //! not_exists = value "NOT" WS+ "EXISTS" //! to = value value "TO" WS+ value //! value = WS* ( word | singleQuoted | doubleQuoted) WS+ +//! value_list = (value ("," value)* ","?)? //! singleQuoted = "'" .* all but quotes "'" //! doubleQuoted = "\"" .* all but double quotes "\"" //! word = (alphanumeric | _ | - | .)+ @@ -46,23 +48,26 @@ use std::str::FromStr; pub use condition::{parse_condition, parse_to, Condition}; use condition::{parse_exists, parse_not_exists}; -use error::{cut_with_err, NomErrorExt}; +use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; use nom::branch::alt; use nom::bytes::complete::tag; -use nom::character::complete::{char, multispace0, multispace1}; -use nom::combinator::{cut, eof, map}; +use nom::character::complete::{char, multispace0}; +use nom::combinator::{cut, eof, map, opt}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::Finish; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; +use value::word_exact; pub type Span<'a> = LocatedSpan<&'a str, &'a str>; type IResult<'a, Ret> = nom::IResult, Ret, Error<'a>>; +const MAX_FILTER_DEPTH: usize = 200; + #[derive(Debug, Clone, Eq)] pub struct Token<'a> { /// The token in the original input, it should be used when possible. @@ -112,11 +117,12 @@ impl<'a> From> for Token<'a> { #[derive(Debug, Clone, PartialEq, Eq)] pub enum FilterCondition<'a> { + Not(Box), Condition { fid: Token<'a>, op: Condition<'a> }, - Or(Box, Box), - And(Box, Box), + In { fid: Token<'a>, els: Vec> }, + Or(Vec), + And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, - GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, } impl<'a> FilterCondition<'a> { @@ -124,38 +130,29 @@ impl<'a> FilterCondition<'a> { pub fn token_at_depth(&self, depth: usize) -> Option<&Token> { match self { FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid), - FilterCondition::Or(left, right) => { + FilterCondition::Or(subfilters) => { let depth = depth.saturating_sub(1); - right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) + for f in subfilters.iter() { + if let Some(t) = f.token_at_depth(depth) { + return Some(t); + } + } + None } - FilterCondition::And(left, right) => { + FilterCondition::And(subfilters) => { let depth = depth.saturating_sub(1); - right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) + for f in subfilters.iter() { + if let Some(t) = f.token_at_depth(depth) { + return Some(t); + } + } + None } FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point), - FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point), _ => None, } } - pub fn negate(self) -> FilterCondition<'a> { - use FilterCondition::*; - - match self { - Condition { fid, op } => match op.negate() { - (op, None) => Condition { fid, op }, - (a, Some(b)) => Or( - Condition { fid: fid.clone(), op: a }.into(), - Condition { fid, op: b }.into(), - ), - }, - Or(a, b) => And(a.negate().into(), b.negate().into()), - And(a, b) => Or(a.negate().into(), b.negate().into()), - GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, - GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, - } - } - pub fn parse(input: &'a str) -> Result, Error> { if input.trim().is_empty() { return Ok(None); @@ -170,37 +167,128 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) delimited(multispace0, inner, multispace0) } -/// or = and ("OR" WS+ and)* -fn parse_or(input: Span) -> IResult { - let (input, lhs) = parse_and(input)?; - // if we found a `OR` then we MUST find something next - let (input, ors) = many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?; +/// value_list = (value ("," value)* ","?)? +fn parse_value_list<'a>(input: Span<'a>) -> IResult>> { + let (input, first_value) = opt(parse_value)(input)?; + if let Some(first_value) = first_value { + let value_list_el_parser = preceded(ws(tag(",")), parse_value); - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); - Ok((input, expr)) + let (input, mut values) = many0(value_list_el_parser)(input)?; + let (input, _) = opt(ws(tag(",")))(input)?; + values.insert(0, first_value); + + Ok((input, values)) + } else { + Ok((input, vec![])) + } +} + +/// "IN" WS* "[" value_list "]" +fn parse_in_body(input: Span) -> IResult> { + let (input, _) = ws(word_exact("IN"))(input)?; + + // everything after `IN` can be a failure + let (input, _) = + cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))( + input, + )?; + + let (input, content) = cut(parse_value_list)(input)?; + + // everything after `IN` can be a failure + let (input, _) = cut_with_err(ws(tag("]")), |_| { + if eof::<_, ()>(input).is_ok() { + Error::new_from_kind(input, ErrorKind::InClosingBracket) + } else { + let expected_value_kind = match parse_value(input) { + Err(nom::Err::Error(e)) => match e.kind() { + ErrorKind::ReservedKeyword(_) => ExpectedValueKind::ReservedKeyword, + _ => ExpectedValueKind::Other, + }, + _ => ExpectedValueKind::Other, + }; + Error::new_from_kind(input, ErrorKind::InExpectedValue(expected_value_kind)) + } + })(input)?; + + Ok((input, content)) +} + +/// in = value "IN" "[" value_list "]" +fn parse_in(input: Span) -> IResult { + let (input, value) = parse_value(input)?; + let (input, content) = parse_in_body(input)?; + + let filter = FilterCondition::In { fid: value, els: content }; + Ok((input, filter)) +} + +/// in = value "NOT" WS* "IN" "[" value_list "]" +fn parse_not_in(input: Span) -> IResult { + let (input, value) = parse_value(input)?; + let (input, _) = word_exact("NOT")(input)?; + let (input, content) = parse_in_body(input)?; + + let filter = FilterCondition::Not(Box::new(FilterCondition::In { fid: value, els: content })); + Ok((input, filter)) +} + +/// or = and ("OR" and) +fn parse_or(input: Span, depth: usize) -> IResult { + if depth > MAX_FILTER_DEPTH { + return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached))); + } + let (input, first_filter) = parse_and(input, depth + 1)?; + // if we found a `OR` then we MUST find something next + let (input, mut ors) = + many0(preceded(ws(word_exact("OR")), cut(|input| parse_and(input, depth + 1))))(input)?; + + let filter = if ors.is_empty() { + first_filter + } else { + ors.insert(0, first_filter); + FilterCondition::Or(ors) + }; + + Ok((input, filter)) } /// and = not ("AND" not)* -fn parse_and(input: Span) -> IResult { - let (input, lhs) = parse_not(input)?; +fn parse_and(input: Span, depth: usize) -> IResult { + if depth > MAX_FILTER_DEPTH { + return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached))); + } + let (input, first_filter) = parse_not(input, depth + 1)?; // if we found a `AND` then we MUST find something next - let (input, ors) = - many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); - Ok((input, expr)) + let (input, mut ands) = + many0(preceded(ws(word_exact("AND")), cut(|input| parse_not(input, depth + 1))))(input)?; + + let filter = if ands.is_empty() { + first_filter + } else { + ands.insert(0, first_filter); + FilterCondition::And(ands) + }; + + Ok((input, filter)) } /// not = ("NOT" WS+ not) | primary /// We can have multiple consecutive not, eg: `NOT NOT channel = mv`. /// If we parse a `NOT` we MUST parse something behind. -fn parse_not(input: Span) -> IResult { +fn parse_not(input: Span, depth: usize) -> IResult { + if depth > MAX_FILTER_DEPTH { + return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached))); + } alt(( - map(preceded(ws(tuple((tag("NOT"), multispace1))), cut(parse_not)), |e| e.negate()), - parse_primary, + map( + preceded(ws(word_exact("NOT")), cut(|input| parse_not(input, depth + 1))), + |e| match e { + FilterCondition::Not(e) => *e, + _ => FilterCondition::Not(Box::new(e)), + }, + ), + |input| parse_primary(input, depth + 1), ))(input) } @@ -209,7 +297,7 @@ fn parse_not(input: Span) -> IResult { fn parse_geo_radius(input: Span) -> IResult { // we want to allow space BEFORE the _geoRadius but not after let parsed = preceded( - tuple((multispace0, tag("_geoRadius"))), + tuple((multispace0, word_exact("_geoRadius"))), // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), )(input) @@ -242,37 +330,58 @@ fn parse_geo_point(input: Span) -> IResult { Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))) } +fn parse_error_reserved_keyword(input: Span) -> IResult { + match parse_condition(input) { + Ok(result) => Ok(result), + Err(nom::Err::Error(inner) | nom::Err::Failure(inner)) => match inner.kind() { + ErrorKind::ExpectedValue(ExpectedValueKind::ReservedKeyword) => { + return Err(nom::Err::Failure(inner)); + } + _ => return Err(nom::Err::Error(inner)), + }, + Err(e) => { + return Err(e); + } + } +} + /// primary = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to -fn parse_primary(input: Span) -> IResult { +fn parse_primary(input: Span, depth: usize) -> IResult { + if depth > MAX_FILTER_DEPTH { + return Err(nom::Err::Error(Error::new_from_kind(input, ErrorKind::DepthLimitReached))); + } alt(( // if we find a first parenthesis, then we must parse an expression and find the closing parenthesis delimited( ws(char('(')), - cut(parse_expression), + cut(|input| parse_expression(input, depth + 1)), cut_with_err(ws(char(')')), |c| { Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())) }), ), parse_geo_radius, + parse_in, + parse_not_in, parse_condition, parse_exists, parse_not_exists, parse_to, // the next lines are only for error handling and are written at the end to have the less possible performance impact parse_geo_point, + parse_error_reserved_keyword, ))(input) // if the inner parsers did not match enough information to return an accurate error .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary))) } /// expression = or -pub fn parse_expression(input: Span) -> IResult { - parse_or(input) +pub fn parse_expression(input: Span, depth: usize) -> IResult { + parse_or(input, depth) } /// filter = expression EOF pub fn parse_filter(input: Span) -> IResult { - terminated(parse_expression, eof)(input) + terminated(|input| parse_expression(input, 0), eof)(input) } #[cfg(test)] @@ -292,371 +401,325 @@ pub mod tests { fn parse() { use FilterCondition as Fc; - let test_case = [ - // simple test - ( - "channel = Ponce", - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = ", "Ponce")), - }, - ), - ( - "subscribers = 12", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::Equal(rtok("subscribers = ", "12")), - }, - ), - // test all the quotes and simple quotes - ( - "channel = 'Mister Mv'", - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = '", "Mister Mv")), - }, - ), - ( - "channel = \"Mister Mv\"", - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = \"", "Mister Mv")), - }, - ), - ( - "'dog race' = Borzoi", - Fc::Condition { - fid: rtok("'", "dog race"), - op: Condition::Equal(rtok("'dog race' = ", "Borzoi")), - }, - ), - ( - "\"dog race\" = Chusky", - Fc::Condition { - fid: rtok("\"", "dog race"), - op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")), - }, - ), - ( - "\"dog race\" = \"Bernese Mountain\"", - Fc::Condition { - fid: rtok("\"", "dog race"), - op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), - }, - ), - ( - "'dog race' = 'Bernese Mountain'", - Fc::Condition { - fid: rtok("'", "dog race"), - op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")), - }, - ), - ( - "\"dog race\" = 'Bernese Mountain'", - Fc::Condition { - fid: rtok("\"", "dog race"), - op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), - }, - ), - // test all the operators - ( - "channel != ponce", - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::NotEqual(rtok("channel != ", "ponce")), - }, - ), - ( - "NOT channel = ponce", - Fc::Condition { - fid: rtok("NOT ", "channel"), - op: Condition::NotEqual(rtok("NOT channel = ", "ponce")), - }, - ), - ( - "subscribers < 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::LowerThan(rtok("subscribers < ", "1000")), - }, - ), - ( - "subscribers > 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::GreaterThan(rtok("subscribers > ", "1000")), - }, - ), - ( - "subscribers <= 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")), - }, - ), - ( - "subscribers >= 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")), - }, - ), - ( - "NOT subscribers < 1000", - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")), - }, - ), - ( - "NOT subscribers > 1000", - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")), - }, - ), - ( - "NOT subscribers <= 1000", - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")), - }, - ), - ( - "NOT subscribers >= 1000", - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")), - }, - ), - ( - "subscribers EXISTS", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::Exists, - }, - ), - ( - "NOT subscribers EXISTS", - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::NotExists, - }, - ), - ( - "subscribers NOT EXISTS", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::NotExists, - }, - ), - ( - "NOT subscribers NOT EXISTS", - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::Exists, - }, - ), - ( - "subscribers NOT EXISTS", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::NotExists, - }, - ), - ( - "subscribers 100 TO 1000", - Fc::Condition { - fid: rtok("", "subscribers"), - op: Condition::Between { - from: rtok("subscribers ", "100"), - to: rtok("subscribers 100 TO ", "1000"), - }, - }, - ), - ( - "NOT subscribers 100 TO 1000", - Fc::Or( - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::LowerThan(rtok("NOT subscribers ", "100")), - } - .into(), - Fc::Condition { - fid: rtok("NOT ", "subscribers"), - op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), - } - .into(), - ), - ), - ( - "_geoRadius(12, 13, 14)", - Fc::GeoLowerThan { - point: [rtok("_geoRadius(", "12"), rtok("_geoRadius(12, ", "13")], - radius: rtok("_geoRadius(12, 13, ", "14"), - }, - ), - ( - "NOT _geoRadius(12, 13, 14)", - Fc::GeoGreaterThan { - point: [rtok("NOT _geoRadius(", "12"), rtok("NOT _geoRadius(12, ", "13")], - radius: rtok("NOT _geoRadius(12, 13, ", "14"), - }, - ), - // test simple `or` and `and` - ( - "channel = ponce AND 'dog race' != 'bernese mountain'", - Fc::And( - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = ", "ponce")), - } - .into(), - Fc::Condition { - fid: rtok("channel = ponce AND '", "dog race"), - op: Condition::NotEqual(rtok( - "channel = ponce AND 'dog race' != '", - "bernese mountain", - )), - } - .into(), - ), - ), - ( - "channel = ponce OR 'dog race' != 'bernese mountain'", - Fc::Or( - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = ", "ponce")), - } - .into(), - Fc::Condition { - fid: rtok("channel = ponce OR '", "dog race"), - op: Condition::NotEqual(rtok( - "channel = ponce OR 'dog race' != '", - "bernese mountain", - )), - } - .into(), - ), - ), - ( - "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", - Fc::Or( - Fc::And( - Fc::Condition { - fid: rtok("", "channel"), - op: Condition::Equal(rtok("channel = ", "ponce")), - } - .into(), - Fc::Condition { - fid: rtok("channel = ponce AND '", "dog race"), - op: Condition::NotEqual(rtok( - "channel = ponce AND 'dog race' != '", - "bernese mountain", - )), - } - .into(), - ) - .into(), - Fc::Condition { - fid: rtok( - "channel = ponce AND 'dog race' != 'bernese mountain' OR ", - "subscribers", - ), - op: Condition::GreaterThan(rtok( - "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", - "1000", - )), - } - .into(), - ), - ), - // test parenthesis - ( - "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", - Fc::And( - Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(), - Fc::Or( - Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), - Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), - ).into()), - ), - ( - "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", - Fc::And( - Fc::Or( - Fc::And( - Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(), - Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), - ).into(), - Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), - ).into(), - Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() - ) - ) - ]; - - for (input, expected) in test_case { - let result = Fc::parse(input); - - assert!( - result.is_ok(), - "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", - expected, - result.unwrap_err() - ); - let filter = result.unwrap(); - assert_eq!(filter, Some(expected), "Filter `{}` failed.", input); + fn p<'a>(s: &'a str) -> impl std::fmt::Display + 'a { + Fc::parse(s).unwrap().unwrap() } + + // Test equal + insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}"); + insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}"); + insta::assert_display_snapshot!(p("channel = 'Mister Mv'"), @"{channel} = {Mister Mv}"); + insta::assert_display_snapshot!(p("channel = \"Mister Mv\""), @"{channel} = {Mister Mv}"); + insta::assert_display_snapshot!(p("'dog race' = Borzoi"), @"{dog race} = {Borzoi}"); + insta::assert_display_snapshot!(p("\"dog race\" = Chusky"), @"{dog race} = {Chusky}"); + insta::assert_display_snapshot!(p("\"dog race\" = \"Bernese Mountain\""), @"{dog race} = {Bernese Mountain}"); + insta::assert_display_snapshot!(p("'dog race' = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); + insta::assert_display_snapshot!(p("\"dog race\" = 'Bernese Mountain'"), @"{dog race} = {Bernese Mountain}"); + + // Test IN + insta::assert_display_snapshot!(p("colour IN[]"), @"{colour} IN[]"); + insta::assert_display_snapshot!(p("colour IN[green]"), @"{colour} IN[{green}, ]"); + insta::assert_display_snapshot!(p("colour IN[green,]"), @"{colour} IN[{green}, ]"); + insta::assert_display_snapshot!(p("colour NOT IN[green,blue]"), @"NOT ({colour} IN[{green}, {blue}, ])"); + insta::assert_display_snapshot!(p(" colour IN [ green , blue , ]"), @"{colour} IN[{green}, {blue}, ]"); + + // Test IN + OR/AND/() + insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); + insta::assert_display_snapshot!(p("NOT (colour IN [green, blue]) AND color = green "), @"AND[NOT ({colour} IN[{green}, {blue}, ]), {color} = {green}, ]"); + insta::assert_display_snapshot!(p("x = 1 OR NOT (colour IN [green, blue] OR color = green) "), @"OR[{x} = {1}, NOT (OR[{colour} IN[{green}, {blue}, ], {color} = {green}, ]), ]"); + + // Test whitespace start/end + insta::assert_display_snapshot!(p(" colour = green "), @"{colour} = {green}"); + insta::assert_display_snapshot!(p(" (colour = green OR colour = red) "), @"OR[{colour} = {green}, {colour} = {red}, ]"); + insta::assert_display_snapshot!(p(" colour IN [green, blue] AND color = green "), @"AND[{colour} IN[{green}, {blue}, ], {color} = {green}, ]"); + insta::assert_display_snapshot!(p(" colour NOT IN [green, blue] "), @"NOT ({colour} IN[{green}, {blue}, ])"); + insta::assert_display_snapshot!(p(" colour IN [green, blue] "), @"{colour} IN[{green}, {blue}, ]"); + + // Test conditions + insta::assert_display_snapshot!(p("channel != ponce"), @"{channel} != {ponce}"); + insta::assert_display_snapshot!(p("NOT channel = ponce"), @"NOT ({channel} = {ponce})"); + insta::assert_display_snapshot!(p("subscribers < 1000"), @"{subscribers} < {1000}"); + insta::assert_display_snapshot!(p("subscribers > 1000"), @"{subscribers} > {1000}"); + insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}"); + insta::assert_display_snapshot!(p("subscribers >= 1000"), @"{subscribers} >= {1000}"); + insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}"); + insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}"); + + // Test NOT + EXISTS + insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS"); + insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})"); + insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS"); + insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); + insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})"); + + // Test nested NOT + insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); + insta::assert_display_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); + + // Test geo radius + insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); + insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); + + // Test OR + AND + insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); + insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); + insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000"), @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ]"); + insta::assert_display_snapshot!( + p("channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000 OR colour = red OR colour = blue AND size = 7"), + @"OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, {colour} = {red}, AND[{colour} = {blue}, {size} = {7}, ], ]" + ); + + // Test parentheses + insta::assert_display_snapshot!(p("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )"), @"AND[{channel} = {ponce}, OR[{dog race} != {bernese mountain}, {subscribers} > {1000}, ], ]"); + insta::assert_display_snapshot!(p("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)"), @"AND[OR[AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ], {subscribers} > {1000}, ], _geoRadius({12}, {13}, {14}), ]"); + + // Test recursion + // This is the most that is allowed + insta::assert_display_snapshot!( + p("(((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))"), + @"{x} = {1}" + ); + insta::assert_display_snapshot!( + p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), + @"NOT ({x} = {1})" + ); + + // Confusing keywords + insta::assert_display_snapshot!(p(r#"NOT "OR" EXISTS AND "EXISTS" NOT EXISTS"#), @"AND[NOT ({OR} EXISTS), NOT ({EXISTS} EXISTS), ]"); } #[test] fn error() { use FilterCondition as Fc; - let test_case = [ - // simple test - ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."), - ("channel = ", "Was expecting a value but instead got nothing."), - ("channel = 🐻", "Was expecting a value but instead got `🐻`."), - ("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."), - ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `OR`."), - ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `AND`."), - ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."), - ("channel = Ponce OR", "Found unexpected characters at the end of the filter: `OR`. You probably forgot an `OR` or an `AND` rule."), - ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), - ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."), - ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), - ("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."), - ("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."), - ("channel = 'ponce", "Expression `\\'ponce` is missing the following closing delimiter: `'`."), - ("channel = \"ponce", "Expression `\\\"ponce` is missing the following closing delimiter: `\"`."), - ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."), - ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."), - ("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."), - ("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."), - ("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), - ("channel = ponce AND'dog' != 'bernese mountain'", "Found unexpected characters at the end of the filter: `AND\\'dog\\' != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."), - ]; - - for (input, expected) in test_case { - let result = Fc::parse(input); - - assert!( - result.is_err(), - "Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`", - input, - result.unwrap() - ); - let filter = result.unwrap_err().to_string(); - assert!(filter.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, filter); + fn p<'a>(s: &'a str) -> impl std::fmt::Display + 'a { + Fc::parse(s).unwrap_err().to_string() } + + insta::assert_display_snapshot!(p("channel = Ponce = 12"), @r###" + Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule. + 17:21 channel = Ponce = 12 + "###); + + insta::assert_display_snapshot!(p("channel = "), @r###" + Was expecting a value but instead got nothing. + 14:14 channel = + "###); + + insta::assert_display_snapshot!(p("channel = 🐻"), @r###" + Was expecting a value but instead got `🐻`. + 11:12 channel = 🐻 + "###); + + insta::assert_display_snapshot!(p("channel = 🐻 AND followers < 100"), @r###" + Was expecting a value but instead got `🐻`. + 11:12 channel = 🐻 AND followers < 100 + "###); + + insta::assert_display_snapshot!(p("'OR'"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\'OR\'`. + 1:5 'OR' + "###); + + insta::assert_display_snapshot!(p("OR"), @r###" + Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. + 1:3 OR + "###); + + insta::assert_display_snapshot!(p("channel Ponce"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`. + 1:14 channel Ponce + "###); + + insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing. + 19:19 channel = Ponce OR + "###); + + insta::assert_display_snapshot!(p("_geoRadius"), @r###" + The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`. + 1:11 _geoRadius + "###); + + insta::assert_display_snapshot!(p("_geoRadius = 12"), @r###" + The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`. + 1:16 _geoRadius = 12 + "###); + + insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###" + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. + 1:22 _geoPoint(12, 13, 14) + "###); + + insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###" + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. + 13:34 position <= _geoPoint(12, 13, 14) + "###); + + insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###" + The `_geoRadius` filter is an operation and can't be used as a value. + 13:35 position <= _geoRadius(12, 13, 14) + "###); + + insta::assert_display_snapshot!(p("channel = 'ponce"), @r###" + Expression `\'ponce` is missing the following closing delimiter: `'`. + 11:17 channel = 'ponce + "###); + + insta::assert_display_snapshot!(p("channel = \"ponce"), @r###" + Expression `\"ponce` is missing the following closing delimiter: `"`. + 11:17 channel = "ponce + "###); + + insta::assert_display_snapshot!(p("channel = mv OR (followers >= 1000"), @r###" + Expression `(followers >= 1000` is missing the following closing delimiter: `)`. + 17:35 channel = mv OR (followers >= 1000 + "###); + + insta::assert_display_snapshot!(p("channel = mv OR followers >= 1000)"), @r###" + Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule. + 34:35 channel = mv OR followers >= 1000) + "###); + + insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`. + 1:17 colour NOT EXIST + "###); + + insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`. + 1:23 subscribers 100 TO1000 + "###); + + insta::assert_display_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###" + Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule. + 17:44 channel = ponce ORdog != 'bernese mountain' + "###); + + insta::assert_display_snapshot!(p("colour IN blue, green]"), @r###" + Expected `[` after `IN` keyword. + 11:23 colour IN blue, green] + "###); + + insta::assert_display_snapshot!(p("colour IN [blue, green, 'blue' > 2]"), @r###" + Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`. + 32:36 colour IN [blue, green, 'blue' > 2] + "###); + + insta::assert_display_snapshot!(p("colour IN [blue, green, AND]"), @r###" + Expected only comma-separated field names inside `IN[..]` but instead found `AND]`. + 25:29 colour IN [blue, green, AND] + "###); + + insta::assert_display_snapshot!(p("colour IN [blue, green"), @r###" + Expected matching `]` after the list of field names given to `IN[` + 23:23 colour IN [blue, green + "###); + + insta::assert_display_snapshot!(p("colour IN ['blue, green"), @r###" + Expression `\'blue, green` is missing the following closing delimiter: `'`. + 12:24 colour IN ['blue, green + "###); + + insta::assert_display_snapshot!(p("x = EXISTS"), @r###" + Was expecting a value but instead got `EXISTS`, which is a reserved keyword. To use `EXISTS` as a field name or a value, surround it by quotes. + 5:11 x = EXISTS + "###); + + insta::assert_display_snapshot!(p("AND = 8"), @r###" + Was expecting a value but instead got `AND`, which is a reserved keyword. To use `AND` as a field name or a value, surround it by quotes. + 1:4 AND = 8 + "###); + + insta::assert_display_snapshot!(p("((((((((((((((((((((((((((((((((((((((((((((((((((x = 1))))))))))))))))))))))))))))))))))))))))))))))))))"), @r###" + The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions. + 51:106 ((((((((((((((((((((((((((((((((((((((((((((((((((x = 1)))))))))))))))))))))))))))))))))))))))))))))))))) + "###); + + insta::assert_display_snapshot!( + p("NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1"), + @r###" + The filter exceeded the maximum depth limit. Try rewriting the filter so that it contains fewer nested conditions. + 797:802 NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT NOT x = 1 + "### + ); + + insta::assert_display_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###" + Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes. + 5:7 NOT OR EXISTS AND EXISTS NOT EXISTS + "###); } #[test] fn depth() { let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap(); - assert!(filter.token_at_depth(5).is_some()); + assert!(filter.token_at_depth(1).is_some()); + assert!(filter.token_at_depth(2).is_none()); + + let filter = FilterCondition::parse("(account_ids=1 OR (account_ids=2 AND account_ids=3) OR (account_ids=4 AND account_ids=5) OR account_ids=6)").unwrap().unwrap(); + assert!(filter.token_at_depth(2).is_some()); + assert!(filter.token_at_depth(3).is_none()); + + let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 AND account_ids=3 OR account_ids=4 AND account_ids=5 OR account_ids=6").unwrap().unwrap(); + assert!(filter.token_at_depth(2).is_some()); + assert!(filter.token_at_depth(3).is_none()); + } +} + +impl<'a> std::fmt::Display for FilterCondition<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FilterCondition::Not(filter) => { + write!(f, "NOT ({filter})") + } + FilterCondition::Condition { fid, op } => { + write!(f, "{fid} {op}") + } + FilterCondition::In { fid, els } => { + write!(f, "{fid} IN[")?; + for el in els { + write!(f, "{el}, ")?; + } + write!(f, "]") + } + FilterCondition::Or(els) => { + write!(f, "OR[")?; + for el in els { + write!(f, "{el}, ")?; + } + write!(f, "]") + } + FilterCondition::And(els) => { + write!(f, "AND[")?; + for el in els { + write!(f, "{el}, ")?; + } + write!(f, "]") + } + FilterCondition::GeoLowerThan { point, radius } => { + write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius) + } + } + } +} +impl<'a> std::fmt::Display for Condition<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Condition::GreaterThan(token) => write!(f, "> {token}"), + Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"), + Condition::Equal(token) => write!(f, "= {token}"), + Condition::NotEqual(token) => write!(f, "!= {token}"), + Condition::Exists => write!(f, "EXISTS"), + Condition::LowerThan(token) => write!(f, "< {token}"), + Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), + Condition::Between { from, to } => write!(f, "{from} TO {to}"), + } + } +} +impl<'a> std::fmt::Display for Token<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{{{}}}", self.value()) } } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 22da6a0df..d015018c1 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -5,7 +5,7 @@ use nom::combinator::cut; use nom::sequence::{delimited, terminated}; use nom::{InputIter, InputLength, InputTake, Slice}; -use crate::error::NomErrorExt; +use crate::error::{ExpectedValueKind, NomErrorExt}; use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; /// This function goes through all characters in the [Span] if it finds any escaped character (`\`). @@ -48,6 +48,35 @@ fn quoted_by(quote: char, input: Span) -> IResult { )) } +// word = (alphanumeric | _ | - | .)+ except for reserved keywords +pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult> { + let (input, word): (_, Token<'a>) = + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; + if is_keyword(word.value()) { + return Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::ReservedKeyword(word.value().to_owned()), + ))); + } + Ok((input, word)) +} + +// word = {tag} +pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> { + move |input| { + let (input, word): (_, Token<'a>) = + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?; + if word.value() == tag { + Ok((input, word)) + } else { + Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::InternalError(nom::error::ErrorKind::Tag), + ))) + } + } +} + /// value = WS* ( word | singleQuoted | doubleQuoted) WS+ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { // to get better diagnostic message we are going to strip the left whitespaces from the input right now @@ -71,11 +100,6 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { _ => (), } - // word = (alphanumeric | _ | - | .)+ - let word = |input: Span<'a>| -> IResult> { - take_while1(is_value_component)(input).map(|(s, t)| (s, t.into())) - }; - // this parser is only used when an error is encountered and it parse the // largest string possible that do not contain any “language” syntax. // If we try to parse `name = 🦀 AND language = rust` we want to return an @@ -85,17 +109,27 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { // when we create the errors from the output of the alt we have spaces everywhere let error_word = take_till::<_, _, Error>(is_syntax_component); - terminated( + let (input, value) = terminated( alt(( delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))), delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))), - word, + word_not_keyword, )), multispace0, )(input) // if we found nothing in the alt it means the user specified something that was not recognized as a value .map_err(|e: nom::Err| { - e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue)) + e.map_err(|error| { + let expected_value_kind = if matches!(error.kind(), ErrorKind::ReservedKeyword(_)) { + ExpectedValueKind::ReservedKeyword + } else { + ExpectedValueKind::Other + }; + Error::new_from_kind( + error_word(input).unwrap().1, + ErrorKind::ExpectedValue(expected_value_kind), + ) + }) }) .map_err(|e| { e.map_fail(|failure| { @@ -107,7 +141,9 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { failure } }) - }) + })?; + + Ok((input, value)) } fn is_value_component(c: char) -> bool { @@ -118,6 +154,10 @@ fn is_syntax_component(c: char) -> bool { c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c) } +fn is_keyword(s: &str) -> bool { + matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius") +} + #[cfg(test)] pub mod test { use nom::Finish; diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index afde8cc1a..c20210443 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -744,10 +744,9 @@ async fn main() -> anyhow::Result<()> { }; let condition = match (filters, facet_filters) { - (Some(filters), Some(facet_filters)) => Some(FilterCondition::And( - Box::new(filters.into()), - Box::new(facet_filters.into()), - )), + (Some(filters), Some(facet_filters)) => { + Some(FilterCondition::And(vec![filters.into(), facet_filters.into()])) + } (Some(condition), None) | (None, Some(condition)) => Some(condition.into()), _otherwise => None, }; diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 90aab826a..7241dab2b 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -89,52 +89,44 @@ impl<'a> Filter<'a> { I: IntoIterator>, J: IntoIterator, { - let mut ands: Option = None; + let mut ands = vec![]; for either in array { match either { Either::Left(array) => { - let mut ors = None; + let mut ors = vec![]; for rule in array { if let Some(filter) = Self::from_str(rule.as_ref())? { - let condition = filter.condition; - ors = match ors.take() { - Some(ors) => { - Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) - } - None => Some(condition), - }; + ors.push(filter.condition); } } - if let Some(rule) = ors { - ands = match ands.take() { - Some(ands) => { - Some(FilterCondition::And(Box::new(ands), Box::new(rule))) - } - None => Some(rule), - }; + if ors.len() > 1 { + ands.push(FilterCondition::Or(ors)); + } else if ors.len() == 1 { + ands.push(ors.pop().unwrap()); } } Either::Right(rule) => { if let Some(filter) = Self::from_str(rule.as_ref())? { - let condition = filter.condition; - ands = match ands.take() { - Some(ands) => { - Some(FilterCondition::And(Box::new(ands), Box::new(condition))) - } - None => Some(condition), - }; + ands.push(filter.condition); } } } } + let and = if ands.is_empty() { + return Ok(None); + } else if ands.len() == 1 { + ands.pop().unwrap() + } else { + FilterCondition::And(ands) + }; - if let Some(token) = ands.as_ref().and_then(|fc| fc.token_at_depth(MAX_FILTER_DEPTH)) { + if let Some(token) = and.token_at_depth(MAX_FILTER_DEPTH) { return Err(token.as_external_error(FilterError::TooDeep).into()); } - Ok(ands.map(|ands| Self { condition: ands })) + Ok(Some(Self { condition: and })) } pub fn from_str(expression: &'a str) -> Result> { @@ -284,14 +276,6 @@ impl<'a> Filter<'a> { let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; return Ok(exist); } - Condition::NotExists => { - let all_ids = index.documents_ids(rtxn)?; - - let exist = Self::evaluate_operator(rtxn, index, field_id, &Condition::Exists)?; - - let notexist = all_ids - exist; - return Ok(notexist); - } Condition::Equal(val) => { let (_original_value, string_docids) = strings_db .get(rtxn, &(field_id, &val.value().to_lowercase()))? @@ -317,11 +301,10 @@ impl<'a> Filter<'a> { return Ok(string_docids | number_docids); } Condition::NotEqual(val) => { - let all_numbers_ids = index.number_faceted_documents_ids(rtxn, field_id)?; - let all_strings_ids = index.string_faceted_documents_ids(rtxn, field_id)?; let operator = Condition::Equal(val.clone()); let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?; - return Ok((all_numbers_ids | all_strings_ids) - docids); + let all_ids = index.documents_ids(rtxn)?; + return Ok(all_ids - docids); } }; @@ -367,6 +350,39 @@ impl<'a> Filter<'a> { filterable_fields: &HashSet, ) -> Result { match &self.condition { + FilterCondition::Not(f) => { + let all_ids = index.documents_ids(rtxn)?; + let selected = Self::inner_evaluate( + &(f.as_ref().clone()).into(), + rtxn, + index, + filterable_fields, + )?; + return Ok(all_ids - selected); + } + FilterCondition::In { fid, els } => { + if crate::is_faceted(fid.value(), filterable_fields) { + let field_ids_map = index.fields_ids_map(rtxn)?; + + if let Some(fid) = field_ids_map.id(fid.value()) { + let mut bitmap = RoaringBitmap::new(); + + for el in els { + let op = Condition::Equal(el.clone()); + let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?; + bitmap |= el_bitmap; + } + Ok(bitmap) + } else { + Ok(RoaringBitmap::new()) + } + } else { + return Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filterable_fields.clone(), + }))?; + } + } FilterCondition::Condition { fid, op } => { if crate::is_faceted(fid.value(), filterable_fields) { let field_ids_map = index.fields_ids_map(rtxn)?; @@ -397,38 +413,38 @@ impl<'a> Filter<'a> { } } } - FilterCondition::Or(lhs, rhs) => { - let lhs = Self::inner_evaluate( - &(lhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - let rhs = Self::inner_evaluate( - &(rhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - Ok(lhs | rhs) - } - FilterCondition::And(lhs, rhs) => { - let lhs = Self::inner_evaluate( - &(lhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - if lhs.is_empty() { - return Ok(lhs); + FilterCondition::Or(subfilters) => { + let mut bitmap = RoaringBitmap::new(); + for f in subfilters { + bitmap |= + Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; + } + Ok(bitmap) + } + FilterCondition::And(subfilters) => { + let mut subfilters_iter = subfilters.iter(); + if let Some(first_subfilter) = subfilters_iter.next() { + let mut bitmap = Self::inner_evaluate( + &(first_subfilter.clone()).into(), + rtxn, + index, + filterable_fields, + )?; + for f in subfilters_iter { + if bitmap.is_empty() { + return Ok(bitmap); + } + bitmap &= Self::inner_evaluate( + &(f.clone()).into(), + rtxn, + index, + filterable_fields, + )?; + } + Ok(bitmap) + } else { + Ok(RoaringBitmap::new()) } - let rhs = Self::inner_evaluate( - &(rhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - Ok(lhs & rhs) } FilterCondition::GeoLowerThan { point, radius } => { if filterable_fields.contains("_geo") { @@ -467,17 +483,6 @@ impl<'a> Filter<'a> { }))?; } } - FilterCondition::GeoGreaterThan { point, radius } => { - let result = Self::inner_evaluate( - &FilterCondition::GeoLowerThan { point: point.clone(), radius: radius.clone() } - .into(), - rtxn, - index, - filterable_fields, - )?; - let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; - Ok(geo_faceted_doc_ids - result) - } } } } @@ -732,12 +737,10 @@ mod tests { } } - let error = Filter::from_str(&filter_string).unwrap_err(); - assert!( - error.to_string().starts_with("Too many filter conditions"), - "{}", - error.to_string() - ); + // Note: the filter used to be rejected for being too deep, but that is + // no longer the case + let filter = Filter::from_str(&filter_string).unwrap(); + assert!(filter.is_some()); } #[test] diff --git a/milli/tests/assets/test_set.ndjson b/milli/tests/assets/test_set.ndjson index 427daca8c..2e77f9faf 100644 --- a/milli/tests/assets/test_set.ndjson +++ b/milli/tests/assets/test_set.ndjson @@ -1,17 +1,17 @@ -{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":"", "opt1": [null]} -{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":"", "opt1": []} -{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":"", "opt1": null} -{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":"", "opt1": 4} -{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":"", "opt1": "E"} -{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":"", "opt1": ["F"]} +{"id":"A","word_rank":0,"typo_rank":1,"proximity_rank":15,"attribute_rank":505,"exact_rank":5,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":43,"title":"hell o","description":"hell o is the fourteenth episode of the american television series glee performing songs with this word","tag":"blue","_geo": { "lat": 50.62984446145472, "lng": 3.085712705162039 },"":"", "opt1": [null], "tag_in": 1} +{"id":"B","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":191,"title":"hello","description":"hello is a song recorded by english singer songwriter adele","tag":"red","_geo": { "lat": 50.63047567664291, "lng": 3.088852230809636 },"":"", "opt1": [], "tag_in": 2} +{"id":"C","word_rank":0,"typo_rank":1,"proximity_rank":8,"attribute_rank":336,"exact_rank":4,"asc_desc_rank":2,"sort_by_rank":0,"geo_rank":283,"title":"hell on earth","description":"hell on earth is the third studio album by american hip hop duo mobb deep","tag":"blue","_geo": { "lat": 50.6321800003937, "lng": 3.088331882262139 },"":"", "opt1": null, "tag_in": 3} +{"id":"D","word_rank":0,"typo_rank":1,"proximity_rank":10,"attribute_rank":757,"exact_rank":4,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":1381,"title":"hell on wheels tv series","description":"the construction of the first transcontinental railroad across the united states in the world","tag":"red","_geo": { "lat": 50.63728851135729, "lng": 3.0703951595971626 },"":"", "opt1": 4, "tag_in": "four"} +{"id":"E","word_rank":2,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":4,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":1979,"title":"hello kitty","description":"also known by her full name kitty white is a fictional character produced by the japanese company sanrio","tag":"green","_geo": { "lat": 50.64264610511925, "lng": 3.0665099941857634 },"":"", "opt1": "E", "tag_in": "five"} +{"id":"F","word_rank":2,"typo_rank":1,"proximity_rank":0,"attribute_rank":1017,"exact_rank":5,"asc_desc_rank":5,"sort_by_rank":0,"geo_rank":65022,"title":"laptop orchestra","description":"a laptop orchestra lork or lo is a chamber music ensemble consisting primarily of laptops like helo huddersfield experimental laptop orchestra","tag":"blue","_geo": { "lat": 51.05028653642387, "lng": 3.7301072771642096 },"":"", "opt1": ["F"], "tag_in": null} {"id":"G","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":5,"sort_by_rank":2,"geo_rank":34692,"title":"hello world film","description":"hello world is a 2019 japanese animated sci fi romantic drama film directed by tomohiko ito and produced by graphinica","tag":"red","_geo": { "lat": 50.78776041427129, "lng": 2.661201766290338 },"":"", "opt1": [7]} -{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":"", "opt1": ["H", 8]} -{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"geo_rank":740667,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","_geo": { "lat": 43.973998070351065, "lng": 3.4661837318345032 },"":""} -{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":"", "opt1": {}} -{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"hallo creation system","description":"in few word hallo was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":"", "opt1": [{"opt2": 11}] } -{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":"", "opt1": {"opt2": [12]}} +{"id":"H","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":202182,"title":"world hello day","description":"holiday observed on november 21 to express that conflicts should be resolved through communication rather than the use of force","tag":"green","_geo": { "lat": 48.875617484531965, "lng": 2.346747821504194 },"":"", "opt1": ["H", 8], "tag_in": 8} +{"id":"I","word_rank":0,"typo_rank":0,"proximity_rank":8,"attribute_rank":338,"exact_rank":3,"asc_desc_rank":3,"sort_by_rank":0,"geo_rank":740667,"title":"hello world song","description":"hello world is a song written by tom douglas tony lane and david lee and recorded by american country music group lady antebellum","tag":"blue","_geo": { "lat": 43.973998070351065, "lng": 3.4661837318345032 },"":"", "tag_in": "nine"} +{"id":"J","word_rank":1,"typo_rank":0,"proximity_rank":1,"attribute_rank":1,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":739020,"title":"hello cruel world","description":"hello cruel world is an album by new zealand band tall dwarfs","tag":"green","_geo": { "lat": 43.98920130353838, "lng": 3.480519311627928 },"":"", "opt1": {}, "tag_in": 10} +{"id":"K","word_rank":0,"typo_rank":2,"proximity_rank":9,"attribute_rank":670,"exact_rank":5,"asc_desc_rank":1,"sort_by_rank":2,"geo_rank":738830,"title":"hallo creation system","description":"in few word hallo was a construction toy created by the american company mattel to engage girls in construction play","tag":"red","_geo": { "lat": 43.99155030238669, "lng": 3.503453528249425 },"":"", "opt1": [{"opt2": 11}] , "tag_in": "eleven"} +{"id":"L","word_rank":0,"typo_rank":0,"proximity_rank":2,"attribute_rank":250,"exact_rank":4,"asc_desc_rank":0,"sort_by_rank":0,"geo_rank":737861,"title":"good morning world","description":"good morning world is an american sitcom broadcast on cbs tv during the 1967 1968 season","tag":"blue","_geo": { "lat": 44.000507750283695, "lng": 3.5116812040621572 },"":"", "opt1": {"opt2": [12]}, "tag_in": 12} {"id":"M","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":0,"asc_desc_rank":0,"sort_by_rank":2,"geo_rank":739203,"title":"hello world america","description":"a perfect match for a perfect engine using the query hello world america","tag":"red","_geo": { "lat": 43.99150729038736, "lng": 3.606143957295055 },"":"", "opt1": [13, [{"opt2": null}]]} -{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":"", "opt1": {"a": 1, "opt2": {"opt3": 14}} } -{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":"", "opt1": [[[[]]]] } +{"id":"N","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":1,"asc_desc_rank":4,"sort_by_rank":1,"geo_rank":9499586,"title":"hello world america unleashed","description":"a very good match for a very good engine using the query hello world america","tag":"green","_geo": { "lat": 35.511540843367115, "lng": 138.764368875787 },"":"", "opt1": {"a": 1, "opt2": {"opt3": 14}}} +{"id":"O","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":10,"exact_rank":0,"asc_desc_rank":6,"sort_by_rank":0,"geo_rank":9425163,"title":"a perfect match for a perfect engine using the query hello world america","description":"hello world america","tag":"blue","_geo": { "lat": 35.00536702277189, "lng": 135.76118763940391 },"":"", "opt1": [[[[]]]]} {"id":"P","word_rank":0,"typo_rank":0,"proximity_rank":0,"attribute_rank":12,"exact_rank":1,"asc_desc_rank":3,"sort_by_rank":2,"geo_rank":9422437,"title":"a very good match for a very good engine using the query hello world america","description":"hello world america unleashed","tag":"red","_geo": { "lat": 35.06462306367058, "lng": 135.8338440354251 },"":"", "opt1.opt2": 16} {"id":"Q","word_rank":1,"typo_rank":0,"proximity_rank":0,"attribute_rank":0,"exact_rank":3,"asc_desc_rank":2,"sort_by_rank":1,"geo_rank":9339230,"title":"hello world","description":"a hello world program generally is a computer program that outputs or displays the message hello world","tag":"green","_geo": { "lat": 34.39548365683149, "lng": 132.4535960928883 },"":""} diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs index 1700a1478..5451a9076 100644 --- a/milli/tests/search/filters.rs +++ b/milli/tests/search/filters.rs @@ -85,4 +85,6 @@ test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]); test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]); test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]); -test_filter!(exists_filter_2, vec![Right("opt1.opt2 EXISTS")]); +test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]); +test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]); +test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]); diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 728a4eb0b..0e1d43d2a 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -44,7 +44,8 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { S("asc_desc_rank"), S("_geo"), S("opt1"), - S("opt1.opt2") + S("opt1.opt2"), + S("tag_in") }); builder.set_sortable_fields(hashset! { S("tag"), @@ -205,6 +206,15 @@ fn execute_filter(filter: &str, document: &TestDocument) -> Option { } else if let Some(opt1) = &document.opt1 { id = contains_key_rec(opt1, "opt2").then(|| document.id.clone()); } + } else if matches!( + filter, + "tag_in IN[1, 2, 3, four, five]" | "NOT tag_in NOT IN[1, 2, 3, four, five]" + ) { + id = matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E") + .then(|| document.id.clone()); + } else if matches!(filter, "tag_in NOT IN[1, 2, 3, four, five]") { + id = (!matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E")) + .then(|| document.id.clone()); } id }