meilisearch/filter-parser/src/value.rs

use nom::branch::alt;
use nom::bytes::complete::{take_till, take_while, take_while1};
use nom::character::complete::{char, multispace0};
use nom::combinator::cut;
use nom::sequence::{delimited, terminated};
use nom::{InputIter, InputLength, InputTake, Slice};

use crate::error::{ExpectedValueKind, NomErrorExt};
use crate::{
    parse_geo, parse_geo_bounding_box, parse_geo_distance, parse_geo_point, parse_geo_radius,
    Error, ErrorKind, IResult, Span, Token,
};

/// Returns an owned copy of `buf` in which every escaped `char_to_escape` (a `\`
/// immediately followed by that character) is replaced by the bare character.
/// Any other `\` present in the [Span] is left untouched.
fn unescape(buf: Span, char_to_escape: char) -> String {
    let raw: &str = &buf;
    // The two-character sequence we are looking for: `\` + the escaped character.
    let escaped_sequence: String = ['\\', char_to_escape].iter().collect();
    // Cut the text on every escaped sequence and glue the pieces back together
    // with the bare character in between.
    let pieces: Vec<&str> = raw.split(escaped_sequence.as_str()).collect();
    pieces.join(char_to_escape.to_string().as_str())
}

/// Parses the content of a quoted value, stopping right before the first `quote`
/// that is not preceded by a `\`.
///
/// The opening quote must already have been consumed by the caller and the closing
/// one is left in the remaining input. When at least one escaped quote was met, the
/// returned [Token] also carries the unescaped version of the content. If no closing
/// quote is found the whole input is returned as the value. An input ending with a
/// dangling `\` is rejected with `ErrorKind::MalformedValue`.
fn quoted_by(quote: char, input: Span) -> IResult<Token> {
    // empty fields / values are valid in json
    if input.is_empty() {
        return Ok((input.slice(input.input_len()..), input.into()));
    }

    let mut saw_escaped_quote = false;
    let mut chars = input.iter_indices();

    loop {
        match chars.next() {
            // we reached the end of the input without meeting an unescaped quote
            None => break,
            // an unescaped quote ends the value
            Some((position, current)) if current == quote => {
                let (rest, content) = input.take_split(position);
                let unescaped = saw_escaped_quote.then(|| unescape(content, quote));
                return Ok((rest, Token::new(content, unescaped)));
            }
            // a `\` always swallows the character that follows it
            Some((_, '\\')) => match chars.next() {
                Some((_, following)) => {
                    saw_escaped_quote |= following == quote;
                }
                None => {
                    return Err(nom::Err::Error(Error::new_from_kind(
                        input,
                        ErrorKind::MalformedValue,
                    )));
                }
            },
            // anything else is part of the value, keep advancing
            Some(_) => {}
        }
    }

    let unescaped = saw_escaped_quote.then(|| unescape(input, quote));
    Ok((input.slice(input.input_len()..), Token::new(input, unescaped)))
}

// word           = (alphanumeric | _ | - | .)+    except for reserved keywords
pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult<Token<'a>> {
    let (input, word): (_, Token<'a>) =
        take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
    if is_keyword(word.value()) {
        return Err(nom::Err::Error(Error::new_from_kind(
            input,
            ErrorKind::ReservedKeyword(word.value().to_owned()),
        )));
    }
    Ok((input, word))
}

// word           = {tag}
pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> {
    move |input| {
        let (input, word): (_, Token<'a>) =
            take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
        if word.value() == tag {
            Ok((input, word))
        } else {
            Err(nom::Err::Error(Error::new_from_kind(
                input,
                ErrorKind::InternalError(nom::error::ErrorKind::Tag),
            )))
        }
    }
}

/// value          = WS* ( word | singleQuoted | doubleQuoted) WS+
pub fn parse_value(input: Span) -> IResult<Token> {
    // to get better diagnostic message we are going to strip the left whitespaces from the input right now
    let (input, _) = take_while(char::is_whitespace)(input)?;

    // then, we want to check if the user is misusing a geo expression
    // This expression can’t finish without error.
    // We want to return an error in case of failure.
    let geo_reserved_parse_functions = [parse_geo_point, parse_geo_distance, parse_geo];

    for parser in geo_reserved_parse_functions {
        if let Err(err) = parser(input) {
            if err.is_failure() {
                return Err(err);
            }
        }
    }

    match parse_geo_radius(input) {
        Ok(_) => {
            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
        }
        // if we encountered a failure it means the user badly wrote a _geoRadius filter.
        // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
        Err(e) if e.is_failure() => {
            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
        }
        _ => (),
    }

    match parse_geo_bounding_box(input) {
        Ok(_) => {
            return Err(nom::Err::Failure(Error::new_from_kind(
                input,
                ErrorKind::MisusedGeoBoundingBox,
            )))
        }
        // if we encountered a failure it means the user badly wrote a _geoBoundingBox filter.
        // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
        Err(e) if e.is_failure() => {
            return Err(nom::Err::Failure(Error::new_from_kind(
                input,
                ErrorKind::MisusedGeoBoundingBox,
            )))
        }
        _ => (),
    }

    // this parser is only used when an error is encountered and it parse the
    // largest string possible that do not contain any “language” syntax.
    // If we try to parse `name = 🦀 AND language = rust` we want to return an
    // error saying we could not parse `🦀`. Not that no value were found or that
    // we could note parse `🦀 AND language = rust`.
    // we want to remove the space before entering the alt because if we don't,
    // when we create the errors from the output of the alt we have spaces everywhere
    let error_word = take_till::<_, _, Error>(is_syntax_component);

    let (input, value) = terminated(
        alt((
            delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
            delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
            word_not_keyword,
        )),
        multispace0,
    )(input)
    // if we found nothing in the alt it means the user specified something that was not recognized as a value
    .map_err(|e: nom::Err<Error>| {
        e.map_err(|error| {
            let expected_value_kind = if matches!(error.kind(), ErrorKind::ReservedKeyword(_)) {
                ExpectedValueKind::ReservedKeyword
            } else {
                ExpectedValueKind::Other
            };
            Error::new_from_kind(
                error_word(input).unwrap().1,
                ErrorKind::ExpectedValue(expected_value_kind),
            )
        })
    })
    .map_err(|e| {
        e.map_fail(|failure| {
            // if we found encountered a char failure it means the user had an unmatched quote
            if matches!(failure.kind(), ErrorKind::Char(_)) {
                Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(failure.char()))
            } else {
                // else we let the failure untouched
                failure
            }
        })
    })?;

    Ok((input, value))
}

/// Tells whether `c` may appear in an unquoted value: any alphanumeric character,
/// or one of `_`, `-` and `.`.
fn is_value_component(c: char) -> bool {
    matches!(c, '_' | '-' | '.') || c.is_alphanumeric()
}

/// Tells whether `c` belongs to the filter syntax itself: any whitespace, or one of
/// `(`, `)`, `=`, `<`, `>` and `!`.
fn is_syntax_component(c: char) -> bool {
    matches!(c, '(' | ')' | '=' | '<' | '>' | '!') || c.is_whitespace()
}

/// Tells whether `s` is exactly (case-sensitively) one of the words reserved by the
/// filter language.
fn is_keyword(s: &str) -> bool {
    const RESERVED: [&str; 11] = [
        "AND",
        "OR",
        "IN",
        "NOT",
        "TO",
        "EXISTS",
        "IS",
        "NULL",
        "EMPTY",
        "_geoRadius",
        "_geoBoundingBox",
    ];

    RESERVED.iter().any(|keyword| *keyword == s)
}

// Unit tests for the value parser: `parse_value`, `quoted_by` and `unescape`.
#[cfg(test)]
pub mod test {
    use nom::Finish;

    use super::*;
    use crate::tests::rtok;

    /// Checks that `parse_value` succeeds on each input and yields the expected token.
    #[test]
    fn test_span() {
        // (input, expected token)
        // NOTE(review): `rtok` is defined in `crate::tests`; presumably it builds the
        // expected token from (text preceding the value, value) — confirm there.
        let test_case = [
            ("channel", rtok("", "channel")),
            (".private", rtok("", ".private")),
            ("I-love-kebab", rtok("", "I-love-kebab")),
            ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
            ("parens(", rtok("", "parens")),
            ("parens)", rtok("", "parens")),
            ("not!", rtok("", "not")),
            ("    channel", rtok("    ", "channel")),
            ("channel     ", rtok("", "channel")),
            ("    channel     ", rtok("    ", "channel")),
            ("'channel'", rtok("'", "channel")),
            ("\"channel\"", rtok("\"", "channel")),
            ("'cha)nnel'", rtok("'", "cha)nnel")),
            ("'cha\"nnel'", rtok("'", "cha\"nnel")),
            ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
            ("\" some spaces \"", rtok("\"", " some spaces ")),
            // NOTE(review): the next two cases are identical and pass a `'` prefix for
            // a double-quoted input — presumably only the prefix length matters to
            // `rtok`; confirm, and consider deduplicating.
            ("\"cha'nnel\"", rtok("'", "cha'nnel")),
            ("\"cha'nnel\"", rtok("'", "cha'nnel")),
            ("I'm tamo", rtok("'m tamo", "I")),
            ("\"I'm \\\"super\\\" tamo\"", rtok("\"", "I'm \\\"super\\\" tamo")),
        ];

        for (input, expected) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            assert!(
                result.is_ok(),
                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
                expected,
                result.unwrap_err()
            );
            let token = result.unwrap().1;
            assert_eq!(token, expected, "Filter `{}` failed.", input);
        }
    }

    /// Calls `quoted_by` directly with `"` as the quote and checks the remaining
    /// input, the returned token and the value exposed by that token.
    #[test]
    fn test_escape_inside_double_quote() {
        // (input, remaining, expected output token, output value)
        let test_case = [
            ("aaaa", "", rtok("", "aaaa"), "aaaa"),
            (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
            (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
            (r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
            (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
            (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
            (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
        ];

        for (input, remaining, expected_tok, expected_val) in test_case {
            let span = Span::new_extra(input, "");
            let result = quoted_by('"', span);
            assert!(result.is_ok());

            let (rem, output) = result.unwrap();
            assert_eq!(rem.to_string(), remaining);
            assert_eq!(output, expected_tok);
            assert_eq!(output.value(), expected_val.to_string());
        }
    }

    /// Checks that `unescape` only rewrites `\` + quote sequences and keeps the
    /// other backslashes as they are.
    #[test]
    fn test_unescape() {
        // double quote
        assert_eq!(
            unescape(Span::new_extra(r#"Hello \"World\""#, ""), '"'),
            r#"Hello "World""#.to_string()
        );
        assert_eq!(
            unescape(Span::new_extra(r#"Hello \\\"World\\\""#, ""), '"'),
            r#"Hello \\"World\\""#.to_string()
        );
        // simple quote
        assert_eq!(
            unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
            r#"Hello 'World'"#.to_string()
        );
        assert_eq!(
            unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
            r#"Hello \\'World\\'"#.to_string()
        );
    }

    /// Checks the value exposed by the token returned by `parse_value`, and whether
    /// an owned (unescaped) string was attached to the token.
    #[test]
    fn test_value() {
        let test_case = [
            // (input, expected value, if a string was generated to hold the new value)
            ("channel", "channel", false),
            // All the base test, no escaped string should be generated
            (".private", ".private", false),
            ("I-love-kebab", "I-love-kebab", false),
            ("but_snakes_is_also_good", "but_snakes_is_also_good", false),
            ("parens(", "parens", false),
            ("parens)", "parens", false),
            ("not!", "not", false),
            ("    channel", "channel", false),
            ("channel     ", "channel", false),
            ("    channel     ", "channel", false),
            ("'channel'", "channel", false),
            ("\"channel\"", "channel", false),
            ("'cha)nnel'", "cha)nnel", false),
            ("'cha\"nnel'", "cha\"nnel", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("\" some spaces \"", " some spaces ", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("I'm tamo", "I", false),
            // escaped thing but not quote
            (r#""\\""#, r#"\\"#, false),
            (r#""\\\\\\""#, r#"\\\\\\"#, false),
            (r#""aa\\aa""#, r#"aa\\aa"#, false),
            // with double quote
            (r#""Hello \"world\"""#, r#"Hello "world""#, true),
            (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
            (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
            (r#""\"\"""#, r#""""#, true),
            // with simple quote
            (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
            (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
            (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
            (r#"'\'\''"#, r#"''"#, true),
        ];

        for (input, expected, escaped) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            assert!(
                result.is_ok(),
                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
                expected,
                result.unwrap_err()
            );
            let token = result.unwrap().1;
            // `token.value` is only set when an unescaped copy had to be generated
            assert_eq!(
                token.value.is_some(),
                escaped,
                "Filter `{}` was not supposed to be escaped",
                input
            );
            assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
        }
    }

    /// Checks that `parse_value` fails on each input and that the span attached to
    /// the error covers exactly the expected text.
    #[test]
    fn diagnostic() {
        // (input, text expected to be referenced by the error)
        let test_case = [
            ("🦀", "🦀"),
            ("     🦀", "🦀"),
            ("🦀 AND crab = truc", "🦀"),
            ("🦀_in_name", "🦀_in_name"),
            (" (name = ...", ""),
        ];

        for (input, expected) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            assert!(
                result.is_err(),
                "Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
                expected,
                result.unwrap()
            );
            // get the inner string referenced in the error
            let value = *result.finish().unwrap_err().context().fragment();
            assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
        }
    }
}
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								use nom::branch::alt;
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								use nom::bytes::complete::{take_till, take_while, take_while1};
 								use nom::character::complete::{char, multispace0};
 								use nom::combinator::cut;
 								use nom::sequence::{delimited, terminated};
-												Use snapshot testing for the filter parser

											
										
										
											2022-08-17 23:25:31 +08:00
+								use nom::{InputIter, InputLength, InputTake, Slice};
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
-												Filters: add explicit error message when using a keyword as value

											
										
										
											2022-08-17 22:06:29 +08:00
+								use crate::error::{ExpectedValueKind, NomErrorExt};
-												cargo fmt

											
										
										
											2022-10-29 01:10:58 +08:00
+								use crate::{
-												handle _geoDistance(x,y,z) filter error

											
										
										
											2023-03-31 22:24:25 +08:00
+								    parse_geo, parse_geo_bounding_box, parse_geo_distance, parse_geo_point, parse_geo_radius,
 								    Error, ErrorKind, IResult, Span, Token,
-												cargo fmt

											
										
										
											2022-10-29 01:10:58 +08:00
+								};
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
-												Apply code suggestions

Co-authored-by: Clément Renault <clement@meilisearch.com>

											
										
										
											2022-01-10 22:14:32 +08:00
+								/// This function goes through all characters in the [Span] if it finds any escaped character (`\`).
 								/// It generates a new string with all `\` removed from the [Span].
-												Handle the escapes of quote in the filters

											
										
										
											2021-12-20 23:18:15 +08:00
+								fn unescape(buf: Span, char_to_escape: char) -> String {
 								    let to_escape = format!("\\{}", char_to_escape);
 								    buf.replace(&to_escape, &char_to_escape.to_string())
 								}
 								/// Parse a value in quote. If it encounter an escaped quote it'll unescape it.
 								fn quoted_by(quote: char, input: Span) -> IResult<Token> {
 								    // empty fields / values are valid in json
 								    if input.is_empty() {
 								        return Ok((input.slice(input.input_len()..), input.into()));
 								    }
 								    let mut escaped = false;
 								    let mut i = input.iter_indices();
 								    while let Some((idx, c)) = i.next() {
-												Update filter-parser/src/value.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>
											
										
										
											2022-01-10 22:53:44 +08:00
+								        if c == quote {
 								            let (rem, output) = input.take_split(idx);
 								            return Ok((rem, Token::new(output, escaped.then(|| unescape(output, quote)))));
 								        } else if c == '\\' {
 								            if let Some((_, c)) = i.next() {
 								                escaped |= c == quote;
 								            } else {
 								                return Err(nom::Err::Error(Error::new_from_kind(
 								                    input,
 								                    ErrorKind::MalformedValue,
 								                )));
-												Handle the escapes of quote in the filters

											
										
										
											2021-12-20 23:18:15 +08:00
+								            }
 								        }
-												Update filter-parser/src/value.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>
											
										
										
											2022-01-10 22:53:44 +08:00
+								        // if it was preceeded by a `\` or if it was anything else we can continue to advance
-												Handle the escapes of quote in the filters

											
										
										
											2021-12-20 23:18:15 +08:00
+								    }
 								    Ok((
 								        input.slice(input.input_len()..),
 								        Token::new(input, escaped.then(|| unescape(input, quote))),
 								    ))
 								}
-												Fix filter parser handling of keywords and surrounding spaces

Now the following fragments are allowed:

AND(field =

AND'field' =

AND"field" =

											
										
										
											2022-08-17 22:53:40 +08:00
+								// word           = (alphanumeric | _ | - | .)+    except for reserved keywords
 								pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult<Token<'a>> {
 								    let (input, word): (_, Token<'a>) =
 								        take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
 								    if is_keyword(word.value()) {
 								        return Err(nom::Err::Error(Error::new_from_kind(
 								            input,
 								            ErrorKind::ReservedKeyword(word.value().to_owned()),
 								        )));
 								    }
 								    Ok((input, word))
 								}
 								// word           = {tag}
 								pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> {
 								    move |input| {
 								        let (input, word): (_, Token<'a>) =
 								            take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
 								        if word.value() == tag {
 								            Ok((input, word))
 								        } else {
 								            Err(nom::Err::Error(Error::new_from_kind(
 								                input,
 								                ErrorKind::InternalError(nom::error::ErrorKind::Tag),
 								            )))
 								        }
 								    }
 								}
-												Make filter parser more strict regarding spacing around operators

OR, AND, NOT, TO must now be followed by spaces

											
										
										
											2022-06-16 15:12:37 +08:00
+								/// value          = WS* ( word | singleQuoted | doubleQuoted) WS+
-												Fix cargo clippy errors

Dont apply clippy for tests for now

Fix clippy warnings of filter-parser package

parent 8352febd646ec4bcf56a44161e5c4dce0e55111f
author unvalley <38400669+unvalley@users.noreply.github.com> 1666325847 +0900
committer unvalley <kirohi.code@gmail.com> 1666791316 +0900

Update .github/workflows/rust.yml

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>

Allow clippy lint too_many_argments

Allow clippy lint needless_collect

Allow clippy lint too_many_arguments and type_complexity

Fix for clippy warnings comparison_chains

Fix for clippy warnings vec_init_then_push

Allow clippy lint should_implement_trait

Allow clippy lint drop_non_drop

Fix lifetime clipy warnings in filter-paprser

Execute cargo fmt

Fix clippy remaining warnings

Fix clippy remaining warnings again and allow lint on each place

											
										
										
											2022-10-14 22:44:10 +08:00
+								pub fn parse_value(input: Span) -> IResult<Token> {
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    // to get better diagnostic message we are going to strip the left whitespaces from the input right now
 								    let (input, _) = take_while(char::is_whitespace)(input)?;
 								    // then, we want to check if the user is misusing a geo expression
-												improve the readability of the _geoPoint thingy in the value

											
										
										
											2021-11-09 07:57:46 +08:00
+								    // This expression can’t finish without error.
 								    // We want to return an error in case of failure.
-												handle _geoDistance(x,y,z) filter error

											
										
										
											2023-03-31 22:24:25 +08:00
+								    let geo_reserved_parse_functions = [parse_geo_point, parse_geo_distance, parse_geo];
-												handle _geo(x,y,z) filter error

											
										
										
											2023-03-31 22:21:27 +08:00
 								    for parser in geo_reserved_parse_functions {
 								        if let Err(err) = parser(input) {
 								            if err.is_failure() {
 								                return Err(err);
 								            }
-												improve the readability of the _geoPoint thingy in the value

											
										
										
											2021-11-09 07:57:46 +08:00
+								        }
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								    }
-												handle _geo(x,y,z) filter error

											
										
										
											2023-03-31 22:21:27 +08:00
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								    match parse_geo_radius(input) {
-												cargo fmt

											
										
										
											2022-10-29 01:10:58 +08:00
+								        Ok(_) => {
 								            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
 								        }
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								        // if we encountered a failure it means the user badly wrote a _geoRadius filter.
-												Apply review comments

											
										
										
											2022-11-01 16:56:38 +08:00
+								        // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								        Err(e) if e.is_failure() => {
-												Add error handling and earth lap collision with bounding box

											
										
										
											2022-10-28 21:30:53 +08:00
+								            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
 								        }
 								        _ => (),
 								    }
 								    match parse_geo_bounding_box(input) {
-												cargo fmt

											
										
										
											2022-10-29 01:10:58 +08:00
+								        Ok(_) => {
 								            return Err(nom::Err::Failure(Error::new_from_kind(
 								                input,
 								                ErrorKind::MisusedGeoBoundingBox,
 								            )))
 								        }
-												Add error handling and earth lap collision with bounding box

											
										
										
											2022-10-28 21:30:53 +08:00
+								        // if we encountered a failure it means the user badly wrote a _geoBoundingBox filter.
-												Apply review comments

											
										
										
											2022-11-01 16:56:38 +08:00
+								        // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
-												Add error handling and earth lap collision with bounding box

											
										
										
											2022-10-28 21:30:53 +08:00
+								        Err(e) if e.is_failure() => {
-												cargo fmt

											
										
										
											2022-10-29 01:10:58 +08:00
+								            return Err(nom::Err::Failure(Error::new_from_kind(
 								                input,
 								                ErrorKind::MisusedGeoBoundingBox,
 								            )))
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								        }
 								        _ => (),
 								    }
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    // this parser is only used when an error is encountered and it parse the
 								    // largest string possible that do not contain any “language” syntax.
 								    // If we try to parse `name = 🦀 AND language = rust` we want to return an
 								    // error saying we could not parse `🦀`. Not that no value were found or that
 								    // we could note parse `🦀 AND language = rust`.
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								    // we want to remove the space before entering the alt because if we don't,
 								    // when we create the errors from the output of the alt we have spaces everywhere
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    let error_word = take_till::<_, _, Error>(is_syntax_component);
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
-												Improve syntax errors for `IN` filter

											
										
										
											2022-06-15 16:13:34 +08:00
+								    let (input, value) = terminated(
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								        alt((
-												Handle the escapes of quote in the filters

											
										
										
											2021-12-20 23:18:15 +08:00
+								            delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
 								            delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
-												Fix filter parser handling of keywords and surrounding spaces

Now the following fragments are allowed:

AND(field =

AND'field' =

AND"field" =

											
										
										
											2022-08-17 22:53:40 +08:00
+								            word_not_keyword,
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								        )),
 								        multispace0,
 								    )(input)
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    // if we found nothing in the alt it means the user specified something that was not recognized as a value
 								    .map_err(|e: nom::Err<Error>| {
-												Filters: add explicit error message when using a keyword as value

											
										
										
											2022-08-17 22:06:29 +08:00
+								        e.map_err(|error| {
 								            let expected_value_kind = if matches!(error.kind(), ErrorKind::ReservedKeyword(_)) {
 								                ExpectedValueKind::ReservedKeyword
 								            } else {
 								                ExpectedValueKind::Other
 								            };
 								            Error::new_from_kind(
 								                error_word(input).unwrap().1,
 								                ErrorKind::ExpectedValue(expected_value_kind),
 								            )
 								        })
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    })
-												stop panicking in case of internal error

											
										
										
											2021-11-04 23:20:53 +08:00
+								    .map_err(|e| {
-												Handle the escapes of quote in the filters

											
										
										
											2021-12-20 23:18:15 +08:00
+								        e.map_fail(|failure| {
 								            // if we found encountered a char failure it means the user had an unmatched quote
 								            if matches!(failure.kind(), ErrorKind::Char(_)) {
 								                Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(failure.char()))
 								            } else {
 								                // else we let the failure untouched
 								                failure
 								            }
 								        })
-												Improve syntax errors for `IN` filter

											
										
										
											2022-06-15 16:13:34 +08:00
+								    })?;
 								    Ok((input, value))
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								}
-												Rename the key_component into a value_component

Co-authored-by: marin <postma.marin@protonmail.com>

											
										
										
											2021-11-09 07:58:23 +08:00
+								fn is_value_component(c: char) -> bool {
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								    c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
 								}
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								fn is_syntax_component(c: char) -> bool {
-												re-ignore the ! symbol when generating a good error message

											
										
										
											2021-11-10 00:08:04 +08:00
+								    c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c)
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								}
-												Improve syntax errors for `IN` filter

											
										
										
											2022-06-15 16:13:34 +08:00
/// Returns `true` when `s` is exactly one of the reserved filter keywords.
///
/// The comparison is case-sensitive and matches the whole string: `"AND"` is a
/// keyword, `"and"` and `"AND "` are not.
fn is_keyword(s: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        "AND",
        "OR",
        "IN",
        "NOT",
        "TO",
        "EXISTS",
        "IS",
        "NULL",
        "EMPTY",
        "_geoRadius",
        "_geoBoundingBox",
    ];

    KEYWORDS.iter().any(|&keyword| keyword == s)
}
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
/// Unit tests for the value parser: token spans, quote escaping and error diagnostics.
#[cfg(test)]
pub mod test {
    use nom::Finish;

    use super::*;
    use crate::tests::rtok;

    /// `parse_value` must return the expected token for bare words, padded words
    /// and quoted values.
    ///
    /// NOTE(review): `rtok(prefix, value)` comes from `crate::tests`; it presumably
    /// builds the token for `value` located right after `prefix` — confirm there.
    #[test]
    fn test_span() {
        let test_case = [
            ("channel", rtok("", "channel")),
            (".private", rtok("", ".private")),
            ("I-love-kebab", rtok("", "I-love-kebab")),
            ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
            ("parens(", rtok("", "parens")),
            ("parens)", rtok("", "parens")),
            ("not!", rtok("", "not")),
            ("    channel", rtok("    ", "channel")),
            ("channel     ", rtok("", "channel")),
            ("    channel     ", rtok("    ", "channel")),
            ("'channel'", rtok("'", "channel")),
            ("\"channel\"", rtok("\"", "channel")),
            ("'cha)nnel'", rtok("'", "cha)nnel")),
            ("'cha\"nnel'", rtok("'", "cha\"nnel")),
            ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
            ("\" some spaces \"", rtok("\"", " some spaces ")),
            ("\"cha'nnel\"", rtok("'", "cha'nnel")),
            ("\"cha'nnel\"", rtok("'", "cha'nnel")),
            ("I'm tamo", rtok("'m tamo", "I")),
            ("\"I'm \\\"super\\\" tamo\"", rtok("\"", "I'm \\\"super\\\" tamo")),
        ];

        for (input, expected) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            // Report the filter that was fed to the parser, not the expected token.
            assert!(
                result.is_ok(),
                "Filter `{}` was supposed to be parsed but failed with the following error: `{}`",
                input,
                result.unwrap_err()
            );
            let token = result.unwrap().1;
            assert_eq!(token, expected, "Filter `{}` failed.", input);
        }
    }

    /// `quoted_by('"', …)` must stop at the first unescaped double quote and only
    /// unescape `\"` sequences, leaving every other backslash untouched.
    #[test]
    fn test_escape_inside_double_quote() {
        // (input, remaining, expected output token, output value)
        let test_case = [
            ("aaaa", "", rtok("", "aaaa"), "aaaa"),
            (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
            (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
            (r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
            (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
            (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
            (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
        ];

        for (input, remaining, expected_tok, expected_val) in test_case {
            let span = Span::new_extra(input, "");
            let result = quoted_by('"', span);
            assert!(result.is_ok(), "Input `{}` was supposed to be parsed but failed.", input);

            let (rem, output) = result.unwrap();
            assert_eq!(rem.to_string(), remaining, "Input `{}` failed.", input);
            assert_eq!(output, expected_tok, "Input `{}` failed.", input);
            assert_eq!(output.value(), expected_val.to_string(), "Input `{}` failed.", input);
        }
    }

    /// `unescape` must drop the backslash in front of the given quote character
    /// and keep all the other backslashes.
    #[test]
    fn test_unescape() {
        // double quote
        assert_eq!(
            unescape(Span::new_extra(r#"Hello \"World\""#, ""), '"'),
            r#"Hello "World""#.to_string()
        );
        assert_eq!(
            unescape(Span::new_extra(r#"Hello \\\"World\\\""#, ""), '"'),
            r#"Hello \\"World\\""#.to_string()
        );
        // simple quote
        assert_eq!(
            unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
            r#"Hello 'World'"#.to_string()
        );
        assert_eq!(
            unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
            r#"Hello \\'World\\'"#.to_string()
        );
    }

    /// `parse_value` must yield the expected value, and only allocate an owned
    /// (unescaped) string when an escaped quote was actually present.
    #[test]
    fn test_value() {
        let test_case = [
            // (input, expected value, if a string was generated to hold the new value)
            ("channel", "channel", false),
            // All the base test, no escaped string should be generated
            (".private", ".private", false),
            ("I-love-kebab", "I-love-kebab", false),
            ("but_snakes_is_also_good", "but_snakes_is_also_good", false),
            ("parens(", "parens", false),
            ("parens)", "parens", false),
            ("not!", "not", false),
            ("    channel", "channel", false),
            ("channel     ", "channel", false),
            ("    channel     ", "channel", false),
            ("'channel'", "channel", false),
            ("\"channel\"", "channel", false),
            ("'cha)nnel'", "cha)nnel", false),
            ("'cha\"nnel'", "cha\"nnel", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("\" some spaces \"", " some spaces ", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("I'm tamo", "I", false),
            // escaped thing but not quote
            (r#""\\""#, r#"\\"#, false),
            (r#""\\\\\\""#, r#"\\\\\\"#, false),
            (r#""aa\\aa""#, r#"aa\\aa"#, false),
            // with double quote
            (r#""Hello \"world\"""#, r#"Hello "world""#, true),
            (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
            (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
            (r#""\"\"""#, r#""""#, true),
            // with simple quote
            (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
            (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
            (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
            (r#"'\'\''"#, r#"''"#, true),
        ];

        for (input, expected, escaped) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            // Report the filter that was fed to the parser, not the expected value.
            assert!(
                result.is_ok(),
                "Filter `{}` was supposed to be parsed but failed with the following error: `{}`",
                input,
                result.unwrap_err()
            );
            let token = result.unwrap().1;
            assert_eq!(
                token.value.is_some(),
                escaped,
                "Filter `{}` was not supposed to be escaped",
                input
            );
            assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
        }
    }

    /// Invalid values must be rejected, and the error must point at the expected
    /// fragment of the input.
    #[test]
    fn diagnostic() {
        // (input, fragment the error context is expected to reference)
        let test_case = [
            ("🦀", "🦀"),
            ("     🦀", "🦀"),
            ("🦀 AND crab = truc", "🦀"),
            ("🦀_in_name", "🦀_in_name"),
            (" (name = ...", ""),
        ];

        for (input, expected) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            // Report the filter that was fed to the parser, not the expected fragment.
            assert!(
                result.is_err(),
                "Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
                input,
                result.unwrap()
            );
            // get the inner string referenced in the error
            let value = *result.finish().unwrap_err().context().fragment();
            assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
        }
    }
}