meilisearch/filter-parser/src/value.rs

use nom::branch::alt;
use nom::bytes::complete::{take_till, take_while, take_while1};
use nom::character::complete::{char, multispace0};
use nom::combinator::cut;
use nom::sequence::{delimited, terminated};

use crate::error::NomErrorExt;
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};

/// value          = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
pub fn parse_value(input: Span) -> IResult<Token> {
    // to get better diagnostic message we are going to strip the left whitespaces from the input right now
    let (input, _) = take_while(char::is_whitespace)(input)?;

    // then, we want to check if the user is misusing a geo expression
    // This expression can’t finish without error.
    // We want to return an error in case of failure.
    if let Err(err) = parse_geo_point(input) {
        if err.is_failure() {
            return Err(err);
        }
    }
    match parse_geo_radius(input) {
        Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))),
        // if we encountered a failure it means the user badly wrote a _geoRadius filter.
        // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
        Err(e) if e.is_failure() => {
            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo)))
        }
        _ => (),
    }

    // singleQuoted   = "'" .* all but quotes "'"
    let simple_quoted = take_till(|c: char| c == '\'');
    // doubleQuoted   = "\"" (word | spaces)* "\""
    let double_quoted = take_till(|c: char| c == '"');
    // word           = (alphanumeric | _ | - | .)+
    let word = take_while1(is_value_component);

    // this parser is only used when an error is encountered and it parse the
    // largest string possible that do not contain any “language” syntax.
    // If we try to parse `name = 🦀 AND language = rust` we want to return an
    // error saying we could not parse `🦀`. Not that no value were found or that
    // we could note parse `🦀 AND language = rust`.
    // we want to remove the space before entering the alt because if we don't,
    // when we create the errors from the output of the alt we have spaces everywhere
    let error_word = take_till::<_, _, Error>(is_syntax_component);

    terminated(
        alt((
            delimited(char('\''), cut(simple_quoted), cut(char('\''))),
            delimited(char('"'), cut(double_quoted), cut(char('"'))),
            word,
        )),
        multispace0,
    )(input)
    .map(|(s, t)| (s, t.into()))
    // if we found nothing in the alt it means the user specified something that was not recognized as a value
    .map_err(|e: nom::Err<Error>| {
        e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue))
    })
    // if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote
    .map_err(|e| {
        e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())))
    })
}

/// Returns `true` when `c` may appear in an unquoted word value: any
/// alphanumeric character (as defined by `char::is_alphanumeric`), `_`, `-` or `.`.
fn is_value_component(c: char) -> bool {
    matches!(c, '_' | '-' | '.') || c.is_alphanumeric()
}

/// Returns `true` when `c` belongs to the filter syntax rather than to a value:
/// any whitespace character, a parenthesis, `=`, `<` or `>`.
fn is_syntax_component(c: char) -> bool {
    match c {
        '(' | ')' | '=' | '<' | '>' => true,
        other => other.is_whitespace(),
    }
}

#[cfg(test)]
pub mod test {
    use nom::Finish;

    use super::*;
    use crate::tests::rtok;

    /// Inputs that must be accepted by `parse_value`, paired with the token
    /// expected back (built with `rtok` from the text preceding the value and
    /// the value itself).
    #[test]
    fn name() {
        let test_case = [
            // bare words
            ("channel", rtok("", "channel")),
            (".private", rtok("", ".private")),
            ("I-love-kebab", rtok("", "I-love-kebab")),
            ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
            // a bare word stops at the first character that is not a value component
            ("parens(", rtok("", "parens")),
            ("parens)", rtok("", "parens")),
            ("not!", rtok("", "not")),
            // surrounding whitespace is not part of the value
            ("    channel", rtok("    ", "channel")),
            ("channel     ", rtok("", "channel")),
            ("    channel     ", rtok("    ", "channel")),
            // quoted values keep everything up to the matching quote
            ("'channel'", rtok("'", "channel")),
            ("\"channel\"", rtok("\"", "channel")),
            ("'cha)nnel'", rtok("'", "cha)nnel")),
            ("'cha\"nnel'", rtok("'", "cha\"nnel")),
            ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
            ("\" some spaces \"", rtok("\"", " some spaces ")),
            // a quote in the middle of a bare word simply ends the word
            ("I'm tamo", rtok("'m tamo", "I")),
        ];

        for (input, expected) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            // report the filter that was being parsed, not the expected token
            assert!(
                result.is_ok(),
                "Filter `{}` was supposed to be parsed but failed with the following error: `{}`",
                input,
                result.unwrap_err()
            );
            let value = result.unwrap().1;
            assert_eq!(value, expected, "Filter `{}` failed.", input);
        }
    }

    /// Inputs that must be rejected by `parse_value`, paired with the fragment
    /// the resulting error is expected to point at.
    #[test]
    fn diagnostic() {
        let test_case = [
            ("🦀", "🦀"),
            ("     🦀", "🦀"),
            ("🦀 AND crab = truc", "🦀"),
            ("🦀_in_name", "🦀_in_name"),
            (" (name = ...", ""),
        ];

        for (input, expected) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            // report the filter that was being parsed, not the expected error fragment
            assert!(
                result.is_err(),
                "Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
                input,
                result.unwrap()
            );
            // get the inner string referenced in the error
            let value = *result.finish().unwrap_err().context().fragment();
            assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
        }
    }
}
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								use nom::branch::alt;
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								use nom::bytes::complete::{take_till, take_while, take_while1};
 								use nom::character::complete::{char, multispace0};
 								use nom::combinator::cut;
 								use nom::sequence::{delimited, terminated};
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
-												Rename the ExtendNomError trait to NomErrorExt

Co-authored-by: marin <postma.marin@protonmail.com>

											
										
										
											2021-11-09 07:50:15 +08:00
+								use crate::error::NomErrorExt;
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
 								/// value          = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
-												remove all genericity in favor of my custom error type

											
										
										
											2021-11-03 03:27:07 +08:00
+								pub fn parse_value(input: Span) -> IResult<Token> {
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    // to get better diagnostic message we are going to strip the left whitespaces from the input right now
 								    let (input, _) = take_while(char::is_whitespace)(input)?;
 								    // then, we want to check if the user is misusing a geo expression
-												improve the readability of the _geoPoint thingy in the value

											
										
										
											2021-11-09 07:57:46 +08:00
+								    // This expression can’t finish without error.
 								    // We want to return an error in case of failure.
 								    if let Err(err) = parse_geo_point(input) {
 								        if err.is_failure() {
 								            return Err(err);
 								        }
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								    }
 								    match parse_geo_radius(input) {
-												stop panicking in case of internal error

											
										
										
											2021-11-04 23:20:53 +08:00
+								        Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))),
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								        // if we encountered a failure it means the user badly wrote a _geoRadius filter.
 								        // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
 								        Err(e) if e.is_failure() => {
-												stop panicking in case of internal error

											
										
										
											2021-11-04 23:20:53 +08:00
+								            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo)))
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								        }
 								        _ => (),
 								    }
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								    // singleQuoted   = "'" .* all but quotes "'"
-												Remove useless closure

Co-authored-by: marin <postma.marin@protonmail.com>
											
										
										
											2021-11-09 07:45:46 +08:00
+								    let simple_quoted = take_till(|c: char| c == '\'');
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								    // doubleQuoted   = "\"" (word | spaces)* "\""
-												Remove useless closure

Co-authored-by: marin <postma.marin@protonmail.com>
											
										
										
											2021-11-09 07:45:46 +08:00
+								    let double_quoted = take_till(|c: char| c == '"');
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								    // word           = (alphanumeric | _ | - | .)+
-												Rename the key_component into a value_component

Co-authored-by: marin <postma.marin@protonmail.com>

											
										
										
											2021-11-09 07:58:23 +08:00
+								    let word = take_while1(is_value_component);
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    // this parser is only used when an error is encountered and it parse the
 								    // largest string possible that do not contain any “language” syntax.
 								    // If we try to parse `name = 🦀 AND language = rust` we want to return an
 								    // error saying we could not parse `🦀`. Not that no value were found or that
 								    // we could note parse `🦀 AND language = rust`.
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								    // we want to remove the space before entering the alt because if we don't,
 								    // when we create the errors from the output of the alt we have spaces everywhere
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    let error_word = take_till::<_, _, Error>(is_syntax_component);
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
 								    terminated(
 								        alt((
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								            delimited(char('\''), cut(simple_quoted), cut(char('\''))),
 								            delimited(char('"'), cut(double_quoted), cut(char('"'))),
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								            word,
 								        )),
 								        multispace0,
 								    )(input)
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								    .map(|(s, t)| (s, t.into()))
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								    // if we found nothing in the alt it means the user specified something that was not recognized as a value
 								    .map_err(|e: nom::Err<Error>| {
 								        e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue))
 								    })
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								    // if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote
-												stop panicking in case of internal error

											
										
										
											2021-11-04 23:20:53 +08:00
+								    .map_err(|e| {
 								        e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())))
 								    })
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								}
-												Rename the key_component into a value_component

Co-authored-by: marin <postma.marin@protonmail.com>

											
										
										
											2021-11-09 07:58:23 +08:00
+								fn is_value_component(c: char) -> bool {
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								    c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
 								}
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								fn is_syntax_component(c: char) -> bool {
-												remove the `!` syntax for the not

											
										
										
											2021-11-09 23:47:54 +08:00
+								    c.is_whitespace() || ['(', ')', '=', '<', '>'].contains(&c)
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								}
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								#[cfg(test)]
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								pub mod test {
 								    use nom::Finish;
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								    use super::*;
 								    use crate::tests::rtok;
 								    #[test]
 								    fn name() {
 								        let test_case = [
 								            ("channel", rtok("", "channel")),
 								            (".private", rtok("", ".private")),
 								            ("I-love-kebab", rtok("", "I-love-kebab")),
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								            ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								            ("parens(", rtok("", "parens")),
 								            ("parens)", rtok("", "parens")),
 								            ("not!", rtok("", "not")),
 								            ("    channel", rtok("    ", "channel")),
 								            ("channel     ", rtok("", "channel")),
-												update the filter parser and some code for the fuzzer

											
										
										
											2021-11-04 21:22:35 +08:00
+								            ("    channel     ", rtok("    ", "channel")),
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								            ("'channel'", rtok("'", "channel")),
 								            ("\"channel\"", rtok("\"", "channel")),
 								            ("'cha)nnel'", rtok("'", "cha)nnel")),
 								            ("'cha\"nnel'", rtok("'", "cha\"nnel")),
 								            ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
 								            ("\" some spaces \"", rtok("\"", " some spaces ")),
 								            ("\"cha'nnel\"", rtok("'", "cha'nnel")),
 								            ("\"cha'nnel\"", rtok("'", "cha'nnel")),
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
+								            ("I'm tamo", rtok("'m tamo", "I")),
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								        ];
 								        for (input, expected) in test_case {
-												re-enable the tests in the parser and start the creation of an error type

											
										
										
											2021-11-03 00:35:17 +08:00
+								            let input = Span::new_extra(input, input);
-												remove all genericity in favor of my custom error type

											
										
										
											2021-11-03 03:27:07 +08:00
+								            let result = parse_value(input);
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
 								            assert!(
 								                result.is_ok(),
 								                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
 								                expected,
 								                result.unwrap_err()
 								            );
 								            let value = result.unwrap().1;
 								            assert_eq!(value, expected, "Filter `{}` failed.", input);
 								        }
 								    }
-												improve the error diagnostic when parsing values

											
										
										
											2021-11-08 22:30:26 +08:00
 								    #[test]
 								    fn diagnostic() {
 								        let test_case = [
 								            ("🦀", "🦀"),
 								            ("     🦀", "🦀"),
 								            ("🦀 AND crab = truc", "🦀"),
 								            ("🦀_in_name", "🦀_in_name"),
 								            (" (name = ...", ""),
 								        ];
 								        for (input, expected) in test_case {
 								            let input = Span::new_extra(input, input);
 								            let result = parse_value(input);
 								            assert!(
 								                result.is_err(),
 								                "Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
 								                expected,
 								                result.unwrap()
 								            );
 								            // get the inner string referenced in the error
 								            let value = *result.finish().unwrap_err().context().fragment();
 								            assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
 								        }
 								    }
-												update some names and move some parser out of the lib.rs

											
										
										
											2021-10-22 07:59:38 +08:00
+								}