diff --git a/Cargo.lock b/Cargo.lock index c1392f4a3..e486648f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1440,6 +1440,7 @@ dependencies = [ "insta", "nom", "nom_locate", + "unescaper", ] [[package]] @@ -4147,6 +4148,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" +[[package]] +name = "unescaper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96a44ae11e25afb520af4534fd7b0bd8cd613e35a78def813b8cf41631fa3c8" +dependencies = [ + "thiserror", +] + [[package]] name = "unicase" version = "2.6.0" diff --git a/filter-parser/Cargo.toml b/filter-parser/Cargo.toml index 58111ee08..d9b47f638 100644 --- a/filter-parser/Cargo.toml +++ b/filter-parser/Cargo.toml @@ -14,6 +14,7 @@ license.workspace = true [dependencies] nom = "7.1.3" nom_locate = "4.1.0" +unescaper = "0.1.2" [dev-dependencies] insta = "1.29.0" diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index b9d14a271..c71a3aaee 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -62,6 +62,7 @@ pub enum ErrorKind<'a> { MisusedGeoRadius, MisusedGeoBoundingBox, InvalidPrimary, + InvalidEscapedNumber, ExpectedEof, ExpectedValue(ExpectedValueKind), MalformedValue, @@ -147,6 +148,9 @@ impl<'a> Display for Error<'a> { let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)? } + ErrorKind::InvalidEscapedNumber => { + writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)? + } ErrorKind::ExpectedEof => { writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? } diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 11ffbb148..5760c8865 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -545,6 +545,8 @@ impl<'a> std::fmt::Display for Token<'a> { #[cfg(test)] pub mod tests { + use FilterCondition as Fc; + use super::*; /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element @@ -556,14 +558,22 @@ pub mod tests { unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into() } + fn p(s: &str) -> impl std::fmt::Display + '_ { + Fc::parse(s).unwrap().unwrap() + } + + #[test] + fn parse_escaped() { + insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#); + insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#); + insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#); + insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#); + // but it also works with other sequencies + insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}"); + } + #[test] fn parse() { - use FilterCondition as Fc; - - fn p(s: &str) -> impl std::fmt::Display + '_ { - Fc::parse(s).unwrap().unwrap() - } - // Test equal insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}"); insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}"); diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 735352fc3..63d5ac384 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -171,7 +171,24 @@ pub fn parse_value(input: Span) -> IResult { }) })?; - Ok((input, value)) + match unescaper::unescape(value.value()) { + Ok(content) => { + if content.len() != value.value().len() { + Ok((input, Token::new(value.original_span(), Some(content)))) + } else { + Ok((input, value)) + } + } + Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)), + Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind( + value.original_span(), + ErrorKind::InvalidEscapedNumber, + ))), + Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind( + value.original_span(), + ErrorKind::MalformedValue, + ))), + } } fn is_value_component(c: char) -> bool { @@ -318,17 +335,17 @@ pub mod test { ("\"cha'nnel\"", "cha'nnel", false), ("I'm tamo", "I", false), // escaped thing but not quote - (r#""\\""#, r#"\\"#, false), - (r#""\\\\\\""#, r#"\\\\\\"#, false), - (r#""aa\\aa""#, r#"aa\\aa"#, false), + (r#""\\""#, r#"\"#, true), + (r#""\\\\\\""#, r#"\\\"#, true), + (r#""aa\\aa""#, r#"aa\aa"#, true), // with double quote (r#""Hello \"world\"""#, r#"Hello "world""#, true), - (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true), + (r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true), (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true), (r#""\"\"""#, r#""""#, true), // with simple quote (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true), - (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true), + (r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true), (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true), (r#"'\'\''"#, r#"''"#, true), ]; @@ -350,7 +367,14 @@ pub mod test { "Filter `{}` was not supposed to be escaped", input ); - assert_eq!(token.value(), expected, "Filter `{}` failed.", input); + assert_eq!( + token.value(), + expected, + "Filter `{}` failed by giving `{}` instead of `{}`.", + input, + token.value(), + expected + ); } }