From 257e621d4011857d998d9c2b51996a60edba4c2c Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 22 Sep 2021 15:18:39 +0200 Subject: [PATCH] create an asc_desc module --- milli/src/asc_desc.rs | 228 +++++++++++++++++++++++++++++++ milli/src/criterion.rs | 186 +------------------------ milli/src/lib.rs | 4 +- milli/src/search/criteria/mod.rs | 3 +- milli/src/search/mod.rs | 3 +- 5 files changed, 235 insertions(+), 189 deletions(-) create mode 100644 milli/src/asc_desc.rs diff --git a/milli/src/asc_desc.rs b/milli/src/asc_desc.rs new file mode 100644 index 000000000..d68d5b6a2 --- /dev/null +++ b/milli/src/asc_desc.rs @@ -0,0 +1,228 @@ +//! This module provide the `AscDesc` type and define a all the errors related to this type + +use std::fmt; +use std::str::FromStr; + +use serde::{Deserialize, Serialize}; + +use crate::error::is_reserved_keyword; +use crate::CriterionError; + +/// This error type is never supposed to be shown to the end user. +/// You must always cast it to a sort error or a criterion error. +#[derive(Debug)] +pub enum AscDescError { + InvalidSyntax { name: String }, + ReservedKeyword { name: String }, +} + +impl fmt::Display for AscDescError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::InvalidSyntax { name } => { + write!(f, "invalid asc/desc syntax for {}", name) + } + Self::ReservedKeyword { name } => { + write!( + f, + "{} is a reserved keyword and thus can't be used as a asc/desc rule", + name + ) + } + } + } +} + +impl From for CriterionError { + fn from(error: AscDescError) -> Self { + match error { + AscDescError::InvalidSyntax { name } => CriterionError::InvalidName { name }, + AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => { + CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() } + } + AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => { + CriterionError::ReservedNameForFilter { name: "_geoRadius".to_string() } + } + AscDescError::ReservedKeyword { name } => (CriterionError::ReservedName { name }), + } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub enum Member { + Field(String), + Geo([f64; 2]), +} + +impl FromStr for Member { + type Err = AscDescError; + + fn from_str(text: &str) -> Result { + match text.strip_prefix("_geoPoint(").and_then(|text| text.strip_suffix(")")) { + Some(point) => { + let (lat, long) = point + .split_once(',') + .ok_or_else(|| AscDescError::ReservedKeyword { name: text.to_string() }) + .and_then(|(lat, long)| { + lat.trim() + .parse() + .and_then(|lat| long.trim().parse().map(|long| (lat, long))) + .map_err(|_| AscDescError::ReservedKeyword { name: text.to_string() }) + })?; + Ok(Member::Geo([lat, long])) + } + None => { + if is_reserved_keyword(text) || text.starts_with("_geoRadius(") { + return Err(AscDescError::ReservedKeyword { name: text.to_string() })?; + } + Ok(Member::Field(text.to_string())) + } + } + } +} + +impl fmt::Display for Member { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Member::Field(name) => f.write_str(name), + Member::Geo([lat, lng]) => write!(f, "_geoPoint({}, {})", lat, lng), + } + } +} + +impl Member { + pub fn field(&self) -> Option<&str> { + match self { + Member::Field(field) => Some(field), + Member::Geo(_) => None, + } + } + + pub fn geo_point(&self) -> Option<&[f64; 2]> { + match self { + Member::Geo(point) => Some(point), + Member::Field(_) => None, + } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub enum AscDesc { + Asc(Member), + Desc(Member), +} + +impl AscDesc { + pub fn member(&self) -> &Member { + match self { + AscDesc::Asc(member) => member, + AscDesc::Desc(member) => member, + } + } + + pub fn field(&self) -> Option<&str> { + self.member().field() + } +} + +impl FromStr for AscDesc { + type Err = AscDescError; + + /// Since we don't know if this was deserialized for a criterion or a sort we just return a + /// string and let the caller create his own error + fn from_str(text: &str) -> Result { + match text.rsplit_once(':') { + Some((left, "asc")) => Ok(AscDesc::Asc(left.parse()?)), + Some((left, "desc")) => Ok(AscDesc::Desc(left.parse()?)), + _ => Err(AscDescError::InvalidSyntax { name: text.to_string() }), + } + } +} + +#[cfg(test)] +mod tests { + use big_s::S; + use AscDesc::*; + use AscDescError::*; + use Member::*; + + use super::*; + + #[test] + fn parse_asc_desc() { + let valid_req = [ + ("truc:asc", Asc(Field(S("truc")))), + ("bidule:desc", Desc(Field(S("bidule")))), + ("a-b:desc", Desc(Field(S("a-b")))), + ("a:b:desc", Desc(Field(S("a:b")))), + ("a12:asc", Asc(Field(S("a12")))), + ("42:asc", Asc(Field(S("42")))), + ("_geoPoint(42, 59):asc", Asc(Geo([42., 59.]))), + ("_geoPoint(42.459, 59):desc", Desc(Geo([42.459, 59.]))), + ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))), + ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))), + ("_geoPoint(42.0002, 59.895):desc", Desc(Geo([42.0002, 59.895]))), + ("_geoPoint(42., 59.):desc", Desc(Geo([42., 59.]))), + ("truc(12, 13):desc", Desc(Field(S("truc(12, 13)")))), + ]; + + for (req, expected) in valid_req { + let res = req.parse::(); + assert!( + res.is_ok(), + "Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`", + req, + expected, + res + ); + assert_eq!(res.unwrap(), expected); + } + + let invalid_req = [ + ("truc:machin", InvalidSyntax { name: S("truc:machin") }), + ("truc:deesc", InvalidSyntax { name: S("truc:deesc") }), + ("truc:asc:deesc", InvalidSyntax { name: S("truc:asc:deesc") }), + ("42desc", InvalidSyntax { name: S("42desc") }), + ("_geoPoint:asc", ReservedKeyword { name: S("_geoPoint") }), + ("_geoDistance:asc", ReservedKeyword { name: S("_geoDistance") }), + ("_geoPoint(42.12 , 59.598)", InvalidSyntax { name: S("_geoPoint(42.12 , 59.598)") }), + ( + "_geoPoint(42.12 , 59.598):deesc", + InvalidSyntax { name: S("_geoPoint(42.12 , 59.598):deesc") }, + ), + ( + "_geoPoint(42.12 , 59.598):machin", + InvalidSyntax { name: S("_geoPoint(42.12 , 59.598):machin") }, + ), + ( + "_geoPoint(42.12 , 59.598):asc:aasc", + InvalidSyntax { name: S("_geoPoint(42.12 , 59.598):asc:aasc") }, + ), + ( + "_geoPoint(42,12 , 59,598):desc", + ReservedKeyword { name: S("_geoPoint(42,12 , 59,598)") }, + ), + ("_geoPoint(35, 85, 75):asc", ReservedKeyword { name: S("_geoPoint(35, 85, 75)") }), + ("_geoPoint(18):asc", ReservedKeyword { name: S("_geoPoint(18)") }), + ]; + + for (req, expected_error) in invalid_req { + let res = req.parse::(); + assert!( + res.is_err(), + "Should no be able to parse `{}`, was expecting an error but instead got: `{:?}`", + req, + res, + ); + let res = res.unwrap_err(); + assert_eq!( + res.to_string(), + expected_error.to_string(), + "Bad error for input {}: got `{:?}` instead of `{:?}`", + req, + res, + expected_error + ); + } + } +} diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index 33112508c..acc0148bb 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -3,7 +3,8 @@ use std::str::FromStr; use serde::{Deserialize, Serialize}; -use crate::error::{is_reserved_keyword, Error, UserError}; +use crate::error::{Error, UserError}; +use crate::{AscDesc, Member}; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] pub enum Criterion { @@ -87,103 +88,6 @@ impl FromStr for Criterion { } } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub enum Member { - Field(String), - Geo([f64; 2]), -} - -impl FromStr for Member { - type Err = UserError; - - fn from_str(text: &str) -> Result { - match text.strip_prefix("_geoPoint(").and_then(|text| text.strip_suffix(")")) { - Some(point) => { - let (lat, long) = point - .split_once(',') - .ok_or_else(|| UserError::InvalidReservedAscDescSyntax { - name: text.to_string(), - }) - .and_then(|(lat, long)| { - lat.trim() - .parse() - .and_then(|lat| long.trim().parse().map(|long| (lat, long))) - .map_err(|_| UserError::InvalidReservedAscDescSyntax { - name: text.to_string(), - }) - })?; - Ok(Member::Geo([lat, long])) - } - None => { - if is_reserved_keyword(text) || text.starts_with("_geoRadius(") { - return Err(UserError::InvalidReservedAscDescSyntax { - name: text.to_string(), - })?; - } - Ok(Member::Field(text.to_string())) - } - } - } -} - -impl fmt::Display for Member { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Member::Field(name) => f.write_str(name), - Member::Geo([lat, lng]) => write!(f, "_geoPoint({}, {})", lat, lng), - } - } -} - -impl Member { - pub fn field(&self) -> Option<&str> { - match self { - Member::Field(field) => Some(field), - Member::Geo(_) => None, - } - } - - pub fn geo_point(&self) -> Option<&[f64; 2]> { - match self { - Member::Geo(point) => Some(point), - Member::Field(_) => None, - } - } -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub enum AscDesc { - Asc(Member), - Desc(Member), -} - -impl AscDesc { - pub fn member(&self) -> &Member { - match self { - AscDesc::Asc(member) => member, - AscDesc::Desc(member) => member, - } - } - - pub fn field(&self) -> Option<&str> { - self.member().field() - } -} - -impl FromStr for AscDesc { - type Err = UserError; - - /// Since we don't know if this was deserialized for a criterion or a sort we just return a - /// string and let the caller create his own error - fn from_str(text: &str) -> Result { - match text.rsplit_once(':') { - Some((left, "asc")) => Ok(AscDesc::Asc(left.parse()?)), - Some((left, "desc")) => Ok(AscDesc::Desc(left.parse()?)), - _ => Err(UserError::InvalidAscDescSyntax { name: text.to_string() }), - } - } -} - pub fn default_criteria() -> Vec { vec![ Criterion::Words, @@ -215,96 +119,10 @@ impl fmt::Display for Criterion { #[cfg(test)] mod tests { use big_s::S; - use AscDesc::*; - use Member::*; use UserError::*; use super::*; - #[test] - fn parse_asc_desc() { - let valid_req = [ - ("truc:asc", Asc(Field(S("truc")))), - ("bidule:desc", Desc(Field(S("bidule")))), - ("a-b:desc", Desc(Field(S("a-b")))), - ("a:b:desc", Desc(Field(S("a:b")))), - ("a12:asc", Asc(Field(S("a12")))), - ("42:asc", Asc(Field(S("42")))), - ("_geoPoint(42, 59):asc", Asc(Geo([42., 59.]))), - ("_geoPoint(42.459, 59):desc", Desc(Geo([42.459, 59.]))), - ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))), - ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))), - ("_geoPoint(42.0002, 59.895):desc", Desc(Geo([42.0002, 59.895]))), - ("_geoPoint(42., 59.):desc", Desc(Geo([42., 59.]))), - ("truc(12, 13):desc", Desc(Field(S("truc(12, 13)")))), - ]; - - for (req, expected) in valid_req { - let res = req.parse::(); - assert!( - res.is_ok(), - "Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`", - req, - expected, - res - ); - assert_eq!(res.unwrap(), expected); - } - - let invalid_req = [ - ("truc:machin", InvalidAscDescSyntax { name: S("truc:machin") }), - ("truc:deesc", InvalidAscDescSyntax { name: S("truc:deesc") }), - ("truc:asc:deesc", InvalidAscDescSyntax { name: S("truc:asc:deesc") }), - ("42desc", InvalidAscDescSyntax { name: S("42desc") }), - ("_geoPoint:asc", InvalidReservedAscDescSyntax { name: S("_geoPoint") }), - ("_geoDistance:asc", InvalidReservedAscDescSyntax { name: S("_geoDistance") }), - ( - "_geoPoint(42.12 , 59.598)", - InvalidAscDescSyntax { name: S("_geoPoint(42.12 , 59.598)") }, - ), - ( - "_geoPoint(42.12 , 59.598):deesc", - InvalidAscDescSyntax { name: S("_geoPoint(42.12 , 59.598):deesc") }, - ), - ( - "_geoPoint(42.12 , 59.598):machin", - InvalidAscDescSyntax { name: S("_geoPoint(42.12 , 59.598):machin") }, - ), - ( - "_geoPoint(42.12 , 59.598):asc:aasc", - InvalidAscDescSyntax { name: S("_geoPoint(42.12 , 59.598):asc:aasc") }, - ), - ( - "_geoPoint(42,12 , 59,598):desc", - InvalidReservedAscDescSyntax { name: S("_geoPoint(42,12 , 59,598)") }, - ), - ( - "_geoPoint(35, 85, 75):asc", - InvalidReservedAscDescSyntax { name: S("_geoPoint(35, 85, 75)") }, - ), - ("_geoPoint(18):asc", InvalidReservedAscDescSyntax { name: S("_geoPoint(18)") }), - ]; - - for (req, expected_error) in invalid_req { - let res = req.parse::(); - assert!( - res.is_err(), - "Should no be able to parse `{}`, was expecting an error but instead got: `{:?}`", - req, - res, - ); - let res = res.unwrap_err(); - assert_eq!( - res.to_string(), - expected_error.to_string(), - "Bad error for input {}: got `{:?}` instead of `{:?}`", - req, - res, - expected_error - ); - } - } - #[test] fn parse_criterion() { let valid_criteria = [ diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 550e7f13d..f36de8437 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -4,6 +4,7 @@ extern crate pest_derive; #[macro_use] pub mod documents; +mod asc_desc; mod criterion; mod error; mod external_documents_ids; @@ -24,7 +25,8 @@ use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; use serde_json::{Map, Value}; -pub use self::criterion::{default_criteria, AscDesc, Criterion, Member}; +pub use self::asc_desc::{AscDesc, Member}; +pub use self::criterion::{default_criteria, Criterion}; pub use self::error::{ Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError, }; diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index c2de55de5..a23e5acf9 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -12,10 +12,9 @@ use self::r#final::Final; use self::typo::Typo; use self::words::Words; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; -use crate::criterion::{AscDesc as AscDescName, Member}; use crate::search::criteria::geo::Geo; use crate::search::{word_derivations, WordDerivationsCache}; -use crate::{DocumentId, FieldId, Index, Result, TreeLevel}; +use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result, TreeLevel}; mod asc_desc; mod attribute; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index f752f5822..3984ed130 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -18,10 +18,9 @@ pub(crate) use self::facet::ParserRule; pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition, Operator}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; -use crate::criterion::{AscDesc, Criterion}; use crate::error::UserError; use crate::search::criteria::r#final::{Final, FinalResult}; -use crate::{DocumentId, Index, Result}; +use crate::{AscDesc, Criterion, DocumentId, Index, Result}; // Building these factories is not free. static LEVDIST0: Lazy = Lazy::new(|| LevBuilder::new(0, true));