From ef59762d8ea933170f8545e52203e1b4ab157b53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 9 Dec 2021 11:13:12 +0100 Subject: [PATCH 1/5] Prefer returning None instead of the Empty Filter state --- filter-parser/src/lib.rs | 12 ++--- milli/src/search/facet/filter.rs | 74 +++++++++++++++------------- milli/src/update/delete_documents.rs | 2 +- milli/src/update/settings.rs | 2 +- 4 files changed, 47 insertions(+), 43 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 0e49e00e9..4c5e03c82 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -109,7 +109,6 @@ pub enum FilterCondition<'a> { And(Box, Box), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, - Empty, } impl<'a> FilterCondition<'a> { @@ -144,18 +143,17 @@ impl<'a> FilterCondition<'a> { }, Or(a, b) => And(a.negate().into(), b.negate().into()), And(a, b) => Or(a.negate().into(), b.negate().into()), - Empty => Empty, GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, } } - pub fn parse(input: &'a str) -> Result { + pub fn parse(input: &'a str) -> Result, Error> { if input.trim().is_empty() { - return Ok(Self::Empty); + return Ok(None); } let span = Span::new_extra(input, input); - parse_filter(span).finish().map(|(_rem, output)| output) + parse_filter(span).finish().map(|(_rem, output)| Some(output)) } } @@ -560,7 +558,7 @@ pub mod tests { result.unwrap_err() ); let filter = result.unwrap(); - assert_eq!(filter, expected, "Filter `{}` failed.", input); + assert_eq!(filter, Some(expected), "Filter `{}` failed.", input); } } @@ -605,7 +603,7 @@ pub mod tests { #[test] fn depth() { - let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap(); + let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap(); assert!(filter.token_at_depth(5).is_some()); } } diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 9d9d16de5..88815d884 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -86,13 +86,15 @@ impl<'a> Filter<'a> { Either::Left(array) => { let mut ors = None; for rule in array { - let condition = Self::from_str(rule.as_ref())?.condition; - ors = match ors.take() { - Some(ors) => { - Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) - } - None => Some(condition), - }; + if let Some(filter) = Self::from_str(rule.as_ref())? { + let condition = filter.condition; + ors = match ors.take() { + Some(ors) => { + Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) + } + None => Some(condition), + }; + } } if let Some(rule) = ors { @@ -105,13 +107,15 @@ impl<'a> Filter<'a> { } } Either::Right(rule) => { - let condition = Self::from_str(rule.as_ref())?.condition; - ands = match ands.take() { - Some(ands) => { - Some(FilterCondition::And(Box::new(ands), Box::new(condition))) - } - None => Some(condition), - }; + if let Some(filter) = Self::from_str(rule.as_ref())? { + let condition = filter.condition; + ands = match ands.take() { + Some(ands) => { + Some(FilterCondition::And(Box::new(ands), Box::new(condition))) + } + None => Some(condition), + }; + } } } } @@ -123,9 +127,10 @@ impl<'a> Filter<'a> { Ok(ands.map(|ands| Self { condition: ands })) } - pub fn from_str(expression: &'a str) -> Result { + pub fn from_str(expression: &'a str) -> Result> { let condition = match FilterCondition::parse(expression) { - Ok(fc) => Ok(fc), + Ok(Some(fc)) => Ok(fc), + Ok(None) => return Ok(None), Err(e) => Err(Error::UserError(UserError::InvalidFilter(e.to_string()))), }?; @@ -133,7 +138,7 @@ impl<'a> Filter<'a> { return Err(token.as_external_error(FilterError::TooDeep).into()); } - Ok(Self { condition }) + Ok(Some(Self { condition })) } } @@ -377,7 +382,6 @@ impl<'a> Filter<'a> { let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?; Ok(lhs & rhs) } - FilterCondition::Empty => Ok(RoaringBitmap::new()), FilterCondition::GeoLowerThan { point, radius } => { let filterable_fields = index.filterable_fields(rtxn)?; if filterable_fields.contains("_geo") { @@ -451,20 +455,20 @@ mod tests { fn from_array() { // Simple array with Left let condition = Filter::from_array(vec![Either::Left(["channel = mv"])]).unwrap().unwrap(); - let expected = Filter::from_str("channel = mv").unwrap(); + let expected = Filter::from_str("channel = mv").unwrap().unwrap(); assert_eq!(condition, expected); // Simple array with Right let condition = Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = mv")]) .unwrap() .unwrap(); - let expected = Filter::from_str("channel = mv").unwrap(); + let expected = Filter::from_str("channel = mv").unwrap().unwrap(); assert_eq!(condition, expected); // Array with Left and escaped quote let condition = Filter::from_array(vec![Either::Left(["channel = \"Mister Mv\""])]).unwrap().unwrap(); - let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap(); + let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap(); assert_eq!(condition, expected); // Array with Right and escaped quote @@ -472,13 +476,13 @@ mod tests { Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = \"Mister Mv\"")]) .unwrap() .unwrap(); - let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap(); + let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap(); assert_eq!(condition, expected); // Array with Left and escaped simple quote let condition = Filter::from_array(vec![Either::Left(["channel = 'Mister Mv'"])]).unwrap().unwrap(); - let expected = Filter::from_str("channel = 'Mister Mv'").unwrap(); + let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap(); assert_eq!(condition, expected); // Array with Right and escaped simple quote @@ -486,13 +490,13 @@ mod tests { Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = 'Mister Mv'")]) .unwrap() .unwrap(); - let expected = Filter::from_str("channel = 'Mister Mv'").unwrap(); + let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap(); assert_eq!(condition, expected); // Simple with parenthesis let condition = Filter::from_array(vec![Either::Left(["(channel = mv)"])]).unwrap().unwrap(); - let expected = Filter::from_str("(channel = mv)").unwrap(); + let expected = Filter::from_str("(channel = mv)").unwrap().unwrap(); assert_eq!(condition, expected); // Test that the facet condition is correctly generated. @@ -503,7 +507,9 @@ mod tests { .unwrap() .unwrap(); let expected = - Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)").unwrap(); + Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)") + .unwrap() + .unwrap(); println!("\nExpecting: {:#?}\nGot: {:#?}\n", expected, condition); assert_eq!(condition, expected); } @@ -516,13 +522,13 @@ mod tests { let index = Index::new(options, &path).unwrap(); let rtxn = index.read_txn().unwrap(); - let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap(); + let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( "Attribute `_geo` is not filterable. Available filterable attributes are: ``." )); - let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap(); + let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( "Attribute `dog` is not filterable. Available filterable attributes are: ``." @@ -539,13 +545,13 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap(); + let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." )); - let filter = Filter::from_str("name = 12").unwrap(); + let filter = Filter::from_str("name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( "Attribute `name` is not filterable. Available filterable attributes are: `title`." @@ -570,7 +576,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); // georadius have a bad latitude - let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap(); + let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!( error.to_string().starts_with( @@ -581,14 +587,14 @@ mod tests { ); // georadius have a bad latitude - let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap(); + let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().contains( "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees." )); // georadius have a bad longitude - let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap(); + let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!( error.to_string().contains( @@ -599,7 +605,7 @@ mod tests { ); // georadius have a bad longitude - let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap(); + let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().contains( "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 2fd3e084e..ed87132bd 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -681,7 +681,7 @@ mod tests { builder.delete_external_id("1_4"); builder.execute().unwrap(); - let filter = Filter::from_str("label = sign").unwrap(); + let filter = Filter::from_str("label = sign").unwrap().unwrap(); let results = index.search(&wtxn).filter(filter).execute().unwrap(); assert!(results.documents_ids.is_empty()); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 9c270ed71..fff5eb0fa 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1060,7 +1060,7 @@ mod tests { wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); - let filter = Filter::from_str("toto = 32").unwrap(); + let filter = Filter::from_str("toto = 32").unwrap().unwrap(); let _ = filter.evaluate(&rtxn, &index).unwrap_err(); } From 65519bc04ba573f5465d9ad9ed864117eedf2872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 9 Dec 2021 11:14:51 +0100 Subject: [PATCH 2/5] Test that empty filters return a None --- milli/src/search/facet/filter.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 88815d884..642a32472 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -644,4 +644,10 @@ mod tests { error.to_string() ); } + + #[test] + fn empty_filter() { + let option = Filter::from_str(" ").unwrap(); + assert_eq!(option, None); + } } From 25faef67d052d52eeb9d1ef4844238df162a01fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 9 Dec 2021 11:15:16 +0100 Subject: [PATCH 3/5] Remove the database setup in the filter_depth test --- milli/src/search/facet/filter.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 642a32472..6ece17eb4 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -614,19 +614,6 @@ mod tests { #[test] fn filter_depth() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index); - builder.set_searchable_fields(vec![S("account_ids")]); - builder.set_filterable_fields(hashset! { S("account_ids") }); - builder.execute(|_| ()).unwrap(); - wtxn.commit().unwrap(); - // generates a big (2 MiB) filter with too much of ORs. let tipic_filter = "account_ids=14361 OR "; let mut filter_string = String::with_capacity(tipic_filter.len() * 14360); From 1c6c89f34561c62a05b1f4575e9b6f2c060a579f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 9 Dec 2021 11:50:12 +0100 Subject: [PATCH 4/5] Fix the binaries that use the new optional filters --- cli/src/main.rs | 5 +++-- http-ui/src/main.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 44c197de6..b3c18244d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -250,8 +250,9 @@ impl Search { } if let Some(ref filter) = self.filter { - let condition = milli::Filter::from_str(filter)?; - search.filter(condition); + if let Some(condition) = milli::Filter::from_str(filter)? { + search.filter(condition); + } } if let Some(offset) = self.offset { diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 4bd8815a5..75a9012c6 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -738,7 +738,7 @@ async fn main() -> anyhow::Result<()> { let filters = match query.filters.as_ref() { Some(condition) if !condition.trim().is_empty() => { - Some(MilliFilter::from_str(condition).unwrap()) + MilliFilter::from_str(condition).unwrap() } _otherwise => None, }; From 94011bb9a8cba5dd8c99bae463a1762472a7e6f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 9 Dec 2021 12:14:16 +0100 Subject: [PATCH 5/5] Fix the benchmarks to work with optional filters --- benchmarks/benches/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs index 1b1d9be8c..df5a7b828 100644 --- a/benchmarks/benches/utils.rs +++ b/benchmarks/benches/utils.rs @@ -117,7 +117,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { let mut search = index.search(&rtxn); search.query(query).optional_words(conf.optional_words); if let Some(filter) = conf.filter { - let filter = Filter::from_str(filter).unwrap(); + let filter = Filter::from_str(filter).unwrap().unwrap(); search.filter(filter); } if let Some(sort) = &conf.sort {