From 57502fcf6a8f7b3c846502e20ad9785b5ab12a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 6 Dec 2021 17:35:20 +0100 Subject: [PATCH 1/5] Introduce the depth method on FilterCondition --- filter-parser/src/lib.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index ed36b1bf4..8b72c69ee 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -113,6 +113,17 @@ pub enum FilterCondition<'a> { } impl<'a> FilterCondition<'a> { + pub fn depth(&self) -> usize { + match self { + FilterCondition::Condition { .. } => 1, + FilterCondition::Or(left, right) => 1 + left.depth().max(right.depth()), + FilterCondition::And(left, right) => 1 + left.depth().max(right.depth()), + FilterCondition::GeoLowerThan { .. } => 1, + FilterCondition::GeoGreaterThan { .. } => 1, + FilterCondition::Empty => 0, + } + } + pub fn negate(self) -> FilterCondition<'a> { use FilterCondition::*; @@ -584,4 +595,10 @@ pub mod tests { assert!(filter.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, filter); } } + + #[test] + fn depth() { + let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap(); + assert_eq!(filter.depth(), 6); + } } From 49c2db948561529cdd2ffb68d6bae5ec01e29f22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Dec 2021 15:16:29 +0100 Subject: [PATCH 2/5] Change the depth function to return the token depth --- filter-parser/src/lib.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 8b72c69ee..3ef31c3c8 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -113,14 +113,19 @@ pub enum FilterCondition<'a> { } impl<'a> FilterCondition<'a> { - pub fn depth(&self) -> usize { + /// Returns the first token found at the specified depth, `None` if no token at this depth. + pub fn token_at_depth(&self, depth: usize) -> Option<&Token> { match self { - FilterCondition::Condition { .. } => 1, - FilterCondition::Or(left, right) => 1 + left.depth().max(right.depth()), - FilterCondition::And(left, right) => 1 + left.depth().max(right.depth()), - FilterCondition::GeoLowerThan { .. } => 1, - FilterCondition::GeoGreaterThan { .. } => 1, - FilterCondition::Empty => 0, + FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid), + FilterCondition::Or(left, right) => { + left.token_at_depth(depth - 1).or_else(|| right.token_at_depth(depth - 1)) + } + FilterCondition::And(left, right) => { + left.token_at_depth(depth - 1).or_else(|| right.token_at_depth(depth - 1)) + } + FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point), + FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point), + _ => None, } } @@ -599,6 +604,6 @@ pub mod tests { #[test] fn depth() { let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap(); - assert_eq!(filter.depth(), 6); + assert!(filter.token_at_depth(5).is_some()); } } From 90f49eab6d671c45bf7107868dc41c872aad4a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Dec 2021 16:32:48 +0100 Subject: [PATCH 3/5] Check the filter max depth limit and reject the invalid ones --- milli/src/search/facet/filter.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index e994f36d9..5a88c14dc 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -15,6 +15,9 @@ use crate::heed_codec::facet::{ }; use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; +/// The maximum number of filters the filter AST can process. +const MAX_FILTER_DEPTH: usize = 1000; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct Filter<'a> { condition: FilterCondition<'a>, @@ -27,6 +30,7 @@ enum FilterError<'a> { BadGeoLat(f64), BadGeoLng(f64), Reserved(&'a str), + TooDeep, InternalError, } impl<'a> std::error::Error for FilterError<'a> {} @@ -40,6 +44,10 @@ impl<'a> Display for FilterError<'a> { attribute, filterable, ), + Self::TooDeep => write!(f, + "Too many filter conditions, can't process more than {} filters.", + MAX_FILTER_DEPTH + ), Self::Reserved(keyword) => write!( f, "`{}` is a reserved keyword and thus can't be used as a filter expression.", @@ -108,6 +116,10 @@ impl<'a> Filter<'a> { } } + if let Some(token) = ands.as_ref().and_then(|fc| fc.token_at_depth(MAX_FILTER_DEPTH)) { + return Err(token.as_external_error(FilterError::TooDeep).into()); + } + Ok(ands.map(|ands| Self { condition: ands })) } From 32bd9f091fd74b50e4f3623d42a471d2ebb23241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Dec 2021 17:20:11 +0100 Subject: [PATCH 4/5] Detect the filters that are too deep and return an error --- filter-parser/src/lib.rs | 6 +++-- milli/src/search/facet/filter.rs | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 3ef31c3c8..0e49e00e9 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -118,10 +118,12 @@ impl<'a> FilterCondition<'a> { match self { FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid), FilterCondition::Or(left, right) => { - left.token_at_depth(depth - 1).or_else(|| right.token_at_depth(depth - 1)) + let depth = depth.saturating_sub(1); + right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) } FilterCondition::And(left, right) => { - left.token_at_depth(depth - 1).or_else(|| right.token_at_depth(depth - 1)) + let depth = depth.saturating_sub(1); + right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) } FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point), FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point), diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 5a88c14dc..2c78816fb 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -128,6 +128,11 @@ impl<'a> Filter<'a> { Ok(fc) => Ok(fc), Err(e) => Err(Error::UserError(UserError::InvalidFilter(e.to_string()))), }?; + + if let Some(token) = condition.token_at_depth(MAX_FILTER_DEPTH) { + return Err(token.as_external_error(FilterError::TooDeep).into()); + } + Ok(Self { condition }) } } @@ -431,6 +436,8 @@ impl<'a> From> for Filter<'a> { #[cfg(test)] mod tests { + use std::fmt::Write; + use big_s::S; use either::Either; use heed::EnvOpenOptions; @@ -598,4 +605,37 @@ mod tests { "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." )); } + + #[test] + fn filter_depth() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index); + builder.set_searchable_fields(vec![S("account_ids")]); + builder.set_filterable_fields(hashset! { S("account_ids") }); + builder.execute(|_| ()).unwrap(); + wtxn.commit().unwrap(); + + // generates a big (2 MiB) filter with too much of ORs. + let tipic_filter = "account_ids=14361 OR "; + let mut filter_string = String::with_capacity(tipic_filter.len() * 14360); + for i in 1..=14361 { + let _ = write!(&mut filter_string, "account_ids={}", i); + if i != 14361 { + let _ = write!(&mut filter_string, " OR "); + } + } + + let error = Filter::from_str(&filter_string).unwrap_err(); + assert!( + error.to_string().starts_with("Too many filter conditions"), + "{}", + error.to_string() + ); + } } From ee856a7a46be0ad3bfa40247956848d1dcf6f079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 Dec 2021 17:36:45 +0100 Subject: [PATCH 5/5] Limit the max filter depth to 2000 --- milli/src/search/facet/filter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 2c78816fb..9d9d16de5 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -16,7 +16,7 @@ use crate::heed_codec::facet::{ use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; /// The maximum number of filters the filter AST can process. -const MAX_FILTER_DEPTH: usize = 1000; +const MAX_FILTER_DEPTH: usize = 2000; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Filter<'a> {