From 7bd67543dde9599d98fd7f262668b9cc112327a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 27 Apr 2023 16:04:03 +0200 Subject: [PATCH] Support the typoTolerance.enabled parameter --- milli/src/search/mod.rs | 87 ++++++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 114ca2a04..ab7c336b4 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -283,6 +283,7 @@ impl<'a> SearchForFacetValues<'a> { .into()); } }; + let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { Some(fst) => fst, None => return Ok(vec![]), @@ -292,37 +293,69 @@ impl<'a> SearchForFacetValues<'a> { match self.query.as_ref() { Some(query) => { - let is_prefix = true; - let starts = StartsWith(Str::new(get_first(query))); - let first = Intersection(build_dfa(query, 1, is_prefix), Complement(&starts)); - let second_dfa = build_dfa(query, 2, is_prefix); - let second = Intersection(&second_dfa, &starts); - let automaton = Union(first, &second); + if self.search_query.index.authorize_typos(rtxn)? { + let is_prefix = true; + let starts = StartsWith(Str::new(get_first(query))); + let first = Intersection(build_dfa(query, 1, is_prefix), Complement(&starts)); + let second_dfa = build_dfa(query, 2, is_prefix); + let second = Intersection(&second_dfa, &starts); + let automaton = Union(first, &second); - let mut stream = fst.search(automaton).into_stream(); - let mut result = vec![]; - let mut length = 0; - while let Some(facet_value) = stream.next() { - let value = std::str::from_utf8(facet_value)?; - let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; - let docids = match index.facet_id_string_docids.get(rtxn, &key)? { - Some(FacetGroupValue { bitmap, .. 
}) => bitmap, - None => { - error!("the facet value is missing from the facet database: {key:?}"); - continue; + let mut stream = fst.search(automaton).into_stream(); + let mut result = vec![]; + let mut length = 0; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!( + "the facet value is missing from the facet database: {key:?}" + ); + continue; + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + result.push(FacetValueHit { value: value.to_string(), count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; } - }; - let count = search_candidates.intersection_len(&docids); - if count != 0 { - result.push(FacetValueHit { value: value.to_string(), count }); - length += 1; } - if length >= MAX_NUMBER_OF_FACETS { - break; - } - } - Ok(result) + Ok(result) + } else { + let automaton = StartsWith(Str::new(query)); + let mut stream = fst.search(automaton).into_stream(); + let mut result = vec![]; + let mut length = 0; + while let Some(facet_value) = stream.next() { + let value = std::str::from_utf8(facet_value)?; + let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value }; + let docids = match index.facet_id_string_docids.get(rtxn, &key)? { + Some(FacetGroupValue { bitmap, .. }) => bitmap, + None => { + error!( + "the facet value is missing from the facet database: {key:?}" + ); + continue; + } + }; + let count = search_candidates.intersection_len(&docids); + if count != 0 { + result.push(FacetValueHit { value: value.to_string(), count }); + length += 1; + } + if length >= MAX_NUMBER_OF_FACETS { + break; + } + } + + Ok(result) + } } None => { let mut stream = fst.stream();