mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-30 00:55:00 +08:00
Implement the facet search with the normalized index
This commit is contained in:
parent
df528b41d8
commit
691a536893
@ -1,5 +1,8 @@
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
|
use charabia::normalizer::NormalizerOption;
|
||||||
|
use charabia::Normalize;
|
||||||
use fst::automaton::{Automaton, Str};
|
use fst::automaton::{Automaton, Str};
|
||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
|
||||||
@ -14,8 +17,8 @@ use crate::error::UserError;
|
|||||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||||
use crate::{
|
use crate::{
|
||||||
execute_search, normalize_facet, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index,
|
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result,
|
||||||
Result, SearchContext, BEU16,
|
SearchContext, BEU16,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
@ -301,29 +304,28 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
|
|
||||||
match self.query.as_ref() {
|
match self.query.as_ref() {
|
||||||
Some(query) => {
|
Some(query) => {
|
||||||
let query = normalize_facet(query);
|
let options = NormalizerOption { lossy: true, ..Default::default() };
|
||||||
let query = query.as_str();
|
let query = query.normalize(&options);
|
||||||
|
let query = query.as_ref();
|
||||||
|
|
||||||
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
||||||
let field_authorizes_typos =
|
let field_authorizes_typos =
|
||||||
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
|
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
|
||||||
|
|
||||||
if authorize_typos && field_authorizes_typos {
|
if authorize_typos && field_authorizes_typos {
|
||||||
let mut results = vec![];
|
|
||||||
|
|
||||||
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
|
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
|
||||||
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
|
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
|
||||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: query };
|
let mut results = vec![];
|
||||||
if let Some(FacetGroupValue { bitmap, .. }) =
|
if fst.contains(query) {
|
||||||
index.facet_id_string_docids.get(rtxn, &key)?
|
self.fetch_original_facets_using_normalized(
|
||||||
{
|
fid,
|
||||||
let count = search_candidates.intersection_len(&bitmap);
|
query,
|
||||||
if count != 0 {
|
query,
|
||||||
let value = self
|
&search_candidates,
|
||||||
.one_original_value_of(fid, query, bitmap.min().unwrap())?
|
&mut results,
|
||||||
.unwrap_or_else(|| query.to_string());
|
)?;
|
||||||
results.push(FacetValueHit { value, count });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Ok(results)
|
||||||
} else {
|
} else {
|
||||||
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
|
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
|
||||||
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
|
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
|
||||||
@ -338,81 +340,41 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut stream = fst.search(automaton).into_stream();
|
let mut stream = fst.search(automaton).into_stream();
|
||||||
let mut length = 0;
|
let mut results = vec![];
|
||||||
'outer: while let Some(facet_value) = stream.next() {
|
while let Some(facet_value) = stream.next() {
|
||||||
let value = std::str::from_utf8(facet_value)?;
|
let value = std::str::from_utf8(facet_value)?;
|
||||||
let database = index.facet_id_normalized_string_strings;
|
if self
|
||||||
let key = (fid, value);
|
.fetch_original_facets_using_normalized(
|
||||||
let original_strings = match database.get(rtxn, &key)? {
|
fid,
|
||||||
Some(original_strings) => original_strings,
|
value,
|
||||||
None => {
|
query,
|
||||||
error!(
|
&search_candidates,
|
||||||
"the facet value is missing from the facet database: {key:?}"
|
&mut results,
|
||||||
);
|
)?
|
||||||
continue;
|
.is_break()
|
||||||
}
|
{
|
||||||
};
|
break;
|
||||||
for original_string in original_strings {
|
|
||||||
let key = FacetGroupKey {
|
|
||||||
field_id: fid,
|
|
||||||
level: 0,
|
|
||||||
left_bound: original_string.as_str(),
|
|
||||||
};
|
|
||||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
|
||||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
|
||||||
None => {
|
|
||||||
error!(
|
|
||||||
"the facet value is missing from the facet database: {key:?}"
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let count = search_candidates.intersection_len(&docids);
|
|
||||||
if count != 0 {
|
|
||||||
let value = self
|
|
||||||
.one_original_value_of(
|
|
||||||
fid,
|
|
||||||
&original_string,
|
|
||||||
docids.min().unwrap(),
|
|
||||||
)?
|
|
||||||
.unwrap_or_else(|| query.to_string());
|
|
||||||
results.push(FacetValueHit { value, count });
|
|
||||||
length += 1;
|
|
||||||
}
|
|
||||||
if length >= MAX_NUMBER_OF_FACETS {
|
|
||||||
break 'outer;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let automaton = Str::new(query).starts_with();
|
let automaton = Str::new(query).starts_with();
|
||||||
let mut stream = fst.search(automaton).into_stream();
|
let mut stream = fst.search(automaton).into_stream();
|
||||||
let mut results = vec![];
|
let mut results = vec![];
|
||||||
let mut length = 0;
|
|
||||||
while let Some(facet_value) = stream.next() {
|
while let Some(facet_value) = stream.next() {
|
||||||
let value = std::str::from_utf8(facet_value)?;
|
let value = std::str::from_utf8(facet_value)?;
|
||||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
|
if self
|
||||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
.fetch_original_facets_using_normalized(
|
||||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
fid,
|
||||||
None => {
|
value,
|
||||||
error!(
|
query,
|
||||||
"the facet value is missing from the facet database: {key:?}"
|
&search_candidates,
|
||||||
);
|
&mut results,
|
||||||
continue;
|
)?
|
||||||
}
|
.is_break()
|
||||||
};
|
{
|
||||||
let count = search_candidates.intersection_len(&docids);
|
|
||||||
if count != 0 {
|
|
||||||
let value = self
|
|
||||||
.one_original_value_of(fid, value, docids.min().unwrap())?
|
|
||||||
.unwrap_or_else(|| query.to_string());
|
|
||||||
results.push(FacetValueHit { value, count });
|
|
||||||
length += 1;
|
|
||||||
}
|
|
||||||
if length >= MAX_NUMBER_OF_FACETS {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -422,7 +384,6 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
let mut results = vec![];
|
let mut results = vec![];
|
||||||
let mut length = 0;
|
|
||||||
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
|
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
|
||||||
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
|
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
|
||||||
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
|
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
|
||||||
@ -433,9 +394,8 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
|
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
|
||||||
.unwrap_or_else(|| left_bound.to_string());
|
.unwrap_or_else(|| left_bound.to_string());
|
||||||
results.push(FacetValueHit { value, count });
|
results.push(FacetValueHit { value, count });
|
||||||
length += 1;
|
|
||||||
}
|
}
|
||||||
if length >= MAX_NUMBER_OF_FACETS {
|
if results.len() >= MAX_NUMBER_OF_FACETS {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -443,6 +403,50 @@ impl<'a> SearchForFacetValues<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn fetch_original_facets_using_normalized(
|
||||||
|
&self,
|
||||||
|
fid: FieldId,
|
||||||
|
value: &str,
|
||||||
|
query: &str,
|
||||||
|
search_candidates: &RoaringBitmap,
|
||||||
|
results: &mut Vec<FacetValueHit>,
|
||||||
|
) -> Result<ControlFlow<()>> {
|
||||||
|
let index = self.search_query.index;
|
||||||
|
let rtxn = self.search_query.rtxn;
|
||||||
|
|
||||||
|
let database = index.facet_id_normalized_string_strings;
|
||||||
|
let key = (fid, value);
|
||||||
|
let original_strings = match database.get(rtxn, &key)? {
|
||||||
|
Some(original_strings) => original_strings,
|
||||||
|
None => {
|
||||||
|
error!("the facet value is missing from the facet database: {key:?}");
|
||||||
|
return Ok(ControlFlow::Continue(()));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for original in original_strings {
|
||||||
|
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
|
||||||
|
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||||
|
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||||
|
None => {
|
||||||
|
error!("the facet value is missing from the facet database: {key:?}");
|
||||||
|
return Ok(ControlFlow::Continue(()));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let count = search_candidates.intersection_len(&docids);
|
||||||
|
if count != 0 {
|
||||||
|
let value = self
|
||||||
|
.one_original_value_of(fid, &original, docids.min().unwrap())?
|
||||||
|
.unwrap_or_else(|| query.to_string());
|
||||||
|
results.push(FacetValueHit { value, count });
|
||||||
|
}
|
||||||
|
if results.len() >= MAX_NUMBER_OF_FACETS {
|
||||||
|
return Ok(ControlFlow::Break(()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ControlFlow::Continue(()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
|
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
|
||||||
|
Loading…
Reference in New Issue
Block a user