3741: Add ngram support to the highlighter r=ManyTheFish a=loiclec

This PR fixes a bug introduced by the search refactor, where ngrams were not highlighted. 

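The solution was to add the ngrams to the vector of `LocatedQueryTerm` that is given to the `MatchingWords` structure: `QueryGraph::from_query` now returns, alongside the graph, a copy of the located query terms extended with every ngram it builds, and `execute_search` keeps that extended list instead of the original one.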

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
meili-bors[bot] 2023-05-16 09:03:31 +00:00 committed by GitHub
commit 65ad8cce36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 6 deletions

View File

@ -397,8 +397,8 @@ pub fn execute_search(
None None
}; };
let bucket_sort_output = if let Some(query_terms) = query_terms { let bucket_sort_output = if let Some(query_terms) = query_terms {
let graph = QueryGraph::from_query(ctx, &query_terms)?; let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
located_query_terms = Some(query_terms); located_query_terms = Some(new_located_query_terms);
let ranking_rules = get_ranking_rules_for_query_graph_search( let ranking_rules = get_ranking_rules_for_query_graph_search(
ctx, ctx,

View File

@ -88,12 +88,15 @@ pub struct QueryGraph {
} }
impl QueryGraph { impl QueryGraph {
/// Build the query graph from the parsed user search query. /// Build the query graph from the parsed user search query, return an updated list of the located query terms
/// which contains ngrams.
pub fn from_query( pub fn from_query(
ctx: &mut SearchContext, ctx: &mut SearchContext,
// NOTE: the terms here must be consecutive // NOTE: the terms here must be consecutive
terms: &[LocatedQueryTerm], terms: &[LocatedQueryTerm],
) -> Result<QueryGraph> { ) -> Result<(QueryGraph, Vec<LocatedQueryTerm>)> {
let mut new_located_query_terms = terms.to_vec();
let nbr_typos = number_of_typos_allowed(ctx)?; let nbr_typos = number_of_typos_allowed(ctx)?;
let mut nodes_data: Vec<QueryNodeData> = vec![QueryNodeData::Start, QueryNodeData::End]; let mut nodes_data: Vec<QueryNodeData> = vec![QueryNodeData::Start, QueryNodeData::End];
@ -107,10 +110,11 @@ impl QueryGraph {
let original_terms_len = terms.len(); let original_terms_len = terms.len();
for term_idx in 0..original_terms_len { for term_idx in 0..original_terms_len {
let mut new_nodes = vec![]; let mut new_nodes = vec![];
let new_node_idx = add_node( let new_node_idx = add_node(
&mut nodes_data, &mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset { QueryNodeData::Term(LocatedQueryTermSubset {
term_subset: QueryTermSubset::full(Interned::from_raw(term_idx as u16)), term_subset: QueryTermSubset::full(terms[term_idx].value),
positions: terms[term_idx].positions.clone(), positions: terms[term_idx].positions.clone(),
term_ids: term_idx as u8..=term_idx as u8, term_ids: term_idx as u8..=term_idx as u8,
}), }),
@ -121,6 +125,7 @@ impl QueryGraph {
if let Some(ngram) = if let Some(ngram) =
query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)? query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)?
{ {
new_located_query_terms.push(ngram.clone());
let ngram_idx = add_node( let ngram_idx = add_node(
&mut nodes_data, &mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset { QueryNodeData::Term(LocatedQueryTermSubset {
@ -136,6 +141,7 @@ impl QueryGraph {
if let Some(ngram) = if let Some(ngram) =
query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)? query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)?
{ {
new_located_query_terms.push(ngram.clone());
let ngram_idx = add_node( let ngram_idx = add_node(
&mut nodes_data, &mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset { QueryNodeData::Term(LocatedQueryTermSubset {
@ -167,7 +173,7 @@ impl QueryGraph {
let mut graph = QueryGraph { root_node, end_node, nodes }; let mut graph = QueryGraph { root_node, end_node, nodes };
graph.build_initial_edges(); graph.build_initial_edges();
Ok(graph) Ok((graph, new_located_query_terms))
} }
/// Remove the given nodes, connecting all their predecessors to all their successors. /// Remove the given nodes, connecting all their predecessors to all their successors.