mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Replace old tokenizer by charabia
This commit is contained in:
parent
8d09772334
commit
173eea06e1
@ -4,7 +4,7 @@ use std::str::FromStr;
|
|||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use either::Either;
|
use either::Either;
|
||||||
use milli::tokenizer::{Analyzer, AnalyzerConfig};
|
use milli::tokenizer::TokenizerBuilder;
|
||||||
use milli::{
|
use milli::{
|
||||||
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
|
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
|
||||||
};
|
};
|
||||||
@ -175,12 +175,9 @@ impl Index {
|
|||||||
&displayed_ids,
|
&displayed_ids,
|
||||||
);
|
);
|
||||||
|
|
||||||
let stop_words = fst::Set::default();
|
let tokenizer = TokenizerBuilder::default().build();
|
||||||
let mut config = AnalyzerConfig::default();
|
|
||||||
config.stop_words(&stop_words);
|
|
||||||
let analyzer = Analyzer::new(config);
|
|
||||||
|
|
||||||
let mut formatter_builder = MatcherBuilder::from_matching_words(matching_words);
|
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer);
|
||||||
formatter_builder.crop_marker(query.crop_marker);
|
formatter_builder.crop_marker(query.crop_marker);
|
||||||
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
||||||
formatter_builder.highlight_suffix(query.highlight_post_tag);
|
formatter_builder.highlight_suffix(query.highlight_post_tag);
|
||||||
@ -204,7 +201,6 @@ impl Index {
|
|||||||
&displayed_document,
|
&displayed_document,
|
||||||
&fields_ids_map,
|
&fields_ids_map,
|
||||||
&formatter_builder,
|
&formatter_builder,
|
||||||
&analyzer,
|
|
||||||
&formatted_options,
|
&formatted_options,
|
||||||
query.show_matches_position,
|
query.show_matches_position,
|
||||||
&displayed_ids,
|
&displayed_ids,
|
||||||
@ -414,8 +410,7 @@ fn make_document(
|
|||||||
fn format_fields<'a, A: AsRef<[u8]>>(
|
fn format_fields<'a, A: AsRef<[u8]>>(
|
||||||
document: &Document,
|
document: &Document,
|
||||||
field_ids_map: &FieldsIdsMap,
|
field_ids_map: &FieldsIdsMap,
|
||||||
builder: &MatcherBuilder,
|
builder: &MatcherBuilder<'a, A>,
|
||||||
analyzer: &'a Analyzer<'a, A>,
|
|
||||||
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
||||||
compute_matches: bool,
|
compute_matches: bool,
|
||||||
displayable_ids: &BTreeSet<FieldId>,
|
displayable_ids: &BTreeSet<FieldId>,
|
||||||
@ -446,7 +441,6 @@ fn format_fields<'a, A: AsRef<[u8]>>(
|
|||||||
std::mem::take(value),
|
std::mem::take(value),
|
||||||
builder,
|
builder,
|
||||||
format,
|
format,
|
||||||
analyzer,
|
|
||||||
&mut infos,
|
&mut infos,
|
||||||
compute_matches,
|
compute_matches,
|
||||||
);
|
);
|
||||||
@ -470,19 +464,14 @@ fn format_fields<'a, A: AsRef<[u8]>>(
|
|||||||
|
|
||||||
fn format_value<'a, A: AsRef<[u8]>>(
|
fn format_value<'a, A: AsRef<[u8]>>(
|
||||||
value: Value,
|
value: Value,
|
||||||
builder: &MatcherBuilder,
|
builder: &MatcherBuilder<'a, A>,
|
||||||
format_options: Option<FormatOptions>,
|
format_options: Option<FormatOptions>,
|
||||||
analyzer: &'a Analyzer<'a, A>,
|
|
||||||
infos: &mut Vec<MatchBounds>,
|
infos: &mut Vec<MatchBounds>,
|
||||||
compute_matches: bool,
|
compute_matches: bool,
|
||||||
) -> Value {
|
) -> Value {
|
||||||
match value {
|
match value {
|
||||||
Value::String(old_string) => {
|
Value::String(old_string) => {
|
||||||
// this will be removed with charabia
|
let mut matcher = builder.build(&old_string);
|
||||||
let analyzed = analyzer.analyze(&old_string);
|
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
|
||||||
|
|
||||||
let mut matcher = builder.build(&tokens[..], &old_string);
|
|
||||||
if compute_matches {
|
if compute_matches {
|
||||||
let matches = matcher.matches();
|
let matches = matcher.matches();
|
||||||
infos.extend_from_slice(&matches[..]);
|
infos.extend_from_slice(&matches[..]);
|
||||||
@ -507,7 +496,6 @@ fn format_value<'a, A: AsRef<[u8]>>(
|
|||||||
highlight: format_options.highlight,
|
highlight: format_options.highlight,
|
||||||
crop: None,
|
crop: None,
|
||||||
}),
|
}),
|
||||||
analyzer,
|
|
||||||
infos,
|
infos,
|
||||||
compute_matches,
|
compute_matches,
|
||||||
)
|
)
|
||||||
@ -527,7 +515,6 @@ fn format_value<'a, A: AsRef<[u8]>>(
|
|||||||
highlight: format_options.highlight,
|
highlight: format_options.highlight,
|
||||||
crop: None,
|
crop: None,
|
||||||
}),
|
}),
|
||||||
analyzer,
|
|
||||||
infos,
|
infos,
|
||||||
compute_matches,
|
compute_matches,
|
||||||
),
|
),
|
||||||
@ -536,12 +523,9 @@ fn format_value<'a, A: AsRef<[u8]>>(
|
|||||||
.collect(),
|
.collect(),
|
||||||
),
|
),
|
||||||
Value::Number(number) => {
|
Value::Number(number) => {
|
||||||
// this will be removed with charabia
|
|
||||||
let s = number.to_string();
|
let s = number.to_string();
|
||||||
let analyzed = analyzer.analyze(&s);
|
|
||||||
let tokens: Vec<_> = analyzed.tokens().collect();
|
|
||||||
|
|
||||||
let mut matcher = builder.build(&tokens[..], &s);
|
let mut matcher = builder.build(&s);
|
||||||
if compute_matches {
|
if compute_matches {
|
||||||
let matches = matcher.matches();
|
let matches = matcher.matches();
|
||||||
infos.extend_from_slice(&matches[..]);
|
infos.extend_from_slice(&matches[..]);
|
||||||
|
Loading…
Reference in New Issue
Block a user