Mirror of https://github.com/meilisearch/meilisearch.git (synced 2024-11-25 03:25:06 +08:00).
Commit cb226079fa (parent 6f00c1dfb8): Use tokenizer on numbers and booleans.
@ -2,7 +2,7 @@
|
||||
source: crates/milli/src/update/index_documents/mod.rs
|
||||
---
|
||||
1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ]
|
||||
2.2 [21, ]
|
||||
2 [21, ]
|
||||
36 [3, ]
|
||||
37 [4, ]
|
||||
38 [5, ]
|
||||
|
@ -1,15 +1,18 @@
|
||||
---
|
||||
source: crates/milli/src/update/index_documents/mod.rs
|
||||
---
|
||||
0 [1, ]
|
||||
1 [2, ]
|
||||
10.0 [1, ]
|
||||
10 [1, ]
|
||||
1344 [3, ]
|
||||
2 [0, ]
|
||||
25.99 [2, ]
|
||||
3.5 [0, ]
|
||||
25 [2, ]
|
||||
3 [0, ]
|
||||
4 [4, ]
|
||||
42 [5, ]
|
||||
456 [1, ]
|
||||
5 [0, ]
|
||||
99 [2, ]
|
||||
adams [5, ]
|
||||
adventure [1, ]
|
||||
alice [2, ]
|
||||
|
@ -1,16 +1,19 @@
|
||||
---
|
||||
source: crates/milli/src/update/index_documents/mod.rs
|
||||
---
|
||||
0 [1, ]
|
||||
1 [2, ]
|
||||
10.0 [1, ]
|
||||
10 [1, ]
|
||||
1344 [3, ]
|
||||
1813 [0, ]
|
||||
2 [0, ]
|
||||
25.99 [2, ]
|
||||
3.5 [0, ]
|
||||
25 [2, ]
|
||||
3 [0, ]
|
||||
4 [4, ]
|
||||
42 [5, ]
|
||||
456 [1, ]
|
||||
5 [0, ]
|
||||
99 [2, ]
|
||||
adams [5, ]
|
||||
adventure [1, ]
|
||||
alice [2, ]
|
||||
|
@ -48,43 +48,43 @@ impl<'a> DocumentTokenizer<'a> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match value {
|
||||
let text;
|
||||
let tokens = match value {
|
||||
Value::Number(n) => {
|
||||
let token = n.to_string();
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token.as_str())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
text = n.to_string();
|
||||
self.tokenizer.tokenize(text.as_str())
|
||||
}
|
||||
Value::Bool(b) => {
|
||||
text = b.to_string();
|
||||
self.tokenizer.tokenize(text.as_str())
|
||||
}
|
||||
Value::String(text) => {
|
||||
// create an iterator of token with their positions.
|
||||
let locales = self
|
||||
.localized_attributes_rules
|
||||
.iter()
|
||||
.find(|rule| rule.match_str(field_name))
|
||||
.map(|rule| rule.locales());
|
||||
let tokens = process_tokens(
|
||||
*position,
|
||||
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales),
|
||||
)
|
||||
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
|
||||
|
||||
for (index, token) in tokens {
|
||||
// keep a word only if it is not empty and fit in a LMDB key.
|
||||
let token = token.lemma().trim();
|
||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||
*position = index;
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
|
||||
}
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
// create an iterator of token with their positions.
|
||||
let tokens = process_tokens(*position, tokens)
|
||||
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
|
||||
|
||||
for (index, token) in tokens {
|
||||
// keep a word only if it is not empty and fit in a LMDB key.
|
||||
let token = token.lemma().trim();
|
||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||
*position = index;
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token)?;
|
||||
}
|
||||
}
|
||||
_ => Ok(()),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// if the current field is searchable or contains a searchable attribute
|
||||
|
Loading…
Reference in New Issue
Block a user