Mirror of https://github.com/meilisearch/meilisearch.git (synced 2024-11-22 10:07:40 +08:00)

Use tokenizer on numbers and booleans

This commit is contained in: commit cb226079fa (parent 6f00c1dfb8)
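What the diff below shows (a summary, not part of the original commit message): JSON numbers and booleans are now stringified and passed through the same charabia tokenizer as text values, instead of numbers being written out as a single raw token and booleans being skipped. A decimal such as 25.99 is therefore indexed as the words "25" and "99". A minimal sketch of that effect, with a plain alphanumeric split standing in for charabia's tokenizer (the helper names below are illustrative, not milli code):

use serde_json::{json, Value};

// Stand-in for charabia: split on anything that is not alphanumeric.
fn tokenize(text: &str) -> Vec<String> {
    text.split(|c: char| !c.is_alphanumeric())
        .filter(|t| !t.is_empty())
        .map(str::to_owned)
        .collect()
}

// Numbers and booleans go through the same tokenizer as strings
// instead of receiving special treatment.
fn tokens_for(value: &Value) -> Vec<String> {
    match value {
        Value::Number(n) => tokenize(&n.to_string()),
        Value::Bool(b) => tokenize(&b.to_string()),
        Value::String(s) => tokenize(s),
        _ => Vec::new(),
    }
}

fn main() {
    assert_eq!(tokens_for(&json!(25.99)), ["25", "99"]); // previously the single raw token "25.99"
    assert_eq!(tokens_for(&json!(true)), ["true"]);      // previously not indexed at all
    assert_eq!(tokens_for(&json!("Douglas Adams")), ["Douglas", "Adams"]);
}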
@@ -2,7 +2,7 @@
 source: crates/milli/src/update/index_documents/mod.rs
 ---
 1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ]
-2.2 [21, ]
+2 [21, ]
 36 [3, ]
 37 [4, ]
 38 [5, ]
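In this first snapshot the only change is the entry `2.2 [21, ]` becoming `2 [21, ]`: the decimal value is now split by the tokenizer into its digit groups instead of being kept as the raw word "2.2".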
@@ -1,15 +1,18 @@
 ---
 source: crates/milli/src/update/index_documents/mod.rs
 ---
+0 [1, ]
 1 [2, ]
-10.0 [1, ]
+10 [1, ]
 1344 [3, ]
 2 [0, ]
-25.99 [2, ]
-3.5 [0, ]
+25 [2, ]
+3 [0, ]
 4 [4, ]
 42 [5, ]
 456 [1, ]
+5 [0, ]
+99 [2, ]
 adams [5, ]
 adventure [1, ]
 alice [2, ]
@@ -1,16 +1,19 @@
 ---
 source: crates/milli/src/update/index_documents/mod.rs
 ---
+0 [1, ]
 1 [2, ]
-10.0 [1, ]
+10 [1, ]
 1344 [3, ]
 1813 [0, ]
 2 [0, ]
-25.99 [2, ]
-3.5 [0, ]
+25 [2, ]
+3 [0, ]
 4 [4, ]
 42 [5, ]
 456 [1, ]
+5 [0, ]
+99 [2, ]
 adams [5, ]
 adventure [1, ]
 alice [2, ]
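The two longer word-to-document-ids snapshots show the same effect across a small document set: the decimal values 3.5, 10.0 and 25.99 no longer appear as raw words. Their integer and fractional parts are indexed instead, which is why 3, 10 and 25 replace them and the new entries `0 [1, ]`, `5 [0, ]` and `99 [2, ]` appear.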
@@ -48,43 +48,43 @@ impl<'a> DocumentTokenizer<'a> {
                 return Ok(());
             }
 
-            match value {
+            let text;
+            let tokens = match value {
                 Value::Number(n) => {
-                    let token = n.to_string();
-                    if let Ok(position) = (*position).try_into() {
-                        token_fn(field_name, field_id, position, token.as_str())?;
-                    }
-
-                    Ok(())
+                    text = n.to_string();
+                    self.tokenizer.tokenize(text.as_str())
+                }
+                Value::Bool(b) => {
+                    text = b.to_string();
+                    self.tokenizer.tokenize(text.as_str())
                 }
                 Value::String(text) => {
-                    // create an iterator of token with their positions.
                     let locales = self
                         .localized_attributes_rules
                         .iter()
                         .find(|rule| rule.match_str(field_name))
                         .map(|rule| rule.locales());
-                    let tokens = process_tokens(
-                        *position,
-                        self.tokenizer.tokenize_with_allow_list(text.as_str(), locales),
-                    )
-                    .take_while(|(p, _)| *p < self.max_positions_per_attributes);
-
-                    for (index, token) in tokens {
-                        // keep a word only if it is not empty and fit in a LMDB key.
-                        let token = token.lemma().trim();
-                        if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
-                            *position = index;
-                            if let Ok(position) = (*position).try_into() {
-                                token_fn(field_name, field_id, position, token)?;
-                            }
-                        }
-                    }
-
-                    Ok(())
+                    self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
                 }
-                _ => Ok(()),
-            }
+                _ => return Ok(()),
+            };
+
+            // create an iterator of token with their positions.
+            let tokens = process_tokens(*position, tokens)
+                .take_while(|(p, _)| *p < self.max_positions_per_attributes);
+
+            for (index, token) in tokens {
+                // keep a word only if it is not empty and fit in a LMDB key.
+                let token = token.lemma().trim();
+                if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
+                    *position = index;
+                    if let Ok(position) = (*position).try_into() {
+                        token_fn(field_name, field_id, position, token)?;
+                    }
+                }
+            }
+
+            Ok(())
         };
 
         // if the current field is searchable or contains a searchable attribute
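A note on the shape of the new code (a reading of the refactor, not something stated in the commit): `text` is declared before the `match` so that the `String` built for a number or boolean outlives the match block, letting every arm produce a token stream that borrows either that buffer or the original string value; the position bookkeeping (`process_tokens`, `take_while`, the `MAX_WORD_LENGTH` check) is then shared by all value kinds instead of living only in the string arm. A minimal standalone sketch of that borrowing pattern, with a plain `&str` split in place of charabia tokens:

use serde_json::{json, Value};

fn print_tokens(value: &Value) {
    // `text` must be declared before the match: the &str produced by the
    // Number/Bool arms borrows from it, so the owned String has to live
    // longer than the match expression itself.
    let text;
    let as_text: &str = match value {
        Value::Number(n) => {
            text = n.to_string();
            text.as_str()
        }
        Value::Bool(b) => {
            text = b.to_string();
            text.as_str()
        }
        Value::String(s) => s.as_str(),
        _ => return,
    };

    // Shared post-processing, regardless of the original value type.
    for token in as_text.split(|c: char| !c.is_alphanumeric()).filter(|t| !t.is_empty()) {
        println!("{token}");
    }
}

fn main() {
    print_tokens(&json!(25.99));           // 25, 99
    print_tokens(&json!(true));            // true
    print_tokens(&json!("Douglas Adams")); // Douglas, Adams
}

Without the up-front `let text;`, each arm would have to collect its tokens eagerly, because a borrow of a `String` temporary created inside an arm could not escape the match.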