Use tokenizer on numbers and booleans

This commit is contained in:
ManyTheFish 2024-11-20 08:28:24 +01:00
parent 6f00c1dfb8
commit cb226079fa
4 changed files with 40 additions and 34 deletions

View File

@@ -2,7 +2,7 @@
source: crates/milli/src/update/index_documents/mod.rs source: crates/milli/src/update/index_documents/mod.rs
--- ---
1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ] 1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ]
2.2 [21, ] 2 [21, ]
36 [3, ] 36 [3, ]
37 [4, ] 37 [4, ]
38 [5, ] 38 [5, ]

View File

@@ -1,15 +1,18 @@
--- ---
source: crates/milli/src/update/index_documents/mod.rs source: crates/milli/src/update/index_documents/mod.rs
--- ---
0 [1, ]
1 [2, ] 1 [2, ]
10.0 [1, ] 10 [1, ]
1344 [3, ] 1344 [3, ]
2 [0, ] 2 [0, ]
25.99 [2, ] 25 [2, ]
3.5 [0, ] 3 [0, ]
4 [4, ] 4 [4, ]
42 [5, ] 42 [5, ]
456 [1, ] 456 [1, ]
5 [0, ]
99 [2, ]
adams [5, ] adams [5, ]
adventure [1, ] adventure [1, ]
alice [2, ] alice [2, ]

View File

@@ -1,16 +1,19 @@
--- ---
source: crates/milli/src/update/index_documents/mod.rs source: crates/milli/src/update/index_documents/mod.rs
--- ---
0 [1, ]
1 [2, ] 1 [2, ]
10.0 [1, ] 10 [1, ]
1344 [3, ] 1344 [3, ]
1813 [0, ] 1813 [0, ]
2 [0, ] 2 [0, ]
25.99 [2, ] 25 [2, ]
3.5 [0, ] 3 [0, ]
4 [4, ] 4 [4, ]
42 [5, ] 42 [5, ]
456 [1, ] 456 [1, ]
5 [0, ]
99 [2, ]
adams [5, ] adams [5, ]
adventure [1, ] adventure [1, ]
alice [2, ] alice [2, ]

View File

@@ -48,26 +48,29 @@ impl<'a> DocumentTokenizer<'a> {
return Ok(()); return Ok(());
} }
match value { let text;
let tokens = match value {
Value::Number(n) => { Value::Number(n) => {
let token = n.to_string(); text = n.to_string();
if let Ok(position) = (*position).try_into() { self.tokenizer.tokenize(text.as_str())
token_fn(field_name, field_id, position, token.as_str())?;
} }
Value::Bool(b) => {
Ok(()) text = b.to_string();
self.tokenizer.tokenize(text.as_str())
} }
Value::String(text) => { Value::String(text) => {
// create an iterator of token with their positions.
let locales = self let locales = self
.localized_attributes_rules .localized_attributes_rules
.iter() .iter()
.find(|rule| rule.match_str(field_name)) .find(|rule| rule.match_str(field_name))
.map(|rule| rule.locales()); .map(|rule| rule.locales());
let tokens = process_tokens( self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
*position, }
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales), _ => return Ok(()),
) };
// create an iterator of token with their positions.
let tokens = process_tokens(*position, tokens)
.take_while(|(p, _)| *p < self.max_positions_per_attributes); .take_while(|(p, _)| *p < self.max_positions_per_attributes);
for (index, token) in tokens { for (index, token) in tokens {
@@ -82,9 +85,6 @@ impl<'a> DocumentTokenizer<'a> {
} }
Ok(()) Ok(())
}
_ => Ok(()),
}
}; };
// if the current field is searchable or contains a searchable attribute // if the current field is searchable or contains a searchable attribute