mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-24 19:15:05 +08:00
Compare commits
2 Commits
6f00c1dfb8
...
ff4b3578bf
Author | SHA1 | Date | |
---|---|---|---|
|
ff4b3578bf | ||
|
cb226079fa |
@ -2,7 +2,7 @@
|
||||
source: crates/milli/src/update/index_documents/mod.rs
|
||||
---
|
||||
1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ]
|
||||
2.2 [21, ]
|
||||
2 [21, ]
|
||||
36 [3, ]
|
||||
37 [4, ]
|
||||
38 [5, ]
|
||||
|
@ -1,15 +1,18 @@
|
||||
---
|
||||
source: crates/milli/src/update/index_documents/mod.rs
|
||||
---
|
||||
0 [1, ]
|
||||
1 [2, ]
|
||||
10.0 [1, ]
|
||||
10 [1, ]
|
||||
1344 [3, ]
|
||||
2 [0, ]
|
||||
25.99 [2, ]
|
||||
3.5 [0, ]
|
||||
25 [2, ]
|
||||
3 [0, ]
|
||||
4 [4, ]
|
||||
42 [5, ]
|
||||
456 [1, ]
|
||||
5 [0, ]
|
||||
99 [2, ]
|
||||
adams [5, ]
|
||||
adventure [1, ]
|
||||
alice [2, ]
|
||||
|
@ -1,16 +1,19 @@
|
||||
---
|
||||
source: crates/milli/src/update/index_documents/mod.rs
|
||||
---
|
||||
0 [1, ]
|
||||
1 [2, ]
|
||||
10.0 [1, ]
|
||||
10 [1, ]
|
||||
1344 [3, ]
|
||||
1813 [0, ]
|
||||
2 [0, ]
|
||||
25.99 [2, ]
|
||||
3.5 [0, ]
|
||||
25 [2, ]
|
||||
3 [0, ]
|
||||
4 [4, ]
|
||||
42 [5, ]
|
||||
456 [1, ]
|
||||
5 [0, ]
|
||||
99 [2, ]
|
||||
adams [5, ]
|
||||
adventure [1, ]
|
||||
alice [2, ]
|
||||
|
@ -24,25 +24,46 @@ pub fn extract_document_facets<'doc>(
|
||||
};
|
||||
|
||||
// if the current field is searchable or contains a searchable attribute
|
||||
if perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]) {
|
||||
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
|
||||
if selection != perm_json_p::Selection::Skip {
|
||||
// parse json.
|
||||
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => perm_json_p::seek_leaf_values_in_object(
|
||||
&object,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?,
|
||||
Value::Array(array) => perm_json_p::seek_leaf_values_in_array(
|
||||
&array,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?,
|
||||
Value::Object(object) => {
|
||||
perm_json_p::seek_leaf_values_in_object(
|
||||
&object,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?;
|
||||
|
||||
if selection == perm_json_p::Selection::Select {
|
||||
tokenize_field(
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&Value::Object(object),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Value::Array(array) => {
|
||||
perm_json_p::seek_leaf_values_in_array(
|
||||
&array,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?;
|
||||
|
||||
if selection == perm_json_p::Selection::Select {
|
||||
tokenize_field(
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&Value::Array(array),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
||||
}
|
||||
}
|
||||
|
@ -88,25 +88,37 @@ pub mod perm_json_p {
|
||||
|
||||
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
|
||||
// so we check the contained_in on both sides
|
||||
let should_continue = select_field(&base_key, selectors, skip_selectors);
|
||||
if should_continue {
|
||||
let selection = select_field(&base_key, selectors, skip_selectors);
|
||||
if selection != Selection::Skip {
|
||||
match value {
|
||||
Value::Object(object) => seek_leaf_values_in_object(
|
||||
object,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
&base_key,
|
||||
Depth::OnBaseKey,
|
||||
seeker,
|
||||
),
|
||||
Value::Array(array) => seek_leaf_values_in_array(
|
||||
array,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
&base_key,
|
||||
Depth::OnBaseKey,
|
||||
seeker,
|
||||
),
|
||||
Value::Object(object) => {
|
||||
if selection == Selection::Select {
|
||||
seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||
}
|
||||
|
||||
seek_leaf_values_in_object(
|
||||
object,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
&base_key,
|
||||
Depth::OnBaseKey,
|
||||
seeker,
|
||||
)
|
||||
}
|
||||
Value::Array(array) => {
|
||||
if selection == Selection::Select {
|
||||
seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||
}
|
||||
|
||||
seek_leaf_values_in_array(
|
||||
array,
|
||||
selectors,
|
||||
skip_selectors,
|
||||
&base_key,
|
||||
Depth::OnBaseKey,
|
||||
seeker,
|
||||
)
|
||||
}
|
||||
value => seeker(&base_key, Depth::OnBaseKey, value),
|
||||
}?;
|
||||
}
|
||||
@ -156,13 +168,37 @@ pub mod perm_json_p {
|
||||
field_name: &str,
|
||||
selectors: Option<&[&str]>,
|
||||
skip_selectors: &[&str],
|
||||
) -> bool {
|
||||
selectors.map_or(true, |selectors| {
|
||||
selectors.iter().any(|selector| {
|
||||
contained_in(selector, field_name) || contained_in(field_name, selector)
|
||||
})
|
||||
}) && !skip_selectors.iter().any(|skip_selector| {
|
||||
) -> Selection {
|
||||
if skip_selectors.iter().any(|skip_selector| {
|
||||
contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector)
|
||||
})
|
||||
}) {
|
||||
Selection::Skip
|
||||
} else if let Some(selectors) = selectors {
|
||||
selectors
|
||||
.iter()
|
||||
.filter_map(|selector| {
|
||||
if contained_in(field_name, selector) {
|
||||
Some(Selection::Select)
|
||||
} else if contained_in(selector, field_name) {
|
||||
Some(Selection::Parent)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()
|
||||
.unwrap_or(Selection::Skip)
|
||||
} else {
|
||||
Selection::Select
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Selection {
|
||||
/// The field is a parent of a nested field that must be selected
|
||||
Parent,
|
||||
/// The field must be selected
|
||||
Select,
|
||||
/// The field must be skipped
|
||||
Skip,
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ use serde_json::Value;
|
||||
|
||||
use crate::update::new::document::Document;
|
||||
use crate::update::new::extract::perm_json_p::{
|
||||
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth,
|
||||
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth, Selection,
|
||||
};
|
||||
use crate::{
|
||||
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
|
||||
@ -48,47 +48,49 @@ impl<'a> DocumentTokenizer<'a> {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match value {
|
||||
let text;
|
||||
let tokens = match value {
|
||||
Value::Number(n) => {
|
||||
let token = n.to_string();
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token.as_str())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
text = n.to_string();
|
||||
self.tokenizer.tokenize(text.as_str())
|
||||
}
|
||||
Value::Bool(b) => {
|
||||
text = b.to_string();
|
||||
self.tokenizer.tokenize(text.as_str())
|
||||
}
|
||||
Value::String(text) => {
|
||||
// create an iterator of token with their positions.
|
||||
let locales = self
|
||||
.localized_attributes_rules
|
||||
.iter()
|
||||
.find(|rule| rule.match_str(field_name))
|
||||
.map(|rule| rule.locales());
|
||||
let tokens = process_tokens(
|
||||
*position,
|
||||
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales),
|
||||
)
|
||||
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
|
||||
|
||||
for (index, token) in tokens {
|
||||
// keep a word only if it is not empty and fit in a LMDB key.
|
||||
let token = token.lemma().trim();
|
||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||
*position = index;
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
|
||||
}
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
// create an iterator of token with their positions.
|
||||
let tokens = process_tokens(*position, tokens)
|
||||
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
|
||||
|
||||
for (index, token) in tokens {
|
||||
// keep a word only if it is not empty and fits in an LMDB key.
|
||||
let token = token.lemma().trim();
|
||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||
*position = index;
|
||||
if let Ok(position) = (*position).try_into() {
|
||||
token_fn(field_name, field_id, position, token)?;
|
||||
}
|
||||
}
|
||||
_ => Ok(()),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// if the current field is searchable or contains a searchable attribute
|
||||
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip) {
|
||||
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
|
||||
!= Selection::Skip
|
||||
{
|
||||
// parse json.
|
||||
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => seek_leaf_values_in_object(
|
||||
|
Loading…
Reference in New Issue
Block a user