mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-24 19:15:05 +08:00
Compare commits
2 Commits
6f00c1dfb8
...
ff4b3578bf
Author | SHA1 | Date | |
---|---|---|---|
|
ff4b3578bf | ||
|
cb226079fa |
@ -2,7 +2,7 @@
|
|||||||
source: crates/milli/src/update/index_documents/mod.rs
|
source: crates/milli/src/update/index_documents/mod.rs
|
||||||
---
|
---
|
||||||
1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ]
|
1 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ]
|
||||||
2.2 [21, ]
|
2 [21, ]
|
||||||
36 [3, ]
|
36 [3, ]
|
||||||
37 [4, ]
|
37 [4, ]
|
||||||
38 [5, ]
|
38 [5, ]
|
||||||
|
@ -1,15 +1,18 @@
|
|||||||
---
|
---
|
||||||
source: crates/milli/src/update/index_documents/mod.rs
|
source: crates/milli/src/update/index_documents/mod.rs
|
||||||
---
|
---
|
||||||
|
0 [1, ]
|
||||||
1 [2, ]
|
1 [2, ]
|
||||||
10.0 [1, ]
|
10 [1, ]
|
||||||
1344 [3, ]
|
1344 [3, ]
|
||||||
2 [0, ]
|
2 [0, ]
|
||||||
25.99 [2, ]
|
25 [2, ]
|
||||||
3.5 [0, ]
|
3 [0, ]
|
||||||
4 [4, ]
|
4 [4, ]
|
||||||
42 [5, ]
|
42 [5, ]
|
||||||
456 [1, ]
|
456 [1, ]
|
||||||
|
5 [0, ]
|
||||||
|
99 [2, ]
|
||||||
adams [5, ]
|
adams [5, ]
|
||||||
adventure [1, ]
|
adventure [1, ]
|
||||||
alice [2, ]
|
alice [2, ]
|
||||||
|
@ -1,16 +1,19 @@
|
|||||||
---
|
---
|
||||||
source: crates/milli/src/update/index_documents/mod.rs
|
source: crates/milli/src/update/index_documents/mod.rs
|
||||||
---
|
---
|
||||||
|
0 [1, ]
|
||||||
1 [2, ]
|
1 [2, ]
|
||||||
10.0 [1, ]
|
10 [1, ]
|
||||||
1344 [3, ]
|
1344 [3, ]
|
||||||
1813 [0, ]
|
1813 [0, ]
|
||||||
2 [0, ]
|
2 [0, ]
|
||||||
25.99 [2, ]
|
25 [2, ]
|
||||||
3.5 [0, ]
|
3 [0, ]
|
||||||
4 [4, ]
|
4 [4, ]
|
||||||
42 [5, ]
|
42 [5, ]
|
||||||
456 [1, ]
|
456 [1, ]
|
||||||
|
5 [0, ]
|
||||||
|
99 [2, ]
|
||||||
adams [5, ]
|
adams [5, ]
|
||||||
adventure [1, ]
|
adventure [1, ]
|
||||||
alice [2, ]
|
alice [2, ]
|
||||||
|
@ -24,25 +24,46 @@ pub fn extract_document_facets<'doc>(
|
|||||||
};
|
};
|
||||||
|
|
||||||
// if the current field is searchable or contains a searchable attribute
|
// if the current field is searchable or contains a searchable attribute
|
||||||
if perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]) {
|
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
|
||||||
|
if selection != perm_json_p::Selection::Skip {
|
||||||
// parse json.
|
// parse json.
|
||||||
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
|
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||||
Value::Object(object) => perm_json_p::seek_leaf_values_in_object(
|
Value::Object(object) => {
|
||||||
&object,
|
perm_json_p::seek_leaf_values_in_object(
|
||||||
Some(attributes_to_extract),
|
&object,
|
||||||
&[], // skip no attributes
|
Some(attributes_to_extract),
|
||||||
field_name,
|
&[], // skip no attributes
|
||||||
perm_json_p::Depth::OnBaseKey,
|
field_name,
|
||||||
&mut tokenize_field,
|
perm_json_p::Depth::OnBaseKey,
|
||||||
)?,
|
&mut tokenize_field,
|
||||||
Value::Array(array) => perm_json_p::seek_leaf_values_in_array(
|
)?;
|
||||||
&array,
|
|
||||||
Some(attributes_to_extract),
|
if selection == perm_json_p::Selection::Select {
|
||||||
&[], // skip no attributes
|
tokenize_field(
|
||||||
field_name,
|
field_name,
|
||||||
perm_json_p::Depth::OnBaseKey,
|
perm_json_p::Depth::OnBaseKey,
|
||||||
&mut tokenize_field,
|
&Value::Object(object),
|
||||||
)?,
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Array(array) => {
|
||||||
|
perm_json_p::seek_leaf_values_in_array(
|
||||||
|
&array,
|
||||||
|
Some(attributes_to_extract),
|
||||||
|
&[], // skip no attributes
|
||||||
|
field_name,
|
||||||
|
perm_json_p::Depth::OnBaseKey,
|
||||||
|
&mut tokenize_field,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
if selection == perm_json_p::Selection::Select {
|
||||||
|
tokenize_field(
|
||||||
|
field_name,
|
||||||
|
perm_json_p::Depth::OnBaseKey,
|
||||||
|
&Value::Array(array),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -88,25 +88,37 @@ pub mod perm_json_p {
|
|||||||
|
|
||||||
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
|
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
|
||||||
// so we check the contained_in on both sides
|
// so we check the contained_in on both sides
|
||||||
let should_continue = select_field(&base_key, selectors, skip_selectors);
|
let selection = select_field(&base_key, selectors, skip_selectors);
|
||||||
if should_continue {
|
if selection != Selection::Skip {
|
||||||
match value {
|
match value {
|
||||||
Value::Object(object) => seek_leaf_values_in_object(
|
Value::Object(object) => {
|
||||||
object,
|
if selection == Selection::Select {
|
||||||
selectors,
|
seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||||
skip_selectors,
|
}
|
||||||
&base_key,
|
|
||||||
Depth::OnBaseKey,
|
seek_leaf_values_in_object(
|
||||||
seeker,
|
object,
|
||||||
),
|
selectors,
|
||||||
Value::Array(array) => seek_leaf_values_in_array(
|
skip_selectors,
|
||||||
array,
|
&base_key,
|
||||||
selectors,
|
Depth::OnBaseKey,
|
||||||
skip_selectors,
|
seeker,
|
||||||
&base_key,
|
)
|
||||||
Depth::OnBaseKey,
|
}
|
||||||
seeker,
|
Value::Array(array) => {
|
||||||
),
|
if selection == Selection::Select {
|
||||||
|
seeker(&base_key, Depth::OnBaseKey, value)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
seek_leaf_values_in_array(
|
||||||
|
array,
|
||||||
|
selectors,
|
||||||
|
skip_selectors,
|
||||||
|
&base_key,
|
||||||
|
Depth::OnBaseKey,
|
||||||
|
seeker,
|
||||||
|
)
|
||||||
|
}
|
||||||
value => seeker(&base_key, Depth::OnBaseKey, value),
|
value => seeker(&base_key, Depth::OnBaseKey, value),
|
||||||
}?;
|
}?;
|
||||||
}
|
}
|
||||||
@ -156,13 +168,37 @@ pub mod perm_json_p {
|
|||||||
field_name: &str,
|
field_name: &str,
|
||||||
selectors: Option<&[&str]>,
|
selectors: Option<&[&str]>,
|
||||||
skip_selectors: &[&str],
|
skip_selectors: &[&str],
|
||||||
) -> bool {
|
) -> Selection {
|
||||||
selectors.map_or(true, |selectors| {
|
if skip_selectors.iter().any(|skip_selector| {
|
||||||
selectors.iter().any(|selector| {
|
|
||||||
contained_in(selector, field_name) || contained_in(field_name, selector)
|
|
||||||
})
|
|
||||||
}) && !skip_selectors.iter().any(|skip_selector| {
|
|
||||||
contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector)
|
contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector)
|
||||||
})
|
}) {
|
||||||
|
Selection::Skip
|
||||||
|
} else if let Some(selectors) = selectors {
|
||||||
|
selectors
|
||||||
|
.iter()
|
||||||
|
.filter_map(|selector| {
|
||||||
|
if contained_in(field_name, selector) {
|
||||||
|
Some(Selection::Select)
|
||||||
|
} else if contained_in(selector, field_name) {
|
||||||
|
Some(Selection::Parent)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.next()
|
||||||
|
.unwrap_or(Selection::Skip)
|
||||||
|
} else {
|
||||||
|
Selection::Select
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum Selection {
|
||||||
|
/// The field is a parent of a nested field that must be selected
|
||||||
|
Parent,
|
||||||
|
/// The field must be selected
|
||||||
|
Select,
|
||||||
|
/// The field must be skipped
|
||||||
|
Skip,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,7 @@ use serde_json::Value;
|
|||||||
|
|
||||||
use crate::update::new::document::Document;
|
use crate::update::new::document::Document;
|
||||||
use crate::update::new::extract::perm_json_p::{
|
use crate::update::new::extract::perm_json_p::{
|
||||||
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth,
|
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth, Selection,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
|
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
|
||||||
@ -48,47 +48,49 @@ impl<'a> DocumentTokenizer<'a> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
match value {
|
let text;
|
||||||
|
let tokens = match value {
|
||||||
Value::Number(n) => {
|
Value::Number(n) => {
|
||||||
let token = n.to_string();
|
text = n.to_string();
|
||||||
if let Ok(position) = (*position).try_into() {
|
self.tokenizer.tokenize(text.as_str())
|
||||||
token_fn(field_name, field_id, position, token.as_str())?;
|
}
|
||||||
}
|
Value::Bool(b) => {
|
||||||
|
text = b.to_string();
|
||||||
Ok(())
|
self.tokenizer.tokenize(text.as_str())
|
||||||
}
|
}
|
||||||
Value::String(text) => {
|
Value::String(text) => {
|
||||||
// create an iterator of token with their positions.
|
|
||||||
let locales = self
|
let locales = self
|
||||||
.localized_attributes_rules
|
.localized_attributes_rules
|
||||||
.iter()
|
.iter()
|
||||||
.find(|rule| rule.match_str(field_name))
|
.find(|rule| rule.match_str(field_name))
|
||||||
.map(|rule| rule.locales());
|
.map(|rule| rule.locales());
|
||||||
let tokens = process_tokens(
|
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
|
||||||
*position,
|
}
|
||||||
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales),
|
_ => return Ok(()),
|
||||||
)
|
};
|
||||||
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
|
|
||||||
|
// create an iterator of token with their positions.
|
||||||
for (index, token) in tokens {
|
let tokens = process_tokens(*position, tokens)
|
||||||
// keep a word only if it is not empty and fit in a LMDB key.
|
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
|
||||||
let token = token.lemma().trim();
|
|
||||||
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
for (index, token) in tokens {
|
||||||
*position = index;
|
// keep a word only if it is not empty and fit in a LMDB key.
|
||||||
if let Ok(position) = (*position).try_into() {
|
let token = token.lemma().trim();
|
||||||
token_fn(field_name, field_id, position, token)?;
|
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
|
||||||
}
|
*position = index;
|
||||||
}
|
if let Ok(position) = (*position).try_into() {
|
||||||
}
|
token_fn(field_name, field_id, position, token)?;
|
||||||
|
}
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
_ => Ok(()),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
};
|
};
|
||||||
|
|
||||||
// if the current field is searchable or contains a searchable attribute
|
// if the current field is searchable or contains a searchable attribute
|
||||||
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip) {
|
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
|
||||||
|
!= Selection::Skip
|
||||||
|
{
|
||||||
// parse json.
|
// parse json.
|
||||||
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
|
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||||
Value::Object(object) => seek_leaf_values_in_object(
|
Value::Object(object) => seek_leaf_values_in_object(
|
||||||
|
Loading…
Reference in New Issue
Block a user