Parse every attribute and filter before tokenization

ManyTheFish 2024-11-20 15:08:27 +01:00
parent ff9c92c409
commit 4d616f8794
3 changed files with 33 additions and 50 deletions

@@ -1623,15 +1623,14 @@ async fn change_attributes_settings() {
     index
         .search(
             json!({
-                "q": "bobby"
+                "q": "bobby",
+                "attributesToRetrieve": ["id", "doggos"]
             }),
             |response, code| {
                 assert_eq!(code, 200, "{}", response);
                 meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
                 [
                   {
-                    "father": "jean",
-                    "mother": "michelle",
                     "id": 852,
                     "doggos": [
                       {
@@ -1642,15 +1641,7 @@ async fn change_attributes_settings() {
                         "name": "buddy",
                         "age": 4
                       }
-                    ],
-                    "cattos": "pésti",
-                    "_vectors": {
-                      "manual": [
-                        1.0,
-                        2.0,
-                        3.0
-                      ]
-                    }
+                    ]
                   }
                 ]
                 "###);
@@ -1663,15 +1654,14 @@ async fn change_attributes_settings() {
         .search(
             json!({
                 "q": "",
-                "filter": "doggos.age < 5"
+                "filter": "doggos.age < 5",
+                "attributesToRetrieve": ["id", "doggos"]
             }),
             |response, code| {
                 assert_eq!(code, 200, "{}", response);
                 meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
                 [
                   {
-                    "father": "jean",
-                    "mother": "michelle",
                     "id": 852,
                     "doggos": [
                       {
@@ -1682,15 +1672,7 @@ async fn change_attributes_settings() {
                         "name": "buddy",
                         "age": 4
                       }
-                    ],
-                    "cattos": "pésti",
-                    "_vectors": {
-                      "manual": [
-                        1.0,
-                        2.0,
-                        3.0
-                      ]
-                    }
+                    ]
                   }
                 ]
                 "###);

@@ -75,12 +75,12 @@ pub trait SearchableExtractor: Sized + Sync {
         let dictionary = indexing_context.index.dictionary(&rtxn)?;
         let dictionary: Option<Vec<_>> =
             dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-        let builder = tokenizer_builder(
+        let mut builder = tokenizer_builder(
             stop_words.as_ref(),
             allowed_separators.as_deref(),
             dictionary.as_deref(),
         );
-        let tokenizer = builder.into_tokenizer();
+        let tokenizer = builder.build();
         let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
         let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
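The switch from `into_tokenizer()` to `build()` presumably trades a consuming constructor for one that borrows the builder mutably; the `&mut self` receiver is inferred from the diff's new `let mut builder` binding. A minimal sketch of the two styles with hypothetical types (not charabia's real API):

    // Hypothetical builder types illustrating the two construction styles.
    struct Tokenizer {
        lowercase: bool,
    }

    struct TokenizerBuilder {
        lowercase: bool,
    }

    impl TokenizerBuilder {
        fn new() -> Self {
            Self { lowercase: true }
        }

        // Consuming constructor: the builder is moved and cannot be reused.
        fn into_tokenizer(self) -> Tokenizer {
            Tokenizer { lowercase: self.lowercase }
        }

        // Borrowing constructor: takes `&mut self`, so the binding must be
        // `let mut builder`, but the builder stays alive for reuse.
        fn build(&mut self) -> Tokenizer {
            Tokenizer { lowercase: self.lowercase }
        }
    }

    fn main() {
        let mut builder = TokenizerBuilder::new();
        let tokenizer = builder.build(); // builder is still usable afterwards
        let _ = (tokenizer.lowercase, builder.into_tokenizer());
    }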

@@ -40,6 +40,12 @@ impl<'a> DocumentTokenizer<'a> {
             return Err(UserError::AttributeLimitReached.into());
         };
+        if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
+            != Selection::Select
+        {
+            return Ok(());
+        }
         let position = field_position
             .entry(field_id)
             .and_modify(|counter| *counter += MAX_DISTANCE)
@@ -87,30 +93,25 @@ impl<'a> DocumentTokenizer<'a> {
             Ok(())
         };
-        // if the current field is searchable or contains a searchable attribute
-        if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
-            != Selection::Skip
-        {
-            // parse json.
-            match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
-                Value::Object(object) => seek_leaf_values_in_object(
-                    &object,
-                    self.attribute_to_extract,
-                    self.attribute_to_skip,
-                    field_name,
-                    Depth::OnBaseKey,
-                    &mut tokenize_field,
-                )?,
-                Value::Array(array) => seek_leaf_values_in_array(
-                    &array,
-                    self.attribute_to_extract,
-                    self.attribute_to_skip,
-                    field_name,
-                    Depth::OnBaseKey,
-                    &mut tokenize_field,
-                )?,
-                value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
-            }
-        }
+        // parse json.
+        match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
+            Value::Object(object) => seek_leaf_values_in_object(
+                &object,
+                None,
+                &[],
+                field_name,
+                Depth::OnBaseKey,
+                &mut tokenize_field,
+            )?,
+            Value::Array(array) => seek_leaf_values_in_array(
+                &array,
+                None,
+                &[],
+                field_name,
+                Depth::OnBaseKey,
+                &mut tokenize_field,
+            )?,
+            value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
+        }
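Taken together, the two hunks above move the searchable-field filter from the top of the document walk down to each leaf: `seek_leaf_values_in_object` and `seek_leaf_values_in_array` now receive `None` and `&[]` so they recurse through every attribute, and the `tokenize_field` closure bails out early unless `select_field` returns `Selection::Select` for the fully qualified field name. A simplified sketch of that control flow, with a stand-in `select_field` (milli's real matching is dot-path aware; this prefix check is not):

    use serde_json::Value;

    #[derive(PartialEq)]
    enum Selection {
        Select,
        Parent,
        Skip,
    }

    // Stand-in for milli's select_field: decide whether a dotted field path
    // is searchable, only a parent of searchable paths, or skipped entirely.
    fn select_field(field_name: &str, extract: Option<&[&str]>, skip: &[&str]) -> Selection {
        if skip.iter().any(|s| field_name.starts_with(s)) {
            return Selection::Skip;
        }
        match extract {
            // No restriction configured: every field is searchable.
            None => Selection::Select,
            Some(attrs) if attrs.iter().any(|a| field_name.starts_with(a)) => Selection::Select,
            Some(attrs) if attrs.iter().any(|a| a.starts_with(field_name)) => Selection::Parent,
            Some(_) => Selection::Skip,
        }
    }

    // Recurse through every attribute and filter only at the leaves,
    // mirroring the per-leaf `!= Selection::Select { return Ok(()); }` check.
    fn tokenize_leaves(field_name: &str, value: &Value, extract: Option<&[&str]>, skip: &[&str]) {
        match value {
            Value::Object(object) => {
                for (key, v) in object {
                    tokenize_leaves(&format!("{field_name}.{key}"), v, extract, skip);
                }
            }
            Value::Array(array) => {
                for v in array {
                    tokenize_leaves(field_name, v, extract, skip);
                }
            }
            leaf => {
                if select_field(field_name, extract, skip) == Selection::Select {
                    println!("tokenize {field_name}: {leaf}");
                }
            }
        }
    }

    fn main() {
        let doc = serde_json::json!({
            "doggos": [{ "name": "bobby", "age": 2 }],
            "cattos": "pésti"
        });
        for (field, value) in doc.as_object().unwrap() {
            // Only `doggos.*` leaves reach the tokenizer.
            tokenize_leaves(field, value, Some(&["doggos"]), &[]);
        }
    }

Checking at the leaves means a parent field such as `doggos` no longer has to be wholly searchable or wholly skipped; each nested path such as `doggos.name` is decided on its own, which is what lets the extractor parse every attribute before filtering.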