diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs
index 9f1c22b75..dbc4fcc30 100644
--- a/meilisearch/tests/search/locales.rs
+++ b/meilisearch/tests/search/locales.rs
@@ -386,12 +386,39 @@ async fn force_locales() {
|response, code| {
snapshot!(response, @r###"
{
- "hits": [],
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
"query": "\"进击的巨人\"",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
- "estimatedTotalHits": 0
+ "estimatedTotalHits": 1
}
"###);
snapshot!(code, @"200 OK");
@@ -483,12 +510,39 @@ async fn force_locales_with_pattern() {
|response, code| {
snapshot!(response, @r###"
{
- "hits": [],
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
"query": "\"进击的巨人\"",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
- "estimatedTotalHits": 0
+ "estimatedTotalHits": 1
}
"###);
snapshot!(code, @"200 OK");
@@ -761,6 +815,275 @@ async fn force_different_locales_with_pattern() {
.await;
}
+/// Checks how locales are picked at search time when the request does not specify any,
+/// with and without `attributesToSearchOn`.
+///
+/// The localized rules deliberately mismatch suffix and locale: `*_zh` is forced to `jpn`,
+/// `*_ja` is forced to `cmn`, and `*_en` has an empty locale list. The same quoted phrase is
+/// then searched twice: once on every searchable attribute (snapshot expects no hit) and once
+/// restricted to `name_zh`/`description_zh` (snapshot expects document 853).
+#[actix_rt::test]
+async fn auto_infer_locales_at_search_with_attributes_to_search_on() {
+ let server = Server::new().await;
+
+ let index = server.index("test");
+ let documents = DOCUMENTS.clone();
+ let (response, _) = index
+ .update_settings(
+ json!({
+ "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"],
+ "localizedAttributes": [
+ // force japanese
+ {"attributePatterns": ["*_zh"], "locales": ["jpn"]},
+ // force chinese
+ {"attributePatterns": ["*_ja"], "locales": ["cmn"]},
+ // any language
+ {"attributePatterns": ["*_en"], "locales": []}
+ ]
+ }),
+ )
+ .await;
+ snapshot!(response, @r###"
+ {
+ "taskUid": 0,
+ "indexUid": "test",
+ "status": "enqueued",
+ "type": "settingsUpdate",
+ "enqueuedAt": "[date]"
+ }
+ "###);
+ index.add_documents(documents, None).await;
+ // The settings update above is task 0 (see snapshot); task 1 is presumably the document
+ // addition that was just enqueued.
+ index.wait_task(1).await;
+
+ // No `attributesToSearchOn`: every searchable attribute is considered, including the
+ // `*_en` ones whose locale list is empty ("any language"). The snapshot expects no hit.
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 0
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+
+ // Search restricted to `*_zh` attributes: both are covered by the `*_zh` rule above, which
+ // forces `jpn`. The snapshot expects document 853 to be found.
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"], "attributesToSearchOn": ["name_zh", "description_zh"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 1
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+}
+
+/// Checks that a search without explicit locales finds a document when a single localized
+/// rule forces `jpn` on every attribute (pattern `*`).
+///
+/// The quoted phrase is searched on all searchable attributes and the snapshot expects
+/// document 853 as the only hit.
+#[actix_rt::test]
+async fn auto_infer_locales_at_search() {
+ let server = Server::new().await;
+
+ let index = server.index("test");
+ let documents = DOCUMENTS.clone();
+ let (response, _) = index
+ .update_settings(
+ json!({
+ "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"],
+ "localizedAttributes": [
+ // force japanese on every attribute
+ {"attributePatterns": ["*"], "locales": ["jpn"]},
+ ]
+ }),
+ )
+ .await;
+ snapshot!(response, @r###"
+ {
+ "taskUid": 0,
+ "indexUid": "test",
+ "status": "enqueued",
+ "type": "settingsUpdate",
+ "enqueuedAt": "[date]"
+ }
+ "###);
+ index.add_documents(documents, None).await;
+ // The settings update above is task 0 (see snapshot); task 1 is presumably the document
+ // addition that was just enqueued.
+ index.wait_task(1).await;
+
+ // NOTE(review): the three searches below send the same request and assert the same
+ // snapshot. Confirm whether the repetition is intentional or a copy-paste leftover.
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 1
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 1
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 1
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+}
+
#[actix_rt::test]
async fn force_different_locales_with_pattern_nested() {
let server = Server::new().await;
diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index 301ef9aa2..a05b27947 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -7,6 +7,7 @@ mod facet_search;
mod formatted;
mod geo;
mod hybrid;
+#[cfg(not(feature = "chinese-pinyin"))]
mod locales;
mod matching_strategy;
mod multi;
@@ -392,6 +393,7 @@ async fn negative_special_cases_search() {
}
#[cfg(feature = "default")]
+#[cfg(not(feature = "chinese-pinyin"))]
#[actix_rt::test]
async fn test_kanji_language_detection() {
let server = Server::new().await;
diff --git a/milli/src/localized_attributes_rules.rs b/milli/src/localized_attributes_rules.rs
index 739d03043..3c421ca6b 100644
--- a/milli/src/localized_attributes_rules.rs
+++ b/milli/src/localized_attributes_rules.rs
@@ -90,6 +90,21 @@ impl LocalizedFieldIds {
pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> {
self.field_id_to_locales.get(&fields_id).map(Vec::as_slice)
}
+
+    /// Returns the union of the locales of every field stored in this map, sorted and
+    /// deduplicated.
+    ///
+    /// If any stored field has an empty locale list, an empty `Vec` is returned instead:
+    /// such a field is treated as not localized, so no locale restriction can be derived
+    /// for the whole set. An empty map also yields an empty `Vec`.
+    pub fn all_locales(&self) -> Vec<Language> {
+        let mut locales = Vec::new();
+        for field_locales in self.field_id_to_locales.values() {
+            if field_locales.is_empty() {
+                // If a field has no locales, we consider it as not localized
+                return Vec::new();
+            }
+            locales.extend(field_locales);
+        }
+        // `dedup` only removes consecutive duplicates, hence the sort first.
+        locales.sort();
+        locales.dedup();
+        locales
+    }
}
#[cfg(test)]
diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs
index 0f5eb23e1..3057066d2 100644
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -360,6 +360,7 @@ mod test {
use super::*;
#[cfg(feature = "japanese")]
+ #[cfg(not(feature = "chinese-pinyin"))]
#[test]
fn test_kanji_language_detection() {
use crate::index::tests::TempIndex;
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index 577e12a39..b30306a0b 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use self::vector_sort::VectorSort;
+use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::vector::Embedder;
@@ -671,9 +672,44 @@ pub fn execute_search(
tokbuilder.words_dict(dictionary);
}
- if let Some(locales) = locales {
- tokbuilder.allow_list(locales);
- }
+ let db_locales;
+ match locales {
+ Some(locales) => {
+ if !locales.is_empty() {
+ tokbuilder.allow_list(locales);
+ }
+ }
+ None => {
+ // If no locales are specified, we use the locales specified in the localized attributes rules
+ let localized_attributes_rules = ctx.index.localized_attributes_rules(ctx.txn)?;
+ let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
+ let searchable_fields = ctx.index.searchable_fields_ids(ctx.txn)?;
+
+ let localized_fields = match &ctx.restricted_fids {
+ // if AttributeToSearchOn is set, use the restricted list of ids
+ Some(restricted_fids) => {
+ let iter = restricted_fids
+ .exact
+ .iter()
+ .chain(restricted_fids.tolerant.iter())
+ .map(|(fid, _)| *fid);
+
+ LocalizedFieldIds::new(&localized_attributes_rules, &fields_ids_map, iter)
+ }
+ // Otherwise use the full list of ids coming from the index searchable fields
+ None => LocalizedFieldIds::new(
+ &localized_attributes_rules,
+ &fields_ids_map,
+ searchable_fields.into_iter(),
+ ),
+ };
+
+ db_locales = localized_fields.all_locales();
+ if !db_locales.is_empty() {
+ tokbuilder.allow_list(&db_locales);
+ }
+ }
+ };
let tokenizer = tokbuilder.build();
drop(entered);
diff --git a/milli/src/search/new/tests/mod.rs b/milli/src/search/new/tests/mod.rs
index 0faff9425..37bca7597 100644
--- a/milli/src/search/new/tests/mod.rs
+++ b/milli/src/search/new/tests/mod.rs
@@ -6,6 +6,7 @@ pub mod exactness;
pub mod geo_sort;
pub mod integration;
#[cfg(feature = "all-tokenizations")]
+#[cfg(not(feature = "chinese-pinyin"))]
pub mod language;
pub mod ngram_split_words;
pub mod proximity;