mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Add a threshold filtering the Languages allowed to be detected at search time
This commit is contained in:
parent
da48506f15
commit
37d4551e8e
@ -1211,11 +1211,22 @@ impl Index {
|
|||||||
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
|
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
|
||||||
|
|
||||||
let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
|
let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
|
||||||
|
let mut script_language_doc_count: Vec<(Script, Language, u64)> = Vec::new();
|
||||||
|
let mut total = 0;
|
||||||
for sl in self.script_language_docids.iter(rtxn)? {
|
for sl in self.script_language_docids.iter(rtxn)? {
|
||||||
let ((script, language), docids) = sl?;
|
let ((script, language), docids) = sl?;
|
||||||
|
|
||||||
// keep only Languages that contain at least 1 document.
|
// keep only Languages that contain at least 1 document.
|
||||||
if !soft_deleted_documents.is_superset(&docids) {
|
let remaining_documents_count = (docids - &soft_deleted_documents).len();
|
||||||
|
total += remaining_documents_count;
|
||||||
|
if remaining_documents_count > 0 {
|
||||||
|
script_language_doc_count.push((script, language, remaining_documents_count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let threshold = total / 20; // 5% (arbitrary)
|
||||||
|
for (script, language, count) in script_language_doc_count {
|
||||||
|
if count > threshold {
|
||||||
if let Some(languages) = script_language.get_mut(&script) {
|
if let Some(languages) = script_language.get_mut(&script) {
|
||||||
(*languages).push(language);
|
(*languages).push(language);
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
Reference in New Issue
Block a user