mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Use Languages list detected during indexing at search time
This commit is contained in:
parent
643d99e0f9
commit
0bc1a18f52
@ -1206,6 +1206,26 @@ impl Index {
|
|||||||
let doc_ids = self.script_language_docids.get(rtxn, key)?;
|
let doc_ids = self.script_language_docids.get(rtxn, key)?;
|
||||||
Ok(doc_ids.map(|ids| ids - soft_deleted_documents))
|
Ok(doc_ids.map(|ids| ids - soft_deleted_documents))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Collects, for every script, the languages that still have at least one
/// document that is not soft-deleted.
///
/// Every `(script, language)` entry of `script_language_docids` is visited; an
/// entry is kept only when its document ids are not entirely contained in the
/// soft-deleted set. Kept languages are grouped under their script in the
/// order the database iterator yields them.
///
/// # Errors
///
/// Propagates any `heed` error raised while reading the soft-deleted document
/// ids, opening the iterator, or decoding an individual entry.
pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
    let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;

    let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
    for sl in self.script_language_docids.iter(rtxn)? {
        let ((script, language), docids) = sl?;

        // Keep only languages that contain at least one document that is not
        // soft-deleted.
        if !soft_deleted_documents.is_superset(&docids) {
            // The entry API creates the per-script list on first use with a
            // single hash lookup, replacing the `get_mut` / `insert` pair.
            script_language.entry(script).or_default().push(language);
        }
    }

    Ok(script_language)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -152,6 +152,11 @@ impl<'a> Search<'a> {
|
|||||||
tokbuilder.stop_words(stop_words);
|
tokbuilder.stop_words(stop_words);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let script_lang_map = self.index.script_language(self.rtxn)?;
|
||||||
|
if !script_lang_map.is_empty() {
|
||||||
|
tokbuilder.allow_list(&script_lang_map);
|
||||||
|
}
|
||||||
|
|
||||||
let tokenizer = tokbuilder.build();
|
let tokenizer = tokbuilder.build();
|
||||||
let tokens = tokenizer.tokenize(query);
|
let tokens = tokenizer.tokenize(query);
|
||||||
builder
|
builder
|
||||||
|
Loading…
Reference in New Issue
Block a user