mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
Use Languages list detected during indexing at search time
This commit is contained in:
parent
643d99e0f9
commit
0bc1a18f52
@ -1206,6 +1206,26 @@ impl Index {
|
||||
let doc_ids = self.script_language_docids.get(rtxn, key)?;
|
||||
Ok(doc_ids.map(|ids| ids - soft_deleted_documents))
|
||||
}
|
||||
|
||||
pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
|
||||
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
|
||||
|
||||
let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
|
||||
for sl in self.script_language_docids.iter(rtxn)? {
|
||||
let ((script, language), docids) = sl?;
|
||||
|
||||
// keep only Languages that contains at least 1 document.
|
||||
if !soft_deleted_documents.is_superset(&docids) {
|
||||
if let Some(languages) = script_language.get_mut(&script) {
|
||||
(*languages).push(language);
|
||||
} else {
|
||||
script_language.insert(script, vec![language]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(script_language)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -152,6 +152,11 @@ impl<'a> Search<'a> {
|
||||
tokbuilder.stop_words(stop_words);
|
||||
}
|
||||
|
||||
let script_lang_map = self.index.script_language(self.rtxn)?;
|
||||
if !script_lang_map.is_empty() {
|
||||
tokbuilder.allow_list(&script_lang_map);
|
||||
}
|
||||
|
||||
let tokenizer = tokbuilder.build();
|
||||
let tokens = tokenizer.tokenize(query);
|
||||
builder
|
||||
|
Loading…
Reference in New Issue
Block a user