mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 17:08:26 +08:00
Merge #1224
1224: fix synonyms normalization r=MarinPostma a=LegendreM Synonyms need to be indexed in ascending order, and the new normalization step for synonyms can change this order, which breaks the indexing process: "Harry Potter" > "HP" but "harry potter" < "hp". Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
commit
b1d9ad7134
@ -1,7 +1,7 @@
|
|||||||
use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}};
|
use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}};
|
||||||
|
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use fst::{set::OpBuilder, SetBuilder};
|
use fst::{SetBuilder, set::OpBuilder};
|
||||||
use sdset::SetBuf;
|
use sdset::SetBuf;
|
||||||
use meilisearch_schema::Schema;
|
use meilisearch_schema::Schema;
|
||||||
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
||||||
@ -299,15 +299,22 @@ pub fn apply_synonyms_update(
|
|||||||
.fold(String::new(), |s, t| s + t.text())
|
.fold(String::new(), |s, t| s + t.text())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalize synonyms and reorder them creating a BTreeMap
|
||||||
|
let synonyms: BTreeMap<String, Vec<String>> = synonyms.into_iter().map( |(word, alternatives)| {
|
||||||
|
let word = normalize(&analyzer, &word);
|
||||||
|
let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect();
|
||||||
|
|
||||||
|
(word, alternatives)
|
||||||
|
}).collect();
|
||||||
|
|
||||||
|
// index synonyms,
|
||||||
|
// synonyms have to be ordered by key before indexation
|
||||||
let mut synonyms_builder = SetBuilder::memory();
|
let mut synonyms_builder = SetBuilder::memory();
|
||||||
synonyms_store.clear(writer)?;
|
synonyms_store.clear(writer)?;
|
||||||
for (word, alternatives) in synonyms {
|
for (word, alternatives) in synonyms {
|
||||||
let word = normalize(&analyzer, &word);
|
|
||||||
|
|
||||||
synonyms_builder.insert(&word)?;
|
synonyms_builder.insert(&word)?;
|
||||||
|
|
||||||
let alternatives = {
|
let alternatives = {
|
||||||
let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect();
|
|
||||||
let alternatives = SetBuf::from_dirty(alternatives);
|
let alternatives = SetBuf::from_dirty(alternatives);
|
||||||
let mut alternatives_builder = SetBuilder::memory();
|
let mut alternatives_builder = SetBuilder::memory();
|
||||||
alternatives_builder.extend_iter(alternatives)?;
|
alternatives_builder.extend_iter(alternatives)?;
|
||||||
|
@ -171,6 +171,8 @@ async fn write_all_and_update() {
|
|||||||
"synonyms": {
|
"synonyms": {
|
||||||
"road": ["street", "avenue"],
|
"road": ["street", "avenue"],
|
||||||
"street": ["avenue"],
|
"street": ["avenue"],
|
||||||
|
"HP": ["Harry Potter"],
|
||||||
|
"Harry Potter": ["HP"]
|
||||||
},
|
},
|
||||||
"attributesForFaceting": ["title"],
|
"attributesForFaceting": ["title"],
|
||||||
});
|
});
|
||||||
@ -208,6 +210,8 @@ async fn write_all_and_update() {
|
|||||||
"synonyms": {
|
"synonyms": {
|
||||||
"road": ["street", "avenue"],
|
"road": ["street", "avenue"],
|
||||||
"street": ["avenue"],
|
"street": ["avenue"],
|
||||||
|
"hp": ["harry potter"],
|
||||||
|
"harry potter": ["hp"]
|
||||||
},
|
},
|
||||||
"attributesForFaceting": ["title"],
|
"attributesForFaceting": ["title"],
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user