From 1df0fdf3e2d910433d795db5f606fe906dbf3bcb Mon Sep 17 00:00:00 2001 From: many Date: Wed, 3 Feb 2021 15:05:15 +0100 Subject: [PATCH] fix synonyms normalization Synonyms need to be indexed in ascending order, and the new normalization step for synonyms potentially changes this order, which breaks the indexing process because "Harry Potter" > "HP" but "harry potter" < "hp" --- meilisearch-core/src/update/settings_update.rs | 15 +++++++++++---- meilisearch-http/tests/settings.rs | 4 ++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/meilisearch-core/src/update/settings_update.rs b/meilisearch-core/src/update/settings_update.rs index e96d5702c..c9d40fa1b 100644 --- a/meilisearch-core/src/update/settings_update.rs +++ b/meilisearch-core/src/update/settings_update.rs @@ -1,7 +1,7 @@ use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}}; use heed::Result as ZResult; -use fst::{set::OpBuilder, SetBuilder}; +use fst::{SetBuilder, set::OpBuilder}; use sdset::SetBuf; use meilisearch_schema::Schema; use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig}; @@ -298,16 +298,23 @@ pub fn apply_synonyms_update( .tokens() .fold(String::new(), |s, t| s + t.text()) } + + // normalize synonyms and reorder them creating a BTreeMap + let synonyms: BTreeMap> = synonyms.into_iter().map( |(word, alternatives)| { + let word = normalize(&analyzer, &word); + let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect(); + (word, alternatives) + }).collect(); + + // index synonyms, + // synyonyms have to be ordered by key before indexation let mut synonyms_builder = SetBuilder::memory(); synonyms_store.clear(writer)?; for (word, alternatives) in synonyms { - let word = normalize(&analyzer, &word); - synonyms_builder.insert(&word)?; let alternatives = { - let alternatives = alternatives.iter().map(|text| normalize(&analyzer, &text)).collect(); let alternatives = SetBuf::from_dirty(alternatives); let mut alternatives_builder = 
SetBuilder::memory(); alternatives_builder.extend_iter(alternatives)?; diff --git a/meilisearch-http/tests/settings.rs b/meilisearch-http/tests/settings.rs index 6b125c13a..98973b56f 100644 --- a/meilisearch-http/tests/settings.rs +++ b/meilisearch-http/tests/settings.rs @@ -171,6 +171,8 @@ async fn write_all_and_update() { "synonyms": { "road": ["street", "avenue"], "street": ["avenue"], + "HP": ["Harry Potter"], + "Harry Potter": ["HP"] }, "attributesForFaceting": ["title"], }); @@ -208,6 +210,8 @@ async fn write_all_and_update() { "synonyms": { "road": ["street", "avenue"], "street": ["avenue"], + "hp": ["harry potter"], + "harry potter": ["hp"] }, "attributesForFaceting": ["title"], });