mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-22 19:13:10 +08:00
Merge #5258
5258: Unify facet strings by their normalized value r=ManyTheFish a=dureuill Fixes #5228: the "missing facet keys" issue. - Before this PR, updating a document such that `"facet": "DUREUILL"` would become `"facet": "dureuill"` could cause the normalized facet value `dureuill` to be removed from `field_id_docid_facet_strings` db. - This PR makes sure to unify the intermediate representation of the facet strings by their field_id and **normalized** (and truncated) string value. - The introduced test is testing only one of the two facet distribution algorithms. - We removed the panic when the facet string was not found, and we instead returned the normalized string. ## Draft status - [x] target release v1.12.6 branch and milestone - [ ] ~consider meilitool offline upgrade to fix the corrupted dbs in the wild.~ workaround: ~remove facets, then add them again... if your facet distribution is right.~ Just use a dump. - [x] Add unit test demonstrating the issue fixed by this PR. Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
commit
c575d2693b
@ -1746,3 +1746,57 @@ async fn change_attributes_settings() {
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Modifying facets with different casing should work correctly
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn change_facet_casing() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"filterableAttributes": ["dog"],
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||||
|
index.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, _code) = index
|
||||||
|
.add_documents(
|
||||||
|
json!([
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"dog": "Bouvier Bernois"
|
||||||
|
}
|
||||||
|
]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
index.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, _code) = index
|
||||||
|
.add_documents(
|
||||||
|
json!([
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"dog": "bouvier bernois"
|
||||||
|
}
|
||||||
|
]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
index.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.search(json!({ "facets": ["dog"] }), |response, code| {
|
||||||
|
meili_snap::snapshot!(code, @"200 OK");
|
||||||
|
meili_snap::snapshot!(meili_snap::json_string!(response["facetDistribution"]), @r###"
|
||||||
|
{
|
||||||
|
"dog": {
|
||||||
|
"bouvier bernois": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
@ -219,12 +219,19 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
|
let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
|
||||||
|
|
||||||
let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
|
let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
|
||||||
let original_string = self
|
let optional_original_string =
|
||||||
.index
|
self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?;
|
||||||
.field_id_docid_facet_strings
|
|
||||||
.get(self.rtxn, &key)?
|
let original_string = match optional_original_string {
|
||||||
.unwrap()
|
Some(original_string) => original_string.to_owned(),
|
||||||
.to_owned();
|
None => {
|
||||||
|
tracing::error!(
|
||||||
|
"Missing original facet string. Using the normalized facet {} instead",
|
||||||
|
facet_key
|
||||||
|
);
|
||||||
|
facet_key.to_string()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
distribution.insert(original_string, nbr_docids);
|
distribution.insert(original_string, nbr_docids);
|
||||||
if distribution.len() == self.max_values_per_facet {
|
if distribution.len() == self.max_values_per_facet {
|
||||||
|
@ -283,42 +283,60 @@ impl FacetedDocidsExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct DelAddFacetValue<'doc> {
|
struct DelAddFacetValue<'doc> {
|
||||||
strings: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
strings: HashMap<
|
||||||
|
(FieldId, &'doc str),
|
||||||
|
Option<BVec<'doc, u8>>,
|
||||||
|
hashbrown::DefaultHashBuilder,
|
||||||
|
&'doc Bump,
|
||||||
|
>,
|
||||||
f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
||||||
|
doc_alloc: &'doc Bump,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'doc> DelAddFacetValue<'doc> {
|
impl<'doc> DelAddFacetValue<'doc> {
|
||||||
fn new(doc_alloc: &'doc Bump) -> Self {
|
fn new(doc_alloc: &'doc Bump) -> Self {
|
||||||
Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc) }
|
Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc), doc_alloc }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_add(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
|
fn insert_add(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
|
||||||
let cache = match kind {
|
match kind {
|
||||||
FacetKind::String => &mut self.strings,
|
FacetKind::Number => {
|
||||||
FacetKind::Number => &mut self.f64s,
|
let key = (fid, value);
|
||||||
_ => return,
|
if let Some(DelAdd::Deletion) = self.f64s.get(&key) {
|
||||||
};
|
self.f64s.remove(&key);
|
||||||
|
} else {
|
||||||
let key = (fid, value);
|
self.f64s.insert(key, DelAdd::Addition);
|
||||||
if let Some(DelAdd::Deletion) = cache.get(&key) {
|
}
|
||||||
cache.remove(&key);
|
}
|
||||||
} else {
|
FacetKind::String => {
|
||||||
cache.insert(key, DelAdd::Addition);
|
if let Ok(s) = std::str::from_utf8(&value) {
|
||||||
|
let normalized = crate::normalize_facet(s);
|
||||||
|
let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized));
|
||||||
|
self.strings.insert((fid, truncated), Some(value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_del(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
|
fn insert_del(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
|
||||||
let cache = match kind {
|
match kind {
|
||||||
FacetKind::String => &mut self.strings,
|
FacetKind::Number => {
|
||||||
FacetKind::Number => &mut self.f64s,
|
let key = (fid, value);
|
||||||
_ => return,
|
if let Some(DelAdd::Addition) = self.f64s.get(&key) {
|
||||||
};
|
self.f64s.remove(&key);
|
||||||
|
} else {
|
||||||
let key = (fid, value);
|
self.f64s.insert(key, DelAdd::Deletion);
|
||||||
if let Some(DelAdd::Addition) = cache.get(&key) {
|
}
|
||||||
cache.remove(&key);
|
}
|
||||||
} else {
|
FacetKind::String => {
|
||||||
cache.insert(key, DelAdd::Deletion);
|
if let Ok(s) = std::str::from_utf8(&value) {
|
||||||
|
let normalized = crate::normalize_facet(s);
|
||||||
|
let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized));
|
||||||
|
self.strings.insert((fid, truncated), None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -329,18 +347,14 @@ impl<'doc> DelAddFacetValue<'doc> {
|
|||||||
doc_alloc: &Bump,
|
doc_alloc: &Bump,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
|
let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||||
for ((fid, value), deladd) in self.strings {
|
for ((fid, truncated), value) in self.strings {
|
||||||
if let Ok(s) = std::str::from_utf8(&value) {
|
buffer.clear();
|
||||||
buffer.clear();
|
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
buffer.extend_from_slice(&docid.to_be_bytes());
|
||||||
buffer.extend_from_slice(&docid.to_be_bytes());
|
buffer.extend_from_slice(truncated.as_bytes());
|
||||||
let normalized = crate::normalize_facet(s);
|
match &value {
|
||||||
let truncated = truncate_str(&normalized);
|
Some(value) => sender.write_facet_string(&buffer, value)?,
|
||||||
buffer.extend_from_slice(truncated.as_bytes());
|
None => sender.delete_facet_string(&buffer)?,
|
||||||
match deladd {
|
|
||||||
DelAdd::Deletion => sender.delete_facet_string(&buffer)?,
|
|
||||||
DelAdd::Addition => sender.write_facet_string(&buffer, &value)?,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user