mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-22 19:13:10 +08:00
Unify facet strings by their normalized value
This commit is contained in:
parent
1c78447226
commit
d3a7e10348
@ -283,42 +283,60 @@ impl FacetedDocidsExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct DelAddFacetValue<'doc> {
|
struct DelAddFacetValue<'doc> {
|
||||||
strings: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
strings: HashMap<
|
||||||
|
(FieldId, &'doc str),
|
||||||
|
Option<BVec<'doc, u8>>,
|
||||||
|
hashbrown::DefaultHashBuilder,
|
||||||
|
&'doc Bump,
|
||||||
|
>,
|
||||||
f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
||||||
|
doc_alloc: &'doc Bump,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'doc> DelAddFacetValue<'doc> {
|
impl<'doc> DelAddFacetValue<'doc> {
|
||||||
fn new(doc_alloc: &'doc Bump) -> Self {
|
fn new(doc_alloc: &'doc Bump) -> Self {
|
||||||
Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc) }
|
Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc), doc_alloc }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_add(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
|
fn insert_add(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
|
||||||
let cache = match kind {
|
match kind {
|
||||||
FacetKind::String => &mut self.strings,
|
FacetKind::Number => {
|
||||||
FacetKind::Number => &mut self.f64s,
|
let key = (fid, value);
|
||||||
_ => return,
|
if let Some(DelAdd::Deletion) = self.f64s.get(&key) {
|
||||||
};
|
self.f64s.remove(&key);
|
||||||
|
} else {
|
||||||
let key = (fid, value);
|
self.f64s.insert(key, DelAdd::Addition);
|
||||||
if let Some(DelAdd::Deletion) = cache.get(&key) {
|
}
|
||||||
cache.remove(&key);
|
}
|
||||||
} else {
|
FacetKind::String => {
|
||||||
cache.insert(key, DelAdd::Addition);
|
if let Ok(s) = std::str::from_utf8(&value) {
|
||||||
|
let normalized = crate::normalize_facet(s);
|
||||||
|
let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized));
|
||||||
|
self.strings.insert((fid, truncated), Some(value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_del(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
|
fn insert_del(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
|
||||||
let cache = match kind {
|
match kind {
|
||||||
FacetKind::String => &mut self.strings,
|
FacetKind::Number => {
|
||||||
FacetKind::Number => &mut self.f64s,
|
let key = (fid, value);
|
||||||
_ => return,
|
if let Some(DelAdd::Addition) = self.f64s.get(&key) {
|
||||||
};
|
self.f64s.remove(&key);
|
||||||
|
} else {
|
||||||
let key = (fid, value);
|
self.f64s.insert(key, DelAdd::Deletion);
|
||||||
if let Some(DelAdd::Addition) = cache.get(&key) {
|
}
|
||||||
cache.remove(&key);
|
}
|
||||||
} else {
|
FacetKind::String => {
|
||||||
cache.insert(key, DelAdd::Deletion);
|
if let Ok(s) = std::str::from_utf8(&value) {
|
||||||
|
let normalized = crate::normalize_facet(s);
|
||||||
|
let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized));
|
||||||
|
self.strings.insert((fid, truncated), None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -329,18 +347,14 @@ impl<'doc> DelAddFacetValue<'doc> {
|
|||||||
doc_alloc: &Bump,
|
doc_alloc: &Bump,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
|
let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
|
||||||
for ((fid, value), deladd) in self.strings {
|
for ((fid, truncated), value) in self.strings {
|
||||||
if let Ok(s) = std::str::from_utf8(&value) {
|
buffer.clear();
|
||||||
buffer.clear();
|
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
buffer.extend_from_slice(&docid.to_be_bytes());
|
||||||
buffer.extend_from_slice(&docid.to_be_bytes());
|
buffer.extend_from_slice(truncated.as_bytes());
|
||||||
let normalized = crate::normalize_facet(s);
|
match &value {
|
||||||
let truncated = truncate_str(&normalized);
|
Some(value) => sender.write_facet_string(&buffer, value)?,
|
||||||
buffer.extend_from_slice(truncated.as_bytes());
|
None => sender.delete_facet_string(&buffer)?,
|
||||||
match deladd {
|
|
||||||
DelAdd::Deletion => sender.delete_facet_string(&buffer)?,
|
|
||||||
DelAdd::Addition => sender.write_facet_string(&buffer, &value)?,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user