mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
fix facet distribution case
This commit is contained in:
parent
fb69769991
commit
18a50b4dac
@ -39,7 +39,7 @@ pub fn bucket_sort<'c, FI>(
|
|||||||
query: &str,
|
query: &str,
|
||||||
range: Range<usize>,
|
range: Range<usize>,
|
||||||
facets_docids: Option<SetBuf<DocumentId>>,
|
facets_docids: Option<SetBuf<DocumentId>>,
|
||||||
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
|
facet_count_docids: Option<HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>>,
|
||||||
filter: Option<FI>,
|
filter: Option<FI>,
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
searchable_attrs: Option<ReorderedAttrs>,
|
searchable_attrs: Option<ReorderedAttrs>,
|
||||||
@ -199,7 +199,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
|||||||
query: &str,
|
query: &str,
|
||||||
range: Range<usize>,
|
range: Range<usize>,
|
||||||
facets_docids: Option<SetBuf<DocumentId>>,
|
facets_docids: Option<SetBuf<DocumentId>>,
|
||||||
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
|
facet_count_docids: Option<HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>>,
|
||||||
filter: Option<FI>,
|
filter: Option<FI>,
|
||||||
distinct: FD,
|
distinct: FD,
|
||||||
distinct_size: usize,
|
distinct_size: usize,
|
||||||
@ -636,18 +636,18 @@ pub fn placeholder_document_sort(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids.
|
/// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids.
|
||||||
pub fn facet_count(
|
fn facet_count(
|
||||||
facet_docids: HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>,
|
facet_docids: HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>,
|
||||||
candidate_docids: &Set<DocumentId>,
|
candidate_docids: &Set<DocumentId>,
|
||||||
) -> HashMap<String, HashMap<String, usize>> {
|
) -> HashMap<String, HashMap<String, usize>> {
|
||||||
let mut facets_counts = HashMap::with_capacity(facet_docids.len());
|
let mut facets_counts = HashMap::with_capacity(facet_docids.len());
|
||||||
for (key, doc_map) in facet_docids {
|
for (key, doc_map) in facet_docids {
|
||||||
let mut count_map = HashMap::with_capacity(doc_map.len());
|
let mut count_map = HashMap::with_capacity(doc_map.len());
|
||||||
for (value, docids) in doc_map {
|
for (_, (value, docids)) in doc_map {
|
||||||
let mut counter = Counter::new();
|
let mut counter = Counter::new();
|
||||||
let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection();
|
let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection();
|
||||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
||||||
count_map.insert(value, counter.0);
|
count_map.insert(value.to_string(), counter.0);
|
||||||
}
|
}
|
||||||
facets_counts.insert(key, count_map);
|
facets_counts.insert(key, count_map);
|
||||||
}
|
}
|
||||||
|
@ -164,7 +164,7 @@ impl<'a> heed::BytesDecode<'a> for FacetKey {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_to_facet_map(
|
pub fn add_to_facet_map(
|
||||||
facet_map: &mut HashMap<FacetKey, Vec<DocumentId>>,
|
facet_map: &mut HashMap<FacetKey, (String, Vec<DocumentId>)>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
value: Value,
|
value: Value,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
@ -175,8 +175,8 @@ pub fn add_to_facet_map(
|
|||||||
Value::Null => return Ok(()),
|
Value::Null => return Ok(()),
|
||||||
value => return Err(FacetError::InvalidDocumentAttribute(value.to_string())),
|
value => return Err(FacetError::InvalidDocumentAttribute(value.to_string())),
|
||||||
};
|
};
|
||||||
let key = FacetKey::new(field_id, value);
|
let key = FacetKey::new(field_id, value.clone());
|
||||||
facet_map.entry(key).or_insert_with(Vec::new).push(document_id);
|
facet_map.entry(key).or_insert_with(|| (value, Vec::new())).1.push(document_id);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,8 +185,10 @@ pub fn facet_map_from_docids(
|
|||||||
index: &crate::Index,
|
index: &crate::Index,
|
||||||
document_ids: &[DocumentId],
|
document_ids: &[DocumentId],
|
||||||
attributes_for_facetting: &[FieldId],
|
attributes_for_facetting: &[FieldId],
|
||||||
) -> MResult<HashMap<FacetKey, Vec<DocumentId>>> {
|
) -> MResult<HashMap<FacetKey, (String, Vec<DocumentId>)>> {
|
||||||
let mut facet_map = HashMap::new();
|
// A hashmap that associates a facet key with a pair containing the original facet attribute
|
||||||
|
// string with its case preserved, and a list of document ids for that facet attribute.
|
||||||
|
let mut facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)> = HashMap::new();
|
||||||
for document_id in document_ids {
|
for document_id in document_ids {
|
||||||
for result in index
|
for result in index
|
||||||
.documents_fields
|
.documents_fields
|
||||||
@ -212,7 +214,7 @@ pub fn facet_map_from_docs(
|
|||||||
schema: &Schema,
|
schema: &Schema,
|
||||||
documents: &HashMap<DocumentId, IndexMap<String, Value>>,
|
documents: &HashMap<DocumentId, IndexMap<String, Value>>,
|
||||||
attributes_for_facetting: &[FieldId],
|
attributes_for_facetting: &[FieldId],
|
||||||
) -> MResult<HashMap<FacetKey, Vec<DocumentId>>> {
|
) -> MResult<HashMap<FacetKey, (String, Vec<DocumentId>)>> {
|
||||||
let mut facet_map = HashMap::new();
|
let mut facet_map = HashMap::new();
|
||||||
let attributes_for_facetting = attributes_for_facetting
|
let attributes_for_facetting = attributes_for_facetting
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -97,16 +97,14 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> {
|
|||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
ors.push(docids);
|
ors.push(docids);
|
||||||
}
|
}
|
||||||
let sets: Vec<_> = ors.iter().map(Cow::deref).collect();
|
let sets: Vec<_> = ors.iter().map(|i| &i.1).map(Cow::deref).collect();
|
||||||
let or_result = sdset::multi::OpBuilder::from_vec(sets)
|
let or_result = sdset::multi::OpBuilder::from_vec(sets).union().into_set_buf();
|
||||||
.union()
|
|
||||||
.into_set_buf();
|
|
||||||
ands.push(Cow::Owned(or_result));
|
ands.push(Cow::Owned(or_result));
|
||||||
ors.clear();
|
ors.clear();
|
||||||
}
|
}
|
||||||
Either::Right(key) => {
|
Either::Right(key) => {
|
||||||
match self.index.facets.facet_document_ids(reader, &key)? {
|
match self.index.facets.facet_document_ids(reader, &key)? {
|
||||||
Some(docids) => ands.push(docids),
|
Some(docids) => ands.push(docids.1),
|
||||||
// no candidates for search, early return.
|
// no candidates for search, early return.
|
||||||
None => return Ok(Some(SetBuf::default())),
|
None => return Ok(Some(SetBuf::default())),
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::mem;
|
||||||
|
|
||||||
use heed::{RwTxn, RoTxn, Result as ZResult, RoRange};
|
use heed::{RwTxn, RoTxn, Result as ZResult, RoRange, types::Str, BytesEncode, BytesDecode};
|
||||||
use sdset::{SetBuf, Set, SetOperation};
|
use sdset::{SetBuf, Set, SetOperation};
|
||||||
|
|
||||||
use meilisearch_types::DocumentId;
|
use meilisearch_types::DocumentId;
|
||||||
@ -14,40 +15,76 @@ use super::cow_set::CowSet;
|
|||||||
/// contains facet info
|
/// contains facet info
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct Facets {
|
pub struct Facets {
|
||||||
pub(crate) facets: heed::Database<FacetKey, CowSet<DocumentId>>,
|
pub(crate) facets: heed::Database<FacetKey, FacetData>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FacetData;
|
||||||
|
|
||||||
|
impl<'a> BytesEncode<'a> for FacetData {
|
||||||
|
type EItem = (&'a str, &'a Set<DocumentId>);
|
||||||
|
|
||||||
|
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
// get size of the first item
|
||||||
|
let first_size = item.0.as_bytes().len();
|
||||||
|
let size = mem::size_of::<usize>()
|
||||||
|
+ first_size
|
||||||
|
+ item.1.len() * mem::size_of::<DocumentId>();
|
||||||
|
let mut buffer = Vec::with_capacity(size);
|
||||||
|
// encode the length of the first item
|
||||||
|
buffer.extend_from_slice(&first_size.to_be_bytes());
|
||||||
|
buffer.extend_from_slice(Str::bytes_encode(&item.0)?.as_ref());
|
||||||
|
let second_slice = CowSet::bytes_encode(&item.1)?;
|
||||||
|
buffer.extend_from_slice(second_slice.as_ref());
|
||||||
|
Some(Cow::Owned(buffer))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BytesDecode<'a> for FacetData {
|
||||||
|
type DItem = (&'a str, Cow<'a, Set<DocumentId>>);
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let mut size_buf = [0; 8];
|
||||||
|
size_buf.copy_from_slice(bytes.get(0..8)?);
|
||||||
|
// decode size of the first item from the bytes
|
||||||
|
let first_size = usize::from_be_bytes(size_buf);
|
||||||
|
// decode first and second items
|
||||||
|
let first_item = Str::bytes_decode(bytes.get(8..(8 + first_size))?)?;
|
||||||
|
let second_item = CowSet::bytes_decode(bytes.get((8 + first_size)..)?)?;
|
||||||
|
Some((first_item, second_item))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Facets {
|
impl Facets {
|
||||||
// we use sdset::SetBuf to ensure the docids are sorted.
|
// we use sdset::SetBuf to ensure the docids are sorted.
|
||||||
pub fn put_facet_document_ids(&self, writer: &mut RwTxn<MainT>, facet_key: FacetKey, doc_ids: &Set<DocumentId>) -> ZResult<()> {
|
pub fn put_facet_document_ids(&self, writer: &mut RwTxn<MainT>, facet_key: FacetKey, doc_ids: &Set<DocumentId>, facet_value: &str) -> ZResult<()> {
|
||||||
self.facets.put(writer, &facet_key, doc_ids)
|
self.facets.put(writer, &facet_key, &(facet_value, doc_ids))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, CowSet<DocumentId>>> {
|
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, FacetData>> {
|
||||||
self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))
|
self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {
|
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<(&'txn str,Cow<'txn, Set<DocumentId>>)>> {
|
||||||
self.facets.get(reader, &facet_key)
|
self.facets.get(reader, &facet_key)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// updates the facets store, removing the documents from the facets provided in the
|
/// updates the facets store, removing the documents from the facets provided in the
|
||||||
/// `facet_map` argument
|
/// `facet_map` argument
|
||||||
pub fn remove(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, Vec<DocumentId>>) -> ZResult<()> {
|
pub fn remove(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)>) -> ZResult<()> {
|
||||||
for (key, document_ids) in facet_map {
|
for (key, (name, document_ids)) in facet_map {
|
||||||
if let Some(old) = self.facets.get(writer, &key)? {
|
if let Some((_, old)) = self.facets.get(writer, &key)? {
|
||||||
let to_remove = SetBuf::from_dirty(document_ids);
|
let to_remove = SetBuf::from_dirty(document_ids);
|
||||||
let new = sdset::duo::OpBuilder::new(old.as_ref(), to_remove.as_set()).difference().into_set_buf();
|
let new = sdset::duo::OpBuilder::new(old.as_ref(), to_remove.as_set()).difference().into_set_buf();
|
||||||
self.facets.put(writer, &key, new.as_set())?;
|
self.facets.put(writer, &key, &(&name, new.as_set()))?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, Vec<DocumentId>>) -> ZResult<()> {
|
pub fn add(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)>) -> ZResult<()> {
|
||||||
for (key, document_ids) in facet_map {
|
for (key, (facet_name, document_ids)) in facet_map {
|
||||||
let set = SetBuf::from_dirty(document_ids);
|
let set = SetBuf::from_dirty(document_ids);
|
||||||
self.put_facet_document_ids(writer, key, set.as_set())?;
|
self.put_facet_document_ids(writer, key, set.as_set(), &facet_name)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user