mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 20:15:07 +08:00
enables faceted search
This commit is contained in:
parent
e07fe017c1
commit
d7f099d3ba
@ -11,7 +11,7 @@ use std::fmt;
|
|||||||
use compact_arena::{SmallArena, Idx32, mk_arena};
|
use compact_arena::{SmallArena, Idx32, mk_arena};
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use meilisearch_types::DocIndex;
|
use meilisearch_types::DocIndex;
|
||||||
use sdset::{Set, SetBuf, exponential_search};
|
use sdset::{Set, SetBuf, exponential_search, SetOperation};
|
||||||
use slice_group_by::{GroupBy, GroupByMut};
|
use slice_group_by::{GroupBy, GroupByMut};
|
||||||
|
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
@ -28,6 +28,7 @@ pub fn bucket_sort<'c, FI>(
|
|||||||
reader: &heed::RoTxn<MainT>,
|
reader: &heed::RoTxn<MainT>,
|
||||||
query: &str,
|
query: &str,
|
||||||
range: Range<usize>,
|
range: Range<usize>,
|
||||||
|
facets_docids: Option<SetBuf<DocumentId>>,
|
||||||
filter: Option<FI>,
|
filter: Option<FI>,
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
searchable_attrs: Option<ReorderedAttrs>,
|
searchable_attrs: Option<ReorderedAttrs>,
|
||||||
@ -50,6 +51,7 @@ where
|
|||||||
reader,
|
reader,
|
||||||
query,
|
query,
|
||||||
range,
|
range,
|
||||||
|
facets_docids,
|
||||||
filter,
|
filter,
|
||||||
distinct,
|
distinct,
|
||||||
distinct_size,
|
distinct_size,
|
||||||
@ -94,10 +96,17 @@ where
|
|||||||
let mut queries_kinds = HashMap::new();
|
let mut queries_kinds = HashMap::new();
|
||||||
recurs_operation(&mut queries_kinds, &operation);
|
recurs_operation(&mut queries_kinds, &operation);
|
||||||
|
|
||||||
let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation)?;
|
let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?;
|
||||||
debug!("found {} documents", docids.len());
|
debug!("found {} documents", docids.len());
|
||||||
debug!("number of postings {:?}", queries.len());
|
debug!("number of postings {:?}", queries.len());
|
||||||
|
|
||||||
|
if let Some(facets_docids) = facets_docids {
|
||||||
|
let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set())
|
||||||
|
.intersection()
|
||||||
|
.into_set_buf();
|
||||||
|
docids = Cow::Owned(intersection);
|
||||||
|
}
|
||||||
|
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
mk_arena!(arena);
|
mk_arena!(arena);
|
||||||
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
||||||
@ -179,6 +188,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
|||||||
reader: &heed::RoTxn<MainT>,
|
reader: &heed::RoTxn<MainT>,
|
||||||
query: &str,
|
query: &str,
|
||||||
range: Range<usize>,
|
range: Range<usize>,
|
||||||
|
facets_docids: Option<SetBuf<DocumentId>>,
|
||||||
filter: Option<FI>,
|
filter: Option<FI>,
|
||||||
distinct: FD,
|
distinct: FD,
|
||||||
distinct_size: usize,
|
distinct_size: usize,
|
||||||
@ -225,10 +235,17 @@ where
|
|||||||
let mut queries_kinds = HashMap::new();
|
let mut queries_kinds = HashMap::new();
|
||||||
recurs_operation(&mut queries_kinds, &operation);
|
recurs_operation(&mut queries_kinds, &operation);
|
||||||
|
|
||||||
let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation)?;
|
let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?;
|
||||||
debug!("found {} documents", docids.len());
|
debug!("found {} documents", docids.len());
|
||||||
debug!("number of postings {:?}", queries.len());
|
debug!("number of postings {:?}", queries.len());
|
||||||
|
|
||||||
|
if let Some(facets_docids) = facets_docids {
|
||||||
|
let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set())
|
||||||
|
.intersection()
|
||||||
|
.into_set_buf();
|
||||||
|
docids = Cow::Owned(intersection);
|
||||||
|
}
|
||||||
|
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
mk_arena!(arena);
|
mk_arena!(arena);
|
||||||
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
|
||||||
|
@ -1,66 +1,50 @@
|
|||||||
use std::ops::Range;
|
use std::borrow::Cow;
|
||||||
|
use std::ops::{Range, Deref};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use crate::database::MainT;
|
use crate::database::MainT;
|
||||||
use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct};
|
use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct};
|
||||||
use crate::{criterion::Criteria, Document, DocumentId};
|
use crate::{criterion::Criteria, Document, DocumentId};
|
||||||
use crate::{reordered_attrs::ReorderedAttrs, store, MResult};
|
use crate::{reordered_attrs::ReorderedAttrs, store, MResult};
|
||||||
|
use crate::facets::FacetFilter;
|
||||||
|
|
||||||
pub struct QueryBuilder<'c, 'f, 'd> {
|
use either::Either;
|
||||||
|
use sdset::SetOperation;
|
||||||
|
|
||||||
|
pub struct QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
searchable_attrs: Option<ReorderedAttrs>,
|
searchable_attrs: Option<ReorderedAttrs>,
|
||||||
filter: Option<Box<dyn Fn(DocumentId) -> bool + 'f>>,
|
filter: Option<Box<dyn Fn(DocumentId) -> bool + 'f>>,
|
||||||
distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>,
|
distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>,
|
||||||
timeout: Option<Duration>,
|
timeout: Option<Duration>,
|
||||||
main_store: store::Main,
|
index: &'i store::Index,
|
||||||
postings_lists_store: store::PostingsLists,
|
facets: Option<&'fa FacetFilter>,
|
||||||
documents_fields_counts_store: store::DocumentsFieldsCounts,
|
|
||||||
synonyms_store: store::Synonyms,
|
|
||||||
prefix_documents_cache_store: store::PrefixDocumentsCache,
|
|
||||||
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
||||||
pub fn new(
|
pub fn new(index: &'i store::Index) -> Self {
|
||||||
main: store::Main,
|
|
||||||
postings_lists: store::PostingsLists,
|
|
||||||
documents_fields_counts: store::DocumentsFieldsCounts,
|
|
||||||
synonyms: store::Synonyms,
|
|
||||||
prefix_documents_cache: store::PrefixDocumentsCache,
|
|
||||||
prefix_postings_lists_cache: store::PrefixPostingsListsCache,
|
|
||||||
) -> QueryBuilder<'c, 'f, 'd> {
|
|
||||||
QueryBuilder::with_criteria(
|
QueryBuilder::with_criteria(
|
||||||
main,
|
index,
|
||||||
postings_lists,
|
|
||||||
documents_fields_counts,
|
|
||||||
synonyms,
|
|
||||||
prefix_documents_cache,
|
|
||||||
prefix_postings_lists_cache,
|
|
||||||
Criteria::default(),
|
Criteria::default(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_facets(&mut self, facets: Option<&'fa FacetFilter>) {
|
||||||
|
self.facets = facets;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn with_criteria(
|
pub fn with_criteria(
|
||||||
main: store::Main,
|
index: &'i store::Index,
|
||||||
postings_lists: store::PostingsLists,
|
|
||||||
documents_fields_counts: store::DocumentsFieldsCounts,
|
|
||||||
synonyms: store::Synonyms,
|
|
||||||
prefix_documents_cache: store::PrefixDocumentsCache,
|
|
||||||
prefix_postings_lists_cache: store::PrefixPostingsListsCache,
|
|
||||||
criteria: Criteria<'c>,
|
criteria: Criteria<'c>,
|
||||||
) -> QueryBuilder<'c, 'f, 'd> {
|
) -> Self {
|
||||||
QueryBuilder {
|
QueryBuilder {
|
||||||
criteria,
|
criteria,
|
||||||
searchable_attrs: None,
|
searchable_attrs: None,
|
||||||
filter: None,
|
filter: None,
|
||||||
distinct: None,
|
distinct: None,
|
||||||
timeout: None,
|
timeout: None,
|
||||||
main_store: main,
|
index,
|
||||||
postings_lists_store: postings_lists,
|
facets: None,
|
||||||
documents_fields_counts_store: documents_fields_counts,
|
|
||||||
synonyms_store: synonyms,
|
|
||||||
prefix_documents_cache_store: prefix_documents_cache,
|
|
||||||
prefix_postings_lists_cache_store: prefix_postings_lists_cache,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,36 +77,70 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
|
|||||||
query: &str,
|
query: &str,
|
||||||
range: Range<usize>,
|
range: Range<usize>,
|
||||||
) -> MResult<(Vec<Document>, usize)> {
|
) -> MResult<(Vec<Document>, usize)> {
|
||||||
|
let facets_docids = match self.facets {
|
||||||
|
Some(facets) => {
|
||||||
|
let mut ands = Vec::with_capacity(facets.len());
|
||||||
|
let mut ors = Vec::new();
|
||||||
|
for f in facets.deref() {
|
||||||
|
match f {
|
||||||
|
Either::Left(keys) => {
|
||||||
|
ors.reserve(keys.len());
|
||||||
|
for key in keys {
|
||||||
|
let docids = self.index.facets.facet_document_ids(reader, &key)?.unwrap_or_default();
|
||||||
|
ors.push(docids);
|
||||||
|
}
|
||||||
|
let sets: Vec<_> = ors.iter().map(Cow::deref).collect();
|
||||||
|
let or_result = sdset::multi::OpBuilder::from_vec(sets).union().into_set_buf();
|
||||||
|
ands.push(Cow::Owned(or_result));
|
||||||
|
ors.clear();
|
||||||
|
}
|
||||||
|
Either::Right(key) =>{
|
||||||
|
match self.index.facets.facet_document_ids(reader, &key)? {
|
||||||
|
Some(docids) => ands.push(docids),
|
||||||
|
// no candidates for search, early return.
|
||||||
|
None => return Ok((vec![], 0)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
let ands: Vec<_> = ands.iter().map(Cow::deref).collect();
|
||||||
|
Some(sdset::multi::OpBuilder::from_vec(ands).intersection().into_set_buf())
|
||||||
|
}
|
||||||
|
None => None
|
||||||
|
};
|
||||||
|
|
||||||
match self.distinct {
|
match self.distinct {
|
||||||
Some((distinct, distinct_size)) => bucket_sort_with_distinct(
|
Some((distinct, distinct_size)) => bucket_sort_with_distinct(
|
||||||
reader,
|
reader,
|
||||||
query,
|
query,
|
||||||
range,
|
range,
|
||||||
|
facets_docids,
|
||||||
self.filter,
|
self.filter,
|
||||||
distinct,
|
distinct,
|
||||||
distinct_size,
|
distinct_size,
|
||||||
self.criteria,
|
self.criteria,
|
||||||
self.searchable_attrs,
|
self.searchable_attrs,
|
||||||
self.main_store,
|
self.index.main,
|
||||||
self.postings_lists_store,
|
self.index.postings_lists,
|
||||||
self.documents_fields_counts_store,
|
self.index.documents_fields_counts,
|
||||||
self.synonyms_store,
|
self.index.synonyms,
|
||||||
self.prefix_documents_cache_store,
|
self.index.prefix_documents_cache,
|
||||||
self.prefix_postings_lists_cache_store,
|
self.index.prefix_postings_lists_cache,
|
||||||
),
|
),
|
||||||
None => bucket_sort(
|
None => bucket_sort(
|
||||||
reader,
|
reader,
|
||||||
query,
|
query,
|
||||||
range,
|
range,
|
||||||
|
facets_docids,
|
||||||
self.filter,
|
self.filter,
|
||||||
self.criteria,
|
self.criteria,
|
||||||
self.searchable_attrs,
|
self.searchable_attrs,
|
||||||
self.main_store,
|
self.index.main,
|
||||||
self.postings_lists_store,
|
self.index.postings_lists,
|
||||||
self.documents_fields_counts_store,
|
self.index.documents_fields_counts,
|
||||||
self.synonyms_store,
|
self.index.synonyms,
|
||||||
self.prefix_documents_cache_store,
|
self.index.prefix_documents_cache,
|
||||||
self.prefix_postings_lists_cache_store,
|
self.index.prefix_postings_lists_cache,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@ use std::time::Instant;
|
|||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
use log::error;
|
use log::error;
|
||||||
use meilisearch_core::Filter;
|
use meilisearch_core::Filter;
|
||||||
|
use meilisearch_core::facets::FacetFilter;
|
||||||
use meilisearch_core::criterion::*;
|
use meilisearch_core::criterion::*;
|
||||||
use meilisearch_core::settings::RankingRule;
|
use meilisearch_core::settings::RankingRule;
|
||||||
use meilisearch_core::{Highlight, Index, MainT, RankedMap};
|
use meilisearch_core::{Highlight, Index, MainT, RankedMap};
|
||||||
@ -34,6 +35,7 @@ impl IndexSearchExt for Index {
|
|||||||
attributes_to_highlight: None,
|
attributes_to_highlight: None,
|
||||||
filters: None,
|
filters: None,
|
||||||
matches: false,
|
matches: false,
|
||||||
|
facet_filters: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -48,6 +50,7 @@ pub struct SearchBuilder<'a> {
|
|||||||
attributes_to_highlight: Option<HashSet<String>>,
|
attributes_to_highlight: Option<HashSet<String>>,
|
||||||
filters: Option<String>,
|
filters: Option<String>,
|
||||||
matches: bool,
|
matches: bool,
|
||||||
|
facet_filters: Option<FacetFilter>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> SearchBuilder<'a> {
|
impl<'a> SearchBuilder<'a> {
|
||||||
@ -82,6 +85,11 @@ impl<'a> SearchBuilder<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn add_facet_filters(&mut self, filters: FacetFilter) -> &SearchBuilder {
|
||||||
|
self.facet_filters = Some(filters);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn filters(&mut self, value: String) -> &SearchBuilder {
|
pub fn filters(&mut self, value: String) -> &SearchBuilder {
|
||||||
self.filters = Some(value);
|
self.filters = Some(value);
|
||||||
self
|
self
|
||||||
@ -138,6 +146,8 @@ impl<'a> SearchBuilder<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
query_builder.set_facets(self.facet_filters.as_ref());
|
||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));
|
let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));
|
||||||
let (docs, nb_hits) = result.map_err(ResponseError::search_documents)?;
|
let (docs, nb_hits) = result.map_err(ResponseError::search_documents)?;
|
||||||
|
@ -12,6 +12,8 @@ use crate::helpers::Authentication;
|
|||||||
use crate::routes::IndexParam;
|
use crate::routes::IndexParam;
|
||||||
use crate::Data;
|
use crate::Data;
|
||||||
|
|
||||||
|
use meilisearch_core::facets::FacetFilter;
|
||||||
|
|
||||||
pub fn services(cfg: &mut web::ServiceConfig) {
|
pub fn services(cfg: &mut web::ServiceConfig) {
|
||||||
cfg.service(search_with_url_query);
|
cfg.service(search_with_url_query);
|
||||||
}
|
}
|
||||||
@ -28,6 +30,7 @@ struct SearchQuery {
|
|||||||
attributes_to_highlight: Option<String>,
|
attributes_to_highlight: Option<String>,
|
||||||
filters: Option<String>,
|
filters: Option<String>,
|
||||||
matches: Option<bool>,
|
matches: Option<bool>,
|
||||||
|
facet_filters: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
|
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
|
||||||
@ -81,6 +84,13 @@ async fn search_with_url_query(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(ref facet_filters) = params.facet_filters {
|
||||||
|
match index.main.attributes_for_faceting(&reader)? {
|
||||||
|
Some(ref attrs) => { search_builder.add_facet_filters(FacetFilter::from_str(facet_filters, &schema, attrs)?); },
|
||||||
|
None => return Err(ResponseError::FacetExpression("can't filter on facets, as no facet is set".to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(attributes_to_crop) = &params.attributes_to_crop {
|
if let Some(attributes_to_crop) = &params.attributes_to_crop {
|
||||||
let default_length = params.crop_length.unwrap_or(200);
|
let default_length = params.crop_length.unwrap_or(200);
|
||||||
let mut final_attributes: HashMap<String, usize> = HashMap::new();
|
let mut final_attributes: HashMap<String, usize> = HashMap::new();
|
||||||
|
Loading…
Reference in New Issue
Block a user