enables faceted search

This commit is contained in:
mpostma 2020-05-05 22:29:35 +02:00
parent e07fe017c1
commit d7f099d3ba
4 changed files with 106 additions and 51 deletions

View File

@ -11,7 +11,7 @@ use std::fmt;
use compact_arena::{SmallArena, Idx32, mk_arena}; use compact_arena::{SmallArena, Idx32, mk_arena};
use log::debug; use log::debug;
use meilisearch_types::DocIndex; use meilisearch_types::DocIndex;
use sdset::{Set, SetBuf, exponential_search}; use sdset::{Set, SetBuf, exponential_search, SetOperation};
use slice_group_by::{GroupBy, GroupByMut}; use slice_group_by::{GroupBy, GroupByMut};
use crate::error::Error; use crate::error::Error;
@ -28,6 +28,7 @@ pub fn bucket_sort<'c, FI>(
reader: &heed::RoTxn<MainT>, reader: &heed::RoTxn<MainT>,
query: &str, query: &str,
range: Range<usize>, range: Range<usize>,
facets_docids: Option<SetBuf<DocumentId>>,
filter: Option<FI>, filter: Option<FI>,
criteria: Criteria<'c>, criteria: Criteria<'c>,
searchable_attrs: Option<ReorderedAttrs>, searchable_attrs: Option<ReorderedAttrs>,
@ -50,6 +51,7 @@ where
reader, reader,
query, query,
range, range,
facets_docids,
filter, filter,
distinct, distinct,
distinct_size, distinct_size,
@ -94,10 +96,17 @@ where
let mut queries_kinds = HashMap::new(); let mut queries_kinds = HashMap::new();
recurs_operation(&mut queries_kinds, &operation); recurs_operation(&mut queries_kinds, &operation);
let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation)?; let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?;
debug!("found {} documents", docids.len()); debug!("found {} documents", docids.len());
debug!("number of postings {:?}", queries.len()); debug!("number of postings {:?}", queries.len());
// When a facet filter produced a set of document ids, keep only the query
// candidates that are also in that set. The intersection is a freshly built
// set, so `docids` becomes an owned `Cow` from here on.
if let Some(facets_docids) = facets_docids {
let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set())
.intersection()
.into_set_buf();
docids = Cow::Owned(intersection);
}
let before = Instant::now(); let before = Instant::now();
mk_arena!(arena); mk_arena!(arena);
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries); let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);
@ -179,6 +188,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
reader: &heed::RoTxn<MainT>, reader: &heed::RoTxn<MainT>,
query: &str, query: &str,
range: Range<usize>, range: Range<usize>,
facets_docids: Option<SetBuf<DocumentId>>,
filter: Option<FI>, filter: Option<FI>,
distinct: FD, distinct: FD,
distinct_size: usize, distinct_size: usize,
@ -225,10 +235,17 @@ where
let mut queries_kinds = HashMap::new(); let mut queries_kinds = HashMap::new();
recurs_operation(&mut queries_kinds, &operation); recurs_operation(&mut queries_kinds, &operation);
let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation)?; let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?;
debug!("found {} documents", docids.len()); debug!("found {} documents", docids.len());
debug!("number of postings {:?}", queries.len()); debug!("number of postings {:?}", queries.len());
// When a facet filter produced a set of document ids, keep only the query
// candidates that are also in that set. The intersection is a freshly built
// set, so `docids` becomes an owned `Cow` from here on.
if let Some(facets_docids) = facets_docids {
let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set())
.intersection()
.into_set_buf();
docids = Cow::Owned(intersection);
}
let before = Instant::now(); let before = Instant::now();
mk_arena!(arena); mk_arena!(arena);
let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries); let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries);

View File

@ -1,66 +1,50 @@
use std::ops::Range; use std::borrow::Cow;
use std::ops::{Range, Deref};
use std::time::Duration; use std::time::Duration;
use crate::database::MainT; use crate::database::MainT;
use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct}; use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct};
use crate::{criterion::Criteria, Document, DocumentId}; use crate::{criterion::Criteria, Document, DocumentId};
use crate::{reordered_attrs::ReorderedAttrs, store, MResult}; use crate::{reordered_attrs::ReorderedAttrs, store, MResult};
use crate::facets::FacetFilter;
pub struct QueryBuilder<'c, 'f, 'd> { use either::Either;
use sdset::SetOperation;
pub struct QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
criteria: Criteria<'c>, criteria: Criteria<'c>,
searchable_attrs: Option<ReorderedAttrs>, searchable_attrs: Option<ReorderedAttrs>,
filter: Option<Box<dyn Fn(DocumentId) -> bool + 'f>>, filter: Option<Box<dyn Fn(DocumentId) -> bool + 'f>>,
distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>, distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>,
timeout: Option<Duration>, timeout: Option<Duration>,
main_store: store::Main, index: &'i store::Index,
postings_lists_store: store::PostingsLists, facets: Option<&'fa FacetFilter>,
documents_fields_counts_store: store::DocumentsFieldsCounts,
synonyms_store: store::Synonyms,
prefix_documents_cache_store: store::PrefixDocumentsCache,
prefix_postings_lists_cache_store: store::PrefixPostingsListsCache,
} }
impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> { impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
pub fn new( pub fn new(index: &'i store::Index) -> Self {
main: store::Main,
postings_lists: store::PostingsLists,
documents_fields_counts: store::DocumentsFieldsCounts,
synonyms: store::Synonyms,
prefix_documents_cache: store::PrefixDocumentsCache,
prefix_postings_lists_cache: store::PrefixPostingsListsCache,
) -> QueryBuilder<'c, 'f, 'd> {
QueryBuilder::with_criteria( QueryBuilder::with_criteria(
main, index,
postings_lists,
documents_fields_counts,
synonyms,
prefix_documents_cache,
prefix_postings_lists_cache,
Criteria::default(), Criteria::default(),
) )
} }
/// Sets the facet filter that `query` uses to restrict candidate documents.
///
/// The stored value is overwritten: passing `None` removes any filter that
/// was set before.
pub fn set_facets(&mut self, facets: Option<&'fa FacetFilter>) {
self.facets = facets;
}
pub fn with_criteria( pub fn with_criteria(
main: store::Main, index: &'i store::Index,
postings_lists: store::PostingsLists,
documents_fields_counts: store::DocumentsFieldsCounts,
synonyms: store::Synonyms,
prefix_documents_cache: store::PrefixDocumentsCache,
prefix_postings_lists_cache: store::PrefixPostingsListsCache,
criteria: Criteria<'c>, criteria: Criteria<'c>,
) -> QueryBuilder<'c, 'f, 'd> { ) -> Self {
QueryBuilder { QueryBuilder {
criteria, criteria,
searchable_attrs: None, searchable_attrs: None,
filter: None, filter: None,
distinct: None, distinct: None,
timeout: None, timeout: None,
main_store: main, index,
postings_lists_store: postings_lists, facets: None,
documents_fields_counts_store: documents_fields_counts,
synonyms_store: synonyms,
prefix_documents_cache_store: prefix_documents_cache,
prefix_postings_lists_cache_store: prefix_postings_lists_cache,
} }
} }
@ -93,36 +77,70 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
query: &str, query: &str,
range: Range<usize>, range: Range<usize>,
) -> MResult<(Vec<Document>, usize)> { ) -> MResult<(Vec<Document>, usize)> {
// Resolve the facet filter (if any) into the set of document ids it allows.
// Each entry of the filter contributes one set to `ands`; the final result is
// the intersection of all those sets.
let facets_docids = match self.facets {
Some(facets) => {
let mut ands = Vec::with_capacity(facets.len());
// Scratch buffer for the sets of one OR group; cleared after each group
// so it can be reused by the next one.
let mut ors = Vec::new();
for f in facets.deref() {
match f {
// A group of keys: the union of their document ids forms one AND operand.
Either::Left(keys) => {
ors.reserve(keys.len());
for key in keys {
// A key with no stored document ids contributes an empty set to the union.
let docids = self.index.facets.facet_document_ids(reader, &key)?.unwrap_or_default();
ors.push(docids);
}
let sets: Vec<_> = ors.iter().map(Cow::deref).collect();
let or_result = sdset::multi::OpBuilder::from_vec(sets).union().into_set_buf();
ands.push(Cow::Owned(or_result));
ors.clear();
}
// A single key: its document ids are used directly as one AND operand.
Either::Right(key) =>{
match self.index.facets.facet_document_ids(reader, &key)? {
Some(docids) => ands.push(docids),
// No document ids for this key, so the intersection would be empty:
// return an empty result without running the search.
None => return Ok((vec![], 0)),
}
}
};
}
// NOTE(review): if the filter has no entries, `ands` is empty here; confirm
// what an intersection over zero sets yields and whether that case can occur.
let ands: Vec<_> = ands.iter().map(Cow::deref).collect();
Some(sdset::multi::OpBuilder::from_vec(ands).intersection().into_set_buf())
}
None => None
};
match self.distinct { match self.distinct {
Some((distinct, distinct_size)) => bucket_sort_with_distinct( Some((distinct, distinct_size)) => bucket_sort_with_distinct(
reader, reader,
query, query,
range, range,
facets_docids,
self.filter, self.filter,
distinct, distinct,
distinct_size, distinct_size,
self.criteria, self.criteria,
self.searchable_attrs, self.searchable_attrs,
self.main_store, self.index.main,
self.postings_lists_store, self.index.postings_lists,
self.documents_fields_counts_store, self.index.documents_fields_counts,
self.synonyms_store, self.index.synonyms,
self.prefix_documents_cache_store, self.index.prefix_documents_cache,
self.prefix_postings_lists_cache_store, self.index.prefix_postings_lists_cache,
), ),
None => bucket_sort( None => bucket_sort(
reader, reader,
query, query,
range, range,
facets_docids,
self.filter, self.filter,
self.criteria, self.criteria,
self.searchable_attrs, self.searchable_attrs,
self.main_store, self.index.main,
self.postings_lists_store, self.index.postings_lists,
self.documents_fields_counts_store, self.index.documents_fields_counts,
self.synonyms_store, self.index.synonyms,
self.prefix_documents_cache_store, self.index.prefix_documents_cache,
self.prefix_postings_lists_cache_store, self.index.prefix_postings_lists_cache,
), ),
} }
} }

View File

@ -6,6 +6,7 @@ use std::time::Instant;
use indexmap::IndexMap; use indexmap::IndexMap;
use log::error; use log::error;
use meilisearch_core::Filter; use meilisearch_core::Filter;
use meilisearch_core::facets::FacetFilter;
use meilisearch_core::criterion::*; use meilisearch_core::criterion::*;
use meilisearch_core::settings::RankingRule; use meilisearch_core::settings::RankingRule;
use meilisearch_core::{Highlight, Index, MainT, RankedMap}; use meilisearch_core::{Highlight, Index, MainT, RankedMap};
@ -34,6 +35,7 @@ impl IndexSearchExt for Index {
attributes_to_highlight: None, attributes_to_highlight: None,
filters: None, filters: None,
matches: false, matches: false,
facet_filters: None,
} }
} }
} }
@ -48,6 +50,7 @@ pub struct SearchBuilder<'a> {
attributes_to_highlight: Option<HashSet<String>>, attributes_to_highlight: Option<HashSet<String>>,
filters: Option<String>, filters: Option<String>,
matches: bool, matches: bool,
facet_filters: Option<FacetFilter>,
} }
impl<'a> SearchBuilder<'a> { impl<'a> SearchBuilder<'a> {
@ -82,6 +85,11 @@ impl<'a> SearchBuilder<'a> {
self self
} }
/// Stores `filters` as the facet filter of this search and returns the builder.
///
/// Despite the name, this does not accumulate: any facet filter stored by a
/// previous call is replaced.
pub fn add_facet_filters(&mut self, filters: FacetFilter) -> &SearchBuilder {
self.facet_filters = Some(filters);
self
}
pub fn filters(&mut self, value: String) -> &SearchBuilder { pub fn filters(&mut self, value: String) -> &SearchBuilder {
self.filters = Some(value); self.filters = Some(value);
self self
@ -138,6 +146,8 @@ impl<'a> SearchBuilder<'a> {
} }
} }
query_builder.set_facets(self.facet_filters.as_ref());
let start = Instant::now(); let start = Instant::now();
let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit)); let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit));
let (docs, nb_hits) = result.map_err(ResponseError::search_documents)?; let (docs, nb_hits) = result.map_err(ResponseError::search_documents)?;

View File

@ -12,6 +12,8 @@ use crate::helpers::Authentication;
use crate::routes::IndexParam; use crate::routes::IndexParam;
use crate::Data; use crate::Data;
use meilisearch_core::facets::FacetFilter;
pub fn services(cfg: &mut web::ServiceConfig) { pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(search_with_url_query); cfg.service(search_with_url_query);
} }
@ -28,6 +30,7 @@ struct SearchQuery {
attributes_to_highlight: Option<String>, attributes_to_highlight: Option<String>,
filters: Option<String>, filters: Option<String>,
matches: Option<bool>, matches: Option<bool>,
facet_filters: Option<String>,
} }
#[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")] #[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")]
@ -81,6 +84,13 @@ async fn search_with_url_query(
} }
} }
// A facet filter expression was supplied with the request: build a
// `FacetFilter` from it using the schema and the index's faceting attributes.
// If the index has no faceting attributes, the request is rejected with a
// `FacetExpression` error instead.
if let Some(ref facet_filters) = params.facet_filters {
match index.main.attributes_for_faceting(&reader)? {
Some(ref attrs) => { search_builder.add_facet_filters(FacetFilter::from_str(facet_filters, &schema, attrs)?); },
None => return Err(ResponseError::FacetExpression("can't filter on facets, as no facet is set".to_string()))
}
}
if let Some(attributes_to_crop) = &params.attributes_to_crop { if let Some(attributes_to_crop) = &params.attributes_to_crop {
let default_length = params.crop_length.unwrap_or(200); let default_length = params.crop_length.unwrap_or(200);
let mut final_attributes: HashMap<String, usize> = HashMap::new(); let mut final_attributes: HashMap<String, usize> = HashMap::new();