mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 12:05:05 +08:00
Merge #204
204: Decorrelate Distinct, Asc/Desc, Filterable fields from the faceted fields r=Kerollmops a=Kerollmops This PR decorrelates the fields that need to be stored in facet databases (big inverted indexes for fast access) from the filterable fields, the previously named faceted fields are now named filterable fields and are the union of the distinct attribute, all the Asc/Desc criteria and, the filterable fields. I added two tests to make sure that the engine was correctly generating the faceted databases when a distinct attribute or an Asc/Desc criteria were added, and one to make sure that it was impossible to filter on a non-filterable field even if it was a faceted one. Note that the `AttributesForFacetting` has also been renamed into `FilterableAttributes`. But it will be the Transplant's job to do that on the API, this change is only visible to the milli's library users. - Related to https://github.com/meilisearch/transplant/issues/187. - Fixes #161 by returning the documents that don't have the Asc/Desc field at the end of the bucket. - Fixes #168. - Fixes #152. Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: Marin Postma <postma.marin@protonmail.com> Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
commit
834504aec0
@ -29,7 +29,7 @@ use tokio::sync::broadcast;
|
||||
use warp::{Filter, http::Response};
|
||||
use warp::filters::ws::Message;
|
||||
|
||||
use milli::{FacetCondition, Index, MatchingWords, obkv_to_json, SearchResult, UpdateStore};
|
||||
use milli::{FilterCondition, Index, MatchingWords, obkv_to_json, SearchResult, UpdateStore};
|
||||
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat};
|
||||
use milli::update::UpdateIndexingStep::*;
|
||||
|
||||
@ -251,7 +251,7 @@ struct Settings {
|
||||
searchable_attributes: Setting<Vec<String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
faceted_attributes: Setting<HashSet<String>>,
|
||||
filterable_attributes: Setting<HashSet<String>>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
criteria: Setting<Vec<String>>,
|
||||
@ -420,9 +420,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
// We transpose the settings JSON struct into a real setting update.
|
||||
match settings.faceted_attributes {
|
||||
Setting::Set(faceted_attributes) => builder.set_faceted_fields(faceted_attributes),
|
||||
Setting::Reset => builder.reset_faceted_fields(),
|
||||
match settings.filterable_attributes {
|
||||
Setting::Set(filterable_attributes) => builder.set_filterable_fields(filterable_attributes),
|
||||
Setting::Reset => builder.reset_filterable_fields(),
|
||||
Setting::NotSet => ()
|
||||
}
|
||||
|
||||
@ -690,7 +690,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
let filters = match query.filters {
|
||||
Some(condition) if !condition.trim().is_empty() => {
|
||||
Some(FacetCondition::from_str(&rtxn, &index, &condition).unwrap())
|
||||
Some(FilterCondition::from_str(&rtxn, &index, &condition).unwrap())
|
||||
}
|
||||
_otherwise => None,
|
||||
};
|
||||
@ -698,21 +698,21 @@ async fn main() -> anyhow::Result<()> {
|
||||
let facet_filters = match query.facet_filters {
|
||||
Some(array) => {
|
||||
let eithers = array.into_iter().map(Into::into);
|
||||
FacetCondition::from_array(&rtxn, &index, eithers).unwrap()
|
||||
FilterCondition::from_array(&rtxn, &index, eithers).unwrap()
|
||||
}
|
||||
_otherwise => None,
|
||||
};
|
||||
|
||||
let condition = match (filters, facet_filters) {
|
||||
(Some(filters), Some(facet_filters)) => {
|
||||
Some(FacetCondition::And(Box::new(filters), Box::new(facet_filters)))
|
||||
Some(FilterCondition::And(Box::new(filters), Box::new(facet_filters)))
|
||||
}
|
||||
(Some(condition), None) | (None, Some(condition)) => Some(condition),
|
||||
_otherwise => None,
|
||||
};
|
||||
|
||||
if let Some(condition) = condition {
|
||||
search.facet_condition(condition);
|
||||
search.filter(condition);
|
||||
}
|
||||
|
||||
let SearchResult { matching_words, candidates, documents_ids } = search.execute().unwrap();
|
||||
@ -996,7 +996,7 @@ mod tests {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Set(vec!["name".to_string()]),
|
||||
searchable_attributes: Setting::Set(vec!["age".to_string()]),
|
||||
faceted_attributes: Setting::Set(hashset!{ "age".to_string() }),
|
||||
filterable_attributes: Setting::Set(hashset!{ "age".to_string() }),
|
||||
criteria: Setting::Set(vec!["asc(age)".to_string()]),
|
||||
stop_words: Setting::Set(btreeset! { "and".to_string() }),
|
||||
synonyms: Setting::Set(hashmap!{ "alex".to_string() => vec!["alexey".to_string()] })
|
||||
@ -1047,7 +1047,7 @@ mod tests {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::Reset,
|
||||
searchable_attributes: Setting::Reset,
|
||||
faceted_attributes: Setting::Reset,
|
||||
filterable_attributes: Setting::Reset,
|
||||
criteria: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
@ -1076,7 +1076,7 @@ mod tests {
|
||||
let settings = Settings {
|
||||
displayed_attributes: Setting::NotSet,
|
||||
searchable_attributes: Setting::NotSet,
|
||||
faceted_attributes: Setting::NotSet,
|
||||
filterable_attributes: Setting::NotSet,
|
||||
criteria: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
|
@ -1,5 +1,5 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use regex::Regex;
|
||||
@ -30,8 +30,10 @@ pub enum Criterion {
|
||||
Desc(String),
|
||||
}
|
||||
|
||||
impl Criterion {
|
||||
pub fn from_str(faceted_attributes: &HashSet<String>, txt: &str) -> anyhow::Result<Criterion> {
|
||||
impl FromStr for Criterion {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
|
||||
match txt {
|
||||
"words" => Ok(Criterion::Words),
|
||||
"typo" => Ok(Criterion::Typo),
|
||||
@ -42,9 +44,6 @@ impl Criterion {
|
||||
let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
|
||||
let order = caps.get(1).unwrap().as_str();
|
||||
let field_name = caps.get(2).unwrap().as_str();
|
||||
faceted_attributes.get(field_name).with_context(|| {
|
||||
format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name)
|
||||
})?;
|
||||
match order {
|
||||
"asc" => Ok(Criterion::Asc(field_name.to_string())),
|
||||
"desc" => Ok(Criterion::Desc(field_name.to_string())),
|
||||
|
@ -23,9 +23,9 @@ use crate::fields_ids_map::FieldsIdsMap;
|
||||
|
||||
pub const CRITERIA_KEY: &str = "criteria";
|
||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||
pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key";
|
||||
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
|
||||
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
||||
pub const FACETED_FIELDS_KEY: &str = "faceted-fields";
|
||||
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
||||
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||
@ -324,21 +324,62 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* faceted fields */
|
||||
/* filterable fields */
|
||||
|
||||
/// Writes the facet fields names in the database.
|
||||
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields)
|
||||
/// Writes the filterable fields names in the database.
|
||||
pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FILTERABLE_FIELDS_KEY, fields)
|
||||
}
|
||||
|
||||
/// Deletes the facet fields ids in the database.
|
||||
pub fn delete_faceted_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY)
|
||||
/// Deletes the filterable fields ids in the database.
|
||||
pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, FILTERABLE_FIELDS_KEY)
|
||||
}
|
||||
|
||||
/// Returns the facet fields names.
|
||||
/// Returns the filterable fields names.
|
||||
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FILTERABLE_FIELDS_KEY)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/// Same as `filterable_fields`, but returns ids instead.
|
||||
pub fn filterable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashSet<FieldId>> {
|
||||
let filterable_fields = self.filterable_fields(rtxn)?;
|
||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||
let filterable_fields = filterable_fields
|
||||
.iter()
|
||||
.map(|k| {
|
||||
fields_ids_map
|
||||
.id(k)
|
||||
.ok_or_else(|| format!("{:?} should be present in the field id map", k))
|
||||
.expect("corrupted data: ")
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(filterable_fields)
|
||||
}
|
||||
|
||||
/* faceted documents ids */
|
||||
|
||||
/// Returns the faceted fields names.
|
||||
///
|
||||
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
|
||||
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
|
||||
let filterable_fields = self.filterable_fields(rtxn)?;
|
||||
let distinct_field = self.distinct_field(rtxn)?;
|
||||
let asc_desc_fields = self.criteria(rtxn)?
|
||||
.into_iter()
|
||||
.filter_map(|criterion| match criterion {
|
||||
Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
|
||||
_otherwise => None,
|
||||
});
|
||||
|
||||
let mut faceted_fields = filterable_fields;
|
||||
faceted_fields.extend(asc_desc_fields);
|
||||
if let Some(field) = distinct_field {
|
||||
faceted_fields.insert(field.to_owned());
|
||||
}
|
||||
|
||||
Ok(faceted_fields)
|
||||
}
|
||||
|
||||
/// Same as `faceted_fields`, but returns ids instead.
|
||||
@ -424,18 +465,18 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* Distinct attribute */
|
||||
/* distinct field */
|
||||
|
||||
pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute)
|
||||
pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field)
|
||||
}
|
||||
|
||||
pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
||||
self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY)
|
||||
pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
|
||||
self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY)
|
||||
pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY)
|
||||
}
|
||||
|
||||
/* criteria */
|
||||
|
@ -27,7 +27,7 @@ pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec,
|
||||
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords};
|
||||
pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords};
|
||||
pub use self::tree_level::TreeLevel;
|
||||
pub use self::update_store::UpdateStore;
|
||||
|
||||
|
@ -24,6 +24,7 @@ pub struct AscDesc<'t> {
|
||||
ascending: bool,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
||||
allowed_candidates: RoaringBitmap,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
faceted_candidates: RoaringBitmap,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
@ -68,6 +69,7 @@ impl<'t> AscDesc<'t> {
|
||||
ascending,
|
||||
query_tree: None,
|
||||
candidates: Box::new(std::iter::empty()),
|
||||
allowed_candidates: RoaringBitmap::new(),
|
||||
faceted_candidates: index.number_faceted_documents_ids(rtxn, field_id)?,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent,
|
||||
@ -78,6 +80,9 @@ impl<'t> AscDesc<'t> {
|
||||
impl<'t> Criterion for AscDesc<'t> {
|
||||
#[logging_timer::time("AscDesc::{}")]
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||
self.allowed_candidates -= params.excluded_candidates;
|
||||
|
||||
loop {
|
||||
debug!(
|
||||
"Facet {}({}) iteration",
|
||||
@ -86,18 +91,25 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
);
|
||||
|
||||
match self.candidates.next().transpose()? {
|
||||
None if !self.allowed_candidates.is_empty() => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(take(&mut self.allowed_candidates)),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
|
||||
self.query_tree = query_tree;
|
||||
let mut candidates = match (&self.query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates & &self.faceted_candidates,
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => {
|
||||
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
||||
let candidates = resolve_query_tree(&context, qt, params.wdcache)?;
|
||||
candidates & &self.faceted_candidates
|
||||
resolve_query_tree(&context, qt, params.wdcache)?
|
||||
},
|
||||
(None, None) => take(&mut self.faceted_candidates),
|
||||
(None, None) => self.index.documents_ids(self.rtxn)?,
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
@ -113,12 +125,13 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.allowed_candidates = &candidates - params.excluded_candidates;
|
||||
self.candidates = facet_ordered(
|
||||
self.index,
|
||||
self.rtxn,
|
||||
self.field_id,
|
||||
self.ascending,
|
||||
candidates,
|
||||
candidates & &self.faceted_candidates,
|
||||
)?;
|
||||
},
|
||||
None => return Ok(None),
|
||||
@ -126,6 +139,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
},
|
||||
Some(mut candidates) => {
|
||||
candidates -= params.excluded_candidates;
|
||||
self.allowed_candidates -= &candidates;
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(candidates),
|
||||
|
@ -203,14 +203,14 @@ impl<'t> CriteriaBuilder<'t> {
|
||||
&'t self,
|
||||
query_tree: Option<Operation>,
|
||||
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
||||
facet_candidates: Option<RoaringBitmap>,
|
||||
filtered_candidates: Option<RoaringBitmap>,
|
||||
) -> anyhow::Result<Final<'t>>
|
||||
{
|
||||
use crate::criterion::Criterion as Name;
|
||||
|
||||
let primitive_query = primitive_query.unwrap_or_default();
|
||||
|
||||
let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>;
|
||||
let mut criterion = Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>;
|
||||
for name in self.index.criteria(&self.rtxn)? {
|
||||
criterion = match name {
|
||||
Name::Typo => Box::new(Typo::new(self, criterion)),
|
||||
|
@ -172,7 +172,7 @@ impl DocIter for FacetDistinctIter<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Distinct<'_> for FacetDistinct<'a> {
|
||||
impl<'a> Distinct for FacetDistinct<'a> {
|
||||
type Iter = FacetDistinctIter<'a>;
|
||||
|
||||
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
|
||||
@ -189,8 +189,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::super::test::{generate_index, validate_distinct_candidates};
|
||||
use super::*;
|
||||
|
||||
@ -198,10 +196,7 @@ mod test {
|
||||
($name:ident, $distinct:literal) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
use std::iter::FromIterator;
|
||||
|
||||
let facets = HashSet::from_iter(Some(($distinct.to_string())));
|
||||
let (index, fid, candidates) = generate_index($distinct, facets);
|
||||
let (index, fid, candidates) = generate_index($distinct);
|
||||
let txn = index.read_txn().unwrap();
|
||||
let mut map_distinct = FacetDistinct::new(fid, &index, &txn);
|
||||
let excluded = RoaringBitmap::new();
|
||||
|
@ -1,138 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::{Distinct, DocIter};
|
||||
use crate::{DocumentId, FieldId, Index};
|
||||
|
||||
/// A distinct implementer that is backed by an `HashMap`.
|
||||
///
|
||||
/// Each time a document is seen, the value
|
||||
/// for its distinct field is added to the map. If the map already contains an entry for this
|
||||
/// value, then the document is filtered out, and is added to the excluded set.
|
||||
pub struct MapDistinct<'a> {
|
||||
distinct: FieldId,
|
||||
map: HashMap<String, usize>,
|
||||
index: &'a Index,
|
||||
txn: &'a heed::RoTxn<'a>,
|
||||
}
|
||||
|
||||
impl<'a> MapDistinct<'a> {
|
||||
pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self {
|
||||
Self {
|
||||
distinct,
|
||||
map: HashMap::new(),
|
||||
index,
|
||||
txn,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MapDistinctIter<'a, 'b> {
|
||||
distinct: FieldId,
|
||||
map: &'b mut HashMap<String, usize>,
|
||||
index: &'a Index,
|
||||
txn: &'a heed::RoTxn<'a>,
|
||||
candidates: roaring::bitmap::IntoIter,
|
||||
excluded: RoaringBitmap,
|
||||
}
|
||||
|
||||
impl<'a, 'b> MapDistinctIter<'a, 'b> {
|
||||
/// Performs the next iteration of the mafacetp distinct. This is a convenience method that is
|
||||
/// called by the Iterator::next implementation that transposes the result. It makes error
|
||||
/// handling easier.
|
||||
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
|
||||
let map = &mut self.map;
|
||||
let mut filter = |value: Value| {
|
||||
let entry = map.entry(value.to_string()).or_insert(0);
|
||||
*entry += 1;
|
||||
*entry <= 1
|
||||
};
|
||||
|
||||
while let Some(id) = self.candidates.next() {
|
||||
let document = self.index.documents(&self.txn, Some(id))?[0].1;
|
||||
let value = document
|
||||
.get(self.distinct)
|
||||
.map(serde_json::from_slice::<Value>)
|
||||
.transpose()?;
|
||||
|
||||
let accept = match value {
|
||||
Some(Value::Array(values)) => {
|
||||
let mut accept = true;
|
||||
for value in values {
|
||||
accept &= filter(value);
|
||||
}
|
||||
accept
|
||||
}
|
||||
Some(Value::Null) | Some(Value::Object(_)) | None => true,
|
||||
Some(value) => filter(value),
|
||||
};
|
||||
|
||||
if accept {
|
||||
return Ok(Some(id));
|
||||
} else {
|
||||
self.excluded.insert(id);
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for MapDistinctIter<'_, '_> {
|
||||
type Item = anyhow::Result<DocumentId>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.next_inner().transpose()
|
||||
}
|
||||
}
|
||||
|
||||
impl DocIter for MapDistinctIter<'_, '_> {
|
||||
fn into_excluded(self) -> RoaringBitmap {
|
||||
self.excluded
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b> Distinct<'b> for MapDistinct<'a> {
|
||||
type Iter = MapDistinctIter<'a, 'b>;
|
||||
|
||||
fn distinct(&'b mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
|
||||
MapDistinctIter {
|
||||
distinct: self.distinct,
|
||||
map: &mut self.map,
|
||||
index: &self.index,
|
||||
txn: &self.txn,
|
||||
candidates: candidates.into_iter(),
|
||||
excluded,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
use super::super::test::{generate_index, validate_distinct_candidates};
|
||||
|
||||
macro_rules! test_map_distinct {
|
||||
($name:ident, $distinct:literal) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let (index, fid, candidates) = generate_index($distinct, HashSet::new());
|
||||
let txn = index.read_txn().unwrap();
|
||||
let mut map_distinct = MapDistinct::new(fid, &index, &txn);
|
||||
let excluded = RoaringBitmap::new();
|
||||
let mut iter = map_distinct.distinct(candidates.clone(), excluded);
|
||||
let count = validate_distinct_candidates(iter.by_ref(), fid, &index);
|
||||
let excluded = iter.into_excluded();
|
||||
assert_eq!(count as u64 + excluded.len(), candidates.len());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
test_map_distinct!(test_string, "txt");
|
||||
test_map_distinct!(test_strings, "txts");
|
||||
test_map_distinct!(test_int, "cat-int");
|
||||
test_map_distinct!(test_ints, "cat-ints");
|
||||
}
|
@ -1,12 +1,10 @@
|
||||
mod facet_distinct;
|
||||
mod map_distinct;
|
||||
mod noop_distinct;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::DocumentId;
|
||||
pub use facet_distinct::FacetDistinct;
|
||||
pub use map_distinct::MapDistinct;
|
||||
pub use noop_distinct::NoopDistinct;
|
||||
|
||||
/// A trait implemented by document interators that are returned by calls to `Distinct::distinct`.
|
||||
@ -20,10 +18,10 @@ pub trait DocIter: Iterator<Item = anyhow::Result<DocumentId>> {
|
||||
/// must return an iterator containing only distinct documents, and add the discarded documents to
|
||||
/// the excluded set. The excluded set can later be retrieved by calling `DocIter::excluded` on the
|
||||
/// returned iterator.
|
||||
pub trait Distinct<'a> {
|
||||
pub trait Distinct {
|
||||
type Iter: DocIter;
|
||||
|
||||
fn distinct(&'a mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter;
|
||||
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -74,17 +72,14 @@ mod test {
|
||||
|
||||
/// Returns a temporary index populated with random test documents, the FieldId for the
|
||||
/// distinct attribute, and the RoaringBitmap with the document ids.
|
||||
pub(crate) fn generate_index(distinct: &str, facets: HashSet<String>) -> (TempIndex, FieldId, RoaringBitmap) {
|
||||
pub(crate) fn generate_index(distinct: &str) -> (TempIndex, FieldId, RoaringBitmap) {
|
||||
let index = TempIndex::new();
|
||||
let mut txn = index.write_txn().unwrap();
|
||||
|
||||
// set distinct and faceted attributes for the index.
|
||||
let builder = UpdateBuilder::new(0);
|
||||
let mut update = builder.settings(&mut txn, &index);
|
||||
update.set_distinct_attribute(distinct.to_string());
|
||||
if !facets.is_empty() {
|
||||
update.set_faceted_fields(facets)
|
||||
}
|
||||
update.set_distinct_field(distinct.to_string());
|
||||
update.execute(|_, _| ()).unwrap();
|
||||
|
||||
// add documents to the index
|
||||
|
@ -26,7 +26,7 @@ impl DocIter for NoopDistinctIter {
|
||||
}
|
||||
}
|
||||
|
||||
impl Distinct<'_> for NoopDistinct {
|
||||
impl Distinct for NoopDistinct {
|
||||
type Iter = NoopDistinctIter;
|
||||
|
||||
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
|
||||
|
@ -197,10 +197,10 @@ impl<'a> FacetDistribution<'a> {
|
||||
|
||||
pub fn execute(&self) -> anyhow::Result<BTreeMap<String, BTreeMap<String, u64>>> {
|
||||
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
|
||||
|
||||
let mut distribution = BTreeMap::new();
|
||||
for name in faceted_fields {
|
||||
for name in filterable_fields {
|
||||
let fid = fields_ids_map.id(&name).with_context(|| {
|
||||
format!("missing field name {:?} from the fields id map", name)
|
||||
})?;
|
||||
|
@ -18,7 +18,7 @@ use super::FacetRange;
|
||||
use super::parser::Rule;
|
||||
use super::parser::{PREC_CLIMBER, FilterParser};
|
||||
|
||||
use self::FacetCondition::*;
|
||||
use self::FilterCondition::*;
|
||||
use self::Operator::*;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
@ -49,74 +49,18 @@ impl Operator {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum FacetCondition {
|
||||
pub enum FilterCondition {
|
||||
Operator(FieldId, Operator),
|
||||
Or(Box<Self>, Box<Self>),
|
||||
And(Box<Self>, Box<Self>),
|
||||
}
|
||||
|
||||
fn field_id(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
items: &mut Pairs<Rule>,
|
||||
) -> Result<FieldId, PestError<Rule>>
|
||||
{
|
||||
// lexing ensures that we at least have a key
|
||||
let key = items.next().unwrap();
|
||||
|
||||
let field_id = match fields_ids_map.id(key.as_str()) {
|
||||
Some(field_id) => field_id,
|
||||
None => return Err(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` not found, available attributes are: {}",
|
||||
key.as_str(),
|
||||
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
)),
|
||||
};
|
||||
|
||||
if !faceted_fields.contains(&field_id) {
|
||||
return Err(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` is not faceted, available faceted attributes are: {}",
|
||||
key.as_str(),
|
||||
faceted_fields.iter().flat_map(|id| {
|
||||
fields_ids_map.name(*id)
|
||||
}).collect::<Vec<_>>().join(", "),
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(field_id)
|
||||
}
|
||||
|
||||
fn pest_parse<T>(pair: Pair<Rule>) -> (Result<T, pest::error::Error<Rule>>, String)
|
||||
where T: FromStr,
|
||||
T::Err: ToString,
|
||||
{
|
||||
let result = match pair.as_str().parse::<T>() {
|
||||
Ok(value) => Ok(value),
|
||||
Err(e) => Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError { message: e.to_string() },
|
||||
pair.as_span(),
|
||||
)),
|
||||
};
|
||||
|
||||
(result, pair.as_str().to_string())
|
||||
}
|
||||
|
||||
impl FacetCondition {
|
||||
impl FilterCondition {
|
||||
pub fn from_array<I, J, A, B>(
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
array: I,
|
||||
) -> anyhow::Result<Option<FacetCondition>>
|
||||
) -> anyhow::Result<Option<FilterCondition>>
|
||||
where I: IntoIterator<Item=Either<J, B>>,
|
||||
J: IntoIterator<Item=A>,
|
||||
A: AsRef<str>,
|
||||
@ -129,7 +73,7 @@ impl FacetCondition {
|
||||
Either::Left(array) => {
|
||||
let mut ors = None;
|
||||
for rule in array {
|
||||
let condition = FacetCondition::from_str(rtxn, index, rule.as_ref())?;
|
||||
let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?;
|
||||
ors = match ors.take() {
|
||||
Some(ors) => Some(Or(Box::new(ors), Box::new(condition))),
|
||||
None => Some(condition),
|
||||
@ -144,7 +88,7 @@ impl FacetCondition {
|
||||
}
|
||||
},
|
||||
Either::Right(rule) => {
|
||||
let condition = FacetCondition::from_str(rtxn, index, rule.as_ref())?;
|
||||
let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?;
|
||||
ands = match ands.take() {
|
||||
Some(ands) => Some(And(Box::new(ands), Box::new(condition))),
|
||||
None => Some(condition),
|
||||
@ -160,12 +104,12 @@ impl FacetCondition {
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
expression: &str,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
) -> anyhow::Result<FilterCondition>
|
||||
{
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let faceted_fields = index.faceted_fields_ids(rtxn)?;
|
||||
let filterable_fields = index.filterable_fields_ids(rtxn)?;
|
||||
let lexed = FilterParser::parse(Rule::prgm, expression)?;
|
||||
FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed)
|
||||
FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
|
||||
}
|
||||
|
||||
fn from_pairs(
|
||||
@ -199,7 +143,7 @@ impl FacetCondition {
|
||||
)
|
||||
}
|
||||
|
||||
fn negate(self) -> FacetCondition {
|
||||
fn negate(self) -> FilterCondition {
|
||||
match self {
|
||||
Operator(fid, op) => match op.negate() {
|
||||
(op, None) => Operator(fid, op),
|
||||
@ -212,12 +156,12 @@ impl FacetCondition {
|
||||
|
||||
fn between(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
) -> anyhow::Result<FilterCondition>
|
||||
{
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
||||
|
||||
let (lresult, _) = pest_parse(items.next().unwrap());
|
||||
let (rresult, _) = pest_parse(items.next().unwrap());
|
||||
@ -230,12 +174,12 @@ impl FacetCondition {
|
||||
|
||||
fn equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
) -> anyhow::Result<FilterCondition>
|
||||
{
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
||||
|
||||
let value = items.next().unwrap();
|
||||
let (result, svalue) = pest_parse(value);
|
||||
@ -246,12 +190,12 @@ impl FacetCondition {
|
||||
|
||||
fn greater_than(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
) -> anyhow::Result<FilterCondition>
|
||||
{
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
||||
|
||||
let value = items.next().unwrap();
|
||||
let (result, _svalue) = pest_parse(value);
|
||||
@ -261,12 +205,12 @@ impl FacetCondition {
|
||||
|
||||
fn greater_than_or_equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
) -> anyhow::Result<FilterCondition>
|
||||
{
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
||||
|
||||
let value = items.next().unwrap();
|
||||
let (result, _svalue) = pest_parse(value);
|
||||
@ -276,12 +220,12 @@ impl FacetCondition {
|
||||
|
||||
fn lower_than(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
) -> anyhow::Result<FilterCondition>
|
||||
{
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
||||
|
||||
let value = items.next().unwrap();
|
||||
let (result, _svalue) = pest_parse(value);
|
||||
@ -291,12 +235,12 @@ impl FacetCondition {
|
||||
|
||||
fn lower_than_or_equal(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
item: Pair<Rule>,
|
||||
) -> anyhow::Result<FacetCondition>
|
||||
) -> anyhow::Result<FilterCondition>
|
||||
{
|
||||
let mut items = item.into_inner();
|
||||
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?;
|
||||
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
|
||||
|
||||
let value = items.next().unwrap();
|
||||
let (result, _svalue) = pest_parse(value);
|
||||
@ -305,7 +249,7 @@ impl FacetCondition {
|
||||
}
|
||||
}
|
||||
|
||||
impl FacetCondition {
|
||||
impl FilterCondition {
|
||||
/// Aggregates the documents ids that are part of the specified range automatically
|
||||
/// going deeper through the levels.
|
||||
fn explore_facet_number_levels(
|
||||
@ -469,6 +413,71 @@ impl FacetCondition {
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve the field id base on the pest value, returns an error is
|
||||
/// the field does not exist or is not filterable.
|
||||
///
|
||||
/// The pest pair is simply a string associated with a span, a location to highlight in
|
||||
/// the error message.
|
||||
fn field_id(
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
filterable_fields: &HashSet<FieldId>,
|
||||
items: &mut Pairs<Rule>,
|
||||
) -> Result<FieldId, PestError<Rule>>
|
||||
{
|
||||
// lexing ensures that we at least have a key
|
||||
let key = items.next().unwrap();
|
||||
|
||||
let field_id = match fields_ids_map.id(key.as_str()) {
|
||||
Some(field_id) => field_id,
|
||||
None => return Err(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` not found, available attributes are: {}",
|
||||
key.as_str(),
|
||||
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
)),
|
||||
};
|
||||
|
||||
if !filterable_fields.contains(&field_id) {
|
||||
return Err(PestError::new_from_span(
|
||||
ErrorVariant::CustomError {
|
||||
message: format!(
|
||||
"attribute `{}` is not filterable, available filterable attributes are: {}",
|
||||
key.as_str(),
|
||||
filterable_fields.iter().flat_map(|id| {
|
||||
fields_ids_map.name(*id)
|
||||
}).collect::<Vec<_>>().join(", "),
|
||||
),
|
||||
},
|
||||
key.as_span(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(field_id)
|
||||
}
|
||||
|
||||
/// Tries to parse the pest pair into the type `T` specified, always returns
|
||||
/// the original string that we tried to parse.
|
||||
///
|
||||
/// Returns the parsing error associated with the span if the conversion fails.
|
||||
fn pest_parse<T>(pair: Pair<Rule>) -> (Result<T, pest::error::Error<Rule>>, String)
|
||||
where T: FromStr,
|
||||
T::Err: ToString,
|
||||
{
|
||||
let result = match pair.as_str().parse::<T>() {
|
||||
Ok(value) => Ok(value),
|
||||
Err(e) => Err(PestError::<Rule>::new_from_span(
|
||||
ErrorVariant::CustomError { message: e.to_string() },
|
||||
pair.as_span(),
|
||||
)),
|
||||
};
|
||||
|
||||
(result, pair.as_str().to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -484,24 +493,24 @@ mod tests {
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
// Set the filterable fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_faceted_fields(hashset!{ S("channel") });
|
||||
builder.set_filterable_fields(hashset!{ S("channel") });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap();
|
||||
let condition = FilterCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap();
|
||||
let expected = Operator(0, Operator::Equal(None, S("ponce")));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
|
||||
let condition = FilterCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
|
||||
let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
|
||||
let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
|
||||
let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
|
||||
assert_eq!(condition, expected);
|
||||
}
|
||||
@ -513,20 +522,20 @@ mod tests {
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
// Set the filterable fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_faceted_fields(hashset!{ "timestamp".into() });
|
||||
builder.set_filterable_fields(hashset!{ "timestamp".into() });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
|
||||
let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
|
||||
let expected = Operator(0, Between(22.0, 44.0));
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
|
||||
let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
|
||||
let expected = Or(
|
||||
Box::new(Operator(0, LowerThan(22.0))),
|
||||
Box::new(Operator(0, GreaterThan(44.0))),
|
||||
@ -541,17 +550,17 @@ mod tests {
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
// Set the filterable fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
|
||||
builder.set_faceted_fields(hashset!{ S("channel"), S("timestamp") });
|
||||
builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_str(
|
||||
let condition = FilterCondition::from_str(
|
||||
&rtxn, &index,
|
||||
"channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)",
|
||||
).unwrap();
|
||||
@ -564,7 +573,7 @@ mod tests {
|
||||
);
|
||||
assert_eq!(condition, expected);
|
||||
|
||||
let condition = FacetCondition::from_str(
|
||||
let condition = FilterCondition::from_str(
|
||||
&rtxn, &index,
|
||||
"channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)",
|
||||
).unwrap();
|
||||
@ -588,21 +597,21 @@ mod tests {
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the channel.
|
||||
// Set the filterable fields to be the channel.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
|
||||
builder.set_faceted_fields(hashset!{ S("channel"), S("timestamp") });
|
||||
builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Test that the facet condition is correctly generated.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let condition = FacetCondition::from_array(
|
||||
let condition = FilterCondition::from_array(
|
||||
&rtxn, &index,
|
||||
vec![Either::Right("channel = gotaga"), Either::Left(vec!["timestamp = 44", "channel != ponce"])],
|
||||
).unwrap().unwrap();
|
||||
let expected = FacetCondition::from_str(
|
||||
let expected = FilterCondition::from_str(
|
||||
&rtxn, &index,
|
||||
"channel = gotaga AND (timestamp = 44 OR channel != ponce)",
|
||||
).unwrap();
|
@ -9,10 +9,10 @@ use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||
use crate::{Index, FieldId};
|
||||
|
||||
pub use self::facet_condition::{FacetCondition, Operator};
|
||||
pub use self::filter_condition::{FilterCondition, Operator};
|
||||
pub use self::facet_distribution::FacetDistribution;
|
||||
|
||||
mod facet_condition;
|
||||
mod filter_condition;
|
||||
mod facet_distribution;
|
||||
mod parser;
|
||||
|
||||
|
@ -12,11 +12,11 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||
use once_cell::sync::Lazy;
|
||||
use roaring::bitmap::RoaringBitmap;
|
||||
|
||||
use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct};
|
||||
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
|
||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||
use crate::{Index, DocumentId};
|
||||
|
||||
pub use self::facet::{FacetCondition, FacetDistribution, FacetIter, Operator};
|
||||
pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator};
|
||||
pub use self::matching_words::MatchingWords;
|
||||
use self::query_tree::QueryTreeBuilder;
|
||||
|
||||
@ -33,7 +33,7 @@ mod matching_words;
|
||||
|
||||
pub struct Search<'a> {
|
||||
query: Option<String>,
|
||||
facet_condition: Option<FacetCondition>,
|
||||
filter: Option<FilterCondition>,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
optional_words: bool,
|
||||
@ -47,7 +47,7 @@ impl<'a> Search<'a> {
|
||||
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
|
||||
Search {
|
||||
query: None,
|
||||
facet_condition: None,
|
||||
filter: None,
|
||||
offset: 0,
|
||||
limit: 20,
|
||||
optional_words: true,
|
||||
@ -88,8 +88,8 @@ impl<'a> Search<'a> {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn facet_condition(&mut self, condition: FacetCondition) -> &mut Search<'a> {
|
||||
self.facet_condition = Some(condition);
|
||||
pub fn filter(&mut self, condition: FilterCondition) -> &mut Search<'a> {
|
||||
self.filter = Some(condition);
|
||||
self
|
||||
}
|
||||
|
||||
@ -121,12 +121,12 @@ impl<'a> Search<'a> {
|
||||
|
||||
// We create the original candidates with the facet conditions results.
|
||||
let before = Instant::now();
|
||||
let facet_candidates = match &self.facet_condition {
|
||||
let filtered_candidates = match &self.filter {
|
||||
Some(condition) => Some(condition.evaluate(self.rtxn, self.index)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
debug!("facet candidates: {:?} took {:.02?}", facet_candidates, before.elapsed());
|
||||
debug!("facet candidates: {:?} took {:.02?}", filtered_candidates, before.elapsed());
|
||||
|
||||
let matching_words = match query_tree.as_ref() {
|
||||
Some(query_tree) => MatchingWords::from_query_tree(&query_tree),
|
||||
@ -134,31 +134,26 @@ impl<'a> Search<'a> {
|
||||
};
|
||||
|
||||
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||
let criteria = criteria_builder.build(query_tree, primitive_query, facet_candidates)?;
|
||||
let criteria = criteria_builder.build(query_tree, primitive_query, filtered_candidates)?;
|
||||
|
||||
match self.index.distinct_attribute(self.rtxn)? {
|
||||
match self.index.distinct_field(self.rtxn)? {
|
||||
None => self.perform_sort(NoopDistinct, matching_words, criteria),
|
||||
Some(name) => {
|
||||
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
let id = field_ids_map.id(name).expect("distinct not present in field map");
|
||||
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||
if faceted_fields.contains(name) {
|
||||
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
|
||||
self.perform_sort(distinct, matching_words, criteria)
|
||||
} else {
|
||||
let distinct = MapDistinct::new(id, self.index, self.rtxn);
|
||||
self.perform_sort(distinct, matching_words, criteria)
|
||||
}
|
||||
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
|
||||
self.perform_sort(distinct, matching_words, criteria)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn perform_sort(
|
||||
fn perform_sort<D: Distinct>(
|
||||
&self,
|
||||
mut distinct: impl for<'c> Distinct<'c>,
|
||||
mut distinct: D,
|
||||
matching_words: MatchingWords,
|
||||
mut criteria: Final,
|
||||
) -> anyhow::Result<SearchResult> {
|
||||
) -> anyhow::Result<SearchResult>
|
||||
{
|
||||
let mut offset = self.offset;
|
||||
let mut initial_candidates = RoaringBitmap::new();
|
||||
let mut excluded_candidates = RoaringBitmap::new();
|
||||
@ -193,7 +188,7 @@ impl fmt::Debug for Search<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let Search {
|
||||
query,
|
||||
facet_condition,
|
||||
filter,
|
||||
offset,
|
||||
limit,
|
||||
optional_words,
|
||||
@ -204,7 +199,7 @@ impl fmt::Debug for Search<'_> {
|
||||
} = self;
|
||||
f.debug_struct("Search")
|
||||
.field("query", query)
|
||||
.field("facet_condition", facet_condition)
|
||||
.field("filter", filter)
|
||||
.field("offset", offset)
|
||||
.field("limit", limit)
|
||||
.field("optional_words", optional_words)
|
||||
|
@ -9,7 +9,6 @@ use rayon::ThreadPool;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
use crate::{FieldsIdsMap, Index};
|
||||
use crate::criterion::Criterion;
|
||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||
use crate::update::index_documents::{IndexDocumentsMethod, Transform};
|
||||
|
||||
@ -66,10 +65,10 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
|
||||
searchable_fields: Setting<Vec<String>>,
|
||||
displayed_fields: Setting<Vec<String>>,
|
||||
faceted_fields: Setting<HashSet<String>>,
|
||||
filterable_fields: Setting<HashSet<String>>,
|
||||
criteria: Setting<Vec<String>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
distinct_attribute: Setting<String>,
|
||||
distinct_field: Setting<String>,
|
||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||
}
|
||||
|
||||
@ -92,10 +91,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
thread_pool: None,
|
||||
searchable_fields: Setting::NotSet,
|
||||
displayed_fields: Setting::NotSet,
|
||||
faceted_fields: Setting::NotSet,
|
||||
filterable_fields: Setting::NotSet,
|
||||
criteria: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
distinct_field: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
update_id,
|
||||
}
|
||||
@ -117,12 +116,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.displayed_fields = Setting::Set(names);
|
||||
}
|
||||
|
||||
pub fn reset_faceted_fields(&mut self) {
|
||||
self.faceted_fields = Setting::Reset;
|
||||
pub fn reset_filterable_fields(&mut self) {
|
||||
self.filterable_fields = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_faceted_fields(&mut self, names_facet_types: HashSet<String>) {
|
||||
self.faceted_fields = Setting::Set(names_facet_types);
|
||||
pub fn set_filterable_fields(&mut self, names: HashSet<String>) {
|
||||
self.filterable_fields = Setting::Set(names);
|
||||
}
|
||||
|
||||
pub fn reset_criteria(&mut self) {
|
||||
@ -145,12 +144,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset_distinct_attribute(&mut self) {
|
||||
self.distinct_attribute = Setting::Reset;
|
||||
pub fn reset_distinct_field(&mut self) {
|
||||
self.distinct_field = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_distinct_attribute(&mut self, distinct_attribute: String) {
|
||||
self.distinct_attribute = Setting::Set(distinct_attribute);
|
||||
pub fn set_distinct_field(&mut self, distinct_field: String) {
|
||||
self.distinct_field = Setting::Set(distinct_field);
|
||||
}
|
||||
|
||||
pub fn reset_synonyms(&mut self) {
|
||||
@ -166,8 +165,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
|
||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep, u64) + Sync
|
||||
where
|
||||
F: Fn(UpdateIndexingStep, u64) + Sync
|
||||
{
|
||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
let update_id = self.update_id;
|
||||
@ -198,7 +197,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
let output = transform.remap_index_documents(
|
||||
primary_key.to_string(),
|
||||
old_fields_ids_map,
|
||||
fields_ids_map.clone())?;
|
||||
fields_ids_map.clone(),
|
||||
)?;
|
||||
|
||||
// We clear the full database (words-fst, documents ids and documents content).
|
||||
ClearDocuments::new(self.wtxn, self.index, self.update_id).execute()?;
|
||||
@ -215,6 +215,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
||||
indexing_builder.thread_pool = self.thread_pool;
|
||||
indexing_builder.execute_raw(output, &cb)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -243,18 +244,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn update_distinct_attribute(&mut self) -> anyhow::Result<bool> {
|
||||
match self.distinct_attribute {
|
||||
fn update_distinct_field(&mut self) -> anyhow::Result<bool> {
|
||||
match self.distinct_field {
|
||||
Setting::Set(ref attr) => {
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
fields_ids_map
|
||||
.insert(attr)
|
||||
.context("field id limit exceeded")?;
|
||||
|
||||
self.index.put_distinct_attribute(self.wtxn, &attr)?;
|
||||
self.index.put_distinct_field(self.wtxn, &attr)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
}
|
||||
Setting::Reset => { self.index.delete_distinct_attribute(self.wtxn)?; },
|
||||
Setting::Reset => { self.index.delete_distinct_field(self.wtxn)?; },
|
||||
Setting::NotSet => return Ok(false),
|
||||
}
|
||||
Ok(true)
|
||||
@ -267,7 +268,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
Setting::Set(ref fields) => {
|
||||
// every time the searchable attributes are updated, we need to update the
|
||||
// ids for any settings that uses the facets. (displayed_fields,
|
||||
// faceted_fields)
|
||||
// filterable_fields)
|
||||
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
let mut new_fields_ids_map = FieldsIdsMap::new();
|
||||
@ -381,8 +382,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
fn update_facets(&mut self) -> anyhow::Result<bool> {
|
||||
match self.faceted_fields {
|
||||
fn update_filterable(&mut self) -> anyhow::Result<()> {
|
||||
match self.filterable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
let mut new_facets = HashSet::new();
|
||||
@ -390,22 +391,21 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
fields_ids_map.insert(name).context("field id limit exceeded")?;
|
||||
new_facets.insert(name.clone());
|
||||
}
|
||||
self.index.put_faceted_fields(self.wtxn, &new_facets)?;
|
||||
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
}
|
||||
Setting::Reset => { self.index.delete_faceted_fields(self.wtxn)?; }
|
||||
Setting::NotSet => return Ok(false)
|
||||
Setting::Reset => { self.index.delete_filterable_fields(self.wtxn)?; }
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
Ok(true)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_criteria(&mut self) -> anyhow::Result<()> {
|
||||
match self.criteria {
|
||||
Setting::Set(ref fields) => {
|
||||
let faceted_fields = self.index.faceted_fields(&self.wtxn)?;
|
||||
let mut new_criteria = Vec::new();
|
||||
for name in fields {
|
||||
let criterion = Criterion::from_str(&faceted_fields, &name)?;
|
||||
let criterion = name.parse()?;
|
||||
new_criteria.push(criterion);
|
||||
}
|
||||
self.index.put_criteria(self.wtxn, &new_criteria)?;
|
||||
@ -421,20 +421,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
F: Fn(UpdateIndexingStep, u64) + Sync
|
||||
{
|
||||
self.index.set_updated_at(self.wtxn, &Utc::now())?;
|
||||
|
||||
let old_faceted_fields = self.index.faceted_fields(&self.wtxn)?;
|
||||
let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;
|
||||
|
||||
self.update_displayed()?;
|
||||
let stop_words_updated = self.update_stop_words()?;
|
||||
let facets_updated = self.update_facets()?;
|
||||
self.update_distinct_attribute()?;
|
||||
// update_criteria MUST be called after update_facets, since criterion fields must be set
|
||||
// as facets.
|
||||
self.update_filterable()?;
|
||||
self.update_distinct_field()?;
|
||||
self.update_criteria()?;
|
||||
|
||||
// If there is new faceted fields we indicate that we must reindex as we must
|
||||
// index new fields as facets. It means that the distinct attribute,
|
||||
// an Asc/Desc criterion or a filtered attribute as be added or removed.
|
||||
let new_faceted_fields = self.index.faceted_fields(&self.wtxn)?;
|
||||
let faceted_updated = old_faceted_fields != new_faceted_fields;
|
||||
|
||||
let stop_words_updated = self.update_stop_words()?;
|
||||
let synonyms_updated = self.update_synonyms()?;
|
||||
let searchable_updated = self.update_searchable()?;
|
||||
|
||||
if stop_words_updated || facets_updated || synonyms_updated || searchable_updated {
|
||||
if stop_words_updated || faceted_updated || synonyms_updated || searchable_updated {
|
||||
self.reindex(&progress_callback, old_fields_ids_map)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@ -446,6 +455,7 @@ mod tests {
|
||||
use maplit::{btreeset, hashmap, hashset};
|
||||
use big_s::S;
|
||||
|
||||
use crate::{Criterion, FilterCondition, SearchResult};
|
||||
use crate::update::{IndexDocuments, UpdateFormat};
|
||||
|
||||
use super::*;
|
||||
@ -611,16 +621,16 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn set_faceted_fields() {
|
||||
fn set_filterable_fields() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the faceted fields to be the age.
|
||||
// Set the filterable fields to be the age.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_faceted_fields(hashset!{ S("age") });
|
||||
builder.set_filterable_fields(hashset!{ S("age") });
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
|
||||
// Then index some documents.
|
||||
@ -637,7 +647,7 @@ mod tests {
|
||||
|
||||
// Check that the displayed fields are correctly set.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let fields_ids = index.faceted_fields(&rtxn).unwrap();
|
||||
let fields_ids = index.filterable_fields(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids, hashset!{ S("age") });
|
||||
// Only count the field_id 0 and level 0 facet values.
|
||||
// TODO we must support typed CSVs for numbers to be understood.
|
||||
@ -670,6 +680,88 @@ mod tests {
|
||||
assert_eq!(count, 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn set_asc_desc_field() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the filterable fields to be the age.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
// Don't display the generated `id` field.
|
||||
builder.set_displayed_fields(vec![S("name"), S("age")]);
|
||||
builder.set_criteria(vec![S("asc(age)")]);
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
|
||||
// Then index some documents.
|
||||
let content = &br#"[
|
||||
{ "name": "kevin", "age": 23 },
|
||||
{ "name": "kevina", "age": 21 },
|
||||
{ "name": "benoit", "age": 34 }
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
builder.enable_autogenerate_docids();
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Run an empty query just to ensure that the search results are ordered.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
|
||||
let documents = index.documents(&rtxn, documents_ids).unwrap();
|
||||
|
||||
// Fetch the documents "age" field in the ordre in which the documents appear.
|
||||
let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap();
|
||||
let iter = documents.into_iter().map(|(_, doc)| {
|
||||
let bytes = doc.get(age_field_id).unwrap();
|
||||
let string = std::str::from_utf8(bytes).unwrap();
|
||||
string.parse::<u32>().unwrap()
|
||||
});
|
||||
|
||||
assert_eq!(iter.collect::<Vec<_>>(), vec![21, 23, 34]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn set_distinct_field() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the filterable fields to be the age.
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
// Don't display the generated `id` field.
|
||||
builder.set_displayed_fields(vec![S("name"), S("age")]);
|
||||
builder.set_distinct_field(S("age"));
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
|
||||
// Then index some documents.
|
||||
let content = &br#"[
|
||||
{ "name": "kevin", "age": 23 },
|
||||
{ "name": "kevina", "age": 21 },
|
||||
{ "name": "benoit", "age": 34 },
|
||||
{ "name": "bernard", "age": 34 },
|
||||
{ "name": "bertrand", "age": 34 },
|
||||
{ "name": "bernie", "age": 34 },
|
||||
{ "name": "ben", "age": 34 }
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
builder.enable_autogenerate_docids();
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Run an empty query just to ensure that the search results are ordered.
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
|
||||
|
||||
// There must be at least one document with a 34 as the age.
|
||||
assert_eq!(documents_ids.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_stop_words() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
@ -833,7 +925,7 @@ mod tests {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_displayed_fields(vec!["hello".to_string()]);
|
||||
builder.set_faceted_fields(hashset!{ S("age"), S("toto") });
|
||||
builder.set_filterable_fields(hashset!{ S("age"), S("toto") });
|
||||
builder.set_criteria(vec!["asc(toto)".to_string()]);
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
@ -858,4 +950,24 @@ mod tests {
|
||||
assert!(index.primary_key(&rtxn).unwrap().is_none());
|
||||
assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn setting_not_filterable_cant_filter() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set all the settings except searchable
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_displayed_fields(vec!["hello".to_string()]);
|
||||
// It is only Asc(toto), there is a facet database but it is denied to filter with toto.
|
||||
builder.set_criteria(vec!["asc(toto)".to_string()]);
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
FilterCondition::from_str(&rtxn, &index, "toto = 32").unwrap_err();
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user