mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 17:11:15 +08:00
Merge #550
550: Add the two new pagination and faceting settings r=ManyTheFish a=Kerollmops This PR adds two new settings to the database. Those settings are described [in this spec](https://github.com/meilisearch/specifications/pull/157). Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
commit
19d44142a1
@ -56,6 +56,8 @@ pub mod main_key {
|
|||||||
pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
|
pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
|
||||||
pub const EXACT_WORDS: &str = "exact-words";
|
pub const EXACT_WORDS: &str = "exact-words";
|
||||||
pub const EXACT_ATTRIBUTES: &str = "exact-attributes";
|
pub const EXACT_ATTRIBUTES: &str = "exact-attributes";
|
||||||
|
pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
|
||||||
|
pub const PAGINATION_LIMITED_TO: &str = "pagination-limited-to";
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod db_name {
|
pub mod db_name {
|
||||||
@ -1087,6 +1089,34 @@ impl Index {
|
|||||||
self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES)?;
|
self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads the value stored under `main_key::MAX_VALUES_PER_FACET` in the `main` database.
///
/// The stored entry is decoded as a `usize` through `OwnedType<usize>`.
/// NOTE(review): the `Option` is presumably `None` when the key has never been
/// written or was deleted — confirm against heed's `Database::get` semantics.
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
|
||||||
|
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Writes `val` under `main_key::MAX_VALUES_PER_FACET` in the `main` database.
///
/// The value is encoded as a `usize` through `OwnedType<usize>`, using the
/// provided write transaction. Any error from the underlying `put` is returned as is.
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> {
|
||||||
|
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET, &val)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deletes the entry stored under `main_key::MAX_VALUES_PER_FACET` in the `main` database.
///
/// NOTE(review): the returned `bool` presumably tells whether an entry was
/// actually removed — confirm against heed's `Database::delete` semantics.
pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
|
self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads the value stored under `main_key::PAGINATION_LIMITED_TO` in the `main` database.
///
/// The stored entry is decoded as a `usize` through `OwnedType<usize>`.
/// NOTE(review): the `Option` is presumably `None` when the key has never been
/// written or was deleted — confirm against heed's `Database::get` semantics.
pub fn pagination_limited_to(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
|
||||||
|
self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_LIMITED_TO)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Writes `val` under `main_key::PAGINATION_LIMITED_TO` in the `main` database.
///
/// The value is encoded as a `usize` through `OwnedType<usize>`, using the
/// provided write transaction. Any error from the underlying `put` is returned as is.
pub(crate) fn put_pagination_limited_to(
|
||||||
|
&self,
|
||||||
|
txn: &mut RwTxn,
|
||||||
|
val: usize,
|
||||||
|
) -> heed::Result<()> {
|
||||||
|
self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_LIMITED_TO, &val)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deletes the entry stored under `main_key::PAGINATION_LIMITED_TO` in the `main` database.
///
/// NOTE(review): the returned `bool` presumably tells whether an entry was
/// actually removed — confirm against heed's `Database::delete` semantics.
pub(crate) fn delete_pagination_limited_to(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
|
self.main.delete::<_, Str>(txn, main_key::PAGINATION_LIMITED_TO)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -38,7 +38,7 @@ pub use self::heed_codec::{
|
|||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{
|
pub use self::search::{
|
||||||
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
|
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
|
||||||
MatchingWords, Search, SearchResult,
|
MatchingWords, Search, SearchResult, DEFAULT_VALUES_PER_FACET,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||||
|
@ -15,7 +15,7 @@ use crate::{FieldId, Index, Result};
|
|||||||
|
|
||||||
/// The default number of values per facet that will
|
/// The default number of values per facet that will
|
||||||
/// be fetched from the key-value store.
|
/// be fetched from the key-value store.
|
||||||
const DEFAULT_VALUES_BY_FACET: usize = 100;
|
pub const DEFAULT_VALUES_PER_FACET: usize = 100;
|
||||||
|
|
||||||
/// Threshold on the number of candidates that will make
|
/// Threshold on the number of candidates that will make
|
||||||
/// the system choose between one algorithm or another.
|
/// the system choose between one algorithm or another.
|
||||||
@ -24,7 +24,7 @@ const CANDIDATES_THRESHOLD: u64 = 3000;
|
|||||||
pub struct FacetDistribution<'a> {
|
pub struct FacetDistribution<'a> {
|
||||||
facets: Option<HashSet<String>>,
|
facets: Option<HashSet<String>>,
|
||||||
candidates: Option<RoaringBitmap>,
|
candidates: Option<RoaringBitmap>,
|
||||||
max_values_by_facet: usize,
|
max_values_per_facet: usize,
|
||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
}
|
}
|
||||||
@ -34,7 +34,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
FacetDistribution {
|
FacetDistribution {
|
||||||
facets: None,
|
facets: None,
|
||||||
candidates: None,
|
candidates: None,
|
||||||
max_values_by_facet: DEFAULT_VALUES_BY_FACET,
|
max_values_per_facet: DEFAULT_VALUES_PER_FACET,
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
}
|
}
|
||||||
@ -45,8 +45,8 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self {
|
pub fn max_values_per_facet(&mut self, max: usize) -> &mut Self {
|
||||||
self.max_values_by_facet = max;
|
self.max_values_per_facet = max;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,7 +82,8 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
let ((_, _, value), ()) = result?;
|
let ((_, _, value), ()) = result?;
|
||||||
*distribution.entry(value.to_string()).or_insert(0) += 1;
|
*distribution.entry(value.to_string()).or_insert(0) += 1;
|
||||||
|
|
||||||
if distribution.len() - distribution_prelength == self.max_values_by_facet {
|
if distribution.len() - distribution_prelength == self.max_values_per_facet
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -108,7 +109,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
.or_insert_with(|| (original_value, 0));
|
.or_insert_with(|| (original_value, 0));
|
||||||
*count += 1;
|
*count += 1;
|
||||||
|
|
||||||
if normalized_distribution.len() == self.max_values_by_facet {
|
if normalized_distribution.len() == self.max_values_per_facet {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -141,7 +142,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
distribution.insert(value.to_string(), docids.len());
|
distribution.insert(value.to_string(), docids.len());
|
||||||
}
|
}
|
||||||
if distribution.len() == self.max_values_by_facet {
|
if distribution.len() == self.max_values_per_facet {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -164,7 +165,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
distribution.insert(original.to_string(), docids.len());
|
distribution.insert(original.to_string(), docids.len());
|
||||||
}
|
}
|
||||||
if distribution.len() == self.max_values_by_facet {
|
if distribution.len() == self.max_values_per_facet {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -186,7 +187,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
for result in range {
|
for result in range {
|
||||||
let ((_, _, value, _), docids) = result?;
|
let ((_, _, value, _), docids) = result?;
|
||||||
distribution.insert(value.to_string(), docids.len());
|
distribution.insert(value.to_string(), docids.len());
|
||||||
if distribution.len() == self.max_values_by_facet {
|
if distribution.len() == self.max_values_per_facet {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -202,7 +203,7 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
for result in iter {
|
for result in iter {
|
||||||
let ((_, normalized_value), (original_value, docids)) = result?;
|
let ((_, normalized_value), (original_value, docids)) = result?;
|
||||||
normalized_distribution.insert(normalized_value, (original_value, docids.len()));
|
normalized_distribution.insert(normalized_value, (original_value, docids.len()));
|
||||||
if normalized_distribution.len() == self.max_values_by_facet {
|
if normalized_distribution.len() == self.max_values_per_facet {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -290,12 +291,13 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
|
|
||||||
impl fmt::Debug for FacetDistribution<'_> {
|
impl fmt::Debug for FacetDistribution<'_> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self;
|
let FacetDistribution { facets, candidates, max_values_per_facet, rtxn: _, index: _ } =
|
||||||
|
self;
|
||||||
|
|
||||||
f.debug_struct("FacetDistribution")
|
f.debug_struct("FacetDistribution")
|
||||||
.field("facets", facets)
|
.field("facets", facets)
|
||||||
.field("candidates", candidates)
|
.field("candidates", candidates)
|
||||||
.field("max_values_by_facet", max_values_by_facet)
|
.field("max_values_per_facet", max_values_per_facet)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
pub use self::facet_distribution::FacetDistribution;
|
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
||||||
pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
|
pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
|
||||||
pub use self::facet_string::FacetStringIter;
|
pub use self::facet_string::FacetStringIter;
|
||||||
pub use self::filter::Filter;
|
pub use self::filter::Filter;
|
||||||
|
@ -15,7 +15,7 @@ use log::debug;
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
|
|
||||||
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter};
|
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter, DEFAULT_VALUES_PER_FACET};
|
||||||
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||||
pub use self::matches::{
|
pub use self::matches::{
|
||||||
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
||||||
|
@ -104,6 +104,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
exact_words: Setting<BTreeSet<String>>,
|
exact_words: Setting<BTreeSet<String>>,
|
||||||
/// Attributes on which typo tolerance is disabled.
|
/// Attributes on which typo tolerance is disabled.
|
||||||
exact_attributes: Setting<HashSet<String>>,
|
exact_attributes: Setting<HashSet<String>>,
|
||||||
|
max_values_per_facet: Setting<usize>,
|
||||||
|
pagination_limited_to: Setting<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||||
@ -129,6 +131,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
min_word_len_two_typos: Setting::NotSet,
|
min_word_len_two_typos: Setting::NotSet,
|
||||||
min_word_len_one_typo: Setting::NotSet,
|
min_word_len_one_typo: Setting::NotSet,
|
||||||
exact_attributes: Setting::NotSet,
|
exact_attributes: Setting::NotSet,
|
||||||
|
max_values_per_facet: Setting::NotSet,
|
||||||
|
pagination_limited_to: Setting::NotSet,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -246,6 +250,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.exact_attributes = Setting::Reset;
|
self.exact_attributes = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stages `value` as the new `max_values_per_facet` setting.
///
/// Only the builder field is changed here (it becomes `Setting::Set(value)`);
/// nothing is written to the index by this call.
pub fn set_max_values_per_facet(&mut self, value: usize) {
|
||||||
|
self.max_values_per_facet = Setting::Set(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stages a reset of the `max_values_per_facet` setting.
///
/// Only the builder field is changed here (it becomes `Setting::Reset`);
/// nothing is written to the index by this call.
pub fn reset_max_values_per_facet(&mut self) {
|
||||||
|
self.max_values_per_facet = Setting::Reset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stages `value` as the new `pagination_limited_to` setting.
///
/// Only the builder field is changed here (it becomes `Setting::Set(value)`);
/// nothing is written to the index by this call.
pub fn set_pagination_limited_to(&mut self, value: usize) {
|
||||||
|
self.pagination_limited_to = Setting::Set(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stages a reset of the `pagination_limited_to` setting.
///
/// Only the builder field is changed here (it becomes `Setting::Reset`);
/// nothing is written to the index by this call.
pub fn reset_pagination_limited_to(&mut self) {
|
||||||
|
self.pagination_limited_to = Setting::Reset;
|
||||||
|
}
|
||||||
|
|
||||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -612,6 +632,34 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Applies the staged `max_values_per_facet` setting to the index.
///
/// - `Setting::Set(max)`: stores `max` through `Index::put_max_values_per_facet`.
/// - `Setting::Reset`: removes the stored value through `Index::delete_max_values_per_facet`.
/// - `Setting::NotSet`: leaves the index untouched.
///
/// Both index calls use the builder's write transaction (`self.wtxn`), and any
/// error they return is propagated with `?`.
fn update_max_values_per_facet(&mut self) -> Result<()> {
|
||||||
|
match self.max_values_per_facet {
|
||||||
|
Setting::Set(max) => {
|
||||||
|
self.index.put_max_values_per_facet(&mut self.wtxn, max)?;
|
||||||
|
}
|
||||||
|
Setting::Reset => {
|
||||||
|
// The `bool` returned by the delete call is discarded.
self.index.delete_max_values_per_facet(&mut self.wtxn)?;
|
||||||
|
}
|
||||||
|
Setting::NotSet => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Applies the staged `pagination_limited_to` setting to the index.
///
/// - `Setting::Set(max)`: stores `max` through `Index::put_pagination_limited_to`.
/// - `Setting::Reset`: removes the stored value through `Index::delete_pagination_limited_to`.
/// - `Setting::NotSet`: leaves the index untouched.
///
/// Both index calls use the builder's write transaction (`self.wtxn`), and any
/// error they return is propagated with `?`.
fn update_pagination_limited_to(&mut self) -> Result<()> {
|
||||||
|
match self.pagination_limited_to {
|
||||||
|
Setting::Set(max) => {
|
||||||
|
self.index.put_pagination_limited_to(&mut self.wtxn, max)?;
|
||||||
|
}
|
||||||
|
Setting::Reset => {
|
||||||
|
// The `bool` returned by the delete call is discarded.
self.index.delete_pagination_limited_to(&mut self.wtxn)?;
|
||||||
|
}
|
||||||
|
Setting::NotSet => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -630,6 +678,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.update_authorize_typos()?;
|
self.update_authorize_typos()?;
|
||||||
self.update_min_typo_word_len()?;
|
self.update_min_typo_word_len()?;
|
||||||
self.update_exact_words()?;
|
self.update_exact_words()?;
|
||||||
|
self.update_max_values_per_facet()?;
|
||||||
|
self.update_pagination_limited_to()?;
|
||||||
|
|
||||||
// If there are new faceted fields we indicate that we must reindex as we must
|
// If there are new faceted fields we indicate that we must reindex as we must
|
||||||
// index new fields as facets. It means that the distinct attribute,
|
// index new fields as facets. It means that the distinct attribute,
|
||||||
@ -1525,6 +1575,8 @@ mod tests {
|
|||||||
min_word_len_one_typo,
|
min_word_len_one_typo,
|
||||||
exact_words,
|
exact_words,
|
||||||
exact_attributes,
|
exact_attributes,
|
||||||
|
max_values_per_facet,
|
||||||
|
pagination_limited_to,
|
||||||
} = builder;
|
} = builder;
|
||||||
|
|
||||||
assert!(matches!(searchable_fields, Setting::NotSet));
|
assert!(matches!(searchable_fields, Setting::NotSet));
|
||||||
@ -1541,5 +1593,7 @@ mod tests {
|
|||||||
assert!(matches!(min_word_len_one_typo, Setting::NotSet));
|
assert!(matches!(min_word_len_one_typo, Setting::NotSet));
|
||||||
assert!(matches!(exact_words, Setting::NotSet));
|
assert!(matches!(exact_words, Setting::NotSet));
|
||||||
assert!(matches!(exact_attributes, Setting::NotSet));
|
assert!(matches!(exact_attributes, Setting::NotSet));
|
||||||
|
assert!(matches!(max_values_per_facet, Setting::NotSet));
|
||||||
|
assert!(matches!(pagination_limited_to, Setting::NotSet));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user