2022-10-12 00:03:10 +08:00
use std ::collections ::{ BTreeMap , BTreeSet } ;
2023-01-11 19:33:56 +08:00
use std ::convert ::Infallible ;
use std ::fmt ;
2022-10-12 00:03:10 +08:00
use std ::marker ::PhantomData ;
use std ::num ::NonZeroUsize ;
2023-02-14 01:45:13 +08:00
use std ::ops ::ControlFlow ;
2023-01-11 19:33:56 +08:00
use std ::str ::FromStr ;
2022-10-12 00:03:10 +08:00
2023-02-14 01:45:13 +08:00
use deserr ::{ DeserializeError , Deserr , ErrorKind , MergeWithError , ValuePointerRef } ;
2022-10-13 21:02:59 +08:00
use fst ::IntoStreamer ;
2023-12-06 21:46:12 +08:00
use milli ::proximity ::ProximityPrecision ;
2023-01-11 19:33:56 +08:00
use milli ::update ::Setting ;
use milli ::{ Criterion , CriterionError , Index , DEFAULT_VALUES_PER_FACET } ;
use serde ::{ Deserialize , Serialize , Serializer } ;
2023-01-16 23:59:26 +08:00
use crate ::deserr ::DeserrJsonError ;
2023-01-12 00:10:32 +08:00
use crate ::error ::deserr_codes ::* ;
2023-06-22 23:13:40 +08:00
use crate ::facet_values_sort ::FacetValuesSort ;
2022-10-12 00:03:10 +08:00
2023-06-28 20:23:03 +08:00
/// Default upper bound on the number of results the engine
/// will return for a single search call.
///
/// `settings` falls back to this value when the index stores no
/// `pagination_max_total_hits`.
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1_000;
2022-10-12 00:03:10 +08:00
fn serialize_with_wildcard < S > (
field : & Setting < Vec < String > > ,
s : S ,
) -> std ::result ::Result < S ::Ok , S ::Error >
where
S : Serializer ,
{
let wildcard = vec! [ " * " . to_string ( ) ] ;
match field {
Setting ::Set ( value ) = > Some ( value ) ,
Setting ::Reset = > Some ( & wildcard ) ,
Setting ::NotSet = > None ,
}
. serialize ( s )
}
/// Type-level marker for `Settings` that have gone through `Settings::<Unchecked>::check`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
pub struct Checked;

/// Type-level marker for `Settings` that have not been passed through `check` yet.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct Unchecked;
2023-02-14 01:45:13 +08:00
impl < E > Deserr < E > for Unchecked
2022-12-14 20:00:43 +08:00
where
E : DeserializeError ,
{
fn deserialize_from_value < V : deserr ::IntoValue > (
_value : deserr ::Value < V > ,
_location : deserr ::ValuePointerRef ,
) -> Result < Self , E > {
unreachable! ( )
}
}
2023-01-11 19:33:56 +08:00
fn validate_min_word_size_for_typo_setting < E : DeserializeError > (
s : MinWordSizeTyposSetting ,
location : ValuePointerRef ,
) -> Result < MinWordSizeTyposSetting , E > {
if let ( Setting ::Set ( one ) , Setting ::Set ( two ) ) = ( s . one_typo , s . two_typos ) {
if one > two {
2023-02-14 20:58:33 +08:00
return Err ( deserr ::take_cf_content ( E ::error ::< Infallible > ( None , ErrorKind ::Unexpected { msg : format ! ( " `minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {one}` and twoTypos: {two}`. " ) } , location ) ) ) ;
2023-01-11 19:33:56 +08:00
}
}
Ok ( s )
}
2023-02-14 01:45:13 +08:00
#[ derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr) ]
2023-01-11 19:33:56 +08:00
#[ serde(deny_unknown_fields, rename_all = " camelCase " ) ]
2023-01-18 19:28:46 +08:00
#[ deserr(deny_unknown_fields, rename_all = camelCase, validate = validate_min_word_size_for_typo_setting -> DeserrJsonError<InvalidSettingsTypoTolerance>) ]
2022-10-12 00:03:10 +08:00
pub struct MinWordSizeTyposSetting {
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default) ]
2022-10-12 00:03:10 +08:00
pub one_typo : Setting < u8 > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default) ]
2022-10-12 00:03:10 +08:00
pub two_typos : Setting < u8 > ,
}
2023-02-14 01:45:13 +08:00
#[ derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr) ]
2023-01-11 19:33:56 +08:00
#[ serde(deny_unknown_fields, rename_all = " camelCase " ) ]
2023-01-18 19:28:46 +08:00
#[ deserr(deny_unknown_fields, rename_all = camelCase, where_predicate = __Deserr_E: deserr::MergeWithError<DeserrJsonError<InvalidSettingsTypoTolerance>>) ]
2022-10-12 00:03:10 +08:00
pub struct TypoSettings {
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default) ]
2022-10-12 00:03:10 +08:00
pub enabled : Setting < bool > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-18 19:28:46 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>) ]
2022-10-12 00:03:10 +08:00
pub min_word_size_for_typos : Setting < MinWordSizeTyposSetting > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default) ]
2022-10-12 00:03:10 +08:00
pub disable_on_words : Setting < BTreeSet < String > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default) ]
2022-10-12 00:03:10 +08:00
pub disable_on_attributes : Setting < BTreeSet < String > > ,
}
2023-02-14 01:45:13 +08:00
#[ derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr) ]
2023-01-11 19:33:56 +08:00
#[ serde(deny_unknown_fields, rename_all = " camelCase " ) ]
2022-12-14 20:00:43 +08:00
#[ deserr(rename_all = camelCase, deny_unknown_fields) ]
2022-10-12 00:03:10 +08:00
pub struct FacetingSettings {
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default) ]
2022-10-12 00:03:10 +08:00
pub max_values_per_facet : Setting < usize > ,
2023-06-22 23:13:40 +08:00
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
#[ deserr(default) ]
pub sort_facet_values_by : Setting < BTreeMap < String , FacetValuesSort > > ,
2022-10-12 00:03:10 +08:00
}
2023-02-14 01:45:13 +08:00
#[ derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr) ]
2023-01-11 19:33:56 +08:00
#[ serde(deny_unknown_fields, rename_all = " camelCase " ) ]
2022-12-14 20:00:43 +08:00
#[ deserr(rename_all = camelCase, deny_unknown_fields) ]
2022-10-12 00:03:10 +08:00
pub struct PaginationSettings {
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default) ]
2022-10-12 00:03:10 +08:00
pub max_total_hits : Setting < usize > ,
}
2023-01-12 20:55:53 +08:00
impl MergeWithError < milli ::CriterionError > for DeserrJsonError < InvalidSettingsRankingRules > {
2023-01-11 19:33:56 +08:00
fn merge (
_self_ : Option < Self > ,
other : milli ::CriterionError ,
merge_location : ValuePointerRef ,
2023-02-14 01:45:13 +08:00
) -> ControlFlow < Self , Self > {
2023-01-11 19:33:56 +08:00
Self ::error ::< Infallible > (
None ,
ErrorKind ::Unexpected { msg : other . to_string ( ) } ,
merge_location ,
)
}
}
2022-10-12 00:03:10 +08:00
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
2023-02-14 01:45:13 +08:00
#[ derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr) ]
2023-01-11 19:33:56 +08:00
#[ serde(
deny_unknown_fields ,
rename_all = " camelCase " ,
bound ( serialize = " T: Serialize " , deserialize = " T: Deserialize<'static> " )
) ]
2023-01-12 20:55:53 +08:00
#[ deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields) ]
2022-10-12 00:03:10 +08:00
pub struct Settings < T > {
#[ serde(
default ,
serialize_with = " serialize_with_wildcard " ,
skip_serializing_if = " Setting::is_not_set "
) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsDisplayedAttributes>) ]
2022-10-12 00:03:10 +08:00
pub displayed_attributes : Setting < Vec < String > > ,
#[ serde(
default ,
serialize_with = " serialize_with_wildcard " ,
skip_serializing_if = " Setting::is_not_set "
) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsSearchableAttributes>) ]
2022-10-12 00:03:10 +08:00
pub searchable_attributes : Setting < Vec < String > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>) ]
2022-10-12 00:03:10 +08:00
pub filterable_attributes : Setting < BTreeSet < String > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsSortableAttributes>) ]
2022-10-12 00:03:10 +08:00
pub sortable_attributes : Setting < BTreeSet < String > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsRankingRules>) ]
2023-01-11 19:33:56 +08:00
pub ranking_rules : Setting < Vec < RankingRuleView > > ,
2022-10-12 00:03:10 +08:00
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>) ]
2022-10-12 00:03:10 +08:00
pub stop_words : Setting < BTreeSet < String > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-07-24 23:00:18 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>) ]
pub non_separator_tokens : Setting < BTreeSet < String > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
#[ deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>) ]
pub separator_tokens : Setting < BTreeSet < String > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
#[ deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>) ]
pub dictionary : Setting < BTreeSet < String > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>) ]
2022-10-12 00:03:10 +08:00
pub synonyms : Setting < BTreeMap < String , Vec < String > > > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>) ]
2022-10-12 00:03:10 +08:00
pub distinct_attribute : Setting < String > ,
2024-01-08 21:03:47 +08:00
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-12-06 21:46:12 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>) ]
pub proximity_precision : Setting < ProximityPrecisionView > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>) ]
2022-10-12 00:03:10 +08:00
pub typo_tolerance : Setting < TypoSettings > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsFaceting>) ]
2022-10-12 00:03:10 +08:00
pub faceting : Setting < FacetingSettings > ,
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2023-01-12 22:35:03 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsPagination>) ]
2022-10-12 00:03:10 +08:00
pub pagination : Setting < PaginationSettings > ,
2023-11-15 22:46:37 +08:00
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
#[ deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>) ]
pub embedders : Setting < BTreeMap < String , Setting < milli ::vector ::settings ::EmbeddingSettings > > > ,
2024-03-12 01:24:21 +08:00
#[ serde(default, skip_serializing_if = " Setting::is_not_set " ) ]
2024-03-19 18:14:28 +08:00
#[ deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>) ]
2024-03-18 19:06:00 +08:00
pub search_cutoff_ms : Setting < u64 > ,
2023-11-15 22:46:37 +08:00
2022-10-12 00:03:10 +08:00
#[ serde(skip) ]
2023-01-11 19:33:56 +08:00
#[ deserr(skip) ]
2022-10-12 00:03:10 +08:00
pub _kind : PhantomData < T > ,
}
impl Settings<Checked> {
    /// Builds settings in which every field is `Setting::Reset`.
    pub fn cleared() -> Settings<Checked> {
        Self {
            displayed_attributes: Setting::Reset,
            searchable_attributes: Setting::Reset,
            filterable_attributes: Setting::Reset,
            sortable_attributes: Setting::Reset,
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            non_separator_tokens: Setting::Reset,
            separator_tokens: Setting::Reset,
            dictionary: Setting::Reset,
            synonyms: Setting::Reset,
            distinct_attribute: Setting::Reset,
            proximity_precision: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,
            pagination: Setting::Reset,
            embedders: Setting::Reset,
            search_cutoff_ms: Setting::Reset,
            _kind: PhantomData,
        }
    }

    /// Converts back to `Settings<Unchecked>`: every field is moved over unchanged and only
    /// the marker type differs.
    pub fn into_unchecked(self) -> Settings<Unchecked> {
        Settings {
            displayed_attributes: self.displayed_attributes,
            searchable_attributes: self.searchable_attributes,
            filterable_attributes: self.filterable_attributes,
            sortable_attributes: self.sortable_attributes,
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            non_separator_tokens: self.non_separator_tokens,
            separator_tokens: self.separator_tokens,
            dictionary: self.dictionary,
            synonyms: self.synonyms,
            distinct_attribute: self.distinct_attribute,
            proximity_precision: self.proximity_precision,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,
            pagination: self.pagination,
            embedders: self.embedders,
            search_cutoff_ms: self.search_cutoff_ms,
            _kind: PhantomData,
        }
    }
}
impl Settings < Unchecked > {
pub fn check ( self ) -> Settings < Checked > {
let displayed_attributes = match self . displayed_attributes {
Setting ::Set ( fields ) = > {
if fields . iter ( ) . any ( | f | f = = " * " ) {
Setting ::Reset
} else {
Setting ::Set ( fields )
}
}
otherwise = > otherwise ,
} ;
let searchable_attributes = match self . searchable_attributes {
Setting ::Set ( fields ) = > {
if fields . iter ( ) . any ( | f | f = = " * " ) {
Setting ::Reset
} else {
Setting ::Set ( fields )
}
}
otherwise = > otherwise ,
} ;
Settings {
displayed_attributes ,
searchable_attributes ,
filterable_attributes : self . filterable_attributes ,
sortable_attributes : self . sortable_attributes ,
ranking_rules : self . ranking_rules ,
stop_words : self . stop_words ,
synonyms : self . synonyms ,
2023-07-24 23:00:18 +08:00
non_separator_tokens : self . non_separator_tokens ,
separator_tokens : self . separator_tokens ,
dictionary : self . dictionary ,
2022-10-12 00:03:10 +08:00
distinct_attribute : self . distinct_attribute ,
2023-12-06 21:46:12 +08:00
proximity_precision : self . proximity_precision ,
2022-10-12 00:03:10 +08:00
typo_tolerance : self . typo_tolerance ,
faceting : self . faceting ,
pagination : self . pagination ,
2023-11-15 22:46:37 +08:00
embedders : self . embedders ,
2024-03-18 19:06:00 +08:00
search_cutoff_ms : self . search_cutoff_ms ,
2022-10-12 00:03:10 +08:00
_kind : PhantomData ,
}
}
2023-12-21 00:13:27 +08:00
pub fn validate ( self ) -> Result < Self , milli ::Error > {
self . validate_embedding_settings ( )
}
fn validate_embedding_settings ( mut self ) -> Result < Self , milli ::Error > {
let Setting ::Set ( mut configs ) = self . embedders else { return Ok ( self ) } ;
for ( name , config ) in configs . iter_mut ( ) {
let config_to_check = std ::mem ::take ( config ) ;
let checked_config = milli ::update ::validate_embedding_settings ( config_to_check , name ) ? ;
* config = checked_config
}
self . embedders = Setting ::Set ( configs ) ;
Ok ( self )
}
2022-10-12 00:03:10 +08:00
}
/// Facet level sizing parameters; both values are optional and, when present, non-zero.
// NOTE(review): not referenced anywhere in the visible part of this file — confirm it is
// still used before relying on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct Facets {
    pub level_group_size: Option<NonZeroUsize>,
    pub min_level_size: Option<NonZeroUsize>,
}
pub fn apply_settings_to_builder (
settings : & Settings < Checked > ,
builder : & mut milli ::update ::Settings ,
) {
2024-03-12 01:24:21 +08:00
let Settings {
displayed_attributes ,
searchable_attributes ,
filterable_attributes ,
sortable_attributes ,
ranking_rules ,
stop_words ,
non_separator_tokens ,
separator_tokens ,
dictionary ,
synonyms ,
distinct_attribute ,
proximity_precision ,
typo_tolerance ,
faceting ,
pagination ,
embedders ,
2024-03-18 19:06:00 +08:00
search_cutoff_ms ,
2024-03-12 01:24:21 +08:00
_kind ,
} = settings ;
match searchable_attributes {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref names ) = > builder . set_searchable_fields ( names . clone ( ) ) ,
Setting ::Reset = > builder . reset_searchable_fields ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match displayed_attributes {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref names ) = > builder . set_displayed_fields ( names . clone ( ) ) ,
Setting ::Reset = > builder . reset_displayed_fields ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match filterable_attributes {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref facets ) = > {
builder . set_filterable_fields ( facets . clone ( ) . into_iter ( ) . collect ( ) )
}
Setting ::Reset = > builder . reset_filterable_fields ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match sortable_attributes {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref fields ) = > builder . set_sortable_fields ( fields . iter ( ) . cloned ( ) . collect ( ) ) ,
Setting ::Reset = > builder . reset_sortable_fields ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match ranking_rules {
2023-01-11 19:33:56 +08:00
Setting ::Set ( ref criteria ) = > {
builder . set_criteria ( criteria . iter ( ) . map ( | c | c . clone ( ) . into ( ) ) . collect ( ) )
}
2022-10-12 00:03:10 +08:00
Setting ::Reset = > builder . reset_criteria ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match stop_words {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref stop_words ) = > builder . set_stop_words ( stop_words . clone ( ) ) ,
Setting ::Reset = > builder . reset_stop_words ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match non_separator_tokens {
2023-07-24 23:00:18 +08:00
Setting ::Set ( ref non_separator_tokens ) = > {
builder . set_non_separator_tokens ( non_separator_tokens . clone ( ) )
}
Setting ::Reset = > builder . reset_non_separator_tokens ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match separator_tokens {
2023-07-24 23:00:18 +08:00
Setting ::Set ( ref separator_tokens ) = > {
builder . set_separator_tokens ( separator_tokens . clone ( ) )
}
Setting ::Reset = > builder . reset_separator_tokens ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match dictionary {
2023-07-24 23:00:18 +08:00
Setting ::Set ( ref dictionary ) = > builder . set_dictionary ( dictionary . clone ( ) ) ,
Setting ::Reset = > builder . reset_dictionary ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match synonyms {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref synonyms ) = > builder . set_synonyms ( synonyms . clone ( ) . into_iter ( ) . collect ( ) ) ,
Setting ::Reset = > builder . reset_synonyms ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match distinct_attribute {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref attr ) = > builder . set_distinct_field ( attr . clone ( ) ) ,
Setting ::Reset = > builder . reset_distinct_field ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match proximity_precision {
2023-12-06 21:46:12 +08:00
Setting ::Set ( ref precision ) = > builder . set_proximity_precision ( ( * precision ) . into ( ) ) ,
Setting ::Reset = > builder . reset_proximity_precision ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match typo_tolerance {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref value ) = > {
match value . enabled {
Setting ::Set ( val ) = > builder . set_autorize_typos ( val ) ,
Setting ::Reset = > builder . reset_authorize_typos ( ) ,
Setting ::NotSet = > ( ) ,
}
match value . min_word_size_for_typos {
Setting ::Set ( ref setting ) = > {
match setting . one_typo {
Setting ::Set ( val ) = > builder . set_min_word_len_one_typo ( val ) ,
Setting ::Reset = > builder . reset_min_word_len_one_typo ( ) ,
Setting ::NotSet = > ( ) ,
}
match setting . two_typos {
Setting ::Set ( val ) = > builder . set_min_word_len_two_typos ( val ) ,
Setting ::Reset = > builder . reset_min_word_len_two_typos ( ) ,
Setting ::NotSet = > ( ) ,
}
}
Setting ::Reset = > {
builder . reset_min_word_len_one_typo ( ) ;
builder . reset_min_word_len_two_typos ( ) ;
}
Setting ::NotSet = > ( ) ,
}
match value . disable_on_words {
Setting ::Set ( ref words ) = > {
builder . set_exact_words ( words . clone ( ) ) ;
}
Setting ::Reset = > builder . reset_exact_words ( ) ,
Setting ::NotSet = > ( ) ,
}
match value . disable_on_attributes {
Setting ::Set ( ref words ) = > {
builder . set_exact_attributes ( words . iter ( ) . cloned ( ) . collect ( ) )
}
Setting ::Reset = > builder . reset_exact_attributes ( ) ,
Setting ::NotSet = > ( ) ,
}
}
Setting ::Reset = > {
// all typo settings need to be reset here.
builder . reset_authorize_typos ( ) ;
builder . reset_min_word_len_one_typo ( ) ;
builder . reset_min_word_len_two_typos ( ) ;
builder . reset_exact_words ( ) ;
builder . reset_exact_attributes ( ) ;
}
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match faceting {
2023-06-22 23:13:40 +08:00
Setting ::Set ( FacetingSettings { max_values_per_facet , sort_facet_values_by } ) = > {
match max_values_per_facet {
Setting ::Set ( val ) = > builder . set_max_values_per_facet ( * val ) ,
Setting ::Reset = > builder . reset_max_values_per_facet ( ) ,
Setting ::NotSet = > ( ) ,
}
match sort_facet_values_by {
Setting ::Set ( val ) = > builder . set_sort_facet_values_by (
val . iter ( ) . map ( | ( name , order ) | ( name . clone ( ) , ( * order ) . into ( ) ) ) . collect ( ) ,
) ,
Setting ::Reset = > builder . reset_sort_facet_values_by ( ) ,
Setting ::NotSet = > ( ) ,
}
}
2023-06-26 19:10:36 +08:00
Setting ::Reset = > {
builder . reset_max_values_per_facet ( ) ;
builder . reset_sort_facet_values_by ( ) ;
}
2022-10-12 00:03:10 +08:00
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
match pagination {
2022-10-12 00:03:10 +08:00
Setting ::Set ( ref value ) = > match value . max_total_hits {
Setting ::Set ( val ) = > builder . set_pagination_max_total_hits ( val ) ,
Setting ::Reset = > builder . reset_pagination_max_total_hits ( ) ,
Setting ::NotSet = > ( ) ,
} ,
Setting ::Reset = > builder . reset_pagination_max_total_hits ( ) ,
Setting ::NotSet = > ( ) ,
}
2023-11-15 22:46:37 +08:00
2024-03-12 01:24:21 +08:00
match embedders {
Setting ::Set ( value ) = > builder . set_embedder_settings ( value . clone ( ) ) ,
2023-11-15 22:46:37 +08:00
Setting ::Reset = > builder . reset_embedder_settings ( ) ,
Setting ::NotSet = > ( ) ,
}
2024-03-12 01:24:21 +08:00
2024-03-18 19:06:00 +08:00
match search_cutoff_ms {
2024-03-12 01:24:21 +08:00
Setting ::Set ( cutoff ) = > builder . set_search_cutoff ( * cutoff ) ,
Setting ::Reset = > builder . reset_search_cutoff ( ) ,
Setting ::NotSet = > ( ) ,
}
2022-10-12 00:03:10 +08:00
}
2022-10-13 21:02:59 +08:00
pub fn settings (
index : & Index ,
rtxn : & crate ::heed ::RoTxn ,
) -> Result < Settings < Checked > , milli ::Error > {
2022-10-21 00:00:07 +08:00
let displayed_attributes =
index . displayed_fields ( rtxn ) ? . map ( | fields | fields . into_iter ( ) . map ( String ::from ) . collect ( ) ) ;
2022-10-13 21:02:59 +08:00
let searchable_attributes = index
. user_defined_searchable_fields ( rtxn ) ?
. map ( | fields | fields . into_iter ( ) . map ( String ::from ) . collect ( ) ) ;
let filterable_attributes = index . filterable_fields ( rtxn ) ? . into_iter ( ) . collect ( ) ;
let sortable_attributes = index . sortable_fields ( rtxn ) ? . into_iter ( ) . collect ( ) ;
2023-01-11 19:33:56 +08:00
let criteria = index . criteria ( rtxn ) ? ;
2022-10-13 21:02:59 +08:00
let stop_words = index
. stop_words ( rtxn ) ?
. map ( | stop_words | -> Result < BTreeSet < _ > , milli ::Error > {
Ok ( stop_words . stream ( ) . into_strs ( ) ? . into_iter ( ) . collect ( ) )
} )
. transpose ( ) ?
. unwrap_or_default ( ) ;
2023-07-24 23:00:18 +08:00
let non_separator_tokens = index . non_separator_tokens ( rtxn ) ? . unwrap_or_default ( ) ;
let separator_tokens = index . separator_tokens ( rtxn ) ? . unwrap_or_default ( ) ;
let dictionary = index . dictionary ( rtxn ) ? . unwrap_or_default ( ) ;
2022-10-13 21:02:59 +08:00
let distinct_field = index . distinct_field ( rtxn ) ? . map ( String ::from ) ;
2023-12-06 21:46:12 +08:00
let proximity_precision = index . proximity_precision ( rtxn ) ? . map ( ProximityPrecisionView ::from ) ;
2023-07-27 20:12:23 +08:00
let synonyms = index . user_defined_synonyms ( rtxn ) ? ;
2022-10-13 21:02:59 +08:00
let min_typo_word_len = MinWordSizeTyposSetting {
one_typo : Setting ::Set ( index . min_word_len_one_typo ( rtxn ) ? ) ,
two_typos : Setting ::Set ( index . min_word_len_two_typos ( rtxn ) ? ) ,
} ;
let disabled_words = match index . exact_words ( rtxn ) ? {
Some ( fst ) = > fst . into_stream ( ) . into_strs ( ) ? . into_iter ( ) . collect ( ) ,
None = > BTreeSet ::new ( ) ,
} ;
2022-10-21 00:00:07 +08:00
let disabled_attributes = index . exact_attributes ( rtxn ) ? . into_iter ( ) . map ( String ::from ) . collect ( ) ;
2022-10-13 21:02:59 +08:00
let typo_tolerance = TypoSettings {
enabled : Setting ::Set ( index . authorize_typos ( rtxn ) ? ) ,
min_word_size_for_typos : Setting ::Set ( min_typo_word_len ) ,
disable_on_words : Setting ::Set ( disabled_words ) ,
disable_on_attributes : Setting ::Set ( disabled_attributes ) ,
} ;
let faceting = FacetingSettings {
max_values_per_facet : Setting ::Set (
2023-11-27 18:52:22 +08:00
index
. max_values_per_facet ( rtxn ) ?
. map ( | x | x as usize )
. unwrap_or ( DEFAULT_VALUES_PER_FACET ) ,
2022-10-13 21:02:59 +08:00
) ,
2023-06-22 23:13:40 +08:00
sort_facet_values_by : Setting ::Set (
index
. sort_facet_values_by ( rtxn ) ?
. into_iter ( )
. map ( | ( name , sort ) | ( name , sort . into ( ) ) )
. collect ( ) ,
) ,
2022-10-13 21:02:59 +08:00
} ;
let pagination = PaginationSettings {
max_total_hits : Setting ::Set (
2023-11-27 18:52:22 +08:00
index
. pagination_max_total_hits ( rtxn ) ?
. map ( | x | x as usize )
. unwrap_or ( DEFAULT_PAGINATION_MAX_TOTAL_HITS ) ,
2022-10-13 21:02:59 +08:00
) ,
} ;
2024-01-12 04:35:06 +08:00
let embedders : BTreeMap < _ , _ > = index
2023-11-15 22:46:37 +08:00
. embedding_configs ( rtxn ) ?
. into_iter ( )
. map ( | ( name , config ) | ( name , Setting ::Set ( config . into ( ) ) ) )
. collect ( ) ;
2024-01-12 04:35:06 +08:00
let embedders = if embedders . is_empty ( ) { Setting ::NotSet } else { Setting ::Set ( embedders ) } ;
2023-11-15 22:46:37 +08:00
2024-03-18 19:06:00 +08:00
let search_cutoff_ms = index . search_cutoff ( rtxn ) ? ;
2024-03-12 01:24:21 +08:00
2022-10-13 21:02:59 +08:00
Ok ( Settings {
displayed_attributes : match displayed_attributes {
Some ( attrs ) = > Setting ::Set ( attrs ) ,
None = > Setting ::Reset ,
} ,
searchable_attributes : match searchable_attributes {
Some ( attrs ) = > Setting ::Set ( attrs ) ,
None = > Setting ::Reset ,
} ,
filterable_attributes : Setting ::Set ( filterable_attributes ) ,
sortable_attributes : Setting ::Set ( sortable_attributes ) ,
2023-01-11 19:33:56 +08:00
ranking_rules : Setting ::Set ( criteria . iter ( ) . map ( | c | c . clone ( ) . into ( ) ) . collect ( ) ) ,
2022-10-13 21:02:59 +08:00
stop_words : Setting ::Set ( stop_words ) ,
2023-07-24 23:00:18 +08:00
non_separator_tokens : Setting ::Set ( non_separator_tokens ) ,
separator_tokens : Setting ::Set ( separator_tokens ) ,
dictionary : Setting ::Set ( dictionary ) ,
2022-10-13 21:02:59 +08:00
distinct_attribute : match distinct_field {
Some ( field ) = > Setting ::Set ( field ) ,
None = > Setting ::Reset ,
} ,
2024-01-08 21:03:47 +08:00
proximity_precision : Setting ::Set ( proximity_precision . unwrap_or_default ( ) ) ,
2022-10-13 21:02:59 +08:00
synonyms : Setting ::Set ( synonyms ) ,
typo_tolerance : Setting ::Set ( typo_tolerance ) ,
faceting : Setting ::Set ( faceting ) ,
pagination : Setting ::Set ( pagination ) ,
2024-01-12 04:35:06 +08:00
embedders ,
2024-03-18 19:06:00 +08:00
search_cutoff_ms : match search_cutoff_ms {
2024-03-12 01:24:21 +08:00
Some ( cutoff ) = > Setting ::Set ( cutoff ) ,
None = > Setting ::Reset ,
} ,
2022-10-13 21:02:59 +08:00
_kind : PhantomData ,
} )
}
2023-02-14 01:45:13 +08:00
#[ derive(Debug, Clone, PartialEq, Eq, Deserr) ]
#[ deserr(try_from(&String) = FromStr::from_str -> CriterionError) ]
2023-01-11 19:33:56 +08:00
pub enum RankingRuleView {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words ,
/// Sorted by increasing number of typos.
Typo ,
/// Sorted by increasing distance between matched query terms.
Proximity ,
/// Documents with quey words contained in more important
/// attributes are considered better.
Attribute ,
/// Dynamically sort at query time the documents. None, one or multiple Asc/Desc sortable
/// attributes can be used in place of this criterion at query time.
Sort ,
/// Sorted by the similarity of the matched words with the query words.
Exactness ,
/// Sorted by the increasing value of the field specified.
Asc ( String ) ,
/// Sorted by the decreasing value of the field specified.
Desc ( String ) ,
}
/// Serializes a ranking rule as a string.
///
/// The string is exactly what the `Display` impl of `RankingRuleView` produces (the display
/// text of the equivalent `Criterion`), so the serialized and displayed forms agree.
impl Serialize for RankingRuleView {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // `collect_str` serializes the `Display` output as a string, which lets serializers
        // skip the intermediate `String`.
        serializer.collect_str(self)
    }
}
impl < ' de > Deserialize < ' de > for RankingRuleView {
fn deserialize < D > ( deserializer : D ) -> Result < Self , D ::Error >
where
D : serde ::Deserializer < ' de > ,
{
struct Visitor ;
impl < ' de > serde ::de ::Visitor < ' de > for Visitor {
type Value = RankingRuleView ;
fn expecting ( & self , formatter : & mut std ::fmt ::Formatter ) -> std ::fmt ::Result {
write! ( formatter , " the name of a valid ranking rule (string) " )
}
fn visit_str < E > ( self , v : & str ) -> Result < Self ::Value , E >
where
E : serde ::de ::Error ,
{
let criterion = Criterion ::from_str ( v ) . map_err ( | _ | {
E ::invalid_value ( serde ::de ::Unexpected ::Str ( v ) , & " a valid ranking rule " )
} ) ? ;
Ok ( RankingRuleView ::from ( criterion ) )
}
}
deserializer . deserialize_str ( Visitor )
}
}
/// Parses a ranking rule by delegating to `Criterion::from_str`; the error type is the
/// one `Criterion` uses.
impl FromStr for RankingRuleView {
    type Err = <Criterion as FromStr>::Err;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Criterion::from_str(s).map(RankingRuleView::from)
    }
}
/// Displays a ranking rule exactly as the equivalent `Criterion` displays itself.
impl fmt::Display for RankingRuleView {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let criterion = Criterion::from(self.clone());
        // Delegating to `Display::fmt` hands the caller's formatter to `Criterion` as is.
        fmt::Display::fmt(&criterion, f)
    }
}
impl From < Criterion > for RankingRuleView {
fn from ( value : Criterion ) -> Self {
match value {
Criterion ::Words = > RankingRuleView ::Words ,
Criterion ::Typo = > RankingRuleView ::Typo ,
Criterion ::Proximity = > RankingRuleView ::Proximity ,
Criterion ::Attribute = > RankingRuleView ::Attribute ,
Criterion ::Sort = > RankingRuleView ::Sort ,
Criterion ::Exactness = > RankingRuleView ::Exactness ,
Criterion ::Asc ( x ) = > RankingRuleView ::Asc ( x ) ,
Criterion ::Desc ( x ) = > RankingRuleView ::Desc ( x ) ,
}
}
}
impl From < RankingRuleView > for Criterion {
fn from ( value : RankingRuleView ) -> Self {
match value {
RankingRuleView ::Words = > Criterion ::Words ,
RankingRuleView ::Typo = > Criterion ::Typo ,
RankingRuleView ::Proximity = > Criterion ::Proximity ,
RankingRuleView ::Attribute = > Criterion ::Attribute ,
RankingRuleView ::Sort = > Criterion ::Sort ,
RankingRuleView ::Exactness = > Criterion ::Exactness ,
RankingRuleView ::Asc ( x ) = > Criterion ::Asc ( x ) ,
RankingRuleView ::Desc ( x ) = > Criterion ::Desc ( x ) ,
}
}
}
2024-01-08 18:09:37 +08:00
#[ derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize) ]
2023-12-06 21:46:12 +08:00
#[ serde(deny_unknown_fields, rename_all = " camelCase " ) ]
#[ deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields) ]
pub enum ProximityPrecisionView {
2024-01-08 18:09:37 +08:00
#[ default ]
2023-12-14 23:31:00 +08:00
ByWord ,
ByAttribute ,
2023-12-06 21:46:12 +08:00
}
impl From < ProximityPrecision > for ProximityPrecisionView {
fn from ( value : ProximityPrecision ) -> Self {
match value {
2023-12-14 23:31:00 +08:00
ProximityPrecision ::ByWord = > ProximityPrecisionView ::ByWord ,
ProximityPrecision ::ByAttribute = > ProximityPrecisionView ::ByAttribute ,
2023-12-06 21:46:12 +08:00
}
}
}
impl From < ProximityPrecisionView > for ProximityPrecision {
fn from ( value : ProximityPrecisionView ) -> Self {
match value {
2023-12-14 23:31:00 +08:00
ProximityPrecisionView ::ByWord = > ProximityPrecision ::ByWord ,
ProximityPrecisionView ::ByAttribute = > ProximityPrecision ::ByAttribute ,
2023-12-06 21:46:12 +08:00
}
}
}
2022-10-12 00:03:10 +08:00
#[cfg(test)]
pub(crate) mod test {
    use super::*;

    /// Exercises `Settings::check` on the displayed and searchable attribute
    /// lists: once with ordinary names, once with lists containing the
    /// wildcard entry.
    #[test]
    fn test_setting_check() {
        // No wildcard entry: both lists must come out of `check` unchanged.
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from(" hello ")]),
            searchable_attributes: Setting::Set(vec![String::from(" hello ")]),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            proximity_precision: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
            search_cutoff_ms: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        // `check` consumes its receiver, so work on a clone to keep the
        // original around for comparison.
        let checked = settings.clone().check();
        assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
        assert_eq!(settings.searchable_attributes, checked.searchable_attributes);

        // Wildcard entry present (alone, or alongside other names): both
        // lists must be turned into `Setting::Reset`.
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from(" * ")]),
            searchable_attributes: Setting::Set(vec![String::from(" hello "), String::from(" * ")]),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            proximity_precision: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
            search_cutoff_ms: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Reset);
    }
}