mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
Merge #4225
4225: [EXP] Let the user customize the proximity precision r=dureuill a=ManyTheFish # Pull Request This PR introduces a new setting, `proximityPrecision`, allowing the user to trade off indexing time against search precision on proximity-based features: - proximity ranking rules - multi-word synonyms - phrase search - split-words The API PRD is linked below: https://www.notion.so/meilisearch/3988b345b5b248948a4a0dc5932a18ce?v=45d79150adb84b0aa27826ff6da2e029&p=aa69c2bab2c3402bab9340ae4def4577&pm=s ## Related issue Fixes #4187 Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
0fbc1511d7
@ -267,6 +267,7 @@ pub(crate) mod test {
|
|||||||
dictionary: Setting::NotSet,
|
dictionary: Setting::NotSet,
|
||||||
synonyms: Setting::NotSet,
|
synonyms: Setting::NotSet,
|
||||||
distinct_attribute: Setting::NotSet,
|
distinct_attribute: Setting::NotSet,
|
||||||
|
proximity_precision: Setting::NotSet,
|
||||||
typo_tolerance: Setting::NotSet,
|
typo_tolerance: Setting::NotSet,
|
||||||
faceting: Setting::Set(FacetingSettings {
|
faceting: Setting::Set(FacetingSettings {
|
||||||
max_values_per_facet: Setting::Set(111),
|
max_values_per_facet: Setting::Set(111),
|
||||||
|
@ -345,6 +345,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
|||||||
dictionary: v6::Setting::NotSet,
|
dictionary: v6::Setting::NotSet,
|
||||||
synonyms: settings.synonyms.into(),
|
synonyms: settings.synonyms.into(),
|
||||||
distinct_attribute: settings.distinct_attribute.into(),
|
distinct_attribute: settings.distinct_attribute.into(),
|
||||||
|
proximity_precision: v6::Setting::NotSet,
|
||||||
typo_tolerance: match settings.typo_tolerance {
|
typo_tolerance: match settings.typo_tolerance {
|
||||||
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
|
v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
|
||||||
enabled: typo.enabled.into(),
|
enabled: typo.enabled.into(),
|
||||||
|
@ -1343,6 +1343,9 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
|
||||||
let checked_settings = settings.clone().check();
|
let checked_settings = settings.clone().check();
|
||||||
|
if checked_settings.proximity_precision.set().is_some() {
|
||||||
|
self.features.features().check_proximity_precision()?;
|
||||||
|
}
|
||||||
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
|
||||||
apply_settings_to_builder(&checked_settings, &mut builder);
|
apply_settings_to_builder(&checked_settings, &mut builder);
|
||||||
|
|
||||||
|
@ -81,6 +81,19 @@ impl RoFeatures {
|
|||||||
.into())
|
.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that the `proximity_precision` runtime feature is enabled.
///
/// # Errors
///
/// Returns `Ok(())` when `self.runtime.proximity_precision` is `true`. Otherwise returns a
/// `FeatureNotEnabledError` (converted with `.into()`) that names the disabled action, the
/// feature and the discussion link.
pub fn check_proximity_precision(&self) -> Result<()> {
|
||||||
|
if self.runtime.proximity_precision {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(FeatureNotEnabledError {
|
||||||
|
disabled_action: "Using `proximityPrecision` index setting",
|
||||||
|
feature: "proximity precision",
|
||||||
|
issue_link: "https://github.com/orgs/meilisearch/discussions/710",
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FeatureData {
|
impl FeatureData {
|
||||||
|
@ -252,6 +252,7 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
|||||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ;
|
||||||
|
@ -7,6 +7,7 @@ pub struct RuntimeTogglableFeatures {
|
|||||||
pub vector_store: bool,
|
pub vector_store: bool,
|
||||||
pub metrics: bool,
|
pub metrics: bool,
|
||||||
pub export_puffin_reports: bool,
|
pub export_puffin_reports: bool,
|
||||||
|
pub proximity_precision: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Debug, Clone, Copy)]
|
#[derive(Default, Debug, Clone, Copy)]
|
||||||
|
@ -8,6 +8,7 @@ use std::str::FromStr;
|
|||||||
|
|
||||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
|
use milli::proximity::ProximityPrecision;
|
||||||
use milli::update::Setting;
|
use milli::update::Setting;
|
||||||
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
|
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
|
||||||
use serde::{Deserialize, Serialize, Serializer};
|
use serde::{Deserialize, Serialize, Serializer};
|
||||||
@ -186,6 +187,9 @@ pub struct Settings<T> {
|
|||||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
|
||||||
pub distinct_attribute: Setting<String>,
|
pub distinct_attribute: Setting<String>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
|
||||||
|
pub proximity_precision: Setting<ProximityPrecisionView>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
|
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
|
||||||
pub typo_tolerance: Setting<TypoSettings>,
|
pub typo_tolerance: Setting<TypoSettings>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
@ -214,6 +218,7 @@ impl Settings<Checked> {
|
|||||||
separator_tokens: Setting::Reset,
|
separator_tokens: Setting::Reset,
|
||||||
dictionary: Setting::Reset,
|
dictionary: Setting::Reset,
|
||||||
distinct_attribute: Setting::Reset,
|
distinct_attribute: Setting::Reset,
|
||||||
|
proximity_precision: Setting::Reset,
|
||||||
typo_tolerance: Setting::Reset,
|
typo_tolerance: Setting::Reset,
|
||||||
faceting: Setting::Reset,
|
faceting: Setting::Reset,
|
||||||
pagination: Setting::Reset,
|
pagination: Setting::Reset,
|
||||||
@ -234,6 +239,7 @@ impl Settings<Checked> {
|
|||||||
dictionary,
|
dictionary,
|
||||||
synonyms,
|
synonyms,
|
||||||
distinct_attribute,
|
distinct_attribute,
|
||||||
|
proximity_precision,
|
||||||
typo_tolerance,
|
typo_tolerance,
|
||||||
faceting,
|
faceting,
|
||||||
pagination,
|
pagination,
|
||||||
@ -252,6 +258,7 @@ impl Settings<Checked> {
|
|||||||
dictionary,
|
dictionary,
|
||||||
synonyms,
|
synonyms,
|
||||||
distinct_attribute,
|
distinct_attribute,
|
||||||
|
proximity_precision,
|
||||||
typo_tolerance,
|
typo_tolerance,
|
||||||
faceting,
|
faceting,
|
||||||
pagination,
|
pagination,
|
||||||
@ -296,6 +303,7 @@ impl Settings<Unchecked> {
|
|||||||
separator_tokens: self.separator_tokens,
|
separator_tokens: self.separator_tokens,
|
||||||
dictionary: self.dictionary,
|
dictionary: self.dictionary,
|
||||||
distinct_attribute: self.distinct_attribute,
|
distinct_attribute: self.distinct_attribute,
|
||||||
|
proximity_precision: self.proximity_precision,
|
||||||
typo_tolerance: self.typo_tolerance,
|
typo_tolerance: self.typo_tolerance,
|
||||||
faceting: self.faceting,
|
faceting: self.faceting,
|
||||||
pagination: self.pagination,
|
pagination: self.pagination,
|
||||||
@ -390,6 +398,12 @@ pub fn apply_settings_to_builder(
|
|||||||
Setting::NotSet => (),
|
Setting::NotSet => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
match settings.proximity_precision {
|
||||||
|
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
|
||||||
|
Setting::Reset => builder.reset_proximity_precision(),
|
||||||
|
Setting::NotSet => (),
|
||||||
|
}
|
||||||
|
|
||||||
match settings.typo_tolerance {
|
match settings.typo_tolerance {
|
||||||
Setting::Set(ref value) => {
|
Setting::Set(ref value) => {
|
||||||
match value.enabled {
|
match value.enabled {
|
||||||
@ -509,6 +523,8 @@ pub fn settings(
|
|||||||
|
|
||||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||||
|
|
||||||
|
let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
|
||||||
|
|
||||||
let synonyms = index.user_defined_synonyms(rtxn)?;
|
let synonyms = index.user_defined_synonyms(rtxn)?;
|
||||||
|
|
||||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||||
@ -575,6 +591,10 @@ pub fn settings(
|
|||||||
Some(field) => Setting::Set(field),
|
Some(field) => Setting::Set(field),
|
||||||
None => Setting::Reset,
|
None => Setting::Reset,
|
||||||
},
|
},
|
||||||
|
proximity_precision: match proximity_precision {
|
||||||
|
Some(precision) => Setting::Set(precision),
|
||||||
|
None => Setting::Reset,
|
||||||
|
},
|
||||||
synonyms: Setting::Set(synonyms),
|
synonyms: Setting::Set(synonyms),
|
||||||
typo_tolerance: Setting::Set(typo_tolerance),
|
typo_tolerance: Setting::Set(typo_tolerance),
|
||||||
faceting: Setting::Set(faceting),
|
faceting: Setting::Set(faceting),
|
||||||
@ -679,6 +699,31 @@ impl From<RankingRuleView> for Criterion {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// API-facing representation of the `proximityPrecision` index setting.
///
/// Both the serde and deserr attributes use `camelCase` renaming and reject unknown fields.
/// Deserr failures are reported as `DeserrJsonError<InvalidSettingsProximityPrecision>`.
/// The variants mirror `ProximityPrecision` one-to-one through the `From` impls in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
|
||||||
|
#[serde(deny_unknown_fields, rename_all = "camelCase")]
|
||||||
|
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub enum ProximityPrecisionView {
|
||||||
|
/// Serialized as `wordScale`; maps to `ProximityPrecision::WordScale`.
WordScale,
|
||||||
|
/// Serialized as `attributeScale`; maps to `ProximityPrecision::AttributeScale`.
AttributeScale,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts the engine-side `ProximityPrecision` into its API view, variant for variant.
impl From<ProximityPrecision> for ProximityPrecisionView {
|
||||||
|
fn from(value: ProximityPrecision) -> Self {
|
||||||
|
// Exhaustive match, no catch-all arm: every listed variant maps to its namesake.
match value {
|
||||||
|
ProximityPrecision::WordScale => ProximityPrecisionView::WordScale,
|
||||||
|
ProximityPrecision::AttributeScale => ProximityPrecisionView::AttributeScale,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Converts the API view back into the engine-side `ProximityPrecision`, variant for variant.
impl From<ProximityPrecisionView> for ProximityPrecision {
|
||||||
|
fn from(value: ProximityPrecisionView) -> Self {
|
||||||
|
// Exhaustive match, no catch-all arm: every listed variant maps to its namesake.
match value {
|
||||||
|
ProximityPrecisionView::WordScale => ProximityPrecision::WordScale,
|
||||||
|
ProximityPrecisionView::AttributeScale => ProximityPrecision::AttributeScale,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) mod test {
|
pub(crate) mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -698,6 +743,7 @@ pub(crate) mod test {
|
|||||||
dictionary: Setting::NotSet,
|
dictionary: Setting::NotSet,
|
||||||
synonyms: Setting::NotSet,
|
synonyms: Setting::NotSet,
|
||||||
distinct_attribute: Setting::NotSet,
|
distinct_attribute: Setting::NotSet,
|
||||||
|
proximity_precision: Setting::NotSet,
|
||||||
typo_tolerance: Setting::NotSet,
|
typo_tolerance: Setting::NotSet,
|
||||||
faceting: Setting::NotSet,
|
faceting: Setting::NotSet,
|
||||||
pagination: Setting::NotSet,
|
pagination: Setting::NotSet,
|
||||||
@ -722,6 +768,7 @@ pub(crate) mod test {
|
|||||||
dictionary: Setting::NotSet,
|
dictionary: Setting::NotSet,
|
||||||
synonyms: Setting::NotSet,
|
synonyms: Setting::NotSet,
|
||||||
distinct_attribute: Setting::NotSet,
|
distinct_attribute: Setting::NotSet,
|
||||||
|
proximity_precision: Setting::NotSet,
|
||||||
typo_tolerance: Setting::NotSet,
|
typo_tolerance: Setting::NotSet,
|
||||||
faceting: Setting::NotSet,
|
faceting: Setting::NotSet,
|
||||||
pagination: Setting::NotSet,
|
pagination: Setting::NotSet,
|
||||||
|
@ -48,6 +48,8 @@ pub struct RuntimeTogglableFeatures {
|
|||||||
pub metrics: Option<bool>,
|
pub metrics: Option<bool>,
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
pub export_puffin_reports: Option<bool>,
|
pub export_puffin_reports: Option<bool>,
|
||||||
|
#[deserr(default)]
|
||||||
|
pub proximity_precision: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn patch_features(
|
async fn patch_features(
|
||||||
@ -70,6 +72,10 @@ async fn patch_features(
|
|||||||
.0
|
.0
|
||||||
.export_puffin_reports
|
.export_puffin_reports
|
||||||
.unwrap_or(old_features.export_puffin_reports),
|
.unwrap_or(old_features.export_puffin_reports),
|
||||||
|
proximity_precision: new_features
|
||||||
|
.0
|
||||||
|
.proximity_precision
|
||||||
|
.unwrap_or(old_features.proximity_precision),
|
||||||
};
|
};
|
||||||
|
|
||||||
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
|
||||||
@ -80,6 +86,7 @@ async fn patch_features(
|
|||||||
vector_store,
|
vector_store,
|
||||||
metrics,
|
metrics,
|
||||||
export_puffin_reports,
|
export_puffin_reports,
|
||||||
|
proximity_precision,
|
||||||
} = new_features;
|
} = new_features;
|
||||||
|
|
||||||
analytics.publish(
|
analytics.publish(
|
||||||
@ -89,6 +96,7 @@ async fn patch_features(
|
|||||||
"vector_store": vector_store,
|
"vector_store": vector_store,
|
||||||
"metrics": metrics,
|
"metrics": metrics,
|
||||||
"export_puffin_reports": export_puffin_reports,
|
"export_puffin_reports": export_puffin_reports,
|
||||||
|
"proximity_precision": proximity_precision,
|
||||||
}),
|
}),
|
||||||
Some(&req),
|
Some(&req),
|
||||||
);
|
);
|
||||||
|
@ -435,6 +435,30 @@ make_setting_route!(
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Invokes `make_setting_route!` for the `/proximity-precision` path with the `put` method.
// The value type is `ProximityPrecisionView`, the deserr error type is
// `DeserrJsonError<InvalidSettingsProximityPrecision>`, the settings field is
// `proximity_precision` and the JSON name is `proximityPrecision`.
// The trailing closure publishes a "ProximityPrecision Updated" analytics event that only
// records whether a value was provided (`precision.is_some()`), not the value itself.
// NOTE(review): the macro body is not visible here; presumably it generates the route
// handlers for this sub-setting — confirm against the macro definition.
make_setting_route!(
|
||||||
|
"/proximity-precision",
|
||||||
|
put,
|
||||||
|
meilisearch_types::settings::ProximityPrecisionView,
|
||||||
|
meilisearch_types::deserr::DeserrJsonError<
|
||||||
|
meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
|
||||||
|
>,
|
||||||
|
proximity_precision,
|
||||||
|
"proximityPrecision",
|
||||||
|
analytics,
|
||||||
|
|precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
|
||||||
|
use serde_json::json;
|
||||||
|
analytics.publish(
|
||||||
|
"ProximityPrecision Updated".to_string(),
|
||||||
|
json!({
|
||||||
|
"proximity_precision": {
|
||||||
|
"set": precision.is_some(),
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
Some(req),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
make_setting_route!(
|
make_setting_route!(
|
||||||
"/ranking-rules",
|
"/ranking-rules",
|
||||||
put,
|
put,
|
||||||
@ -541,6 +565,7 @@ generate_configure!(
|
|||||||
displayed_attributes,
|
displayed_attributes,
|
||||||
searchable_attributes,
|
searchable_attributes,
|
||||||
distinct_attribute,
|
distinct_attribute,
|
||||||
|
proximity_precision,
|
||||||
stop_words,
|
stop_words,
|
||||||
separator_tokens,
|
separator_tokens,
|
||||||
non_separator_tokens,
|
non_separator_tokens,
|
||||||
@ -594,6 +619,9 @@ pub async fn update_all(
|
|||||||
"distinct_attribute": {
|
"distinct_attribute": {
|
||||||
"set": new_settings.distinct_attribute.as_ref().set().is_some()
|
"set": new_settings.distinct_attribute.as_ref().set().is_some()
|
||||||
},
|
},
|
||||||
|
"proximity_precision": {
|
||||||
|
"set": new_settings.proximity_precision.as_ref().set().is_some()
|
||||||
|
},
|
||||||
"typo_tolerance": {
|
"typo_tolerance": {
|
||||||
"enabled": new_settings.typo_tolerance
|
"enabled": new_settings.typo_tolerance
|
||||||
.as_ref()
|
.as_ref()
|
||||||
|
Binary file not shown.
@ -20,6 +20,8 @@ pub enum GetDump {
|
|||||||
RubyGemsWithSettingsV4,
|
RubyGemsWithSettingsV4,
|
||||||
|
|
||||||
TestV5,
|
TestV5,
|
||||||
|
|
||||||
|
TestV6WithExperimental,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GetDump {
|
impl GetDump {
|
||||||
@ -68,6 +70,10 @@ impl GetDump {
|
|||||||
GetDump::TestV5 => {
|
GetDump::TestV5 => {
|
||||||
exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into()
|
exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into()
|
||||||
}
|
}
|
||||||
|
GetDump::TestV6WithExperimental => exist_relative_path!(
|
||||||
|
"tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
|
||||||
|
)
|
||||||
|
.into(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,6 +59,7 @@ async fn import_dump_v1_movie_raw() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -219,6 +220,7 @@ async fn import_dump_v1_movie_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -365,6 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -497,6 +500,7 @@ async fn import_dump_v2_movie_raw() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -641,6 +645,7 @@ async fn import_dump_v2_movie_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -784,6 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -916,6 +922,7 @@ async fn import_dump_v3_movie_raw() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1060,6 +1067,7 @@ async fn import_dump_v3_movie_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1203,6 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1335,6 +1344,7 @@ async fn import_dump_v4_movie_raw() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1479,6 +1489,7 @@ async fn import_dump_v4_movie_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1622,6 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() {
|
|||||||
"dictionary": [],
|
"dictionary": [],
|
||||||
"synonyms": {},
|
"synonyms": {},
|
||||||
"distinctAttribute": null,
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": null,
|
||||||
"typoTolerance": {
|
"typoTolerance": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"minWordSizeForTypos": {
|
"minWordSizeForTypos": {
|
||||||
@ -1810,3 +1822,108 @@ async fn import_dump_v5() {
|
|||||||
json_string!(tasks, { ".results[].details.dumpUid" => "[uid]", ".results[].duration" => "[duration]" , ".results[].startedAt" => "[date]" , ".results[].finishedAt" => "[date]" })
|
json_string!(tasks, { ".results[].details.dumpUid" => "[uid]", ".results[].duration" => "[duration]" , ".results[].startedAt" => "[date]" , ".results[].finishedAt" => "[date]" })
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Imports the `GetDump::TestV6WithExperimental` dump into a fresh server and checks that:
// - exactly one index is listed, with uid `movies` and primary key `id`;
// - every experimental feature, including `proximityPrecision`, is reported as `false`;
// - the index settings still report `"proximityPrecision": "attributeScale"`;
// - searching "the soup of day" returns the hits in the order [1, 3, 2].
#[actix_rt::test]
|
||||||
|
async fn import_dump_v6_containing_experimental_features() {
|
||||||
|
let temp = tempfile::tempdir().unwrap();
|
||||||
|
|
||||||
|
let options = Opt {
|
||||||
|
import_dump: Some(GetDump::TestV6WithExperimental.path()),
|
||||||
|
..default_settings(temp.path())
|
||||||
|
};
|
||||||
|
let mut server = Server::new_auth_with_options(options, temp).await;
|
||||||
|
server.use_api_key("MASTER_KEY");
|
||||||
|
|
||||||
|
let (indexes, code) = server.list_indexes(None, None).await;
|
||||||
|
assert_eq!(code, 200, "{indexes}");
|
||||||
|
|
||||||
|
assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
|
||||||
|
assert_eq!(indexes["results"][0]["uid"], json!("movies"));
|
||||||
|
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
|
||||||
|
|
||||||
|
let (response, code) = server.get_features().await;
|
||||||
|
meili_snap::snapshot!(code, @"200 OK");
|
||||||
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"scoreDetails": false,
|
||||||
|
"vectorStore": false,
|
||||||
|
"metrics": false,
|
||||||
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let index = server.index("movies");
|
||||||
|
|
||||||
|
let (response, code) = index.settings().await;
|
||||||
|
meili_snap::snapshot!(code, @"200 OK");
|
||||||
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"displayedAttributes": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"searchableAttributes": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"filterableAttributes": [],
|
||||||
|
"sortableAttributes": [],
|
||||||
|
"rankingRules": [
|
||||||
|
"words",
|
||||||
|
"typo",
|
||||||
|
"proximity"
|
||||||
|
],
|
||||||
|
"stopWords": [],
|
||||||
|
"nonSeparatorTokens": [],
|
||||||
|
"separatorTokens": [],
|
||||||
|
"dictionary": [],
|
||||||
|
"synonyms": {},
|
||||||
|
"distinctAttribute": null,
|
||||||
|
"proximityPrecision": "attributeScale",
|
||||||
|
"typoTolerance": {
|
||||||
|
"enabled": true,
|
||||||
|
"minWordSizeForTypos": {
|
||||||
|
"oneTypo": 5,
|
||||||
|
"twoTypos": 9
|
||||||
|
},
|
||||||
|
"disableOnWords": [],
|
||||||
|
"disableOnAttributes": []
|
||||||
|
},
|
||||||
|
"faceting": {
|
||||||
|
"maxValuesPerFacet": 100,
|
||||||
|
"sortFacetValuesBy": {
|
||||||
|
"*": "alpha"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"pagination": {
|
||||||
|
"maxTotalHits": 1000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// the expected order is [1, 3, 2] instead of [3, 1, 2]
|
||||||
|
// because the attribute scale doesn't differentiate between documents 1 and 3.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
@ -21,7 +21,8 @@ async fn experimental_features() {
|
|||||||
"scoreDetails": false,
|
"scoreDetails": false,
|
||||||
"vectorStore": false,
|
"vectorStore": false,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"exportPuffinReports": false
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -33,7 +34,8 @@ async fn experimental_features() {
|
|||||||
"scoreDetails": false,
|
"scoreDetails": false,
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"exportPuffinReports": false
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -45,7 +47,8 @@ async fn experimental_features() {
|
|||||||
"scoreDetails": false,
|
"scoreDetails": false,
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"exportPuffinReports": false
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -58,7 +61,8 @@ async fn experimental_features() {
|
|||||||
"scoreDetails": false,
|
"scoreDetails": false,
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"exportPuffinReports": false
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -71,7 +75,8 @@ async fn experimental_features() {
|
|||||||
"scoreDetails": false,
|
"scoreDetails": false,
|
||||||
"vectorStore": true,
|
"vectorStore": true,
|
||||||
"metrics": false,
|
"metrics": false,
|
||||||
"exportPuffinReports": false
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -91,7 +96,8 @@ async fn experimental_feature_metrics() {
|
|||||||
"scoreDetails": false,
|
"scoreDetails": false,
|
||||||
"vectorStore": false,
|
"vectorStore": false,
|
||||||
"metrics": true,
|
"metrics": true,
|
||||||
"exportPuffinReports": false
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": false
|
||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
@ -146,7 +152,7 @@ async fn errors() {
|
|||||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
{
|
{
|
||||||
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
|
"message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`, `proximityPrecision`",
|
||||||
"code": "bad_request",
|
"code": "bad_request",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||||
|
@ -54,7 +54,7 @@ async fn get_settings() {
|
|||||||
let (response, code) = index.settings().await;
|
let (response, code) = index.settings().await;
|
||||||
assert_eq!(code, 200);
|
assert_eq!(code, 200);
|
||||||
let settings = response.as_object().unwrap();
|
let settings = response.as_object().unwrap();
|
||||||
assert_eq!(settings.keys().len(), 14);
|
assert_eq!(settings.keys().len(), 15);
|
||||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||||
assert_eq!(settings["filterableAttributes"], json!([]));
|
assert_eq!(settings["filterableAttributes"], json!([]));
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
mod distinct;
|
mod distinct;
|
||||||
mod errors;
|
mod errors;
|
||||||
mod get_settings;
|
mod get_settings;
|
||||||
|
mod proximity_settings;
|
||||||
mod tokenizer_customization;
|
mod tokenizer_customization;
|
||||||
|
396
meilisearch/tests/settings/proximity_settings.rs
Normal file
396
meilisearch/tests/settings/proximity_settings.rs
Normal file
@ -0,0 +1,396 @@
|
|||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
|
use crate::common::Server;
|
||||||
|
use crate::json;
|
||||||
|
|
||||||
|
// Lazily-built JSON fixture of three documents (ids 1, 2, 3), each with two text fields
// `a` and `b`; the tests in this file clone it and add it to an index.
static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
|
||||||
|
json!([
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish",
|
||||||
|
},
|
||||||
|
])
|
||||||
|
});
|
||||||
|
|
||||||
|
// Enables the `proximityPrecision` experimental feature, indexes `DOCUMENTS` into the `test`
// index, sets `proximityPrecision` to `attributeScale` with the ranking rules
// ["words", "typo", "proximity"], then snapshots the hit order of two searches.
#[actix_rt::test]
|
||||||
|
async fn attribute_scale_search() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||||
|
meili_snap::snapshot!(code, @"200 OK");
|
||||||
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"scoreDetails": false,
|
||||||
|
"vectorStore": false,
|
||||||
|
"metrics": false,
|
||||||
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": true
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"proximityPrecision": "attributeScale",
|
||||||
|
"rankingRules": ["words", "typo", "proximity"],
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
// the expected order is [1, 3, 2] instead of [3, 1, 2]
|
||||||
|
// because the attribute scale doesn't differentiate between documents 1 and 3.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// the expected order is [1, 2, 3] instead of [1, 3, 2]
|
||||||
|
// because the attribute scale sees all the words in the same attribute
|
||||||
|
// and so doesn't differentiate between the documents.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "many the fish"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn attribute_scale_phrase_search() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||||
|
meili_snap::snapshot!(code, @"200 OK");
|
||||||
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"scoreDetails": false,
|
||||||
|
"vectorStore": false,
|
||||||
|
"metrics": false,
|
||||||
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": true
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let (_response, _code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"proximityPrecision": "attributeScale",
|
||||||
|
"rankingRules": ["words", "typo", "proximity"],
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
// the expected order is [1, 3] instead of [3, 1]
|
||||||
|
// because the attribute scale doesn't make the difference between 1 and 3.
|
||||||
|
// But 2 shouldn't be returned because "the" is not in the same attribute.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "\"the soup of day\""}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// the expected order is [1, 2, 3] instead of [1, 3]
|
||||||
|
// because the attribute scale sees all the word in the same attribute
|
||||||
|
// and so doesn't make the difference between the documents.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn word_scale_set_and_reset() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||||
|
meili_snap::snapshot!(code, @"200 OK");
|
||||||
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"scoreDetails": false,
|
||||||
|
"vectorStore": false,
|
||||||
|
"metrics": false,
|
||||||
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": true
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
// Set and reset the setting ensuring the swap between the 2 settings is applied.
|
||||||
|
let (_response, _code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"proximityPrecision": "attributeScale",
|
||||||
|
"rankingRules": ["words", "typo", "proximity"],
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
let (_response, _code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"proximityPrecision": "wordScale",
|
||||||
|
"rankingRules": ["words", "typo", "proximity"],
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
index.wait_task(2).await;
|
||||||
|
|
||||||
|
// [3, 1, 2]
|
||||||
|
index
|
||||||
|
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// [1, 3, 2]
|
||||||
|
index
|
||||||
|
.search(json!({"q": "many the fish"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// [3]
|
||||||
|
index
|
||||||
|
.search(json!({"q": "\"the soup of day\""}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// [1, 3]
|
||||||
|
index
|
||||||
|
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn attribute_scale_default_ranking_rules() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let (response, code) = server.set_features(json!({"proximityPrecision": true})).await;
|
||||||
|
meili_snap::snapshot!(code, @"200 OK");
|
||||||
|
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"scoreDetails": false,
|
||||||
|
"vectorStore": false,
|
||||||
|
"metrics": false,
|
||||||
|
"exportPuffinReports": false,
|
||||||
|
"proximityPrecision": true
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
index.add_documents(DOCUMENTS.clone(), None).await;
|
||||||
|
index.wait_task(0).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"proximityPrecision": "attributeScale"
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
assert_eq!("202", code.as_str(), "{:?}", response);
|
||||||
|
index.wait_task(1).await;
|
||||||
|
|
||||||
|
// the expected order is [3, 1, 2]
|
||||||
|
index
|
||||||
|
.search(json!({"q": "the soup of day"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// the expected order is [1, 3, 2] instead of [1, 3]
|
||||||
|
// because the attribute scale sees all the word in the same attribute
|
||||||
|
// and so doesn't remove the document 2.
|
||||||
|
index
|
||||||
|
.search(json!({"q": "\"many the fish\""}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"a": "Soup of the day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"a": "the Soup of day",
|
||||||
|
"b": "many the fish"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"a": "Soup of day",
|
||||||
|
"b": "many the lazy fish"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
@ -21,6 +21,7 @@ use crate::heed_codec::facet::{
|
|||||||
use crate::heed_codec::{
|
use crate::heed_codec::{
|
||||||
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
|
BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
|
||||||
};
|
};
|
||||||
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::readable_slices::ReadableSlices;
|
use crate::readable_slices::ReadableSlices;
|
||||||
use crate::{
|
use crate::{
|
||||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||||
@ -72,6 +73,7 @@ pub mod main_key {
|
|||||||
pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
|
pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
|
||||||
pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by";
|
pub const SORT_FACET_VALUES_BY: &str = "sort-facet-values-by";
|
||||||
pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
|
pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
|
||||||
|
pub const PROXIMITY_PRECISION: &str = "proximity-precision";
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod db_name {
|
pub mod db_name {
|
||||||
@ -1466,6 +1468,28 @@ impl Index {
|
|||||||
self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
|
self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn proximity_precision(&self, txn: &RoTxn) -> heed::Result<Option<ProximityPrecision>> {
|
||||||
|
self.main
|
||||||
|
.remap_types::<Str, SerdeBincode<ProximityPrecision>>()
|
||||||
|
.get(txn, main_key::PROXIMITY_PRECISION)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn put_proximity_precision(
|
||||||
|
&self,
|
||||||
|
txn: &mut RwTxn,
|
||||||
|
val: ProximityPrecision,
|
||||||
|
) -> heed::Result<()> {
|
||||||
|
self.main.remap_types::<Str, SerdeBincode<ProximityPrecision>>().put(
|
||||||
|
txn,
|
||||||
|
main_key::PROXIMITY_PRECISION,
|
||||||
|
&val,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn delete_proximity_precision(&self, txn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
|
self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
|
||||||
|
}
|
||||||
|
|
||||||
/* script language docids */
|
/* script language docids */
|
||||||
/// Retrieve all the documents ids that correspond with (Script, Language) key, `None` if it is any.
|
/// Retrieve all the documents ids that correspond with (Script, Language) key, `None` if it is any.
|
||||||
pub fn script_language_documents_ids(
|
pub fn script_language_documents_ids(
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
use std::cmp;
|
use std::cmp;
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::{relative_from_absolute_position, Position};
|
use crate::{relative_from_absolute_position, Position};
|
||||||
|
|
||||||
pub const MAX_DISTANCE: u32 = 4;
|
pub const MAX_DISTANCE: u32 = 4;
|
||||||
@ -25,3 +27,11 @@ pub fn positions_proximity(lhs: Position, rhs: Position) -> u32 {
|
|||||||
pub fn path_proximity(path: &[Position]) -> u32 {
|
pub fn path_proximity(path: &[Position]) -> u32 {
|
||||||
path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::<u32>()
|
path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::<u32>()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub enum ProximityPrecision {
|
||||||
|
#[default]
|
||||||
|
WordScale,
|
||||||
|
AttributeScale,
|
||||||
|
}
|
||||||
|
@ -10,6 +10,7 @@ use roaring::RoaringBitmap;
|
|||||||
use super::interner::Interned;
|
use super::interner::Interned;
|
||||||
use super::Word;
|
use super::Word;
|
||||||
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
|
||||||
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
|
||||||
use crate::{
|
use crate::{
|
||||||
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
|
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
|
||||||
@ -263,17 +264,67 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
|
||||||
self.txn,
|
ProximityPrecision::AttributeScale => {
|
||||||
(proximity, word1, word2),
|
// Force proximity to 0 because:
|
||||||
&(
|
// in AttributeScale, there are only 2 possible distances:
|
||||||
proximity,
|
// 1. words in same attribute: in that case the DB contains (0, word1, word2)
|
||||||
self.word_interner.get(word1).as_str(),
|
// 2. words in different attributes: no DB entry for these two words.
|
||||||
self.word_interner.get(word2).as_str(),
|
let proximity = 0;
|
||||||
),
|
let docids = if let Some(docids) =
|
||||||
&mut self.db_cache.word_pair_proximity_docids,
|
self.db_cache.word_pair_proximity_docids.get(&(proximity, word1, word2))
|
||||||
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
|
{
|
||||||
)
|
docids
|
||||||
|
.as_ref()
|
||||||
|
.map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
|
||||||
|
.transpose()
|
||||||
|
.map_err(heed::Error::Decoding)?
|
||||||
|
} else {
|
||||||
|
// Compute the distance at the attribute level and store it in the cache.
|
||||||
|
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
|
||||||
|
fids
|
||||||
|
} else {
|
||||||
|
self.index.fields_ids_map(self.txn)?.ids().collect()
|
||||||
|
};
|
||||||
|
let mut docids = RoaringBitmap::new();
|
||||||
|
for fid in fids {
|
||||||
|
// for each field, intersect left word bitmap and right word bitmap,
|
||||||
|
// then merge the result in a global bitmap before storing it in the cache.
|
||||||
|
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
|
||||||
|
let word2_docids = self.get_db_word_fid_docids(word2, fid)?;
|
||||||
|
if let (Some(word1_docids), Some(word2_docids)) =
|
||||||
|
(word1_docids, word2_docids)
|
||||||
|
{
|
||||||
|
docids |= word1_docids & word2_docids;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
|
||||||
|
.map(Cow::into_owned)
|
||||||
|
.map(Cow::Owned)
|
||||||
|
.map(Some)
|
||||||
|
.map_err(heed::Error::Decoding)?;
|
||||||
|
self.db_cache
|
||||||
|
.word_pair_proximity_docids
|
||||||
|
.insert((proximity, word1, word2), encoded);
|
||||||
|
Some(docids)
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(docids)
|
||||||
|
}
|
||||||
|
ProximityPrecision::WordScale => {
|
||||||
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||||
|
self.txn,
|
||||||
|
(proximity, word1, word2),
|
||||||
|
&(
|
||||||
|
proximity,
|
||||||
|
self.word_interner.get(word1).as_str(),
|
||||||
|
self.word_interner.get(word2).as_str(),
|
||||||
|
),
|
||||||
|
&mut self.db_cache.word_pair_proximity_docids,
|
||||||
|
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_pair_proximity_docids_len(
|
pub fn get_db_word_pair_proximity_docids_len(
|
||||||
@ -282,54 +333,95 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
) -> Result<Option<u64>> {
|
) -> Result<Option<u64>> {
|
||||||
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
|
||||||
self.txn,
|
ProximityPrecision::AttributeScale => Ok(self
|
||||||
(proximity, word1, word2),
|
.get_db_word_pair_proximity_docids(word1, word2, proximity)?
|
||||||
&(
|
.map(|d| d.len())),
|
||||||
proximity,
|
ProximityPrecision::WordScale => {
|
||||||
self.word_interner.get(word1).as_str(),
|
DatabaseCache::get_value::<_, _, CboRoaringBitmapLenCodec>(
|
||||||
self.word_interner.get(word2).as_str(),
|
self.txn,
|
||||||
),
|
(proximity, word1, word2),
|
||||||
&mut self.db_cache.word_pair_proximity_docids,
|
&(
|
||||||
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
|
proximity,
|
||||||
)
|
self.word_interner.get(word1).as_str(),
|
||||||
|
self.word_interner.get(word2).as_str(),
|
||||||
|
),
|
||||||
|
&mut self.db_cache.word_pair_proximity_docids,
|
||||||
|
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_db_word_prefix_pair_proximity_docids(
|
pub fn get_db_word_prefix_pair_proximity_docids(
|
||||||
&mut self,
|
&mut self,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
prefix2: Interned<String>,
|
prefix2: Interned<String>,
|
||||||
proximity: u8,
|
mut proximity: u8,
|
||||||
) -> Result<Option<RoaringBitmap>> {
|
) -> Result<Option<RoaringBitmap>> {
|
||||||
let docids = match self
|
let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default();
|
||||||
.db_cache
|
if proximity_precision == ProximityPrecision::AttributeScale {
|
||||||
.word_prefix_pair_proximity_docids
|
// Force proximity to 0 because:
|
||||||
.entry((proximity, word1, prefix2))
|
// in AttributeScale, there are only 2 possible distances:
|
||||||
{
|
// 1. words in same attribute: in that case the DB contains (0, word1, word2)
|
||||||
Entry::Occupied(docids) => docids.get().clone(),
|
// 2. words in different attributes: no DB entry for these two words.
|
||||||
Entry::Vacant(entry) => {
|
proximity = 0;
|
||||||
// compute docids using prefix iter and store the result in the cache.
|
}
|
||||||
let key = U8StrStrCodec::bytes_encode(&(
|
|
||||||
proximity,
|
|
||||||
self.word_interner.get(word1).as_str(),
|
|
||||||
self.word_interner.get(prefix2).as_str(),
|
|
||||||
))
|
|
||||||
.unwrap()
|
|
||||||
.into_owned();
|
|
||||||
let mut prefix_docids = RoaringBitmap::new();
|
|
||||||
let remap_key_type = self
|
|
||||||
.index
|
|
||||||
.word_pair_proximity_docids
|
|
||||||
.remap_key_type::<Bytes>()
|
|
||||||
.prefix_iter(self.txn, &key)?;
|
|
||||||
for result in remap_key_type {
|
|
||||||
let (_, docids) = result?;
|
|
||||||
|
|
||||||
prefix_docids |= docids;
|
let docids = if let Some(docids) =
|
||||||
|
self.db_cache.word_prefix_pair_proximity_docids.get(&(proximity, word1, prefix2))
|
||||||
|
{
|
||||||
|
docids.clone()
|
||||||
|
} else {
|
||||||
|
let prefix_docids = match proximity_precision {
|
||||||
|
ProximityPrecision::AttributeScale => {
|
||||||
|
// Compute the distance at the attribute level and store it in the cache.
|
||||||
|
let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? {
|
||||||
|
fids
|
||||||
|
} else {
|
||||||
|
self.index.fields_ids_map(self.txn)?.ids().collect()
|
||||||
|
};
|
||||||
|
let mut prefix_docids = RoaringBitmap::new();
|
||||||
|
// for each field, intersect left word bitmap and right word bitmap,
|
||||||
|
// then merge the result in a global bitmap before storing it in the cache.
|
||||||
|
for fid in fids {
|
||||||
|
let word1_docids = self.get_db_word_fid_docids(word1, fid)?;
|
||||||
|
let prefix2_docids = self.get_db_word_prefix_fid_docids(prefix2, fid)?;
|
||||||
|
if let (Some(word1_docids), Some(prefix2_docids)) =
|
||||||
|
(word1_docids, prefix2_docids)
|
||||||
|
{
|
||||||
|
prefix_docids |= word1_docids & prefix2_docids;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prefix_docids
|
||||||
}
|
}
|
||||||
entry.insert(Some(prefix_docids.clone()));
|
ProximityPrecision::WordScale => {
|
||||||
Some(prefix_docids)
|
// compute docids using prefix iter and store the result in the cache.
|
||||||
}
|
let key = U8StrStrCodec::bytes_encode(&(
|
||||||
|
proximity,
|
||||||
|
self.word_interner.get(word1).as_str(),
|
||||||
|
self.word_interner.get(prefix2).as_str(),
|
||||||
|
))
|
||||||
|
.unwrap()
|
||||||
|
.into_owned();
|
||||||
|
let mut prefix_docids = RoaringBitmap::new();
|
||||||
|
let remap_key_type = self
|
||||||
|
.index
|
||||||
|
.word_pair_proximity_docids
|
||||||
|
.remap_key_type::<Bytes>()
|
||||||
|
.prefix_iter(self.txn, &key)?;
|
||||||
|
for result in remap_key_type {
|
||||||
|
let (_, docids) = result?;
|
||||||
|
|
||||||
|
prefix_docids |= docids;
|
||||||
|
}
|
||||||
|
prefix_docids
|
||||||
|
}
|
||||||
|
};
|
||||||
|
self.db_cache
|
||||||
|
.word_prefix_pair_proximity_docids
|
||||||
|
.insert((proximity, word1, prefix2), Some(prefix_docids.clone()));
|
||||||
|
Some(prefix_docids)
|
||||||
};
|
};
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,7 @@ use super::helpers::{
|
|||||||
MergeFn, MergeableReader,
|
MergeFn, MergeableReader,
|
||||||
};
|
};
|
||||||
use super::{helpers, TypedChunk};
|
use super::{helpers, TypedChunk};
|
||||||
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::{FieldId, Result};
|
use crate::{FieldId, Result};
|
||||||
|
|
||||||
/// Extract data for each databases from obkv documents in parallel.
|
/// Extract data for each databases from obkv documents in parallel.
|
||||||
@ -52,6 +53,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
dictionary: Option<&[&str]>,
|
dictionary: Option<&[&str]>,
|
||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
exact_attributes: HashSet<FieldId>,
|
exact_attributes: HashSet<FieldId>,
|
||||||
|
proximity_precision: ProximityPrecision,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
puffin::profile_function!();
|
puffin::profile_function!();
|
||||||
|
|
||||||
@ -150,15 +152,17 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
if proximity_precision == ProximityPrecision::WordScale {
|
||||||
docid_word_positions_chunks.clone(),
|
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||||
indexer,
|
docid_word_positions_chunks.clone(),
|
||||||
lmdb_writer_sx.clone(),
|
indexer,
|
||||||
extract_word_pair_proximity_docids,
|
lmdb_writer_sx.clone(),
|
||||||
merge_deladd_cbo_roaring_bitmaps,
|
extract_word_pair_proximity_docids,
|
||||||
TypedChunk::WordPairProximityDocids,
|
merge_deladd_cbo_roaring_bitmaps,
|
||||||
"word-pair-proximity-docids",
|
TypedChunk::WordPairProximityDocids,
|
||||||
);
|
"word-pair-proximity-docids",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||||
docid_word_positions_chunks.clone(),
|
docid_word_positions_chunks.clone(),
|
||||||
|
@ -352,6 +352,7 @@ where
|
|||||||
let dictionary: Option<Vec<_>> =
|
let dictionary: Option<Vec<_>> =
|
||||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
|
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
|
||||||
|
let proximity_precision = self.index.proximity_precision(self.wtxn)?.unwrap_or_default();
|
||||||
|
|
||||||
let pool_params = GrenadParameters {
|
let pool_params = GrenadParameters {
|
||||||
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
||||||
@ -392,6 +393,7 @@ where
|
|||||||
dictionary.as_deref(),
|
dictionary.as_deref(),
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
exact_attributes,
|
exact_attributes,
|
||||||
|
proximity_precision,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ use super::IndexerConfig;
|
|||||||
use crate::criterion::Criterion;
|
use crate::criterion::Criterion;
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||||
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::update::index_documents::IndexDocumentsMethod;
|
use crate::update::index_documents::IndexDocumentsMethod;
|
||||||
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||||
use crate::{FieldsIdsMap, Index, OrderBy, Result};
|
use crate::{FieldsIdsMap, Index, OrderBy, Result};
|
||||||
@ -127,6 +128,7 @@ pub struct Settings<'a, 't, 'i> {
|
|||||||
max_values_per_facet: Setting<usize>,
|
max_values_per_facet: Setting<usize>,
|
||||||
sort_facet_values_by: Setting<HashMap<String, OrderBy>>,
|
sort_facet_values_by: Setting<HashMap<String, OrderBy>>,
|
||||||
pagination_max_total_hits: Setting<usize>,
|
pagination_max_total_hits: Setting<usize>,
|
||||||
|
proximity_precision: Setting<ProximityPrecision>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||||
@ -158,6 +160,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
max_values_per_facet: Setting::NotSet,
|
max_values_per_facet: Setting::NotSet,
|
||||||
sort_facet_values_by: Setting::NotSet,
|
sort_facet_values_by: Setting::NotSet,
|
||||||
pagination_max_total_hits: Setting::NotSet,
|
pagination_max_total_hits: Setting::NotSet,
|
||||||
|
proximity_precision: Setting::NotSet,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -332,6 +335,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
self.pagination_max_total_hits = Setting::Reset;
|
self.pagination_max_total_hits = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Records `value` as the proximity precision to apply when this settings
/// update is executed.
pub fn set_proximity_precision(&mut self, value: ProximityPrecision) {
    let requested = Setting::Set(value);
    self.proximity_precision = requested;
}
|
||||||
|
|
||||||
|
/// Records that the proximity precision must be reset when this settings
/// update is executed.
pub fn reset_proximity_precision(&mut self) {
    let requested = Setting::Reset;
    self.proximity_precision = requested;
}
|
||||||
|
|
||||||
fn reindex<FP, FA>(
|
fn reindex<FP, FA>(
|
||||||
&mut self,
|
&mut self,
|
||||||
progress_callback: &FP,
|
progress_callback: &FP,
|
||||||
@ -861,6 +872,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn update_proximity_precision(&mut self) -> Result<bool> {
|
||||||
|
let changed = match self.proximity_precision {
|
||||||
|
Setting::Set(new) => {
|
||||||
|
let old = self.index.proximity_precision(self.wtxn)?;
|
||||||
|
if old == Some(new) {
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
self.index.put_proximity_precision(self.wtxn, new)?;
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?,
|
||||||
|
Setting::NotSet => false,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(changed)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
|
pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
|
||||||
where
|
where
|
||||||
FP: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -897,6 +926,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
let synonyms_updated = self.update_synonyms()?;
|
let synonyms_updated = self.update_synonyms()?;
|
||||||
let searchable_updated = self.update_searchable()?;
|
let searchable_updated = self.update_searchable()?;
|
||||||
let exact_attributes_updated = self.update_exact_attributes()?;
|
let exact_attributes_updated = self.update_exact_attributes()?;
|
||||||
|
let proximity_precision = self.update_proximity_precision()?;
|
||||||
|
|
||||||
if stop_words_updated
|
if stop_words_updated
|
||||||
|| non_separator_tokens_updated
|
|| non_separator_tokens_updated
|
||||||
@ -906,6 +936,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
|| synonyms_updated
|
|| synonyms_updated
|
||||||
|| searchable_updated
|
|| searchable_updated
|
||||||
|| exact_attributes_updated
|
|| exact_attributes_updated
|
||||||
|
|| proximity_precision
|
||||||
{
|
{
|
||||||
self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?;
|
self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?;
|
||||||
}
|
}
|
||||||
@ -1731,6 +1762,7 @@ mod tests {
|
|||||||
max_values_per_facet,
|
max_values_per_facet,
|
||||||
sort_facet_values_by,
|
sort_facet_values_by,
|
||||||
pagination_max_total_hits,
|
pagination_max_total_hits,
|
||||||
|
proximity_precision,
|
||||||
} = settings;
|
} = settings;
|
||||||
assert!(matches!(searchable_fields, Setting::NotSet));
|
assert!(matches!(searchable_fields, Setting::NotSet));
|
||||||
assert!(matches!(displayed_fields, Setting::NotSet));
|
assert!(matches!(displayed_fields, Setting::NotSet));
|
||||||
@ -1752,6 +1784,7 @@ mod tests {
|
|||||||
assert!(matches!(max_values_per_facet, Setting::NotSet));
|
assert!(matches!(max_values_per_facet, Setting::NotSet));
|
||||||
assert!(matches!(sort_facet_values_by, Setting::NotSet));
|
assert!(matches!(sort_facet_values_by, Setting::NotSet));
|
||||||
assert!(matches!(pagination_max_total_hits, Setting::NotSet));
|
assert!(matches!(pagination_max_total_hits, Setting::NotSet));
|
||||||
|
assert!(matches!(proximity_precision, Setting::NotSet));
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user