Reduce the number of complex calls to settings diff functions

This commit is contained in:
Clément Renault 2024-05-30 11:17:03 +02:00
parent d9e5074189
commit 3a78e988da
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 53 additions and 32 deletions

View File

@ -808,13 +808,15 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut new_inner_settings = old_inner_settings.clone(); let mut new_inner_settings = old_inner_settings.clone();
new_inner_settings.fields_ids_map = fields_ids_map; new_inner_settings.fields_ids_map = fields_ids_map;
let settings_diff = InnerIndexSettingsDiff { let embedding_configs_updated = false;
old: old_inner_settings, let settings_update_only = false;
new: new_inner_settings, let settings_diff = InnerIndexSettingsDiff::new(
old_inner_settings,
new_inner_settings,
primary_key_id, primary_key_id,
embedding_configs_updated: false, embedding_configs_updated,
settings_update_only: false, settings_update_only,
}; );
Ok(TransformOutput { Ok(TransformOutput {
primary_key, primary_key,

View File

@ -489,7 +489,7 @@ pub(crate) fn write_typed_chunk_into_index(
} }
let merger = builder.build(); let merger = builder.build();
if settings_diff.only_additional_fields().is_some() { if settings_diff.only_additional_fields.is_some() {
write_proximity_entries_into_database_additional_searchables( write_proximity_entries_into_database_additional_searchables(
merger, merger,
&index.word_pair_proximity_docids, &index.word_pair_proximity_docids,

View File

@ -1073,13 +1073,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
.index .index
.primary_key(self.wtxn)? .primary_key(self.wtxn)?
.and_then(|name| new_inner_settings.fields_ids_map.id(name)); .and_then(|name| new_inner_settings.fields_ids_map.id(name));
let inner_settings_diff = InnerIndexSettingsDiff { let settings_update_only = true;
old: old_inner_settings, let inner_settings_diff = InnerIndexSettingsDiff::new(
new: new_inner_settings, old_inner_settings,
new_inner_settings,
primary_key_id, primary_key_id,
embedding_configs_updated, embedding_configs_updated,
settings_update_only: true, settings_update_only,
}; );
if inner_settings_diff.any_reindexing_needed() { if inner_settings_diff.any_reindexing_needed() {
self.reindex(&progress_callback, &should_abort, inner_settings_diff)?; self.reindex(&progress_callback, &should_abort, inner_settings_diff)?;
@ -1096,9 +1097,46 @@ pub struct InnerIndexSettingsDiff {
// TODO: compare directly the embedders. // TODO: compare directly the embedders.
pub(crate) embedding_configs_updated: bool, pub(crate) embedding_configs_updated: bool,
pub(crate) settings_update_only: bool, pub(crate) settings_update_only: bool,
/// The set of only the additional searchable fields.
/// If any other searchable field has been modified, is set to None.
pub(crate) only_additional_fields: Option<HashSet<String>>,
} }
impl InnerIndexSettingsDiff { impl InnerIndexSettingsDiff {
pub(crate) fn new(
old_settings: InnerIndexSettings,
new_settings: InnerIndexSettings,
primary_key_id: Option<FieldId>,
embedding_configs_updated: bool,
settings_update_only: bool,
) -> Self {
let only_additional_fields = match (
&old_settings.user_defined_searchable_fields,
&new_settings.user_defined_searchable_fields,
) {
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
(Some(old), Some(new)) => {
let old: HashSet<_> = old.iter().cloned().collect();
let new: HashSet<_> = new.iter().cloned().collect();
if old.difference(&new).next().is_none() {
// if no field has been removed return only the additional ones
Some(&new - &old)
} else {
None
}
}
};
InnerIndexSettingsDiff {
old: old_settings,
new: new_settings,
primary_key_id,
embedding_configs_updated,
settings_update_only,
only_additional_fields,
}
}
pub fn any_reindexing_needed(&self) -> bool { pub fn any_reindexing_needed(&self) -> bool {
self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors() self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors()
} }
@ -1123,7 +1161,7 @@ impl InnerIndexSettingsDiff {
|| self.old.proximity_precision != self.new.proximity_precision || self.old.proximity_precision != self.new.proximity_precision
{ {
Some(DelAddOperation::DeletionAndAddition) Some(DelAddOperation::DeletionAndAddition)
} else if let Some(only_additional_fields) = self.only_additional_fields() { } else if let Some(only_additional_fields) = &self.only_additional_fields {
let additional_field = self.new.fields_ids_map.name(id).unwrap(); let additional_field = self.new.fields_ids_map.name(id).unwrap();
if only_additional_fields.contains(additional_field) { if only_additional_fields.contains(additional_field) {
Some(DelAddOperation::Addition) Some(DelAddOperation::Addition)
@ -1138,25 +1176,6 @@ impl InnerIndexSettingsDiff {
} }
} }
/// Returns only the additional searchable fields.
/// If any other searchable field has been modified, returns None.
pub fn only_additional_fields(&self) -> Option<HashSet<String>> {
match (&self.old.user_defined_searchable_fields, &self.new.user_defined_searchable_fields) {
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
(Some(old), Some(new)) => {
let old: HashSet<_> = old.iter().cloned().collect();
let new: HashSet<_> = new.iter().cloned().collect();
if old.difference(&new).next().is_none() {
// if no field has been removed
// return only the additional ones
Some(&new - &old)
} else {
None
}
}
}
}
pub fn reindex_facets(&self) -> bool { pub fn reindex_facets(&self) -> bool {
let existing_fields = &self.new.existing_fields; let existing_fields = &self.new.existing_fields;
if existing_fields.iter().any(|field| field.contains('.')) { if existing_fields.iter().any(|field| field.contains('.')) {