mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Merge #4479
4479: Skip reindexing when modifying unknown faceted fields r=dureuill a=Kerollmops This PR improves how Meilisearch decides whether to reindex when a faceted field is added to the settings but not a single document contains this field. It is effectively a waste of time to reindex documents when the engine does not know the field, i.e., when no document contains it. This is related to a conversation [we had with our biggest customer (internal link)](https://discord.com/channels/1006923006964154428/1101213808627830794/1217112918857089187). They have 170 million documents, so reindexing this amount would be problematic. --- The image is available by using the following Docker command. You can see the progress of the image's build [on the GitHub CI page](https://github.com/meilisearch/meilisearch/actions/runs/8251688778). ``` docker pull getmeili/meilisearch:prototype-no-reindex-unknown-fields-0 ``` Here is the hand-made test that shows that when modifying unknown filterable attributes, here `lol`, it doesn't reindex. However, when modifying the known `genres` field, it does reindex. You can see all that by looking at the time spent processing the update. 
```json { "uid": 3, "indexUid": "movies", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "filterableAttributes": [ "genres" ] }, "error": null, "duration": "PT9.237703S", "enqueuedAt": "2024-03-12T15:34:26.836083Z", "startedAt": "2024-03-12T15:34:26.836374Z", "finishedAt": "2024-03-12T15:34:36.074077Z" }, { "uid": 2, "indexUid": "movies", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "filterableAttributes": [ "lol" ] }, "error": null, "duration": "PT0.000751S", "enqueuedAt": "2024-03-12T15:33:53.563923Z", "startedAt": "2024-03-12T15:33:53.565259Z", "finishedAt": "2024-03-12T15:33:53.56601Z" }, { "uid": 0, "indexUid": "movies", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 31944, "indexedDocuments": 31944 }, "error": null, "duration": "PT3.120723S", "enqueuedAt": "2024-02-17T10:35:55.042864Z", "startedAt": "2024-02-17T10:35:55.043505Z", "finishedAt": "2024-02-17T10:35:58.164228Z" } ``` Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
commit
d2f77e88bd
@ -1032,6 +1032,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
{
|
||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||
|
||||
let existing_fields: HashSet<_> = self
|
||||
.index
|
||||
.field_distribution(self.wtxn)?
|
||||
.into_iter()
|
||||
.filter_map(|(field, count)| (count != 0).then_some(field))
|
||||
.collect();
|
||||
|
||||
let old_faceted_fields = self.index.user_defined_faceted_fields(self.wtxn)?;
|
||||
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
@ -1052,7 +1059,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
// index new fields as facets. It means that the distinct attribute,
|
||||
// an Asc/Desc criterion or a filtered attribute has been added or removed.
|
||||
let new_faceted_fields = self.index.user_defined_faceted_fields(self.wtxn)?;
|
||||
let faceted_updated = old_faceted_fields != new_faceted_fields;
|
||||
let faceted_updated =
|
||||
(&existing_fields - &old_faceted_fields) != (&existing_fields - &new_faceted_fields);
|
||||
|
||||
let stop_words_updated = self.update_stop_words()?;
|
||||
let non_separator_tokens_updated = self.update_non_separator_tokens()?;
|
||||
|
Loading…
Reference in New Issue
Block a user