mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 12:05:05 +08:00
Merge #4818
4818: Custom headers and QoL improvements r=ManyTheFish a=dureuill # Pull Request ## Related issue Fixes #4734 Depends on #4815 ## What does this PR do? - Adds custom headers for rest embedders ([public usage](https://meilisearch.notion.site/v1-10-AI-search-changes-737c9d7d010d4dd685582bf5dab579e2#41354652885242c899def07e36a66d49)) - Quality of life: allow specifying `dimensions` for `ollama` embedders ([public usage](https://meilisearch.notion.site/v1-10-AI-search-changes-737c9d7d010d4dd685582bf5dab579e2#37218531431343dab3d2d3a9a1937e9d)). As for `rest` embedders, specifying `dimensions` disables the "test" embedding when the embedder is spawned. - Improve error message again when indexing documents that don't have a vector for a user-provided vector 1. Remove the contents of the document 2. Display the docid of the first document that triggered the error 3. Indicate how many documents in that chunk suffered from the same issue for that embedder Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
00c97c7152
@ -9,6 +9,7 @@ expression: fakerest_config.embedder_options
|
|||||||
"dimensions": 384,
|
"dimensions": 384,
|
||||||
"url": "http://localhost:7777",
|
"url": "http://localhost:7777",
|
||||||
"request": "{{text}}",
|
"request": "{{text}}",
|
||||||
"response": "{{embedding}}"
|
"response": "{{embedding}}",
|
||||||
|
"headers": {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,7 @@ expression: config.embedder_options
|
|||||||
"dimensions": 4,
|
"dimensions": 4,
|
||||||
"url": "http://localhost:7777",
|
"url": "http://localhost:7777",
|
||||||
"request": "{{text}}",
|
"request": "{{text}}",
|
||||||
"response": "{{embedding}}"
|
"response": "{{embedding}}",
|
||||||
|
"headers": {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::str::FromStr as _;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use actix_http::body::MessageBody;
|
use actix_http::body::MessageBody;
|
||||||
@ -8,7 +9,7 @@ use actix_web::dev::ServiceResponse;
|
|||||||
use actix_web::http::StatusCode;
|
use actix_web::http::StatusCode;
|
||||||
use byte_unit::{Byte, Unit};
|
use byte_unit::{Byte, Unit};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use meilisearch::option::{IndexerOpts, MaxMemory, Opt};
|
use meilisearch::option::{IndexerOpts, MaxMemory, MaxThreads, Opt};
|
||||||
use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
|
use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
@ -239,7 +240,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
|||||||
// memory has to be unlimited because several meilisearch are running in test context.
|
// memory has to be unlimited because several meilisearch are running in test context.
|
||||||
max_indexing_memory: MaxMemory::unlimited(),
|
max_indexing_memory: MaxMemory::unlimited(),
|
||||||
skip_index_budget: true,
|
skip_index_budget: true,
|
||||||
..Parser::parse_from(None as Option<&str>)
|
max_indexing_threads: MaxThreads::from_str("1").unwrap(),
|
||||||
},
|
},
|
||||||
experimental_enable_metrics: false,
|
experimental_enable_metrics: false,
|
||||||
..Parser::parse_from(None as Option<&str>)
|
..Parser::parse_from(None as Option<&str>)
|
||||||
|
@ -192,7 +192,8 @@ async fn secrets_are_hidden_in_settings() {
|
|||||||
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
||||||
"url": "https://localhost:7777",
|
"url": "https://localhost:7777",
|
||||||
"request": "{{text}}",
|
"request": "{{text}}",
|
||||||
"response": "{{embedding}}"
|
"response": "{{embedding}}",
|
||||||
|
"headers": {}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"searchCutoffMs": null,
|
"searchCutoffMs": null,
|
||||||
|
@ -487,10 +487,11 @@ async fn user_provided_embeddings_error() {
|
|||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn user_provided_vectors_error() {
|
async fn user_provided_vectors_error() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
|
|
||||||
let index = generate_default_user_provided_documents(&server).await;
|
let index = generate_default_user_provided_documents(&server).await;
|
||||||
|
|
||||||
// First case, we forget to specify `_vectors`
|
// First case, we forget to specify `_vectors`
|
||||||
let documents = json!({"id": 42, "name": "kefir"});
|
let documents = json!([{"id": 40, "name": "kefir"}, {"id": 41, "name": "intel"}, {"id": 42, "name": "max"}, {"id": 43, "name": "venus"}, {"id": 44, "name": "eva"}]);
|
||||||
let (value, code) = index.add_documents(documents, None).await;
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
snapshot!(code, @"202 Accepted");
|
snapshot!(code, @"202 Accepted");
|
||||||
let task = index.wait_task(value.uid()).await;
|
let task = index.wait_task(value.uid()).await;
|
||||||
@ -502,11 +503,11 @@ async fn user_provided_vectors_error() {
|
|||||||
"type": "documentAdditionOrUpdate",
|
"type": "documentAdditionOrUpdate",
|
||||||
"canceledBy": null,
|
"canceledBy": null,
|
||||||
"details": {
|
"details": {
|
||||||
"receivedDocuments": 1,
|
"receivedDocuments": 5,
|
||||||
"indexedDocuments": 0
|
"indexedDocuments": 0
|
||||||
},
|
},
|
||||||
"error": {
|
"error": {
|
||||||
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: \n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
|
"message": "While embedding documents for embedder `manual`: no vectors provided for document \"40\" and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
|
||||||
"code": "vector_embedding_error",
|
"code": "vector_embedding_error",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
@ -535,7 +536,7 @@ async fn user_provided_vectors_error() {
|
|||||||
"indexedDocuments": 0
|
"indexedDocuments": 0
|
||||||
},
|
},
|
||||||
"error": {
|
"error": {
|
||||||
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: \n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n _vector: manaul000\n _vector.manaul: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
|
"message": "While embedding documents for embedder `manual`: no vectors provided for document \"42\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
|
||||||
"code": "vector_embedding_error",
|
"code": "vector_embedding_error",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
@ -564,7 +565,7 @@ async fn user_provided_vectors_error() {
|
|||||||
"indexedDocuments": 0
|
"indexedDocuments": 0
|
||||||
},
|
},
|
||||||
"error": {
|
"error": {
|
||||||
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: manaul000\n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n _vectors.manaul: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
|
"message": "While embedding documents for embedder `manual`: no vectors provided for document \"42\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
|
||||||
"code": "vector_embedding_error",
|
"code": "vector_embedding_error",
|
||||||
"type": "invalid_request",
|
"type": "invalid_request",
|
||||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
|
@ -161,6 +161,55 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
|||||||
(mock_server, embedder_settings)
|
(mock_server, embedder_settings)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||||
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
|
let counter = AtomicUsize::new(0);
|
||||||
|
|
||||||
|
Mock::given(method("POST"))
|
||||||
|
.and(path("/"))
|
||||||
|
.respond_with(move |req: &Request| {
|
||||||
|
match req.headers.get("my-nonstandard-auth") {
|
||||||
|
Some(x) if x == "bearer of the ring" => {}
|
||||||
|
Some(x) => {
|
||||||
|
return ResponseTemplate::new(401).set_body_json(
|
||||||
|
json!({"error": format!("thou shall not pass, {}", x.to_str().unwrap())}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
return ResponseTemplate::new(401)
|
||||||
|
.set_body_json(json!({"error": "missing header 'my-nonstandard-auth'"}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let _req: String = match req.body_json() {
|
||||||
|
Ok(req) => req,
|
||||||
|
Err(error) => {
|
||||||
|
return ResponseTemplate::new(400).set_body_json(json!({
|
||||||
|
"error": format!("Invalid request: {error}")
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
|
||||||
|
|
||||||
|
ResponseTemplate::new(200).set_body_json(output)
|
||||||
|
})
|
||||||
|
.mount(&mock_server)
|
||||||
|
.await;
|
||||||
|
let url = mock_server.uri();
|
||||||
|
|
||||||
|
let embedder_settings = json!({
|
||||||
|
"source": "rest",
|
||||||
|
"url": url,
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}",
|
||||||
|
"headers": {"my-nonstandard-auth": "bearer of the ring"}
|
||||||
|
});
|
||||||
|
|
||||||
|
(mock_server, embedder_settings)
|
||||||
|
}
|
||||||
|
|
||||||
async fn create_mock_raw() -> (MockServer, Value) {
|
async fn create_mock_raw() -> (MockServer, Value) {
|
||||||
let mock_server = MockServer::start().await;
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
@ -1732,3 +1781,129 @@ async fn server_raw() {
|
|||||||
}
|
}
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn server_custom_header() {
|
||||||
|
let (mock, setting) = create_mock_raw_with_custom_header().await;
|
||||||
|
|
||||||
|
let server = get_server_vector().await;
|
||||||
|
let index = server.index("doggo");
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}" }),
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = server.wait_task(response.uid()).await;
|
||||||
|
snapshot!(task, @r###"
|
||||||
|
{
|
||||||
|
"uid": 0,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "settingsUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"embedders": {
|
||||||
|
"rest": {
|
||||||
|
"source": "rest",
|
||||||
|
"url": "[url]",
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`",
|
||||||
|
"code": "vector_embedding_error",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}", "headers": {"my-nonstandard-auth": "Balrog"} }),
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = server.wait_task(response.uid()).await;
|
||||||
|
snapshot!(task, @r###"
|
||||||
|
{
|
||||||
|
"uid": 1,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "settingsUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"embedders": {
|
||||||
|
"rest": {
|
||||||
|
"source": "rest",
|
||||||
|
"url": "[url]",
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}",
|
||||||
|
"headers": {
|
||||||
|
"my-nonstandard-auth": "Balrog"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`",
|
||||||
|
"code": "vector_embedding_error",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"rest": setting,
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = server.wait_task(response.uid()).await;
|
||||||
|
snapshot!(task, @r###"
|
||||||
|
{
|
||||||
|
"uid": 2,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "succeeded",
|
||||||
|
"type": "settingsUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"embedders": {
|
||||||
|
"rest": {
|
||||||
|
"source": "rest",
|
||||||
|
"url": "[url]",
|
||||||
|
"request": "{{text}}",
|
||||||
|
"response": "{{embedding}}",
|
||||||
|
"headers": {
|
||||||
|
"my-nonstandard-auth": "bearer of the ring"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"error": null,
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
@ -95,6 +95,84 @@ enum ExtractionAction {
|
|||||||
DocumentOperation(DocumentOperation),
|
DocumentOperation(DocumentOperation),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ManualEmbedderErrors {
|
||||||
|
embedder_name: String,
|
||||||
|
docid: String,
|
||||||
|
other_docids: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ManualEmbedderErrors {
|
||||||
|
pub fn push_error(
|
||||||
|
errors: &mut Option<ManualEmbedderErrors>,
|
||||||
|
embedder_name: &str,
|
||||||
|
document_id: impl Fn() -> Value,
|
||||||
|
) {
|
||||||
|
match errors {
|
||||||
|
Some(errors) => {
|
||||||
|
if errors.embedder_name == embedder_name {
|
||||||
|
errors.other_docids = errors.other_docids.saturating_add(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
*errors = Some(Self {
|
||||||
|
embedder_name: embedder_name.to_owned(),
|
||||||
|
docid: document_id().to_string(),
|
||||||
|
other_docids: 0,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_result(
|
||||||
|
errors: Option<ManualEmbedderErrors>,
|
||||||
|
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||||
|
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||||
|
) -> Result<()> {
|
||||||
|
match errors {
|
||||||
|
Some(errors) => {
|
||||||
|
let embedder_name = &errors.embedder_name;
|
||||||
|
let mut msg = format!(
|
||||||
|
r"While embedding documents for embedder `{embedder_name}`: no vectors provided for document {}{}",
|
||||||
|
errors.docid,
|
||||||
|
if errors.other_docids != 0 {
|
||||||
|
format!(" and at least {} other document(s)", errors.other_docids)
|
||||||
|
} else {
|
||||||
|
"".to_string()
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
msg += &format!("\n- Note: `{embedder_name}` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.{embedder_name}`.");
|
||||||
|
|
||||||
|
let mut hint_count = 0;
|
||||||
|
|
||||||
|
for (vector_misspelling, count) in
|
||||||
|
possible_embedding_mistakes.vector_mistakes().take(2)
|
||||||
|
{
|
||||||
|
msg += &format!("\n- Hint: try replacing `{vector_misspelling}` by `_vectors` in {count} document(s).");
|
||||||
|
hint_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (embedder_misspelling, count) in possible_embedding_mistakes
|
||||||
|
.embedder_mistakes(embedder_name, unused_vectors_distribution)
|
||||||
|
.take(2)
|
||||||
|
{
|
||||||
|
msg += &format!("\n- Hint: try replacing `_vectors.{embedder_misspelling}` by `_vectors.{embedder_name}` in {count} document(s).");
|
||||||
|
hint_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if hint_count == 0 {
|
||||||
|
msg += &format!(
|
||||||
|
"\n- Hint: opt-out for a document with `_vectors.{embedder_name}: null`"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg)))
|
||||||
|
}
|
||||||
|
None => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
/// Extracts the embedding vector contained in each document under the `_vectors` field.
|
||||||
///
|
///
|
||||||
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
|
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
|
||||||
@ -104,8 +182,10 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
embedders_configs: &[IndexEmbeddingConfig],
|
embedders_configs: &[IndexEmbeddingConfig],
|
||||||
settings_diff: &InnerIndexSettingsDiff,
|
settings_diff: &InnerIndexSettingsDiff,
|
||||||
|
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||||
) -> Result<(Vec<ExtractedVectorPoints>, UnusedVectorsDistribution)> {
|
) -> Result<(Vec<ExtractedVectorPoints>, UnusedVectorsDistribution)> {
|
||||||
let mut unused_vectors_distribution = UnusedVectorsDistribution::new();
|
let mut unused_vectors_distribution = UnusedVectorsDistribution::new();
|
||||||
|
let mut manual_errors = None;
|
||||||
let reindex_vectors = settings_diff.reindex_vectors();
|
let reindex_vectors = settings_diff.reindex_vectors();
|
||||||
|
|
||||||
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
||||||
@ -246,7 +326,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
for EmbedderVectorExtractor {
|
for EmbedderVectorExtractor {
|
||||||
embedder_name,
|
embedder_name,
|
||||||
embedder: _,
|
embedder,
|
||||||
prompt,
|
prompt,
|
||||||
prompts_writer,
|
prompts_writer,
|
||||||
remove_vectors_writer,
|
remove_vectors_writer,
|
||||||
@ -255,6 +335,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
action,
|
action,
|
||||||
} in extractors.iter_mut()
|
} in extractors.iter_mut()
|
||||||
{
|
{
|
||||||
|
let embedder_is_manual = matches!(**embedder, Embedder::UserProvided(_));
|
||||||
|
|
||||||
let (old, new) = parsed_vectors.remove(embedder_name);
|
let (old, new) = parsed_vectors.remove(embedder_name);
|
||||||
let delta = match action {
|
let delta = match action {
|
||||||
ExtractionAction::SettingsFullReindex => match old {
|
ExtractionAction::SettingsFullReindex => match old {
|
||||||
@ -285,11 +367,29 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
// this happens only when an existing embedder changed. We cannot regenerate userProvided vectors
|
// this happens only when an existing embedder changed. We cannot regenerate userProvided vectors
|
||||||
VectorState::Manual => VectorStateDelta::NoChange,
|
VectorState::Manual => VectorStateDelta::NoChange,
|
||||||
// generated vectors must be regenerated
|
// generated vectors must be regenerated
|
||||||
VectorState::Generated => regenerate_prompt(obkv, prompt, new_fields_ids_map)?,
|
VectorState::Generated => {
|
||||||
|
if embedder_is_manual {
|
||||||
|
ManualEmbedderErrors::push_error(
|
||||||
|
&mut manual_errors,
|
||||||
|
embedder_name.as_str(),
|
||||||
|
document_id,
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
regenerate_prompt(obkv, prompt, new_fields_ids_map)?
|
||||||
|
}
|
||||||
},
|
},
|
||||||
// prompt regeneration is only triggered for existing embedders
|
// prompt regeneration is only triggered for existing embedders
|
||||||
ExtractionAction::SettingsRegeneratePrompts { old_prompt } => {
|
ExtractionAction::SettingsRegeneratePrompts { old_prompt } => {
|
||||||
if old.must_regenerate() {
|
if old.must_regenerate() {
|
||||||
|
if embedder_is_manual {
|
||||||
|
ManualEmbedderErrors::push_error(
|
||||||
|
&mut manual_errors,
|
||||||
|
embedder_name.as_str(),
|
||||||
|
document_id,
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
regenerate_if_prompt_changed(
|
regenerate_if_prompt_changed(
|
||||||
obkv,
|
obkv,
|
||||||
(old_prompt, prompt),
|
(old_prompt, prompt),
|
||||||
@ -311,6 +411,9 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
(old, new),
|
(old, new),
|
||||||
(old_fields_ids_map, new_fields_ids_map),
|
(old_fields_ids_map, new_fields_ids_map),
|
||||||
document_id,
|
document_id,
|
||||||
|
embedder_name,
|
||||||
|
embedder_is_manual,
|
||||||
|
&mut manual_errors,
|
||||||
)?,
|
)?,
|
||||||
};
|
};
|
||||||
// and we finally push the unique vectors into the writer
|
// and we finally push the unique vectors into the writer
|
||||||
@ -326,6 +429,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
unused_vectors_distribution.append(parsed_vectors);
|
unused_vectors_distribution.append(parsed_vectors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ManualEmbedderErrors::to_result(
|
||||||
|
manual_errors,
|
||||||
|
possible_embedding_mistakes,
|
||||||
|
&unused_vectors_distribution,
|
||||||
|
)?;
|
||||||
|
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
|
|
||||||
for EmbedderVectorExtractor {
|
for EmbedderVectorExtractor {
|
||||||
@ -363,6 +472,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
Ok((results, unused_vectors_distribution))
|
Ok((results, unused_vectors_distribution))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)] // feel free to find efficient way to factor arguments
|
||||||
fn extract_vector_document_diff(
|
fn extract_vector_document_diff(
|
||||||
docid: DocumentId,
|
docid: DocumentId,
|
||||||
obkv: obkv::KvReader<'_, FieldId>,
|
obkv: obkv::KvReader<'_, FieldId>,
|
||||||
@ -371,6 +481,9 @@ fn extract_vector_document_diff(
|
|||||||
(old, new): (VectorState, VectorState),
|
(old, new): (VectorState, VectorState),
|
||||||
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
|
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
|
||||||
document_id: impl Fn() -> Value,
|
document_id: impl Fn() -> Value,
|
||||||
|
embedder_name: &str,
|
||||||
|
embedder_is_manual: bool,
|
||||||
|
manual_errors: &mut Option<ManualEmbedderErrors>,
|
||||||
) -> Result<VectorStateDelta> {
|
) -> Result<VectorStateDelta> {
|
||||||
match (old.must_regenerate(), new.must_regenerate()) {
|
match (old.must_regenerate(), new.must_regenerate()) {
|
||||||
(true, true) | (false, false) => {}
|
(true, true) | (false, false) => {}
|
||||||
@ -408,6 +521,10 @@ fn extract_vector_document_diff(
|
|||||||
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
||||||
|
|
||||||
if document_is_kept {
|
if document_is_kept {
|
||||||
|
if embedder_is_manual {
|
||||||
|
ManualEmbedderErrors::push_error(manual_errors, embedder_name, document_id);
|
||||||
|
return Ok(VectorStateDelta::NoChange);
|
||||||
|
}
|
||||||
// Don't give up if the old prompt was failing
|
// Don't give up if the old prompt was failing
|
||||||
let old_prompt = Some(&prompt).map(|p| {
|
let old_prompt = Some(&prompt).map(|p| {
|
||||||
p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
|
p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
|
||||||
@ -439,6 +556,10 @@ fn extract_vector_document_diff(
|
|||||||
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
|
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
|
||||||
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
||||||
if document_is_kept {
|
if document_is_kept {
|
||||||
|
if embedder_is_manual {
|
||||||
|
ManualEmbedderErrors::push_error(manual_errors, embedder_name, document_id);
|
||||||
|
return Ok(VectorStateDelta::NoChange);
|
||||||
|
}
|
||||||
// becomes autogenerated
|
// becomes autogenerated
|
||||||
VectorStateDelta::NowGenerated(prompt.render(
|
VectorStateDelta::NowGenerated(prompt.render(
|
||||||
obkv,
|
obkv,
|
||||||
|
@ -251,6 +251,7 @@ fn send_original_documents_data(
|
|||||||
indexer,
|
indexer,
|
||||||
&embedders_configs,
|
&embedders_configs,
|
||||||
&settings_diff,
|
&settings_diff,
|
||||||
|
&possible_embedding_mistakes,
|
||||||
) {
|
) {
|
||||||
Ok((extracted_vectors, unused_vectors_distribution)) => {
|
Ok((extracted_vectors, unused_vectors_distribution)) => {
|
||||||
for ExtractedVectorPoints {
|
for ExtractedVectorPoints {
|
||||||
|
@ -2744,6 +2744,7 @@ mod tests {
|
|||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
distribution: Setting::NotSet,
|
distribution: Setting::NotSet,
|
||||||
|
headers: Setting::NotSet,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
settings.set_embedder_settings(embedders);
|
settings.set_embedder_settings(embedders);
|
||||||
|
@ -67,6 +67,13 @@ impl<T> Setting<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts an `Option` into a `Setting`.
///
/// `Some(value)` becomes `Setting::Set(value)` and `None` becomes `Setting::NotSet`;
/// this function never produces `Setting::Reset`.
pub fn some_or_not_set(option: Option<T>) -> Self {
|
||||||
|
match option {
|
||||||
|
Some(value) => Setting::Set(value),
|
||||||
|
None => Setting::NotSet,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub const fn as_ref(&self) -> Setting<&T> {
|
pub const fn as_ref(&self) -> Setting<&T> {
|
||||||
match *self {
|
match *self {
|
||||||
Self::Set(ref value) => Setting::Set(value),
|
Self::Set(ref value) => Setting::Set(value),
|
||||||
@ -1544,6 +1551,7 @@ fn validate_prompt(
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
|
headers,
|
||||||
}) => {
|
}) => {
|
||||||
// validate
|
// validate
|
||||||
let template = crate::prompt::Prompt::new(template)
|
let template = crate::prompt::Prompt::new(template)
|
||||||
@ -1561,6 +1569,7 @@ fn validate_prompt(
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
|
headers,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
new => Ok(new),
|
new => Ok(new),
|
||||||
@ -1584,6 +1593,7 @@ pub fn validate_embedding_settings(
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
|
headers,
|
||||||
} = settings;
|
} = settings;
|
||||||
|
|
||||||
if let Some(0) = dimensions.set() {
|
if let Some(0) = dimensions.set() {
|
||||||
@ -1622,6 +1632,7 @@ pub fn validate_embedding_settings(
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
|
headers,
|
||||||
}));
|
}));
|
||||||
};
|
};
|
||||||
match inferred_source {
|
match inferred_source {
|
||||||
@ -1630,6 +1641,7 @@ pub fn validate_embedding_settings(
|
|||||||
|
|
||||||
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
|
check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?;
|
||||||
|
|
||||||
if let Setting::Set(model) = &model {
|
if let Setting::Set(model) = &model {
|
||||||
let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
|
let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
|
||||||
@ -1662,13 +1674,12 @@ pub fn validate_embedding_settings(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
EmbedderSource::Ollama => {
|
EmbedderSource::Ollama => {
|
||||||
// Dimensions get inferred, only model name is required
|
|
||||||
check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
|
|
||||||
check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
||||||
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
||||||
|
|
||||||
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
|
check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?;
|
||||||
}
|
}
|
||||||
EmbedderSource::HuggingFace => {
|
EmbedderSource::HuggingFace => {
|
||||||
check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
|
check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
|
||||||
@ -1677,6 +1688,7 @@ pub fn validate_embedding_settings(
|
|||||||
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
||||||
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
|
check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?;
|
||||||
}
|
}
|
||||||
EmbedderSource::UserProvided => {
|
EmbedderSource::UserProvided => {
|
||||||
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
||||||
@ -1693,6 +1705,7 @@ pub fn validate_embedding_settings(
|
|||||||
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
||||||
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?;
|
||||||
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?;
|
||||||
|
check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?;
|
||||||
}
|
}
|
||||||
EmbedderSource::Rest => {
|
EmbedderSource::Rest => {
|
||||||
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
|
||||||
@ -1713,6 +1726,7 @@ pub fn validate_embedding_settings(
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
|
headers,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -202,22 +202,6 @@ impl Default for EmbedderOptions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convenience constructors that wrap a per-source options value in the matching
// `EmbedderOptions` variant. NOTE(review): this block appears only on the removed
// side of the diff — confirm it no longer exists in the current file.
impl EmbedderOptions {
|
|
||||||
/// Default options for the Hugging Face embedder
|
|
||||||
pub fn huggingface() -> Self {
|
|
||||||
Self::HuggingFace(hf::EmbedderOptions::new())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Default options for the OpenAI embedder
|
|
||||||
pub fn openai(api_key: Option<String>) -> Self {
|
|
||||||
Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Default-model options for the Ollama embedder; `api_key` and `url` are
/// forwarded unchanged to `ollama::EmbedderOptions::with_default_model`.
pub fn ollama(api_key: Option<String>, url: Option<String>) -> Self {
|
|
||||||
Self::Ollama(ollama::EmbedderOptions::with_default_model(api_key, url))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Embedder {
|
impl Embedder {
|
||||||
/// Spawns a new embedder built from its options.
|
/// Spawns a new embedder built from its options.
|
||||||
pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
|
pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
|
||||||
|
@ -17,11 +17,22 @@ pub struct EmbedderOptions {
|
|||||||
pub url: Option<String>,
|
pub url: Option<String>,
|
||||||
pub api_key: Option<String>,
|
pub api_key: Option<String>,
|
||||||
pub distribution: Option<DistributionShift>,
|
pub distribution: Option<DistributionShift>,
|
||||||
|
pub dimensions: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EmbedderOptions {
|
impl EmbedderOptions {
|
||||||
/// Builds embedder options with `embedding_model` set to `"nomic-embed-text"`
/// and `distribution` set to `None`.
///
/// `api_key` and `url` are stored exactly as passed. Two signatures are shown
/// interleaved here: the earlier one takes only `api_key` and `url`, the later
/// one additionally takes `dimensions` and stores it in the returned value.
pub fn with_default_model(api_key: Option<String>, url: Option<String>) -> Self {
|
pub fn with_default_model(
|
||||||
Self { embedding_model: "nomic-embed-text".into(), api_key, url, distribution: None }
|
api_key: Option<String>,
|
||||||
|
url: Option<String>,
|
||||||
|
dimensions: Option<usize>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
embedding_model: "nomic-embed-text".into(),
|
||||||
|
api_key,
|
||||||
|
url,
|
||||||
|
distribution: None,
|
||||||
|
dimensions,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -31,7 +42,7 @@ impl Embedder {
|
|||||||
let rest_embedder = match RestEmbedder::new(
|
let rest_embedder = match RestEmbedder::new(
|
||||||
RestEmbedderOptions {
|
RestEmbedderOptions {
|
||||||
api_key: options.api_key,
|
api_key: options.api_key,
|
||||||
dimensions: None,
|
dimensions: options.dimensions,
|
||||||
distribution: options.distribution,
|
distribution: options.distribution,
|
||||||
url: options.url.unwrap_or_else(get_ollama_path),
|
url: options.url.unwrap_or_else(get_ollama_path),
|
||||||
request: serde_json::json!({
|
request: serde_json::json!({
|
||||||
@ -41,6 +52,7 @@ impl Embedder {
|
|||||||
response: serde_json::json!({
|
response: serde_json::json!({
|
||||||
"embedding": super::rest::RESPONSE_PLACEHOLDER,
|
"embedding": super::rest::RESPONSE_PLACEHOLDER,
|
||||||
}),
|
}),
|
||||||
|
headers: Default::default(),
|
||||||
},
|
},
|
||||||
super::rest::ConfigurationSource::Ollama,
|
super::rest::ConfigurationSource::Ollama,
|
||||||
) {
|
) {
|
||||||
|
@ -195,6 +195,7 @@ impl Embedder {
|
|||||||
super::rest::REPEAT_PLACEHOLDER
|
super::rest::REPEAT_PLACEHOLDER
|
||||||
]
|
]
|
||||||
}),
|
}),
|
||||||
|
headers: Default::default(),
|
||||||
},
|
},
|
||||||
super::rest::ConfigurationSource::OpenAi,
|
super::rest::ConfigurationSource::OpenAi,
|
||||||
)?;
|
)?;
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
||||||
@ -80,6 +82,7 @@ pub struct Embedder {
|
|||||||
struct EmbedderData {
|
struct EmbedderData {
|
||||||
client: ureq::Agent,
|
client: ureq::Agent,
|
||||||
bearer: Option<String>,
|
bearer: Option<String>,
|
||||||
|
headers: BTreeMap<String, String>,
|
||||||
url: String,
|
url: String,
|
||||||
request: Request,
|
request: Request,
|
||||||
response: Response,
|
response: Response,
|
||||||
@ -94,6 +97,7 @@ pub struct EmbedderOptions {
|
|||||||
pub url: String,
|
pub url: String,
|
||||||
pub request: serde_json::Value,
|
pub request: serde_json::Value,
|
||||||
pub response: serde_json::Value,
|
pub response: serde_json::Value,
|
||||||
|
pub headers: BTreeMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::hash::Hash for EmbedderOptions {
|
impl std::hash::Hash for EmbedderOptions {
|
||||||
@ -138,6 +142,7 @@ impl Embedder {
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
configuration_source,
|
configuration_source,
|
||||||
|
headers: options.headers,
|
||||||
};
|
};
|
||||||
|
|
||||||
let dimensions = if let Some(dimensions) = options.dimensions {
|
let dimensions = if let Some(dimensions) = options.dimensions {
|
||||||
@ -223,7 +228,10 @@ where
|
|||||||
} else {
|
} else {
|
||||||
request
|
request
|
||||||
};
|
};
|
||||||
let request = request.set("Content-Type", "application/json");
|
let mut request = request.set("Content-Type", "application/json");
|
||||||
|
for (header, value) in &data.headers {
|
||||||
|
request = request.set(header.as_str(), value.as_str());
|
||||||
|
}
|
||||||
|
|
||||||
let body = data.request.inject_texts(inputs);
|
let body = data.request.inject_texts(inputs);
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@ -41,6 +43,9 @@ pub struct EmbeddingSettings {
|
|||||||
pub response: Setting<serde_json::Value>,
|
pub response: Setting<serde_json::Value>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
|
pub headers: Setting<BTreeMap<String, String>>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default)]
|
||||||
pub distribution: Setting<DistributionShift>,
|
pub distribution: Setting<DistributionShift>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -105,6 +110,7 @@ impl SettingsDiff {
|
|||||||
mut request,
|
mut request,
|
||||||
mut response,
|
mut response,
|
||||||
mut distribution,
|
mut distribution,
|
||||||
|
mut headers,
|
||||||
} = old;
|
} = old;
|
||||||
|
|
||||||
let EmbeddingSettings {
|
let EmbeddingSettings {
|
||||||
@ -118,6 +124,7 @@ impl SettingsDiff {
|
|||||||
request: new_request,
|
request: new_request,
|
||||||
response: new_response,
|
response: new_response,
|
||||||
distribution: new_distribution,
|
distribution: new_distribution,
|
||||||
|
headers: new_headers,
|
||||||
} = new;
|
} = new;
|
||||||
|
|
||||||
let mut reindex_action = None;
|
let mut reindex_action = None;
|
||||||
@ -135,6 +142,7 @@ impl SettingsDiff {
|
|||||||
&mut request,
|
&mut request,
|
||||||
&mut response,
|
&mut response,
|
||||||
&mut document_template,
|
&mut document_template,
|
||||||
|
&mut headers,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
if model.apply(new_model) {
|
if model.apply(new_model) {
|
||||||
@ -144,7 +152,18 @@ impl SettingsDiff {
|
|||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
}
|
}
|
||||||
if dimensions.apply(new_dimensions) {
|
if dimensions.apply(new_dimensions) {
|
||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
match source {
|
||||||
|
// regenerate on dimensions change in OpenAI since truncation is supported
|
||||||
|
Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {
|
||||||
|
ReindexAction::push_action(
|
||||||
|
&mut reindex_action,
|
||||||
|
ReindexAction::FullReindex,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// for all other embedders, the parameter is a hint that should not be able to change the result
|
||||||
|
// and so won't cause a reindex by itself.
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if url.apply(new_url) {
|
if url.apply(new_url) {
|
||||||
match source {
|
match source {
|
||||||
@ -173,6 +192,7 @@ impl SettingsDiff {
|
|||||||
|
|
||||||
distribution.apply(new_distribution);
|
distribution.apply(new_distribution);
|
||||||
api_key.apply(new_api_key);
|
api_key.apply(new_api_key);
|
||||||
|
headers.apply(new_headers);
|
||||||
|
|
||||||
let updated_settings = EmbeddingSettings {
|
let updated_settings = EmbeddingSettings {
|
||||||
source,
|
source,
|
||||||
@ -185,6 +205,7 @@ impl SettingsDiff {
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
|
headers,
|
||||||
};
|
};
|
||||||
|
|
||||||
match reindex_action {
|
match reindex_action {
|
||||||
@ -218,6 +239,7 @@ fn apply_default_for_source(
|
|||||||
request: &mut Setting<serde_json::Value>,
|
request: &mut Setting<serde_json::Value>,
|
||||||
response: &mut Setting<serde_json::Value>,
|
response: &mut Setting<serde_json::Value>,
|
||||||
document_template: &mut Setting<String>,
|
document_template: &mut Setting<String>,
|
||||||
|
headers: &mut Setting<BTreeMap<String, String>>,
|
||||||
) {
|
) {
|
||||||
match source {
|
match source {
|
||||||
Setting::Set(EmbedderSource::HuggingFace) => {
|
Setting::Set(EmbedderSource::HuggingFace) => {
|
||||||
@ -227,6 +249,7 @@ fn apply_default_for_source(
|
|||||||
*url = Setting::NotSet;
|
*url = Setting::NotSet;
|
||||||
*request = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*response = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
|
*headers = Setting::NotSet;
|
||||||
}
|
}
|
||||||
Setting::Set(EmbedderSource::Ollama) => {
|
Setting::Set(EmbedderSource::Ollama) => {
|
||||||
*model = Setting::Reset;
|
*model = Setting::Reset;
|
||||||
@ -235,6 +258,7 @@ fn apply_default_for_source(
|
|||||||
*url = Setting::NotSet;
|
*url = Setting::NotSet;
|
||||||
*request = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*response = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
|
*headers = Setting::NotSet;
|
||||||
}
|
}
|
||||||
Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {
|
Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {
|
||||||
*model = Setting::Reset;
|
*model = Setting::Reset;
|
||||||
@ -243,6 +267,7 @@ fn apply_default_for_source(
|
|||||||
*url = Setting::Reset;
|
*url = Setting::Reset;
|
||||||
*request = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*response = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
|
*headers = Setting::NotSet;
|
||||||
}
|
}
|
||||||
Setting::Set(EmbedderSource::Rest) => {
|
Setting::Set(EmbedderSource::Rest) => {
|
||||||
*model = Setting::NotSet;
|
*model = Setting::NotSet;
|
||||||
@ -251,6 +276,7 @@ fn apply_default_for_source(
|
|||||||
*url = Setting::Reset;
|
*url = Setting::Reset;
|
||||||
*request = Setting::Reset;
|
*request = Setting::Reset;
|
||||||
*response = Setting::Reset;
|
*response = Setting::Reset;
|
||||||
|
*headers = Setting::Reset;
|
||||||
}
|
}
|
||||||
Setting::Set(EmbedderSource::UserProvided) => {
|
Setting::Set(EmbedderSource::UserProvided) => {
|
||||||
*model = Setting::NotSet;
|
*model = Setting::NotSet;
|
||||||
@ -260,6 +286,7 @@ fn apply_default_for_source(
|
|||||||
*request = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*response = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
*document_template = Setting::NotSet;
|
*document_template = Setting::NotSet;
|
||||||
|
*headers = Setting::NotSet;
|
||||||
}
|
}
|
||||||
Setting::NotSet => {}
|
Setting::NotSet => {}
|
||||||
}
|
}
|
||||||
@ -293,6 +320,7 @@ impl EmbeddingSettings {
|
|||||||
pub const URL: &'static str = "url";
|
pub const URL: &'static str = "url";
|
||||||
pub const REQUEST: &'static str = "request";
|
pub const REQUEST: &'static str = "request";
|
||||||
pub const RESPONSE: &'static str = "response";
|
pub const RESPONSE: &'static str = "response";
|
||||||
|
pub const HEADERS: &'static str = "headers";
|
||||||
|
|
||||||
pub const DISTRIBUTION: &'static str = "distribution";
|
pub const DISTRIBUTION: &'static str = "distribution";
|
||||||
|
|
||||||
@ -312,9 +340,12 @@ impl EmbeddingSettings {
|
|||||||
Self::API_KEY => {
|
Self::API_KEY => {
|
||||||
&[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest]
|
&[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest]
|
||||||
}
|
}
|
||||||
Self::DIMENSIONS => {
|
Self::DIMENSIONS => &[
|
||||||
&[EmbedderSource::OpenAi, EmbedderSource::UserProvided, EmbedderSource::Rest]
|
EmbedderSource::OpenAi,
|
||||||
}
|
EmbedderSource::UserProvided,
|
||||||
|
EmbedderSource::Ollama,
|
||||||
|
EmbedderSource::Rest,
|
||||||
|
],
|
||||||
Self::DOCUMENT_TEMPLATE => &[
|
Self::DOCUMENT_TEMPLATE => &[
|
||||||
EmbedderSource::HuggingFace,
|
EmbedderSource::HuggingFace,
|
||||||
EmbedderSource::OpenAi,
|
EmbedderSource::OpenAi,
|
||||||
@ -324,6 +355,7 @@ impl EmbeddingSettings {
|
|||||||
Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest, EmbedderSource::OpenAi],
|
Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest, EmbedderSource::OpenAi],
|
||||||
Self::REQUEST => &[EmbedderSource::Rest],
|
Self::REQUEST => &[EmbedderSource::Rest],
|
||||||
Self::RESPONSE => &[EmbedderSource::Rest],
|
Self::RESPONSE => &[EmbedderSource::Rest],
|
||||||
|
Self::HEADERS => &[EmbedderSource::Rest],
|
||||||
Self::DISTRIBUTION => &[
|
Self::DISTRIBUTION => &[
|
||||||
EmbedderSource::HuggingFace,
|
EmbedderSource::HuggingFace,
|
||||||
EmbedderSource::Ollama,
|
EmbedderSource::Ollama,
|
||||||
@ -359,6 +391,7 @@ impl EmbeddingSettings {
|
|||||||
Self::DOCUMENT_TEMPLATE,
|
Self::DOCUMENT_TEMPLATE,
|
||||||
Self::URL,
|
Self::URL,
|
||||||
Self::API_KEY,
|
Self::API_KEY,
|
||||||
|
Self::DIMENSIONS,
|
||||||
Self::DISTRIBUTION,
|
Self::DISTRIBUTION,
|
||||||
],
|
],
|
||||||
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION],
|
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION],
|
||||||
@ -370,6 +403,7 @@ impl EmbeddingSettings {
|
|||||||
Self::URL,
|
Self::URL,
|
||||||
Self::REQUEST,
|
Self::REQUEST,
|
||||||
Self::RESPONSE,
|
Self::RESPONSE,
|
||||||
|
Self::HEADERS,
|
||||||
Self::DISTRIBUTION,
|
Self::DISTRIBUTION,
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
@ -433,14 +467,15 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
}) => Self {
|
}) => Self {
|
||||||
source: Setting::Set(EmbedderSource::HuggingFace),
|
source: Setting::Set(EmbedderSource::HuggingFace),
|
||||||
model: Setting::Set(model),
|
model: Setting::Set(model),
|
||||||
revision: revision.map(Setting::Set).unwrap_or_default(),
|
revision: Setting::some_or_not_set(revision),
|
||||||
api_key: Setting::NotSet,
|
api_key: Setting::NotSet,
|
||||||
dimensions: Setting::NotSet,
|
dimensions: Setting::NotSet,
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
headers: Setting::NotSet,
|
||||||
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||||
url,
|
url,
|
||||||
@ -452,30 +487,33 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
source: Setting::Set(EmbedderSource::OpenAi),
|
source: Setting::Set(EmbedderSource::OpenAi),
|
||||||
model: Setting::Set(embedding_model.name().to_owned()),
|
model: Setting::Set(embedding_model.name().to_owned()),
|
||||||
revision: Setting::NotSet,
|
revision: Setting::NotSet,
|
||||||
api_key: api_key.map(Setting::Set).unwrap_or_default(),
|
api_key: Setting::some_or_not_set(api_key),
|
||||||
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
dimensions: Setting::some_or_not_set(dimensions),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: url.map(Setting::Set).unwrap_or_default(),
|
url: Setting::some_or_not_set(url),
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
headers: Setting::NotSet,
|
||||||
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
|
||||||
embedding_model,
|
embedding_model,
|
||||||
url,
|
url,
|
||||||
api_key,
|
api_key,
|
||||||
distribution,
|
distribution,
|
||||||
|
dimensions,
|
||||||
}) => Self {
|
}) => Self {
|
||||||
source: Setting::Set(EmbedderSource::Ollama),
|
source: Setting::Set(EmbedderSource::Ollama),
|
||||||
model: Setting::Set(embedding_model),
|
model: Setting::Set(embedding_model),
|
||||||
revision: Setting::NotSet,
|
revision: Setting::NotSet,
|
||||||
api_key: api_key.map(Setting::Set).unwrap_or_default(),
|
api_key: Setting::some_or_not_set(api_key),
|
||||||
dimensions: Setting::NotSet,
|
dimensions: Setting::some_or_not_set(dimensions),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: url.map(Setting::Set).unwrap_or_default(),
|
url: Setting::some_or_not_set(url),
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
headers: Setting::NotSet,
|
||||||
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
|
||||||
dimensions,
|
dimensions,
|
||||||
@ -490,7 +528,8 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
headers: Setting::NotSet,
|
||||||
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
|
||||||
api_key,
|
api_key,
|
||||||
@ -499,17 +538,19 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
|
headers,
|
||||||
}) => Self {
|
}) => Self {
|
||||||
source: Setting::Set(EmbedderSource::Rest),
|
source: Setting::Set(EmbedderSource::Rest),
|
||||||
model: Setting::NotSet,
|
model: Setting::NotSet,
|
||||||
revision: Setting::NotSet,
|
revision: Setting::NotSet,
|
||||||
api_key: api_key.map(Setting::Set).unwrap_or_default(),
|
api_key: Setting::some_or_not_set(api_key),
|
||||||
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
dimensions: Setting::some_or_not_set(dimensions),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: Setting::Set(url),
|
url: Setting::Set(url),
|
||||||
request: Setting::Set(request),
|
request: Setting::Set(request),
|
||||||
response: Setting::Set(response),
|
response: Setting::Set(response),
|
||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: Setting::some_or_not_set(distribution),
|
||||||
|
headers: Setting::Set(headers),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -529,6 +570,7 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
|
headers,
|
||||||
} = value;
|
} = value;
|
||||||
|
|
||||||
if let Some(source) = source.set() {
|
if let Some(source) = source.set() {
|
||||||
@ -557,6 +599,7 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
super::ollama::EmbedderOptions::with_default_model(
|
super::ollama::EmbedderOptions::with_default_model(
|
||||||
api_key.set(),
|
api_key.set(),
|
||||||
url.set(),
|
url.set(),
|
||||||
|
dimensions.set(),
|
||||||
);
|
);
|
||||||
if let Some(model) = model.set() {
|
if let Some(model) = model.set() {
|
||||||
options.embedding_model = model;
|
options.embedding_model = model;
|
||||||
@ -598,6 +641,7 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
request: request.set().unwrap(),
|
request: request.set().unwrap(),
|
||||||
response: response.set().unwrap(),
|
response: response.set().unwrap(),
|
||||||
distribution: distribution.set(),
|
distribution: distribution.set(),
|
||||||
|
headers: headers.set().unwrap_or_default(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user