diff --git a/crates/meilisearch/tests/vector/settings.rs b/crates/meilisearch/tests/vector/settings.rs index 88c670fb3..9fed808b0 100644 --- a/crates/meilisearch/tests/vector/settings.rs +++ b/crates/meilisearch/tests/vector/settings.rs @@ -407,3 +407,551 @@ async fn ollama_url_checks() { } "###); } + +#[actix_rt::test] +async fn composite_checks() { + let server = Server::new().await; + let index = server.index("test"); + // inner distribution + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "distribution": { + "mean": 0.5, + "sigma": 0.2, + } + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `distribution` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `distribution` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // manual source + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "userProvided", + "dimensions": 42, + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder.source`: Source `userProvided` is not available in a nested embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // composite source + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + } + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder.source`: Source `composite` is not available in a nested embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no source in indexing + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": {}, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.indexingEmbedder`: Missing field `source`.\n - note: this field is mandatory for nested embedders", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no source in search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": {}, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Missing field `source`.\n - note: this field is mandatory for nested embedders", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no indexing + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test`: Missing field `indexingEmbedder` (note: this field is mandatory for source `composite`)", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test`: Missing field `searchEmbedder` (note: this field is mandatory for source `composite`)", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // inner quantized + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "binaryQuantized": true, + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "binaryQuantized": false, + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `binaryQuantized` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `binaryQuantized` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // prompt in search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "documentTemplate": "toto", + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `documentTemplate` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `documentTemplate` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // dimensions don't match + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "ollama", + "dimensions": 0x42, + "model": "does-not-exist", + }, + "indexingEmbedder": { + "source": "ollama", + "dimensions": 42, + "model": "does-not-exist", + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "ollama", + "model": "does-not-exist", + "dimensions": 66 + }, + "indexingEmbedder": { + "source": "ollama", + "model": "does-not-exist", + "dimensions": 42 + } + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: user error: error while generating test embeddings.\n - the dimensions of embeddings produced at search time and at indexing time don't match.\n - Search time dimensions: 66\n - Indexing time dimensions: 42\n - Note: Dimensions of embeddings produced by both embedders are required to match.", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + // pooling don't match + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceMean" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceCls" + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceMean" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceCls" + } + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: user error: error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - angular distance 0.25\n - Meilisearch requires a maximum distance of 0.01.\n - Note: check that both embedders produce similar embeddings.\n - Make sure the `model`, `revision` and `pooling` of both embedders match.", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // ok + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +}