Test composite embedders

This commit is contained in:
Louis Dureuil 2025-02-26 15:11:01 +01:00
parent b190b612a3
commit 57a6beee30
No known key found for this signature in database

View File

@ -407,3 +407,551 @@ async fn ollama_url_checks() {
}
"###);
}
#[actix_rt::test]
async fn composite_checks() {
let server = Server::new().await;
let index = server.index("test");
// inner distribution
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"distribution": {
"mean": 0.5,
"sigma": 0.2,
}
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test.searchEmbedder`: Field `distribution` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `distribution` is available when source `huggingFace` is not for the search embedder",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// manual source
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "userProvided",
"dimensions": 42,
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test.searchEmbedder.source`: Source `userProvided` is not available in a nested embedder",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// composite source
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test.searchEmbedder.source`: Source `composite` is not available in a nested embedder",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// no source in indexing
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
"indexingEmbedder": {},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test.indexingEmbedder`: Missing field `source`.\n - note: this field is mandatory for nested embedders",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// no source in search
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test.searchEmbedder`: Missing field `source`.\n - note: this field is mandatory for nested embedders",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// no indexing
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test`: Missing field `indexingEmbedder` (note: this field is mandatory for source `composite`)",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// no search
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test`: Missing field `searchEmbedder` (note: this field is mandatory for source `composite`)",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// inner quantized
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"binaryQuantized": true,
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"binaryQuantized": false,
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test.searchEmbedder`: Field `binaryQuantized` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `binaryQuantized` is available when source `huggingFace` is not for the search embedder",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// prompt in search
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "toto",
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "`.embedders.test.searchEmbedder`: Field `documentTemplate` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `documentTemplate` is available when source `huggingFace` is not for the search embedder",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
}
"###);
// dimensions don't match
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "ollama",
"dimensions": 0x42,
"model": "does-not-exist",
},
"indexingEmbedder": {
"source": "ollama",
"dimensions": 42,
"model": "does-not-exist",
},
}
}
}))
.await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "test",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "ollama",
"model": "does-not-exist",
"dimensions": 66
},
"indexingEmbedder": {
"source": "ollama",
"model": "does-not-exist",
"dimensions": 42
}
}
}
},
"error": {
"message": "Index `test`: Error while generating embeddings: user error: error while generating test embeddings.\n - the dimensions of embeddings produced at search time and at indexing time don't match.\n - Search time dimensions: 66\n - Indexing time dimensions: 42\n - Note: Dimensions of embeddings produced by both embedders are required to match.",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// pooling don't match
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"pooling": "forceMean"
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"pooling": "forceCls"
},
}
}
}))
.await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "test",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"pooling": "forceMean"
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"pooling": "forceCls"
}
}
}
},
"error": {
"message": "Index `test`: Error while generating embeddings: user error: error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - angular distance 0.25\n - Meilisearch requires a maximum distance of 0.01.\n - Note: check that both embedders produce similar embeddings.\n - Make sure the `model`, `revision` and `pooling` of both embedders match.",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// ok
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await;
snapshot!(response, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "test",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e"
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e"
}
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}