mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
Compare commits
2 Commits
1fcd5f091e
...
68bbf674c9
Author | SHA1 | Date | |
---|---|---|---|
|
68bbf674c9 | ||
|
980921e078 |
@ -1,5 +1,4 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
||||||
|
|
||||||
use meili_snap::{json_string, snapshot};
|
use meili_snap::{json_string, snapshot};
|
||||||
use reqwest::IntoUrl;
|
use reqwest::IntoUrl;
|
||||||
@ -13,13 +12,22 @@ use crate::vector::{get_server_vector, GetAllDocumentsOptions};
|
|||||||
async fn create_mock() -> (MockServer, Value) {
|
async fn create_mock() -> (MockServer, Value) {
|
||||||
let mock_server = MockServer::start().await;
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
let counter = AtomicUsize::new(0);
|
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||||
|
// text -> embedding
|
||||||
|
("kefir", [0.0, 0.0, 0.0]),
|
||||||
|
("intel", [1.0, 1.0, 1.0]),
|
||||||
|
]
|
||||||
|
// turn into btree
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
Mock::given(method("POST"))
|
Mock::given(method("POST"))
|
||||||
.and(path("/"))
|
.and(path("/"))
|
||||||
.respond_with(move |_req: &Request| {
|
.respond_with(move |req: &Request| {
|
||||||
let counter = counter.fetch_add(1, Ordering::Relaxed);
|
let text: String = req.body_json().unwrap();
|
||||||
ResponseTemplate::new(200).set_body_json(json!({ "data": vec![counter; 3] }))
|
ResponseTemplate::new(200).set_body_json(
|
||||||
|
json!({ "data": text_to_embedding.get(text.as_str()).unwrap_or(&[99., 99., 99.]) }),
|
||||||
|
)
|
||||||
})
|
})
|
||||||
.mount(&mock_server)
|
.mount(&mock_server)
|
||||||
.await;
|
.await;
|
||||||
@ -32,13 +40,14 @@ async fn create_mock() -> (MockServer, Value) {
|
|||||||
"request": "{{text}}",
|
"request": "{{text}}",
|
||||||
"response": {
|
"response": {
|
||||||
"data": "{{embedding}}"
|
"data": "{{embedding}}"
|
||||||
}
|
},
|
||||||
|
"documentTemplate": "{{doc.name}}",
|
||||||
});
|
});
|
||||||
|
|
||||||
(mock_server, embedder_settings)
|
(mock_server, embedder_settings)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn create_mock_map() -> (MockServer, Value) {
|
async fn create_mock_default_template() -> (MockServer, Value) {
|
||||||
let mock_server = MockServer::start().await;
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||||
@ -97,7 +106,14 @@ struct SingleResponse {
|
|||||||
async fn create_mock_multiple() -> (MockServer, Value) {
|
async fn create_mock_multiple() -> (MockServer, Value) {
|
||||||
let mock_server = MockServer::start().await;
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
let counter = AtomicUsize::new(0);
|
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||||
|
// text -> embedding
|
||||||
|
("kefir", [0.0, 0.0, 0.0]),
|
||||||
|
("intel", [1.0, 1.0, 1.0]),
|
||||||
|
]
|
||||||
|
// turn into btree
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
Mock::given(method("POST"))
|
Mock::given(method("POST"))
|
||||||
.and(path("/"))
|
.and(path("/"))
|
||||||
@ -115,8 +131,11 @@ async fn create_mock_multiple() -> (MockServer, Value) {
|
|||||||
.input
|
.input
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|text| SingleResponse {
|
.map(|text| SingleResponse {
|
||||||
|
embedding: text_to_embedding
|
||||||
|
.get(text.as_str())
|
||||||
|
.unwrap_or(&[99., 99., 99.])
|
||||||
|
.to_vec(),
|
||||||
text,
|
text,
|
||||||
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
|
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@ -142,7 +161,8 @@ async fn create_mock_multiple() -> (MockServer, Value) {
|
|||||||
},
|
},
|
||||||
"{{..}}"
|
"{{..}}"
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"documentTemplate": "{{doc.name}}"
|
||||||
});
|
});
|
||||||
|
|
||||||
(mock_server, embedder_settings)
|
(mock_server, embedder_settings)
|
||||||
@ -156,7 +176,14 @@ struct SingleRequest {
|
|||||||
async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
||||||
let mock_server = MockServer::start().await;
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
let counter = AtomicUsize::new(0);
|
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||||
|
// text -> embedding
|
||||||
|
("kefir", [0.0, 0.0, 0.0]),
|
||||||
|
("intel", [1.0, 1.0, 1.0]),
|
||||||
|
]
|
||||||
|
// turn into btree
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
Mock::given(method("POST"))
|
Mock::given(method("POST"))
|
||||||
.and(path("/"))
|
.and(path("/"))
|
||||||
@ -171,8 +198,11 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let output = vec![SingleResponse {
|
let output = vec![SingleResponse {
|
||||||
|
embedding: text_to_embedding
|
||||||
|
.get(req.input.as_str())
|
||||||
|
.unwrap_or(&[99., 99., 99.])
|
||||||
|
.to_vec(),
|
||||||
text: req.input,
|
text: req.input,
|
||||||
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
|
|
||||||
}];
|
}];
|
||||||
|
|
||||||
let response = MultipleResponse { output };
|
let response = MultipleResponse { output };
|
||||||
@ -196,7 +226,8 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
|||||||
"embedding": "{{embedding}}"
|
"embedding": "{{embedding}}"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"documentTemplate": "{{doc.name}}"
|
||||||
});
|
});
|
||||||
|
|
||||||
(mock_server, embedder_settings)
|
(mock_server, embedder_settings)
|
||||||
@ -205,7 +236,14 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
|||||||
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||||
let mock_server = MockServer::start().await;
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
let counter = AtomicUsize::new(0);
|
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||||
|
// text -> embedding
|
||||||
|
("kefir", [0.0, 0.0, 0.0]),
|
||||||
|
("intel", [1.0, 1.0, 1.0]),
|
||||||
|
]
|
||||||
|
// turn into btree
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
Mock::given(method("POST"))
|
Mock::given(method("POST"))
|
||||||
.and(path("/"))
|
.and(path("/"))
|
||||||
@ -223,7 +261,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let _req: String = match req.body_json() {
|
let req: String = match req.body_json() {
|
||||||
Ok(req) => req,
|
Ok(req) => req,
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
return ResponseTemplate::new(400).set_body_json(json!({
|
return ResponseTemplate::new(400).set_body_json(json!({
|
||||||
@ -232,7 +270,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
|
let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec();
|
||||||
|
|
||||||
ResponseTemplate::new(200).set_body_json(output)
|
ResponseTemplate::new(200).set_body_json(output)
|
||||||
})
|
})
|
||||||
@ -245,7 +283,8 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
|||||||
"url": url,
|
"url": url,
|
||||||
"request": "{{text}}",
|
"request": "{{text}}",
|
||||||
"response": "{{embedding}}",
|
"response": "{{embedding}}",
|
||||||
"headers": {"my-nonstandard-auth": "bearer of the ring"}
|
"headers": {"my-nonstandard-auth": "bearer of the ring"},
|
||||||
|
"documentTemplate": "{{doc.name}}"
|
||||||
});
|
});
|
||||||
|
|
||||||
(mock_server, embedder_settings)
|
(mock_server, embedder_settings)
|
||||||
@ -254,12 +293,19 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
|||||||
async fn create_mock_raw() -> (MockServer, Value) {
|
async fn create_mock_raw() -> (MockServer, Value) {
|
||||||
let mock_server = MockServer::start().await;
|
let mock_server = MockServer::start().await;
|
||||||
|
|
||||||
let counter = AtomicUsize::new(0);
|
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||||
|
// text -> embedding
|
||||||
|
("kefir", [0.0, 0.0, 0.0]),
|
||||||
|
("intel", [1.0, 1.0, 1.0]),
|
||||||
|
]
|
||||||
|
// turn into btree
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
|
||||||
Mock::given(method("POST"))
|
Mock::given(method("POST"))
|
||||||
.and(path("/"))
|
.and(path("/"))
|
||||||
.respond_with(move |req: &Request| {
|
.respond_with(move |req: &Request| {
|
||||||
let _req: String = match req.body_json() {
|
let req: String = match req.body_json() {
|
||||||
Ok(req) => req,
|
Ok(req) => req,
|
||||||
Err(error) => {
|
Err(error) => {
|
||||||
return ResponseTemplate::new(400).set_body_json(json!({
|
return ResponseTemplate::new(400).set_body_json(json!({
|
||||||
@ -268,7 +314,7 @@ async fn create_mock_raw() -> (MockServer, Value) {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
|
let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec();
|
||||||
|
|
||||||
ResponseTemplate::new(200).set_body_json(output)
|
ResponseTemplate::new(200).set_body_json(output)
|
||||||
})
|
})
|
||||||
@ -281,29 +327,30 @@ async fn create_mock_raw() -> (MockServer, Value) {
|
|||||||
"url": url,
|
"url": url,
|
||||||
"dimensions": 3,
|
"dimensions": 3,
|
||||||
"request": "{{text}}",
|
"request": "{{text}}",
|
||||||
"response": "{{embedding}}"
|
"response": "{{embedding}}",
|
||||||
|
"documentTemplate": "{{doc.name}}"
|
||||||
});
|
});
|
||||||
|
|
||||||
(mock_server, embedder_settings)
|
(mock_server, embedder_settings)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn post<T: IntoUrl>(url: T) -> reqwest::Result<reqwest::Response> {
|
pub async fn post<T: IntoUrl>(url: T, text: &str) -> reqwest::Result<reqwest::Response> {
|
||||||
reqwest::Client::builder().build()?.post(url).send().await
|
reqwest::Client::builder().build()?.post(url).json(&json!(text)).send().await
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn dummy_testing_the_mock() {
|
async fn dummy_testing_the_mock() {
|
||||||
let (mock, _setting) = create_mock().await;
|
let (mock, _setting) = create_mock().await;
|
||||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
|
||||||
snapshot!(body, @r###"{"data":[0,0,0]}"###);
|
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
|
||||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap();
|
||||||
snapshot!(body, @r###"{"data":[1,1,1]}"###);
|
snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###);
|
||||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
|
||||||
snapshot!(body, @r###"{"data":[2,2,2]}"###);
|
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
|
||||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
|
||||||
snapshot!(body, @r###"{"data":[3,3,3]}"###);
|
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
|
||||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap();
|
||||||
snapshot!(body, @r###"{"data":[4,4,4]}"###);
|
snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
@ -953,7 +1000,7 @@ async fn bad_settings() {
|
|||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.update_settings(json!({
|
.update_settings(json!({
|
||||||
"embedders": {
|
"embedders": {
|
||||||
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2 }),
|
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2, "documentTemplate": "{{doc.name}}" }),
|
||||||
},
|
},
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
@ -1920,6 +1967,7 @@ async fn server_custom_header() {
|
|||||||
"embedders": {
|
"embedders": {
|
||||||
"rest": {
|
"rest": {
|
||||||
"source": "rest",
|
"source": "rest",
|
||||||
|
"documentTemplate": "{{doc.name}}",
|
||||||
"url": "[url]",
|
"url": "[url]",
|
||||||
"request": "{{text}}",
|
"request": "{{text}}",
|
||||||
"response": "{{embedding}}",
|
"response": "{{embedding}}",
|
||||||
@ -1940,7 +1988,7 @@ async fn server_custom_header() {
|
|||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn searchable_reindex() {
|
async fn searchable_reindex() {
|
||||||
let (_mock, setting) = create_mock_map().await;
|
let (_mock, setting) = create_mock_default_template().await;
|
||||||
let server = get_server_vector().await;
|
let server = get_server_vector().await;
|
||||||
let index = server.index("doggo");
|
let index = server.index("doggo");
|
||||||
|
|
||||||
|
@ -115,11 +115,8 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
|||||||
new_vectors.vectors_for_key(embedder_name).transpose()
|
new_vectors.vectors_for_key(embedder_name).transpose()
|
||||||
}) {
|
}) {
|
||||||
let new_vectors = new_vectors?;
|
let new_vectors = new_vectors?;
|
||||||
match (old_vectors.regenerate, new_vectors.regenerate) {
|
if old_vectors.regenerate != new_vectors.regenerate {
|
||||||
(true, true) | (false, false) => todo!(),
|
chunks.set_regenerate(update.docid(), new_vectors.regenerate);
|
||||||
_ => {
|
|
||||||
chunks.set_regenerate(update.docid(), new_vectors.regenerate);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// do we have set embeddings?
|
// do we have set embeddings?
|
||||||
if let Some(embeddings) = new_vectors.embeddings {
|
if let Some(embeddings) = new_vectors.embeddings {
|
||||||
|
@ -180,7 +180,13 @@ fn entry_from_raw_value(
|
|||||||
},
|
},
|
||||||
RawVectors::ImplicitlyUserProvided(value) => VectorEntry {
|
RawVectors::ImplicitlyUserProvided(value) => VectorEntry {
|
||||||
has_configured_embedder,
|
has_configured_embedder,
|
||||||
embeddings: value.map(Embeddings::FromJsonImplicityUserProvided),
|
// implicitly user provided always provide embeddings
|
||||||
|
// `None` here means that there are no embeddings
|
||||||
|
embeddings: Some(
|
||||||
|
value
|
||||||
|
.map(Embeddings::FromJsonImplicityUserProvided)
|
||||||
|
.unwrap_or(Embeddings::FromDb(Default::default())),
|
||||||
|
),
|
||||||
regenerate: false,
|
regenerate: false,
|
||||||
implicit: true,
|
implicit: true,
|
||||||
},
|
},
|
||||||
|
@ -113,7 +113,7 @@ impl Embedder {
|
|||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.par_chunks(self.chunk_count_hint())
|
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed(chunk))
|
.map(move |chunk| self.embed(chunk))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
@ -266,7 +266,7 @@ impl Embedder {
|
|||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.par_chunks(self.chunk_count_hint())
|
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed(chunk))
|
.map(move |chunk| self.embed(chunk))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
@ -193,7 +193,7 @@ impl Embedder {
|
|||||||
threads
|
threads
|
||||||
.install(move || {
|
.install(move || {
|
||||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||||
.par_chunks(self.chunk_count_hint())
|
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||||
.map(move |chunk| self.embed_ref(chunk))
|
.map(move |chunk| self.embed_ref(chunk))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user