Compare commits

...

2 Commits

Author SHA1 Message Date
Louis Dureuil
68bbf674c9
Make REST mock thread independent 2024-11-12 16:31:31 +01:00
Louis Dureuil
980921e078
Vector fixes 2024-11-12 16:31:22 +01:00
6 changed files with 95 additions and 44 deletions

View File

@ -1,5 +1,4 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use meili_snap::{json_string, snapshot}; use meili_snap::{json_string, snapshot};
use reqwest::IntoUrl; use reqwest::IntoUrl;
@ -13,13 +12,22 @@ use crate::vector::{get_server_vector, GetAllDocumentsOptions};
async fn create_mock() -> (MockServer, Value) { async fn create_mock() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0); let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
.respond_with(move |_req: &Request| { .respond_with(move |req: &Request| {
let counter = counter.fetch_add(1, Ordering::Relaxed); let text: String = req.body_json().unwrap();
ResponseTemplate::new(200).set_body_json(json!({ "data": vec![counter; 3] })) ResponseTemplate::new(200).set_body_json(
json!({ "data": text_to_embedding.get(text.as_str()).unwrap_or(&[99., 99., 99.]) }),
)
}) })
.mount(&mock_server) .mount(&mock_server)
.await; .await;
@ -32,13 +40,14 @@ async fn create_mock() -> (MockServer, Value) {
"request": "{{text}}", "request": "{{text}}",
"response": { "response": {
"data": "{{embedding}}" "data": "{{embedding}}"
} },
"documentTemplate": "{{doc.name}}",
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
} }
async fn create_mock_map() -> (MockServer, Value) { async fn create_mock_default_template() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![ let text_to_embedding: BTreeMap<_, _> = vec![
@ -97,7 +106,14 @@ struct SingleResponse {
async fn create_mock_multiple() -> (MockServer, Value) { async fn create_mock_multiple() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0); let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
@ -115,8 +131,11 @@ async fn create_mock_multiple() -> (MockServer, Value) {
.input .input
.into_iter() .into_iter()
.map(|text| SingleResponse { .map(|text| SingleResponse {
embedding: text_to_embedding
.get(text.as_str())
.unwrap_or(&[99., 99., 99.])
.to_vec(),
text, text,
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
}) })
.collect(); .collect();
@ -142,7 +161,8 @@ async fn create_mock_multiple() -> (MockServer, Value) {
}, },
"{{..}}" "{{..}}"
] ]
} },
"documentTemplate": "{{doc.name}}"
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
@ -156,7 +176,14 @@ struct SingleRequest {
async fn create_mock_single_response_in_array() -> (MockServer, Value) { async fn create_mock_single_response_in_array() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0); let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
@ -171,8 +198,11 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
}; };
let output = vec![SingleResponse { let output = vec![SingleResponse {
embedding: text_to_embedding
.get(req.input.as_str())
.unwrap_or(&[99., 99., 99.])
.to_vec(),
text: req.input, text: req.input,
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
}]; }];
let response = MultipleResponse { output }; let response = MultipleResponse { output };
@ -196,7 +226,8 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
"embedding": "{{embedding}}" "embedding": "{{embedding}}"
} }
] ]
} },
"documentTemplate": "{{doc.name}}"
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
@ -205,7 +236,14 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) { async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0); let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
@ -223,7 +261,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
} }
} }
let _req: String = match req.body_json() { let req: String = match req.body_json() {
Ok(req) => req, Ok(req) => req,
Err(error) => { Err(error) => {
return ResponseTemplate::new(400).set_body_json(json!({ return ResponseTemplate::new(400).set_body_json(json!({
@ -232,7 +270,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
} }
}; };
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3]; let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec();
ResponseTemplate::new(200).set_body_json(output) ResponseTemplate::new(200).set_body_json(output)
}) })
@ -245,7 +283,8 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
"url": url, "url": url,
"request": "{{text}}", "request": "{{text}}",
"response": "{{embedding}}", "response": "{{embedding}}",
"headers": {"my-nonstandard-auth": "bearer of the ring"} "headers": {"my-nonstandard-auth": "bearer of the ring"},
"documentTemplate": "{{doc.name}}"
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
@ -254,12 +293,19 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
async fn create_mock_raw() -> (MockServer, Value) { async fn create_mock_raw() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0); let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
.respond_with(move |req: &Request| { .respond_with(move |req: &Request| {
let _req: String = match req.body_json() { let req: String = match req.body_json() {
Ok(req) => req, Ok(req) => req,
Err(error) => { Err(error) => {
return ResponseTemplate::new(400).set_body_json(json!({ return ResponseTemplate::new(400).set_body_json(json!({
@ -268,7 +314,7 @@ async fn create_mock_raw() -> (MockServer, Value) {
} }
}; };
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3]; let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec();
ResponseTemplate::new(200).set_body_json(output) ResponseTemplate::new(200).set_body_json(output)
}) })
@ -281,29 +327,30 @@ async fn create_mock_raw() -> (MockServer, Value) {
"url": url, "url": url,
"dimensions": 3, "dimensions": 3,
"request": "{{text}}", "request": "{{text}}",
"response": "{{embedding}}" "response": "{{embedding}}",
"documentTemplate": "{{doc.name}}"
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
} }
pub async fn post<T: IntoUrl>(url: T) -> reqwest::Result<reqwest::Response> { pub async fn post<T: IntoUrl>(url: T, text: &str) -> reqwest::Result<reqwest::Response> {
reqwest::Client::builder().build()?.post(url).send().await reqwest::Client::builder().build()?.post(url).json(&json!(text)).send().await
} }
#[actix_rt::test] #[actix_rt::test]
async fn dummy_testing_the_mock() { async fn dummy_testing_the_mock() {
let (mock, _setting) = create_mock().await; let (mock, _setting) = create_mock().await;
let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[0,0,0]}"###); snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[1,1,1]}"###); snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###);
let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[2,2,2]}"###); snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[3,3,3]}"###); snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[4,4,4]}"###); snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###);
} }
#[actix_rt::test] #[actix_rt::test]
@ -953,7 +1000,7 @@ async fn bad_settings() {
let (response, code) = index let (response, code) = index
.update_settings(json!({ .update_settings(json!({
"embedders": { "embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2 }), "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2, "documentTemplate": "{{doc.name}}" }),
}, },
})) }))
.await; .await;
@ -1920,6 +1967,7 @@ async fn server_custom_header() {
"embedders": { "embedders": {
"rest": { "rest": {
"source": "rest", "source": "rest",
"documentTemplate": "{{doc.name}}",
"url": "[url]", "url": "[url]",
"request": "{{text}}", "request": "{{text}}",
"response": "{{embedding}}", "response": "{{embedding}}",
@ -1940,7 +1988,7 @@ async fn server_custom_header() {
#[actix_rt::test] #[actix_rt::test]
async fn searchable_reindex() { async fn searchable_reindex() {
let (_mock, setting) = create_mock_map().await; let (_mock, setting) = create_mock_default_template().await;
let server = get_server_vector().await; let server = get_server_vector().await;
let index = server.index("doggo"); let index = server.index("doggo");

View File

@ -115,12 +115,9 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
new_vectors.vectors_for_key(embedder_name).transpose() new_vectors.vectors_for_key(embedder_name).transpose()
}) { }) {
let new_vectors = new_vectors?; let new_vectors = new_vectors?;
match (old_vectors.regenerate, new_vectors.regenerate) { if old_vectors.regenerate != new_vectors.regenerate {
(true, true) | (false, false) => todo!(),
_ => {
chunks.set_regenerate(update.docid(), new_vectors.regenerate); chunks.set_regenerate(update.docid(), new_vectors.regenerate);
} }
}
// do we have set embeddings? // do we have set embeddings?
if let Some(embeddings) = new_vectors.embeddings { if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors( chunks.set_vectors(

View File

@ -180,7 +180,13 @@ fn entry_from_raw_value(
}, },
RawVectors::ImplicitlyUserProvided(value) => VectorEntry { RawVectors::ImplicitlyUserProvided(value) => VectorEntry {
has_configured_embedder, has_configured_embedder,
embeddings: value.map(Embeddings::FromJsonImplicityUserProvided), // implicitly user provided always provide embeddings
// `None` here means that there are no embeddings
embeddings: Some(
value
.map(Embeddings::FromJsonImplicityUserProvided)
.unwrap_or(Embeddings::FromDb(Default::default())),
),
regenerate: false, regenerate: false,
implicit: true, implicit: true,
}, },

View File

@ -113,7 +113,7 @@ impl Embedder {
threads threads
.install(move || { .install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.chunk_count_hint()) .par_chunks(self.prompt_count_in_chunk_hint())
.map(move |chunk| self.embed(chunk)) .map(move |chunk| self.embed(chunk))
.collect(); .collect();

View File

@ -266,7 +266,7 @@ impl Embedder {
threads threads
.install(move || { .install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.chunk_count_hint()) .par_chunks(self.prompt_count_in_chunk_hint())
.map(move |chunk| self.embed(chunk)) .map(move |chunk| self.embed(chunk))
.collect(); .collect();

View File

@ -193,7 +193,7 @@ impl Embedder {
threads threads
.install(move || { .install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.chunk_count_hint()) .par_chunks(self.prompt_count_in_chunk_hint())
.map(move |chunk| self.embed_ref(chunk)) .map(move |chunk| self.embed_ref(chunk))
.collect(); .collect();