Compare commits

..

No commits in common. "68bbf674c9fe3641b33b867a6f43abf95c7fbe07" and "1fcd5f091ec158e8f0bb2c2942d727ac0506c89b" have entirely different histories.

6 changed files with 44 additions and 95 deletions

View File

@ -1,4 +1,5 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use meili_snap::{json_string, snapshot}; use meili_snap::{json_string, snapshot};
use reqwest::IntoUrl; use reqwest::IntoUrl;
@ -12,22 +13,13 @@ use crate::vector::{get_server_vector, GetAllDocumentsOptions};
async fn create_mock() -> (MockServer, Value) { async fn create_mock() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![ let counter = AtomicUsize::new(0);
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
.respond_with(move |req: &Request| { .respond_with(move |_req: &Request| {
let text: String = req.body_json().unwrap(); let counter = counter.fetch_add(1, Ordering::Relaxed);
ResponseTemplate::new(200).set_body_json( ResponseTemplate::new(200).set_body_json(json!({ "data": vec![counter; 3] }))
json!({ "data": text_to_embedding.get(text.as_str()).unwrap_or(&[99., 99., 99.]) }),
)
}) })
.mount(&mock_server) .mount(&mock_server)
.await; .await;
@ -40,14 +32,13 @@ async fn create_mock() -> (MockServer, Value) {
"request": "{{text}}", "request": "{{text}}",
"response": { "response": {
"data": "{{embedding}}" "data": "{{embedding}}"
}, }
"documentTemplate": "{{doc.name}}",
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
} }
async fn create_mock_default_template() -> (MockServer, Value) { async fn create_mock_map() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![ let text_to_embedding: BTreeMap<_, _> = vec![
@ -106,14 +97,7 @@ struct SingleResponse {
async fn create_mock_multiple() -> (MockServer, Value) { async fn create_mock_multiple() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![ let counter = AtomicUsize::new(0);
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
@ -131,11 +115,8 @@ async fn create_mock_multiple() -> (MockServer, Value) {
.input .input
.into_iter() .into_iter()
.map(|text| SingleResponse { .map(|text| SingleResponse {
embedding: text_to_embedding
.get(text.as_str())
.unwrap_or(&[99., 99., 99.])
.to_vec(),
text, text,
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
}) })
.collect(); .collect();
@ -161,8 +142,7 @@ async fn create_mock_multiple() -> (MockServer, Value) {
}, },
"{{..}}" "{{..}}"
] ]
}, }
"documentTemplate": "{{doc.name}}"
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
@ -176,14 +156,7 @@ struct SingleRequest {
async fn create_mock_single_response_in_array() -> (MockServer, Value) { async fn create_mock_single_response_in_array() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![ let counter = AtomicUsize::new(0);
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
@ -198,11 +171,8 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
}; };
let output = vec![SingleResponse { let output = vec![SingleResponse {
embedding: text_to_embedding
.get(req.input.as_str())
.unwrap_or(&[99., 99., 99.])
.to_vec(),
text: req.input, text: req.input,
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
}]; }];
let response = MultipleResponse { output }; let response = MultipleResponse { output };
@ -226,8 +196,7 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
"embedding": "{{embedding}}" "embedding": "{{embedding}}"
} }
] ]
}, }
"documentTemplate": "{{doc.name}}"
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
@ -236,14 +205,7 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) { async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![ let counter = AtomicUsize::new(0);
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
@ -261,7 +223,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
} }
} }
let req: String = match req.body_json() { let _req: String = match req.body_json() {
Ok(req) => req, Ok(req) => req,
Err(error) => { Err(error) => {
return ResponseTemplate::new(400).set_body_json(json!({ return ResponseTemplate::new(400).set_body_json(json!({
@ -270,7 +232,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
} }
}; };
let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec(); let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
ResponseTemplate::new(200).set_body_json(output) ResponseTemplate::new(200).set_body_json(output)
}) })
@ -283,8 +245,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
"url": url, "url": url,
"request": "{{text}}", "request": "{{text}}",
"response": "{{embedding}}", "response": "{{embedding}}",
"headers": {"my-nonstandard-auth": "bearer of the ring"}, "headers": {"my-nonstandard-auth": "bearer of the ring"}
"documentTemplate": "{{doc.name}}"
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
@ -293,19 +254,12 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
async fn create_mock_raw() -> (MockServer, Value) { async fn create_mock_raw() -> (MockServer, Value) {
let mock_server = MockServer::start().await; let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![ let counter = AtomicUsize::new(0);
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST")) Mock::given(method("POST"))
.and(path("/")) .and(path("/"))
.respond_with(move |req: &Request| { .respond_with(move |req: &Request| {
let req: String = match req.body_json() { let _req: String = match req.body_json() {
Ok(req) => req, Ok(req) => req,
Err(error) => { Err(error) => {
return ResponseTemplate::new(400).set_body_json(json!({ return ResponseTemplate::new(400).set_body_json(json!({
@ -314,7 +268,7 @@ async fn create_mock_raw() -> (MockServer, Value) {
} }
}; };
let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec(); let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
ResponseTemplate::new(200).set_body_json(output) ResponseTemplate::new(200).set_body_json(output)
}) })
@ -327,30 +281,29 @@ async fn create_mock_raw() -> (MockServer, Value) {
"url": url, "url": url,
"dimensions": 3, "dimensions": 3,
"request": "{{text}}", "request": "{{text}}",
"response": "{{embedding}}", "response": "{{embedding}}"
"documentTemplate": "{{doc.name}}"
}); });
(mock_server, embedder_settings) (mock_server, embedder_settings)
} }
pub async fn post<T: IntoUrl>(url: T, text: &str) -> reqwest::Result<reqwest::Response> { pub async fn post<T: IntoUrl>(url: T) -> reqwest::Result<reqwest::Response> {
reqwest::Client::builder().build()?.post(url).json(&json!(text)).send().await reqwest::Client::builder().build()?.post(url).send().await
} }
#[actix_rt::test] #[actix_rt::test]
async fn dummy_testing_the_mock() { async fn dummy_testing_the_mock() {
let (mock, _setting) = create_mock().await; let (mock, _setting) = create_mock().await;
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap(); let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###); snapshot!(body, @r###"{"data":[0,0,0]}"###);
let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap(); let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###); snapshot!(body, @r###"{"data":[1,1,1]}"###);
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap(); let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###); snapshot!(body, @r###"{"data":[2,2,2]}"###);
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap(); let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###); snapshot!(body, @r###"{"data":[3,3,3]}"###);
let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap(); let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###); snapshot!(body, @r###"{"data":[4,4,4]}"###);
} }
#[actix_rt::test] #[actix_rt::test]
@ -1000,7 +953,7 @@ async fn bad_settings() {
let (response, code) = index let (response, code) = index
.update_settings(json!({ .update_settings(json!({
"embedders": { "embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2, "documentTemplate": "{{doc.name}}" }), "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2 }),
}, },
})) }))
.await; .await;
@ -1967,7 +1920,6 @@ async fn server_custom_header() {
"embedders": { "embedders": {
"rest": { "rest": {
"source": "rest", "source": "rest",
"documentTemplate": "{{doc.name}}",
"url": "[url]", "url": "[url]",
"request": "{{text}}", "request": "{{text}}",
"response": "{{embedding}}", "response": "{{embedding}}",
@ -1988,7 +1940,7 @@ async fn server_custom_header() {
#[actix_rt::test] #[actix_rt::test]
async fn searchable_reindex() { async fn searchable_reindex() {
let (_mock, setting) = create_mock_default_template().await; let (_mock, setting) = create_mock_map().await;
let server = get_server_vector().await; let server = get_server_vector().await;
let index = server.index("doggo"); let index = server.index("doggo");

View File

@ -115,8 +115,11 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
new_vectors.vectors_for_key(embedder_name).transpose() new_vectors.vectors_for_key(embedder_name).transpose()
}) { }) {
let new_vectors = new_vectors?; let new_vectors = new_vectors?;
if old_vectors.regenerate != new_vectors.regenerate { match (old_vectors.regenerate, new_vectors.regenerate) {
chunks.set_regenerate(update.docid(), new_vectors.regenerate); (true, true) | (false, false) => todo!(),
_ => {
chunks.set_regenerate(update.docid(), new_vectors.regenerate);
}
} }
// do we have set embeddings? // do we have set embeddings?
if let Some(embeddings) = new_vectors.embeddings { if let Some(embeddings) = new_vectors.embeddings {

View File

@ -180,13 +180,7 @@ fn entry_from_raw_value(
}, },
RawVectors::ImplicitlyUserProvided(value) => VectorEntry { RawVectors::ImplicitlyUserProvided(value) => VectorEntry {
has_configured_embedder, has_configured_embedder,
// implicitly user provided always provide embeddings embeddings: value.map(Embeddings::FromJsonImplicityUserProvided),
// `None` here means that there are no embeddings
embeddings: Some(
value
.map(Embeddings::FromJsonImplicityUserProvided)
.unwrap_or(Embeddings::FromDb(Default::default())),
),
regenerate: false, regenerate: false,
implicit: true, implicit: true,
}, },

View File

@ -113,7 +113,7 @@ impl Embedder {
threads threads
.install(move || { .install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.prompt_count_in_chunk_hint()) .par_chunks(self.chunk_count_hint())
.map(move |chunk| self.embed(chunk)) .map(move |chunk| self.embed(chunk))
.collect(); .collect();

View File

@ -266,7 +266,7 @@ impl Embedder {
threads threads
.install(move || { .install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.prompt_count_in_chunk_hint()) .par_chunks(self.chunk_count_hint())
.map(move |chunk| self.embed(chunk)) .map(move |chunk| self.embed(chunk))
.collect(); .collect();

View File

@ -193,7 +193,7 @@ impl Embedder {
threads threads
.install(move || { .install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.prompt_count_in_chunk_hint()) .par_chunks(self.chunk_count_hint())
.map(move |chunk| self.embed_ref(chunk)) .map(move |chunk| self.embed_ref(chunk))
.collect(); .collect();