mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 12:05:05 +08:00
Compare commits
No commits in common. "68bbf674c9fe3641b33b867a6f43abf95c7fbe07" and "1fcd5f091ec158e8f0bb2c2942d727ac0506c89b" have entirely different histories.
68bbf674c9
...
1fcd5f091e
@ -1,4 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use reqwest::IntoUrl;
|
||||
@ -12,22 +13,13 @@ use crate::vector::{get_server_vector, GetAllDocumentsOptions};
|
||||
async fn create_mock() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
("kefir", [0.0, 0.0, 0.0]),
|
||||
("intel", [1.0, 1.0, 1.0]),
|
||||
]
|
||||
// turn into btree
|
||||
.into_iter()
|
||||
.collect();
|
||||
let counter = AtomicUsize::new(0);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/"))
|
||||
.respond_with(move |req: &Request| {
|
||||
let text: String = req.body_json().unwrap();
|
||||
ResponseTemplate::new(200).set_body_json(
|
||||
json!({ "data": text_to_embedding.get(text.as_str()).unwrap_or(&[99., 99., 99.]) }),
|
||||
)
|
||||
.respond_with(move |_req: &Request| {
|
||||
let counter = counter.fetch_add(1, Ordering::Relaxed);
|
||||
ResponseTemplate::new(200).set_body_json(json!({ "data": vec![counter; 3] }))
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
@ -40,14 +32,13 @@ async fn create_mock() -> (MockServer, Value) {
|
||||
"request": "{{text}}",
|
||||
"response": {
|
||||
"data": "{{embedding}}"
|
||||
},
|
||||
"documentTemplate": "{{doc.name}}",
|
||||
}
|
||||
});
|
||||
|
||||
(mock_server, embedder_settings)
|
||||
}
|
||||
|
||||
async fn create_mock_default_template() -> (MockServer, Value) {
|
||||
async fn create_mock_map() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
@ -106,14 +97,7 @@ struct SingleResponse {
|
||||
async fn create_mock_multiple() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
("kefir", [0.0, 0.0, 0.0]),
|
||||
("intel", [1.0, 1.0, 1.0]),
|
||||
]
|
||||
// turn into btree
|
||||
.into_iter()
|
||||
.collect();
|
||||
let counter = AtomicUsize::new(0);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/"))
|
||||
@ -131,11 +115,8 @@ async fn create_mock_multiple() -> (MockServer, Value) {
|
||||
.input
|
||||
.into_iter()
|
||||
.map(|text| SingleResponse {
|
||||
embedding: text_to_embedding
|
||||
.get(text.as_str())
|
||||
.unwrap_or(&[99., 99., 99.])
|
||||
.to_vec(),
|
||||
text,
|
||||
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
|
||||
})
|
||||
.collect();
|
||||
|
||||
@ -161,8 +142,7 @@ async fn create_mock_multiple() -> (MockServer, Value) {
|
||||
},
|
||||
"{{..}}"
|
||||
]
|
||||
},
|
||||
"documentTemplate": "{{doc.name}}"
|
||||
}
|
||||
});
|
||||
|
||||
(mock_server, embedder_settings)
|
||||
@ -176,14 +156,7 @@ struct SingleRequest {
|
||||
async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
("kefir", [0.0, 0.0, 0.0]),
|
||||
("intel", [1.0, 1.0, 1.0]),
|
||||
]
|
||||
// turn into btree
|
||||
.into_iter()
|
||||
.collect();
|
||||
let counter = AtomicUsize::new(0);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/"))
|
||||
@ -198,11 +171,8 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
||||
};
|
||||
|
||||
let output = vec![SingleResponse {
|
||||
embedding: text_to_embedding
|
||||
.get(req.input.as_str())
|
||||
.unwrap_or(&[99., 99., 99.])
|
||||
.to_vec(),
|
||||
text: req.input,
|
||||
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
|
||||
}];
|
||||
|
||||
let response = MultipleResponse { output };
|
||||
@ -226,8 +196,7 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
||||
"embedding": "{{embedding}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
"documentTemplate": "{{doc.name}}"
|
||||
}
|
||||
});
|
||||
|
||||
(mock_server, embedder_settings)
|
||||
@ -236,14 +205,7 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
|
||||
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
("kefir", [0.0, 0.0, 0.0]),
|
||||
("intel", [1.0, 1.0, 1.0]),
|
||||
]
|
||||
// turn into btree
|
||||
.into_iter()
|
||||
.collect();
|
||||
let counter = AtomicUsize::new(0);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/"))
|
||||
@ -261,7 +223,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||
}
|
||||
}
|
||||
|
||||
let req: String = match req.body_json() {
|
||||
let _req: String = match req.body_json() {
|
||||
Ok(req) => req,
|
||||
Err(error) => {
|
||||
return ResponseTemplate::new(400).set_body_json(json!({
|
||||
@ -270,7 +232,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||
}
|
||||
};
|
||||
|
||||
let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec();
|
||||
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
|
||||
|
||||
ResponseTemplate::new(200).set_body_json(output)
|
||||
})
|
||||
@ -283,8 +245,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||
"url": url,
|
||||
"request": "{{text}}",
|
||||
"response": "{{embedding}}",
|
||||
"headers": {"my-nonstandard-auth": "bearer of the ring"},
|
||||
"documentTemplate": "{{doc.name}}"
|
||||
"headers": {"my-nonstandard-auth": "bearer of the ring"}
|
||||
});
|
||||
|
||||
(mock_server, embedder_settings)
|
||||
@ -293,19 +254,12 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
|
||||
async fn create_mock_raw() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
("kefir", [0.0, 0.0, 0.0]),
|
||||
("intel", [1.0, 1.0, 1.0]),
|
||||
]
|
||||
// turn into btree
|
||||
.into_iter()
|
||||
.collect();
|
||||
let counter = AtomicUsize::new(0);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/"))
|
||||
.respond_with(move |req: &Request| {
|
||||
let req: String = match req.body_json() {
|
||||
let _req: String = match req.body_json() {
|
||||
Ok(req) => req,
|
||||
Err(error) => {
|
||||
return ResponseTemplate::new(400).set_body_json(json!({
|
||||
@ -314,7 +268,7 @@ async fn create_mock_raw() -> (MockServer, Value) {
|
||||
}
|
||||
};
|
||||
|
||||
let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec();
|
||||
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
|
||||
|
||||
ResponseTemplate::new(200).set_body_json(output)
|
||||
})
|
||||
@ -327,30 +281,29 @@ async fn create_mock_raw() -> (MockServer, Value) {
|
||||
"url": url,
|
||||
"dimensions": 3,
|
||||
"request": "{{text}}",
|
||||
"response": "{{embedding}}",
|
||||
"documentTemplate": "{{doc.name}}"
|
||||
"response": "{{embedding}}"
|
||||
});
|
||||
|
||||
(mock_server, embedder_settings)
|
||||
}
|
||||
|
||||
pub async fn post<T: IntoUrl>(url: T, text: &str) -> reqwest::Result<reqwest::Response> {
|
||||
reqwest::Client::builder().build()?.post(url).json(&json!(text)).send().await
|
||||
pub async fn post<T: IntoUrl>(url: T) -> reqwest::Result<reqwest::Response> {
|
||||
reqwest::Client::builder().build()?.post(url).send().await
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn dummy_testing_the_mock() {
|
||||
let (mock, _setting) = create_mock().await;
|
||||
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
|
||||
let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###);
|
||||
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
|
||||
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
|
||||
let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###);
|
||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[0,0,0]}"###);
|
||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[1,1,1]}"###);
|
||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[2,2,2]}"###);
|
||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[3,3,3]}"###);
|
||||
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
|
||||
snapshot!(body, @r###"{"data":[4,4,4]}"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -1000,7 +953,7 @@ async fn bad_settings() {
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2, "documentTemplate": "{{doc.name}}" }),
|
||||
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2 }),
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
@ -1967,7 +1920,6 @@ async fn server_custom_header() {
|
||||
"embedders": {
|
||||
"rest": {
|
||||
"source": "rest",
|
||||
"documentTemplate": "{{doc.name}}",
|
||||
"url": "[url]",
|
||||
"request": "{{text}}",
|
||||
"response": "{{embedding}}",
|
||||
@ -1988,7 +1940,7 @@ async fn server_custom_header() {
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn searchable_reindex() {
|
||||
let (_mock, setting) = create_mock_default_template().await;
|
||||
let (_mock, setting) = create_mock_map().await;
|
||||
let server = get_server_vector().await;
|
||||
let index = server.index("doggo");
|
||||
|
||||
|
@ -115,8 +115,11 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
|
||||
new_vectors.vectors_for_key(embedder_name).transpose()
|
||||
}) {
|
||||
let new_vectors = new_vectors?;
|
||||
if old_vectors.regenerate != new_vectors.regenerate {
|
||||
chunks.set_regenerate(update.docid(), new_vectors.regenerate);
|
||||
match (old_vectors.regenerate, new_vectors.regenerate) {
|
||||
(true, true) | (false, false) => todo!(),
|
||||
_ => {
|
||||
chunks.set_regenerate(update.docid(), new_vectors.regenerate);
|
||||
}
|
||||
}
|
||||
// do we have set embeddings?
|
||||
if let Some(embeddings) = new_vectors.embeddings {
|
||||
|
@ -180,13 +180,7 @@ fn entry_from_raw_value(
|
||||
},
|
||||
RawVectors::ImplicitlyUserProvided(value) => VectorEntry {
|
||||
has_configured_embedder,
|
||||
// implicitly user provided always provide embeddings
|
||||
// `None` here means that there are no embeddings
|
||||
embeddings: Some(
|
||||
value
|
||||
.map(Embeddings::FromJsonImplicityUserProvided)
|
||||
.unwrap_or(Embeddings::FromDb(Default::default())),
|
||||
),
|
||||
embeddings: value.map(Embeddings::FromJsonImplicityUserProvided),
|
||||
regenerate: false,
|
||||
implicit: true,
|
||||
},
|
||||
|
@ -113,7 +113,7 @@ impl Embedder {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.par_chunks(self.chunk_count_hint())
|
||||
.map(move |chunk| self.embed(chunk))
|
||||
.collect();
|
||||
|
||||
|
@ -266,7 +266,7 @@ impl Embedder {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.par_chunks(self.chunk_count_hint())
|
||||
.map(move |chunk| self.embed(chunk))
|
||||
.collect();
|
||||
|
||||
|
@ -193,7 +193,7 @@ impl Embedder {
|
||||
threads
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.par_chunks(self.chunk_count_hint())
|
||||
.map(move |chunk| self.embed_ref(chunk))
|
||||
.collect();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user