mirror of https://github.com/meilisearch/meilisearch.git, synced 2024-11-23 02:27:40 +08:00

add a batch of tests

This commit is contained in:
parent 7cef2299cf
commit 3493093c4f
@@ -2016,6 +2016,7 @@ mod tests {
         // Wait for one successful batch.
         #[track_caller]
         fn advance_one_successful_batch(&mut self) {
+            self.index_scheduler.assert_internally_consistent();
             self.advance_till([Start, BatchCreated]);
             loop {
                 match self.advance() {
@@ -2025,12 +2026,16 @@ mod tests {
                     // the batch went successfully, we can stop the loop and go on with the next states.
                     ProcessBatchSucceeded => break,
                     AbortedIndexation => panic!("The batch was aborted.\n{}", snapshot_index_scheduler(&self.index_scheduler)),
-                    ProcessBatchFailed => panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler)),
+                    ProcessBatchFailed => {
+                        while self.advance() != Start {}
+                        panic!("The batch failed.\n{}", snapshot_index_scheduler(&self.index_scheduler))
+                    },
                     breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint),
                 }
             }

             self.advance_till([AfterProcessing]);
+            self.index_scheduler.assert_internally_consistent();
         }

         // Wait for one failed batch.
@@ -5012,7 +5017,6 @@ mod tests {
                 false,
             )
             .unwrap();
-        index_scheduler.assert_internally_consistent();

         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors");

@@ -5105,7 +5109,6 @@ mod tests {
                 false,
             )
             .unwrap();
-        index_scheduler.assert_internally_consistent();

         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel");

@@ -5180,7 +5183,6 @@ mod tests {
                 false,
             )
             .unwrap();
-        index_scheduler.assert_internally_consistent();

         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");

@@ -5303,9 +5305,7 @@ mod tests {
                 false,
             )
             .unwrap();
-        index_scheduler.assert_internally_consistent();
         handle.advance_one_successful_batch();
-        index_scheduler.assert_internally_consistent();

         let index = index_scheduler.index("doggos").unwrap();
         let rtxn = index.read_txn().unwrap();
@@ -5452,9 +5452,7 @@ mod tests {
                 false,
             )
             .unwrap();
-        index_scheduler.assert_internally_consistent();
         handle.advance_one_successful_batch();
-        index_scheduler.assert_internally_consistent();

         // the document with the id 3 should have its original embedding updated
         let rtxn = index.read_txn().unwrap();
@@ -5481,4 +5479,166 @@ mod tests {
         assert!(!embedding.is_empty());
     }

+    #[test]
+    fn delete_document_containing_vector() {
+        // 1. Add an embedder
+        // 2. Push two documents containing a simple vector
+        // 3. Delete the first document
+        // 4. The user defined roaring bitmap shouldn't contains the id of the first document anymore
+        // 5. Clear the index
+        // 6. The user defined roaring bitmap shouldn't contains the id of the second document
+        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+        let setting = meilisearch_types::settings::Settings::<Unchecked> {
+            embedders: Setting::Set(maplit::btreemap! {
+                S("manual") => Setting::Set(EmbeddingSettings {
+                    source: Setting::Set(milli::vector::settings::EmbedderSource::UserProvided),
+                    dimensions: Setting::Set(3),
+                    ..Default::default()
+                })
+            }),
+            ..Default::default()
+        };
+        index_scheduler
+            .register(
+                KindWithContent::SettingsUpdate {
+                    index_uid: S("doggos"),
+                    new_settings: Box::new(setting),
+                    is_deletion: false,
+                    allow_index_creation: true,
+                },
+                None,
+                false,
+            )
+            .unwrap();
+        handle.advance_one_successful_batch();
+
+        let content = serde_json::json!(
+            [
+                {
+                    "id": 0,
+                    "doggo": "kefir",
+                    "_vectors": {
+                        "manual": vec![0, 0, 0],
+                    }
+                },
+                {
+                    "id": 1,
+                    "doggo": "intel",
+                    "_vectors": {
+                        "manual": vec![1, 1, 1],
+                    }
+                },
+            ]
+        );
+
+        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0_u128).unwrap();
+        let documents_count =
+            read_json(serde_json::to_string_pretty(&content).unwrap().as_bytes(), &mut file)
+                .unwrap();
+        snapshot!(documents_count, @"2");
+        file.persist().unwrap();
+
+        index_scheduler
+            .register(
+                KindWithContent::DocumentAdditionOrUpdate {
+                    index_uid: S("doggos"),
+                    primary_key: None,
+                    method: ReplaceDocuments,
+                    content_file: uuid,
+                    documents_count,
+                    allow_index_creation: false,
+                },
+                None,
+                false,
+            )
+            .unwrap();
+        handle.advance_one_successful_batch();
+
+        index_scheduler
+            .register(
+                KindWithContent::DocumentDeletion {
+                    index_uid: S("doggos"),
+                    documents_ids: vec![S("1")],
+                },
+                None,
+                false,
+            )
+            .unwrap();
+        handle.advance_one_successful_batch();
+
+        let index = index_scheduler.index("doggos").unwrap();
+        let rtxn = index.read_txn().unwrap();
+        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+        let documents = index
+            .all_documents(&rtxn)
+            .unwrap()
+            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .collect::<Vec<_>>();
+        snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
+        let conf = index.embedding_configs(&rtxn).unwrap();
+        // TODO: Here the user provided vectors should NOT contains 1
+        snapshot!(format!("{conf:#?}"), @r###"
+        [
+            IndexEmbeddingConfig {
+                name: "manual",
+                config: EmbeddingConfig {
+                    embedder_options: UserProvided(
+                        EmbedderOptions {
+                            dimensions: 3,
+                            distribution: None,
+                        },
+                    ),
+                    prompt: PromptData {
+                        template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
+                    },
+                },
+                user_provided: RoaringBitmap<[0, 1]>,
+            },
+        ]
+        "###);
+        let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
+        let embeddings = index.embeddings(&rtxn, docid).unwrap();
+        let embedding = &embeddings["manual"];
+        assert!(!embedding.is_empty(), "{embedding:?}");
+
+        index_scheduler
+            .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false)
+            .unwrap();
+        handle.advance_one_successful_batch();
+
+        let index = index_scheduler.index("doggos").unwrap();
+        let rtxn = index.read_txn().unwrap();
+        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+        let documents = index
+            .all_documents(&rtxn)
+            .unwrap()
+            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .collect::<Vec<_>>();
+        snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
+        let conf = index.embedding_configs(&rtxn).unwrap();
+        // TODO: Here the user provided vectors should contains nothing
+        snapshot!(format!("{conf:#?}"), @r###"
+        [
+            IndexEmbeddingConfig {
+                name: "manual",
+                config: EmbeddingConfig {
+                    embedder_options: UserProvided(
+                        EmbedderOptions {
+                            dimensions: 3,
+                            distribution: None,
+                        },
+                    ),
+                    prompt: PromptData {
+                        template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
+                    },
+                },
+                user_provided: RoaringBitmap<[0, 1]>,
+            },
+        ]
+        "###);
+    }
 }

@@ -1,5 +1,8 @@
+mod settings;
+
 use meili_snap::{json_string, snapshot};

+use crate::common::index::Index;
 use crate::common::{GetAllDocumentsOptions, Server};
 use crate::json;

@@ -147,3 +150,78 @@ async fn add_remove_user_provided() {
     }
     "###);
 }
+
+async fn generate_default_user_provided_documents(server: &Server) -> Index {
+    let index = server.index("doggo");
+    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(value, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false
+    }
+    "###);
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 3,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await;
+
+    let documents = json!([
+      {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
+      {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1] }},
+      {"id": 2, "name": "billou", "_vectors": { "manual": [[2, 2, 2], [2, 2, 3]] }},
+      {"id": 3, "name": "intel", "_vectors": { "manual": { "userProvided": true, "embeddings": [3, 3, 3] }}},
+      {"id": 4, "name": "max", "_vectors": { "manual": { "userProvided": true, "embeddings": [[4, 4, 4], [4, 4, 5]] }}},
+    ]);
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(value.uid()).await;
+
+    index
+}
+
+#[actix_rt::test]
+async fn clear_documents() {
+    let server = Server::new().await;
+    let index = generate_default_user_provided_documents(&server).await;
+
+    let (value, _code) = index.clear_all_documents().await;
+    index.wait_task(value.uid()).await;
+
+    // Make sure the documents DB has been cleared
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+        .await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [],
+      "offset": 0,
+      "limit": 20,
+      "total": 0
+    }
+    "###);
+
+    // Make sure the arroy DB has been cleared
+    let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "hits": [],
+      "query": "",
+      "processingTimeMs": 0,
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0,
+      "semanticHitCount": 0
+    }
+    "###);
+}

meilisearch/tests/vector/settings.rs (new file, 161 lines)
@@ -0,0 +1,161 @@
+use meili_snap::{json_string, snapshot};
+
+use crate::common::{GetAllDocumentsOptions, Server};
+use crate::json;
+use crate::vector::generate_default_user_provided_documents;
+
+#[actix_rt::test]
+async fn update_embedder() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(value, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false
+    }
+    "###);
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": { "manual": {}},
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await;
+
+    let (response, code) = index
+        .update_settings(json!({
+          "embedders": {
+              "manual": {
+                  "source": "userProvided",
+                  "dimensions": 2,
+              }
+          },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+
+    let ret = server.wait_task(response.uid()).await;
+    snapshot!(ret, @r###"
+    {
+      "uid": 1,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "settingsUpdate",
+      "canceledBy": null,
+      "details": {
+        "embedders": {
+          "manual": {
+            "source": "userProvided",
+            "dimensions": 2
+          }
+        }
+      },
+      "error": {
+        "message": "`.embedders.manual`: Field `model` unavailable for source `userProvided` (only available for sources: `huggingFace`, `openAi`, `ollama`). Available fields: `source`, `dimensions`, `distribution`",
+        "code": "invalid_settings_embedders",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn reset_embedder_documents() {
+    let server = Server::new().await;
+    let index = generate_default_user_provided_documents(&server).await;
+
+    let (response, code) = index.delete_settings().await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await;
+
+    // Make sure the documents are still present
+    let (documents, _code) = index.get_all_documents(Default::default()).await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "name": "kefir"
+        },
+        {
+          "id": 1,
+          "name": "echo"
+        },
+        {
+          "id": 2,
+          "name": "billou"
+        },
+        {
+          "id": 3,
+          "name": "intel"
+        },
+        {
+          "id": 4,
+          "name": "max"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 5
+    }
+    "###);
+
+    // Make sure we are still able to retrieve their vectors
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+        .await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "name": "kefir",
+          "_vectors": {}
+        },
+        {
+          "id": 1,
+          "name": "echo",
+          "_vectors": {}
+        },
+        {
+          "id": 2,
+          "name": "billou",
+          "_vectors": {}
+        },
+        {
+          "id": 3,
+          "name": "intel",
+          "_vectors": {}
+        },
+        {
+          "id": 4,
+          "name": "max",
+          "_vectors": {}
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 5
+    }
+    "###);
+
+    // Make sure the arroy DB has been cleared
+    let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
+    snapshot!(json_string!(documents), @r###"
+    {
+      "message": "Cannot find embedder with name `default`.",
+      "code": "invalid_embedder",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_embedder"
+    }
+    "###);
+}