fix two bugs and add a dump test
parent 5d50850e12
commit cc5dca8321
@@ -909,6 +909,7 @@ impl IndexScheduler {
 
                 let fields_ids_map = index.fields_ids_map(&rtxn)?;
                 let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
+                let embedding_configs = index.embedding_configs(&rtxn)?;
 
                 // 3.1. Dump the documents
                 for ret in index.all_documents(&rtxn)? {
@@ -951,16 +952,21 @@ impl IndexScheduler {
                     };
 
                     for (embedder_name, embeddings) in embeddings {
-                        // don't change the entry if it already exists, because it was user-provided
-                        vectors.entry(embedder_name).or_insert_with(|| {
-                            let embeddings = ExplicitVectors {
-                                embeddings: VectorOrArrayOfVectors::from_array_of_vectors(
-                                    embeddings,
-                                ),
-                                user_provided: false,
-                            };
-                            serde_json::to_value(embeddings).unwrap()
-                        });
+                        let user_provided = embedding_configs
+                            .iter()
+                            .find(|conf| conf.name == embedder_name)
+                            .is_some_and(|conf| conf.user_defined.contains(id));
+
+                        let embeddings = ExplicitVectors {
+                            embeddings: VectorOrArrayOfVectors::from_array_of_vectors(
+                                embeddings,
+                            ),
+                            user_provided,
+                        };
+                        vectors.insert(
+                            embedder_name,
+                            serde_json::to_value(embeddings).unwrap(),
+                        );
                     }
                 }
 
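The first bug: when dumping documents, the old code always wrote `user_provided: false`, so re-importing a dump lost the information about which embeddings the user had supplied. The hunk above replaces that constant with a lookup in the index's embedding configs. Below is a minimal, standalone sketch of that lookup; `EmbeddingConfig` and `is_user_provided` are illustrative names, not the real milli types (the real config stores `user_defined` as a RoaringBitmap rather than a HashSet).

    use std::collections::HashSet;

    // Illustrative stand-in for the embedding config read from the index.
    struct EmbeddingConfig {
        name: String,
        user_defined: HashSet<u32>,
    }

    // Returns true when `doc_id` was user-provided for the given embedder,
    // mirroring the `.find(..).is_some_and(..)` chain in the hunk above.
    fn is_user_provided(configs: &[EmbeddingConfig], embedder_name: &str, doc_id: u32) -> bool {
        configs
            .iter()
            .find(|conf| conf.name == embedder_name)
            .is_some_and(|conf| conf.user_defined.contains(&doc_id))
    }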
@@ -52,7 +52,7 @@ pub struct SearchQueryGet {
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
     attributes_to_retrieve: Option<CS<String>>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchRetrieveVectors>)]
-    retrieve_vectors: bool,
+    retrieve_vectors: Param<bool>,
     #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
     attributes_to_crop: Option<CS<String>>,
     #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
@@ -155,7 +155,7 @@ impl From<SearchQueryGet> for SearchQuery {
             page: other.page.as_deref().copied(),
             hits_per_page: other.hits_per_page.as_deref().copied(),
             attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()),
-            retrieve_vectors: other.retrieve_vectors,
+            retrieve_vectors: other.retrieve_vectors.0,
             attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()),
             crop_length: other.crop_length.0,
             attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
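The second bug: in the GET search route, `retrieve_vectors` was declared as a plain `bool` while the other query parameters go through the `Param` wrapper, which is presumably why it did not behave correctly when parsed from the query string. The two hunks above switch the field to `Param<bool>` and unwrap it with `.0` during conversion. A minimal sketch of the wrapper pattern, with an illustrative `Param` newtype standing in for meilisearch's query-parameter type:

    // Illustrative newtype: the real `Param<T>` is meilisearch's query-parameter
    // wrapper; this sketch only shows why the conversion needs `.0`.
    struct Param<T>(pub T);

    struct SearchQueryGet {
        retrieve_vectors: Param<bool>,
    }

    struct SearchQuery {
        retrieve_vectors: bool,
    }

    impl From<SearchQueryGet> for SearchQuery {
        fn from(other: SearchQueryGet) -> Self {
            // `.0` unwraps the parsed value, as in the hunk above.
            SearchQuery { retrieve_vectors: other.retrieve_vectors.0 }
        }
    }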
@@ -1051,6 +1051,7 @@ fn make_hits(
     formatter_builder.highlight_prefix(format.highlight_pre_tag);
     formatter_builder.highlight_suffix(format.highlight_post_tag);
     let mut documents = Vec::new();
+    let embedding_configs = index.embedding_configs(&rtxn)?;
     let documents_iter = index.documents(rtxn, documents_ids)?;
     for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
         // First generate a document with all the displayed fields
@@ -1066,12 +1067,19 @@ fn make_hits(
         if retrieve_vectors {
             let mut vectors = serde_json::Map::new();
             for (name, mut vector) in index.embeddings(&rtxn, id)? {
+                let user_defined = embedding_configs
+                    .iter()
+                    .find(|conf| conf.name == name)
+                    .is_some_and(|conf| conf.user_defined.contains(id));
+                let mut embedding = serde_json::Map::new();
+                embedding.insert("userDefined".to_string(), user_defined.into());
                 if vector.len() == 1 {
                     let vector = vector.pop().unwrap();
-                    vectors.insert(name.into(), vector.into());
+                    embedding.insert("embedding".to_string(), vector.into());
                 } else {
-                    vectors.insert(name.into(), vector.into());
+                    embedding.insert("embedding".to_string(), vector.into());
                 }
+                vectors.insert(name.into(), embedding.into());
             }
             document.insert("_vectors".into(), vectors.into());
         }
 
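With this change, each embedder entry in `_vectors` is returned as an object carrying both the embedding and whether it was user defined, instead of a bare vector. A rough sketch of the per-document shape the search response now contains, built with serde_json (values are illustrative; the dump test below snapshots the real output, and `example_vectors_field` is a made-up helper):

    use serde_json::json;

    // Illustrative `_vectors` field for one document, per the hunk above and the
    // test snapshot below: one object per embedder with `userDefined` and `embedding`.
    fn example_vectors_field() -> serde_json::Value {
        json!({
            "doggo_embedder": {
                "userDefined": true,
                "embedding": [0.0, 0.1, 0.2]
            }
        })
    }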
@@ -1938,3 +1938,209 @@ async fn import_dump_v6_containing_experimental_features() {
         })
         .await;
 }
+
+// In this test we must generate the dump ourselves to ensure the
+// `user defined` vectors are well set
+#[actix_rt::test]
+async fn generate_and_import_dump_containing_vectors() {
+    let temp = tempfile::tempdir().unwrap();
+    let mut opt = default_settings(temp.path());
+    let server = Server::new_with_options(opt.clone()).await.unwrap();
+    let (code, _) = server.set_features(json!({"vectorStore": true})).await;
+    snapshot!(code, @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false
+    }
+    "###);
+
+    let index = server.index("pets");
+
+    let (response, code) = index
+        .update_settings(json!(
+            {
+              "embedders": {
+                "doggo_embedder": {
+                  "source": "huggingFace",
+                  "model": "sentence-transformers/all-MiniLM-L6-v2",
+                  "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
+                  "documentTemplate": "{{doc.doggo}}",
+                }
+              }
+            }
+        ))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    let response = index.wait_task(response.uid()).await;
+    snapshot!(response);
+
+    let (response, code) = index
+        .add_documents(
+            json!([
+                {"id": 0, "doggo": "kefir", "_vectors": { "doggo_embedder": vec![0; 384] }},
+                {"id": 1, "doggo": "echo", "_vectors": { "doggo_embedder": { "userProvided": true, "embeddings": vec![1; 384] }}},
+                {"id": 2, "doggo": "intel", "_vectors": { "doggo_embedder": { "userProvided": false, "embeddings": vec![2; 384] }}},
+                {"id": 3, "doggo": "bill", "_vectors": { "doggo_embedder": { "userProvided": false }}},
+                {"id": 4, "doggo": "max" },
+            ]),
+            None,
+        )
+        .await;
+    snapshot!(code, @"202 Accepted");
+    let response = index.wait_task(response.uid()).await;
+    snapshot!(response);
+
+    let (response, code) = server.create_dump().await;
+    snapshot!(code, @"202 Accepted");
+    let response = index.wait_task(response.uid()).await;
+    snapshot!(response["status"], @r###""succeeded""###);
+
+    // ========= We made a dump, now we should clear the DB except and try to import our dump
+    drop(server);
+    tokio::fs::remove_dir_all(&opt.db_path).await.unwrap();
+    let dump_name = format!("{}.dump", response["details"]["dumpUid"].as_str().unwrap());
+    let dump_path = opt.dump_dir.join(dump_name);
+    assert!(dump_path.exists(), "path: `{}`", dump_path.display());
+
+    opt.import_dump = Some(dump_path);
+    // NOTE: We shouldn't have to change the database path but I lost one hour
+    // because of a « bad path » error and that fixed it.
+    opt.db_path = temp.path().join("data.ms");
+
+    let mut server = Server::new_auth_with_options(opt, temp).await;
+    server.use_api_key("MASTER_KEY");
+
+    let (indexes, code) = server.list_indexes(None, None).await;
+    assert_eq!(code, 200, "{indexes}");
+
+    snapshot!(indexes["results"].as_array().unwrap().len(), @"1");
+    snapshot!(indexes["results"][0]["uid"], @r###""pets""###);
+    snapshot!(indexes["results"][0]["primaryKey"], @r###""id""###);
+
+    let (response, code) = server.get_features().await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "vectorStore": true,
+      "metrics": false,
+      "logsRoute": false
+    }
+    "###);
+
+    let index = server.index("pets");
+
+    let (response, code) = index.settings().await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "displayedAttributes": [
+        "*"
+      ],
+      "searchableAttributes": [
+        "*"
+      ],
+      "filterableAttributes": [],
+      "sortableAttributes": [],
+      "rankingRules": [
+        "words",
+        "typo",
+        "proximity",
+        "attribute",
+        "sort",
+        "exactness"
+      ],
+      "stopWords": [],
+      "nonSeparatorTokens": [],
+      "separatorTokens": [],
+      "dictionary": [],
+      "synonyms": {},
+      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
+      "typoTolerance": {
+        "enabled": true,
+        "minWordSizeForTypos": {
+          "oneTypo": 5,
+          "twoTypos": 9
+        },
+        "disableOnWords": [],
+        "disableOnAttributes": []
+      },
+      "faceting": {
+        "maxValuesPerFacet": 100,
+        "sortFacetValuesBy": {
+          "*": "alpha"
+        }
+      },
+      "pagination": {
+        "maxTotalHits": 1000
+      },
+      "embedders": {
+        "doggo_embedder": {
+          "source": "huggingFace",
+          "model": "sentence-transformers/all-MiniLM-L6-v2",
+          "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
+          "documentTemplate": "{{doc.doggo}}"
+        }
+      },
+      "searchCutoffMs": null
+    }
+    "###);
+
+    index
+        .search(json!({"retrieveVectors": true}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"], { "[]._vectors.doggo_embedder.embedding" => "[vector]" }), @r###"
+            [
+              {
+                "id": 0,
+                "doggo": "kefir",
+                "_vectors": {
+                  "doggo_embedder": {
+                    "userDefined": true,
+                    "embedding": "[vector]"
+                  }
+                }
+              },
+              {
+                "id": 1,
+                "doggo": "echo",
+                "_vectors": {
+                  "doggo_embedder": {
+                    "userDefined": true,
+                    "embedding": "[vector]"
+                  }
+                }
+              },
+              {
+                "id": 2,
+                "doggo": "intel",
+                "_vectors": {
+                  "doggo_embedder": {
+                    "userDefined": false,
+                    "embedding": "[vector]"
+                  }
+                }
+              },
+              {
+                "id": 3,
+                "doggo": "bill",
+                "_vectors": {
+                  "doggo_embedder": {
+                    "userDefined": false,
+                    "embedding": "[vector]"
+                  }
+                }
+              },
+              {
+                "id": 4,
+                "doggo": "max",
+                "_vectors": {
+                  "doggo_embedder": {
+                    "userDefined": false,
+                    "embedding": "[vector]"
+                  }
+                }
+              }
+            ]
+            "###);
+        })
+        .await;
+}