mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
always push the user defined vectors in arroy
This commit is contained in:
parent
a73ccc78a6
commit
5d50850e12
@ -5173,8 +5173,8 @@ mod tests {
|
|||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
|
||||||
|
|
||||||
println!("HEEEEERE");
|
println!("HEEEEERE");
|
||||||
// handle.advance_one_successful_batch();
|
handle.advance_one_successful_batch();
|
||||||
handle.advance_one_failed_batch();
|
// handle.advance_one_failed_batch();
|
||||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds");
|
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds");
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -5351,9 +5351,9 @@ mod tests {
|
|||||||
// as user provided since we explicitely marked it as NOT user provided.
|
// as user provided since we explicitely marked it as NOT user provided.
|
||||||
snapshot!(format!("{conf:#?}"), @r###"
|
snapshot!(format!("{conf:#?}"), @r###"
|
||||||
[
|
[
|
||||||
(
|
IndexEmbeddingConfig {
|
||||||
"my_doggo_embedder",
|
name: "my_doggo_embedder",
|
||||||
EmbeddingConfig {
|
config: EmbeddingConfig {
|
||||||
embedder_options: HuggingFace(
|
embedder_options: HuggingFace(
|
||||||
EmbedderOptions {
|
EmbedderOptions {
|
||||||
model: "sentence-transformers/all-MiniLM-L6-v2",
|
model: "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
@ -5367,8 +5367,8 @@ mod tests {
|
|||||||
template: "{{doc.doggo}}",
|
template: "{{doc.doggo}}",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
RoaringBitmap<[1, 2]>,
|
user_defined: RoaringBitmap<[1, 2]>,
|
||||||
),
|
},
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||||
|
@ -6,10 +6,6 @@ expression: doc
|
|||||||
"doggo": "Intel",
|
"doggo": "Intel",
|
||||||
"breed": "beagle",
|
"breed": "beagle",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"A_fakerest": {
|
|
||||||
"embeddings": "[vector]",
|
|
||||||
"userProvided": true
|
|
||||||
},
|
|
||||||
"noise": [
|
"noise": [
|
||||||
0.1,
|
0.1,
|
||||||
0.2,
|
0.2,
|
@ -6,10 +6,6 @@ expression: doc
|
|||||||
"doggo": "kefir",
|
"doggo": "kefir",
|
||||||
"breed": "patou",
|
"breed": "patou",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"A_fakerest": {
|
|
||||||
"embeddings": "[vector]",
|
|
||||||
"userProvided": true
|
|
||||||
},
|
|
||||||
"noise": [
|
"noise": [
|
||||||
0.1,
|
0.1,
|
||||||
0.2,
|
0.2,
|
@ -1,4 +0,0 @@
|
|||||||
---
|
|
||||||
source: index-scheduler/src/lib.rs
|
|
||||||
---
|
|
||||||
[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]
|
|
@ -204,7 +204,7 @@ async fn distribution_shift() {
|
|||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
||||||
|
|
||||||
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}});
|
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
|
||||||
let (response, code) = index.search_post(search.clone()).await;
|
let (response, code) = index.search_post(search.clone()).await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
|
||||||
@ -239,20 +239,23 @@ async fn highlighter() {
|
|||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||||
"hybrid": {"semanticRatio": 0.2},
|
"hybrid": {"semanticRatio": 0.2},
|
||||||
"attributesToHighlight": [
|
"retrieveVectors": true,
|
||||||
"desc"
|
"attributesToHighlight": [
|
||||||
|
"desc",
|
||||||
|
"_vectors",
|
||||||
],
|
],
|
||||||
"highlightPreTag": "**BEGIN**",
|
"highlightPreTag": "**BEGIN**",
|
||||||
"highlightPostTag": "**END**"
|
"highlightPostTag": "**END**",
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###);
|
||||||
snapshot!(response["semanticHitCount"], @"0");
|
snapshot!(response["semanticHitCount"], @"0");
|
||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||||
"hybrid": {"semanticRatio": 0.8},
|
"hybrid": {"semanticRatio": 0.8},
|
||||||
|
"retrieveVectors": true,
|
||||||
"showRankingScore": true,
|
"showRankingScore": true,
|
||||||
"attributesToHighlight": [
|
"attributesToHighlight": [
|
||||||
"desc"
|
"desc"
|
||||||
@ -262,13 +265,14 @@ async fn highlighter() {
|
|||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
|
||||||
snapshot!(response["semanticHitCount"], @"3");
|
snapshot!(response["semanticHitCount"], @"3");
|
||||||
|
|
||||||
// no highlighting on full semantic
|
// no highlighting on full semantic
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||||
"hybrid": {"semanticRatio": 1.0},
|
"hybrid": {"semanticRatio": 1.0},
|
||||||
|
"retrieveVectors": true,
|
||||||
"showRankingScore": true,
|
"showRankingScore": true,
|
||||||
"attributesToHighlight": [
|
"attributesToHighlight": [
|
||||||
"desc"
|
"desc"
|
||||||
@ -278,7 +282,7 @@ async fn highlighter() {
|
|||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
|
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
|
||||||
snapshot!(response["semanticHitCount"], @"3");
|
snapshot!(response["semanticHitCount"], @"3");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -361,7 +365,7 @@ async fn single_document() {
|
|||||||
|
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(
|
.search_post(
|
||||||
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
|
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@ -377,7 +381,7 @@ async fn query_combination() {
|
|||||||
|
|
||||||
// search without query and vector, but with hybrid => still placeholder
|
// search without query and vector, but with hybrid => still placeholder
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
@ -386,7 +390,7 @@ async fn query_combination() {
|
|||||||
|
|
||||||
// same with a different semantic ratio
|
// same with a different semantic ratio
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true}))
|
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
@ -395,7 +399,7 @@ async fn query_combination() {
|
|||||||
|
|
||||||
// wrong vector dimensions
|
// wrong vector dimensions
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
@ -410,7 +414,7 @@ async fn query_combination() {
|
|||||||
|
|
||||||
// full vector
|
// full vector
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
@ -419,7 +423,7 @@ async fn query_combination() {
|
|||||||
|
|
||||||
// full keyword, without a query
|
// full keyword, without a query
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
|
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
@ -428,7 +432,7 @@ async fn query_combination() {
|
|||||||
|
|
||||||
// query + vector, full keyword => keyword
|
// query + vector, full keyword => keyword
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
|
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
@ -437,7 +441,7 @@ async fn query_combination() {
|
|||||||
|
|
||||||
// query + vector, no hybrid keyword =>
|
// query + vector, no hybrid keyword =>
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true}))
|
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
snapshot!(code, @"400 Bad Request");
|
snapshot!(code, @"400 Bad Request");
|
||||||
@ -453,7 +457,7 @@ async fn query_combination() {
|
|||||||
// full vector, without a vector => error
|
// full vector, without a vector => error
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(
|
.search_post(
|
||||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
|
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@ -470,7 +474,7 @@ async fn query_combination() {
|
|||||||
// hybrid without a vector => full keyword
|
// hybrid without a vector => full keyword
|
||||||
let (response, code) = index
|
let (response, code) = index
|
||||||
.search_post(
|
.search_post(
|
||||||
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}),
|
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
@ -1337,6 +1337,7 @@ async fn experimental_feature_vector_store() {
|
|||||||
.search_post(json!({
|
.search_post(json!({
|
||||||
"vector": [1.0, 2.0, 3.0],
|
"vector": [1.0, 2.0, 3.0],
|
||||||
"showRankingScore": true,
|
"showRankingScore": true,
|
||||||
|
"retrieveVectors": true,
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@ async fn basic() {
|
|||||||
index.wait_task(value.uid()).await;
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.similar(json!({"id": 143}), |response, code| {
|
.similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(json_string!(response["hits"]), @r###"
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
@ -88,9 +88,9 @@ async fn basic() {
|
|||||||
"id": "522681",
|
"id": "522681",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"manual": [
|
"manual": [
|
||||||
0.1,
|
0.10000000149011612,
|
||||||
0.6,
|
0.6000000238418579,
|
||||||
0.8
|
0.800000011920929
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -100,9 +100,9 @@ async fn basic() {
|
|||||||
"id": "299537",
|
"id": "299537",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"manual": [
|
"manual": [
|
||||||
0.6,
|
0.6000000238418579,
|
||||||
0.8,
|
0.800000011920929,
|
||||||
-0.2
|
-0.20000000298023224
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -112,9 +112,9 @@ async fn basic() {
|
|||||||
"id": "166428",
|
"id": "166428",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"manual": [
|
"manual": [
|
||||||
0.7,
|
0.699999988079071,
|
||||||
0.7,
|
0.699999988079071,
|
||||||
-0.4
|
-0.4000000059604645
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -124,8 +124,8 @@ async fn basic() {
|
|||||||
"id": "287947",
|
"id": "287947",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"manual": [
|
"manual": [
|
||||||
0.8,
|
0.800000011920929,
|
||||||
0.4,
|
0.4000000059604645,
|
||||||
-0.5
|
-0.5
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -136,7 +136,7 @@ async fn basic() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.similar(json!({"id": "299537"}), |response, code| {
|
.similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(json_string!(response["hits"]), @r###"
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
[
|
[
|
||||||
@ -146,9 +146,9 @@ async fn basic() {
|
|||||||
"id": "166428",
|
"id": "166428",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"manual": [
|
"manual": [
|
||||||
0.7,
|
0.699999988079071,
|
||||||
0.7,
|
0.699999988079071,
|
||||||
-0.4
|
-0.4000000059604645
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -158,8 +158,8 @@ async fn basic() {
|
|||||||
"id": "287947",
|
"id": "287947",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"manual": [
|
"manual": [
|
||||||
0.8,
|
0.800000011920929,
|
||||||
0.4,
|
0.4000000059604645,
|
||||||
-0.5
|
-0.5
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -170,9 +170,9 @@ async fn basic() {
|
|||||||
"id": "522681",
|
"id": "522681",
|
||||||
"_vectors": {
|
"_vectors": {
|
||||||
"manual": [
|
"manual": [
|
||||||
0.1,
|
0.10000000149011612,
|
||||||
0.6,
|
0.6000000238418579,
|
||||||
0.8
|
0.800000011920929
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -183,8 +183,8 @@ async fn basic() {
|
|||||||
"_vectors": {
|
"_vectors": {
|
||||||
"manual": [
|
"manual": [
|
||||||
-0.5,
|
-0.5,
|
||||||
0.3,
|
0.30000001192092896,
|
||||||
0.85
|
0.8500000238418579
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -456,71 +456,77 @@ async fn filter() {
|
|||||||
index.wait_task(value.uid()).await;
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| {
|
.similar(
|
||||||
snapshot!(code, @"200 OK");
|
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
|
||||||
snapshot!(json_string!(response["hits"]), @r###"
|
|response, code| {
|
||||||
[
|
snapshot!(code, @"200 OK");
|
||||||
{
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
"title": "Captain Marvel",
|
[
|
||||||
"release_year": 2019,
|
{
|
||||||
"id": "299537",
|
"title": "Captain Marvel",
|
||||||
"_vectors": {
|
"release_year": 2019,
|
||||||
"manual": [
|
"id": "299537",
|
||||||
0.6,
|
"_vectors": {
|
||||||
0.8,
|
"manual": [
|
||||||
-0.2
|
0.6000000238418579,
|
||||||
]
|
0.800000011920929,
|
||||||
}
|
-0.20000000298023224
|
||||||
},
|
]
|
||||||
{
|
}
|
||||||
"title": "How to Train Your Dragon: The Hidden World",
|
},
|
||||||
"release_year": 2019,
|
{
|
||||||
"id": "166428",
|
"title": "How to Train Your Dragon: The Hidden World",
|
||||||
"_vectors": {
|
"release_year": 2019,
|
||||||
"manual": [
|
"id": "166428",
|
||||||
0.7,
|
"_vectors": {
|
||||||
0.7,
|
"manual": [
|
||||||
-0.4
|
0.699999988079071,
|
||||||
]
|
0.699999988079071,
|
||||||
}
|
-0.4000000059604645
|
||||||
},
|
]
|
||||||
{
|
}
|
||||||
"title": "Shazam!",
|
},
|
||||||
"release_year": 2019,
|
{
|
||||||
"id": "287947",
|
"title": "Shazam!",
|
||||||
"_vectors": {
|
"release_year": 2019,
|
||||||
"manual": [
|
"id": "287947",
|
||||||
0.8,
|
"_vectors": {
|
||||||
0.4,
|
"manual": [
|
||||||
-0.5
|
0.800000011920929,
|
||||||
]
|
0.4000000059604645,
|
||||||
}
|
-0.5
|
||||||
}
|
]
|
||||||
]
|
}
|
||||||
"###);
|
}
|
||||||
})
|
]
|
||||||
|
"###);
|
||||||
|
},
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| {
|
.similar(
|
||||||
snapshot!(code, @"200 OK");
|
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
|
||||||
snapshot!(json_string!(response["hits"]), @r###"
|
|response, code| {
|
||||||
[
|
snapshot!(code, @"200 OK");
|
||||||
{
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
"title": "All Quiet on the Western Front",
|
[
|
||||||
"release_year": 1930,
|
{
|
||||||
"id": "143",
|
"title": "All Quiet on the Western Front",
|
||||||
"_vectors": {
|
"release_year": 1930,
|
||||||
"manual": [
|
"id": "143",
|
||||||
-0.5,
|
"_vectors": {
|
||||||
0.3,
|
"manual": [
|
||||||
0.85
|
-0.5,
|
||||||
]
|
0.30000001192092896,
|
||||||
}
|
0.8500000238418579
|
||||||
}
|
]
|
||||||
]
|
}
|
||||||
"###);
|
}
|
||||||
})
|
]
|
||||||
|
"###);
|
||||||
|
},
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -579,24 +585,27 @@ async fn limit_and_offset() {
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
index
|
index
|
||||||
.similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| {
|
.similar(
|
||||||
snapshot!(code, @"200 OK");
|
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
|
||||||
snapshot!(json_string!(response["hits"]), @r###"
|
|response, code| {
|
||||||
[
|
snapshot!(code, @"200 OK");
|
||||||
{
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
"title": "Captain Marvel",
|
[
|
||||||
"release_year": 2019,
|
{
|
||||||
"id": "299537",
|
"title": "Captain Marvel",
|
||||||
"_vectors": {
|
"release_year": 2019,
|
||||||
"manual": [
|
"id": "299537",
|
||||||
0.6,
|
"_vectors": {
|
||||||
0.8,
|
"manual": [
|
||||||
-0.2
|
0.6000000238418579,
|
||||||
]
|
0.800000011920929,
|
||||||
}
|
-0.20000000298023224
|
||||||
}
|
]
|
||||||
]
|
}
|
||||||
"###);
|
}
|
||||||
})
|
]
|
||||||
|
"###);
|
||||||
|
},
|
||||||
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
@ -1,244 +0,0 @@
|
|||||||
---
|
|
||||||
source: milli/src/search/new/tests/attribute_fid.rs
|
|
||||||
expression: "format!(\"{document_ids_scores:#?}\")"
|
|
||||||
---
|
|
||||||
[
|
|
||||||
(
|
|
||||||
2,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 19,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 91,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
6,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 15,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 81,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
5,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 14,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 79,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
4,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 13,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 77,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
3,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 12,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 83,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
9,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 11,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 75,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
8,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 10,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 79,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
7,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 10,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 73,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
11,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 7,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 77,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
10,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 6,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 81,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
13,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 6,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 81,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
12,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 6,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 78,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
14,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 5,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 75,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
0,
|
|
||||||
[
|
|
||||||
Fid(
|
|
||||||
Rank {
|
|
||||||
rank: 1,
|
|
||||||
max_rank: 19,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Position(
|
|
||||||
Rank {
|
|
||||||
rank: 91,
|
|
||||||
max_rank: 91,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
],
|
|
||||||
),
|
|
||||||
]
|
|
@ -1,7 +0,0 @@
|
|||||||
---
|
|
||||||
source: milli/src/index.rs
|
|
||||||
---
|
|
||||||
age 1 |
|
|
||||||
id 2 |
|
|
||||||
name 2 |
|
|
||||||
|
|
@ -1,7 +0,0 @@
|
|||||||
---
|
|
||||||
source: milli/src/index.rs
|
|
||||||
---
|
|
||||||
age 1 |
|
|
||||||
id 2 |
|
|
||||||
name 2 |
|
|
||||||
|
|
@ -8,7 +8,6 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use bytemuck::cast_slice;
|
use bytemuck::cast_slice;
|
||||||
use grenad::Writer;
|
use grenad::Writer;
|
||||||
use itertools::EitherOrBoth;
|
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
@ -50,7 +49,7 @@ enum VectorStateDelta {
|
|||||||
// Note: changing the value of the manually specified vector **should not record** this delta
|
// Note: changing the value of the manually specified vector **should not record** this delta
|
||||||
WasGeneratedNowManual(Vec<Vec<f32>>),
|
WasGeneratedNowManual(Vec<Vec<f32>>),
|
||||||
|
|
||||||
ManualDelta(Vec<Vec<f32>>, Vec<Vec<f32>>),
|
ManualDelta(Vec<Vec<f32>>),
|
||||||
|
|
||||||
// Add the vector computed from the specified prompt
|
// Add the vector computed from the specified prompt
|
||||||
// Remove any previous vector
|
// Remove any previous vector
|
||||||
@ -59,14 +58,12 @@ enum VectorStateDelta {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl VectorStateDelta {
|
impl VectorStateDelta {
|
||||||
fn into_values(self) -> (bool, String, (Vec<Vec<f32>>, Vec<Vec<f32>>)) {
|
fn into_values(self) -> (bool, String, Vec<Vec<f32>>) {
|
||||||
match self {
|
match self {
|
||||||
VectorStateDelta::NoChange => Default::default(),
|
VectorStateDelta::NoChange => Default::default(),
|
||||||
VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()),
|
VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()),
|
||||||
VectorStateDelta::WasGeneratedNowManual(add) => {
|
VectorStateDelta::WasGeneratedNowManual(add) => (true, Default::default(), add),
|
||||||
(true, Default::default(), (Default::default(), add))
|
VectorStateDelta::ManualDelta(add) => (false, Default::default(), add),
|
||||||
}
|
|
||||||
VectorStateDelta::ManualDelta(del, add) => (false, Default::default(), (del, add)),
|
|
||||||
VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()),
|
VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -166,8 +163,14 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
// lazily get it when needed
|
// lazily get it when needed
|
||||||
let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };
|
let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };
|
||||||
|
|
||||||
let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid)
|
let mut parsed_vectors = ParsedVectorsDiff::new(
|
||||||
.map_err(|error| error.to_crate_error(document_id().to_string()))?;
|
docid,
|
||||||
|
embedders_configs,
|
||||||
|
obkv,
|
||||||
|
old_vectors_fid,
|
||||||
|
new_vectors_fid,
|
||||||
|
)
|
||||||
|
.map_err(|error| error.to_crate_error(document_id().to_string()))?;
|
||||||
|
|
||||||
for EmbedderVectorExtractor {
|
for EmbedderVectorExtractor {
|
||||||
embedder_name,
|
embedder_name,
|
||||||
@ -182,7 +185,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
{
|
{
|
||||||
let delta = match parsed_vectors.remove(embedder_name) {
|
let delta = match parsed_vectors.remove(embedder_name) {
|
||||||
(Some(old), Some(new)) => {
|
(Some(old), Some(new)) => {
|
||||||
match (old.is_user_provided(), new.is_user_provided()) {
|
match (old.map_or(true, |old| old.is_user_provided()), new.is_user_provided()) {
|
||||||
(true, true) | (false, false) => (),
|
(true, true) | (false, false) => (),
|
||||||
(true, false) => {
|
(true, false) => {
|
||||||
remove_from_user_defined.insert(docid);
|
remove_from_user_defined.insert(docid);
|
||||||
@ -193,7 +196,6 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// no autogeneration
|
// no autogeneration
|
||||||
let del_vectors = old.into_array_of_vectors();
|
|
||||||
let add_vectors = new.into_array_of_vectors();
|
let add_vectors = new.into_array_of_vectors();
|
||||||
|
|
||||||
if add_vectors.len() > usize::from(u8::MAX) {
|
if add_vectors.len() > usize::from(u8::MAX) {
|
||||||
@ -203,15 +205,15 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
VectorStateDelta::ManualDelta(del_vectors, add_vectors)
|
VectorStateDelta::ManualDelta(add_vectors)
|
||||||
}
|
}
|
||||||
(Some(_old), None) => {
|
(Some(old), None) => {
|
||||||
// Do we keep this document?
|
// Do we keep this document?
|
||||||
let document_is_kept = obkv
|
let document_is_kept = obkv
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
|
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
|
||||||
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
||||||
if document_is_kept {
|
if document_is_kept && old.is_some() {
|
||||||
remove_from_user_defined.insert(docid);
|
remove_from_user_defined.insert(docid);
|
||||||
// becomes autogenerated
|
// becomes autogenerated
|
||||||
VectorStateDelta::NowGenerated(prompt.render(
|
VectorStateDelta::NowGenerated(prompt.render(
|
||||||
@ -219,6 +221,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
DelAdd::Addition,
|
DelAdd::Addition,
|
||||||
new_fields_ids_map,
|
new_fields_ids_map,
|
||||||
)?)
|
)?)
|
||||||
|
} else if document_is_kept && old.is_none() {
|
||||||
|
VectorStateDelta::NoChange
|
||||||
} else {
|
} else {
|
||||||
VectorStateDelta::NowRemoved
|
VectorStateDelta::NowRemoved
|
||||||
}
|
}
|
||||||
@ -315,8 +319,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Computes the diff between both Del and Add numbers and
|
/// We cannot compute the diff between both Del and Add vectors.
|
||||||
/// only inserts the parts that differ in the sorter.
|
/// We'll push every vector and compute the difference later in TypedChunk.
|
||||||
fn push_vectors_diff(
|
fn push_vectors_diff(
|
||||||
remove_vectors_writer: &mut Writer<BufWriter<File>>,
|
remove_vectors_writer: &mut Writer<BufWriter<File>>,
|
||||||
prompts_writer: &mut Writer<BufWriter<File>>,
|
prompts_writer: &mut Writer<BufWriter<File>>,
|
||||||
@ -325,7 +329,7 @@ fn push_vectors_diff(
|
|||||||
delta: VectorStateDelta,
|
delta: VectorStateDelta,
|
||||||
reindex_vectors: bool,
|
reindex_vectors: bool,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
|
let (must_remove, prompt, mut add_vectors) = delta.into_values();
|
||||||
if must_remove
|
if must_remove
|
||||||
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
|
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
|
||||||
// When vector pipeline will be optimized, this should be removed.
|
// When vector pipeline will be optimized, this should be removed.
|
||||||
@ -340,44 +344,25 @@ fn push_vectors_diff(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We sort and dedup the vectors
|
// We sort and dedup the vectors
|
||||||
del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
|
|
||||||
add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
|
add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
|
||||||
del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
|
|
||||||
add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
|
add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
|
||||||
|
|
||||||
let merged_vectors_iter =
|
// let merged_vectors_iter =
|
||||||
itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
|
// itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
|
||||||
|
|
||||||
// insert vectors into the writer
|
// insert vectors into the writer
|
||||||
for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) {
|
for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
||||||
// Generate the key by extending the unique index to it.
|
// Generate the key by extending the unique index to it.
|
||||||
key_buffer.truncate(TRUNCATE_SIZE);
|
key_buffer.truncate(TRUNCATE_SIZE);
|
||||||
let index = u16::try_from(i).unwrap();
|
let index = u16::try_from(i).unwrap();
|
||||||
key_buffer.extend_from_slice(&index.to_be_bytes());
|
key_buffer.extend_from_slice(&index.to_be_bytes());
|
||||||
|
|
||||||
match eob {
|
// We insert only the Add part of the Obkv to inform
|
||||||
EitherOrBoth::Both(_, _) => (), // no need to touch anything
|
// that we only want to remove all those vectors.
|
||||||
EitherOrBoth::Left(vector) => {
|
let mut obkv = KvWriterDelAdd::memory();
|
||||||
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
|
obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
|
||||||
// When vector pipeline will be optimized, this should be removed.
|
let bytes = obkv.into_inner()?;
|
||||||
if !reindex_vectors {
|
manual_vectors_writer.insert(&key_buffer, bytes)?;
|
||||||
// We insert only the Del part of the Obkv to inform
|
|
||||||
// that we only want to remove all those vectors.
|
|
||||||
let mut obkv = KvWriterDelAdd::memory();
|
|
||||||
obkv.insert(DelAdd::Deletion, cast_slice(&vector))?;
|
|
||||||
let bytes = obkv.into_inner()?;
|
|
||||||
manual_vectors_writer.insert(&key_buffer, bytes)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
EitherOrBoth::Right(vector) => {
|
|
||||||
// We insert only the Add part of the Obkv to inform
|
|
||||||
// that we only want to remove all those vectors.
|
|
||||||
let mut obkv = KvWriterDelAdd::memory();
|
|
||||||
obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
|
|
||||||
let bytes = obkv.into_inner()?;
|
|
||||||
manual_vectors_writer.insert(&key_buffer, bytes)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -4,8 +4,9 @@ use obkv::KvReader;
|
|||||||
use serde_json::{from_slice, Value};
|
use serde_json::{from_slice, Value};
|
||||||
|
|
||||||
use super::Embedding;
|
use super::Embedding;
|
||||||
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||||
use crate::{FieldId, InternalError, UserError};
|
use crate::{DocumentId, FieldId, InternalError, UserError};
|
||||||
|
|
||||||
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
|
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
|
||||||
|
|
||||||
@ -42,17 +43,19 @@ pub struct ExplicitVectors {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct ParsedVectorsDiff {
|
pub struct ParsedVectorsDiff {
|
||||||
pub old: Option<BTreeMap<String, Vectors>>,
|
pub old: BTreeMap<String, Option<Vectors>>,
|
||||||
pub new: Option<BTreeMap<String, Vectors>>,
|
pub new: Option<BTreeMap<String, Vectors>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ParsedVectorsDiff {
|
impl ParsedVectorsDiff {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
|
docid: DocumentId,
|
||||||
|
embedders_configs: &[IndexEmbeddingConfig],
|
||||||
documents_diff: KvReader<'_, FieldId>,
|
documents_diff: KvReader<'_, FieldId>,
|
||||||
old_vectors_fid: Option<FieldId>,
|
old_vectors_fid: Option<FieldId>,
|
||||||
new_vectors_fid: Option<FieldId>,
|
new_vectors_fid: Option<FieldId>,
|
||||||
) -> Result<Self, Error> {
|
) -> Result<Self, Error> {
|
||||||
let old = match old_vectors_fid
|
let mut old = match old_vectors_fid
|
||||||
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
|
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
|
||||||
.map(KvReaderDelAdd::new)
|
.map(KvReaderDelAdd::new)
|
||||||
.map(|obkv| to_vector_map(obkv, DelAdd::Deletion))
|
.map(|obkv| to_vector_map(obkv, DelAdd::Deletion))
|
||||||
@ -68,7 +71,13 @@ impl ParsedVectorsDiff {
|
|||||||
return Err(error);
|
return Err(error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
.flatten();
|
.flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, Some(vec))).collect());
|
||||||
|
for embedding_config in embedders_configs {
|
||||||
|
if embedding_config.user_defined.contains(docid) {
|
||||||
|
old.entry(embedding_config.name.to_string()).or_insert(None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let new = new_vectors_fid
|
let new = new_vectors_fid
|
||||||
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
|
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
|
||||||
.map(KvReaderDelAdd::new)
|
.map(KvReaderDelAdd::new)
|
||||||
@ -78,8 +87,9 @@ impl ParsedVectorsDiff {
|
|||||||
Ok(Self { old, new })
|
Ok(Self { old, new })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn remove(&mut self, embedder_name: &str) -> (Option<Vectors>, Option<Vectors>) {
|
/// Return (Some(None), _) in case the vector is user defined and contained in the database.
|
||||||
let old = self.old.as_mut().and_then(|old| old.remove(embedder_name));
|
pub fn remove(&mut self, embedder_name: &str) -> (Option<Option<Vectors>>, Option<Vectors>) {
|
||||||
|
let old = self.old.remove(embedder_name);
|
||||||
let new = self.new.as_mut().and_then(|new| new.remove(embedder_name));
|
let new = self.new.as_mut().and_then(|new| new.remove(embedder_name));
|
||||||
(old, new)
|
(old, new)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user