mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 12:05:05 +08:00
always push the user defined vectors in arroy
This commit is contained in:
parent
a73ccc78a6
commit
5d50850e12
@ -5173,8 +5173,8 @@ mod tests {
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
|
||||
|
||||
println!("HEEEEERE");
|
||||
// handle.advance_one_successful_batch();
|
||||
handle.advance_one_failed_batch();
|
||||
handle.advance_one_successful_batch();
|
||||
// handle.advance_one_failed_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds");
|
||||
|
||||
{
|
||||
@ -5351,9 +5351,9 @@ mod tests {
|
||||
// as user provided since we explicitely marked it as NOT user provided.
|
||||
snapshot!(format!("{conf:#?}"), @r###"
|
||||
[
|
||||
(
|
||||
"my_doggo_embedder",
|
||||
EmbeddingConfig {
|
||||
IndexEmbeddingConfig {
|
||||
name: "my_doggo_embedder",
|
||||
config: EmbeddingConfig {
|
||||
embedder_options: HuggingFace(
|
||||
EmbedderOptions {
|
||||
model: "sentence-transformers/all-MiniLM-L6-v2",
|
||||
@ -5367,8 +5367,8 @@ mod tests {
|
||||
template: "{{doc.doggo}}",
|
||||
},
|
||||
},
|
||||
RoaringBitmap<[1, 2]>,
|
||||
),
|
||||
user_defined: RoaringBitmap<[1, 2]>,
|
||||
},
|
||||
]
|
||||
"###);
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
|
@ -6,10 +6,6 @@ expression: doc
|
||||
"doggo": "Intel",
|
||||
"breed": "beagle",
|
||||
"_vectors": {
|
||||
"A_fakerest": {
|
||||
"embeddings": "[vector]",
|
||||
"userProvided": true
|
||||
},
|
||||
"noise": [
|
||||
0.1,
|
||||
0.2,
|
@ -6,10 +6,6 @@ expression: doc
|
||||
"doggo": "kefir",
|
||||
"breed": "patou",
|
||||
"_vectors": {
|
||||
"A_fakerest": {
|
||||
"embeddings": "[vector]",
|
||||
"userProvided": true
|
||||
},
|
||||
"noise": [
|
||||
0.1,
|
||||
0.2,
|
@ -1,4 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]
|
@ -204,7 +204,7 @@ async fn distribution_shift() {
|
||||
let server = Server::new().await;
|
||||
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
|
||||
|
||||
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}});
|
||||
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
|
||||
let (response, code) = index.search_post(search.clone()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
|
||||
@ -239,20 +239,23 @@ async fn highlighter() {
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 0.2},
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
"retrieveVectors": true,
|
||||
"attributesToHighlight": [
|
||||
"desc",
|
||||
"_vectors",
|
||||
],
|
||||
"highlightPreTag": "**BEGIN**",
|
||||
"highlightPostTag": "**END**"
|
||||
"highlightPreTag": "**BEGIN**",
|
||||
"highlightPostTag": "**END**",
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"0");
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 0.8},
|
||||
"retrieveVectors": true,
|
||||
"showRankingScore": true,
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
@ -262,13 +265,14 @@ async fn highlighter() {
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"3");
|
||||
|
||||
// no highlighting on full semantic
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
|
||||
"hybrid": {"semanticRatio": 1.0},
|
||||
"retrieveVectors": true,
|
||||
"showRankingScore": true,
|
||||
"attributesToHighlight": [
|
||||
"desc"
|
||||
@ -278,7 +282,7 @@ async fn highlighter() {
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
|
||||
snapshot!(response["semanticHitCount"], @"3");
|
||||
}
|
||||
|
||||
@ -361,7 +365,7 @@ async fn single_document() {
|
||||
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
|
||||
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
@ -377,7 +381,7 @@ async fn query_combination() {
|
||||
|
||||
// search without query and vector, but with hybrid => still placeholder
|
||||
let (response, code) = index
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@ -386,7 +390,7 @@ async fn query_combination() {
|
||||
|
||||
// same with a different semantic ratio
|
||||
let (response, code) = index
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true}))
|
||||
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@ -395,7 +399,7 @@ async fn query_combination() {
|
||||
|
||||
// wrong vector dimensions
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
@ -410,7 +414,7 @@ async fn query_combination() {
|
||||
|
||||
// full vector
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@ -419,7 +423,7 @@ async fn query_combination() {
|
||||
|
||||
// full keyword, without a query
|
||||
let (response, code) = index
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@ -428,7 +432,7 @@ async fn query_combination() {
|
||||
|
||||
// query + vector, full keyword => keyword
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"200 OK");
|
||||
@ -437,7 +441,7 @@ async fn query_combination() {
|
||||
|
||||
// query + vector, no hybrid keyword =>
|
||||
let (response, code) = index
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true}))
|
||||
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true}))
|
||||
.await;
|
||||
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
@ -453,7 +457,7 @@ async fn query_combination() {
|
||||
// full vector, without a vector => error
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
|
||||
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
@ -470,7 +474,7 @@ async fn query_combination() {
|
||||
// hybrid without a vector => full keyword
|
||||
let (response, code) = index
|
||||
.search_post(
|
||||
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}),
|
||||
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
|
||||
)
|
||||
.await;
|
||||
|
||||
|
@ -1337,6 +1337,7 @@ async fn experimental_feature_vector_store() {
|
||||
.search_post(json!({
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
"showRankingScore": true,
|
||||
"retrieveVectors": true,
|
||||
}))
|
||||
.await;
|
||||
|
||||
|
@ -78,7 +78,7 @@ async fn basic() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 143}), |response, code| {
|
||||
.similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
@ -88,9 +88,9 @@ async fn basic() {
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.1,
|
||||
0.6,
|
||||
0.8
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
}
|
||||
},
|
||||
@ -100,9 +100,9 @@ async fn basic() {
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.6,
|
||||
0.8,
|
||||
-0.2
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
}
|
||||
},
|
||||
@ -112,9 +112,9 @@ async fn basic() {
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.7,
|
||||
0.7,
|
||||
-0.4
|
||||
0.699999988079071,
|
||||
0.699999988079071,
|
||||
-0.4000000059604645
|
||||
]
|
||||
}
|
||||
},
|
||||
@ -124,8 +124,8 @@ async fn basic() {
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.8,
|
||||
0.4,
|
||||
0.800000011920929,
|
||||
0.4000000059604645,
|
||||
-0.5
|
||||
]
|
||||
}
|
||||
@ -136,7 +136,7 @@ async fn basic() {
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": "299537"}), |response, code| {
|
||||
.similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
@ -146,9 +146,9 @@ async fn basic() {
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.7,
|
||||
0.7,
|
||||
-0.4
|
||||
0.699999988079071,
|
||||
0.699999988079071,
|
||||
-0.4000000059604645
|
||||
]
|
||||
}
|
||||
},
|
||||
@ -158,8 +158,8 @@ async fn basic() {
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.8,
|
||||
0.4,
|
||||
0.800000011920929,
|
||||
0.4000000059604645,
|
||||
-0.5
|
||||
]
|
||||
}
|
||||
@ -170,9 +170,9 @@ async fn basic() {
|
||||
"id": "522681",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.1,
|
||||
0.6,
|
||||
0.8
|
||||
0.10000000149011612,
|
||||
0.6000000238418579,
|
||||
0.800000011920929
|
||||
]
|
||||
}
|
||||
},
|
||||
@ -183,8 +183,8 @@ async fn basic() {
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-0.5,
|
||||
0.3,
|
||||
0.85
|
||||
0.30000001192092896,
|
||||
0.8500000238418579
|
||||
]
|
||||
}
|
||||
}
|
||||
@ -456,71 +456,77 @@ async fn filter() {
|
||||
index.wait_task(value.uid()).await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.6,
|
||||
0.8,
|
||||
-0.2
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"release_year": 2019,
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.7,
|
||||
0.7,
|
||||
-0.4
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"release_year": 2019,
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.8,
|
||||
0.4,
|
||||
-0.5
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "How to Train Your Dragon: The Hidden World",
|
||||
"release_year": 2019,
|
||||
"id": "166428",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.699999988079071,
|
||||
0.699999988079071,
|
||||
-0.4000000059604645
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Shazam!",
|
||||
"release_year": 2019,
|
||||
"id": "287947",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.800000011920929,
|
||||
0.4000000059604645,
|
||||
-0.5
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "All Quiet on the Western Front",
|
||||
"release_year": 1930,
|
||||
"id": "143",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-0.5,
|
||||
0.3,
|
||||
0.85
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "All Quiet on the Western Front",
|
||||
"release_year": 1930,
|
||||
"id": "143",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
-0.5,
|
||||
0.30000001192092896,
|
||||
0.8500000238418579
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@ -579,24 +585,27 @@ async fn limit_and_offset() {
|
||||
.await;
|
||||
|
||||
index
|
||||
.similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.6,
|
||||
0.8,
|
||||
-0.2
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.similar(
|
||||
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"title": "Captain Marvel",
|
||||
"release_year": 2019,
|
||||
"id": "299537",
|
||||
"_vectors": {
|
||||
"manual": [
|
||||
0.6000000238418579,
|
||||
0.800000011920929,
|
||||
-0.20000000298023224
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
@ -1,244 +0,0 @@
|
||||
---
|
||||
source: milli/src/search/new/tests/attribute_fid.rs
|
||||
expression: "format!(\"{document_ids_scores:#?}\")"
|
||||
---
|
||||
[
|
||||
(
|
||||
2,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 19,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 91,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
6,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 15,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 81,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
5,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 14,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 79,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
4,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 13,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 77,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
3,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 12,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 83,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
9,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 11,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 75,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
8,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 79,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
7,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 10,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 73,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
11,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 7,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 77,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
10,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 81,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
13,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 81,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
12,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 6,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 78,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
14,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 5,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 75,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
0,
|
||||
[
|
||||
Fid(
|
||||
Rank {
|
||||
rank: 1,
|
||||
max_rank: 19,
|
||||
},
|
||||
),
|
||||
Position(
|
||||
Rank {
|
||||
rank: 91,
|
||||
max_rank: 91,
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
]
|
@ -1,7 +0,0 @@
|
||||
---
|
||||
source: milli/src/index.rs
|
||||
---
|
||||
age 1 |
|
||||
id 2 |
|
||||
name 2 |
|
||||
|
@ -1,7 +0,0 @@
|
||||
---
|
||||
source: milli/src/index.rs
|
||||
---
|
||||
age 1 |
|
||||
id 2 |
|
||||
name 2 |
|
||||
|
@ -8,7 +8,6 @@ use std::sync::Arc;
|
||||
|
||||
use bytemuck::cast_slice;
|
||||
use grenad::Writer;
|
||||
use itertools::EitherOrBoth;
|
||||
use ordered_float::OrderedFloat;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::Value;
|
||||
@ -50,7 +49,7 @@ enum VectorStateDelta {
|
||||
// Note: changing the value of the manually specified vector **should not record** this delta
|
||||
WasGeneratedNowManual(Vec<Vec<f32>>),
|
||||
|
||||
ManualDelta(Vec<Vec<f32>>, Vec<Vec<f32>>),
|
||||
ManualDelta(Vec<Vec<f32>>),
|
||||
|
||||
// Add the vector computed from the specified prompt
|
||||
// Remove any previous vector
|
||||
@ -59,14 +58,12 @@ enum VectorStateDelta {
|
||||
}
|
||||
|
||||
impl VectorStateDelta {
|
||||
fn into_values(self) -> (bool, String, (Vec<Vec<f32>>, Vec<Vec<f32>>)) {
|
||||
fn into_values(self) -> (bool, String, Vec<Vec<f32>>) {
|
||||
match self {
|
||||
VectorStateDelta::NoChange => Default::default(),
|
||||
VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()),
|
||||
VectorStateDelta::WasGeneratedNowManual(add) => {
|
||||
(true, Default::default(), (Default::default(), add))
|
||||
}
|
||||
VectorStateDelta::ManualDelta(del, add) => (false, Default::default(), (del, add)),
|
||||
VectorStateDelta::WasGeneratedNowManual(add) => (true, Default::default(), add),
|
||||
VectorStateDelta::ManualDelta(add) => (false, Default::default(), add),
|
||||
VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()),
|
||||
}
|
||||
}
|
||||
@ -166,8 +163,14 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
// lazily get it when needed
|
||||
let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };
|
||||
|
||||
let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid)
|
||||
.map_err(|error| error.to_crate_error(document_id().to_string()))?;
|
||||
let mut parsed_vectors = ParsedVectorsDiff::new(
|
||||
docid,
|
||||
embedders_configs,
|
||||
obkv,
|
||||
old_vectors_fid,
|
||||
new_vectors_fid,
|
||||
)
|
||||
.map_err(|error| error.to_crate_error(document_id().to_string()))?;
|
||||
|
||||
for EmbedderVectorExtractor {
|
||||
embedder_name,
|
||||
@ -182,7 +185,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
{
|
||||
let delta = match parsed_vectors.remove(embedder_name) {
|
||||
(Some(old), Some(new)) => {
|
||||
match (old.is_user_provided(), new.is_user_provided()) {
|
||||
match (old.map_or(true, |old| old.is_user_provided()), new.is_user_provided()) {
|
||||
(true, true) | (false, false) => (),
|
||||
(true, false) => {
|
||||
remove_from_user_defined.insert(docid);
|
||||
@ -193,7 +196,6 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
}
|
||||
|
||||
// no autogeneration
|
||||
let del_vectors = old.into_array_of_vectors();
|
||||
let add_vectors = new.into_array_of_vectors();
|
||||
|
||||
if add_vectors.len() > usize::from(u8::MAX) {
|
||||
@ -203,15 +205,15 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
)));
|
||||
}
|
||||
|
||||
VectorStateDelta::ManualDelta(del_vectors, add_vectors)
|
||||
VectorStateDelta::ManualDelta(add_vectors)
|
||||
}
|
||||
(Some(_old), None) => {
|
||||
(Some(old), None) => {
|
||||
// Do we keep this document?
|
||||
let document_is_kept = obkv
|
||||
.iter()
|
||||
.map(|(_, deladd)| KvReaderDelAdd::new(deladd))
|
||||
.any(|deladd| deladd.get(DelAdd::Addition).is_some());
|
||||
if document_is_kept {
|
||||
if document_is_kept && old.is_some() {
|
||||
remove_from_user_defined.insert(docid);
|
||||
// becomes autogenerated
|
||||
VectorStateDelta::NowGenerated(prompt.render(
|
||||
@ -219,6 +221,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
DelAdd::Addition,
|
||||
new_fields_ids_map,
|
||||
)?)
|
||||
} else if document_is_kept && old.is_none() {
|
||||
VectorStateDelta::NoChange
|
||||
} else {
|
||||
VectorStateDelta::NowRemoved
|
||||
}
|
||||
@ -315,8 +319,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Computes the diff between both Del and Add numbers and
|
||||
/// only inserts the parts that differ in the sorter.
|
||||
/// We cannot compute the diff between both Del and Add vectors.
|
||||
/// We'll push every vector and compute the difference later in TypedChunk.
|
||||
fn push_vectors_diff(
|
||||
remove_vectors_writer: &mut Writer<BufWriter<File>>,
|
||||
prompts_writer: &mut Writer<BufWriter<File>>,
|
||||
@ -325,7 +329,7 @@ fn push_vectors_diff(
|
||||
delta: VectorStateDelta,
|
||||
reindex_vectors: bool,
|
||||
) -> Result<()> {
|
||||
let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
|
||||
let (must_remove, prompt, mut add_vectors) = delta.into_values();
|
||||
if must_remove
|
||||
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
|
||||
// When vector pipeline will be optimized, this should be removed.
|
||||
@ -340,44 +344,25 @@ fn push_vectors_diff(
|
||||
}
|
||||
|
||||
// We sort and dedup the vectors
|
||||
del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
|
||||
add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
|
||||
del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
|
||||
add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
|
||||
|
||||
let merged_vectors_iter =
|
||||
itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
|
||||
// let merged_vectors_iter =
|
||||
// itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
|
||||
|
||||
// insert vectors into the writer
|
||||
for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) {
|
||||
for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
||||
// Generate the key by extending the unique index to it.
|
||||
key_buffer.truncate(TRUNCATE_SIZE);
|
||||
let index = u16::try_from(i).unwrap();
|
||||
key_buffer.extend_from_slice(&index.to_be_bytes());
|
||||
|
||||
match eob {
|
||||
EitherOrBoth::Both(_, _) => (), // no need to touch anything
|
||||
EitherOrBoth::Left(vector) => {
|
||||
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
|
||||
// When vector pipeline will be optimized, this should be removed.
|
||||
if !reindex_vectors {
|
||||
// We insert only the Del part of the Obkv to inform
|
||||
// that we only want to remove all those vectors.
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
obkv.insert(DelAdd::Deletion, cast_slice(&vector))?;
|
||||
let bytes = obkv.into_inner()?;
|
||||
manual_vectors_writer.insert(&key_buffer, bytes)?;
|
||||
}
|
||||
}
|
||||
EitherOrBoth::Right(vector) => {
|
||||
// We insert only the Add part of the Obkv to inform
|
||||
// that we only want to remove all those vectors.
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
|
||||
let bytes = obkv.into_inner()?;
|
||||
manual_vectors_writer.insert(&key_buffer, bytes)?;
|
||||
}
|
||||
}
|
||||
// We insert only the Add part of the Obkv to inform
|
||||
// that we only want to remove all those vectors.
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
|
||||
let bytes = obkv.into_inner()?;
|
||||
manual_vectors_writer.insert(&key_buffer, bytes)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -4,8 +4,9 @@ use obkv::KvReader;
|
||||
use serde_json::{from_slice, Value};
|
||||
|
||||
use super::Embedding;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
use crate::{FieldId, InternalError, UserError};
|
||||
use crate::{DocumentId, FieldId, InternalError, UserError};
|
||||
|
||||
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
|
||||
|
||||
@ -42,17 +43,19 @@ pub struct ExplicitVectors {
|
||||
}
|
||||
|
||||
pub struct ParsedVectorsDiff {
|
||||
pub old: Option<BTreeMap<String, Vectors>>,
|
||||
pub old: BTreeMap<String, Option<Vectors>>,
|
||||
pub new: Option<BTreeMap<String, Vectors>>,
|
||||
}
|
||||
|
||||
impl ParsedVectorsDiff {
|
||||
pub fn new(
|
||||
docid: DocumentId,
|
||||
embedders_configs: &[IndexEmbeddingConfig],
|
||||
documents_diff: KvReader<'_, FieldId>,
|
||||
old_vectors_fid: Option<FieldId>,
|
||||
new_vectors_fid: Option<FieldId>,
|
||||
) -> Result<Self, Error> {
|
||||
let old = match old_vectors_fid
|
||||
let mut old = match old_vectors_fid
|
||||
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
|
||||
.map(KvReaderDelAdd::new)
|
||||
.map(|obkv| to_vector_map(obkv, DelAdd::Deletion))
|
||||
@ -68,7 +71,13 @@ impl ParsedVectorsDiff {
|
||||
return Err(error);
|
||||
}
|
||||
}
|
||||
.flatten();
|
||||
.flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, Some(vec))).collect());
|
||||
for embedding_config in embedders_configs {
|
||||
if embedding_config.user_defined.contains(docid) {
|
||||
old.entry(embedding_config.name.to_string()).or_insert(None);
|
||||
}
|
||||
}
|
||||
|
||||
let new = new_vectors_fid
|
||||
.and_then(|vectors_fid| documents_diff.get(vectors_fid))
|
||||
.map(KvReaderDelAdd::new)
|
||||
@ -78,8 +87,9 @@ impl ParsedVectorsDiff {
|
||||
Ok(Self { old, new })
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, embedder_name: &str) -> (Option<Vectors>, Option<Vectors>) {
|
||||
let old = self.old.as_mut().and_then(|old| old.remove(embedder_name));
|
||||
/// Return (Some(None), _) in case the vector is user defined and contained in the database.
|
||||
pub fn remove(&mut self, embedder_name: &str) -> (Option<Option<Vectors>>, Option<Vectors>) {
|
||||
let old = self.old.remove(embedder_name);
|
||||
let new = self.new.as_mut().and_then(|new| new.remove(embedder_name));
|
||||
(old, new)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user