From 806e5b68997d2c0081e54be2f4f9367aefb0e006 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 13 Dec 2023 21:49:13 +0100 Subject: [PATCH] Tests pass --- meilisearch/tests/search/mod.rs | 122 +++++++++++++++++++++--- meilisearch/tests/search/multi.rs | 45 ++++++++- milli/src/search/new/vector_sort.rs | 7 +- milli/src/update/index_documents/mod.rs | 30 +++++- 4 files changed, 179 insertions(+), 25 deletions(-) diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index fa97beaaf..ad9c2aaa2 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -20,22 +20,27 @@ static DOCUMENTS: Lazy = Lazy::new(|| { { "title": "Shazam!", "id": "287947", + "_vectors": { "manual": [1, 2, 3]}, }, { "title": "Captain Marvel", "id": "299537", + "_vectors": { "manual": [1, 2, 54] }, }, { "title": "Escape Room", "id": "522681", + "_vectors": { "manual": [10, -23, 32] }, }, { "title": "How to Train Your Dragon: The Hidden World", "id": "166428", + "_vectors": { "manual": [-100, 231, 32] }, }, { "title": "Gläss", "id": "450465", + "_vectors": { "manual": [-100, 340, 90] }, } ]) }); @@ -57,6 +62,7 @@ static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { }, ], "cattos": "pésti", + "_vectors": { "manual": [1, 2, 3]}, }, { "id": 654, @@ -69,12 +75,14 @@ static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { }, ], "cattos": ["simba", "pestiféré"], + "_vectors": { "manual": [1, 2, 54] }, }, { "id": 750, "father": "romain", "mother": "michelle", "cattos": ["enigma"], + "_vectors": { "manual": [10, 23, 32] }, }, { "id": 951, @@ -91,6 +99,7 @@ static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { }, ], "cattos": ["moumoute", "gomez"], + "_vectors": { "manual": [10, 23, 32] }, }, ]) }); @@ -802,6 +811,13 @@ async fn experimental_feature_score_details() { { "title": "How to Train Your Dragon: The Hidden World", "id": "166428", + "_vectors": { + "manual": [ + -100, + 231, + 32 + ] + }, "_rankingScoreDetails": { "words": { "order": 0, @@ -823,7 +839,7 @@ async fn experimental_feature_score_details() { "order": 3, "attributeRankingOrderScore": 1.0, "queryWordDistanceScore": 0.8095238095238095, - "score": 0.9365079365079364 + "score": 0.9727891156462584 }, "exactness": { "order": 4, @@ -870,34 +886,89 @@ async fn experimental_feature_vector_store() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(response["vectorStore"], @"true"); + let (response, code) = index + .update_settings(json!({"embedders": { + "manual": { + "source": { + "userProvided": {"dimensions": 3} + } + } + }})) + .await; + + meili_snap::snapshot!(code, @"202 Accepted"); + let response = index.wait_task(response.uid()).await; + + meili_snap::snapshot!(meili_snap::json_string!(response["status"]), @"\"succeeded\""); + let (response, code) = index .search_post(json!({ "vector": [1.0, 2.0, 3.0], })) .await; + meili_snap::snapshot!(code, @"200 OK"); // vector search returns all documents that don't have vectors in the last bucket, like all sorts meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" [ { "title": "Shazam!", - "id": "287947" + "id": "287947", + "_vectors": { + "manual": [ + 1, + 2, + 3 + ] + }, + "_semanticScore": 1.0 }, { "title": "Captain Marvel", - "id": "299537" - }, - { - "title": "Escape Room", - "id": "522681" - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "id": "166428" + "id": "299537", + "_vectors": { + "manual": [ + 1, + 2, + 54 + ] + }, + "_semanticScore": 0.9129112 }, { "title": "Gläss", - "id": "450465" + "id": "450465", + "_vectors": { + "manual": [ + -100, + 340, + 90 + ] + }, + "_semanticScore": 0.8106413 + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "id": "166428", + "_vectors": { + "manual": [ + -100, + 231, + 32 + ] + }, + "_semanticScore": 0.74120104 + }, + { + "title": "Escape Room", + "id": "522681", + "_vectors": { + "manual": [ + 10, + -23, + 32 + ] + } } ] "###); @@ -1150,7 +1221,14 @@ async fn simple_search_with_strange_synonyms() { [ { "title": "How to Train Your Dragon: The Hidden World", - "id": "166428" + "id": "166428", + "_vectors": { + "manual": [ + -100, + 231, + 32 + ] + } } ] "###); @@ -1164,7 +1242,14 @@ async fn simple_search_with_strange_synonyms() { [ { "title": "How to Train Your Dragon: The Hidden World", - "id": "166428" + "id": "166428", + "_vectors": { + "manual": [ + -100, + 231, + 32 + ] + } } ] "###); @@ -1178,7 +1263,14 @@ async fn simple_search_with_strange_synonyms() { [ { "title": "How to Train Your Dragon: The Hidden World", - "id": "166428" + "id": "166428", + "_vectors": { + "manual": [ + -100, + 231, + 32 + ] + } } ] "###); diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 0e2e5158d..aeec1bad4 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -72,7 +72,14 @@ async fn simple_search_single_index() { "hits": [ { "title": "Gläss", - "id": "450465" + "id": "450465", + "_vectors": { + "manual": [ + -100, + 340, + 90 + ] + } } ], "query": "glass", @@ -86,7 +93,14 @@ async fn simple_search_single_index() { "hits": [ { "title": "Captain Marvel", - "id": "299537" + "id": "299537", + "_vectors": { + "manual": [ + 1, + 2, + 54 + ] + } } ], "query": "captain", @@ -177,7 +191,14 @@ async fn simple_search_two_indexes() { "hits": [ { "title": "Gläss", - "id": "450465" + "id": "450465", + "_vectors": { + "manual": [ + -100, + 340, + 90 + ] + } } ], "query": "glass", @@ -203,7 +224,14 @@ async fn simple_search_two_indexes() { "age": 4 } ], - "cattos": "pésti" + "cattos": "pésti", + "_vectors": { + "manual": [ + 1, + 2, + 3 + ] + } }, { "id": 654, @@ -218,7 +246,14 @@ async fn simple_search_two_indexes() { "cattos": [ "simba", "pestiféré" - ] + ], + "_vectors": { + "manual": [ + 1, + 2, + 54 + ] + } } ], "query": "pésti", diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index 6a37ceb7d..b29a72827 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -27,8 +27,11 @@ impl VectorSort { distribution_shift: Option, embedder_name: &str, ) -> Result { - /// FIXME: unwrap - let embedder_index = ctx.index.embedder_category_id.get(ctx.txn, embedder_name)?.unwrap(); + let embedder_index = ctx + .index + .embedder_category_id + .get(ctx.txn, embedder_name)? + .ok_or_else(|| crate::UserError::InvalidEmbedder(embedder_name.to_owned()))?; Ok(Self { query: None, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index efc6b22ff..6906bbcd3 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -750,6 +750,8 @@ fn execute_word_prefix_docids( #[cfg(test)] mod tests { + use std::collections::BTreeMap; + use big_s::S; use fst::IntoStreamer; use heed::RwTxn; @@ -759,6 +761,7 @@ mod tests { use crate::documents::documents_batch_reader_from_objects; use crate::index::tests::TempIndex; use crate::search::TermsMatchingStrategy; + use crate::update::Setting; use crate::{db_snap, Filter, Search}; #[test] @@ -2550,13 +2553,34 @@ mod tests { /// Vectors must be of the same length. #[test] fn test_multiple_vectors() { + use crate::vector::settings::{EmbedderSettings, EmbeddingSettings}; let index = TempIndex::new(); - index.add_documents(documents!([{"id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] }])).unwrap(); - index.add_documents(documents!([{"id": 1, "_vectors": [6, 7, 8] }])).unwrap(); + index + .update_settings(|settings| { + let mut embedders = BTreeMap::default(); + embedders.insert( + "manual".to_string(), + Setting::Set(EmbeddingSettings { + embedder_options: Setting::Set(EmbedderSettings::UserProvided( + crate::vector::settings::UserProvidedSettings { dimensions: 3 }, + )), + document_template: Setting::NotSet, + }), + ); + settings.set_embedder_settings(embedders); + }) + .unwrap(); + index .add_documents( - documents!([{"id": 2, "_vectors": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }]), + documents!([{"id": 0, "_vectors": { "manual": [[0, 1, 2], [3, 4, 5]] } }]), + ) + .unwrap(); + index.add_documents(documents!([{"id": 1, "_vectors": { "manual": [6, 7, 8] }}])).unwrap(); + index + .add_documents( + documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]), ) .unwrap();