diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index fae93bd66..59436e0bc 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
   schedule:
     # Everyday at 5:00am
-    - cron: '0 5 * * *'
+    - cron: "0 5 * * *"
   pull_request:
   push:
     # trying and staging branches are for Bors config
@@ -84,10 +84,38 @@ jobs:
       - uses: dtolnay/rust-toolchain@1.81
       - name: Run cargo build with almost all features
         run: |
-          cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
+          cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
       - name: Run cargo test with almost all features
         run: |
-          cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
+          cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
+
+  ollama-ubuntu:
+    name: Test with Ollama
+    runs-on: ubuntu-latest
+    env:
+      MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
+    steps:
+      - uses: actions/checkout@v1
+      - uses: dtolnay/rust-toolchain@1.81
+      - name: Install Ollama
+        run: |
+          curl -fsSL https://ollama.com/install.sh | sudo -E sh
+      - name: Start serving
+        run: |
+          # Run it in the background, there is no way to daemonise at the moment
+          ollama serve &
+
+          # The HTTP port takes a moment to open: retry refused connections instead of relying on a fixed pause
+          time curl -i --retry 30 --retry-delay 1 --retry-connrefused http://localhost:11434
+
+      - name: Pull nomic-embed-text & all-minilm
+        run: |
+          ollama pull nomic-embed-text
+          ollama pull all-minilm
+
+      - name: Run cargo test
+        run: |
+          cargo test --locked --release --all --features test-ollama ollama
 
   test-disabled-tokenization:
     name: Test disabled tokenization
diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml
index 1baff114f..60af4dcba 100644
--- 
a/crates/meilisearch/Cargo.toml
+++ b/crates/meilisearch/Cargo.toml
@@ -145,6 +145,7 @@ zip = { version = "2.2.2", optional = true }
 [features]
 default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
 swagger = ["utoipa-scalar"]
+test-ollama = []
 mini-dashboard = [
     "static-files",
     "anyhow",
diff --git a/crates/meilisearch/tests/vector/mod.rs b/crates/meilisearch/tests/vector/mod.rs
index 7dc865e0e..67da51702 100644
--- a/crates/meilisearch/tests/vector/mod.rs
+++ b/crates/meilisearch/tests/vector/mod.rs
@@ -1,4 +1,6 @@
 mod binary_quantized;
+#[cfg(feature = "test-ollama")]
+mod ollama;
 mod openai;
 mod rest;
 mod settings;
diff --git a/crates/meilisearch/tests/vector/ollama.rs b/crates/meilisearch/tests/vector/ollama.rs
new file mode 100644
index 000000000..eb80758df
--- /dev/null
+++ b/crates/meilisearch/tests/vector/ollama.rs
@@ -0,0 +1,749 @@
+//! Tests ollama embedders with the server at the location described by `MEILI_TEST_OLLAMA_SERVER` environment variable.
+
+use std::env::VarError;
+
+use meili_snap::{json_string, snapshot};
+
+use crate::common::{GetAllDocumentsOptions, Value};
+use crate::json;
+use crate::vector::get_server_vector;
+
+/// Ollama HTTP API route used to request embeddings.
+pub enum Endpoint {
+    /// Deprecated, undocumented endpoint
+    Embeddings,
+    /// Current endpoint
+    Embed,
+}
+
+impl Endpoint {
+    /// Path appended to the server base URL to reach this endpoint.
+    fn suffix(&self) -> &'static str {
+        match self {
+            Endpoint::Embeddings => "/api/embeddings",
+            Endpoint::Embed => "/api/embed",
+        }
+    }
+}
+
+/// Embedding models exercised by the tests in this module.
+pub enum Model {
+    Nomic,
+    AllMinilm,
+}
+
+impl Model {
+    /// Model identifier passed as the `model` embedder setting.
+    fn name(&self) -> &'static str {
+        match self {
+            Model::Nomic => "nomic-embed-text",
+            Model::AllMinilm => "all-minilm",
+        }
+    }
+}
+
+/// Document template, in French, describing a dog with wording that depends on `doc.gender`.
+const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}
+        {%- else -%}
+        Un chien nommé {{doc.name}}, né en {{doc.birthyear}}
+        {%- endif %}, de race {{doc.breed}}."#;
+
+/// Builds the settings of an `ollama` embedder for the given template, model and endpoint.
+///
+/// Returns `None` when the `MEILI_TEST_OLLAMA_SERVER` environment variable is not set.
+///
+/// # Panics
+///
+/// Panics when `MEILI_TEST_OLLAMA_SERVER` is set but is not valid unicode.
+fn create_ollama_config_with_template(
+    document_template: &str,
+    model: Model,
+    endpoint: Endpoint,
+) -> Option<Value> {
+    let ollama_base_url = match std::env::var("MEILI_TEST_OLLAMA_SERVER") {
+        Ok(ollama_base_url) => ollama_base_url,
+        Err(VarError::NotPresent) => return None,
+        Err(VarError::NotUnicode(s)) => panic!(
+            "`MEILI_TEST_OLLAMA_SERVER` was not properly utf-8, `{:?}`",
+            s.as_encoded_bytes()
+        ),
+    };
+
+    Some(json!({
+        "source": "ollama",
+        "url": format!("{ollama_base_url}{}", endpoint.suffix()),
+        "documentTemplate": document_template,
+        "documentTemplateMaxBytes": 8000000,
+        "model": model.name()
+    }))
+}
+
+/// Configures one embedder per model and per endpoint, indexes four documents, then snapshots
+/// the stored vectors and the semantic search results obtained through each embedder.
+#[actix_rt::test]
+async fn test_both_apis() {
+    let Some(embed_settings) =
+        create_ollama_config_with_template(DOGGO_TEMPLATE, Model::AllMinilm, Endpoint::Embed)
+    else {
+        // The module is only compiled with the `test-ollama` feature, so a missing server address is an error.
+        panic!("Missing `MEILI_TEST_OLLAMA_SERVER` environment variable, cannot run `test_both_apis` test.");
+    };
+
+    // The variable was readable just above, so the fallbacks below are not expected to trigger.
+    let Some(embeddings_settings) =
+        create_ollama_config_with_template(DOGGO_TEMPLATE, Model::AllMinilm, Endpoint::Embeddings)
+    else {
+        return;
+    };
+
+    let Some(nomic_embed_settings) =
+        create_ollama_config_with_template(DOGGO_TEMPLATE, Model::Nomic, Endpoint::Embed)
+    else {
+        return;
+    };
+
+    let Some(nomic_embeddings_settings) =
+        create_ollama_config_with_template(DOGGO_TEMPLATE, Model::Nomic, Endpoint::Embeddings)
+    else {
+        return;
+    };
+
+    let server = get_server_vector().await;
+
+    let index = server.index("doggo");
+
+    let (response, code) = index
+        .update_settings(json!({
+            "embedders": {
+                "embed": embed_settings,
+                "embeddings": embeddings_settings,
+                "nomic_embed": nomic_embed_settings,
+                "nomic_embeddings": nomic_embeddings_settings,
+            },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    let task = server.wait_task(response.uid()).await;
+    snapshot!(task["status"], @r###""succeeded""###);
+    let documents = json!([
+        {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+        {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+        {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+        {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+    ]);
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let task = index.wait_task(value.uid()).await;
+    snapshot!(task, @r###"
+    {
+      "uid": "[uid]",
+      "batchUid": "[batch_uid]",
+      "indexUid": "doggo",
+      "status": "succeeded",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 4,
+        "indexedDocuments": 4
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (documents, _code) = index
+        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+        .await;
+    snapshot!(json_string!(documents, {".results.*._vectors.*.embeddings" => "[vector]"}), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "name": "kefir",
+          "gender": "M",
+          "birthyear": 2023,
+          "breed": "Patou",
+          "_vectors": {
+            "embed": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "embeddings": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "nomic_embed": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "nomic_embeddings": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            }
+          }
+        },
+        {
+          "id": 1,
+          "name": "Intel",
+          "gender": "M",
+          "birthyear": 2011,
+          "breed": "Beagle",
+          "_vectors": {
+            "embed": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "embeddings": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "nomic_embed": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "nomic_embeddings": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            }
+          }
+        },
+        {
+          "id": 2,
+          "name": "Vénus",
+          "gender": "F",
+          "birthyear": 2003,
+          "breed": "Jack Russel Terrier",
+          "_vectors": {
+            "embed": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "embeddings": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "nomic_embed": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "nomic_embeddings": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            }
+          }
+        },
+        {
+          "id": 3,
+          "name": "Max",
+          "gender": "M",
+          "birthyear": 1995,
+          "breed": "Labrador Retriever",
+          "_vectors": {
+            "embed": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "embeddings": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "nomic_embed": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            },
+            "nomic_embeddings": {
+              "embeddings": "[vector]",
+              "regenerate": true
+            }
+          }
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 4
+    }
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "chien de chasse",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "embed"},
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "chien de chasse",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "embeddings"},
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "petit chien",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "embed"}
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      },
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "petit chien",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "embeddings"}
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      },
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "grand chien de berger des montagnes",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "embed"}
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "grand chien de berger des montagnes",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "embeddings"}
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "chien de chasse",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embed"},
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "chien de chasse",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embeddings"},
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "petit chien",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embed"}
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "petit chien",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embeddings"}
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "grand chien de berger des montagnes",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embed"}
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      }
+    ]
+    "###);
+
+    let (response, code) = index
+        .search_post(json!({
+            "q": "grand chien de berger des montagnes",
+            "hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embeddings"}
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["hits"]), @r###"
+    [
+      {
+        "id": 0,
+        "name": "kefir",
+        "gender": "M",
+        "birthyear": 2023,
+        "breed": "Patou"
+      },
+      {
+        "id": 3,
+        "name": "Max",
+        "gender": "M",
+        "birthyear": 1995,
+        "breed": "Labrador Retriever"
+      },
+      {
+        "id": 2,
+        "name": "Vénus",
+        "gender": "F",
+        "birthyear": 2003,
+        "breed": "Jack Russel Terrier"
+      },
+      {
+        "id": 1,
+        "name": "Intel",
+        "gender": "M",
+        "birthyear": 2011,
+        "breed": "Beagle"
+      }
+    ]
+    "###);
+}
diff --git a/crates/xtask/src/main.rs b/crates/xtask/src/main.rs
index 942362f4f..f260bd404 100644
--- a/crates/xtask/src/main.rs
+++ b/crates/xtask/src/main.rs
@@ -6,8 +6,8 @@ use xtask::bench::BenchDeriveArgs;
 /// List features available in the workspace
 #[derive(Parser, Debug)]
 struct ListFeaturesDeriveArgs {
-    /// Feature to exclude from the list. Repeat the argument to exclude multiple features
-    #[arg(short, long)]
+    /// Feature to exclude from the list. Use a comma to separate multiple features.
+    #[arg(short, long, value_delimiter = ',')]
     exclude_feature: Vec<String>,
 }
 