Merge #5308
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m50s
Publish binaries to GitHub release / Check the version validity (push) Failing after 7s
Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been skipped
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been skipped
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
Test suite / Tests on macos-13 (push) Waiting to run
Test suite / Tests almost all features (push) Failing after 1s
Test suite / Tests on ubuntu-20.04 (push) Failing after 14s
Test suite / Tests on windows-2022 (push) Failing after 25s
Test suite / Test disabled tokenization (push) Failing after 15s
Test suite / Run tests in debug (push) Failing after 16s
Test suite / Test with Ollama (push) Failing after 1m0s
Test suite / Run Rustfmt (push) Successful in 2m20s
Test suite / Run Clippy (push) Successful in 6m43s
Look for flaky tests / flaky (push) Failing after 13s

5308: Ollama Integration Tests r=dureuill a=Kerollmops

This PR improves test coverage of #4757 by providing a new CI to test the Ollama setup with Ollama.

## To Do
- [x] Clean up the commits
- [x] Feature gate the Ollama tests and run them only in the CI

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-02-06 17:21:51 +00:00 committed by GitHub
commit f2185438ee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 773 additions and 5 deletions

View File

@ -4,7 +4,7 @@ on:
workflow_dispatch:
schedule:
# Everyday at 5:00am
- cron: '0 5 * * *'
- cron: "0 5 * * *"
pull_request:
push:
# trying and staging branches are for Bors config
@ -84,10 +84,42 @@ jobs:
- uses: dtolnay/rust-toolchain@1.81
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
ollama-ubuntu:
name: Test with Ollama
runs-on: ubuntu-latest
env:
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
steps:
- uses: actions/checkout@v1
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh
- name: Start serving
run: |
# Run it in the background, there is no way to daemonise at the moment
ollama serve &
# A short pause is required before the HTTP port is opened
sleep 5
# This endpoint blocks until ready
time curl -i http://localhost:11434
- name: Pull nomic-embed-text & all-minilm
run: |
ollama pull nomic-embed-text
ollama pull all-minilm
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all --features test-ollama ollama
test-disabled-tokenization:
name: Test disabled tokenization

View File

@ -145,6 +145,7 @@ zip = { version = "2.2.2", optional = true }
[features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
swagger = ["utoipa-scalar"]
test-ollama = []
mini-dashboard = [
"static-files",
"anyhow",

View File

@ -1,4 +1,6 @@
mod binary_quantized;
#[cfg(feature = "test-ollama")]
mod ollama;
mod openai;
mod rest;
mod settings;

View File

@ -0,0 +1,733 @@
//! Tests ollama embedders with the server at the location described by `MEILI_TEST_OLLAMA_SERVER` environment variable.
use std::env::VarError;
use meili_snap::{json_string, snapshot};
use crate::common::{GetAllDocumentsOptions, Value};
use crate::json;
use crate::vector::get_server_vector;
pub enum Endpoint {
/// Deprecated, undocumented endpoint
Embeddings,
/// Current endpoint
Embed,
}
impl Endpoint {
fn suffix(&self) -> &'static str {
match self {
Endpoint::Embeddings => "/api/embeddings",
Endpoint::Embed => "/api/embed",
}
}
}
pub enum Model {
Nomic,
AllMinilm,
}
impl Model {
fn name(&self) -> &'static str {
match self {
Model::Nomic => "nomic-embed-text",
Model::AllMinilm => "all-minilm",
}
}
}
const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}
{%- else -%}
Un chien nommé {{doc.name}}, en {{doc.birthyear}}
{%- endif %}, de race {{doc.breed}}."#;
fn create_ollama_config_with_template(
document_template: &str,
model: Model,
endpoint: Endpoint,
) -> Option<Value> {
let ollama_base_url = match std::env::var("MEILI_TEST_OLLAMA_SERVER") {
Ok(ollama_base_url) => ollama_base_url,
Err(VarError::NotPresent) => return None,
Err(VarError::NotUnicode(s)) => panic!(
"`MEILI_TEST_OLLAMA_SERVER` was not properly utf-8, `{:?}`",
s.as_encoded_bytes()
),
};
Some(json!({
"source": "ollama",
"url": format!("{ollama_base_url}{}", endpoint.suffix()),
"documentTemplate": document_template,
"documentTemplateMaxBytes": 8000000,
"model": model.name()
}))
}
#[actix_rt::test]
async fn test_both_apis() {
let Some(embed_settings) =
create_ollama_config_with_template(DOGGO_TEMPLATE, Model::AllMinilm, Endpoint::Embed)
else {
panic!("Missing `MEILI_TEST_OLLAMA_SERVER` environment variable, skipping `test_both_apis` test.");
};
let Some(embeddings_settings) =
create_ollama_config_with_template(DOGGO_TEMPLATE, Model::AllMinilm, Endpoint::Embeddings)
else {
return;
};
let Some(nomic_embed_settings) =
create_ollama_config_with_template(DOGGO_TEMPLATE, Model::Nomic, Endpoint::Embed)
else {
return;
};
let Some(nomic_embeddings_settings) =
create_ollama_config_with_template(DOGGO_TEMPLATE, Model::Nomic, Endpoint::Embeddings)
else {
return;
};
let server = get_server_vector().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"embed": embed_settings,
"embeddings": embeddings_settings,
"nomic_embed": nomic_embed_settings,
"nomic_embeddings": nomic_embeddings_settings,
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!([
{"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
{"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
{"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
{"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents, {".results.*._vectors.*.embeddings" => "[vector]"}), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou",
"_vectors": {
"embed": {
"embeddings": "[vector]",
"regenerate": true
},
"embeddings": {
"embeddings": "[vector]",
"regenerate": true
},
"nomic_embed": {
"embeddings": "[vector]",
"regenerate": true
},
"nomic_embeddings": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle",
"_vectors": {
"embed": {
"embeddings": "[vector]",
"regenerate": true
},
"embeddings": {
"embeddings": "[vector]",
"regenerate": true
},
"nomic_embed": {
"embeddings": "[vector]",
"regenerate": true
},
"nomic_embeddings": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier",
"_vectors": {
"embed": {
"embeddings": "[vector]",
"regenerate": true
},
"embeddings": {
"embeddings": "[vector]",
"regenerate": true
},
"nomic_embed": {
"embeddings": "[vector]",
"regenerate": true
},
"nomic_embeddings": {
"embeddings": "[vector]",
"regenerate": true
}
}
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever",
"_vectors": {
"embed": {
"embeddings": "[vector]",
"regenerate": true
},
"embeddings": {
"embeddings": "[vector]",
"regenerate": true
},
"nomic_embed": {
"embeddings": "[vector]",
"regenerate": true
},
"nomic_embeddings": {
"embeddings": "[vector]",
"regenerate": true
}
}
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0, "embedder": "embed"},
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0, "embedder": "embeddings"},
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0, "embedder": "embed"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0, "embedder": "embeddings"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0, "embedder": "embed"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0, "embedder": "embeddings"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embed"},
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embeddings"},
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embed"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "petit chien",
"hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embeddings"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embed"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0, "embedder": "nomic_embeddings"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
},
{
"id": 3,
"name": "Max",
"gender": "M",
"birthyear": 1995,
"breed": "Labrador Retriever"
},
{
"id": 2,
"name": "Vénus",
"gender": "F",
"birthyear": 2003,
"breed": "Jack Russel Terrier"
},
{
"id": 1,
"name": "Intel",
"gender": "M",
"birthyear": 2011,
"breed": "Beagle"
}
]
"###);
}

View File

@ -6,8 +6,8 @@ use xtask::bench::BenchDeriveArgs;
/// List features available in the workspace
#[derive(Parser, Debug)]
struct ListFeaturesDeriveArgs {
/// Feature to exclude from the list. Repeat the argument to exclude multiple features
#[arg(short, long)]
/// Feature to exclude from the list. Use a comma to separate multiple features.
#[arg(short, long, value_delimiter = ',')]
exclude_feature: Vec<String>,
}