meilisearch/crates/milli/src/vector/ollama.rs

use std::time::Instant;

use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
use rayon::slice::ParallelSlice as _;

use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
use super::{DistributionShift, REQUEST_PARALLELISM};
use crate::error::FaultSource;
use crate::vector::Embedding;
use crate::ThreadPoolNoAbort;

/// Embedder for Ollama, implemented as a wrapper around the generic REST embedder.
#[derive(Debug)]
pub struct Embedder {
    // Every embedding request and every getter on `Embedder` is delegated to this value.
    rest_embedder: RestEmbedder,
}

/// User-facing configuration of an Ollama embedder.
///
/// These options are converted into REST embedder options before the embedder is built.
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
    /// Model name, sent as the `"model"` field of the request body.
    pub embedding_model: String,
    /// Full URL of the embedding endpoint. When `None`, the value returned by
    /// `get_ollama_path` is used instead.
    pub url: Option<String>,
    /// Optional API key, forwarded unchanged to the REST embedder options.
    pub api_key: Option<String>,
    /// Optional distribution shift, forwarded unchanged to the REST embedder options.
    pub distribution: Option<DistributionShift>,
    /// Optional explicit dimensions, forwarded unchanged to the REST embedder options.
    pub dimensions: Option<usize>,
}

impl EmbedderOptions {
    /// Builds options for the default `nomic-embed-text` model, without any distribution shift.
    pub fn with_default_model(
        api_key: Option<String>,
        url: Option<String>,
        dimensions: Option<usize>,
    ) -> Self {
        let embedding_model = String::from("nomic-embed-text");
        Self { embedding_model, api_key, url, distribution: None, dimensions }
    }

    /// Converts these options into the equivalent REST embedder options.
    ///
    /// The request and response templates are picked from the URL suffix:
    /// `/api/embeddings` gets a single-prompt template, `/api/embed` gets a
    /// repeated-input template.
    ///
    /// # Errors
    ///
    /// Returns `NewEmbedderError::ollama_unsupported_url` when the URL ends with
    /// neither of the two supported suffixes.
    fn into_rest_embedder_config(self) -> Result<RestEmbedderOptions, NewEmbedderError> {
        let Self { embedding_model, url, api_key, distribution, dimensions } = self;
        let url = url.unwrap_or_else(get_ollama_path);
        let model = embedding_model.as_str();

        // `/api/embeddings` is tested first. Note that `/api/embed` is a prefix of
        // `/api/embeddings`: with `ends_with` a URL matches at most one of the two
        // suffixes, but the order would matter if these ever became prefix or
        // substring checks.
        let templates = if url.ends_with("/api/embeddings") {
            let request = serde_json::json!({
                "model": model,
                "prompt": super::rest::REQUEST_PLACEHOLDER,
            });
            let response = serde_json::json!({
                "embedding": super::rest::RESPONSE_PLACEHOLDER,
            });
            Some((request, response))
        } else if url.ends_with("/api/embed") {
            let request = serde_json::json!({
                "model": model,
                "input": [super::rest::REQUEST_PLACEHOLDER, super::rest::REPEAT_PLACEHOLDER],
            });
            let response = serde_json::json!({
                "embeddings": [super::rest::RESPONSE_PLACEHOLDER, super::rest::REPEAT_PLACEHOLDER],
            });
            Some((request, response))
        } else {
            None
        };

        match templates {
            Some((request, response)) => Ok(RestEmbedderOptions {
                api_key,
                dimensions,
                distribution,
                url,
                request,
                response,
                headers: Default::default(),
            }),
            None => Err(NewEmbedderError::ollama_unsupported_url(url)),
        }
    }
}

impl Embedder {
    /// Creates an Ollama embedder by building a REST embedder from `options`.
    ///
    /// # Errors
    ///
    /// Fails when `options` cannot be converted into REST options, or when the REST
    /// embedder cannot be created. A dimension-detection failure carrying a 404 status
    /// is re-reported as an Ollama "model not found" error.
    pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
        let config = options.into_rest_embedder_config()?;

        RestEmbedder::new(config, super::rest::ConfigurationSource::Ollama)
            .map(|rest_embedder| Self { rest_embedder })
            .map_err(|failure| match failure {
                NewEmbedderError {
                    kind:
                        NewEmbedderErrorKind::CouldNotDetermineDimension(EmbedError {
                            kind: EmbedErrorKind::RestOtherStatusCode(404, message),
                            ..
                        }),
                    ..
                } => NewEmbedderError::could_not_determine_dimension(
                    EmbedError::ollama_model_not_found(message),
                ),
                other => other,
            })
    }

    /// Embeds `texts` through the REST embedder, forwarding `deadline` unchanged.
    ///
    /// # Errors
    ///
    /// Any REST embedder error is returned as-is, except a 404 status, which is
    /// re-reported as an Ollama "model not found" error.
    pub fn embed<S: AsRef<str> + serde::Serialize>(
        &self,
        texts: &[S],
        deadline: Option<Instant>,
    ) -> Result<Vec<Embedding>, EmbedError> {
        self.rest_embedder.embed_ref(texts, deadline).map_err(|failure| match failure {
            EmbedError { kind: EmbedErrorKind::RestOtherStatusCode(404, message), .. } => {
                EmbedError::ollama_model_not_found(message)
            }
            other => other,
        })
    }

    /// Embeds every chunk of `text_chunks` in parallel on `threads`, without a deadline.
    ///
    /// Returns one list of embeddings per input chunk.
    ///
    /// # Errors
    ///
    /// Returns an embedding error from any chunk, or a `PanicInThreadPool` error
    /// (flagged as a bug) when the thread pool reports a panic.
    pub fn embed_chunks(
        &self,
        text_chunks: Vec<Vec<String>>,
        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
        let pooled = threads.install(move || {
            text_chunks
                .into_par_iter()
                .map(move |chunk| self.embed(&chunk, None))
                .collect::<Result<Vec<Vec<Embedding>>, EmbedError>>()
        });

        match pooled {
            Ok(outcome) => outcome,
            Err(panic) => Err(EmbedError {
                kind: EmbedErrorKind::PanicInThreadPool(panic),
                fault: FaultSource::Bug,
            }),
        }
    }

    /// Embeds `texts` in chunks of `prompt_count_in_chunk_hint()` prompts and returns
    /// the embeddings flattened into a single list.
    ///
    /// The chunks are embedded in parallel on `threads` while the pool reports fewer than
    /// `REQUEST_PARALLELISM` active operations; otherwise they are embedded one after
    /// the other on the calling thread.
    ///
    /// # Errors
    ///
    /// Returns an embedding error from any chunk, or a `PanicInThreadPool` error
    /// (flagged as a bug) when the thread pool reports a panic.
    pub(crate) fn embed_chunks_ref(
        &self,
        texts: &[&str],
        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<f32>>, EmbedError> {
        if threads.active_operations() < REQUEST_PARALLELISM {
            // The pool still has room: fan the chunks out over rayon.
            let pooled = threads.install(move || -> Result<Vec<Vec<f32>>, EmbedError> {
                let per_chunk = texts
                    .par_chunks(self.prompt_count_in_chunk_hint())
                    .map(move |chunk| self.embed(chunk, None))
                    .collect::<Result<Vec<Vec<Embedding>>, EmbedError>>()?;

                Ok(per_chunk.into_iter().flatten().collect())
            });

            return match pooled {
                Ok(outcome) => outcome,
                Err(panic) => Err(EmbedError {
                    kind: EmbedErrorKind::PanicInThreadPool(panic),
                    fault: FaultSource::Bug,
                }),
            };
        }

        // The pool is busy: embed sequentially, stopping at the first failing chunk.
        let mut flattened = Vec::new();
        for chunk in texts.chunks(self.prompt_count_in_chunk_hint()) {
            flattened.extend(self.embed(chunk, None)?);
        }
        Ok(flattened)
    }

    /// Chunk count hint, as reported by the REST embedder.
    pub fn chunk_count_hint(&self) -> usize {
        let Self { rest_embedder } = self;
        rest_embedder.chunk_count_hint()
    }

    /// Hint for the number of prompts per chunk, as reported by the REST embedder.
    pub fn prompt_count_in_chunk_hint(&self) -> usize {
        let Self { rest_embedder } = self;
        rest_embedder.prompt_count_in_chunk_hint()
    }

    /// Embedding dimensions, as reported by the REST embedder.
    pub fn dimensions(&self) -> usize {
        let Self { rest_embedder } = self;
        rest_embedder.dimensions()
    }

    /// Distribution shift, if any, as reported by the REST embedder.
    pub fn distribution(&self) -> Option<DistributionShift> {
        let Self { rest_embedder } = self;
        rest_embedder.distribution()
    }
}

/// Returns the URL to use when the embedder options carry no explicit `url`.
///
/// Reads the `MEILI_OLLAMA_URL` environment variable. When it cannot be read,
/// falls back to `http://localhost:11434/api/embeddings`.
fn get_ollama_path() -> String {
    // Important: a hostname is not enough, the value has to be the entire path to the
    // embeddings endpoint.
    // The default is built lazily so nothing is allocated when the variable is set.
    std::env::var("MEILI_OLLAMA_URL")
        .unwrap_or_else(|_| "http://localhost:11434/api/embeddings".to_string())
}
Add deadline of 3 seconds to embedding requests made in the context of hybrid search 2024-11-06 09:24:51 +01:00			`use std::time::Instant;`

Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};`
Add embed_chunks_ref 2024-10-28 14:08:54 +01:00			`use rayon::slice::ParallelSlice as _;`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};`
			`use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};`
Do not create too many rayon tasks 2025-01-28 16:53:34 +01:00			`use super::{DistributionShift, REQUEST_PARALLELISM};`
Introduce the ThreadPoolNoAbort wrapper 2024-04-24 16:40:12 +02:00			`use crate::error::FaultSource;`
Add embed_chunks_ref 2024-10-28 14:08:54 +01:00			`use crate::vector::Embedding;`
Introduce the ThreadPoolNoAbort wrapper 2024-04-24 16:40:12 +02:00			`use crate::ThreadPoolNoAbort;`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00
			`#[derive(Debug)]`
			`pub struct Embedder {`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`rest_embedder: RestEmbedder,`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`

			`#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]`
			`pub struct EmbedderOptions {`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`pub embedding_model: String,`
Allow `url` parameter for ollama embedder 2024-03-25 11:13:21 +01:00			`pub url: Option<String>,`
also allow `api_key` 2024-03-25 11:50:00 +01:00			`pub api_key: Option<String>,`
Add distribution to all embedders 2024-03-27 11:50:22 +01:00			`pub distribution: Option<DistributionShift>,`
Allow explicit `dimensions` for ollama 2024-07-22 12:09:52 +02:00			`pub dimensions: Option<usize>,`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`

			`impl EmbedderOptions {`
Allow explicit `dimensions` for ollama 2024-07-22 12:09:52 +02:00			`pub fn with_default_model(`
			`api_key: Option<String>,`
			`url: Option<String>,`
			`dimensions: Option<usize>,`
			`) -> Self {`
			`Self {`
			`embedding_model: "nomic-embed-text".into(),`
			`api_key,`
			`url,`
			`distribution: None,`
			`dimensions,`
			`}`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`

Parse ollama URL to adapt configuration depending on the endpoint 2025-01-13 14:34:11 +01:00			`fn into_rest_embedder_config(self) -> Result<RestEmbedderOptions, NewEmbedderError> {`
			`let url = self.url.unwrap_or_else(get_ollama_path);`
			`let model = self.embedding_model.as_str();`

			// warning: do not swap these two `if`s, as the second one is always true when the first one is.
			`let (request, response) = if url.ends_with("/api/embeddings") {`
			`(`
			`serde_json::json!({`
ollama and openai use new EmbedderOptions 2024-07-16 15:17:49 +02:00			`"model": model,`
			`"prompt": super::rest::REQUEST_PLACEHOLDER,`
			`}),`
Parse ollama URL to adapt configuration depending on the endpoint 2025-01-13 14:34:11 +01:00			`serde_json::json!({`
ollama and openai use new EmbedderOptions 2024-07-16 15:17:49 +02:00			`"embedding": super::rest::RESPONSE_PLACEHOLDER,`
			`}),`
Parse ollama URL to adapt configuration depending on the endpoint 2025-01-13 14:34:11 +01:00			`)`
Swap implementations of ollama 2025-01-20 22:22:22 +01:00			`} else if url.ends_with("/api/embed") {`
			`(`
			`serde_json::json!({"model": model, "input": [super::rest::REQUEST_PLACEHOLDER, super::rest::REPEAT_PLACEHOLDER]}),`
			`serde_json::json!({"embeddings": [super::rest::RESPONSE_PLACEHOLDER, super::rest::REPEAT_PLACEHOLDER]}),`
			`)`
Parse ollama URL to adapt configuration depending on the endpoint 2025-01-13 14:34:11 +01:00			`} else {`
			`return Err(NewEmbedderError::ollama_unsupported_url(url));`
			`};`
			`Ok(RestEmbedderOptions {`
			`api_key: self.api_key,`
			`dimensions: self.dimensions,`
			`distribution: self.distribution,`
			`url,`
			`request,`
			`response,`
			`headers: Default::default(),`
			`})`
			`}`
			`}`

			`impl Embedder {`
			`pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {`
			`let rest_embedder = match RestEmbedder::new(`
			`options.into_rest_embedder_config()?,`
ollama and openai use new EmbedderOptions 2024-07-16 15:17:49 +02:00			`super::rest::ConfigurationSource::Ollama,`
			`) {`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`Ok(embedder) => embedder,`
			`Err(NewEmbedderError {`
			`kind:`
			`NewEmbedderErrorKind::CouldNotDetermineDimension(EmbedError {`
			`kind: super::error::EmbedErrorKind::RestOtherStatusCode(404, error),`
			`fault: _,`
			`}),`
			`fault: _,`
			`}) => {`
			`return Err(NewEmbedderError::could_not_determine_dimension(`
			`EmbedError::ollama_model_not_found(error),`
			`))`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`Err(error) => return Err(error),`
			`};`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`Ok(Self { rest_embedder })`
			`}`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00
Add embed_chunks_ref 2024-10-28 14:08:54 +01:00			`pub fn embed<S: AsRef<str> + serde::Serialize>(`
			`&self,`
			`texts: &[S],`
Add deadline of 3 seconds to embedding requests made in the context of hybrid search 2024-11-06 09:24:51 +01:00			`deadline: Option<Instant>,`
Add embed_chunks_ref 2024-10-28 14:08:54 +01:00			`) -> Result<Vec<Embedding>, EmbedError> {`
Add deadline of 3 seconds to embedding requests made in the context of hybrid search 2024-11-06 09:24:51 +01:00			`match self.rest_embedder.embed_ref(texts, deadline) {`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`Ok(embeddings) => Ok(embeddings),`
			`Err(EmbedError { kind: EmbedErrorKind::RestOtherStatusCode(404, error), fault: _ }) => {`
			`Err(EmbedError::ollama_model_not_found(error))`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`Err(error) => Err(error),`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`
			`}`

			`pub fn embed_chunks(`
			`&self,`
			`text_chunks: Vec<Vec<String>>,`
Introduce the ThreadPoolNoAbort wrapper 2024-04-24 16:40:12 +02:00			`threads: &ThreadPoolNoAbort,`
Add embed_chunks_ref 2024-10-28 14:08:54 +01:00			`) -> Result<Vec<Vec<Embedding>>, EmbedError> {`
Introduce the ThreadPoolNoAbort wrapper 2024-04-24 16:40:12 +02:00			`threads`
			`.install(move \|\| {`
Add deadline of 3 seconds to embedding requests made in the context of hybrid search 2024-11-06 09:24:51 +01:00			`text_chunks.into_par_iter().map(move \|chunk\| self.embed(&chunk, None)).collect()`
Add embed_chunks_ref 2024-10-28 14:08:54 +01:00			`})`
			`.map_err(\|error\| EmbedError {`
			`kind: EmbedErrorKind::PanicInThreadPool(error),`
			`fault: FaultSource::Bug,`
			`})?`
			`}`

			`pub(crate) fn embed_chunks_ref(`
			`&self,`
			`texts: &[&str],`
			`threads: &ThreadPoolNoAbort,`
			`) -> Result<Vec<Vec<f32>>, EmbedError> {`
Do not create too many rayon tasks 2025-01-28 16:53:34 +01:00			`if threads.active_operations() >= REQUEST_PARALLELISM {`
			`let embeddings: Result<Vec<Vec<Embedding>>, _> = texts`
			`.chunks(self.prompt_count_in_chunk_hint())`
			`.map(move \|chunk\| self.embed(chunk, None))`
			`.collect();`

			`let embeddings = embeddings?;`
			`Ok(embeddings.into_iter().flatten().collect())`
			`} else {`
			`threads`
			`.install(move \|\| {`
			`let embeddings: Result<Vec<Vec<Embedding>>, _> = texts`
			`.par_chunks(self.prompt_count_in_chunk_hint())`
			`.map(move \|chunk\| self.embed(chunk, None))`
			`.collect();`

			`let embeddings = embeddings?;`
			`Ok(embeddings.into_iter().flatten().collect())`
			`})`
			`.map_err(\|error\| EmbedError {`
			`kind: EmbedErrorKind::PanicInThreadPool(error),`
			`fault: FaultSource::Bug,`
			`})?`
			`}`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`

			`pub fn chunk_count_hint(&self) -> usize {`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`self.rest_embedder.chunk_count_hint()`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`

			`pub fn prompt_count_in_chunk_hint(&self) -> usize {`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`self.rest_embedder.prompt_count_in_chunk_hint()`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`

			`pub fn dimensions(&self) -> usize {`
Update ollama and openai impls to use the rest embedder internally 2024-03-19 15:41:37 +01:00			`self.rest_embedder.dimensions()`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`

			`pub fn distribution(&self) -> Option<DistributionShift> {`
Add distribution to all embedders 2024-03-27 11:50:22 +01:00			`self.rest_embedder.distribution()`
Revert "Revert "Merge remote-tracking branch 'origin/main' into release-v1.7.1"" 2024-03-20 10:08:28 +01:00			`}`
			`}`

			`fn get_ollama_path() -> String {`
			`// Important: Hostname not enough, has to be entire path to embeddings endpoint`
			`std::env::var("MEILI_OLLAMA_URL").unwrap_or("http://localhost:11434/api/embeddings".to_string())`
			`}`