Mirror of https://github.com/meilisearch/meilisearch.git (synced 2024-11-26 12:05:05 +08:00)
ollama and openai use new EmbedderOptions
parent: a1beddd5d9
commit: d731fa661b
@@ -28,19 +28,22 @@ impl EmbedderOptions {
 impl Embedder {
     pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
         let model = options.embedding_model.as_str();
-        let rest_embedder = match RestEmbedder::new(RestEmbedderOptions {
-            api_key: options.api_key,
-            dimensions: None,
-            distribution: options.distribution,
-            url: options.url.unwrap_or_else(get_ollama_path),
-            query: serde_json::json!({
-                "model": model,
-            }),
-            input_field: vec!["prompt".to_owned()],
-            path_to_embeddings: Default::default(),
-            embedding_object: vec!["embedding".to_owned()],
-            input_type: super::rest::InputType::Text,
-        }) {
+        let rest_embedder = match RestEmbedder::new(
+            RestEmbedderOptions {
+                api_key: options.api_key,
+                dimensions: None,
+                distribution: options.distribution,
+                url: options.url.unwrap_or_else(get_ollama_path),
+                request: serde_json::json!({
+                    "model": model,
+                    "prompt": super::rest::REQUEST_PLACEHOLDER,
+                }),
+                response: serde_json::json!({
+                    "embedding": super::rest::RESPONSE_PLACEHOLDER,
+                }),
+            },
+            super::rest::ConfigurationSource::Ollama,
+        ) {
             Ok(embedder) => embedder,
             Err(NewEmbedderError {
                 kind:
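For readers unfamiliar with the new options: the hunk above swaps the field-by-field description (`input_field`, `path_to_embeddings`, `embedding_object`, `input_type`) for `request`/`response` JSON templates built around placeholders. Below is a minimal sketch of that idea, assuming `REQUEST_PLACEHOLDER` is a sentinel string that the generic REST embedder substitutes with the actual input text; the real constant value and the substitution code live in `super::rest` and are not part of this diff, so everything here is illustrative only.

use serde_json::{json, Value};

const REQUEST_PLACEHOLDER: &str = "{{text}}"; // hypothetical sentinel; the real value lives in super::rest

/// Replace every string equal to the placeholder with the given input text.
fn inject_text(template: &Value, text: &str) -> Value {
    match template {
        Value::String(s) if s == REQUEST_PLACEHOLDER => Value::String(text.to_owned()),
        Value::Array(items) => Value::Array(items.iter().map(|v| inject_text(v, text)).collect()),
        Value::Object(map) => Value::Object(
            map.iter().map(|(k, v)| (k.clone(), inject_text(v, text))).collect(),
        ),
        other => other.clone(),
    }
}

fn main() {
    // Same shape as the Ollama `request` template in the hunk above.
    let template = json!({ "model": "nomic-embed-text", "prompt": REQUEST_PLACEHOLDER });
    let body = inject_text(&template, "hello world");
    assert_eq!(body, json!({ "model": "nomic-embed-text", "prompt": "hello world" }));
}

The upside of templates over dedicated fields is that one generic REST embedder can serve any provider whose request and response shapes can be written down as JSON.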
@@ -26,20 +26,21 @@ impl EmbedderOptions {
         }
     }
 
-    pub fn query(&self) -> serde_json::Value {
+    pub fn request(&self) -> serde_json::Value {
         let model = self.embedding_model.name();
 
-        let mut query = serde_json::json!({
+        let mut request = serde_json::json!({
             "model": model,
+            "input": [super::rest::REQUEST_PLACEHOLDER, super::rest::REPEAT_PLACEHOLDER]
         });
 
         if self.embedding_model.supports_overriding_dimensions() {
             if let Some(dimensions) = self.dimensions {
-                query["dimensions"] = dimensions.into();
+                request["dimensions"] = dimensions.into();
            }
         }
 
-        query
+        request
     }
 
     pub fn distribution(&self) -> Option<DistributionShift> {
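The OpenAI request template batches inputs by pairing `REQUEST_PLACEHOLDER` with `REPEAT_PLACEHOLDER` inside the `"input"` array. The sketch below shows one plausible expansion, under the assumption that `REPEAT_PLACEHOLDER` means "repeat the preceding element once per additional input"; the sentinel values and the real expansion code are not shown in this commit, so they are placeholders here.

use serde_json::{json, Value};

const REQUEST_PLACEHOLDER: &str = "{{text}}"; // hypothetical sentinel values
const REPEAT_PLACEHOLDER: &str = "{{..}}";

/// Expand `[REQUEST_PLACEHOLDER, REPEAT_PLACEHOLDER]` into one array entry per text.
fn expand(template: &[Value], texts: &[&str]) -> Value {
    match template {
        // Single-text template: take the first text only.
        [single] if single == REQUEST_PLACEHOLDER => {
            json!(texts.first().copied().unwrap_or_default())
        }
        // Batched template: one array slot per input text.
        [_, repeat] if repeat == REPEAT_PLACEHOLDER => {
            Value::Array(texts.iter().map(|t| json!(t)).collect())
        }
        _ => Value::Array(template.to_vec()),
    }
}

fn main() {
    let template = [json!(REQUEST_PLACEHOLDER), json!(REPEAT_PLACEHOLDER)];
    let input = expand(&template, &["first doc", "second doc"]);
    assert_eq!(input, json!(["first doc", "second doc"]));
}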
@@ -180,17 +181,23 @@ impl Embedder {
 
         let url = options.url.as_deref().unwrap_or(OPENAI_EMBEDDINGS_URL).to_owned();
 
-        let rest_embedder = RestEmbedder::new(RestEmbedderOptions {
-            api_key: Some(api_key.clone()),
-            distribution: None,
-            dimensions: Some(options.dimensions()),
-            url,
-            query: options.query(),
-            input_field: vec!["input".to_owned()],
-            input_type: crate::vector::rest::InputType::TextArray,
-            path_to_embeddings: vec!["data".to_owned()],
-            embedding_object: vec!["embedding".to_owned()],
-        })?;
+        let rest_embedder = RestEmbedder::new(
+            RestEmbedderOptions {
+                api_key: Some(api_key.clone()),
+                distribution: None,
+                dimensions: Some(options.dimensions()),
+                url,
+                request: options.request(),
+                response: serde_json::json!({
+                    "data": [{
+                        "embedding": super::rest::RESPONSE_PLACEHOLDER
+                    },
+                    super::rest::REPEAT_PLACEHOLDER
+                    ]
+                }),
+            },
+            super::rest::ConfigurationSource::OpenAi,
+        )?;
 
         // looking at the code it is very unclear that this can actually fail.
         let tokenizer = tiktoken_rs::cl100k_base().unwrap();
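The `response` template mirrors the OpenAI response body, so embeddings are read back along the path `data`, then each array element, then `embedding`. Below is a small illustration of that extraction on a hand-written response; the generic, template-driven extraction in `super::rest` is not shown in this commit, so this is only a sketch of the shape it has to handle.

use serde_json::json;

fn main() {
    // Example response body in the shape described by the `response` template above.
    let response = json!({
        "data": [
            { "embedding": [0.1, 0.2, 0.3] },
            { "embedding": [0.4, 0.5, 0.6] }
        ]
    });

    // Follow "data", then pull "embedding" out of every element.
    let embeddings: Vec<Vec<f32>> = response["data"]
        .as_array()
        .map(|items| {
            items
                .iter()
                .filter_map(|item| item["embedding"].as_array())
                .map(|xs| xs.iter().filter_map(|x| x.as_f64()).map(|x| x as f32).collect())
                .collect()
        })
        .unwrap_or_default();

    assert_eq!(embeddings.len(), 2);
    assert_eq!(embeddings[0].len(), 3);
}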
@@ -201,7 +208,7 @@ impl Embedder {
     pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
         match self.rest_embedder.embed_ref(&texts) {
             Ok(embeddings) => Ok(embeddings),
-            Err(EmbedError { kind: EmbedErrorKind::RestBadRequest(error), fault: _ }) => {
+            Err(EmbedError { kind: EmbedErrorKind::RestBadRequest(error, _), fault: _ }) => {
                 tracing::warn!(error=?error, "OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your document template.");
                 self.try_embed_tokenized(&texts)
             }
@@ -225,7 +232,7 @@ impl Embedder {
 
             let embedding = self.rest_embedder.embed_tokens(tokens)?;
             embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| {
-                EmbedError::openai_unexpected_dimension(self.dimensions(), got.len())
+                EmbedError::rest_unexpected_dimension(self.dimensions(), got.len())
             })?;
 
             all_embeddings.push(embeddings_for_prompt);