mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 10:07:40 +08:00
Add deadline of 3 seconds to embedding requests made in the context of hybrid search
This commit is contained in:
parent
a05e448cf8
commit
e9d17136b2
@ -5214,9 +5214,10 @@ mod tests {
|
||||
|
||||
let configs = index_scheduler.embedders(configs).unwrap();
|
||||
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
|
||||
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
|
||||
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
|
||||
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
|
||||
let beagle_embed =
|
||||
hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap();
|
||||
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap();
|
||||
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap();
|
||||
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
|
||||
};
|
||||
|
||||
|
@ -796,8 +796,10 @@ fn prepare_search<'t>(
|
||||
let span = tracing::trace_span!(target: "search::vector", "embed_one");
|
||||
let _entered = span.enter();
|
||||
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
|
||||
|
||||
embedder
|
||||
.embed_one(query.q.clone().unwrap())
|
||||
.embed_one(query.q.clone().unwrap(), Some(deadline))
|
||||
.map_err(milli::vector::Error::from)
|
||||
.map_err(milli::Error::from)?
|
||||
}
|
||||
|
@ -201,7 +201,9 @@ impl<'a> Search<'a> {
|
||||
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
|
||||
let _entered = span.enter();
|
||||
|
||||
match embedder.embed_one(query) {
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
|
||||
|
||||
match embedder.embed_one(query, Some(deadline)) {
|
||||
Ok(embedding) => embedding,
|
||||
Err(error) => {
|
||||
tracing::error!(error=%error, "Embedding failed");
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use arroy::distances::{BinaryQuantizedCosine, Cosine};
|
||||
use arroy::ItemId;
|
||||
@ -595,18 +596,26 @@ impl Embedder {
|
||||
/// Embed one or multiple texts.
|
||||
///
|
||||
/// Each text can be embedded as one or multiple embeddings.
|
||||
pub fn embed(&self, texts: Vec<String>) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
||||
pub fn embed(
|
||||
&self,
|
||||
texts: Vec<String>,
|
||||
deadline: Option<Instant>,
|
||||
) -> std::result::Result<Vec<Embedding>, EmbedError> {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.embed(texts),
|
||||
Embedder::OpenAi(embedder) => embedder.embed(&texts),
|
||||
Embedder::Ollama(embedder) => embedder.embed(&texts),
|
||||
Embedder::OpenAi(embedder) => embedder.embed(&texts, deadline),
|
||||
Embedder::Ollama(embedder) => embedder.embed(&texts, deadline),
|
||||
Embedder::UserProvided(embedder) => embedder.embed(&texts),
|
||||
Embedder::Rest(embedder) => embedder.embed(texts),
|
||||
Embedder::Rest(embedder) => embedder.embed(texts, deadline),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn embed_one(&self, text: String) -> std::result::Result<Embedding, EmbedError> {
|
||||
let mut embedding = self.embed(vec![text])?;
|
||||
pub fn embed_one(
|
||||
&self,
|
||||
text: String,
|
||||
deadline: Option<Instant>,
|
||||
) -> std::result::Result<Embedding, EmbedError> {
|
||||
let mut embedding = self.embed(vec![text], deadline)?;
|
||||
let embedding = embedding.pop().ok_or_else(EmbedError::missing_embedding)?;
|
||||
Ok(embedding)
|
||||
}
|
||||
|
@ -1,3 +1,5 @@
|
||||
use std::time::Instant;
|
||||
|
||||
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
||||
use rayon::slice::ParallelSlice as _;
|
||||
|
||||
@ -80,8 +82,9 @@ impl Embedder {
|
||||
pub fn embed<S: AsRef<str> + serde::Serialize>(
|
||||
&self,
|
||||
texts: &[S],
|
||||
deadline: Option<Instant>,
|
||||
) -> Result<Vec<Embedding>, EmbedError> {
|
||||
match self.rest_embedder.embed_ref(texts) {
|
||||
match self.rest_embedder.embed_ref(texts, deadline) {
|
||||
Ok(embeddings) => Ok(embeddings),
|
||||
Err(EmbedError { kind: EmbedErrorKind::RestOtherStatusCode(404, error), fault: _ }) => {
|
||||
Err(EmbedError::ollama_model_not_found(error))
|
||||
@ -97,7 +100,7 @@ impl Embedder {
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk)).collect()
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
@ -114,7 +117,7 @@ impl Embedder {
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk))
|
||||
.map(move |chunk| self.embed(chunk, None))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
|
@ -1,3 +1,5 @@
|
||||
use std::time::Instant;
|
||||
|
||||
use ordered_float::OrderedFloat;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
|
||||
use rayon::slice::ParallelSlice as _;
|
||||
@ -211,18 +213,23 @@ impl Embedder {
|
||||
pub fn embed<S: AsRef<str> + serde::Serialize>(
|
||||
&self,
|
||||
texts: &[S],
|
||||
deadline: Option<Instant>,
|
||||
) -> Result<Vec<Embedding>, EmbedError> {
|
||||
match self.rest_embedder.embed_ref(texts) {
|
||||
match self.rest_embedder.embed_ref(texts, deadline) {
|
||||
Ok(embeddings) => Ok(embeddings),
|
||||
Err(EmbedError { kind: EmbedErrorKind::RestBadRequest(error, _), fault: _ }) => {
|
||||
tracing::warn!(error=?error, "OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your document template.");
|
||||
self.try_embed_tokenized(texts)
|
||||
self.try_embed_tokenized(texts, deadline)
|
||||
}
|
||||
Err(error) => Err(error),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_embed_tokenized<S: AsRef<str>>(&self, text: &[S]) -> Result<Vec<Embedding>, EmbedError> {
|
||||
fn try_embed_tokenized<S: AsRef<str>>(
|
||||
&self,
|
||||
text: &[S],
|
||||
deadline: Option<Instant>,
|
||||
) -> Result<Vec<Embedding>, EmbedError> {
|
||||
let mut all_embeddings = Vec::with_capacity(text.len());
|
||||
for text in text {
|
||||
let text = text.as_ref();
|
||||
@ -230,13 +237,13 @@ impl Embedder {
|
||||
let encoded = self.tokenizer.encode_ordinary(text);
|
||||
let len = encoded.len();
|
||||
if len < max_token_count {
|
||||
all_embeddings.append(&mut self.rest_embedder.embed_ref(&[text])?);
|
||||
all_embeddings.append(&mut self.rest_embedder.embed_ref(&[text], deadline)?);
|
||||
continue;
|
||||
}
|
||||
|
||||
let tokens = &encoded.as_slice()[0..max_token_count];
|
||||
|
||||
let embedding = self.rest_embedder.embed_tokens(tokens)?;
|
||||
let embedding = self.rest_embedder.embed_tokens(tokens, deadline)?;
|
||||
|
||||
all_embeddings.push(embedding);
|
||||
}
|
||||
@ -250,7 +257,7 @@ impl Embedder {
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk)).collect()
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
@ -267,7 +274,7 @@ impl Embedder {
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed(chunk))
|
||||
.map(move |chunk| self.embed(chunk, None))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
|
@ -1,4 +1,5 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::time::Instant;
|
||||
|
||||
use deserr::Deserr;
|
||||
use rand::Rng;
|
||||
@ -153,19 +154,31 @@ impl Embedder {
|
||||
Ok(Self { data, dimensions, distribution: options.distribution })
|
||||
}
|
||||
|
||||
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embedding>, EmbedError> {
|
||||
embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions))
|
||||
pub fn embed(
|
||||
&self,
|
||||
texts: Vec<String>,
|
||||
deadline: Option<Instant>,
|
||||
) -> Result<Vec<Embedding>, EmbedError> {
|
||||
embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions), deadline)
|
||||
}
|
||||
|
||||
pub fn embed_ref<S>(&self, texts: &[S]) -> Result<Vec<Embedding>, EmbedError>
|
||||
pub fn embed_ref<S>(
|
||||
&self,
|
||||
texts: &[S],
|
||||
deadline: Option<Instant>,
|
||||
) -> Result<Vec<Embedding>, EmbedError>
|
||||
where
|
||||
S: AsRef<str> + Serialize,
|
||||
{
|
||||
embed(&self.data, texts, texts.len(), Some(self.dimensions))
|
||||
embed(&self.data, texts, texts.len(), Some(self.dimensions), deadline)
|
||||
}
|
||||
|
||||
pub fn embed_tokens(&self, tokens: &[usize]) -> Result<Embedding, EmbedError> {
|
||||
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions))?;
|
||||
pub fn embed_tokens(
|
||||
&self,
|
||||
tokens: &[usize],
|
||||
deadline: Option<Instant>,
|
||||
) -> Result<Embedding, EmbedError> {
|
||||
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?;
|
||||
// unwrap: guaranteed that embeddings.len() == 1, otherwise the previous line terminated in error
|
||||
Ok(embeddings.pop().unwrap())
|
||||
}
|
||||
@ -177,7 +190,7 @@ impl Embedder {
|
||||
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
|
||||
threads
|
||||
.install(move || {
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
|
||||
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
|
||||
})
|
||||
.map_err(|error| EmbedError {
|
||||
kind: EmbedErrorKind::PanicInThreadPool(error),
|
||||
@ -194,7 +207,7 @@ impl Embedder {
|
||||
.install(move || {
|
||||
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
|
||||
.par_chunks(self.prompt_count_in_chunk_hint())
|
||||
.map(move |chunk| self.embed_ref(chunk))
|
||||
.map(move |chunk| self.embed_ref(chunk, None))
|
||||
.collect();
|
||||
|
||||
let embeddings = embeddings?;
|
||||
@ -227,7 +240,7 @@ impl Embedder {
|
||||
}
|
||||
|
||||
fn infer_dimensions(data: &EmbedderData) -> Result<usize, NewEmbedderError> {
|
||||
let v = embed(data, ["test"].as_slice(), 1, None)
|
||||
let v = embed(data, ["test"].as_slice(), 1, None, None)
|
||||
.map_err(NewEmbedderError::could_not_determine_dimension)?;
|
||||
// unwrap: guaranteed that v.len() == 1, otherwise the previous line terminated in error
|
||||
Ok(v.first().unwrap().len())
|
||||
@ -238,6 +251,7 @@ fn embed<S>(
|
||||
inputs: &[S],
|
||||
expected_count: usize,
|
||||
expected_dimension: Option<usize>,
|
||||
deadline: Option<Instant>,
|
||||
) -> Result<Vec<Embedding>, EmbedError>
|
||||
where
|
||||
S: Serialize,
|
||||
@ -265,8 +279,19 @@ where
|
||||
Ok(response) => return Ok(response),
|
||||
Err(retry) => {
|
||||
tracing::warn!("Failed: {}", retry.error);
|
||||
if let Some(deadline) = deadline {
|
||||
let now = std::time::Instant::now();
|
||||
if now > deadline {
|
||||
tracing::warn!("Could not embed due to deadline");
|
||||
return Err(retry.into_error());
|
||||
}
|
||||
|
||||
let duration_to_deadline = deadline - now;
|
||||
retry.into_duration(attempt).map(|duration| duration.min(duration_to_deadline))
|
||||
} else {
|
||||
retry.into_duration(attempt)
|
||||
}
|
||||
}
|
||||
}?;
|
||||
|
||||
let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
|
||||
|
Loading…
Reference in New Issue
Block a user