WIP: Failing embedding no longer causes the whole search to fail

This commit is contained in:
Louis Dureuil 2024-03-05 12:29:00 +01:00
parent 0c216048b5
commit 083cdec3de
No known key found for this signature in database
2 changed files with 53 additions and 45 deletions

View File

@@ -261,33 +261,56 @@ pub async fn embed(
index_scheduler: &IndexScheduler, index_scheduler: &IndexScheduler,
index: &milli::Index, index: &milli::Index,
) -> Result<Option<DistributionShift>, ResponseError> { ) -> Result<Option<DistributionShift>, ResponseError> {
match (&query.hybrid, &query.vector, &query.q) { /// TEST:
(Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q)) // - pure vector search without hybrid
if !q.trim().is_empty() => // - pure vector search without hybrid passing a vector of the wrong dimension
{ // - pure vector search without hybrid, with multiple embedders, none of them called 'default'
let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let Some(hybrid) = &query.hybrid
let embedders = index_scheduler.embedders(embedder_configs)?; else {
return Ok(None);
let embedder = if let Some(embedder_name) = embedder {
embedders.get(embedder_name)
} else {
embedders.get_default()
}; };
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder_name = match &hybrid.embedder {
Some(embedder_name) => embedder_name.clone(),
None => embedders.get_default_embedder_name(),
};
let embedder = embedders.get(&embedder_name);
let embedder = embedder let embedder = embedder
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned())) .ok_or(milli::UserError::InvalidEmbedder(embedder_name))
.map_err(milli::Error::from)? .map_err(milli::Error::from)?
.0; .0;
let distribution = embedder.distribution(); let distribution = embedder.distribution();
let embeddings = embedder match (&query.vector, &query.q) {
.embed(vec![q.to_owned()]) (None, Some(q)) if !q.trim().is_empty() => {
let embeddings = match tokio::time::timeout(
tokio::time::Duration::from_secs(10),
embedder.embed(vec![q.to_owned()]),
)
.await .await
.map_err(milli::vector::Error::from) {
.map_err(milli::Error::from)? Ok(Ok(mut embeddings)) => embeddings.pop(),
.pop() Ok(Err(error)) => {
.expect("No vector returned from embedding"); warn!(%error, "error while embedding");
None
}
Err(_) => {
warn!("timeout while embedding");
None
}
};
let Some(embeddings) = embeddings else {
warn!("no embedding available, vector search will not take place");
return Ok(distribution);
};
if embeddings.iter().nth(1).is_some() { if embeddings.iter().nth(1).is_some() {
warn!("Ignoring embeddings past the first one in long search query"); warn!("Ignoring embeddings past the first one in long search query");
@@ -295,23 +318,10 @@ pub async fn embed(
} else { } else {
query.vector = Some(embeddings.into_inner()); query.vector = Some(embeddings.into_inner());
} }
Ok(distribution) Ok(distribution)
} }
(Some(hybrid), vector, _) => { (vector, _) => {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder = if let Some(embedder_name) = &hybrid.embedder {
embedders.get(embedder_name)
} else {
embedders.get_default()
};
let embedder = embedder
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
.map_err(milli::Error::from)?
.0;
if let Some(vector) = vector { if let Some(vector) = vector {
if vector.len() != embedder.dimensions() { if vector.len() != embedder.dimensions() {
return Err(meilisearch_types::milli::Error::UserError( return Err(meilisearch_types::milli::Error::UserError(
@@ -326,7 +336,6 @@ pub async fn embed(
Ok(embedder.distribution()) Ok(embedder.distribution())
} }
_ => Ok(None),
} }
} }

View File

@@ -98,17 +98,16 @@ impl EmbeddingConfigs {
} }
pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> { pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
self.get_default_embedder_name().and_then(|default| self.get(&default)) self.get(&self.get_default_embedder_name())
} }
pub fn get_default_embedder_name(&self) -> Option<String> { pub fn get_default_embedder_name(&self) -> String {
let mut it = self.0.keys(); let mut it = self.0.keys();
let first_name = it.next(); let first_name = it.next();
let second_name = it.next(); let second_name = it.next();
match (first_name, second_name) { match (first_name, second_name) {
(None, _) => None, (Some(first), None) => first.to_owned(),
(Some(first), None) => Some(first.to_owned()), _ => "default".to_owned(),
(Some(_), Some(_)) => Some("default".to_owned()),
} }
} }
} }