mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-25 19:45:05 +08:00
WIP: Failing embedding no longer causes the whole search to fail
This commit is contained in:
parent
0c216048b5
commit
083cdec3de
@ -261,33 +261,56 @@ pub async fn embed(
|
|||||||
index_scheduler: &IndexScheduler,
|
index_scheduler: &IndexScheduler,
|
||||||
index: &milli::Index,
|
index: &milli::Index,
|
||||||
) -> Result<Option<DistributionShift>, ResponseError> {
|
) -> Result<Option<DistributionShift>, ResponseError> {
|
||||||
match (&query.hybrid, &query.vector, &query.q) {
|
/// TEST:
|
||||||
(Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q))
|
// - pure vector search without hybrid
|
||||||
if !q.trim().is_empty() =>
|
// - pure vector search without hybrid passing a vector of the wrong dimension
|
||||||
{
|
// - pure vector search without hybrid, with multiple embedders, none of them called 'default'
|
||||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
let Some(hybrid) = &query.hybrid
|
||||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
|
||||||
let embedder = if let Some(embedder_name) = embedder {
|
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
||||||
embedders.get(embedder_name)
|
|
||||||
} else {
|
let embedders = index_scheduler.embedders(embedder_configs)?;
|
||||||
embedders.get_default()
|
|
||||||
|
let embedder_name = match &hybrid.embedder {
|
||||||
|
Some(embedder_name) => embedder_name.clone(),
|
||||||
|
None => embedders.get_default_embedder_name(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let embedder = embedders.get(&embedder_name);
|
||||||
|
|
||||||
|
let embedder = embedder
|
||||||
|
.ok_or(milli::UserError::InvalidEmbedder(embedder_name))
|
||||||
|
.map_err(milli::Error::from)?
|
||||||
|
.0;
|
||||||
|
|
||||||
|
let distribution = embedder.distribution();
|
||||||
|
|
||||||
|
match (&query.vector, &query.q) {
|
||||||
|
(None, Some(q)) if !q.trim().is_empty() => {
|
||||||
|
let embeddings = match tokio::time::timeout(
|
||||||
|
tokio::time::Duration::from_secs(10),
|
||||||
|
embedder.embed(vec![q.to_owned()]),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(Ok(mut embeddings)) => embeddings.pop(),
|
||||||
|
Ok(Err(error)) => {
|
||||||
|
warn!(%error, "error while embedding");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
warn!("timeout while embedding");
|
||||||
|
None
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let embedder = embedder
|
let Some(embeddings) = embeddings else {
|
||||||
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
|
warn!("no embedding available, vector search will not take place");
|
||||||
.map_err(milli::Error::from)?
|
return Ok(distribution);
|
||||||
.0;
|
};
|
||||||
|
|
||||||
let distribution = embedder.distribution();
|
|
||||||
|
|
||||||
let embeddings = embedder
|
|
||||||
.embed(vec![q.to_owned()])
|
|
||||||
.await
|
|
||||||
.map_err(milli::vector::Error::from)
|
|
||||||
.map_err(milli::Error::from)?
|
|
||||||
.pop()
|
|
||||||
.expect("No vector returned from embedding");
|
|
||||||
|
|
||||||
if embeddings.iter().nth(1).is_some() {
|
if embeddings.iter().nth(1).is_some() {
|
||||||
warn!("Ignoring embeddings past the first one in long search query");
|
warn!("Ignoring embeddings past the first one in long search query");
|
||||||
@ -295,23 +318,10 @@ pub async fn embed(
|
|||||||
} else {
|
} else {
|
||||||
query.vector = Some(embeddings.into_inner());
|
query.vector = Some(embeddings.into_inner());
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(distribution)
|
Ok(distribution)
|
||||||
}
|
}
|
||||||
(Some(hybrid), vector, _) => {
|
(vector, _) => {
|
||||||
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
|
|
||||||
let embedders = index_scheduler.embedders(embedder_configs)?;
|
|
||||||
|
|
||||||
let embedder = if let Some(embedder_name) = &hybrid.embedder {
|
|
||||||
embedders.get(embedder_name)
|
|
||||||
} else {
|
|
||||||
embedders.get_default()
|
|
||||||
};
|
|
||||||
|
|
||||||
let embedder = embedder
|
|
||||||
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
|
|
||||||
.map_err(milli::Error::from)?
|
|
||||||
.0;
|
|
||||||
|
|
||||||
if let Some(vector) = vector {
|
if let Some(vector) = vector {
|
||||||
if vector.len() != embedder.dimensions() {
|
if vector.len() != embedder.dimensions() {
|
||||||
return Err(meilisearch_types::milli::Error::UserError(
|
return Err(meilisearch_types::milli::Error::UserError(
|
||||||
@ -326,7 +336,6 @@ pub async fn embed(
|
|||||||
|
|
||||||
Ok(embedder.distribution())
|
Ok(embedder.distribution())
|
||||||
}
|
}
|
||||||
_ => Ok(None),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,17 +98,16 @@ impl EmbeddingConfigs {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
||||||
self.get_default_embedder_name().and_then(|default| self.get(&default))
|
self.get(&self.get_default_embedder_name())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_default_embedder_name(&self) -> Option<String> {
|
pub fn get_default_embedder_name(&self) -> String {
|
||||||
let mut it = self.0.keys();
|
let mut it = self.0.keys();
|
||||||
let first_name = it.next();
|
let first_name = it.next();
|
||||||
let second_name = it.next();
|
let second_name = it.next();
|
||||||
match (first_name, second_name) {
|
match (first_name, second_name) {
|
||||||
(None, _) => None,
|
(Some(first), None) => first.to_owned(),
|
||||||
(Some(first), None) => Some(first.to_owned()),
|
_ => "default".to_owned(),
|
||||||
(Some(_), Some(_)) => Some("default".to_owned()),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user