4459: Put a bound on OpenAI timeout r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4460 

## What does this PR do?
- Ensures that the timeout of the OpenAI embedder is capped at 1 minute, rather than the previous 15+ minutes



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-03-05 15:18:51 +00:00 committed by GitHub
commit eefc1c421e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 12 additions and 22 deletions

View File

@ -59,8 +59,8 @@ pub enum EmbedErrorKind {
OpenAiAuth(OpenAiError), OpenAiAuth(OpenAiError),
#[error("sent too many requests to OpenAI: {0}")] #[error("sent too many requests to OpenAI: {0}")]
OpenAiTooManyRequests(OpenAiError), OpenAiTooManyRequests(OpenAiError),
#[error("received internal error from OpenAI: {0}")] #[error("received internal error from OpenAI: {0:?}")]
OpenAiInternalServerError(OpenAiError), OpenAiInternalServerError(Option<OpenAiError>),
#[error("sent too many tokens in a request to OpenAI: {0}")] #[error("sent too many tokens in a request to OpenAI: {0}")]
OpenAiTooManyTokens(OpenAiError), OpenAiTooManyTokens(OpenAiError),
#[error("received unhandled HTTP status code {0} from OpenAI")] #[error("received unhandled HTTP status code {0} from OpenAI")]
@ -106,7 +106,7 @@ impl EmbedError {
Self { kind: EmbedErrorKind::OpenAiTooManyRequests(inner), fault: FaultSource::Runtime } Self { kind: EmbedErrorKind::OpenAiTooManyRequests(inner), fault: FaultSource::Runtime }
} }
pub(crate) fn openai_internal_server_error(inner: OpenAiError) -> EmbedError { pub(crate) fn openai_internal_server_error(inner: Option<OpenAiError>) -> EmbedError {
Self { kind: EmbedErrorKind::OpenAiInternalServerError(inner), fault: FaultSource::Runtime } Self { kind: EmbedErrorKind::OpenAiInternalServerError(inner), fault: FaultSource::Runtime }
} }

View File

@ -178,6 +178,8 @@ impl Embedder {
retry.into_duration(attempt) retry.into_duration(attempt)
} }
}?; }?;
let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
tracing::warn!( tracing::warn!(
"Attempt #{}, retrying after {}ms.", "Attempt #{}, retrying after {}ms.",
attempt, attempt,
@ -220,24 +222,12 @@ impl Embedder {
error_response.error, error_response.error,
))); )));
} }
StatusCode::INTERNAL_SERVER_ERROR => { StatusCode::INTERNAL_SERVER_ERROR
let error_response: OpenAiErrorResponse = response | StatusCode::BAD_GATEWAY
.json() | StatusCode::SERVICE_UNAVAILABLE => {
.await let error_response: Result<OpenAiErrorResponse, _> = response.json().await;
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
return Err(Retry::retry_later(EmbedError::openai_internal_server_error( return Err(Retry::retry_later(EmbedError::openai_internal_server_error(
error_response.error, error_response.ok().map(|error_response| error_response.error),
)));
}
StatusCode::SERVICE_UNAVAILABLE => {
let error_response: OpenAiErrorResponse = response
.json()
.await
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
return Err(Retry::retry_later(EmbedError::openai_internal_server_error(
error_response.error,
))); )));
} }
StatusCode::BAD_REQUEST => { StatusCode::BAD_REQUEST => {
@ -248,14 +238,14 @@ impl Embedder {
.map_err(EmbedError::openai_unexpected) .map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?; .map_err(Retry::retry_later)?;
tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt."); tracing::warn!("OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens( return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
error_response.error, error_response.error,
))); )));
} }
code => { code => {
return Err(Retry::give_up(EmbedError::openai_unhandled_status_code( return Err(Retry::retry_later(EmbedError::openai_unhandled_status_code(
code.as_u16(), code.as_u16(),
))); )));
} }