4930: Return `UserError::InvalidDocumentId` for primary keys with a length greater than 512 bytes r=curquiza a=flevi29

# Pull Request

## Related issue
Fixes #4843

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: F. Levi <55688616+flevi29@users.noreply.github.com>
This commit is contained in:
meili-bors[bot] 2024-09-30 15:55:05 +00:00 committed by GitHub
commit e78da35287
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 50 additions and 32 deletions

View File

@ -74,7 +74,8 @@ impl Display for IndexUidFormatError {
f,
"invalid index uid `{}`, the uid must be an integer \
or a string containing only alphanumeric characters \
a-z A-Z 0-9, hyphens - and underscores _.",
a-z A-Z 0-9, hyphens - and underscores _, \
and can not be more than 400 bytes.",
self.invalid_uid,
)
}

View File

@ -74,7 +74,8 @@ impl Display for IndexUidFormatError {
f,
"invalid index uid `{}`, the uid must be an integer \
or a string containing only alphanumeric characters \
a-z A-Z 0-9, hyphens - and underscores _.",
a-z A-Z 0-9, hyphens - and underscores _, \
and can not be more than 400 bytes.",
self.invalid_uid,
)
}

View File

@ -101,7 +101,7 @@ pub enum Error {
)]
InvalidTaskCanceledBy { canceled_by: String },
#[error(
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_)."
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
)]
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]

View File

@ -543,7 +543,8 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
f,
"the value of `id` is invalid. \
A document identifier can be of type integer or string, \
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
and can not be more than 512 bytes."
)
}
}

View File

@ -88,7 +88,8 @@ impl fmt::Display for IndexUidFormatError {
f,
"`{}` is not a valid index uid. Index uid can be an \
integer or a string containing only alphanumeric \
characters, hyphens (-) and underscores (_).",
characters, hyphens (-) and underscores (_), \
and can not be more than 512 bytes.",
self.invalid_uid,
)
}

View File

@ -616,7 +616,7 @@ mod tests {
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -628,7 +628,7 @@ mod tests {
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"

View File

@ -1023,7 +1023,7 @@ async fn error_document_add_create_index_bad_uid() {
snapshot!(json_string!(response),
@r###"
{
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -1280,7 +1280,7 @@ async fn error_add_documents_bad_document_id() {
"indexedDocuments": 0
},
"error": {
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_document_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_id"

View File

@ -11,7 +11,7 @@ async fn error_document_update_create_index_bad_uid() {
let (response, code) = index.update_documents(json!([{"id": 1}]), None).await;
let expected_response = json!({
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -173,7 +173,7 @@ async fn error_update_documents_bad_document_id() {
assert_eq!(
response["error"]["message"],
json!(
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."#
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."#
)
);
assert_eq!(response["error"]["code"], json!("invalid_document_id"));

View File

@ -125,11 +125,11 @@ async fn create_index_with_primary_key() {
#[actix_rt::test]
async fn create_index_with_invalid_primary_key() {
let document = json!([ { "id": 2, "title": "Pride and Prejudice" } ]);
let documents = json!([ { "id": 2, "title": "Pride and Prejudice" } ]);
let server = Server::new().await;
let index = server.index("movies");
let (_response, code) = index.add_documents(document, Some("title")).await;
let (_response, code) = index.add_documents(documents, Some("title")).await;
assert_eq!(code, 202);
index.wait_task(0).await;
@ -137,6 +137,17 @@ async fn create_index_with_invalid_primary_key() {
let (response, code) = index.get().await;
assert_eq!(code, 200);
assert_eq!(response["primaryKey"], json!(null));
let documents = json!([ { "id": "e".repeat(513) } ]);
let (_response, code) = index.add_documents(documents, Some("id")).await;
assert_eq!(code, 202);
index.wait_task(1).await;
let (response, code) = index.get().await;
assert_eq!(code, 200);
assert_eq!(response["primaryKey"], json!(null));
}
#[actix_rt::test]
@ -192,7 +203,7 @@ async fn error_create_with_invalid_index_uid() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.uid`: `test test#!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value at `.uid`: `test test#!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"

View File

@ -75,7 +75,7 @@ async fn create_index_bad_uid() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.uid`: `the best doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value at `.uid`: `the best doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -136,7 +136,7 @@ async fn get_index_bad_uid() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -232,7 +232,7 @@ async fn update_index_bad_uid() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -247,7 +247,7 @@ async fn delete_index_bad_uid() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"

View File

@ -186,7 +186,7 @@ async fn get_invalid_index_uid() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "`this is not a valid index name` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "`this is not a valid index name` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"

View File

@ -412,7 +412,7 @@ async fn simple_search_illegal_index_uid() {
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, @r###"
{
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -437,7 +437,7 @@ async fn federation_search_illegal_index_uid() {
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, @r###"
{
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"

View File

@ -330,7 +330,7 @@ async fn error_update_setting_unexisting_index_invalid_uid() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"

View File

@ -79,7 +79,7 @@ async fn similar_bad_id() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@ -172,7 +172,7 @@ async fn similar_invalid_id() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"

View File

@ -173,7 +173,7 @@ async fn task_bad_index_uids() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -184,7 +184,7 @@ async fn task_bad_index_uids() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
@ -195,7 +195,7 @@ async fn task_bad_index_uids() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"

View File

@ -150,12 +150,13 @@ fn starts_with(selector: &str, key: &str) -> bool {
// FIXME: move to a DocumentId struct
fn validate_document_id(document_id: &str) -> Option<&str> {
if !document_id.is_empty()
&& document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
if document_id.is_empty()
|| document_id.len() > 512
|| !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
{
Some(document_id)
} else {
None
} else {
Some(document_id)
}
}
@ -166,6 +167,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserE
Some(s) => Ok(s.to_string()),
None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
},
// a `u64` or `i64` cannot be more than 512 bytes once converted to a string
Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
content => Err(UserError::InvalidDocumentId { document_id: content }),
}

View File

@ -106,7 +106,8 @@ pub enum UserError {
#[error(
"Document identifier `{}` is invalid. \
A document identifier can be of type integer or string, \
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", .document_id.to_string()
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
and can not be more than 512 bytes.", .document_id.to_string()
)]
InvalidDocumentId { document_id: Value },
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]