diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index 5fb04828c..f6ee1f685 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -1,12 +1,13 @@ use std::fmt::Display; -use crate::TaskId; use meilisearch_types::batches::BatchId; use meilisearch_types::error::{Code, ErrorCode}; use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::{heed, milli}; use thiserror::Error; +use crate::TaskId; + #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum DateField { BeforeEnqueuedAt, @@ -103,7 +104,7 @@ pub enum Error { )] InvalidTaskCanceledBy { canceled_by: String }, #[error( - "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes." + "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes." )] InvalidIndexUid { index_uid: String }, #[error("Task `{0}` not found.")] diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index afc876b42..0c4027899 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -550,7 +550,7 @@ impl fmt::Display for deserr_codes::InvalidSimilarId { "the value of `id` is invalid. \ A document identifier can be of type integer or string, \ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \ - and can not be more than 512 bytes." + and can not be more than 511 bytes." ) } } diff --git a/crates/meilisearch/tests/documents/add_documents.rs b/crates/meilisearch/tests/documents/add_documents.rs index 750bf7ae9..d72b1a7a8 100644 --- a/crates/meilisearch/tests/documents/add_documents.rs +++ b/crates/meilisearch/tests/documents/add_documents.rs @@ -1264,15 +1264,18 @@ async fn error_add_documents_bad_document_id() { let server = Server::new().await; let index = server.index("test"); index.create(Some("docid")).await; + + // unsupported characters + let documents = json!([ { "docid": "foo & bar", "content": "foobar" } ]); - index.add_documents(documents, None).await; - index.wait_task(1).await; - let (response, code) = index.get_task(1).await; + let (value, _code) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + let (response, code) = index.get_task(value.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1288,7 +1291,81 @@ async fn error_add_documents_bad_document_id() { "indexedDocuments": 0 }, "error": { - "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", + "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", + "code": "invalid_document_id", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_id" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // More than 512 bytes + let documents = json!([ + { + "docid": "a".repeat(600), + "content": "foobar" + } + ]); + let (value, _code) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + let (response, code) = index.get_task(value.uid()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), + @r###" + { + "uid": 2, + "batchUid": 2, + "indexUid": "test", + "status": "failed", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 0 + }, + "error": { + "message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", + "code": "invalid_document_id", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_id" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // Exactly 512 bytes + let documents = json!([ + { + "docid": "a".repeat(512), + "content": "foobar" + } + ]); + let (value, _code) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + let (response, code) = index.get_task(value.uid()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), + @r###" + { + "uid": 3, + "batchUid": 3, + "indexUid": "test", + "status": "failed", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 0 + }, + "error": { + "message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "code": "invalid_document_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_id" diff --git a/crates/meilisearch/tests/documents/update_documents.rs b/crates/meilisearch/tests/documents/update_documents.rs index c0703e81b..aaf529ce5 100644 --- a/crates/meilisearch/tests/documents/update_documents.rs +++ b/crates/meilisearch/tests/documents/update_documents.rs @@ -172,7 +172,7 @@ async fn error_update_documents_bad_document_id() { assert_eq!( response["error"]["message"], json!( - r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."# + r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes."# ) ); assert_eq!(response["error"]["code"], json!("invalid_document_id")); diff --git a/crates/meilisearch/tests/similar/errors.rs b/crates/meilisearch/tests/similar/errors.rs index 1e933e1c0..86fca97ad 100644 --- a/crates/meilisearch/tests/similar/errors.rs +++ b/crates/meilisearch/tests/similar/errors.rs @@ -79,7 +79,7 @@ async fn similar_bad_id() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", + "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" @@ -172,7 +172,7 @@ async fn similar_invalid_id() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", + "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" diff --git a/crates/milli/src/documents/primary_key.rs b/crates/milli/src/documents/primary_key.rs index fb8b3d027..c1dd9a9b8 100644 --- a/crates/milli/src/documents/primary_key.rs +++ b/crates/milli/src/documents/primary_key.rs @@ -280,7 +280,7 @@ fn starts_with(selector: &str, key: &str) -> bool { pub fn validate_document_id_str(document_id: &str) -> Option<&str> { if document_id.is_empty() - || document_id.len() > 512 + || document_id.len() >= 512 || !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') { None diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index a6774a7bd..2bd57bba5 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -114,7 +114,7 @@ pub enum UserError { "Document identifier `{}` is invalid. \ A document identifier can be of type integer or string, \ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \ -and can not be more than 512 bytes.", .document_id.to_string() +and can not be more than 511 bytes.", .document_id.to_string() )] InvalidDocumentId { document_id: Value }, #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]