From a7e0f0de89ef8b7a0e261dbaff4f0893cbd1f7d6 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 14 Jun 2023 16:34:09 +0200 Subject: [PATCH] Introduce a new error message for invalid vector dimensions --- meilisearch-types/src/error.rs | 2 ++ milli/src/error.rs | 4 +++- .../src/update/index_documents/typed_chunk.rs | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 6d81ff241..886a0fe30 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -217,6 +217,7 @@ InvalidDocumentFields , InvalidRequest , BAD_REQUEST ; MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; +InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; @@ -335,6 +336,7 @@ impl ErrorCode for milli::Error { UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, + UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, UserError::SortError(_) => Code::InvalidSearchSort, UserError::InvalidMinTypoWordLenSetting(_, _) => { Code::InvalidSettingsTypoTolerance diff --git a/milli/src/error.rs b/milli/src/error.rs index 8d55eabbd..a12334f90 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -110,9 +110,11 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco }, #[error(transparent)] InvalidGeoField(#[from] GeoError), + #[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)] + InvalidVectorDimensions { expected: usize, found: usize }, #[error("{0}")] InvalidFilter(String), - #[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))] + #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))] InvalidFilterExpression(&'static [&'static str], Value), #[error("Attribute `{}` is not sortable. {}", .field, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index e136dc139..0e2e85c1c 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -11,11 +11,13 @@ use heed::types::ByteSlice; use heed::RwTxn; use hnsw::Searcher; use roaring::RoaringBitmap; +use space::KnnPoints; use super::helpers::{ self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap, }; use super::{ClonableMmap, MergeFn}; +use crate::error::UserError; use crate::facet::FacetType; use crate::update::facet::FacetsUpdate; use crate::update::index_documents::helpers::as_cloneable_grenad; @@ -228,12 +230,28 @@ pub(crate) fn write_typed_chunk_into_index( let mut hnsw = index.vector_hnsw(wtxn)?.unwrap_or_default(); let mut searcher = Searcher::new(); + let mut expected_dimensions = match index.vector_id_docid.iter(wtxn)?.next() { + Some(result) => { + let (vector_id, _) = result?; + Some(hnsw.get_point(vector_id.get() as usize).len()) + } + None => None, + }; + let mut cursor = vector_points.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { // convert the key back to a u32 (4 bytes) let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); // convert the vector back to a Vec let vector: Vec = pod_collect_to_vec(value); + + // TODO Move this error in the vector extractor + let found = vector.len(); + let expected = *expected_dimensions.get_or_insert(found); + if expected != found { + return Err(UserError::InvalidVectorDimensions { expected, found })?; + } + let vector_id = hnsw.insert(vector, &mut searcher) as u32; index.vector_id_docid.put(wtxn, &BEU32::new(vector_id), &BEU32::new(docid))?; }