3998: Accept the `null` JSON value as a value of the `_vectors` field r=irevoire a=Kerollmops

This PR fixes #3979 by accepting `null` JSON values in the `_vectors` fields provided by the user.

Can the reviewer please verify that I am merging into the right branch?
I think we must create a new _release-v1.3.2_ branch.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2023-08-16 08:12:24 +00:00 committed by GitHub
commit 4c35817c5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 13 deletions

View File

@@ -666,6 +666,7 @@ fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option
.map_err(InternalError::SerdeJson)?; .map_err(InternalError::SerdeJson)?;
Ok(vectors Ok(vectors
.into_iter() .into_iter()
.flatten()
.map(|v| OrderedFloat(dot_product_similarity(query, &v))) .map(|v| OrderedFloat(dot_product_similarity(query, &v)))
.max() .max()
.map(OrderedFloat::into_inner)) .map(OrderedFloat::into_inner))

View File

@@ -293,15 +293,15 @@ pub fn normalize_facet(original: &str) -> String {
#[derive(serde::Serialize, serde::Deserialize, Debug)] #[derive(serde::Serialize, serde::Deserialize, Debug)]
#[serde(transparent)] #[serde(transparent)]
pub struct VectorOrArrayOfVectors { pub struct VectorOrArrayOfVectors {
#[serde(with = "either::serde_untagged")] #[serde(with = "either::serde_untagged_optional")]
inner: either::Either<Vec<f32>, Vec<Vec<f32>>>, inner: Option<either::Either<Vec<f32>, Vec<Vec<f32>>>>,
} }
impl VectorOrArrayOfVectors { impl VectorOrArrayOfVectors {
pub fn into_array_of_vectors(self) -> Vec<Vec<f32>> { pub fn into_array_of_vectors(self) -> Option<Vec<Vec<f32>>> {
match self.inner { match self.inner? {
either::Either::Left(vector) => vec![vector], either::Either::Left(vector) => Some(vec![vector]),
either::Either::Right(vectors) => vectors, either::Either::Right(vectors) => Some(vectors),
} }
} }
} }

View File

@@ -33,7 +33,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
// lazily get it when needed // lazily get it when needed
let document_id = || -> Value { let document_id = || -> Value {
let document_id = obkv.get(primary_key_id).unwrap(); let document_id = obkv.get(primary_key_id).unwrap();
serde_json::from_slice(document_id).unwrap() from_slice(document_id).unwrap()
}; };
// first we retrieve the _vectors field // first we retrieve the _vectors field
@@ -50,6 +50,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
} }
}; };
if let Some(vectors) = vectors {
for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) { for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
let index = u16::try_from(i).unwrap(); let index = u16::try_from(i).unwrap();
let mut key = docid_bytes.to_vec(); let mut key = docid_bytes.to_vec();
@@ -58,6 +59,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
writer.insert(key, bytes)?; writer.insert(key, bytes)?;
} }
} }
}
// else => the `_vectors` object was `null`, there is nothing to do // else => the `_vectors` object was `null`, there is nothing to do
} }