From 5185aa21b8a5b31bf7b9df61f3b43447e2221faa Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Fri, 8 Nov 2024 00:05:36 +0100 Subject: [PATCH] Know if your vectors are implicit when writing them back in documents + don't write empty _vectors --- crates/milli/src/update/new/document.rs | 18 +++++++++++++----- crates/milli/src/update/new/vector_document.rs | 4 ++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs index f43eb63e4..692277597 100644 --- a/crates/milli/src/update/new/document.rs +++ b/crates/milli/src/update/new/document.rs @@ -332,14 +332,22 @@ where } vectors.insert( name, - serde_json::json!({ - "regenerate": entry.regenerate, - // TODO: consider optimizing the shape of embedders here to store an array of f32 rather than a JSON object - "embeddings": entry.embeddings, - }), + if entry.implicit { + serde_json::json!(entry.embeddings) + } else { + serde_json::json!({ + "regenerate": entry.regenerate, + // TODO: consider optimizing the shape of embedders here to store an array of f32 rather than a JSON object + "embeddings": entry.embeddings, + }) + }, ); } + if vectors.is_empty() { + break 'inject_vectors; + } + vectors_value = serde_json::value::to_raw_value(&vectors).unwrap(); unordered_field_buffer.push((vectors_fid, &vectors_value)); } diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index 4a27361a9..e96e29053 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -71,6 +71,7 @@ pub struct VectorEntry<'doc> { pub has_configured_embedder: bool, pub embeddings: Option<Embeddings<'doc>>, pub regenerate: bool, + pub implicit: bool, } pub trait VectorDocument<'doc> { @@ -125,6 +126,7 @@ impl<'t> VectorDocumentFromDb<'t> { has_configured_embedder: true, embeddings: Some(Embeddings::FromDb(vectors)), regenerate: !config.user_provided.contains(self.docid), + 
implicit: false, }) } } @@ -174,11 +176,13 @@ fn entry_from_raw_value( has_configured_embedder, embeddings: raw_explicit_vectors.embeddings.map(Embeddings::FromJsonExplicit), regenerate: raw_explicit_vectors.regenerate, + implicit: false, }, RawVectors::ImplicitlyUserProvided(value) => VectorEntry { has_configured_embedder, embeddings: value.map(Embeddings::FromJsonImplicityUserProvided), regenerate: false, + implicit: true, }, }) }