Know if your vectors are implicit when writing them back in documents + don't write empty _vectors

This commit is contained in:
Louis Dureuil 2024-11-08 00:05:36 +01:00
parent 8a314ab81d
commit 5185aa21b8
No known key found for this signature in database
2 changed files with 17 additions and 5 deletions

View File

@ -332,14 +332,22 @@ where
} }
vectors.insert( vectors.insert(
name, name,
if entry.implicit {
serde_json::json!(entry.embeddings)
} else {
serde_json::json!({ serde_json::json!({
"regenerate": entry.regenerate, "regenerate": entry.regenerate,
// TODO: consider optimizing the shape of embedders here to store an array of f32 rather than a JSON object // TODO: consider optimizing the shape of embedders here to store an array of f32 rather than a JSON object
"embeddings": entry.embeddings, "embeddings": entry.embeddings,
}), })
},
); );
} }
if vectors.is_empty() {
break 'inject_vectors;
}
vectors_value = serde_json::value::to_raw_value(&vectors).unwrap(); vectors_value = serde_json::value::to_raw_value(&vectors).unwrap();
unordered_field_buffer.push((vectors_fid, &vectors_value)); unordered_field_buffer.push((vectors_fid, &vectors_value));
} }

View File

@ -71,6 +71,7 @@ pub struct VectorEntry<'doc> {
pub has_configured_embedder: bool, pub has_configured_embedder: bool,
pub embeddings: Option<Embeddings<'doc>>, pub embeddings: Option<Embeddings<'doc>>,
pub regenerate: bool, pub regenerate: bool,
pub implicit: bool,
} }
pub trait VectorDocument<'doc> { pub trait VectorDocument<'doc> {
@ -125,6 +126,7 @@ impl<'t> VectorDocumentFromDb<'t> {
has_configured_embedder: true, has_configured_embedder: true,
embeddings: Some(Embeddings::FromDb(vectors)), embeddings: Some(Embeddings::FromDb(vectors)),
regenerate: !config.user_provided.contains(self.docid), regenerate: !config.user_provided.contains(self.docid),
implicit: false,
}) })
} }
} }
@ -174,11 +176,13 @@ fn entry_from_raw_value(
has_configured_embedder, has_configured_embedder,
embeddings: raw_explicit_vectors.embeddings.map(Embeddings::FromJsonExplicit), embeddings: raw_explicit_vectors.embeddings.map(Embeddings::FromJsonExplicit),
regenerate: raw_explicit_vectors.regenerate, regenerate: raw_explicit_vectors.regenerate,
implicit: false,
}, },
RawVectors::ImplicitlyUserProvided(value) => VectorEntry { RawVectors::ImplicitlyUserProvided(value) => VectorEntry {
has_configured_embedder, has_configured_embedder,
embeddings: value.map(Embeddings::FromJsonImplicityUserProvided), embeddings: value.map(Embeddings::FromJsonImplicityUserProvided),
regenerate: false, regenerate: false,
implicit: true,
}, },
}) })
} }