Know if your vectors are implicit when writing them back in documents + don't write empty _vectors

This commit is contained in:
Louis Dureuil 2024-11-08 00:05:36 +01:00
parent 8a314ab81d
commit 5185aa21b8
No known key found for this signature in database
2 changed files with 17 additions and 5 deletions

View File

@@ -332,14 +332,22 @@ where
}
vectors.insert(
name,
if entry.implicit {
serde_json::json!(entry.embeddings)
} else {
serde_json::json!({
"regenerate": entry.regenerate,
// TODO: consider optimizing the shape of embedders here to store an array of f32 rather than a JSON object
"embeddings": entry.embeddings,
}),
})
},
);
}
if vectors.is_empty() {
break 'inject_vectors;
}
vectors_value = serde_json::value::to_raw_value(&vectors).unwrap();
unordered_field_buffer.push((vectors_fid, &vectors_value));
}

View File

@@ -71,6 +71,7 @@ pub struct VectorEntry<'doc> {
pub has_configured_embedder: bool,
pub embeddings: Option<Embeddings<'doc>>,
pub regenerate: bool,
pub implicit: bool,
}
pub trait VectorDocument<'doc> {
@@ -125,6 +126,7 @@ impl<'t> VectorDocumentFromDb<'t> {
has_configured_embedder: true,
embeddings: Some(Embeddings::FromDb(vectors)),
regenerate: !config.user_provided.contains(self.docid),
implicit: false,
})
}
}
@@ -174,11 +176,13 @@ fn entry_from_raw_value(
has_configured_embedder,
embeddings: raw_explicit_vectors.embeddings.map(Embeddings::FromJsonExplicit),
regenerate: raw_explicit_vectors.regenerate,
implicit: false,
},
RawVectors::ImplicitlyUserProvided(value) => VectorEntry {
has_configured_embedder,
embeddings: value.map(Embeddings::FromJsonImplicityUserProvided),
regenerate: false,
implicit: true,
},
})
}