From 9375b7bba58980e517f7c09d680586314a3ade65 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 15 Jul 2024 11:56:39 +0200 Subject: [PATCH] Inject generated vectors in dumps --- index-scheduler/src/batch.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 3161dc499..4f80cc23b 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -914,8 +914,34 @@ impl IndexScheduler { if self.must_stop_processing.get() { return Err(Error::AbortedTask); } - let (_id, doc) = ret?; - let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; + let (id, doc) = ret?; + let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; + + 'inject_vectors: { + let embeddings = index.embeddings(&rtxn, id)?; + + if embeddings.is_empty() { + break 'inject_vectors; + } + + let vectors = document + .entry("_vectors".to_owned()) + .or_insert(serde_json::Value::Object(Default::default())); + + let serde_json::Value::Object(vectors) = vectors else { + break 'inject_vectors; + }; + + for (embedder_name, embeddings) in embeddings { + vectors.entry(embedder_name).or_insert_with(|| { + serde_json::json!({ + "embeddings": embeddings, + "regenerate": true + }) + }); + } + } + index_dumper.push_document(&document)?; }