From cc4bd54669b64b6fa195616fb18ca7da38c299a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 28 Nov 2024 13:53:25 +0100 Subject: [PATCH] Correctly construct the Embeddings struct --- crates/milli/src/update/new/channel.rs | 14 ++++++++++++++ crates/milli/src/update/new/indexer/mod.rs | 13 ++++++------- crates/milli/src/vector/mod.rs | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 7eaa50df1..237c19a5c 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -293,6 +293,20 @@ impl ArroySetVectors { }); Some(&vec[..]) } + + /// Read all the embeddings and write them into an aligned `f32` Vec. + pub fn read_all_embeddings_into_vec<'v>( + &self, + frame: &FrameGrantR<'_>, + vec: &'v mut Vec<f32>, + ) -> &'v [f32] { + vec.clear(); + Self::remaining_bytes(frame).chunks_exact(mem::size_of::<f32>()).for_each(|bytes| { + let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); + vec.push(f); + }); + &vec[..] + } } #[derive(Debug, Clone, Copy)] diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 9ad7a8f0b..a8a94cb7c 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -442,11 +442,12 @@ where let LargeVectors { docid, embedder_id, .. 
} = large_vectors; let (_, _, writer, dimensions) = arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - writer.del_items(wtxn, *dimensions, docid)?; let mut embeddings = Embeddings::new(*dimensions); for embedding in large_vectors.read_embeddings() { embeddings.push(embedding.to_vec()).unwrap(); } + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_items(wtxn, docid, &embeddings)?; } } @@ -607,13 +608,11 @@ fn write_from_bbqueue( let frame = frame_with_header.frame(); let (_, _, writer, dimensions) = arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + let mut embeddings = Embeddings::new(*dimensions); + let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); + embeddings.append(all_embeddings.to_vec()).unwrap(); writer.del_items(wtxn, *dimensions, docid)?; - for index in 0.. { - match asvs.read_embedding_into_vec(frame, index, aligned_embedding) { - Some(embedding) => writer.add_item(wtxn, docid, embedding)?, - None => break, - } - } + writer.add_items(wtxn, docid, &embeddings)?; } } } diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index 3047e6dfc..a1d71ef93 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -475,7 +475,7 @@ impl Embeddings { Ok(()) } - /// Append a flat vector of embeddings a the end of the embeddings. + /// Append a flat vector of embeddings at the end of the embeddings. /// /// If `embeddings.len() % self.dimension != 0`, then the append operation fails. pub fn append(&mut self, mut embeddings: Vec) -> Result<(), Vec> {