From 363a5cc59099c42e14b7b0e9eb12e2598ff79d14 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Mon, 15 Jul 2024 11:56:18 +0200
Subject: [PATCH] Retrieve function from v1.9 to get embeddings in documents

---
 milli/src/index.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/milli/src/index.rs b/milli/src/index.rs
index 27b273393..634630f35 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -22,7 +22,7 @@ use crate::heed_codec::{
 };
 use crate::order_by_map::OrderByMap;
 use crate::proximity::ProximityPrecision;
-use crate::vector::EmbeddingConfig;
+use crate::vector::{Embedding, EmbeddingConfig};
 use crate::{
     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
     FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
@@ -1516,6 +1516,54 @@ impl Index {
             .unwrap_or_default())
     }
 
+    /// Returns the embeddings stored for `docid`, grouped by embedder name.
+    ///
+    /// Each entry of `embedder_category_id` produces one key in the returned map. The
+    /// embedder id forms the high byte of the 16-bit value handed to `arroy::Reader::open`,
+    /// and the low byte is probed from 0 to `u8::MAX`. Probing stops at the first value
+    /// that has no arroy metadata or whose reader holds no vector for `docid`, so an
+    /// embedder with nothing stored for the document maps to an empty `Vec`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates errors from iterating `embedder_category_id` and every arroy error
+    /// other than `arroy::Error::MissingMetadata`.
+    pub fn embeddings(
+        &self,
+        rtxn: &RoTxn<'_>,
+        docid: DocumentId,
+    ) -> Result<BTreeMap<String, Vec<Embedding>>> {
+        let mut res = BTreeMap::new();
+        for row in self.embedder_category_id.iter(rtxn)? {
+            let (embedder_name, embedder_id) = row?;
+            let embedder_id = (embedder_id as u16) << 8;
+            let mut embeddings = Vec::new();
+            'vectors: for i in 0..=u8::MAX {
+                let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
+                    .map(Some)
+                    .or_else(|e| match e {
+                        arroy::Error::MissingMetadata => Ok(None),
+                        e => Err(e),
+                    })
+                    .transpose();
+
+                let Some(reader) = reader else {
+                    break 'vectors;
+                };
+
+                let embedding = reader?.item_vector(rtxn, docid)?;
+                if let Some(embedding) = embedding {
+                    embeddings.push(embedding)
+                } else {
+                    break 'vectors;
+                }
+            }
+
+            res.insert(embedder_name.to_owned(), embeddings);
+        }
+        Ok(res)
+    }
+
     pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
         self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
     }