Rename embed_one to embed_search and embed_chunks* to embed_index*

This commit is contained in:
Louis Dureuil 2025-02-20 11:36:42 +01:00
parent 8d2d9066ba
commit 4a2643daa2
No known key found for this signature in database
11 changed files with 36 additions and 31 deletions

View File

@ -104,9 +104,10 @@ fn import_vectors() {
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); let beagle_embed =
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap(); hf_embedder.embed_search(S("Intel the beagle best doggo"), None).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap(); let lab_embed = hf_embedder.embed_search(S("Max the lab best doggo"), None).unwrap();
let patou_embed = hf_embedder.embed_search(S("kefir the patou best doggo"), None).unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
}; };

View File

@ -916,7 +916,7 @@ fn prepare_search<'t>(
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10); let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
embedder embedder
.embed_one(query.q.clone().unwrap(), Some(deadline)) .embed_search(query.q.clone().unwrap(), Some(deadline))
.map_err(milli::vector::Error::from) .map_err(milli::vector::Error::from)
.map_err(milli::Error::from)? .map_err(milli::Error::from)?
} }

View File

@ -203,7 +203,7 @@ impl<'a> Search<'a> {
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3); let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
match embedder.embed_one(query, Some(deadline)) { match embedder.embed_search(query, Some(deadline)) {
Ok(embedding) => embedding, Ok(embedding) => embedding,
Err(error) => { Err(error) => {
tracing::error!(error=%error, "Embedding failed"); tracing::error!(error=%error, "Embedding failed");

View File

@ -795,7 +795,7 @@ fn embed_chunks(
unused_vectors_distribution: &UnusedVectorsDistribution, unused_vectors_distribution: &UnusedVectorsDistribution,
request_threads: &ThreadPoolNoAbort, request_threads: &ThreadPoolNoAbort,
) -> Result<Vec<Vec<Embedding>>> { ) -> Result<Vec<Vec<Embedding>>> {
match embedder.embed_chunks(text_chunks, request_threads) { match embedder.embed_index(text_chunks, request_threads) {
Ok(chunks) => Ok(chunks), Ok(chunks) => Ok(chunks),
Err(error) => { Err(error) => {
if let FaultSource::Bug = error.fault { if let FaultSource::Bug = error.fault {

View File

@ -416,7 +416,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
return Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg))); return Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg)));
} }
let res = match embedder.embed_chunks_ref(texts.as_slice(), threads) { let res = match embedder.embed_index_ref(texts.as_slice(), threads) {
Ok(embeddings) => { Ok(embeddings) => {
for (docid, embedding) in ids.into_iter().zip(embeddings) { for (docid, embedding) in ids.into_iter().zip(embeddings) {
sender.set_vector(*docid, embedder_id, embedding).unwrap(); sender.set_vector(*docid, embedder_id, embedding).unwrap();

View File

@ -346,7 +346,7 @@ impl Embedder {
Ok(embedding) Ok(embedding)
} }
pub fn embed_chunks( pub fn embed_index(
&self, &self,
text_chunks: Vec<Vec<String>>, text_chunks: Vec<Vec<String>>,
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> { ) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
@ -378,7 +378,7 @@ impl Embedder {
}) })
} }
pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> { pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
texts.iter().map(|text| self.embed_one(text)).collect() texts.iter().map(|text| self.embed_one(text)).collect()
} }
} }

View File

@ -30,7 +30,7 @@ impl Embedder {
self.dimensions self.dimensions
} }
pub fn embed_chunks( pub fn embed_index(
&self, &self,
text_chunks: Vec<Vec<String>>, text_chunks: Vec<Vec<String>>,
) -> Result<Vec<Vec<Embedding>>, EmbedError> { ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
@ -41,7 +41,7 @@ impl Embedder {
self.distribution self.distribution
} }
pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> { pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
texts.iter().map(|text| self.embed_one(text)).collect() texts.iter().map(|text| self.embed_one(text)).collect()
} }
} }

View File

@ -628,13 +628,16 @@ impl Embedder {
EmbedderOptions::Rest(options) => { EmbedderOptions::Rest(options) => {
Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?) Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
} }
EmbedderOptions::Composite(options) => {
Self::Composite(composite::Embedder::new(options)?)
}
}) })
} }
/// Embed one or multiple texts. /// Embed one or multiple texts.
/// ///
/// Each text can be embedded as one or multiple embeddings. /// Each text can be embedded as one or multiple embeddings.
pub fn embed( fn embed(
&self, &self,
texts: Vec<String>, texts: Vec<String>,
deadline: Option<Instant>, deadline: Option<Instant>,
@ -649,7 +652,7 @@ impl Embedder {
} }
#[tracing::instrument(level = "debug", skip_all, target = "search")] #[tracing::instrument(level = "debug", skip_all, target = "search")]
pub fn embed_one( pub fn embed_search(
&self, &self,
text: String, text: String,
deadline: Option<Instant>, deadline: Option<Instant>,
@ -662,31 +665,32 @@ impl Embedder {
/// Embed multiple chunks of texts. /// Embed multiple chunks of texts.
/// ///
/// Each chunk is composed of one or multiple texts. /// Each chunk is composed of one or multiple texts.
pub fn embed_chunks( pub fn embed_index(
&self, &self,
text_chunks: Vec<Vec<String>>, text_chunks: Vec<Vec<String>>,
threads: &ThreadPoolNoAbort, threads: &ThreadPoolNoAbort,
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> { ) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
match self { match self {
Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks), Embedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks, threads), Embedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads),
Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks, threads), Embedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads),
Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks), Embedder::UserProvided(embedder) => embedder.embed_index(text_chunks),
Embedder::Rest(embedder) => embedder.embed_chunks(text_chunks, threads), Embedder::Rest(embedder) => embedder.embed_index(text_chunks, threads),
} }
} }
pub fn embed_chunks_ref( /// Non-owning variant of [`Self::embed_index`].
pub fn embed_index_ref(
&self, &self,
texts: &[&str], texts: &[&str],
threads: &ThreadPoolNoAbort, threads: &ThreadPoolNoAbort,
) -> std::result::Result<Vec<Embedding>, EmbedError> { ) -> std::result::Result<Vec<Embedding>, EmbedError> {
match self { match self {
Embedder::HuggingFace(embedder) => embedder.embed_chunks_ref(texts), Embedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
Embedder::OpenAi(embedder) => embedder.embed_chunks_ref(texts, threads), Embedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads),
Embedder::Ollama(embedder) => embedder.embed_chunks_ref(texts, threads), Embedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads),
Embedder::UserProvided(embedder) => embedder.embed_chunks_ref(texts), Embedder::UserProvided(embedder) => embedder.embed_index_ref(texts),
Embedder::Rest(embedder) => embedder.embed_chunks_ref(texts, threads), Embedder::Rest(embedder) => embedder.embed_index_ref(texts, threads),
} }
} }

View File

@ -113,7 +113,7 @@ impl Embedder {
} }
} }
pub fn embed_chunks( pub fn embed_index(
&self, &self,
text_chunks: Vec<Vec<String>>, text_chunks: Vec<Vec<String>>,
threads: &ThreadPoolNoAbort, threads: &ThreadPoolNoAbort,
@ -134,7 +134,7 @@ impl Embedder {
} }
} }
pub(crate) fn embed_chunks_ref( pub(crate) fn embed_index_ref(
&self, &self,
texts: &[&str], texts: &[&str],
threads: &ThreadPoolNoAbort, threads: &ThreadPoolNoAbort,

View File

@ -250,7 +250,7 @@ impl Embedder {
Ok(all_embeddings) Ok(all_embeddings)
} }
pub fn embed_chunks( pub fn embed_index(
&self, &self,
text_chunks: Vec<Vec<String>>, text_chunks: Vec<Vec<String>>,
threads: &ThreadPoolNoAbort, threads: &ThreadPoolNoAbort,
@ -271,7 +271,7 @@ impl Embedder {
} }
} }
pub(crate) fn embed_chunks_ref( pub(crate) fn embed_index_ref(
&self, &self,
texts: &[&str], texts: &[&str],
threads: &ThreadPoolNoAbort, threads: &ThreadPoolNoAbort,

View File

@ -184,7 +184,7 @@ impl Embedder {
Ok(embeddings.pop().unwrap()) Ok(embeddings.pop().unwrap())
} }
pub fn embed_chunks( pub fn embed_index(
&self, &self,
text_chunks: Vec<Vec<String>>, text_chunks: Vec<Vec<String>>,
threads: &ThreadPoolNoAbort, threads: &ThreadPoolNoAbort,
@ -205,7 +205,7 @@ impl Embedder {
} }
} }
pub(crate) fn embed_chunks_ref( pub(crate) fn embed_index_ref(
&self, &self,
texts: &[&str], texts: &[&str],
threads: &ThreadPoolNoAbort, threads: &ThreadPoolNoAbort,