Rename embed_one to embed_search and embed_chunks* to embed_index*

This commit is contained in:
Louis Dureuil 2025-02-20 11:36:42 +01:00
parent 8d2d9066ba
commit 4a2643daa2
No known key found for this signature in database
11 changed files with 36 additions and 31 deletions

View File

@ -104,9 +104,10 @@ fn import_vectors() {
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap();
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap();
let beagle_embed =
hf_embedder.embed_search(S("Intel the beagle best doggo"), None).unwrap();
let lab_embed = hf_embedder.embed_search(S("Max the lab best doggo"), None).unwrap();
let patou_embed = hf_embedder.embed_search(S("kefir the patou best doggo"), None).unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
};

View File

@ -916,7 +916,7 @@ fn prepare_search<'t>(
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
embedder
.embed_one(query.q.clone().unwrap(), Some(deadline))
.embed_search(query.q.clone().unwrap(), Some(deadline))
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
}

View File

@ -203,7 +203,7 @@ impl<'a> Search<'a> {
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
match embedder.embed_one(query, Some(deadline)) {
match embedder.embed_search(query, Some(deadline)) {
Ok(embedding) => embedding,
Err(error) => {
tracing::error!(error=%error, "Embedding failed");

View File

@ -795,7 +795,7 @@ fn embed_chunks(
unused_vectors_distribution: &UnusedVectorsDistribution,
request_threads: &ThreadPoolNoAbort,
) -> Result<Vec<Vec<Embedding>>> {
match embedder.embed_chunks(text_chunks, request_threads) {
match embedder.embed_index(text_chunks, request_threads) {
Ok(chunks) => Ok(chunks),
Err(error) => {
if let FaultSource::Bug = error.fault {

View File

@ -416,7 +416,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
return Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg)));
}
let res = match embedder.embed_chunks_ref(texts.as_slice(), threads) {
let res = match embedder.embed_index_ref(texts.as_slice(), threads) {
Ok(embeddings) => {
for (docid, embedding) in ids.into_iter().zip(embeddings) {
sender.set_vector(*docid, embedder_id, embedding).unwrap();

View File

@ -346,7 +346,7 @@ impl Embedder {
Ok(embedding)
}
pub fn embed_chunks(
pub fn embed_index(
&self,
text_chunks: Vec<Vec<String>>,
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
@ -378,7 +378,7 @@ impl Embedder {
})
}
pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
texts.iter().map(|text| self.embed_one(text)).collect()
}
}

View File

@ -30,7 +30,7 @@ impl Embedder {
self.dimensions
}
pub fn embed_chunks(
pub fn embed_index(
&self,
text_chunks: Vec<Vec<String>>,
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
@ -41,7 +41,7 @@ impl Embedder {
self.distribution
}
pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
texts.iter().map(|text| self.embed_one(text)).collect()
}
}

View File

@ -628,13 +628,16 @@ impl Embedder {
EmbedderOptions::Rest(options) => {
Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
}
EmbedderOptions::Composite(options) => {
Self::Composite(composite::Embedder::new(options)?)
}
})
}
/// Embed one or multiple texts.
///
/// Each text can be embedded as one or multiple embeddings.
pub fn embed(
fn embed(
&self,
texts: Vec<String>,
deadline: Option<Instant>,
@ -649,7 +652,7 @@ impl Embedder {
}
#[tracing::instrument(level = "debug", skip_all, target = "search")]
pub fn embed_one(
pub fn embed_search(
&self,
text: String,
deadline: Option<Instant>,
@ -662,31 +665,32 @@ impl Embedder {
/// Embed multiple chunks of texts.
///
/// Each chunk is composed of one or multiple texts.
pub fn embed_chunks(
pub fn embed_index(
&self,
text_chunks: Vec<Vec<String>>,
threads: &ThreadPoolNoAbort,
) -> std::result::Result<Vec<Vec<Embedding>>, EmbedError> {
match self {
Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks),
Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks, threads),
Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks, threads),
Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks),
Embedder::Rest(embedder) => embedder.embed_chunks(text_chunks, threads),
Embedder::HuggingFace(embedder) => embedder.embed_index(text_chunks),
Embedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads),
Embedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads),
Embedder::UserProvided(embedder) => embedder.embed_index(text_chunks),
Embedder::Rest(embedder) => embedder.embed_index(text_chunks, threads),
}
}
pub fn embed_chunks_ref(
/// Non-owning variant of [`Self::embed_index`].
pub fn embed_index_ref(
&self,
texts: &[&str],
threads: &ThreadPoolNoAbort,
) -> std::result::Result<Vec<Embedding>, EmbedError> {
match self {
Embedder::HuggingFace(embedder) => embedder.embed_chunks_ref(texts),
Embedder::OpenAi(embedder) => embedder.embed_chunks_ref(texts, threads),
Embedder::Ollama(embedder) => embedder.embed_chunks_ref(texts, threads),
Embedder::UserProvided(embedder) => embedder.embed_chunks_ref(texts),
Embedder::Rest(embedder) => embedder.embed_chunks_ref(texts, threads),
Embedder::HuggingFace(embedder) => embedder.embed_index_ref(texts),
Embedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads),
Embedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads),
Embedder::UserProvided(embedder) => embedder.embed_index_ref(texts),
Embedder::Rest(embedder) => embedder.embed_index_ref(texts, threads),
}
}

View File

@ -113,7 +113,7 @@ impl Embedder {
}
}
pub fn embed_chunks(
pub fn embed_index(
&self,
text_chunks: Vec<Vec<String>>,
threads: &ThreadPoolNoAbort,
@ -134,7 +134,7 @@ impl Embedder {
}
}
pub(crate) fn embed_chunks_ref(
pub(crate) fn embed_index_ref(
&self,
texts: &[&str],
threads: &ThreadPoolNoAbort,

View File

@ -250,7 +250,7 @@ impl Embedder {
Ok(all_embeddings)
}
pub fn embed_chunks(
pub fn embed_index(
&self,
text_chunks: Vec<Vec<String>>,
threads: &ThreadPoolNoAbort,
@ -271,7 +271,7 @@ impl Embedder {
}
}
pub(crate) fn embed_chunks_ref(
pub(crate) fn embed_index_ref(
&self,
texts: &[&str],
threads: &ThreadPoolNoAbort,

View File

@ -184,7 +184,7 @@ impl Embedder {
Ok(embeddings.pop().unwrap())
}
pub fn embed_chunks(
pub fn embed_index(
&self,
text_chunks: Vec<Vec<String>>,
threads: &ThreadPoolNoAbort,
@ -205,7 +205,7 @@ impl Embedder {
}
}
pub(crate) fn embed_chunks_ref(
pub(crate) fn embed_index_ref(
&self,
texts: &[&str],
threads: &ThreadPoolNoAbort,