48 lines
1.4 KiB
Rust
Raw Normal View History

use super::error::EmbedError;
2024-10-28 14:08:54 +01:00
use super::DistributionShift;
use crate::vector::Embedding;
#[derive(Debug, Clone, Copy)]
pub struct Embedder {
dimensions: usize,
2024-03-27 11:50:22 +01:00
distribution: Option<DistributionShift>,
}
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
pub dimensions: usize,
2024-03-27 11:50:22 +01:00
pub distribution: Option<DistributionShift>,
}
impl Embedder {
pub fn new(options: EmbedderOptions) -> Self {
2024-03-27 11:50:22 +01:00
Self { dimensions: options.dimensions, distribution: options.distribution }
}
2024-10-28 14:08:54 +01:00
pub fn embed<S: AsRef<str>>(&self, texts: &[S]) -> Result<Vec<Embedding>, EmbedError> {
texts.as_ref().iter().map(|text| self.embed_one(text)).collect()
}
2024-10-28 14:08:54 +01:00
pub fn embed_one<S: AsRef<str>>(&self, text: S) -> Result<Embedding, EmbedError> {
Err(EmbedError::embed_on_manual_embedder(text.as_ref().chars().take(250).collect()))
}
pub fn dimensions(&self) -> usize {
self.dimensions
}
pub fn embed_chunks(
&self,
text_chunks: Vec<Vec<String>>,
2024-10-28 14:08:54 +01:00
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
text_chunks.into_iter().map(|prompts| self.embed(&prompts)).collect()
}
2024-03-27 11:50:22 +01:00
pub fn distribution(&self) -> Option<DistributionShift> {
self.distribution
}
2024-10-28 14:08:54 +01:00
pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result<Vec<Embedding>, EmbedError> {
texts.iter().map(|text| self.embed_one(text)).collect()
}
}