mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
wraps the index embedding config in a struct
This commit is contained in:
parent
04f6523f3c
commit
9eb6f522ea
4
Cargo.lock
generated
4
Cargo.lock
generated
@ -5310,9 +5310,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tracing-actix-web"
|
name = "tracing-actix-web"
|
||||||
version = "0.7.10"
|
version = "0.7.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fa069bd1503dd526ee793bb3fce408895136c95fc86d2edb2acf1c646d7f0684"
|
checksum = "4ee9e39a66d9b615644893ffc1704d2a89b5b315b7fd0228ad3182ca9a306b19"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
"mutually_exclusive_features",
|
"mutually_exclusive_features",
|
||||||
|
@ -53,6 +53,7 @@ use meilisearch_types::heed::byteorder::BE;
|
|||||||
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
|
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
|
||||||
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||||
|
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||||
use meilisearch_types::milli::update::IndexerConfig;
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||||
@ -1459,33 +1460,39 @@ impl IndexScheduler {
|
|||||||
// TODO: consider using a type alias or a struct embedder/template
|
// TODO: consider using a type alias or a struct embedder/template
|
||||||
pub fn embedders(
|
pub fn embedders(
|
||||||
&self,
|
&self,
|
||||||
embedding_configs: Vec<(String, milli::vector::EmbeddingConfig, RoaringBitmap)>,
|
embedding_configs: Vec<IndexEmbeddingConfig>,
|
||||||
) -> Result<EmbeddingConfigs> {
|
) -> Result<EmbeddingConfigs> {
|
||||||
let res: Result<_> = embedding_configs
|
let res: Result<_> = embedding_configs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt }, _)| {
|
.map(
|
||||||
let prompt =
|
|IndexEmbeddingConfig {
|
||||||
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
name,
|
||||||
// optimistically return existing embedder
|
config: milli::vector::EmbeddingConfig { embedder_options, prompt },
|
||||||
{
|
..
|
||||||
let embedders = self.embedders.read().unwrap();
|
}| {
|
||||||
if let Some(embedder) = embedders.get(&embedder_options) {
|
let prompt =
|
||||||
return Ok((name, (embedder.clone(), prompt)));
|
Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
|
||||||
|
// optimistically return existing embedder
|
||||||
|
{
|
||||||
|
let embedders = self.embedders.read().unwrap();
|
||||||
|
if let Some(embedder) = embedders.get(&embedder_options) {
|
||||||
|
return Ok((name, (embedder.clone(), prompt)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// add missing embedder
|
// add missing embedder
|
||||||
let embedder = Arc::new(
|
let embedder = Arc::new(
|
||||||
Embedder::new(embedder_options.clone())
|
Embedder::new(embedder_options.clone())
|
||||||
.map_err(meilisearch_types::milli::vector::Error::from)
|
.map_err(meilisearch_types::milli::vector::Error::from)
|
||||||
.map_err(meilisearch_types::milli::Error::from)?,
|
.map_err(meilisearch_types::milli::Error::from)?,
|
||||||
);
|
);
|
||||||
{
|
{
|
||||||
let mut embedders = self.embedders.write().unwrap();
|
let mut embedders = self.embedders.write().unwrap();
|
||||||
embedders.insert(embedder_options, embedder.clone());
|
embedders.insert(embedder_options, embedder.clone());
|
||||||
}
|
}
|
||||||
Ok((name, (embedder, prompt)))
|
Ok((name, (embedder, prompt)))
|
||||||
})
|
},
|
||||||
|
)
|
||||||
.collect();
|
.collect();
|
||||||
res.map(EmbeddingConfigs::new)
|
res.map(EmbeddingConfigs::new)
|
||||||
}
|
}
|
||||||
@ -3055,10 +3062,10 @@ mod tests {
|
|||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
let (name, embedding_config, user_provided) = configs.first().unwrap();
|
let IndexEmbeddingConfig { name, config, user_defined } = configs.first().unwrap();
|
||||||
insta::assert_snapshot!(name, @"default");
|
insta::assert_snapshot!(name, @"default");
|
||||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
|
||||||
insta::assert_json_snapshot!(embedding_config.embedder_options);
|
insta::assert_json_snapshot!(config.embedder_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -5022,15 +5029,17 @@ mod tests {
|
|||||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
// for consistency with the below
|
// for consistency with the below
|
||||||
#[allow(clippy::get_first)]
|
#[allow(clippy::get_first)]
|
||||||
let (name, fakerest_config, user_provided) = configs.get(0).unwrap();
|
let IndexEmbeddingConfig { name, config: fakerest_config, user_defined } =
|
||||||
|
configs.get(0).unwrap();
|
||||||
insta::assert_snapshot!(name, @"A_fakerest");
|
insta::assert_snapshot!(name, @"A_fakerest");
|
||||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
|
||||||
insta::assert_json_snapshot!(fakerest_config.embedder_options);
|
insta::assert_json_snapshot!(fakerest_config.embedder_options);
|
||||||
let fakerest_name = name.clone();
|
let fakerest_name = name.clone();
|
||||||
|
|
||||||
let (name, simple_hf_config, user_provided) = configs.get(1).unwrap();
|
let IndexEmbeddingConfig { name, config: simple_hf_config, user_defined } =
|
||||||
|
configs.get(1).unwrap();
|
||||||
insta::assert_snapshot!(name, @"B_small_hf");
|
insta::assert_snapshot!(name, @"B_small_hf");
|
||||||
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
|
||||||
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
|
||||||
let simple_hf_name = name.clone();
|
let simple_hf_name = name.clone();
|
||||||
|
|
||||||
@ -5102,11 +5111,11 @@ mod tests {
|
|||||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
// for consistency with the below
|
// for consistency with the below
|
||||||
#[allow(clippy::get_first)]
|
#[allow(clippy::get_first)]
|
||||||
let (name, _config, user_defined) = configs.get(0).unwrap();
|
let IndexEmbeddingConfig { name, config: _, user_defined } = configs.get(0).unwrap();
|
||||||
insta::assert_snapshot!(name, @"A_fakerest");
|
insta::assert_snapshot!(name, @"A_fakerest");
|
||||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||||
|
|
||||||
let (name, _config, user_defined) = configs.get(1).unwrap();
|
let IndexEmbeddingConfig { name, config: _, user_defined } = configs.get(1).unwrap();
|
||||||
insta::assert_snapshot!(name, @"B_small_hf");
|
insta::assert_snapshot!(name, @"B_small_hf");
|
||||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
|
||||||
|
|
||||||
@ -5178,11 +5187,13 @@ mod tests {
|
|||||||
let configs = index.embedding_configs(&rtxn).unwrap();
|
let configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
// for consistency with the below
|
// for consistency with the below
|
||||||
#[allow(clippy::get_first)]
|
#[allow(clippy::get_first)]
|
||||||
let (name, _config, user_defined) = configs.get(0).unwrap();
|
let IndexEmbeddingConfig { name, config: _, user_defined } =
|
||||||
|
configs.get(0).unwrap();
|
||||||
insta::assert_snapshot!(name, @"A_fakerest");
|
insta::assert_snapshot!(name, @"A_fakerest");
|
||||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
|
||||||
|
|
||||||
let (name, _config, user_defined) = configs.get(1).unwrap();
|
let IndexEmbeddingConfig { name, config: _, user_defined } =
|
||||||
|
configs.get(1).unwrap();
|
||||||
insta::assert_snapshot!(name, @"B_small_hf");
|
insta::assert_snapshot!(name, @"B_small_hf");
|
||||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ use std::str::FromStr;
|
|||||||
|
|
||||||
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
|
||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
|
use milli::index::IndexEmbeddingConfig;
|
||||||
use milli::proximity::ProximityPrecision;
|
use milli::proximity::ProximityPrecision;
|
||||||
use milli::update::Setting;
|
use milli::update::Setting;
|
||||||
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
|
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
|
||||||
@ -672,7 +673,7 @@ pub fn settings(
|
|||||||
let embedders: BTreeMap<_, _> = index
|
let embedders: BTreeMap<_, _> = index
|
||||||
.embedding_configs(rtxn)?
|
.embedding_configs(rtxn)?
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(name, config, _)| (name, Setting::Set(config.into())))
|
.map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into())))
|
||||||
.collect();
|
.collect();
|
||||||
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
|
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ use heed::types::*;
|
|||||||
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
|
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use rstar::RTree;
|
use rstar::RTree;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::documents::PrimaryKey;
|
use crate::documents::PrimaryKey;
|
||||||
@ -1579,24 +1580,23 @@ impl Index {
|
|||||||
pub(crate) fn put_embedding_configs(
|
pub(crate) fn put_embedding_configs(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn<'_>,
|
wtxn: &mut RwTxn<'_>,
|
||||||
configs: Vec<(String, EmbeddingConfig, RoaringBitmap)>,
|
configs: Vec<IndexEmbeddingConfig>,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
self.main
|
self.main.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>().put(
|
||||||
.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig, RoaringBitmap)>>>()
|
wtxn,
|
||||||
.put(wtxn, main_key::EMBEDDING_CONFIGS, &configs)
|
main_key::EMBEDDING_CONFIGS,
|
||||||
|
&configs,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn delete_embedding_configs(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
|
pub(crate) fn delete_embedding_configs(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
|
||||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS)
|
self.main.remap_key_type::<Str>().delete(wtxn, main_key::EMBEDDING_CONFIGS)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embedding_configs(
|
pub fn embedding_configs(&self, rtxn: &RoTxn<'_>) -> Result<Vec<IndexEmbeddingConfig>> {
|
||||||
&self,
|
|
||||||
rtxn: &RoTxn<'_>,
|
|
||||||
) -> Result<Vec<(String, EmbeddingConfig, RoaringBitmap)>> {
|
|
||||||
Ok(self
|
Ok(self
|
||||||
.main
|
.main
|
||||||
.remap_types::<Str, SerdeJson<Vec<(String, EmbeddingConfig, RoaringBitmap)>>>()
|
.remap_types::<Str, SerdeJson<Vec<IndexEmbeddingConfig>>>()
|
||||||
.get(rtxn, main_key::EMBEDDING_CONFIGS)?
|
.get(rtxn, main_key::EMBEDDING_CONFIGS)?
|
||||||
.unwrap_or_default())
|
.unwrap_or_default())
|
||||||
}
|
}
|
||||||
@ -1668,6 +1668,13 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, Serialize)]
|
||||||
|
pub struct IndexEmbeddingConfig {
|
||||||
|
pub name: String,
|
||||||
|
pub config: EmbeddingConfig,
|
||||||
|
pub user_defined: RoaringBitmap,
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) mod tests {
|
pub(crate) mod tests {
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
@ -785,6 +785,7 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::documents::documents_batch_reader_from_objects;
|
use crate::documents::documents_batch_reader_from_objects;
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::search::TermsMatchingStrategy;
|
use crate::search::TermsMatchingStrategy;
|
||||||
use crate::update::Setting;
|
use crate::update::Setting;
|
||||||
use crate::{db_snap, Filter, Search};
|
use crate::{db_snap, Filter, Search};
|
||||||
@ -2620,7 +2621,8 @@ mod tests {
|
|||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let mut embedding_configs = index.embedding_configs(&rtxn).unwrap();
|
let mut embedding_configs = index.embedding_configs(&rtxn).unwrap();
|
||||||
let (embedder_name, embedder, user_defined) = embedding_configs.pop().unwrap();
|
let IndexEmbeddingConfig { name: embedder_name, config: embedder, user_defined } =
|
||||||
|
embedding_configs.pop().unwrap();
|
||||||
insta::assert_snapshot!(embedder_name, @"manual");
|
insta::assert_snapshot!(embedder_name, @"manual");
|
||||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0, 1, 2]>");
|
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0, 1, 2]>");
|
||||||
let embedder =
|
let embedder =
|
||||||
|
@ -20,6 +20,7 @@ use super::MergeFn;
|
|||||||
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
|
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::index::db_name::DOCUMENTS;
|
use crate::index::db_name::DOCUMENTS;
|
||||||
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::proximity::MAX_DISTANCE;
|
use crate::proximity::MAX_DISTANCE;
|
||||||
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
|
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
|
||||||
use crate::update::facet::FacetsUpdate;
|
use crate::update::facet::FacetsUpdate;
|
||||||
@ -156,8 +157,11 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let mut docids = index.documents_ids(wtxn)?;
|
let mut docids = index.documents_ids(wtxn)?;
|
||||||
let mut iter = merger.into_stream_merger_iter()?;
|
let mut iter = merger.into_stream_merger_iter()?;
|
||||||
|
|
||||||
let embedders: BTreeSet<_> =
|
let embedders: BTreeSet<_> = index
|
||||||
index.embedding_configs(wtxn)?.into_iter().map(|(name, _, _)| name).collect();
|
.embedding_configs(wtxn)?
|
||||||
|
.into_iter()
|
||||||
|
.map(|IndexEmbeddingConfig { name, .. }| name)
|
||||||
|
.collect();
|
||||||
let mut vectors_buffer = Vec::new();
|
let mut vectors_buffer = Vec::new();
|
||||||
while let Some((key, reader)) = iter.next()? {
|
while let Some((key, reader)) = iter.next()? {
|
||||||
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
||||||
@ -653,10 +657,12 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let Some((expected_dimension, embedder_name)) = params else { unreachable!() };
|
let Some((expected_dimension, embedder_name)) = params else { unreachable!() };
|
||||||
|
|
||||||
let mut embedding_configs = index.embedding_configs(&wtxn)?;
|
let mut embedding_configs = index.embedding_configs(&wtxn)?;
|
||||||
let (_name, _conf, ud) =
|
let index_embedder_config = embedding_configs
|
||||||
embedding_configs.iter_mut().find(|config| config.0 == embedder_name).unwrap();
|
.iter_mut()
|
||||||
*ud -= remove_from_user_defined;
|
.find(|IndexEmbeddingConfig { name, .. }| name == &embedder_name)
|
||||||
*ud |= user_defined;
|
.unwrap();
|
||||||
|
index_embedder_config.user_defined -= remove_from_user_defined;
|
||||||
|
index_embedder_config.user_defined |= user_defined;
|
||||||
|
|
||||||
index.put_embedding_configs(wtxn, embedding_configs)?;
|
index.put_embedding_configs(wtxn, embedding_configs)?;
|
||||||
|
|
||||||
|
@ -15,7 +15,9 @@ use super::index_documents::{IndexDocumentsConfig, Transform};
|
|||||||
use super::IndexerConfig;
|
use super::IndexerConfig;
|
||||||
use crate::criterion::Criterion;
|
use crate::criterion::Criterion;
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
use crate::index::{
|
||||||
|
IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
|
||||||
|
};
|
||||||
use crate::order_by_map::OrderByMap;
|
use crate::order_by_map::OrderByMap;
|
||||||
use crate::proximity::ProximityPrecision;
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::update::index_documents::IndexDocumentsMethod;
|
use crate::update::index_documents::IndexDocumentsMethod;
|
||||||
@ -930,8 +932,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
let old_configs: BTreeMap<String, (Setting<EmbeddingSettings>, RoaringBitmap)> =
|
let old_configs: BTreeMap<String, (Setting<EmbeddingSettings>, RoaringBitmap)> =
|
||||||
old_configs
|
old_configs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(name, setting, user_defined)| {
|
.map(|IndexEmbeddingConfig { name, config, user_defined }| {
|
||||||
(name, (Setting::Set(setting.into()), user_defined))
|
(name, (Setting::Set(config.into()), user_defined))
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
@ -975,23 +977,27 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let new_configs: Vec<(String, EmbeddingConfig, RoaringBitmap)> = new_configs
|
let new_configs: Vec<IndexEmbeddingConfig> = new_configs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|(name, (setting, user_defined))| match setting {
|
.filter_map(|(name, (config, user_defined))| match config {
|
||||||
Setting::Set(settings) => Some((name, settings.into(), user_defined)),
|
Setting::Set(config) => {
|
||||||
Setting::Reset => None,
|
Some(IndexEmbeddingConfig { name, config: config.into(), user_defined })
|
||||||
Setting::NotSet => {
|
|
||||||
Some((name, EmbeddingSettings::default().into(), user_defined))
|
|
||||||
}
|
}
|
||||||
|
Setting::Reset => None,
|
||||||
|
Setting::NotSet => Some(IndexEmbeddingConfig {
|
||||||
|
name,
|
||||||
|
config: EmbeddingSettings::default().into(),
|
||||||
|
user_defined,
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
self.index.embedder_category_id.clear(self.wtxn)?;
|
self.index.embedder_category_id.clear(self.wtxn)?;
|
||||||
for (index, (embedder_name, _, _)) in new_configs.iter().enumerate() {
|
for (index, index_embedding_config) in new_configs.iter().enumerate() {
|
||||||
self.index.embedder_category_id.put_with_flags(
|
self.index.embedder_category_id.put_with_flags(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
heed::PutFlags::APPEND,
|
heed::PutFlags::APPEND,
|
||||||
embedder_name,
|
&index_embedding_config.name,
|
||||||
&index
|
&index
|
||||||
.try_into()
|
.try_into()
|
||||||
.map_err(|_| UserError::TooManyEmbedders(new_configs.len()))?,
|
.map_err(|_| UserError::TooManyEmbedders(new_configs.len()))?,
|
||||||
@ -1371,21 +1377,25 @@ impl InnerIndexSettings {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn embedders(
|
fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingConfigs> {
|
||||||
embedding_configs: Vec<(String, EmbeddingConfig, RoaringBitmap)>,
|
|
||||||
) -> Result<EmbeddingConfigs> {
|
|
||||||
let res: Result<_> = embedding_configs
|
let res: Result<_> = embedding_configs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(name, EmbeddingConfig { embedder_options, prompt }, _)| {
|
.map(
|
||||||
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
|IndexEmbeddingConfig {
|
||||||
|
name,
|
||||||
|
config: EmbeddingConfig { embedder_options, prompt },
|
||||||
|
..
|
||||||
|
}| {
|
||||||
|
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
||||||
|
|
||||||
let embedder = Arc::new(
|
let embedder = Arc::new(
|
||||||
Embedder::new(embedder_options.clone())
|
Embedder::new(embedder_options.clone())
|
||||||
.map_err(crate::vector::Error::from)
|
.map_err(crate::vector::Error::from)
|
||||||
.map_err(crate::Error::from)?,
|
.map_err(crate::Error::from)?,
|
||||||
);
|
);
|
||||||
Ok((name, (embedder, prompt)))
|
Ok((name, (embedder, prompt)))
|
||||||
})
|
},
|
||||||
|
)
|
||||||
.collect();
|
.collect();
|
||||||
res.map(EmbeddingConfigs::new)
|
res.map(EmbeddingConfigs::new)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user