rename all occurrences of user_defined to user_provided for consistency

This commit is contained in:
Tamo 2024-06-05 15:38:49 +02:00
parent b7349910d9
commit d85ab23b82
8 changed files with 42 additions and 37 deletions

View File

@ -955,7 +955,7 @@ impl IndexScheduler {
let user_provided = embedding_configs let user_provided = embedding_configs
.iter() .iter()
.find(|conf| conf.name == embedder_name) .find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_defined.contains(id)); .is_some_and(|conf| conf.user_provided.contains(id));
let embeddings = ExplicitVectors { let embeddings = ExplicitVectors {
embeddings: VectorOrArrayOfVectors::from_array_of_vectors( embeddings: VectorOrArrayOfVectors::from_array_of_vectors(

View File

@ -3062,9 +3062,9 @@ mod tests {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let configs = index.embedding_configs(&rtxn).unwrap(); let configs = index.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, user_defined } = configs.first().unwrap(); let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
insta::assert_snapshot!(name, @"default"); insta::assert_snapshot!(name, @"default");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>"); insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(config.embedder_options); insta::assert_json_snapshot!(config.embedder_options);
} }
@ -5029,17 +5029,17 @@ mod tests {
let configs = index.embedding_configs(&rtxn).unwrap(); let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below // for consistency with the below
#[allow(clippy::get_first)] #[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: fakerest_config, user_defined } = let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
configs.get(0).unwrap(); configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest"); insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>"); insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(fakerest_config.embedder_options); insta::assert_json_snapshot!(fakerest_config.embedder_options);
let fakerest_name = name.clone(); let fakerest_name = name.clone();
let IndexEmbeddingConfig { name, config: simple_hf_config, user_defined } = let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
configs.get(1).unwrap(); configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf"); insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>"); insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(simple_hf_config.embedder_options); insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone(); let simple_hf_name = name.clone();
@ -5111,13 +5111,14 @@ mod tests {
let configs = index.embedding_configs(&rtxn).unwrap(); let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below // for consistency with the below
#[allow(clippy::get_first)] #[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_defined } = configs.get(0).unwrap(); let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest"); insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
let IndexEmbeddingConfig { name, config: _, user_defined } = configs.get(1).unwrap(); let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf"); insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>"); insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
let embeddings = index.embeddings(&rtxn, 0).unwrap(); let embeddings = index.embeddings(&rtxn, 0).unwrap();
@ -5185,15 +5186,15 @@ mod tests {
let configs = index.embedding_configs(&rtxn).unwrap(); let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below // for consistency with the below
#[allow(clippy::get_first)] #[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_defined } = let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap(); configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest"); insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>"); insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
let IndexEmbeddingConfig { name, config: _, user_defined } = let IndexEmbeddingConfig { name, config: _, user_provided } =
configs.get(1).unwrap(); configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf"); insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>"); insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
let embeddings = index.embeddings(&rtxn, 0).unwrap(); let embeddings = index.embeddings(&rtxn, 0).unwrap();

View File

@ -1067,12 +1067,12 @@ fn make_hits(
if retrieve_vectors { if retrieve_vectors {
let mut vectors = serde_json::Map::new(); let mut vectors = serde_json::Map::new();
for (name, mut vector) in index.embeddings(rtxn, id)? { for (name, mut vector) in index.embeddings(rtxn, id)? {
let user_defined = embedding_configs let user_provided = embedding_configs
.iter() .iter()
.find(|conf| conf.name == name) .find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_defined.contains(id)); .is_some_and(|conf| conf.user_provided.contains(id));
let mut embedding = serde_json::Map::new(); let mut embedding = serde_json::Map::new();
embedding.insert("userDefined".to_string(), user_defined.into()); embedding.insert("userProvided".to_string(), user_provided.into());
match vector.as_mut_slice() { match vector.as_mut_slice() {
[one] => embedding.insert("embedding".to_string(), std::mem::take(one).into()), [one] => embedding.insert("embedding".to_string(), std::mem::take(one).into()),
_ => embedding.insert("embedding".to_string(), vector.into()), _ => embedding.insert("embedding".to_string(), vector.into()),

View File

@ -1672,7 +1672,7 @@ impl Index {
pub struct IndexEmbeddingConfig { pub struct IndexEmbeddingConfig {
pub name: String, pub name: String,
pub config: EmbeddingConfig, pub config: EmbeddingConfig,
pub user_defined: RoaringBitmap, pub user_provided: RoaringBitmap,
} }
#[cfg(test)] #[cfg(test)]

View File

@ -2623,10 +2623,10 @@ mod tests {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let mut embedding_configs = index.embedding_configs(&rtxn).unwrap(); let mut embedding_configs = index.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name: embedder_name, config: embedder, user_defined } = let IndexEmbeddingConfig { name: embedder_name, config: embedder, user_provided } =
embedding_configs.pop().unwrap(); embedding_configs.pop().unwrap();
insta::assert_snapshot!(embedder_name, @"manual"); insta::assert_snapshot!(embedder_name, @"manual");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0, 1, 2]>"); insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>");
let embedder = let embedder =
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap()); std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
let res = index let res = index

View File

@ -625,8 +625,8 @@ pub(crate) fn write_typed_chunk_into_index(
let mut remove_vectors_builder = MergerBuilder::new(keep_first as MergeFn); let mut remove_vectors_builder = MergerBuilder::new(keep_first as MergeFn);
let mut manual_vectors_builder = MergerBuilder::new(keep_first as MergeFn); let mut manual_vectors_builder = MergerBuilder::new(keep_first as MergeFn);
let mut embeddings_builder = MergerBuilder::new(keep_first as MergeFn); let mut embeddings_builder = MergerBuilder::new(keep_first as MergeFn);
let mut user_defined = RoaringBitmap::new(); let mut user_provided = RoaringBitmap::new();
let mut remove_from_user_defined = RoaringBitmap::new(); let mut remove_from_user_provided = RoaringBitmap::new();
let mut params = None; let mut params = None;
for typed_chunk in typed_chunks { for typed_chunk in typed_chunks {
let TypedChunk::VectorPoints { let TypedChunk::VectorPoints {
@ -649,8 +649,8 @@ pub(crate) fn write_typed_chunk_into_index(
if let Some(embeddings) = embeddings { if let Some(embeddings) = embeddings {
embeddings_builder.push(embeddings.into_cursor()?); embeddings_builder.push(embeddings.into_cursor()?);
} }
user_defined |= ud; user_provided |= ud;
remove_from_user_defined |= rud; remove_from_user_provided |= rud;
} }
// typed chunks has always at least 1 chunk. // typed chunks has always at least 1 chunk.
@ -661,8 +661,8 @@ pub(crate) fn write_typed_chunk_into_index(
.iter_mut() .iter_mut()
.find(|IndexEmbeddingConfig { name, .. }| name == &embedder_name) .find(|IndexEmbeddingConfig { name, .. }| name == &embedder_name)
.unwrap(); .unwrap();
index_embedder_config.user_defined -= remove_from_user_defined; index_embedder_config.user_provided -= remove_from_user_provided;
index_embedder_config.user_defined |= user_defined; index_embedder_config.user_provided |= user_provided;
index.put_embedding_configs(wtxn, embedding_configs)?; index.put_embedding_configs(wtxn, embedding_configs)?;

View File

@ -932,9 +932,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
let old_configs: BTreeMap<String, (Setting<EmbeddingSettings>, RoaringBitmap)> = let old_configs: BTreeMap<String, (Setting<EmbeddingSettings>, RoaringBitmap)> =
old_configs old_configs
.into_iter() .into_iter()
.map(|IndexEmbeddingConfig { name, config, user_defined }| { .map(
(name, (Setting::Set(config.into()), user_defined)) |IndexEmbeddingConfig { name, config, user_provided: user_defined }| {
}) (name, (Setting::Set(config.into()), user_defined))
},
)
.collect(); .collect();
let mut new_configs = BTreeMap::new(); let mut new_configs = BTreeMap::new();
@ -944,19 +946,19 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
{ {
match joined { match joined {
// updated config // updated config
EitherOrBoth::Both((name, (mut old, user_defined)), (_, new)) => { EitherOrBoth::Both((name, (mut old, user_provided)), (_, new)) => {
changed |= EmbeddingSettings::apply_and_need_reindex(&mut old, new); changed |= EmbeddingSettings::apply_and_need_reindex(&mut old, new);
if changed { if changed {
tracing::debug!( tracing::debug!(
embedder = name, embedder = name,
documents = user_defined.len(), user_provided = user_provided.len(),
"need reindex" "need reindex"
); );
} else { } else {
tracing::debug!(embedder = name, "skip reindex"); tracing::debug!(embedder = name, "skip reindex");
} }
let new = validate_embedding_settings(old, &name)?; let new = validate_embedding_settings(old, &name)?;
new_configs.insert(name, (new, user_defined)); new_configs.insert(name, (new, user_provided));
} }
// unchanged config // unchanged config
EitherOrBoth::Left((name, setting)) => { EitherOrBoth::Left((name, setting)) => {
@ -979,15 +981,17 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
} }
let new_configs: Vec<IndexEmbeddingConfig> = new_configs let new_configs: Vec<IndexEmbeddingConfig> = new_configs
.into_iter() .into_iter()
.filter_map(|(name, (config, user_defined))| match config { .filter_map(|(name, (config, user_provided))| match config {
Setting::Set(config) => { Setting::Set(config) => Some(IndexEmbeddingConfig {
Some(IndexEmbeddingConfig { name, config: config.into(), user_defined }) name,
} config: config.into(),
user_provided,
}),
Setting::Reset => None, Setting::Reset => None,
Setting::NotSet => Some(IndexEmbeddingConfig { Setting::NotSet => Some(IndexEmbeddingConfig {
name, name,
config: EmbeddingSettings::default().into(), config: EmbeddingSettings::default().into(),
user_defined, user_provided,
}), }),
}) })
.collect(); .collect();

View File

@ -73,7 +73,7 @@ impl ParsedVectorsDiff {
} }
.flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, Some(vec))).collect()); .flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, Some(vec))).collect());
for embedding_config in embedders_configs { for embedding_config in embedders_configs {
if embedding_config.user_defined.contains(docid) { if embedding_config.user_provided.contains(docid) {
old.entry(embedding_config.name.to_string()).or_insert(None); old.entry(embedding_config.name.to_string()).or_insert(None);
} }
} }