mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 12:05:05 +08:00
Don't remove embedders that are not in the config from the document DB
This commit is contained in:
parent
8f7c8ca7f0
commit
16037e2169
@ -1,4 +1,4 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::{BTreeSet, HashMap};
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, BufReader};
|
use std::io::{self, BufReader};
|
||||||
@ -211,6 +211,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let mut docids = index.documents_ids(wtxn)?;
|
let mut docids = index.documents_ids(wtxn)?;
|
||||||
let mut iter = merger.into_stream_merger_iter()?;
|
let mut iter = merger.into_stream_merger_iter()?;
|
||||||
|
|
||||||
|
let embedders: BTreeSet<_> =
|
||||||
|
index.embedding_configs(wtxn)?.into_iter().map(|(k, _v)| k).collect();
|
||||||
let mut vectors_buffer = Vec::new();
|
let mut vectors_buffer = Vec::new();
|
||||||
while let Some((key, reader)) = iter.next()? {
|
while let Some((key, reader)) = iter.next()? {
|
||||||
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
||||||
@ -225,9 +227,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let del_add_reader = KvReaderDelAdd::new(value);
|
let del_add_reader = KvReaderDelAdd::new(value);
|
||||||
|
|
||||||
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
|
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
|
||||||
let addition = match vectors_fid {
|
let addition = if vectors_fid == Some(field_id) {
|
||||||
// for the "_vectors" field, only keep vectors that are marked as userProvided
|
'vectors: {
|
||||||
Some(vectors_fid) if vectors_fid == field_id => 'vectors: {
|
|
||||||
vectors_buffer.clear();
|
vectors_buffer.clear();
|
||||||
let Ok(mut vectors) =
|
let Ok(mut vectors) =
|
||||||
crate::vector::parsed_vectors::ParsedVectors::from_bytes(
|
crate::vector::parsed_vectors::ParsedVectors::from_bytes(
|
||||||
@ -237,7 +238,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
// if the `_vectors` field cannot be parsed as map of vectors, just write it as-is
|
// if the `_vectors` field cannot be parsed as map of vectors, just write it as-is
|
||||||
break 'vectors Some(addition);
|
break 'vectors Some(addition);
|
||||||
};
|
};
|
||||||
vectors.retain_user_provided_vectors();
|
vectors.retain_user_provided_vectors(&embedders);
|
||||||
let crate::vector::parsed_vectors::ParsedVectors(vectors) = vectors;
|
let crate::vector::parsed_vectors::ParsedVectors(vectors) = vectors;
|
||||||
if vectors.is_empty() {
|
if vectors.is_empty() {
|
||||||
// skip writing empty `_vectors` map
|
// skip writing empty `_vectors` map
|
||||||
@ -248,8 +249,10 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
.map_err(InternalError::SerdeJson)?;
|
.map_err(InternalError::SerdeJson)?;
|
||||||
Some(vectors_buffer.as_slice())
|
Some(vectors_buffer.as_slice())
|
||||||
}
|
}
|
||||||
_ => Some(addition),
|
} else {
|
||||||
|
Some(addition)
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(addition) = addition {
|
if let Some(addition) = addition {
|
||||||
writer.insert(field_id, addition)?;
|
writer.insert(field_id, addition)?;
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::{BTreeMap, BTreeSet};
|
||||||
|
|
||||||
use obkv::KvReader;
|
use obkv::KvReader;
|
||||||
use serde_json::{from_slice, Value};
|
use serde_json::{from_slice, Value};
|
||||||
@ -89,10 +89,14 @@ impl ParsedVectors {
|
|||||||
Ok(ParsedVectors(value))
|
Ok(ParsedVectors(value))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn retain_user_provided_vectors(&mut self) {
|
pub fn retain_user_provided_vectors(&mut self, embedders: &BTreeSet<String>) {
|
||||||
self.0.retain(|_k, v| match v {
|
self.0.retain(|k, v| match v {
|
||||||
Vectors::ImplicitlyUserProvided(_) => true,
|
Vectors::ImplicitlyUserProvided(_) => true,
|
||||||
Vectors::Explicit(ExplicitVectors { embeddings: _, user_provided }) => *user_provided,
|
Vectors::Explicit(ExplicitVectors { embeddings: _, user_provided }) => {
|
||||||
|
*user_provided
|
||||||
|
// if the embedder is not in the config, then never touch it
|
||||||
|
|| !embedders.contains(k)
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user