mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 17:38:28 +08:00
Update older embedding
This commit is contained in:
parent
1960003805
commit
af9f96e2af
@ -21,7 +21,7 @@ use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
||||
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::vector::settings::ReindexAction;
|
||||
use crate::vector::{Embedder, Embeddings};
|
||||
use crate::vector::{Embedder, Embedding};
|
||||
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
|
||||
|
||||
/// The length of the elements that are always in the buffer when inserting new values.
|
||||
@ -536,9 +536,11 @@ fn extract_vector_document_diff(
|
||||
}
|
||||
// Don't give up if the old prompt was failing
|
||||
let old_prompt = Some(&prompt).map(|p| {
|
||||
p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
|
||||
p.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map)
|
||||
.unwrap_or_default()
|
||||
});
|
||||
let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||
let new_prompt =
|
||||
prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||
if old_prompt.as_ref() != Some(&new_prompt) {
|
||||
let old_prompt = old_prompt.unwrap_or_default();
|
||||
tracing::trace!(
|
||||
@ -570,7 +572,7 @@ fn extract_vector_document_diff(
|
||||
return Ok(VectorStateDelta::NoChange);
|
||||
}
|
||||
// becomes autogenerated
|
||||
VectorStateDelta::NowGenerated(prompt.render(
|
||||
VectorStateDelta::NowGenerated(prompt.render_kvdeladd(
|
||||
obkv,
|
||||
DelAdd::Addition,
|
||||
new_fields_ids_map,
|
||||
@ -613,9 +615,10 @@ fn regenerate_if_prompt_changed(
|
||||
&FieldsIdsMapWithMetadata,
|
||||
),
|
||||
) -> Result<VectorStateDelta> {
|
||||
let old_prompt =
|
||||
old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default());
|
||||
let new_prompt = new_prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||
let old_prompt = old_prompt
|
||||
.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map)
|
||||
.unwrap_or(Default::default());
|
||||
let new_prompt = new_prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||
|
||||
if new_prompt == old_prompt {
|
||||
return Ok(VectorStateDelta::NoChange);
|
||||
@ -628,7 +631,7 @@ fn regenerate_prompt(
|
||||
prompt: &Prompt,
|
||||
new_fields_ids_map: &FieldsIdsMapWithMetadata,
|
||||
) -> Result<VectorStateDelta> {
|
||||
let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||
let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||
|
||||
Ok(VectorStateDelta::NowGenerated(prompt))
|
||||
}
|
||||
@ -738,7 +741,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
||||
.flat_map(|docids| docids.iter())
|
||||
.zip(chunked_embeds.iter().flat_map(|embeds| embeds.iter()))
|
||||
{
|
||||
state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings.as_inner()))?;
|
||||
state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings))?;
|
||||
}
|
||||
chunks_ids.clear();
|
||||
}
|
||||
@ -759,7 +762,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
||||
.flat_map(|docids| docids.iter())
|
||||
.zip(chunked_embeds.iter().flat_map(|embeds| embeds.iter()))
|
||||
{
|
||||
state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings.as_inner()))?;
|
||||
state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings))?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -775,7 +778,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
||||
|
||||
if let Some(embeds) = embeds.first() {
|
||||
for (docid, embeddings) in current_chunk_ids.iter().zip(embeds.iter()) {
|
||||
state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings.as_inner()))?;
|
||||
state_writer.insert(docid.to_be_bytes(), cast_slice(embeddings))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -790,7 +793,7 @@ fn embed_chunks(
|
||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||
request_threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Vec<Embeddings<f32>>>> {
|
||||
) -> Result<Vec<Vec<Embedding>>> {
|
||||
match embedder.embed_chunks(text_chunks, request_threads) {
|
||||
Ok(chunks) => Ok(chunks),
|
||||
Err(error) => {
|
||||
|
Loading…
Reference in New Issue
Block a user