mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Merge #4597
4597: Fix embeddings settings update r=ManyTheFish a=ManyTheFish # Pull Request - add some conditions reducing the work done when changing the settings - add some benchmarks on embedders ## Related issue Fixes #4585 Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
ebca29f3de
@ -198,11 +198,16 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
if document_is_kept {
|
if document_is_kept {
|
||||||
// Don't give up if the old prompt was failing
|
// Don't give up if the old prompt was failing
|
||||||
let old_prompt = prompt
|
let old_prompt = Some(prompt)
|
||||||
.render(obkv, DelAdd::Deletion, old_fields_ids_map)
|
// TODO: this filter works because we erase the vec database when a embedding setting changes.
|
||||||
.unwrap_or_default();
|
// When vector pipeline will be optimized, this should be removed.
|
||||||
|
.filter(|_| !settings_diff.reindex_vectors())
|
||||||
|
.map(|p| {
|
||||||
|
p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default()
|
||||||
|
});
|
||||||
let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||||
if old_prompt != new_prompt {
|
if old_prompt.as_ref() != Some(&new_prompt) {
|
||||||
|
let old_prompt = old_prompt.unwrap_or_default();
|
||||||
tracing::trace!(
|
tracing::trace!(
|
||||||
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
|
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
|
||||||
);
|
);
|
||||||
@ -224,6 +229,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
&mut manual_vectors_writer,
|
&mut manual_vectors_writer,
|
||||||
&mut key_buffer,
|
&mut key_buffer,
|
||||||
delta,
|
delta,
|
||||||
|
settings_diff,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -264,10 +270,15 @@ fn push_vectors_diff(
|
|||||||
manual_vectors_writer: &mut Writer<BufWriter<File>>,
|
manual_vectors_writer: &mut Writer<BufWriter<File>>,
|
||||||
key_buffer: &mut Vec<u8>,
|
key_buffer: &mut Vec<u8>,
|
||||||
delta: VectorStateDelta,
|
delta: VectorStateDelta,
|
||||||
|
settings_diff: &InnerIndexSettingsDiff,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
puffin::profile_function!();
|
puffin::profile_function!();
|
||||||
let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
|
let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
|
||||||
if must_remove {
|
if must_remove
|
||||||
|
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
|
||||||
|
// When vector pipeline will be optimized, this should be removed.
|
||||||
|
&& !settings_diff.reindex_vectors()
|
||||||
|
{
|
||||||
key_buffer.truncate(TRUNCATE_SIZE);
|
key_buffer.truncate(TRUNCATE_SIZE);
|
||||||
remove_vectors_writer.insert(&key_buffer, [])?;
|
remove_vectors_writer.insert(&key_buffer, [])?;
|
||||||
}
|
}
|
||||||
@ -295,6 +306,9 @@ fn push_vectors_diff(
|
|||||||
match eob {
|
match eob {
|
||||||
EitherOrBoth::Both(_, _) => (), // no need to touch anything
|
EitherOrBoth::Both(_, _) => (), // no need to touch anything
|
||||||
EitherOrBoth::Left(vector) => {
|
EitherOrBoth::Left(vector) => {
|
||||||
|
// TODO: the below condition works because we erase the vec database when a embedding setting changes.
|
||||||
|
// When vector pipeline will be optimized, this should be removed.
|
||||||
|
if !settings_diff.reindex_vectors() {
|
||||||
// We insert only the Del part of the Obkv to inform
|
// We insert only the Del part of the Obkv to inform
|
||||||
// that we only want to remove all those vectors.
|
// that we only want to remove all those vectors.
|
||||||
let mut obkv = KvWriterDelAdd::memory();
|
let mut obkv = KvWriterDelAdd::memory();
|
||||||
@ -302,6 +316,7 @@ fn push_vectors_diff(
|
|||||||
let bytes = obkv.into_inner()?;
|
let bytes = obkv.into_inner()?;
|
||||||
manual_vectors_writer.insert(&key_buffer, bytes)?;
|
manual_vectors_writer.insert(&key_buffer, bytes)?;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
EitherOrBoth::Right(vector) => {
|
EitherOrBoth::Right(vector) => {
|
||||||
// We insert only the Add part of the Obkv to inform
|
// We insert only the Add part of the Obkv to inform
|
||||||
// that we only want to remove all those vectors.
|
// that we only want to remove all those vectors.
|
||||||
|
68
workloads/movies-subset-hf-embeddings.json
Normal file
68
workloads/movies-subset-hf-embeddings.json
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
{
|
||||||
|
"name": "movies-subset-hf-embeddings",
|
||||||
|
"run_count": 5,
|
||||||
|
"extra_cli_args": [
|
||||||
|
"--max-indexing-threads=4"
|
||||||
|
],
|
||||||
|
"assets": {
|
||||||
|
"movies-100.json": {
|
||||||
|
"local_location": null,
|
||||||
|
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json",
|
||||||
|
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"commands": [
|
||||||
|
{
|
||||||
|
"route": "experimental-features",
|
||||||
|
"method": "PATCH",
|
||||||
|
"body": {
|
||||||
|
"inline": {
|
||||||
|
"vectorStore": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"synchronous": "DontWait"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"route": "indexes/movies/settings",
|
||||||
|
"method": "PATCH",
|
||||||
|
"body": {
|
||||||
|
"inline": {
|
||||||
|
"searchableAttributes": [
|
||||||
|
"title",
|
||||||
|
"overview"
|
||||||
|
],
|
||||||
|
"filterableAttributes": [
|
||||||
|
"genres",
|
||||||
|
"release_date"
|
||||||
|
],
|
||||||
|
"sortableAttributes": [
|
||||||
|
"release_date"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"synchronous": "WaitForTask"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"route": "indexes/movies/settings",
|
||||||
|
"method": "PATCH",
|
||||||
|
"body": {
|
||||||
|
"inline": {
|
||||||
|
"embedders": {
|
||||||
|
"default": {
|
||||||
|
"source": "huggingFace"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"synchronous": "WaitForTask"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"route": "indexes/movies/documents",
|
||||||
|
"method": "POST",
|
||||||
|
"body": {
|
||||||
|
"asset": "movies-100.json"
|
||||||
|
},
|
||||||
|
"synchronous": "WaitForTask"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
72
workloads/settings-add-embeddings.json
Normal file
72
workloads/settings-add-embeddings.json
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
{
|
||||||
|
"name": "settings-add-embeddings-hf",
|
||||||
|
"run_count": 5,
|
||||||
|
"extra_cli_args": [
|
||||||
|
"--max-indexing-threads=4"
|
||||||
|
],
|
||||||
|
"assets": {
|
||||||
|
"movies-100.json": {
|
||||||
|
"local_location": null,
|
||||||
|
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json",
|
||||||
|
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"commands": [
|
||||||
|
{
|
||||||
|
"route": "experimental-features",
|
||||||
|
"method": "PATCH",
|
||||||
|
"body": {
|
||||||
|
"inline": {
|
||||||
|
"vectorStore": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"synchronous": "DontWait"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"route": "indexes/movies/settings",
|
||||||
|
"method": "PATCH",
|
||||||
|
"body": {
|
||||||
|
"inline": {
|
||||||
|
"searchableAttributes": [
|
||||||
|
"title",
|
||||||
|
"overview"
|
||||||
|
],
|
||||||
|
"filterableAttributes": [
|
||||||
|
"genres",
|
||||||
|
"release_date"
|
||||||
|
],
|
||||||
|
"sortableAttributes": [
|
||||||
|
"release_date"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"synchronous": "DontWait"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"route": "indexes/movies/documents",
|
||||||
|
"method": "POST",
|
||||||
|
"body": {
|
||||||
|
"asset": "movies-100.json"
|
||||||
|
},
|
||||||
|
"synchronous": "WaitForTask"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"route": "indexes/movies/settings",
|
||||||
|
"method": "PATCH",
|
||||||
|
"body": {
|
||||||
|
"inline": {
|
||||||
|
"embedders": {
|
||||||
|
"default": {
|
||||||
|
"source": "huggingFace",
|
||||||
|
"model": null,
|
||||||
|
"revision": null,
|
||||||
|
"documentTemplate": null,
|
||||||
|
"distribution": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"synchronous": "WaitForTask"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user